Add content matching to workflow trigger

This commit is contained in:
shamoon 2023-12-30 08:47:03 -08:00
parent c67747ff9f
commit 0e716d0566
10 changed files with 177 additions and 4 deletions

View File

@ -320,7 +320,8 @@ Workflows allow you to filter by:
- File path, including wildcards. Note that enabling `PAPERLESS_CONSUMER_RECURSIVE` would allow, for - File path, including wildcards. Note that enabling `PAPERLESS_CONSUMER_RECURSIVE` would allow, for
example, automatically assigning documents to different owners based on the upload directory. example, automatically assigning documents to different owners based on the upload directory.
- Mail rule. Choosing this option will force 'mail fetch' to be the workflow source. - Mail rule. Choosing this option will force 'mail fetch' to be the workflow source.
- Tags (`Added` and `Updated` triggers only). Will filter for documents with any of the specified tags - Content matching (`Added` and `Updated` triggers only). Filter document content using the matching settings.
- Tags (`Added` and `Updated` triggers only). Filter for documents with any of the specified tags
- Document type (`Added` and `Updated` triggers only). Filter documents with this doc type - Document type (`Added` and `Updated` triggers only). Filter documents with this doc type
- Correspondent (`Added` and `Updated` triggers only). Filter documents with this correspondent - Correspondent (`Added` and `Updated` triggers only). Filter documents with this correspondent

View File

@ -181,6 +181,15 @@
<pngx-input-text i18n-title title="Filter path" formControlName="filter_path" i18n-hint hint="Apply to documents that match this path. Wildcards specified as * are allowed. Case insensitive.</a>" [error]="error?.filter_path"></pngx-input-text> <pngx-input-text i18n-title title="Filter path" formControlName="filter_path" i18n-hint hint="Apply to documents that match this path. Wildcards specified as * are allowed. Case insensitive.</a>" [error]="error?.filter_path"></pngx-input-text>
<pngx-input-select i18n-title title="Filter mail rule" [items]="mailRules" [allowNull]="true" formControlName="filter_mailrule" i18n-hint hint="Apply to documents consumed via this mail rule." [error]="error?.filter_mailrule"></pngx-input-select> <pngx-input-select i18n-title title="Filter mail rule" [items]="mailRules" [allowNull]="true" formControlName="filter_mailrule" i18n-hint hint="Apply to documents consumed via this mail rule." [error]="error?.filter_mailrule"></pngx-input-select>
} }
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) {
  <!-- Content matching inputs, shown for the "document added" and "document updated" trigger types -->
  <pngx-input-select i18n-title title="Content matching algorithm" [items]="getMatchingAlgorithms()" formControlName="matching_algorithm"></pngx-input-select>
  <!-- Pattern and case sensitivity are rendered together under the same condition -->
  @if (patternRequired) {
    <pngx-input-text i18n-title title="Content matching pattern" formControlName="match" [error]="error?.match"></pngx-input-text>
    <pngx-input-check i18n-title title="Case insensitive" formControlName="is_insensitive"></pngx-input-check>
  }
}
</div> </div>
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) { @if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) {
<div class="col-md-6"> <div class="col-md-6">

View File

@ -37,6 +37,7 @@ import {
WorkflowAction, WorkflowAction,
WorkflowActionType, WorkflowActionType,
} from 'src/app/data/workflow-action' } from 'src/app/data/workflow-action'
import { MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model'
const workflow: Workflow = { const workflow: Workflow = {
name: 'Workflow 1', name: 'Workflow 1',
@ -216,4 +217,10 @@ describe('ConsumptionTemplateEditDialogComponent', () => {
expect(action1.id).toBeNull() expect(action1.id).toBeNull()
expect(action2.id).toBeNull() expect(action2.id).toBeNull()
}) })
it('should not include auto matching in algorithms', () => {
  const algorithms = component.getMatchingAlgorithms()
  // Compare ids instead of object references: `not.toContain(find(...))`
  // would pass vacuously if the lookup of the auto entry returned undefined
  expect(algorithms.map((a) => a.id)).not.toContain(MATCH_AUTO)
  // Every other algorithm must still be offered, in the original order
  expect(algorithms).toEqual(
    MATCHING_ALGORITHMS.filter((a) => a.id !== MATCH_AUTO)
  )
})
}) })

View File

@ -26,6 +26,11 @@ import {
WorkflowActionType, WorkflowActionType,
} from 'src/app/data/workflow-action' } from 'src/app/data/workflow-action'
import { CdkDragDrop, moveItemInArray } from '@angular/cdk/drag-drop' import { CdkDragDrop, moveItemInArray } from '@angular/cdk/drag-drop'
import {
MATCHING_ALGORITHMS,
MATCH_AUTO,
MATCH_NONE,
} from 'src/app/data/matching-model'
export const DOCUMENT_SOURCE_OPTIONS = [ export const DOCUMENT_SOURCE_OPTIONS = [
{ {
@ -64,6 +69,10 @@ export const WORKFLOW_ACTION_OPTIONS = [
}, },
] ]
/** Matching algorithms offered for workflow triggers: all of them except auto matching. */
const TRIGGER_MATCHING_ALGORITHMS = MATCHING_ALGORITHMS.filter(
  (algorithm) => algorithm.id !== MATCH_AUTO
)
@Component({ @Component({
selector: 'pngx-workflow-edit-dialog', selector: 'pngx-workflow-edit-dialog',
templateUrl: './workflow-edit-dialog.component.html', templateUrl: './workflow-edit-dialog.component.html',
@ -141,6 +150,11 @@ export class WorkflowEditDialogComponent
}) })
} }
/**
 * Algorithms selectable for a trigger's content matching.
 *
 * @returns the module-level list from which auto matching has been filtered out
 */
getMatchingAlgorithms() {
  return TRIGGER_MATCHING_ALGORITHMS
}
ngOnInit(): void { ngOnInit(): void {
super.ngOnInit() super.ngOnInit()
this.updateTriggerActionFields() this.updateTriggerActionFields()
@ -165,6 +179,9 @@ export class WorkflowEditDialogComponent
filter_filename: new FormControl(trigger.filter_filename), filter_filename: new FormControl(trigger.filter_filename),
filter_path: new FormControl(trigger.filter_path), filter_path: new FormControl(trigger.filter_path),
filter_mailrule: new FormControl(trigger.filter_mailrule), filter_mailrule: new FormControl(trigger.filter_mailrule),
// Seed the content-matching controls from the trigger being edited, like the
// sibling filter_* controls; fall back to the defaults only when unset
matching_algorithm: new FormControl(trigger.matching_algorithm ?? MATCH_NONE),
match: new FormControl(trigger.match ?? ''),
is_insensitive: new FormControl(trigger.is_insensitive ?? true),
filter_has_tags: new FormControl(trigger.filter_has_tags), filter_has_tags: new FormControl(trigger.filter_has_tags),
filter_has_correspondent: new FormControl( filter_has_correspondent: new FormControl(
trigger.filter_has_correspondent trigger.filter_has_correspondent

View File

@ -23,6 +23,12 @@ export interface WorkflowTrigger extends ObjectWithId {
filter_mailrule?: number // MailRule.id filter_mailrule?: number // MailRule.id
match?: string
matching_algorithm?: number
is_insensitive?: boolean
filter_has_tags?: number[] // Tag.id[] filter_has_tags?: number[] // Tag.id[]
filter_has_correspondent?: number // Correspondent.id filter_has_correspondent?: number // Correspondent.id

View File

@ -21,9 +21,13 @@ logger = logging.getLogger("paperless.matching")
def log_reason(matching_model: MatchingModel, document: Document, reason: str): def log_reason(matching_model: MatchingModel, document: Document, reason: str):
class_name = type(matching_model).__name__ class_name = type(matching_model).__name__
# Not every matching object has a `name` attribute; use its string form then
if hasattr(matching_model, "name"):
    name = matching_model.name
else:
    name = str(matching_model)
logger.debug( logger.debug(
f"{class_name} {matching_model.name} matched on document " f"{class_name} {name} matched on document {document} because {reason}",
f"{document} because {reason}",
) )
@ -318,6 +322,15 @@ def document_matches_workflow(
): ):
# document is type Document # document is type Document
if (
trigger.matching_algorithm > MatchingModel.MATCH_NONE
and not matches(trigger, document)
):
log_match_failure(
f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match",
)
trigger_matched = False
# Document has_tags vs document tags # Document has_tags vs document tags
if ( if (
trigger.filter_has_tags.all().count() > 0 trigger.filter_has_tags.all().count() > 0

View File

@ -407,6 +407,29 @@ class Migration(migrations.Migration):
verbose_name="filter documents from this mail rule", verbose_name="filter documents from this mail rule",
), ),
), ),
(
"matching_algorithm",
models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
],
default=0,
verbose_name="matching algorithm",
),
),
(
"match",
models.CharField(blank=True, max_length=256, verbose_name="match"),
),
(
"is_insensitive",
models.BooleanField(default=True, verbose_name="is insensitive"),
),
( (
"filter_has_tags", "filter_has_tags",
models.ManyToManyField( models.ManyToManyField(

View File

@ -889,6 +889,15 @@ if settings.AUDIT_LOG_ENABLED:
class WorkflowTrigger(models.Model): class WorkflowTrigger(models.Model):
class WorkflowTriggerMatching(models.IntegerChoices):
# Content-matching algorithms selectable for a workflow trigger. Each member
# reuses the corresponding MatchingModel.MATCH_* constant as its value.
# No auto matching
NONE = MatchingModel.MATCH_NONE, _("None")
ANY = MatchingModel.MATCH_ANY, _("Any word")
ALL = MatchingModel.MATCH_ALL, _("All words")
LITERAL = MatchingModel.MATCH_LITERAL, _("Exact match")
REGEX = MatchingModel.MATCH_REGEX, _("Regular expression")
FUZZY = MatchingModel.MATCH_FUZZY, _("Fuzzy word")
class WorkflowTriggerType(models.IntegerChoices): class WorkflowTriggerType(models.IntegerChoices):
CONSUMPTION = 1, _("Consumption") CONSUMPTION = 1, _("Consumption")
DOCUMENT_ADDED = 2, _("Document Added") DOCUMENT_ADDED = 2, _("Document Added")
@ -943,6 +952,16 @@ class WorkflowTrigger(models.Model):
verbose_name=_("filter documents from this mail rule"), verbose_name=_("filter documents from this mail rule"),
) )
match = models.CharField(_("match"), max_length=256, blank=True)
matching_algorithm = models.PositiveIntegerField(
_("matching algorithm"),
choices=WorkflowTriggerMatching.choices,
default=WorkflowTriggerMatching.NONE,
)
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
filter_has_tags = models.ManyToManyField( filter_has_tags = models.ManyToManyField(
Tag, Tag,
blank=True, blank=True,

View File

@ -1302,6 +1302,9 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
"filter_path", "filter_path",
"filter_filename", "filter_filename",
"filter_mailrule", "filter_mailrule",
"matching_algorithm",
"match",
"is_insensitive",
"filter_has_tags", "filter_has_tags",
"filter_has_correspondent", "filter_has_correspondent",
"filter_has_document_type", "filter_has_document_type",

View File

@ -16,6 +16,7 @@ from documents.models import Correspondent
from documents.models import CustomField from documents.models import CustomField
from documents.models import Document from documents.models import Document
from documents.models import DocumentType from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import StoragePath from documents.models import StoragePath
from documents.models import Tag from documents.models import Tag
from documents.models import Workflow from documents.models import Workflow
@ -742,6 +743,81 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
expected_str = f"Document filename {doc.original_filename} does not match" expected_str = f"Document filename {doc.original_filename} does not match"
self.assertIn(expected_str, cm.output[1]) self.assertIn(expected_str, cm.output[1])
def test_document_added_match_content_matching(self):
    """
    A document-added trigger configured with literal content matching is
    attached to a workflow, then the consumption-finished signal is sent for
    a document whose content contains the match string. Both the content
    match and the workflow match are expected in the matching log.
    """
    content_trigger = WorkflowTrigger.objects.create(
        type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        matching_algorithm=MatchingModel.MATCH_LITERAL,
        match="foo",
        is_insensitive=True,
    )
    title_action = WorkflowAction.objects.create(
        assign_title="Doc content matching worked",
        assign_owner=self.user2,
    )
    workflow = Workflow.objects.create(name="Workflow 1", order=0)
    workflow.triggers.add(content_trigger)
    workflow.actions.add(title_action)
    workflow.save()

    matching_doc = Document.objects.create(
        title="sample test",
        correspondent=self.c,
        original_filename="sample.pdf",
        content="Hello world foo bar",
    )

    with self.assertLogs("paperless.matching", level="DEBUG") as captured:
        document_consumption_finished.send(
            sender=self.__class__,
            document=matching_doc,
        )
        # First record: the content match itself
        matched_prefix = f"WorkflowTrigger {content_trigger} matched on document"
        matched_reason = 'because it contains this string: "foo"'
        self.assertIn(matched_prefix, captured.output[0])
        self.assertIn(matched_reason, captured.output[0])
        # Second record: the trigger matched as part of the workflow
        workflow_matched = f"Document matched {content_trigger} from {workflow}"
        self.assertIn(workflow_matched, captured.output[1])
def test_document_added_no_match_content_matching(self):
    """
    A document-added trigger configured with literal content matching is
    attached to a workflow, then the consumption-finished signal is sent for
    a document whose content does not contain the match string. The log is
    expected to report that the workflow did not match, followed by the
    content-matching reason.
    """
    trigger = WorkflowTrigger.objects.create(
        type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        matching_algorithm=MatchingModel.MATCH_LITERAL,
        match="foo",
        is_insensitive=True,
    )
    # objects.create() already persists the row, so no extra save() is needed
    action = WorkflowAction.objects.create(
        assign_title="Doc content matching worked",
        assign_owner=self.user2,
    )
    w = Workflow.objects.create(
        name="Workflow 1",
        order=0,
    )
    w.triggers.add(trigger)
    w.actions.add(action)
    w.save()

    doc = Document.objects.create(
        title="sample test",
        correspondent=self.c,
        original_filename="sample.pdf",
        content="Hello world bar",
    )

    with self.assertLogs("paperless.matching", level="DEBUG") as cm:
        document_consumption_finished.send(
            sender=self.__class__,
            document=doc,
        )
        expected_str = f"Document did not match {w}"
        self.assertIn(expected_str, cm.output[0])
        expected_str = f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match"
        self.assertIn(expected_str, cm.output[1])
def test_document_added_no_match_tags(self): def test_document_added_no_match_tags(self):
trigger = WorkflowTrigger.objects.create( trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED, type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
@ -751,7 +827,6 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
assign_title="Doc assign owner", assign_title="Doc assign owner",
assign_owner=self.user2, assign_owner=self.user2,
) )
action.save()
w = Workflow.objects.create( w = Workflow.objects.create(
name="Workflow 1", name="Workflow 1",
order=0, order=0,