From 0e716d0566e87b00f58bfd99a5b1c958ee1a9928 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Sat, 30 Dec 2023 08:47:03 -0800 Subject: [PATCH] Add content matching to workflow trigger --- docs/usage.md | 3 +- .../workflow-edit-dialog.component.html | 9 +++ .../workflow-edit-dialog.component.spec.ts | 7 ++ .../workflow-edit-dialog.component.ts | 17 ++++ src-ui/src/app/data/workflow-trigger.ts | 6 ++ src/documents/matching.py | 17 +++- ...workflowaction_workflowtrigger_and_more.py | 23 ++++++ src/documents/models.py | 19 +++++ src/documents/serialisers.py | 3 + src/documents/tests/test_workflows.py | 77 ++++++++++++++++++- 10 files changed, 177 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 0796b1e62..e3897a9e0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -320,7 +320,8 @@ Workflows allow you to filter by: - File path, including wildcards. Note that enabling `PAPERLESS_CONSUMER_RECURSIVE` would allow, for example, automatically assigning documents to different owners based on the upload directory. - Mail rule. Choosing this option will force 'mail fetch' to be the workflow source. -- Tags (`Added` and `Updated` triggers only). Will filter for documents with any of the specified tags +- Content matching (`Added` and `Updated` triggers only). Filter document content using the matching settings. +- Tags (`Added` and `Updated` triggers only). Filter for documents with any of the specified tags - Document type (`Added` and `Updated` triggers only). Filter documents with this doc type - Correspondent (`Added` and `Updated` triggers only). 
Filter documents with this correspondent diff --git a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html index a92e62e0a..a318dffe7 100644 --- a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html +++ b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html @@ -181,6 +181,15 @@ } + @if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) { + + @if (patternRequired) { + + } + @if (patternRequired) { + + } + } @if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) {
diff --git a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.spec.ts b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.spec.ts index ebede83cd..b75f1bea0 100644 --- a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.spec.ts +++ b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.spec.ts @@ -37,6 +37,7 @@ import { WorkflowAction, WorkflowActionType, } from 'src/app/data/workflow-action' +import { MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model' const workflow: Workflow = { name: 'Workflow 1', @@ -216,4 +217,10 @@ describe('ConsumptionTemplateEditDialogComponent', () => { expect(action1.id).toBeNull() expect(action2.id).toBeNull() }) + + it('should not include auto matching in algorithms', () => { + expect(component.getMatchingAlgorithms()).not.toContain( + MATCHING_ALGORITHMS.find((a) => a.id === MATCH_AUTO) + ) + }) }) diff --git a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts index 4879d25ba..46faa3b0b 100644 --- a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts +++ b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts @@ -26,6 +26,11 @@ import { WorkflowActionType, } from 'src/app/data/workflow-action' import { CdkDragDrop, moveItemInArray } from '@angular/cdk/drag-drop' +import { + MATCHING_ALGORITHMS, + MATCH_AUTO, + MATCH_NONE, +} from 'src/app/data/matching-model' export const DOCUMENT_SOURCE_OPTIONS = [ { @@ -64,6 +69,10 @@ export const WORKFLOW_ACTION_OPTIONS = [ }, ] +const TRIGGER_MATCHING_ALGORITHMS = MATCHING_ALGORITHMS.filter( + (a) => a.id !== MATCH_AUTO +) + @Component({ selector: 
'pngx-workflow-edit-dialog', templateUrl: './workflow-edit-dialog.component.html', @@ -141,6 +150,11 @@ export class WorkflowEditDialogComponent }) } + getMatchingAlgorithms() { + // No auto matching + return TRIGGER_MATCHING_ALGORITHMS + } + ngOnInit(): void { super.ngOnInit() this.updateTriggerActionFields() @@ -165,6 +179,9 @@ export class WorkflowEditDialogComponent filter_filename: new FormControl(trigger.filter_filename), filter_path: new FormControl(trigger.filter_path), filter_mailrule: new FormControl(trigger.filter_mailrule), + matching_algorithm: new FormControl(trigger.matching_algorithm ?? MATCH_NONE), /* keep the trigger's stored matching settings; defaults apply only when unset */ + match: new FormControl(trigger.match ?? ''), + is_insensitive: new FormControl(trigger.is_insensitive ?? true), filter_has_tags: new FormControl(trigger.filter_has_tags), filter_has_correspondent: new FormControl( trigger.filter_has_correspondent diff --git a/src-ui/src/app/data/workflow-trigger.ts b/src-ui/src/app/data/workflow-trigger.ts index cab3762a9..3e3bf8cf8 100644 --- a/src-ui/src/app/data/workflow-trigger.ts +++ b/src-ui/src/app/data/workflow-trigger.ts @@ -23,6 +23,12 @@ export interface WorkflowTrigger extends ObjectWithId { filter_mailrule?: number // MailRule.id + match?: string + + matching_algorithm?: number + + is_insensitive?: boolean + filter_has_tags?: number[] // Tag.id[] filter_has_correspondent?: number // Correspondent.id diff --git a/src/documents/matching.py b/src/documents/matching.py index 28bc783d6..d3acde9b4 100644 --- a/src/documents/matching.py +++ b/src/documents/matching.py @@ -21,9 +21,13 @@ logger = logging.getLogger("paperless.matching") def log_reason(matching_model: MatchingModel, document: Document, reason: str): class_name = type(matching_model).__name__ + name = ( + matching_model.name + if hasattr(matching_model, "name") + else str(matching_model) + ) logger.debug( - f"{class_name} {matching_model.name} matched on document " - f"{document} because {reason}", + f"{class_name} {name} matched on document {document} because {reason}", ) @@ -318,6 +322,15 @@ def 
document_matches_workflow( ): # document is type Document + if ( + trigger.matching_algorithm > MatchingModel.MATCH_NONE + and not matches(trigger, document) + ): + log_match_failure( + f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match", + ) + trigger_matched = False + # Document has_tags vs document tags if ( trigger.filter_has_tags.all().count() > 0 diff --git a/src/documents/migrations/1044_workflow_workflowaction_workflowtrigger_and_more.py b/src/documents/migrations/1044_workflow_workflowaction_workflowtrigger_and_more.py index 4319e13cf..ac88fc0eb 100644 --- a/src/documents/migrations/1044_workflow_workflowaction_workflowtrigger_and_more.py +++ b/src/documents/migrations/1044_workflow_workflowaction_workflowtrigger_and_more.py @@ -407,6 +407,29 @@ class Migration(migrations.Migration): verbose_name="filter documents from this mail rule", ), ), + ( + "matching_algorithm", + models.PositiveIntegerField( + choices=[ + (0, "None"), + (1, "Any word"), + (2, "All words"), + (3, "Exact match"), + (4, "Regular expression"), + (5, "Fuzzy word"), + ], + default=0, + verbose_name="matching algorithm", + ), + ), + ( + "match", + models.CharField(blank=True, max_length=256, verbose_name="match"), + ), + ( + "is_insensitive", + models.BooleanField(default=True, verbose_name="is insensitive"), + ), ( "filter_has_tags", models.ManyToManyField( diff --git a/src/documents/models.py b/src/documents/models.py index cba6ad3de..638295ae9 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -889,6 +889,15 @@ if settings.AUDIT_LOG_ENABLED: class WorkflowTrigger(models.Model): + class WorkflowTriggerMatching(models.IntegerChoices): + # No auto matching + NONE = MatchingModel.MATCH_NONE, _("None") + ANY = MatchingModel.MATCH_ANY, _("Any word") + ALL = MatchingModel.MATCH_ALL, _("All words") + LITERAL = MatchingModel.MATCH_LITERAL, _("Exact match") + REGEX = MatchingModel.MATCH_REGEX, _("Regular expression") + FUZZY = 
MatchingModel.MATCH_FUZZY, _("Fuzzy word") + class WorkflowTriggerType(models.IntegerChoices): CONSUMPTION = 1, _("Consumption") DOCUMENT_ADDED = 2, _("Document Added") @@ -943,6 +952,16 @@ class WorkflowTrigger(models.Model): verbose_name=_("filter documents from this mail rule"), ) + match = models.CharField(_("match"), max_length=256, blank=True) + + matching_algorithm = models.PositiveIntegerField( + _("matching algorithm"), + choices=WorkflowTriggerMatching.choices, + default=WorkflowTriggerMatching.NONE, + ) + + is_insensitive = models.BooleanField(_("is insensitive"), default=True) + filter_has_tags = models.ManyToManyField( Tag, blank=True, diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index df48a78a8..510aaa244 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -1302,6 +1302,9 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer): "filter_path", "filter_filename", "filter_mailrule", + "matching_algorithm", + "match", + "is_insensitive", "filter_has_tags", "filter_has_correspondent", "filter_has_document_type", diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py index 8d45d618f..bce581bbe 100644 --- a/src/documents/tests/test_workflows.py +++ b/src/documents/tests/test_workflows.py @@ -16,6 +16,7 @@ from documents.models import Correspondent from documents.models import CustomField from documents.models import Document from documents.models import DocumentType +from documents.models import MatchingModel from documents.models import StoragePath from documents.models import Tag from documents.models import Workflow @@ -742,6 +743,81 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): expected_str = f"Document filename {doc.original_filename} does not match" self.assertIn(expected_str, cm.output[1]) + def test_document_added_match_content_matching(self): + trigger = WorkflowTrigger.objects.create( + 
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED, + matching_algorithm=MatchingModel.MATCH_LITERAL, + match="foo", + is_insensitive=True, + ) + action = WorkflowAction.objects.create( + assign_title="Doc content matching worked", + assign_owner=self.user2, + ) + w = Workflow.objects.create( + name="Workflow 1", + order=0, + ) + w.triggers.add(trigger) + w.actions.add(action) + w.save() + + doc = Document.objects.create( + title="sample test", + correspondent=self.c, + original_filename="sample.pdf", + content="Hello world foo bar", + ) + + with self.assertLogs("paperless.matching", level="DEBUG") as cm: + document_consumption_finished.send( + sender=self.__class__, + document=doc, + ) + expected_str = f"WorkflowTrigger {trigger} matched on document" + expected_str2 = 'because it contains this string: "foo"' + self.assertIn(expected_str, cm.output[0]) + self.assertIn(expected_str2, cm.output[0]) + expected_str = f"Document matched {trigger} from {w}" + self.assertIn(expected_str, cm.output[1]) + + def test_document_added_no_match_content_matching(self): + trigger = WorkflowTrigger.objects.create( + type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED, + matching_algorithm=MatchingModel.MATCH_LITERAL, + match="foo", + is_insensitive=True, + ) + action = WorkflowAction.objects.create( + assign_title="Doc content matching worked", + assign_owner=self.user2, + ) + action.save() + w = Workflow.objects.create( + name="Workflow 1", + order=0, + ) + w.triggers.add(trigger) + w.actions.add(action) + w.save() + + doc = Document.objects.create( + title="sample test", + correspondent=self.c, + original_filename="sample.pdf", + content="Hello world bar", + ) + + with self.assertLogs("paperless.matching", level="DEBUG") as cm: + document_consumption_finished.send( + sender=self.__class__, + document=doc, + ) + expected_str = f"Document did not match {w}" + self.assertIn(expected_str, cm.output[0]) + expected_str = f"Document content matching settings for algorithm 
'{trigger.matching_algorithm}' did not match" + self.assertIn(expected_str, cm.output[1]) + def test_document_added_no_match_tags(self): trigger = WorkflowTrigger.objects.create( type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED, @@ -751,7 +827,6 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): assign_title="Doc assign owner", assign_owner=self.user2, ) - action.save() w = Workflow.objects.create( name="Workflow 1", order=0,