From 0e716d0566e87b00f58bfd99a5b1c958ee1a9928 Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Sat, 30 Dec 2023 08:47:03 -0800
Subject: [PATCH] Add content matching to workflow trigger
---
docs/usage.md | 3 +-
.../workflow-edit-dialog.component.html | 9 +++
.../workflow-edit-dialog.component.spec.ts | 7 ++
.../workflow-edit-dialog.component.ts | 17 ++++
src-ui/src/app/data/workflow-trigger.ts | 6 ++
src/documents/matching.py | 17 +++-
...workflowaction_workflowtrigger_and_more.py | 23 ++++++
src/documents/models.py | 19 +++++
src/documents/serialisers.py | 3 +
src/documents/tests/test_workflows.py | 77 ++++++++++++++++++-
10 files changed, 177 insertions(+), 4 deletions(-)
diff --git a/docs/usage.md b/docs/usage.md
index 0796b1e62..e3897a9e0 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -320,7 +320,8 @@ Workflows allow you to filter by:
- File path, including wildcards. Note that enabling `PAPERLESS_CONSUMER_RECURSIVE` would allow, for
example, automatically assigning documents to different owners based on the upload directory.
- Mail rule. Choosing this option will force 'mail fetch' to be the workflow source.
-- Tags (`Added` and `Updated` triggers only). Will filter for documents with any of the specified tags
+- Content matching (`Added` and `Updated` triggers only). Filter for documents whose content matches the given pattern, using the selected matching algorithm.
+- Tags (`Added` and `Updated` triggers only). Filter for documents with any of the specified tags
- Document type (`Added` and `Updated` triggers only). Filter documents with this doc type
- Correspondent (`Added` and `Updated` triggers only). Filter documents with this correspondent
diff --git a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html
index a92e62e0a..a318dffe7 100644
--- a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html
+++ b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html
@@ -181,6 +181,15 @@
}
+ @if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) {
+
+ @if (patternRequired) {
+
+ }
+ @if (patternRequired) {
+
+ }
+ }
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) {
diff --git a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.spec.ts b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.spec.ts
index ebede83cd..b75f1bea0 100644
--- a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.spec.ts
+++ b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.spec.ts
@@ -37,6 +37,7 @@ import {
WorkflowAction,
WorkflowActionType,
} from 'src/app/data/workflow-action'
+import { MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model'
const workflow: Workflow = {
name: 'Workflow 1',
@@ -216,4 +217,10 @@ describe('ConsumptionTemplateEditDialogComponent', () => {
expect(action1.id).toBeNull()
expect(action2.id).toBeNull()
})
+
+ it('should not include auto matching in algorithms', () => {
+ const autoAlgorithm = MATCHING_ALGORITHMS.find((a) => a.id === MATCH_AUTO)
+ expect(autoAlgorithm).toBeDefined() // otherwise the check below is vacuous
+ expect(component.getMatchingAlgorithms()).not.toContain(autoAlgorithm)
+ })
})
diff --git a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts
index 4879d25ba..46faa3b0b 100644
--- a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts
+++ b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts
@@ -26,6 +26,11 @@ import {
WorkflowActionType,
} from 'src/app/data/workflow-action'
import { CdkDragDrop, moveItemInArray } from '@angular/cdk/drag-drop'
+import {
+ MATCHING_ALGORITHMS,
+ MATCH_AUTO,
+ MATCH_NONE,
+} from 'src/app/data/matching-model'
export const DOCUMENT_SOURCE_OPTIONS = [
{
@@ -64,6 +69,10 @@ export const WORKFLOW_ACTION_OPTIONS = [
},
]
+const TRIGGER_MATCHING_ALGORITHMS = MATCHING_ALGORITHMS.filter(
+ (a) => a.id !== MATCH_AUTO
+)
+
@Component({
selector: 'pngx-workflow-edit-dialog',
templateUrl: './workflow-edit-dialog.component.html',
@@ -141,6 +150,11 @@ export class WorkflowEditDialogComponent
})
}
+ getMatchingAlgorithms() {
+ // All MATCHING_ALGORITHMS except MATCH_AUTO (auto matching is not offered here)
+ return TRIGGER_MATCHING_ALGORITHMS
+ }
+
ngOnInit(): void {
super.ngOnInit()
this.updateTriggerActionFields()
@@ -165,6 +179,9 @@ export class WorkflowEditDialogComponent
filter_filename: new FormControl(trigger.filter_filename),
filter_path: new FormControl(trigger.filter_path),
filter_mailrule: new FormControl(trigger.filter_mailrule),
+ matching_algorithm: new FormControl(trigger.matching_algorithm ?? MATCH_NONE),
+ match: new FormControl(trigger.match ?? ''),
+ is_insensitive: new FormControl(trigger.is_insensitive ?? true),
filter_has_tags: new FormControl(trigger.filter_has_tags),
filter_has_correspondent: new FormControl(
trigger.filter_has_correspondent
diff --git a/src-ui/src/app/data/workflow-trigger.ts b/src-ui/src/app/data/workflow-trigger.ts
index cab3762a9..3e3bf8cf8 100644
--- a/src-ui/src/app/data/workflow-trigger.ts
+++ b/src-ui/src/app/data/workflow-trigger.ts
@@ -23,6 +23,12 @@ export interface WorkflowTrigger extends ObjectWithId {
filter_mailrule?: number // MailRule.id
+ match?: string
+
+ matching_algorithm?: number
+
+ is_insensitive?: boolean
+
filter_has_tags?: number[] // Tag.id[]
filter_has_correspondent?: number // Correspondent.id
diff --git a/src/documents/matching.py b/src/documents/matching.py
index 28bc783d6..d3acde9b4 100644
--- a/src/documents/matching.py
+++ b/src/documents/matching.py
@@ -21,9 +21,13 @@ logger = logging.getLogger("paperless.matching")
def log_reason(matching_model: MatchingModel, document: Document, reason: str):
class_name = type(matching_model).__name__
+ name = (
+ matching_model.name
+ if hasattr(matching_model, "name")
+ else matching_model.__str__()
+ )
logger.debug(
- f"{class_name} {matching_model.name} matched on document "
- f"{document} because {reason}",
+ f"{class_name} {name} matched on document {document} because {reason}",
)
@@ -318,6 +322,15 @@ def document_matches_workflow(
):
# document is type Document
+ if (
+ trigger.matching_algorithm > MatchingModel.MATCH_NONE
+ and not matches(trigger, document)
+ ):
+ log_match_failure(
+ f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match",
+ )
+ trigger_matched = False
+
# Document has_tags vs document tags
if (
trigger.filter_has_tags.all().count() > 0
diff --git a/src/documents/migrations/1044_workflow_workflowaction_workflowtrigger_and_more.py b/src/documents/migrations/1044_workflow_workflowaction_workflowtrigger_and_more.py
index 4319e13cf..ac88fc0eb 100644
--- a/src/documents/migrations/1044_workflow_workflowaction_workflowtrigger_and_more.py
+++ b/src/documents/migrations/1044_workflow_workflowaction_workflowtrigger_and_more.py
@@ -407,6 +407,29 @@ class Migration(migrations.Migration):
verbose_name="filter documents from this mail rule",
),
),
+ (
+ "matching_algorithm",
+ models.PositiveIntegerField(
+ choices=[
+ (0, "None"),
+ (1, "Any word"),
+ (2, "All words"),
+ (3, "Exact match"),
+ (4, "Regular expression"),
+ (5, "Fuzzy word"),
+ ],
+ default=0,
+ verbose_name="matching algorithm",
+ ),
+ ),
+ (
+ "match",
+ models.CharField(blank=True, max_length=256, verbose_name="match"),
+ ),
+ (
+ "is_insensitive",
+ models.BooleanField(default=True, verbose_name="is insensitive"),
+ ),
(
"filter_has_tags",
models.ManyToManyField(
diff --git a/src/documents/models.py b/src/documents/models.py
index cba6ad3de..638295ae9 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -889,6 +889,15 @@ if settings.AUDIT_LOG_ENABLED:
class WorkflowTrigger(models.Model):
+ class WorkflowTriggerMatching(models.IntegerChoices):
+ # Reuses MatchingModel's MATCH_* values; auto matching is intentionally left out
+ NONE = MatchingModel.MATCH_NONE, _("None")
+ ANY = MatchingModel.MATCH_ANY, _("Any word")
+ ALL = MatchingModel.MATCH_ALL, _("All words")
+ LITERAL = MatchingModel.MATCH_LITERAL, _("Exact match")
+ REGEX = MatchingModel.MATCH_REGEX, _("Regular expression")
+ FUZZY = MatchingModel.MATCH_FUZZY, _("Fuzzy word")
+
class WorkflowTriggerType(models.IntegerChoices):
CONSUMPTION = 1, _("Consumption")
DOCUMENT_ADDED = 2, _("Document Added")
@@ -943,6 +952,16 @@ class WorkflowTrigger(models.Model):
verbose_name=_("filter documents from this mail rule"),
)
+ match = models.CharField(_("match"), max_length=256, blank=True)
+
+ matching_algorithm = models.PositiveIntegerField(
+ _("matching algorithm"),
+ choices=WorkflowTriggerMatching.choices,
+ default=WorkflowTriggerMatching.NONE,
+ )
+
+ is_insensitive = models.BooleanField(_("is insensitive"), default=True)
+
filter_has_tags = models.ManyToManyField(
Tag,
blank=True,
diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py
index df48a78a8..510aaa244 100644
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -1302,6 +1302,9 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
"filter_path",
"filter_filename",
"filter_mailrule",
+ "matching_algorithm",
+ "match",
+ "is_insensitive",
"filter_has_tags",
"filter_has_correspondent",
"filter_has_document_type",
diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py
index 8d45d618f..bce581bbe 100644
--- a/src/documents/tests/test_workflows.py
+++ b/src/documents/tests/test_workflows.py
@@ -16,6 +16,7 @@ from documents.models import Correspondent
from documents.models import CustomField
from documents.models import Document
from documents.models import DocumentType
+from documents.models import MatchingModel
from documents.models import StoragePath
from documents.models import Tag
from documents.models import Workflow
@@ -742,6 +743,81 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
expected_str = f"Document filename {doc.original_filename} does not match"
self.assertIn(expected_str, cm.output[1])
+ def test_document_added_match_content_matching(self):
+ trigger = WorkflowTrigger.objects.create(
+ type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+ matching_algorithm=MatchingModel.MATCH_LITERAL,
+ match="foo",
+ is_insensitive=True,
+ )
+ action = WorkflowAction.objects.create(
+ assign_title="Doc content matching worked",
+ assign_owner=self.user2,
+ )
+ w = Workflow.objects.create(
+ name="Workflow 1",
+ order=0,
+ )
+ w.triggers.add(trigger)
+ w.actions.add(action)
+ w.save()
+
+ doc = Document.objects.create(
+ title="sample test",
+ correspondent=self.c,
+ original_filename="sample.pdf",
+ content="Hello world foo bar",
+ )
+
+ with self.assertLogs("paperless.matching", level="DEBUG") as cm:
+ document_consumption_finished.send(
+ sender=self.__class__,
+ document=doc,
+ )
+ expected_str = f"WorkflowTrigger {trigger} matched on document"
+ expected_str2 = 'because it contains this string: "foo"'
+ self.assertIn(expected_str, cm.output[0])
+ self.assertIn(expected_str2, cm.output[0])
+ expected_str = f"Document matched {trigger} from {w}"
+ self.assertIn(expected_str, cm.output[1])
+
+ def test_document_added_no_match_content_matching(self):
+ trigger = WorkflowTrigger.objects.create(
+ type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+ matching_algorithm=MatchingModel.MATCH_LITERAL,
+ match="foo",
+ is_insensitive=True,
+ )
+ action = WorkflowAction.objects.create(
+ assign_title="Doc content matching worked",
+ assign_owner=self.user2,
+ )
+ action.save()
+ w = Workflow.objects.create(
+ name="Workflow 1",
+ order=0,
+ )
+ w.triggers.add(trigger)
+ w.actions.add(action)
+ w.save()
+
+ doc = Document.objects.create(
+ title="sample test",
+ correspondent=self.c,
+ original_filename="sample.pdf",
+ content="Hello world bar",
+ )
+
+ with self.assertLogs("paperless.matching", level="DEBUG") as cm:
+ document_consumption_finished.send(
+ sender=self.__class__,
+ document=doc,
+ )
+ expected_str = f"Document did not match {w}"
+ self.assertIn(expected_str, cm.output[0])
+ expected_str = f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match"
+ self.assertIn(expected_str, cm.output[1])
+
def test_document_added_no_match_tags(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
@@ -751,7 +827,6 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
assign_title="Doc assign owner",
assign_owner=self.user2,
)
- action.save()
w = Workflow.objects.create(
name="Workflow 1",
order=0,