Add content matching to workflow trigger

This commit is contained in:
shamoon
2023-12-30 08:47:03 -08:00
parent c67747ff9f
commit 0e716d0566
10 changed files with 177 additions and 4 deletions

View File

@@ -21,9 +21,13 @@ logger = logging.getLogger("paperless.matching")
def log_reason(matching_model: MatchingModel, document: Document, reason: str):
    """
    Emit a debug record explaining why a matching object matched a document.

    Args:
        matching_model: The object whose matching settings produced the
            match. It is labelled in the message by its ``name`` attribute
            when it has one, otherwise by its string form.
        document: The matched document; interpolated into the message.
        reason: Explanation placed after "because" in the message.
    """
    class_name = type(matching_model).__name__
    # Not every object passed here has a ``name`` attribute, so fall back to
    # its string representation instead of raising AttributeError.
    name = (
        matching_model.name
        if hasattr(matching_model, "name")
        else str(matching_model)
    )
    # Exactly one, fully formatted message: passing a second positional
    # argument would make logging attempt ``msg % args`` and report a
    # formatting error instead of the intended line.
    logger.debug(
        f"{class_name} {name} matched on document {document} because {reason}",
    )
@@ -318,6 +322,15 @@ def document_matches_workflow(
):
# document is type Document
if (
trigger.matching_algorithm > MatchingModel.MATCH_NONE
and not matches(trigger, document)
):
log_match_failure(
f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match",
)
trigger_matched = False
# Document has_tags vs document tags
if (
trigger.filter_has_tags.all().count() > 0

View File

@@ -407,6 +407,29 @@ class Migration(migrations.Migration):
verbose_name="filter documents from this mail rule",
),
),
(
"matching_algorithm",
models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
],
default=0,
verbose_name="matching algorithm",
),
),
(
"match",
models.CharField(blank=True, max_length=256, verbose_name="match"),
),
(
"is_insensitive",
models.BooleanField(default=True, verbose_name="is insensitive"),
),
(
"filter_has_tags",
models.ManyToManyField(

View File

@@ -889,6 +889,15 @@ if settings.AUDIT_LOG_ENABLED:
class WorkflowTrigger(models.Model):
class WorkflowTriggerMatching(models.IntegerChoices):
# Content-matching algorithm choices for a workflow trigger.
# Each member takes its integer value from the corresponding
# MatchingModel.MATCH_* constant and pairs it with a translatable label.
# No auto matching
NONE = MatchingModel.MATCH_NONE, _("None")
ANY = MatchingModel.MATCH_ANY, _("Any word")
ALL = MatchingModel.MATCH_ALL, _("All words")
LITERAL = MatchingModel.MATCH_LITERAL, _("Exact match")
REGEX = MatchingModel.MATCH_REGEX, _("Regular expression")
FUZZY = MatchingModel.MATCH_FUZZY, _("Fuzzy word")
class WorkflowTriggerType(models.IntegerChoices):
CONSUMPTION = 1, _("Consumption")
DOCUMENT_ADDED = 2, _("Document Added")
@@ -943,6 +952,16 @@ class WorkflowTrigger(models.Model):
verbose_name=_("filter documents from this mail rule"),
)
match = models.CharField(_("match"), max_length=256, blank=True)
matching_algorithm = models.PositiveIntegerField(
_("matching algorithm"),
choices=WorkflowTriggerMatching.choices,
default=WorkflowTriggerMatching.NONE,
)
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
filter_has_tags = models.ManyToManyField(
Tag,
blank=True,

View File

@@ -1302,6 +1302,9 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
"filter_path",
"filter_filename",
"filter_mailrule",
"matching_algorithm",
"match",
"is_insensitive",
"filter_has_tags",
"filter_has_correspondent",
"filter_has_document_type",

View File

@@ -16,6 +16,7 @@ from documents.models import Correspondent
from documents.models import CustomField
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import StoragePath
from documents.models import Tag
from documents.models import Workflow
@@ -742,6 +743,81 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
expected_str = f"Document filename {doc.original_filename} does not match"
self.assertIn(expected_str, cm.output[1])
def test_document_added_match_content_matching(self):
    """
    GIVEN:
        - A DOCUMENT_ADDED trigger using literal, case-insensitive
          content matching on "foo"
        - A document whose content contains "foo"
    WHEN:
        - document_consumption_finished is sent for that document
    THEN:
        - The first "paperless.matching" log line reports the content match
        - The second log line reports that the trigger's workflow matched
    """
    trigger = WorkflowTrigger.objects.create(
        is_insensitive=True,
        match="foo",
        matching_algorithm=MatchingModel.MATCH_LITERAL,
        type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
    )
    assign_action = WorkflowAction.objects.create(
        assign_owner=self.user2,
        assign_title="Doc content matching worked",
    )

    w = Workflow.objects.create(order=0, name="Workflow 1")
    w.triggers.add(trigger)
    w.actions.add(assign_action)
    w.save()

    doc = Document.objects.create(
        content="Hello world foo bar",
        original_filename="sample.pdf",
        correspondent=self.c,
        title="sample test",
    )

    with self.assertLogs("paperless.matching", level="DEBUG") as captured:
        document_consumption_finished.send(sender=self.__class__, document=doc)
        log_lines = captured.output
        # First line: why the trigger's content matching succeeded.
        self.assertIn(
            f"WorkflowTrigger {trigger} matched on document",
            log_lines[0],
        )
        self.assertIn('because it contains this string: "foo"', log_lines[0])
        # Second line: the overall workflow match.
        self.assertIn(f"Document matched {trigger} from {w}", log_lines[1])
def test_document_added_no_match_content_matching(self):
    """
    GIVEN:
        - A DOCUMENT_ADDED trigger using literal, case-insensitive
          content matching on "foo"
        - A document whose content does not contain "foo"
    WHEN:
        - document_consumption_finished is sent for that document
    THEN:
        - The first "paperless.matching" log line reports that the document
          did not match the workflow
        - The second log line names the content matching settings as the
          reason
    """
    trigger = WorkflowTrigger.objects.create(
        type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        matching_algorithm=MatchingModel.MATCH_LITERAL,
        match="foo",
        is_insensitive=True,
    )
    # objects.create() already persists the row, so no extra save() is needed.
    action = WorkflowAction.objects.create(
        assign_title="Doc content matching worked",
        assign_owner=self.user2,
    )
    w = Workflow.objects.create(
        name="Workflow 1",
        order=0,
    )
    w.triggers.add(trigger)
    w.actions.add(action)
    w.save()

    doc = Document.objects.create(
        title="sample test",
        correspondent=self.c,
        original_filename="sample.pdf",
        content="Hello world bar",
    )

    with self.assertLogs("paperless.matching", level="DEBUG") as cm:
        document_consumption_finished.send(
            sender=self.__class__,
            document=doc,
        )
        expected_str = f"Document did not match {w}"
        self.assertIn(expected_str, cm.output[0])
        expected_str = f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match"
        self.assertIn(expected_str, cm.output[1])
def test_document_added_no_match_tags(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
@@ -751,7 +827,6 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
assign_title="Doc assign owner",
assign_owner=self.user2,
)
action.save()
w = Workflow.objects.create(
name="Workflow 1",
order=0,