Add content matching to workflow trigger
This commit is contained in:
@@ -21,9 +21,13 @@ logger = logging.getLogger("paperless.matching")
|
||||
|
||||
def log_reason(matching_model: MatchingModel, document: Document, reason: str):
    """
    Write a debug log entry explaining why a matching object matched a document.

    Args:
        matching_model: The object whose matching rule applied. Its ``name``
            attribute is used as the label when it has one; otherwise its
            string representation is used.
        document: The document that was matched.
        reason: Human-readable explanation of why the match succeeded.
    """
    class_name = type(matching_model).__name__
    # Fall back to the string representation for objects that have no
    # ``name`` attribute, so the log call below never raises AttributeError.
    name = (
        matching_model.name
        if hasattr(matching_model, "name")
        else str(matching_model)
    )
    # Emit exactly one pre-formatted message built from the computed ``name``.
    logger.debug(
        f"{class_name} {name} matched on document {document} because {reason}",
    )
|
||||
|
||||
|
||||
@@ -318,6 +322,15 @@ def document_matches_workflow(
|
||||
):
|
||||
# document is type Document
|
||||
|
||||
if (
|
||||
trigger.matching_algorithm > MatchingModel.MATCH_NONE
|
||||
and not matches(trigger, document)
|
||||
):
|
||||
log_match_failure(
|
||||
f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match",
|
||||
)
|
||||
trigger_matched = False
|
||||
|
||||
# Document has_tags vs document tags
|
||||
if (
|
||||
trigger.filter_has_tags.all().count() > 0
|
||||
|
||||
@@ -407,6 +407,29 @@ class Migration(migrations.Migration):
|
||||
verbose_name="filter documents from this mail rule",
|
||||
),
|
||||
),
|
||||
(
|
||||
"matching_algorithm",
|
||||
models.PositiveIntegerField(
|
||||
choices=[
|
||||
(0, "None"),
|
||||
(1, "Any word"),
|
||||
(2, "All words"),
|
||||
(3, "Exact match"),
|
||||
(4, "Regular expression"),
|
||||
(5, "Fuzzy word"),
|
||||
],
|
||||
default=0,
|
||||
verbose_name="matching algorithm",
|
||||
),
|
||||
),
|
||||
(
|
||||
"match",
|
||||
models.CharField(blank=True, max_length=256, verbose_name="match"),
|
||||
),
|
||||
(
|
||||
"is_insensitive",
|
||||
models.BooleanField(default=True, verbose_name="is insensitive"),
|
||||
),
|
||||
(
|
||||
"filter_has_tags",
|
||||
models.ManyToManyField(
|
||||
|
||||
@@ -889,6 +889,15 @@ if settings.AUDIT_LOG_ENABLED:
|
||||
|
||||
|
||||
class WorkflowTrigger(models.Model):
|
||||
class WorkflowTriggerMatching(models.IntegerChoices):
    """
    Content matching algorithms that can be selected on a workflow trigger.

    Each member takes its integer value from the corresponding
    ``MatchingModel.MATCH_*`` constant and carries a translatable label.
    """

    # No auto matching
    NONE = MatchingModel.MATCH_NONE, _("None")
    ANY = MatchingModel.MATCH_ANY, _("Any word")
    ALL = MatchingModel.MATCH_ALL, _("All words")
    LITERAL = MatchingModel.MATCH_LITERAL, _("Exact match")
    REGEX = MatchingModel.MATCH_REGEX, _("Regular expression")
    FUZZY = MatchingModel.MATCH_FUZZY, _("Fuzzy word")
|
||||
|
||||
class WorkflowTriggerType(models.IntegerChoices):
|
||||
CONSUMPTION = 1, _("Consumption")
|
||||
DOCUMENT_ADDED = 2, _("Document Added")
|
||||
@@ -943,6 +952,16 @@ class WorkflowTrigger(models.Model):
|
||||
verbose_name=_("filter documents from this mail rule"),
|
||||
)
|
||||
|
||||
match = models.CharField(_("match"), max_length=256, blank=True)
|
||||
|
||||
matching_algorithm = models.PositiveIntegerField(
|
||||
_("matching algorithm"),
|
||||
choices=WorkflowTriggerMatching.choices,
|
||||
default=WorkflowTriggerMatching.NONE,
|
||||
)
|
||||
|
||||
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
|
||||
|
||||
filter_has_tags = models.ManyToManyField(
|
||||
Tag,
|
||||
blank=True,
|
||||
|
||||
@@ -1302,6 +1302,9 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
|
||||
"filter_path",
|
||||
"filter_filename",
|
||||
"filter_mailrule",
|
||||
"matching_algorithm",
|
||||
"match",
|
||||
"is_insensitive",
|
||||
"filter_has_tags",
|
||||
"filter_has_correspondent",
|
||||
"filter_has_document_type",
|
||||
|
||||
@@ -16,6 +16,7 @@ from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import Workflow
|
||||
@@ -742,6 +743,81 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
expected_str = f"Document filename {doc.original_filename} does not match"
|
||||
self.assertIn(expected_str, cm.output[1])
|
||||
|
||||
def test_document_added_match_content_matching(self):
    """
    GIVEN:
        - A document-added workflow trigger using case-insensitive exact
          (literal) content matching on "foo"
        - A document whose content contains "foo"
    WHEN:
        - The document consumption finished signal is sent
    THEN:
        - The first log line reports the trigger matching on the content
        - The second log line reports the document matching the workflow
    """
    trigger = WorkflowTrigger.objects.create(
        type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        matching_algorithm=MatchingModel.MATCH_LITERAL,
        match="foo",
        is_insensitive=True,
    )
    title_action = WorkflowAction.objects.create(
        assign_title="Doc content matching worked",
        assign_owner=self.user2,
    )
    w = Workflow.objects.create(name="Workflow 1", order=0)
    w.triggers.add(trigger)
    w.actions.add(title_action)
    w.save()

    matching_doc = Document.objects.create(
        title="sample test",
        correspondent=self.c,
        original_filename="sample.pdf",
        content="Hello world foo bar",
    )

    with self.assertLogs("paperless.matching", level="DEBUG") as captured:
        document_consumption_finished.send(
            sender=self.__class__,
            document=matching_doc,
        )
        # First entry: the content matcher explaining why the trigger matched.
        content_match_line = captured.output[0]
        self.assertIn(
            f"WorkflowTrigger {trigger} matched on document",
            content_match_line,
        )
        self.assertIn(
            'because it contains this string: "foo"',
            content_match_line,
        )
        # Second entry: the workflow-level match.
        self.assertIn(
            f"Document matched {trigger} from {w}",
            captured.output[1],
        )
|
||||
|
||||
def test_document_added_no_match_content_matching(self):
    """
    GIVEN:
        - A document-added workflow trigger using case-insensitive exact
          (literal) content matching on "foo"
        - A document whose content does not contain "foo"
    WHEN:
        - The document consumption finished signal is sent
    THEN:
        - The first log line reports that the document did not match the workflow
        - The second log line reports that the content matching settings
          did not match
    """
    trigger = WorkflowTrigger.objects.create(
        type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        matching_algorithm=MatchingModel.MATCH_LITERAL,
        match="foo",
        is_insensitive=True,
    )
    # objects.create() already persists the row; no extra save() is needed.
    action = WorkflowAction.objects.create(
        assign_title="Doc content matching worked",
        assign_owner=self.user2,
    )
    w = Workflow.objects.create(
        name="Workflow 1",
        order=0,
    )
    w.triggers.add(trigger)
    w.actions.add(action)
    w.save()

    doc = Document.objects.create(
        title="sample test",
        correspondent=self.c,
        original_filename="sample.pdf",
        content="Hello world bar",
    )

    with self.assertLogs("paperless.matching", level="DEBUG") as cm:
        document_consumption_finished.send(
            sender=self.__class__,
            document=doc,
        )
        expected_str = f"Document did not match {w}"
        self.assertIn(expected_str, cm.output[0])
        expected_str = f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match"
        self.assertIn(expected_str, cm.output[1])
|
||||
|
||||
def test_document_added_no_match_tags(self):
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
|
||||
@@ -751,7 +827,6 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
assign_title="Doc assign owner",
|
||||
assign_owner=self.user2,
|
||||
)
|
||||
action.save()
|
||||
w = Workflow.objects.create(
|
||||
name="Workflow 1",
|
||||
order=0,
|
||||
|
||||
Reference in New Issue
Block a user