From 84199a6894be8885f142aa5812c3557a4ec482e5 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Mon, 25 Dec 2023 23:41:46 -0800 Subject: [PATCH] Initial implementation of document added trigger --- src/documents/apps.py | 2 + src/documents/consumer.py | 70 +++++++++++----- src/documents/matching.py | 110 +++++++++++++++----------- src/documents/signals/handlers.py | 75 ++++++++++++++++++ src/documents/tests/test_workflows.py | 89 +++++++++++++++++---- 5 files changed, 267 insertions(+), 79 deletions(-) diff --git a/src/documents/apps.py b/src/documents/apps.py index d681b9a87..3683e3fbc 100644 --- a/src/documents/apps.py +++ b/src/documents/apps.py @@ -11,6 +11,7 @@ class DocumentsConfig(AppConfig): from documents.signals import document_consumption_finished from documents.signals.handlers import add_inbox_tags from documents.signals.handlers import add_to_index + from documents.signals.handlers import run_workflows from documents.signals.handlers import set_correspondent from documents.signals.handlers import set_document_type from documents.signals.handlers import set_log_entry @@ -24,5 +25,6 @@ class DocumentsConfig(AppConfig): document_consumption_finished.connect(set_storage_path) document_consumption_finished.connect(set_log_entry) document_consumption_finished.connect(add_to_index) + document_consumption_finished.connect(run_workflows) AppConfig.ready(self) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index b0da455ec..a88716f79 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -666,10 +666,6 @@ class Consumer(LoggingMixin): return overrides def _parse_title_placeholders(self, title: str) -> str: - """ - Consumption template title placeholders can only include items that are - assigned as part of this template (since auto-matching hasnt happened yet) - """ local_added = timezone.localtime(timezone.now()) correspondent_name = ( @@ -688,20 +684,14 @@ class Consumer(LoggingMixin): else None ) - return title.format( - correspondent=correspondent_name, - document_type=doc_type_name, - added=local_added.isoformat(), - added_year=local_added.strftime("%Y"), - added_year_short=local_added.strftime("%y"), - added_month=local_added.strftime("%m"), - added_month_name=local_added.strftime("%B"), - added_month_name_short=local_added.strftime("%b"), - added_day=local_added.strftime("%d"), - owner_username=owner_username, - original_filename=Path(self.filename).stem, - added_time=local_added.strftime("%H:%M"), - ).strip() + return parse_doc_title_w_placeholders( + title, + correspondent_name, + doc_type_name, + owner_username, + local_added, + self.filename, + ) def _store( self, @@ -854,3 +844,47 @@ class Consumer(LoggingMixin): self.log.warning("Script stderr:") for line in stderr_str: self.log.warning(line) + + +def parse_doc_title_w_placeholders( + title: str, + correspondent_name: str, + doc_type_name: str, + owner_username: str, + local_added: datetime.datetime, + original_filename: str, + created: Optional[datetime.datetime] = None, +) -> str: + """ + Title placeholders for Workflows using Consumption triggers can only include + items that are assigned as part of this template (since auto-matching hasnt + happened yet) + """ + formatting = { + "correspondent": correspondent_name, + "document_type": doc_type_name, + "added": local_added.isoformat(), + "added_year": local_added.strftime("%Y"), + "added_year_short": local_added.strftime("%y"), + "added_month": local_added.strftime("%m"), + "added_month_name": local_added.strftime("%B"), + "added_month_name_short": local_added.strftime("%b"), + "added_day": local_added.strftime("%d"), + "added_time": local_added.strftime("%H:%M"), + "owner_username": owner_username, + "original_filename": Path(original_filename).stem, + } + if created is not None: + formatting.update( + { + "created": created.isoformat(), + "created_year": created.strftime("%Y"), + "created_year_short": created.strftime("%y"), + "created_month": created.strftime("%m"), + "created_month_name": created.strftime("%B"), + "created_month_name_short": created.strftime("%b"), + "created_day": created.strftime("%d"), + "created_time": created.strftime("%H:%M"), + }, + ) + return title.format(**formatting).strip() diff --git a/src/documents/matching.py b/src/documents/matching.py index 2033b98a8..045e2ba79 100644 --- a/src/documents/matching.py +++ b/src/documents/matching.py @@ -239,7 +239,7 @@ def _split_match(matching_model): def document_matches_workflow( - document: ConsumableDocument, + document: ConsumableDocument | Document, workflow: Workflow, trigger_type: WorkflowTrigger.WorkflowTriggerType, ) -> bool: @@ -258,52 +258,74 @@ def document_matches_workflow( trigger_matched = False else: for trigger in triggers: - # Document source vs template source - if document.source not in [int(x) for x in list(trigger.sources)]: - log_match_failure( - f"Document source {document.source.name} not in" - f" {[DocumentSource(int(x)).name for x in trigger.sources]}", - ) - trigger_matched = False + if trigger_type is WorkflowTrigger.WorkflowTriggerType.CONSUMPTION: + # document is type ConsumableDocument - # Document mail rule vs template mail rule - if ( - document.mailrule_id is not None - and trigger.filter_mailrule is not None - and document.mailrule_id != trigger.filter_mailrule.pk - ): - log_match_failure( - f"Document mail rule {document.mailrule_id}" - f" != {trigger.filter_mailrule.pk}", - ) - trigger_matched = False + # Document source vs template source + if document.source not in [int(x) for x in list(trigger.sources)]: + log_match_failure( + f"Document source {document.source.name} not in" + f" {[DocumentSource(int(x)).name for x in trigger.sources]}", + ) + trigger_matched = False - # Document filename vs template filename - if ( - trigger.filter_filename is not None - and len(trigger.filter_filename) > 0 - and not fnmatch( - document.original_file.name.lower(), - trigger.filter_filename.lower(), - ) - ): - log_match_failure( - f"Document filename {document.original_file.name} does not match" - f" {trigger.filter_filename.lower()}", - ) - trigger_matched = False + # Document mail rule vs template mail rule + if ( + document.mailrule_id is not None + and trigger.filter_mailrule is not None + and document.mailrule_id != trigger.filter_mailrule.pk + ): + log_match_failure( + f"Document mail rule {document.mailrule_id}" + f" != {trigger.filter_mailrule.pk}", + ) + trigger_matched = False - # Document path vs template path - if ( - trigger.filter_path is not None - and len(trigger.filter_path) > 0 - and not document.original_file.match(trigger.filter_path) - ): - log_match_failure( - f"Document path {document.original_file}" - f" does not match {trigger.filter_path}", - ) - trigger_matched = False + # Document filename vs template filename + if ( + trigger.filter_filename is not None + and len(trigger.filter_filename) > 0 + and not fnmatch( + document.original_file.name.lower(), + trigger.filter_filename.lower(), + ) + ): + log_match_failure( + f"Document filename {document.original_file.name} does not match" + f" {trigger.filter_filename.lower()}", + ) + trigger_matched = False + + # Document path vs template path + if ( + trigger.filter_path is not None + and len(trigger.filter_path) > 0 + and not document.original_file.match(trigger.filter_path) + ): + log_match_failure( + f"Document path {document.original_file}" + f" does not match {trigger.filter_path}", + ) + trigger_matched = False + + elif trigger_type is WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED: + # document is type Document + + # Document filename vs template filename + if ( + trigger.filter_filename is not None + and len(trigger.filter_filename) > 0 + and document.original_filename is not None + and not fnmatch( + document.original_filename.lower(), + trigger.filter_filename.lower(), + ) + ): + log_match_failure( + f"Document filename {document.original_filename} does not match" + f" {trigger.filter_filename.lower()}", + ) + trigger_matched = False if trigger_matched: logger.info(f"Document matched {trigger} from {workflow}") diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 117e3c38d..502626aa5 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -24,14 +24,19 @@ from filelock import FileLock from documents import matching from documents.classifier import DocumentClassifier +from documents.consumer import parse_doc_title_w_placeholders from documents.file_handling import create_source_path_directory from documents.file_handling import delete_empty_directories from documents.file_handling import generate_unique_filename +from documents.models import CustomFieldInstance from documents.models import Document from documents.models import MatchingModel from documents.models import PaperlessTask from documents.models import Tag +from documents.models import Workflow +from documents.models import WorkflowTrigger from documents.permissions import get_objects_for_user_owner_aware +from documents.permissions import set_permissions_for_object logger = logging.getLogger("paperless.handlers") @@ -514,6 +519,76 @@ def add_to_index(sender, document, **kwargs): index.add_or_update_document(document) +def run_workflows(sender, document: Document, logging_group=None, **kwargs): + for workflow in Workflow.objects.filter( + triggers__type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED, + ).order_by("order"): + if matching.document_matches_workflow( + document, + workflow, + WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED, + ): + for action in workflow.actions.all(): + if action.assign_tags.all().count() > 0: + document.tags.add(*action.assign_tags.all()) + + if action.assign_correspondent is not None: + document.correspondent = action.assign_correspondent + + if action.assign_document_type is not None: + document.document_type = action.assign_document_type + + if action.assign_storage_path is not None: + document.storage_path = action.assign_storage_path + + if action.assign_owner is not None: + document.owner = action.assign_owner + + if action.assign_title is not None: + document.title = parse_doc_title_w_placeholders( + action.assign_title, + document.correspondent.name, + document.document_type.name, + document.owner.username, + document.added, + document.original_filename, + document.created, + ) + + if ( + action.assign_view_users is not None + or action.assign_view_groups is not None + or action.assign_change_users is not None + or action.assign_change_groups is not None + ): + permissions = { + "view": { + "users": action.assign_view_users.all().values_list("id") + or [], + "groups": action.assign_view_groups.all().values_list("id") + or [], + }, + "change": { + "users": action.assign_change_users.all().values_list("id") + or [], + "groups": action.assign_change_groups.all().values_list( + "id", + ) + or [], + }, + } + set_permissions_for_object(permissions=permissions, object=document) + + if action.assign_custom_fields is not None: + for field in action.assign_custom_fields.all(): + CustomFieldInstance.objects.create( + field=field, + document=document, + ) # adds to document + + document.save() + + @before_task_publish.connect def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs): """ diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py index d4a4d3761..bd6bc3299 100644 --- a/src/documents/tests/test_workflows.py +++ b/src/documents/tests/test_workflows.py @@ -1,3 +1,4 @@ +from datetime import timedelta from pathlib import Path from unittest import TestCase from unittest import mock @@ -5,18 +6,21 @@ from unittest import mock import pytest from django.contrib.auth.models import Group from django.contrib.auth.models import User +from django.utils import timezone from documents import tasks from documents.data_models import ConsumableDocument from documents.data_models import DocumentSource from documents.models import Correspondent from documents.models import CustomField +from documents.models import Document from documents.models import DocumentType from documents.models import StoragePath from documents.models import Tag from documents.models import Workflow from documents.models import WorkflowAction from documents.models import WorkflowTrigger +from documents.signals import document_consumption_finished from documents.tests.utils import DirectoriesMixin from documents.tests.utils import FileSystemAssertsMixin from paperless_mail.models import MailAccount @@ -567,32 +571,35 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertIn(expected_str, cm.output[1]) @mock.patch("documents.consumer.Consumer.try_consume_file") - def test_consumption_template_repeat_custom_fields(self, m): + def test_workflow_repeat_custom_fields(self, m): """ GIVEN: - - Existing consumption templates which assign the same custom field + - Existing workflows which assign the same custom field WHEN: - File that matches is consumed THEN: - Custom field is added the first time successfully """ - ct = ConsumptionTemplate.objects.create( - name="Template 1", - order=0, + trigger = WorkflowTrigger.objects.create( + type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION, sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}", filter_filename="*simple*", ) - ct.assign_custom_fields.add(self.cf1.pk) - ct.save() + action1 = WorkflowAction.objects.create() + action1.assign_custom_fields.add(self.cf1.pk) + action1.save() - ct2 = ConsumptionTemplate.objects.create( - name="Template 2", - order=1, - sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}", - filter_filename="*simple*", + action2 = WorkflowAction.objects.create() + action2.assign_custom_fields.add(self.cf1.pk) + action2.save() + + w = Workflow.objects.create( + name="Workflow 1", + order=0, ) - ct2.assign_custom_fields.add(self.cf1.pk) - ct2.save() + w.triggers.add(trigger) + w.actions.add(action1, action2) + w.save() test_file = self.SAMPLE_DIR / "simple.pdf" @@ -612,7 +619,55 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, TestCase): [self.cf1.pk], ) - expected_str = f"Document matched template {ct}" + expected_str = f"Document matched {trigger} from {w}" self.assertIn(expected_str, cm.output[0]) - expected_str = f"Document matched template {ct2}" - self.assertIn(expected_str, cm.output[1]) + + + def test_document_added_workflow(self): + trigger = WorkflowTrigger.objects.create( + type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED, + sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}", + filter_filename="*sample*", + ) + action = WorkflowAction.objects.create( + assign_title="Doc created in {created_year}", + assign_correspondent=self.c2, + assign_document_type=self.dt, + assign_storage_path=self.sp, + assign_owner=self.user2, + ) + action.assign_tags.add(self.t1) + action.assign_tags.add(self.t2) + action.assign_tags.add(self.t3) + action.assign_view_users.add(self.user3.pk) + action.assign_view_groups.add(self.group1.pk) + action.assign_change_users.add(self.user3.pk) + action.assign_change_groups.add(self.group1.pk) + action.assign_custom_fields.add(self.cf1.pk) + action.assign_custom_fields.add(self.cf2.pk) + action.save() + w = Workflow.objects.create( + name="Workflow 1", + order=0, + ) + w.triggers.add(trigger) + w.actions.add(action) + w.save() + + now = timezone.localtime(timezone.now()) + created = now - timedelta(weeks=520) + doc = Document.objects.create( + title="sample test", + correspondent=self.c, + original_filename="sample.pdf", + added=now, + created=created, + ) + + document_consumption_finished.send( + sender=self.__class__, + document=doc, + ) + + self.assertEqual(doc.correspondent, self.c2) + self.assertEqual(doc.title, f"Doc created in {created.year}")