def run(self) -> None:
    """
    Apply consumption-triggered workflows to the incoming document.

    A fresh DocumentMetadataOverrides is handed to run_workflow together
    with self.input_doc; because overrides are supplied, run_workflow
    returns the (updated) overrides object instead of saving a document.
    The result is merged into self.metadata.
    """
    workflow_overrides = run_workflow(
        WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
        self.input_doc,
        "paperless.consumer",
        DocumentMetadataOverrides(),
    )
    self.metadata.update(workflow_overrides)
for pre-consumption triggers created will not have been parsed yet, but it will - for added / updated triggers - """ - formatting = { - "correspondent": correspondent_name, - "document_type": doc_type_name, - "added": local_added.isoformat(), - "added_year": local_added.strftime("%Y"), - "added_year_short": local_added.strftime("%y"), - "added_month": local_added.strftime("%m"), - "added_month_name": local_added.strftime("%B"), - "added_month_name_short": local_added.strftime("%b"), - "added_day": local_added.strftime("%d"), - "added_time": local_added.strftime("%H:%M"), - "owner_username": owner_username, - "original_filename": Path(original_filename).stem, - } - if created is not None: - formatting.update( - { - "created": created.isoformat(), - "created_year": created.strftime("%Y"), - "created_year_short": created.strftime("%y"), - "created_month": created.strftime("%m"), - "created_month_name": created.strftime("%B"), - "created_month_name_short": created.strftime("%b"), - "created_day": created.strftime("%d"), - "created_time": created.strftime("%H:%M"), - }, - ) - return title.format(**formatting).strip() diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index 6d02bf684..73e2299bd 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -1,4 +1,6 @@ +import datetime import os +from pathlib import Path from django.conf import settings @@ -146,3 +148,47 @@ def generate_filename( filename += ".gpg" return filename + + +def parse_doc_title_w_placeholders( + title: str, + correspondent_name: str, + doc_type_name: str, + owner_username: str, + local_added: datetime.datetime, + original_filename: str, + created: datetime.datetime | None = None, +) -> str: + """ + Available title placeholders for Workflows depend on what has already been assigned, + e.g. 
for pre-consumption triggers created will not have been parsed yet, but it will + for added / updated triggers + """ + formatting = { + "correspondent": correspondent_name, + "document_type": doc_type_name, + "added": local_added.isoformat(), + "added_year": local_added.strftime("%Y"), + "added_year_short": local_added.strftime("%y"), + "added_month": local_added.strftime("%m"), + "added_month_name": local_added.strftime("%B"), + "added_month_name_short": local_added.strftime("%b"), + "added_day": local_added.strftime("%d"), + "added_time": local_added.strftime("%H:%M"), + "owner_username": owner_username, + "original_filename": Path(original_filename).stem, + } + if created is not None: + formatting.update( + { + "created": created.isoformat(), + "created_year": created.strftime("%Y"), + "created_year_short": created.strftime("%y"), + "created_month": created.strftime("%m"), + "created_month_name": created.strftime("%B"), + "created_month_name_short": created.strftime("%b"), + "created_day": created.strftime("%d"), + "created_time": created.strftime("%H:%M"), + }, + ) + return title.format(**formatting).strip() diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index cf6733dd5..b2d433533 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -24,10 +24,12 @@ from guardian.shortcuts import remove_perm from documents import matching from documents.caching import clear_document_caches from documents.classifier import DocumentClassifier -from documents.consumer import parse_doc_title_w_placeholders +from documents.data_models import ConsumableDocument +from documents.data_models import DocumentMetadataOverrides from documents.file_handling import create_source_path_directory from documents.file_handling import delete_empty_directories from documents.file_handling import generate_unique_filename +from documents.file_handling import parse_doc_title_w_placeholders from documents.models import 
CustomFieldInstance from documents.models import Document from documents.models import MatchingModel @@ -529,55 +531,79 @@ def run_workflow_updated(sender, document: Document, logging_group=None, **kwarg def run_workflow( trigger_type: WorkflowTrigger.WorkflowTriggerType, - document: Document, + document: Document | ConsumableDocument, logging_group=None, -): - def assignment_action(): + overrides: DocumentMetadataOverrides = None, +) -> None | DocumentMetadataOverrides: + """ + Run the workflow for the given document and trigger type. + If overrides is provided, the document will not be saved, and an updated DocumentMetadataOverrides object will be returned. + """ + + def assignment_action( + document: Document, + overrides: DocumentMetadataOverrides | None = None, + ): if action.assign_tags.all().count() > 0: doc_tag_ids.extend( list(action.assign_tags.all().values_list("pk", flat=True)), ) if action.assign_correspondent is not None: - document.correspondent = action.assign_correspondent + if overrides is not None: + overrides.correspondent_id = action.assign_correspondent.pk + else: + document.correspondent = action.assign_correspondent if action.assign_document_type is not None: - document.document_type = action.assign_document_type + if overrides is not None: + overrides.document_type_id = action.assign_document_type.pk + else: + document.document_type = action.assign_document_type if action.assign_storage_path is not None: - document.storage_path = action.assign_storage_path + if overrides is not None: + overrides.storage_path_id = action.assign_storage_path.pk + else: + document.storage_path = action.assign_storage_path if action.assign_owner is not None: - document.owner = action.assign_owner + if overrides is not None: + overrides.owner_id = action.assign_owner.pk + else: + document.owner = action.assign_owner if action.assign_title is not None: - try: - document.title = parse_doc_title_w_placeholders( - action.assign_title, - ( - 
document.correspondent.name - if document.correspondent is not None - else "" - ), - ( - document.document_type.name - if document.document_type is not None - else "" - ), - (document.owner.username if document.owner is not None else ""), - timezone.localtime(document.added), - ( - document.original_filename - if document.original_filename is not None - else "" - ), - timezone.localtime(document.created), - ) - except Exception: - logger.exception( - f"Error occurred parsing title assignment '{action.assign_title}', falling back to original", - extra={"group": logging_group}, - ) + if overrides is not None: + overrides.title = action.assign_title + else: + try: + document.title = parse_doc_title_w_placeholders( + action.assign_title, + ( + document.correspondent.name + if document.correspondent is not None + else "" + ), + ( + document.document_type.name + if document.document_type is not None + else "" + ), + (document.owner.username if document.owner is not None else ""), + timezone.localtime(document.added), + ( + document.original_filename + if document.original_filename is not None + else "" + ), + timezone.localtime(document.created), + ) + except Exception: + logger.exception( + f"Error occurred parsing title assignment '{action.assign_title}', falling back to original", + extra={"group": logging_group}, + ) if ( ( @@ -619,28 +645,42 @@ def run_workflow( or [], }, } - set_permissions_for_object( - permissions=permissions, - object=document, - merge=True, - ) + if overrides is not None: + overrides.view_users = permissions["view"]["users"] + overrides.view_groups = permissions["view"]["groups"] + overrides.change_users = permissions["change"]["users"] + overrides.change_groups = permissions["change"]["groups"] + else: + set_permissions_for_object( + permissions=permissions, + object=document, + merge=True, + ) if action.assign_custom_fields is not None: - for field in action.assign_custom_fields.all(): - if ( - CustomFieldInstance.objects.filter( - 
field=field, - document=document, - ).count() - == 0 - ): - # can be triggered on existing docs, so only add the field if it doesn't already exist - CustomFieldInstance.objects.create( - field=field, - document=document, - ) + if overrides is not None: + overrides.custom_field_ids = list( + action.assign_custom_fields.all().values_list("pk", flat=True), + ) + else: + for field in action.assign_custom_fields.all(): + if ( + CustomFieldInstance.objects.filter( + field=field, + document=document, + ).count() + == 0 + ): + # can be triggered on existing docs, so only add the field if it doesn't already exist + CustomFieldInstance.objects.create( + field=field, + document=document, + ) - def removal_action(): + def removal_action( + document: Document, + overrides: DocumentMetadataOverrides | None = None, + ): if action.remove_all_tags: doc_tag_ids.clear() else: @@ -649,41 +689,72 @@ def run_workflow( ).all(): doc_tag_ids.remove(tag.pk) + corresspondent_id = ( + document.correspondent.pk + if (overrides is None and document.correspondent) + else overrides.correspondent_id + ) if action.remove_all_correspondents or ( - document.correspondent + corresspondent_id and ( action.remove_correspondents.filter( - pk=document.correspondent.pk, + pk=corresspondent_id, ).exists() ) ): - document.correspondent = None + if overrides is not None: + overrides.correspondent_id = None + else: + document.correspondent = None + document_type_id = ( + document.document_type.pk + if (overrides is None and document.document_type) + else overrides.document_type_id + ) if action.remove_all_document_types or ( - document.document_type + document_type_id and ( action.remove_document_types.filter( - pk=document.document_type.pk, + pk=document_type_id, ).exists() ) ): - document.document_type = None + if overrides is not None: + overrides.document_type_id = None + else: + document.document_type = None + storage_path_id = ( + document.storage_path.pk + if (overrides is None and 
document.storage_path) + else overrides.storage_path_id + ) if action.remove_all_storage_paths or ( - document.storage_path + storage_path_id and ( action.remove_storage_paths.filter( - pk=document.storage_path.pk, + pk=storage_path_id, ).exists() ) ): - document.storage_path = None + if overrides is not None: + overrides.storage_path_id = None + else: + document.storage_path = None + owner_id = ( + document.owner.pk + if (overrides is None and document.owner) + else overrides.owner_id + ) if action.remove_all_owners or ( - document.owner - and (action.remove_owners.filter(pk=document.owner.pk).exists()) + owner_id and (action.remove_owners.filter(pk=owner_id).exists()) ): - document.owner = None + if overrides is not None: + overrides.owner_id = None + else: + document.owner = None if action.remove_all_permissions: permissions = { @@ -696,29 +767,72 @@ def run_workflow( "groups": [], }, } - set_permissions_for_object( - permissions=permissions, - object=document, - merge=False, - ) + if overrides is not None: + overrides.view_users = [] + overrides.view_groups = [] + overrides.change_users = [] + overrides.change_groups = [] + else: + set_permissions_for_object( + permissions=permissions, + object=document, + merge=False, + ) elif ( (action.remove_view_users.all().count() > 0) or (action.remove_view_groups.all().count() > 0) or (action.remove_change_users.all().count() > 0) or (action.remove_change_groups.all().count() > 0) ): - for user in action.remove_view_users.all(): - remove_perm("view_document", user, document) - for user in action.remove_change_users.all(): - remove_perm("change_document", user, document) - for group in action.remove_view_groups.all(): - remove_perm("view_document", group, document) - for group in action.remove_change_groups.all(): - remove_perm("change_document", group, document) + if overrides is not None: + if ( + action.remove_view_users.all().count() > 0 + and overrides.view_users is not None + ): + overrides.view_users.remove( + 
*action.remove_view_users.all().values_list("id", flat=True), + ) + if ( + action.remove_view_groups.all().count() > 0 + and overrides.view_groups is not None + ): + overrides.view_groups.remove( + *action.remove_view_groups.all().values_list("id", flat=True), + ) + if ( + action.remove_change_users.all().count() > 0 + and overrides.change_users is not None + ): + overrides.change_users.remove( + *action.remove_change_users.all().values_list("id", flat=True), + ) + if ( + action.remove_change_groups.all().count() > 0 + and overrides.change_groups is not None + ): + overrides.change_groups.remove( + *action.remove_change_groups.all().values_list("id", flat=True), + ) + else: + for user in action.remove_view_users.all(): + remove_perm("view_document", user, document) + for user in action.remove_change_users.all(): + remove_perm("change_document", user, document) + for group in action.remove_view_groups.all(): + remove_perm("view_document", group, document) + for group in action.remove_change_groups.all(): + remove_perm("change_document", group, document) if action.remove_all_custom_fields: - CustomFieldInstance.objects.filter(document=document).delete() + if overrides is not None: + overrides.custom_field_ids = [] + else: + CustomFieldInstance.objects.filter(document=document).delete() elif action.remove_custom_fields.all().count() > 0: + if overrides is not None and overrides.custom_field_ids is not None: + overrides.custom_field_ids.remove( + *action.remove_custom_fields.all().values_list("id", flat=True), + ) CustomFieldInstance.objects.filter( field__in=action.remove_custom_fields.all(), document=document, @@ -744,11 +858,14 @@ def run_workflow( .prefetch_related("triggers") .order_by("order") ): - # This can be called from bulk_update_documents, which may be running multiple times - # Refresh this so the matching data is fresh and instance fields are re-freshed - # Otherwise, this instance might be behind and overwrite the work another process did - 
document.refresh_from_db() - doc_tag_ids = list(document.tags.all().values_list("pk", flat=True)) + if overrides is None: + # This can be called from bulk_update_documents, which may be running multiple times + # Refresh this so the matching data is fresh and instance fields are re-freshed + # Otherwise, this instance might be behind and overwrite the work another process did + document.refresh_from_db() + doc_tag_ids = list(document.tags.all().values_list("pk", flat=True)) + else: + doc_tag_ids = overrides.tag_ids or [] if matching.document_matches_workflow( document, workflow, @@ -762,14 +879,17 @@ def run_workflow( ) if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT: - assignment_action() + assignment_action(document, overrides) elif action.type == WorkflowAction.WorkflowActionType.REMOVAL: - removal_action() - - # save first before setting tags - document.save() - document.tags.set(doc_tag_ids) + removal_action(document, overrides) + if overrides is not None: + overrides.tag_ids = doc_tag_ids + return overrides + else: + # save first before setting tags + document.save() + document.tags.set(doc_tag_ids) @before_task_publish.connect