From 1deacb17c6dab2ede72b12b2ea0fa3156104b0cf Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Tue, 8 Oct 2024 21:43:46 -0700
Subject: [PATCH] Ok a bit closer

---
Reviewer notes (free text; ignored by `git am`):

What this patch does
  * Renames the signal handlers run_workflow_added / run_workflow_updated and
    the shared run_workflow helper to run_workflows_added /
    run_workflows_updated / run_workflows, and updates the signal wiring in
    DocumentsConfig.ready accordingly.
  * Adds an optional `overrides: DocumentMetadataOverrides | None` parameter to
    run_workflows. When it is given (`use_overrides`), assignment and removal
    actions are recorded on the overrides object instead of being applied to
    and saved on the document.
  * Replaces the inline workflow loop in WorkflowTriggerPlugin (consumer.py)
    with a single run_workflows(...) call using the CONSUMPTION trigger type.
  * Moves parse_doc_title_w_placeholders from documents/consumer.py to
    documents/templating/filepath.py and updates the imports.
  * Adds flat=True to the values_list("id") calls that build the permissions
    dict in assignment_action.

NOTE(review), to be confirmed against the intended behaviour:
  * In the overrides path of removal_action, the elif branches for
    correspondent, document type, storage path and owner test only the
    remove_* relation; they do not test action.remove_all_correspondents /
    remove_all_document_types / remove_all_storage_paths / remove_all_owners.
    The consumer code removed by this patch did honour those flags.
  * WorkflowTriggerPlugin still returns `msg`, but nothing appends to it any
    more, so it is always "".
  * The consumer passes "paperless_consumer" in the logging_group position of
    run_workflows.
  * In the overrides path, assign_view_users / assign_view_groups /
    assign_change_users / assign_change_groups and assign_custom_fields
    overwrite the corresponding overrides lists rather than extending them.
  * `return overrides` is added next to document.save(); if that is inside the
    workflow loop, only the first matching workflow is applied in overrides
    mode, and None is returned when no workflow matches.
  * Indentation and blank context lines in this copy were reconstructed from
    Python syntax and the hunk line counts; verify against the original commit
    before applying.

 src/documents/apps.py                |   8 +-
 src/documents/consumer.py            | 214 +---------------
 src/documents/signals/handlers.py    | 364 +++++++++++++++++++--------
 src/documents/templating/filepath.py |  45 ++++
 4 files changed, 312 insertions(+), 319 deletions(-)

diff --git a/src/documents/apps.py b/src/documents/apps.py
index 7ed006d06..c00b23ff2 100644
--- a/src/documents/apps.py
+++ b/src/documents/apps.py
@@ -12,8 +12,8 @@ class DocumentsConfig(AppConfig):
         from documents.signals import document_updated
         from documents.signals.handlers import add_inbox_tags
         from documents.signals.handlers import add_to_index
-        from documents.signals.handlers import run_workflow_added
-        from documents.signals.handlers import run_workflow_updated
+        from documents.signals.handlers import run_workflows_added
+        from documents.signals.handlers import run_workflows_updated
         from documents.signals.handlers import set_correspondent
         from documents.signals.handlers import set_document_type
         from documents.signals.handlers import set_log_entry
@@ -27,7 +27,7 @@ class DocumentsConfig(AppConfig):
         document_consumption_finished.connect(set_storage_path)
         document_consumption_finished.connect(set_log_entry)
         document_consumption_finished.connect(add_to_index)
-        document_consumption_finished.connect(run_workflow_added)
-        document_updated.connect(run_workflow_updated)
+        document_consumption_finished.connect(run_workflows_added)
+        document_updated.connect(run_workflows_updated)
 
         AppConfig.ready(self)
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index f79d3f9c3..61c8a4d81 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -4,7 +4,6 @@ import os
 import tempfile
 from enum import Enum
 from pathlib import Path
-from typing import TYPE_CHECKING
 
 import magic
 from django.conf import settings
@@ -21,7 +20,6 @@ from documents.data_models import DocumentMetadataOverrides
 from documents.file_handling import create_source_path_directory
 from documents.file_handling import generate_unique_filename
 from documents.loggers import LoggingMixin
-from documents.matching import document_matches_workflow
 from documents.models import Correspondent
 from documents.models import CustomField
 from documents.models import CustomFieldInstance
@@ -30,8 +28,6 @@ from documents.models import DocumentType
 from documents.models import FileInfo
 from documents.models import StoragePath
 from documents.models import Tag
-from documents.models import Workflow
-from documents.models import WorkflowAction
 from documents.models import WorkflowTrigger
 from documents.parsers import DocumentParser
 from documents.parsers import ParseError
@@ -46,6 +42,8 @@ from documents.plugins.helpers import ProgressManager
 from documents.plugins.helpers import ProgressStatusOptions
 from documents.signals import document_consumption_finished
 from documents.signals import document_consumption_started
+from documents.signals.handlers import run_workflows
+from documents.templating.filepath import parse_doc_title_w_placeholders
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import run_subprocess
@@ -64,162 +62,14 @@ class WorkflowTriggerPlugin(
         Get overrides from matching workflows
         """
         msg = ""
-        overrides = DocumentMetadataOverrides()
-        for workflow in (
-            Workflow.objects.filter(enabled=True)
-            .prefetch_related("actions")
-            .prefetch_related("actions__assign_view_users")
-            .prefetch_related("actions__assign_view_groups")
-            .prefetch_related("actions__assign_change_users")
-            .prefetch_related("actions__assign_change_groups")
-            .prefetch_related("actions__assign_custom_fields")
-            .prefetch_related("actions__remove_tags")
-            .prefetch_related("actions__remove_correspondents")
-            .prefetch_related("actions__remove_document_types")
-            .prefetch_related("actions__remove_storage_paths")
-            .prefetch_related("actions__remove_custom_fields")
-            .prefetch_related("actions__remove_owners")
-            .prefetch_related("triggers")
-            .order_by("order")
-        ):
-            action_overrides = DocumentMetadataOverrides()
-
-            if document_matches_workflow(
-                self.input_doc,
-                workflow,
-                WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
-            ):
-                for action in workflow.actions.all():
-                    if TYPE_CHECKING:
-                        assert isinstance(action, WorkflowAction)
-                    msg += f"Applying {action} from {workflow}\n"
-                    if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
-                        if action.assign_title is not None:
-                            action_overrides.title = action.assign_title
-                        if action.assign_tags is not None:
-                            action_overrides.tag_ids = list(
-                                action.assign_tags.values_list("pk", flat=True),
-                            )
-
-                        if action.assign_correspondent is not None:
-                            action_overrides.correspondent_id = (
-                                action.assign_correspondent.pk
-                            )
-                        if action.assign_document_type is not None:
-                            action_overrides.document_type_id = (
-                                action.assign_document_type.pk
-                            )
-                        if action.assign_storage_path is not None:
-                            action_overrides.storage_path_id = (
-                                action.assign_storage_path.pk
-                            )
-                        if action.assign_owner is not None:
-                            action_overrides.owner_id = action.assign_owner.pk
-                        if action.assign_view_users is not None:
-                            action_overrides.view_users = list(
-                                action.assign_view_users.values_list("pk", flat=True),
-                            )
-                        if action.assign_view_groups is not None:
-                            action_overrides.view_groups = list(
-                                action.assign_view_groups.values_list("pk", flat=True),
-                            )
-                        if action.assign_change_users is not None:
-                            action_overrides.change_users = list(
-                                action.assign_change_users.values_list("pk", flat=True),
-                            )
-                        if action.assign_change_groups is not None:
-                            action_overrides.change_groups = list(
-                                action.assign_change_groups.values_list(
-                                    "pk",
-                                    flat=True,
-                                ),
-                            )
-                        if action.assign_custom_fields is not None:
-                            action_overrides.custom_field_ids = list(
-                                action.assign_custom_fields.values_list(
-                                    "pk",
-                                    flat=True,
-                                ),
-                            )
-                        overrides.update(action_overrides)
-                    elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
-                        # Removal actions overwrite the current overrides
-                        if action.remove_all_tags:
-                            overrides.tag_ids = []
-                        elif overrides.tag_ids:
-                            for tag in action.remove_custom_fields.filter(
-                                pk__in=overrides.tag_ids,
-                            ):
-                                overrides.tag_ids.remove(tag.pk)
-
-                        if action.remove_all_correspondents or (
-                            overrides.correspondent_id is not None
-                            and action.remove_correspondents.filter(
-                                pk=overrides.correspondent_id,
-                            ).exists()
-                        ):
-                            overrides.correspondent_id = None
-
-                        if action.remove_all_document_types or (
-                            overrides.document_type_id is not None
-                            and action.remove_document_types.filter(
-                                pk=overrides.document_type_id,
-                            ).exists()
-                        ):
-                            overrides.document_type_id = None
-
-                        if action.remove_all_storage_paths or (
-                            overrides.storage_path_id is not None
-                            and action.remove_storage_paths.filter(
-                                pk=overrides.storage_path_id,
-                            ).exists()
-                        ):
-                            overrides.storage_path_id = None
-
-                        if action.remove_all_custom_fields:
-                            overrides.custom_field_ids = []
-                        elif overrides.custom_field_ids:
-                            for field in action.remove_custom_fields.filter(
-                                pk__in=overrides.custom_field_ids,
-                            ):
-                                overrides.custom_field_ids.remove(field.pk)
-
-                        if action.remove_all_owners or (
-                            overrides.owner_id is not None
-                            and action.remove_owners.filter(
-                                pk=overrides.owner_id,
-                            ).exists()
-                        ):
-                            overrides.owner_id = None
-
-                        if action.remove_all_permissions:
-                            overrides.view_users = []
-                            overrides.view_groups = []
-                            overrides.change_users = []
-                            overrides.change_groups = []
-                        else:
-                            if overrides.view_users:
-                                for user in action.remove_view_users.filter(
-                                    pk__in=overrides.view_users,
-                                ):
-                                    overrides.view_users.remove(user.pk)
-                            if overrides.change_users:
-                                for user in action.remove_change_users.filter(
-                                    pk__in=overrides.change_users,
-                                ):
-                                    overrides.change_users.remove(user.pk)
-                            if overrides.view_groups:
-                                for user in action.remove_view_groups.filter(
-                                    pk__in=overrides.view_groups,
-                                ):
-                                    overrides.view_groups.remove(user.pk)
-                            if overrides.change_groups:
-                                for user in action.remove_change_groups.filter(
-                                    pk__in=overrides.change_groups,
-                                ):
-                                    overrides.change_groups.remove(user.pk)
-
-        self.metadata.update(overrides)
+        overrides = run_workflows(
+            WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
+            self.input_doc,
+            "paperless_consumer",
+            DocumentMetadataOverrides(),
+        )
+        if overrides:
+            self.metadata.update(overrides)
         return msg
 
 
@@ -948,47 +798,3 @@ class ConsumerPlugin(
             copy_basic_file_stats(source, target)
         except Exception:  # pragma: no cover
             pass
-
-
-def parse_doc_title_w_placeholders(
-    title: str,
-    correspondent_name: str,
-    doc_type_name: str,
-    owner_username: str,
-    local_added: datetime.datetime,
-    original_filename: str,
-    created: datetime.datetime | None = None,
-) -> str:
-    """
-    Available title placeholders for Workflows depend on what has already been assigned,
-    e.g. for pre-consumption triggers created will not have been parsed yet, but it will
-    for added / updated triggers
-    """
-    formatting = {
-        "correspondent": correspondent_name,
-        "document_type": doc_type_name,
-        "added": local_added.isoformat(),
-        "added_year": local_added.strftime("%Y"),
-        "added_year_short": local_added.strftime("%y"),
-        "added_month": local_added.strftime("%m"),
-        "added_month_name": local_added.strftime("%B"),
-        "added_month_name_short": local_added.strftime("%b"),
-        "added_day": local_added.strftime("%d"),
-        "added_time": local_added.strftime("%H:%M"),
-        "owner_username": owner_username,
-        "original_filename": Path(original_filename).stem,
-    }
-    if created is not None:
-        formatting.update(
-            {
-                "created": created.isoformat(),
-                "created_year": created.strftime("%Y"),
-                "created_year_short": created.strftime("%y"),
-                "created_month": created.strftime("%m"),
-                "created_month_name": created.strftime("%B"),
-                "created_month_name_short": created.strftime("%b"),
-                "created_day": created.strftime("%d"),
-                "created_time": created.strftime("%H:%M"),
-            },
-        )
-    return title.format(**formatting).strip()
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index cf6733dd5..2c29545e9 100644
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -24,7 +24,8 @@ from guardian.shortcuts import remove_perm
 from documents import matching
 from documents.caching import clear_document_caches
 from documents.classifier import DocumentClassifier
-from documents.consumer import parse_doc_title_w_placeholders
+from documents.data_models import ConsumableDocument
+from documents.data_models import DocumentMetadataOverrides
 from documents.file_handling import create_source_path_directory
 from documents.file_handling import delete_empty_directories
 from documents.file_handling import generate_unique_filename
@@ -38,6 +39,7 @@ from documents.models import WorkflowAction
 from documents.models import WorkflowTrigger
 from documents.permissions import get_objects_for_user_owner_aware
 from documents.permissions import set_permissions_for_object
+from documents.templating.filepath import parse_doc_title_w_placeholders
 
 logger = logging.getLogger("paperless.handlers")
 
@@ -511,73 +513,96 @@ def add_to_index(sender, document, **kwargs):
     index.add_or_update_document(document)
 
 
-def run_workflow_added(sender, document: Document, logging_group=None, **kwargs):
-    run_workflow(
+def run_workflows_added(sender, document: Document, logging_group=None, **kwargs):
+    run_workflows(
         WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
         document,
         logging_group,
     )
 
 
-def run_workflow_updated(sender, document: Document, logging_group=None, **kwargs):
-    run_workflow(
+def run_workflows_updated(sender, document: Document, logging_group=None, **kwargs):
+    run_workflows(
         WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
         document,
         logging_group,
     )
 
 
-def run_workflow(
+def run_workflows(
     trigger_type: WorkflowTrigger.WorkflowTriggerType,
-    document: Document,
+    document: Document | ConsumableDocument,
     logging_group=None,
+    overrides: DocumentMetadataOverrides | None = None,
 ):
     def assignment_action():
         if action.assign_tags.all().count() > 0:
-            doc_tag_ids.extend(
-                list(action.assign_tags.all().values_list("pk", flat=True)),
-            )
+            if not use_overrides:
+                doc_tag_ids.extend(
+                    list(action.assign_tags.all().values_list("pk", flat=True)),
+                )
+            else:
+                if overrides.tag_ids is None:
+                    overrides.tag_ids = []
+                overrides.tag_ids.extend(
+                    list(action.assign_tags.all().values_list("pk", flat=True)),
+                )
 
         if action.assign_correspondent is not None:
-            document.correspondent = action.assign_correspondent
+            if not use_overrides:
+                document.correspondent = action.assign_correspondent
+            else:
+                overrides.correspondent_id = action.assign_correspondent.pk
 
         if action.assign_document_type is not None:
-            document.document_type = action.assign_document_type
+            if not use_overrides:
+                document.document_type = action.assign_document_type
+            else:
+                overrides.document_type_id = action.assign_document_type.pk
 
         if action.assign_storage_path is not None:
-            document.storage_path = action.assign_storage_path
+            if not use_overrides:
+                document.storage_path = action.assign_storage_path
+            else:
+                overrides.storage_path_id = action.assign_storage_path.pk
 
         if action.assign_owner is not None:
-            document.owner = action.assign_owner
+            if not use_overrides:
+                document.owner = action.assign_owner
+            else:
+                overrides.owner_id = action.assign_owner.pk
 
         if action.assign_title is not None:
-            try:
-                document.title = parse_doc_title_w_placeholders(
-                    action.assign_title,
-                    (
-                        document.correspondent.name
-                        if document.correspondent is not None
-                        else ""
-                    ),
-                    (
-                        document.document_type.name
-                        if document.document_type is not None
-                        else ""
-                    ),
-                    (document.owner.username if document.owner is not None else ""),
-                    timezone.localtime(document.added),
-                    (
-                        document.original_filename
-                        if document.original_filename is not None
-                        else ""
-                    ),
-                    timezone.localtime(document.created),
-                )
-            except Exception:
-                logger.exception(
-                    f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
-                    extra={"group": logging_group},
-                )
+            if not use_overrides:
+                try:
+                    document.title = parse_doc_title_w_placeholders(
+                        action.assign_title,
+                        (
+                            document.correspondent.name
+                            if document.correspondent is not None
+                            else ""
+                        ),
+                        (
+                            document.document_type.name
+                            if document.document_type is not None
+                            else ""
+                        ),
+                        (document.owner.username if document.owner is not None else ""),
+                        timezone.localtime(document.added),
+                        (
+                            document.original_filename
+                            if document.original_filename is not None
+                            else ""
+                        ),
+                        timezone.localtime(document.created),
+                    )
+                except Exception:
+                    logger.exception(
+                        f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
+                        extra={"group": logging_group},
+                    )
+            else:
+                overrides.title = action.assign_title
 
         if (
             (
@@ -601,89 +626,161 @@ def run_workflow(
                 "view": {
                     "users": action.assign_view_users.all().values_list(
                         "id",
+                        flat=True,
                     )
                     or [],
                     "groups": action.assign_view_groups.all().values_list(
                         "id",
+                        flat=True,
                     )
                     or [],
                 },
                 "change": {
                     "users": action.assign_change_users.all().values_list(
                         "id",
+                        flat=True,
                     )
                     or [],
                     "groups": action.assign_change_groups.all().values_list(
                         "id",
+                        flat=True,
                     )
                     or [],
                 },
             }
-            set_permissions_for_object(
-                permissions=permissions,
-                object=document,
-                merge=True,
-            )
+            if not use_overrides:
+                set_permissions_for_object(
+                    permissions=permissions,
+                    object=document,
+                    merge=True,
+                )
+            else:
+                overrides.view_users = list(permissions["view"]["users"])
+                overrides.view_groups = list(permissions["view"]["groups"])
+                overrides.change_users = list(permissions["change"]["users"])
+                overrides.change_groups = list(permissions["change"]["groups"])
 
         if action.assign_custom_fields is not None:
-            for field in action.assign_custom_fields.all():
-                if (
-                    CustomFieldInstance.objects.filter(
-                        field=field,
-                        document=document,
-                    ).count()
-                    == 0
-                ):
-                    # can be triggered on existing docs, so only add the field if it doesn't already exist
-                    CustomFieldInstance.objects.create(
-                        field=field,
-                        document=document,
-                    )
+            if not use_overrides:
+                for field in action.assign_custom_fields.all():
+                    if (
+                        CustomFieldInstance.objects.filter(
+                            field=field,
+                            document=document,
+                        ).count()
+                        == 0
+                    ):
+                        # can be triggered on existing docs, so only add the field if it doesn't already exist
+                        CustomFieldInstance.objects.create(
+                            field=field,
+                            document=document,
+                        )
+            else:
+                overrides.custom_field_ids = list(
+                    action.assign_custom_fields.all().values_list("pk", flat=True),
+                )
 
     def removal_action():
         if action.remove_all_tags:
-            doc_tag_ids.clear()
+            if not use_overrides:
+                doc_tag_ids.clear()
+            else:
+                overrides.tag_ids = None
         else:
-            for tag in action.remove_tags.filter(
-                pk__in=list(document.tags.values_list("pk", flat=True)),
-            ).all():
-                doc_tag_ids.remove(tag.pk)
+            if not use_overrides:
+                for tag in action.remove_tags.filter(
+                    pk__in=list(document.tags.values_list("pk", flat=True)),
+                ).all():
+                    doc_tag_ids.remove(tag.pk)
+            elif overrides.tag_ids:
+                for tag in action.remove_tags.filter(
+                    pk__in=overrides.tag_ids,
+                ):
+                    overrides.tag_ids.remove(tag.pk)
 
-        if action.remove_all_correspondents or (
-            document.correspondent
-            and (
-                action.remove_correspondents.filter(
-                    pk=document.correspondent.pk,
-                ).exists()
+        # correspondent
+        if not use_overrides and (
+            action.remove_all_correspondents
+            or (
+                document.correspondent
+                and (
+                    action.remove_correspondents.filter(
+                        pk=document.correspondent.pk,
+                    ).exists()
+                )
             )
         ):
             document.correspondent = None
+        elif (
+            use_overrides
+            and overrides.correspondent_id is not None
+            and action.remove_correspondents.filter(
+                pk=overrides.correspondent_id,
+            ).exists()
+        ):
+            overrides.correspondent_id = None
 
-        if action.remove_all_document_types or (
-            document.document_type
-            and (
-                action.remove_document_types.filter(
-                    pk=document.document_type.pk,
-                ).exists()
+        # document type
+        if not use_overrides and (
+            action.remove_all_document_types
+            or (
+                document.document_type
+                and (
+                    action.remove_document_types.filter(
+                        pk=document.document_type.pk,
+                    ).exists()
+                )
             )
         ):
             document.document_type = None
+        elif (
+            use_overrides
+            and overrides.document_type_id is not None
+            and action.remove_document_types.filter(
+                pk=overrides.document_type_id,
+            ).exists()
+        ):
+            overrides.document_type_id = None
 
-        if action.remove_all_storage_paths or (
-            document.storage_path
-            and (
-                action.remove_storage_paths.filter(
-                    pk=document.storage_path.pk,
-                ).exists()
+        # storage path
+        if not use_overrides and (
+            action.remove_all_storage_paths
+            or (
+                document.storage_path
+                and (
+                    action.remove_storage_paths.filter(
+                        pk=document.storage_path.pk,
+                    ).exists()
+                )
             )
         ):
             document.storage_path = None
+        elif (
+            use_overrides
+            and overrides.storage_path_id is not None
+            and action.remove_storage_paths.filter(
+                pk=overrides.storage_path_id,
+            ).exists()
+        ):
+            overrides.storage_path_id = None
 
-        if action.remove_all_owners or (
-            document.owner
-            and (action.remove_owners.filter(pk=document.owner.pk).exists())
+        # owner
+        if not use_overrides and (
+            action.remove_all_owners
+            or (
+                document.owner
+                and (action.remove_owners.filter(pk=document.owner.pk).exists())
+            )
         ):
             document.owner = None
+        elif (
+            use_overrides
+            and overrides.owner_id is not None
+            and action.remove_owners.filter(
+                pk=overrides.owner_id,
+            ).exists()
+        ):
+            overrides.owner_id = None
 
         if action.remove_all_permissions:
             permissions = {
@@ -696,33 +793,72 @@ def run_workflow(
                     "groups": [],
                 },
             }
-            set_permissions_for_object(
-                permissions=permissions,
-                object=document,
-                merge=False,
-            )
+            if not use_overrides:
+                set_permissions_for_object(
+                    permissions=permissions,
+                    object=document,
+                    merge=False,
+                )
+            else:
+                overrides.view_users = None
+                overrides.view_groups = None
+                overrides.change_users = None
+                overrides.change_groups = None
         elif (
             (action.remove_view_users.all().count() > 0)
             or (action.remove_view_groups.all().count() > 0)
             or (action.remove_change_users.all().count() > 0)
             or (action.remove_change_groups.all().count() > 0)
         ):
-            for user in action.remove_view_users.all():
-                remove_perm("view_document", user, document)
-            for user in action.remove_change_users.all():
-                remove_perm("change_document", user, document)
-            for group in action.remove_view_groups.all():
-                remove_perm("view_document", group, document)
-            for group in action.remove_change_groups.all():
-                remove_perm("change_document", group, document)
+            if not use_overrides:
+                for user in action.remove_view_users.all():
+                    remove_perm("view_document", user, document)
+                for user in action.remove_change_users.all():
+                    remove_perm("change_document", user, document)
+                for group in action.remove_view_groups.all():
+                    remove_perm("view_document", group, document)
+                for group in action.remove_change_groups.all():
+                    remove_perm("change_document", group, document)
+            else:
+                if overrides.view_users:
+                    for user in action.remove_view_users.filter(
+                        pk__in=overrides.view_users,
+                    ):
+                        overrides.view_users.remove(user.pk)
+                if overrides.change_users:
+                    for user in action.remove_change_users.filter(
+                        pk__in=overrides.change_users,
+                    ):
+                        overrides.change_users.remove(user.pk)
+                if overrides.view_groups:
+                    for user in action.remove_view_groups.filter(
+                        pk__in=overrides.view_groups,
+                    ):
+                        overrides.view_groups.remove(user.pk)
+                if overrides.change_groups:
+                    for user in action.remove_change_groups.filter(
+                        pk__in=overrides.change_groups,
+                    ):
+                        overrides.change_groups.remove(user.pk)
 
         if action.remove_all_custom_fields:
-            CustomFieldInstance.objects.filter(document=document).delete()
+            if not use_overrides:
+                CustomFieldInstance.objects.filter(document=document).delete()
+            else:
+                overrides.custom_field_ids = None
         elif action.remove_custom_fields.all().count() > 0:
-            CustomFieldInstance.objects.filter(
-                field__in=action.remove_custom_fields.all(),
-                document=document,
-            ).delete()
+            if not use_overrides:
+                CustomFieldInstance.objects.filter(
+                    field__in=action.remove_custom_fields.all(),
+                    document=document,
+                ).delete()
+            elif overrides.custom_field_ids:
+                for field in action.remove_custom_fields.filter(
+                    pk__in=overrides.custom_field_ids,
+                ):
+                    overrides.custom_field_ids.remove(field.pk)
+
+    use_overrides = overrides is not None
 
     for workflow in (
         Workflow.objects.filter(
@@ -744,11 +880,14 @@ def run_workflow(
         .prefetch_related("triggers")
         .order_by("order")
     ):
-        # This can be called from bulk_update_documents, which may be running multiple times
-        # Refresh this so the matching data is fresh and instance fields are re-freshed
-        # Otherwise, this instance might be behind and overwrite the work another process did
-        document.refresh_from_db()
-        doc_tag_ids = list(document.tags.all().values_list("pk", flat=True))
+        if not use_overrides:
+            # This can be called from bulk_update_documents, which may be running multiple times
+            # Refresh this so the matching data is fresh and instance fields are re-freshed
+            # Otherwise, this instance might be behind and overwrite the work another process did
+            document.refresh_from_db()
+            doc_tag_ids = list(document.tags.all().values_list("pk", flat=True))
+        else:
+            doc_tag_ids = overrides.tag_ids or []
         if matching.document_matches_workflow(
             document,
             workflow,
@@ -767,9 +906,12 @@ def run_workflow(
                 elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
                     removal_action()
 
-            # save first before setting tags
-            document.save()
-            document.tags.set(doc_tag_ids)
+            if not use_overrides:
+                # save first before setting tags
+                document.save()
+                document.tags.set(doc_tag_ids)
+            else:
+                return overrides
 
 
 @before_task_publish.connect
diff --git a/src/documents/templating/filepath.py b/src/documents/templating/filepath.py
index 54ceb30a8..ea31461fc 100644
--- a/src/documents/templating/filepath.py
+++ b/src/documents/templating/filepath.py
@@ -3,6 +3,7 @@ import os
 import re
 from collections.abc import Iterable
 from datetime import datetime
+from pathlib import Path
 from pathlib import PurePath
 
 import pathvalidate
@@ -330,3 +331,47 @@ def validate_filepath_template_and_render(
             f"Invalid filename_format '{template_string}', falling back to default",
         )
         return None
+
+
+def parse_doc_title_w_placeholders(
+    title: str,
+    correspondent_name: str,
+    doc_type_name: str,
+    owner_username: str,
+    local_added: datetime,
+    original_filename: str,
+    created: datetime | None = None,
+) -> str:
+    """
+    Available title placeholders for Workflows depend on what has already been assigned,
+    e.g. for pre-consumption triggers created will not have been parsed yet, but it will
+    for added / updated triggers
+    """
+    formatting = {
+        "correspondent": correspondent_name,
+        "document_type": doc_type_name,
+        "added": local_added.isoformat(),
+        "added_year": local_added.strftime("%Y"),
+        "added_year_short": local_added.strftime("%y"),
+        "added_month": local_added.strftime("%m"),
+        "added_month_name": local_added.strftime("%B"),
+        "added_month_name_short": local_added.strftime("%b"),
+        "added_day": local_added.strftime("%d"),
+        "added_time": local_added.strftime("%H:%M"),
+        "owner_username": owner_username,
+        "original_filename": Path(original_filename).stem,
+    }
+    if created is not None:
+        formatting.update(
+            {
+                "created": created.isoformat(),
+                "created_year": created.strftime("%Y"),
+                "created_year_short": created.strftime("%y"),
+                "created_month": created.strftime("%m"),
+                "created_month_name": created.strftime("%B"),
+                "created_month_name_short": created.strftime("%b"),
+                "created_day": created.strftime("%d"),
+                "created_time": created.strftime("%H:%M"),
+            },
+        )
+    return title.format(**formatting).strip()