Let's just see

This commit is contained in:
shamoon 2024-10-08 09:39:40 -07:00
parent 024b60638a
commit dd8dafc6e7
3 changed files with 266 additions and 298 deletions

View File

@ -4,7 +4,6 @@ import os
import tempfile import tempfile
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING
import magic import magic
from django.conf import settings from django.conf import settings
@ -20,8 +19,8 @@ from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides from documents.data_models import DocumentMetadataOverrides
from documents.file_handling import create_source_path_directory from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename from documents.file_handling import generate_unique_filename
from documents.file_handling import parse_doc_title_w_placeholders
from documents.loggers import LoggingMixin from documents.loggers import LoggingMixin
from documents.matching import document_matches_workflow
from documents.models import Correspondent from documents.models import Correspondent
from documents.models import CustomField from documents.models import CustomField
from documents.models import CustomFieldInstance from documents.models import CustomFieldInstance
@ -30,8 +29,6 @@ from documents.models import DocumentType
from documents.models import FileInfo from documents.models import FileInfo
from documents.models import StoragePath from documents.models import StoragePath
from documents.models import Tag from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser from documents.parsers import DocumentParser
from documents.parsers import ParseError from documents.parsers import ParseError
@ -46,6 +43,7 @@ from documents.plugins.helpers import ProgressManager
from documents.plugins.helpers import ProgressStatusOptions from documents.plugins.helpers import ProgressStatusOptions
from documents.signals import document_consumption_finished from documents.signals import document_consumption_finished
from documents.signals import document_consumption_started from documents.signals import document_consumption_started
from documents.signals.handlers import run_workflow
from documents.utils import copy_basic_file_stats from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess from documents.utils import run_subprocess
@ -59,168 +57,16 @@ class WorkflowTriggerPlugin(
): ):
NAME: str = "WorkflowTriggerPlugin" NAME: str = "WorkflowTriggerPlugin"
def run(self) -> str | None: def run(self):
"""
Get overrides from matching workflows
"""
msg = ""
overrides = DocumentMetadataOverrides() overrides = DocumentMetadataOverrides()
for workflow in ( overrides = run_workflow(
Workflow.objects.filter(enabled=True)
.prefetch_related("actions")
.prefetch_related("actions__assign_view_users")
.prefetch_related("actions__assign_view_groups")
.prefetch_related("actions__assign_change_users")
.prefetch_related("actions__assign_change_groups")
.prefetch_related("actions__assign_custom_fields")
.prefetch_related("actions__remove_tags")
.prefetch_related("actions__remove_correspondents")
.prefetch_related("actions__remove_document_types")
.prefetch_related("actions__remove_storage_paths")
.prefetch_related("actions__remove_custom_fields")
.prefetch_related("actions__remove_owners")
.prefetch_related("triggers")
.order_by("order")
):
action_overrides = DocumentMetadataOverrides()
if document_matches_workflow(
self.input_doc,
workflow,
WorkflowTrigger.WorkflowTriggerType.CONSUMPTION, WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
): self.input_doc,
for action in workflow.actions.all(): "paperless.consumer",
if TYPE_CHECKING: overrides,
assert isinstance(action, WorkflowAction)
msg += f"Applying {action} from {workflow}\n"
if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
if action.assign_title is not None:
action_overrides.title = action.assign_title
if action.assign_tags is not None:
action_overrides.tag_ids = list(
action.assign_tags.values_list("pk", flat=True),
) )
if action.assign_correspondent is not None:
action_overrides.correspondent_id = (
action.assign_correspondent.pk
)
if action.assign_document_type is not None:
action_overrides.document_type_id = (
action.assign_document_type.pk
)
if action.assign_storage_path is not None:
action_overrides.storage_path_id = (
action.assign_storage_path.pk
)
if action.assign_owner is not None:
action_overrides.owner_id = action.assign_owner.pk
if action.assign_view_users is not None:
action_overrides.view_users = list(
action.assign_view_users.values_list("pk", flat=True),
)
if action.assign_view_groups is not None:
action_overrides.view_groups = list(
action.assign_view_groups.values_list("pk", flat=True),
)
if action.assign_change_users is not None:
action_overrides.change_users = list(
action.assign_change_users.values_list("pk", flat=True),
)
if action.assign_change_groups is not None:
action_overrides.change_groups = list(
action.assign_change_groups.values_list(
"pk",
flat=True,
),
)
if action.assign_custom_fields is not None:
action_overrides.custom_field_ids = list(
action.assign_custom_fields.values_list(
"pk",
flat=True,
),
)
overrides.update(action_overrides)
elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
# Removal actions overwrite the current overrides
if action.remove_all_tags:
overrides.tag_ids = []
elif overrides.tag_ids:
for tag in action.remove_custom_fields.filter(
pk__in=overrides.tag_ids,
):
overrides.tag_ids.remove(tag.pk)
if action.remove_all_correspondents or (
overrides.correspondent_id is not None
and action.remove_correspondents.filter(
pk=overrides.correspondent_id,
).exists()
):
overrides.correspondent_id = None
if action.remove_all_document_types or (
overrides.document_type_id is not None
and action.remove_document_types.filter(
pk=overrides.document_type_id,
).exists()
):
overrides.document_type_id = None
if action.remove_all_storage_paths or (
overrides.storage_path_id is not None
and action.remove_storage_paths.filter(
pk=overrides.storage_path_id,
).exists()
):
overrides.storage_path_id = None
if action.remove_all_custom_fields:
overrides.custom_field_ids = []
elif overrides.custom_field_ids:
for field in action.remove_custom_fields.filter(
pk__in=overrides.custom_field_ids,
):
overrides.custom_field_ids.remove(field.pk)
if action.remove_all_owners or (
overrides.owner_id is not None
and action.remove_owners.filter(
pk=overrides.owner_id,
).exists()
):
overrides.owner_id = None
if action.remove_all_permissions:
overrides.view_users = []
overrides.view_groups = []
overrides.change_users = []
overrides.change_groups = []
else:
if overrides.view_users:
for user in action.remove_view_users.filter(
pk__in=overrides.view_users,
):
overrides.view_users.remove(user.pk)
if overrides.change_users:
for user in action.remove_change_users.filter(
pk__in=overrides.change_users,
):
overrides.change_users.remove(user.pk)
if overrides.view_groups:
for user in action.remove_view_groups.filter(
pk__in=overrides.view_groups,
):
overrides.view_groups.remove(user.pk)
if overrides.change_groups:
for user in action.remove_change_groups.filter(
pk__in=overrides.change_groups,
):
overrides.change_groups.remove(user.pk)
self.metadata.update(overrides) self.metadata.update(overrides)
return msg
class ConsumerError(Exception): class ConsumerError(Exception):
@ -948,47 +794,3 @@ class ConsumerPlugin(
copy_basic_file_stats(source, target) copy_basic_file_stats(source, target)
except Exception: # pragma: no cover except Exception: # pragma: no cover
pass pass
def parse_doc_title_w_placeholders(
title: str,
correspondent_name: str,
doc_type_name: str,
owner_username: str,
local_added: datetime.datetime,
original_filename: str,
created: datetime.datetime | None = None,
) -> str:
"""
Available title placeholders for Workflows depend on what has already been assigned,
e.g. for pre-consumption triggers created will not have been parsed yet, but it will
for added / updated triggers
"""
formatting = {
"correspondent": correspondent_name,
"document_type": doc_type_name,
"added": local_added.isoformat(),
"added_year": local_added.strftime("%Y"),
"added_year_short": local_added.strftime("%y"),
"added_month": local_added.strftime("%m"),
"added_month_name": local_added.strftime("%B"),
"added_month_name_short": local_added.strftime("%b"),
"added_day": local_added.strftime("%d"),
"added_time": local_added.strftime("%H:%M"),
"owner_username": owner_username,
"original_filename": Path(original_filename).stem,
}
if created is not None:
formatting.update(
{
"created": created.isoformat(),
"created_year": created.strftime("%Y"),
"created_year_short": created.strftime("%y"),
"created_month": created.strftime("%m"),
"created_month_name": created.strftime("%B"),
"created_month_name_short": created.strftime("%b"),
"created_day": created.strftime("%d"),
"created_time": created.strftime("%H:%M"),
},
)
return title.format(**formatting).strip()

View File

@ -1,4 +1,6 @@
import datetime
import os import os
from pathlib import Path
from django.conf import settings from django.conf import settings
@ -146,3 +148,47 @@ def generate_filename(
filename += ".gpg" filename += ".gpg"
return filename return filename
def parse_doc_title_w_placeholders(
title: str,
correspondent_name: str,
doc_type_name: str,
owner_username: str,
local_added: datetime.datetime,
original_filename: str,
created: datetime.datetime | None = None,
) -> str:
"""
Available title placeholders for Workflows depend on what has already been assigned,
e.g. for pre-consumption triggers created will not have been parsed yet, but it will
for added / updated triggers
"""
formatting = {
"correspondent": correspondent_name,
"document_type": doc_type_name,
"added": local_added.isoformat(),
"added_year": local_added.strftime("%Y"),
"added_year_short": local_added.strftime("%y"),
"added_month": local_added.strftime("%m"),
"added_month_name": local_added.strftime("%B"),
"added_month_name_short": local_added.strftime("%b"),
"added_day": local_added.strftime("%d"),
"added_time": local_added.strftime("%H:%M"),
"owner_username": owner_username,
"original_filename": Path(original_filename).stem,
}
if created is not None:
formatting.update(
{
"created": created.isoformat(),
"created_year": created.strftime("%Y"),
"created_year_short": created.strftime("%y"),
"created_month": created.strftime("%m"),
"created_month_name": created.strftime("%B"),
"created_month_name_short": created.strftime("%b"),
"created_day": created.strftime("%d"),
"created_time": created.strftime("%H:%M"),
},
)
return title.format(**formatting).strip()

View File

@ -24,10 +24,12 @@ from guardian.shortcuts import remove_perm
from documents import matching from documents import matching
from documents.caching import clear_document_caches from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier from documents.classifier import DocumentClassifier
from documents.consumer import parse_doc_title_w_placeholders from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.file_handling import create_source_path_directory from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename from documents.file_handling import generate_unique_filename
from documents.file_handling import parse_doc_title_w_placeholders
from documents.models import CustomFieldInstance from documents.models import CustomFieldInstance
from documents.models import Document from documents.models import Document
from documents.models import MatchingModel from documents.models import MatchingModel
@ -529,28 +531,52 @@ def run_workflow_updated(sender, document: Document, logging_group=None, **kwarg
def run_workflow( def run_workflow(
trigger_type: WorkflowTrigger.WorkflowTriggerType, trigger_type: WorkflowTrigger.WorkflowTriggerType,
document: Document, document: Document | ConsumableDocument,
logging_group=None, logging_group=None,
): overrides: DocumentMetadataOverrides = None,
def assignment_action(): ) -> None | DocumentMetadataOverrides:
"""
Run the workflow for the given document and trigger type.
If overrides is provided, the document will not be saved, and an updated DocumentMetadataOverrides object will be returned.
"""
def assignment_action(
document: Document,
overrides: DocumentMetadataOverrides | None = None,
):
if action.assign_tags.all().count() > 0: if action.assign_tags.all().count() > 0:
doc_tag_ids.extend( doc_tag_ids.extend(
list(action.assign_tags.all().values_list("pk", flat=True)), list(action.assign_tags.all().values_list("pk", flat=True)),
) )
if action.assign_correspondent is not None: if action.assign_correspondent is not None:
if overrides is not None:
overrides.correspondent_id = action.assign_correspondent.pk
else:
document.correspondent = action.assign_correspondent document.correspondent = action.assign_correspondent
if action.assign_document_type is not None: if action.assign_document_type is not None:
if overrides is not None:
overrides.document_type_id = action.assign_document_type.pk
else:
document.document_type = action.assign_document_type document.document_type = action.assign_document_type
if action.assign_storage_path is not None: if action.assign_storage_path is not None:
if overrides is not None:
overrides.storage_path_id = action.assign_storage_path.pk
else:
document.storage_path = action.assign_storage_path document.storage_path = action.assign_storage_path
if action.assign_owner is not None: if action.assign_owner is not None:
if overrides is not None:
overrides.owner_id = action.assign_owner.pk
else:
document.owner = action.assign_owner document.owner = action.assign_owner
if action.assign_title is not None: if action.assign_title is not None:
if overrides is not None:
overrides.title = action.assign_title
else:
try: try:
document.title = parse_doc_title_w_placeholders( document.title = parse_doc_title_w_placeholders(
action.assign_title, action.assign_title,
@ -619,6 +645,12 @@ def run_workflow(
or [], or [],
}, },
} }
if overrides is not None:
overrides.view_users = permissions["view"]["users"]
overrides.view_groups = permissions["view"]["groups"]
overrides.change_users = permissions["change"]["users"]
overrides.change_groups = permissions["change"]["groups"]
else:
set_permissions_for_object( set_permissions_for_object(
permissions=permissions, permissions=permissions,
object=document, object=document,
@ -626,6 +658,11 @@ def run_workflow(
) )
if action.assign_custom_fields is not None: if action.assign_custom_fields is not None:
if overrides is not None:
overrides.custom_field_ids = list(
action.assign_custom_fields.all().values_list("pk", flat=True),
)
else:
for field in action.assign_custom_fields.all(): for field in action.assign_custom_fields.all():
if ( if (
CustomFieldInstance.objects.filter( CustomFieldInstance.objects.filter(
@ -640,7 +677,10 @@ def run_workflow(
document=document, document=document,
) )
def removal_action(): def removal_action(
document: Document,
overrides: DocumentMetadataOverrides | None = None,
):
if action.remove_all_tags: if action.remove_all_tags:
doc_tag_ids.clear() doc_tag_ids.clear()
else: else:
@ -649,40 +689,71 @@ def run_workflow(
).all(): ).all():
doc_tag_ids.remove(tag.pk) doc_tag_ids.remove(tag.pk)
corresspondent_id = (
document.correspondent.pk
if (overrides is None and document.correspondent)
else overrides.correspondent_id
)
if action.remove_all_correspondents or ( if action.remove_all_correspondents or (
document.correspondent corresspondent_id
and ( and (
action.remove_correspondents.filter( action.remove_correspondents.filter(
pk=document.correspondent.pk, pk=corresspondent_id,
).exists() ).exists()
) )
): ):
if overrides is not None:
overrides.correspondent_id = None
else:
document.correspondent = None document.correspondent = None
document_type_id = (
document.document_type.pk
if (overrides is None and document.document_type)
else overrides.document_type_id
)
if action.remove_all_document_types or ( if action.remove_all_document_types or (
document.document_type document_type_id
and ( and (
action.remove_document_types.filter( action.remove_document_types.filter(
pk=document.document_type.pk, pk=document_type_id,
).exists() ).exists()
) )
): ):
if overrides is not None:
overrides.document_type_id = None
else:
document.document_type = None document.document_type = None
storage_path_id = (
document.storage_path.pk
if (overrides is None and document.storage_path)
else overrides.storage_path_id
)
if action.remove_all_storage_paths or ( if action.remove_all_storage_paths or (
document.storage_path storage_path_id
and ( and (
action.remove_storage_paths.filter( action.remove_storage_paths.filter(
pk=document.storage_path.pk, pk=storage_path_id,
).exists() ).exists()
) )
): ):
if overrides is not None:
overrides.storage_path_id = None
else:
document.storage_path = None document.storage_path = None
owner_id = (
document.owner.pk
if (overrides is None and document.owner)
else overrides.owner_id
)
if action.remove_all_owners or ( if action.remove_all_owners or (
document.owner owner_id and (action.remove_owners.filter(pk=owner_id).exists())
and (action.remove_owners.filter(pk=document.owner.pk).exists())
): ):
if overrides is not None:
overrides.owner_id = None
else:
document.owner = None document.owner = None
if action.remove_all_permissions: if action.remove_all_permissions:
@ -696,6 +767,12 @@ def run_workflow(
"groups": [], "groups": [],
}, },
} }
if overrides is not None:
overrides.view_users = []
overrides.view_groups = []
overrides.change_users = []
overrides.change_groups = []
else:
set_permissions_for_object( set_permissions_for_object(
permissions=permissions, permissions=permissions,
object=document, object=document,
@ -707,6 +784,36 @@ def run_workflow(
or (action.remove_change_users.all().count() > 0) or (action.remove_change_users.all().count() > 0)
or (action.remove_change_groups.all().count() > 0) or (action.remove_change_groups.all().count() > 0)
): ):
if overrides is not None:
if (
action.remove_view_users.all().count() > 0
and overrides.view_users is not None
):
overrides.view_users.remove(
*action.remove_view_users.all().values_list("id", flat=True),
)
if (
action.remove_view_groups.all().count() > 0
and overrides.view_groups is not None
):
overrides.view_groups.remove(
*action.remove_view_groups.all().values_list("id", flat=True),
)
if (
action.remove_change_users.all().count() > 0
and overrides.change_users is not None
):
overrides.change_users.remove(
*action.remove_change_users.all().values_list("id", flat=True),
)
if (
action.remove_change_groups.all().count() > 0
and overrides.change_groups is not None
):
overrides.change_groups.remove(
*action.remove_change_groups.all().values_list("id", flat=True),
)
else:
for user in action.remove_view_users.all(): for user in action.remove_view_users.all():
remove_perm("view_document", user, document) remove_perm("view_document", user, document)
for user in action.remove_change_users.all(): for user in action.remove_change_users.all():
@ -717,8 +824,15 @@ def run_workflow(
remove_perm("change_document", group, document) remove_perm("change_document", group, document)
if action.remove_all_custom_fields: if action.remove_all_custom_fields:
if overrides is not None:
overrides.custom_field_ids = []
else:
CustomFieldInstance.objects.filter(document=document).delete() CustomFieldInstance.objects.filter(document=document).delete()
elif action.remove_custom_fields.all().count() > 0: elif action.remove_custom_fields.all().count() > 0:
if overrides is not None and overrides.custom_field_ids is not None:
overrides.custom_field_ids.remove(
*action.remove_custom_fields.all().values_list("id", flat=True),
)
CustomFieldInstance.objects.filter( CustomFieldInstance.objects.filter(
field__in=action.remove_custom_fields.all(), field__in=action.remove_custom_fields.all(),
document=document, document=document,
@ -744,11 +858,14 @@ def run_workflow(
.prefetch_related("triggers") .prefetch_related("triggers")
.order_by("order") .order_by("order")
): ):
if overrides is None:
# This can be called from bulk_update_documents, which may be running multiple times # This can be called from bulk_update_documents, which may be running multiple times
# Refresh this so the matching data is fresh and instance fields are re-freshed # Refresh this so the matching data is fresh and instance fields are re-freshed
# Otherwise, this instance might be behind and overwrite the work another process did # Otherwise, this instance might be behind and overwrite the work another process did
document.refresh_from_db() document.refresh_from_db()
doc_tag_ids = list(document.tags.all().values_list("pk", flat=True)) doc_tag_ids = list(document.tags.all().values_list("pk", flat=True))
else:
doc_tag_ids = overrides.tag_ids or []
if matching.document_matches_workflow( if matching.document_matches_workflow(
document, document,
workflow, workflow,
@ -762,11 +879,14 @@ def run_workflow(
) )
if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT: if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
assignment_action() assignment_action(document, overrides)
elif action.type == WorkflowAction.WorkflowActionType.REMOVAL: elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
removal_action() removal_action(document, overrides)
if overrides is not None:
overrides.tag_ids = doc_tag_ids
return overrides
else:
# save first before setting tags # save first before setting tags
document.save() document.save()
document.tags.set(doc_tag_ids) document.tags.set(doc_tag_ids)