Lets just see

This commit is contained in:
shamoon 2024-10-08 09:39:40 -07:00
parent 024b60638a
commit dd8dafc6e7
3 changed files with 266 additions and 298 deletions

View File

@ -4,7 +4,6 @@ import os
import tempfile import tempfile
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING
import magic import magic
from django.conf import settings from django.conf import settings
@ -20,8 +19,8 @@ from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides from documents.data_models import DocumentMetadataOverrides
from documents.file_handling import create_source_path_directory from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename from documents.file_handling import generate_unique_filename
from documents.file_handling import parse_doc_title_w_placeholders
from documents.loggers import LoggingMixin from documents.loggers import LoggingMixin
from documents.matching import document_matches_workflow
from documents.models import Correspondent from documents.models import Correspondent
from documents.models import CustomField from documents.models import CustomField
from documents.models import CustomFieldInstance from documents.models import CustomFieldInstance
@ -30,8 +29,6 @@ from documents.models import DocumentType
from documents.models import FileInfo from documents.models import FileInfo
from documents.models import StoragePath from documents.models import StoragePath
from documents.models import Tag from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser from documents.parsers import DocumentParser
from documents.parsers import ParseError from documents.parsers import ParseError
@ -46,6 +43,7 @@ from documents.plugins.helpers import ProgressManager
from documents.plugins.helpers import ProgressStatusOptions from documents.plugins.helpers import ProgressStatusOptions
from documents.signals import document_consumption_finished from documents.signals import document_consumption_finished
from documents.signals import document_consumption_started from documents.signals import document_consumption_started
from documents.signals.handlers import run_workflow
from documents.utils import copy_basic_file_stats from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess from documents.utils import run_subprocess
@ -59,168 +57,16 @@ class WorkflowTriggerPlugin(
): ):
NAME: str = "WorkflowTriggerPlugin" NAME: str = "WorkflowTriggerPlugin"
def run(self) -> str | None: def run(self):
"""
Get overrides from matching workflows
"""
msg = ""
overrides = DocumentMetadataOverrides() overrides = DocumentMetadataOverrides()
for workflow in ( overrides = run_workflow(
Workflow.objects.filter(enabled=True) WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
.prefetch_related("actions") self.input_doc,
.prefetch_related("actions__assign_view_users") "paperless.consumer",
.prefetch_related("actions__assign_view_groups") overrides,
.prefetch_related("actions__assign_change_users") )
.prefetch_related("actions__assign_change_groups")
.prefetch_related("actions__assign_custom_fields")
.prefetch_related("actions__remove_tags")
.prefetch_related("actions__remove_correspondents")
.prefetch_related("actions__remove_document_types")
.prefetch_related("actions__remove_storage_paths")
.prefetch_related("actions__remove_custom_fields")
.prefetch_related("actions__remove_owners")
.prefetch_related("triggers")
.order_by("order")
):
action_overrides = DocumentMetadataOverrides()
if document_matches_workflow(
self.input_doc,
workflow,
WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
):
for action in workflow.actions.all():
if TYPE_CHECKING:
assert isinstance(action, WorkflowAction)
msg += f"Applying {action} from {workflow}\n"
if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
if action.assign_title is not None:
action_overrides.title = action.assign_title
if action.assign_tags is not None:
action_overrides.tag_ids = list(
action.assign_tags.values_list("pk", flat=True),
)
if action.assign_correspondent is not None:
action_overrides.correspondent_id = (
action.assign_correspondent.pk
)
if action.assign_document_type is not None:
action_overrides.document_type_id = (
action.assign_document_type.pk
)
if action.assign_storage_path is not None:
action_overrides.storage_path_id = (
action.assign_storage_path.pk
)
if action.assign_owner is not None:
action_overrides.owner_id = action.assign_owner.pk
if action.assign_view_users is not None:
action_overrides.view_users = list(
action.assign_view_users.values_list("pk", flat=True),
)
if action.assign_view_groups is not None:
action_overrides.view_groups = list(
action.assign_view_groups.values_list("pk", flat=True),
)
if action.assign_change_users is not None:
action_overrides.change_users = list(
action.assign_change_users.values_list("pk", flat=True),
)
if action.assign_change_groups is not None:
action_overrides.change_groups = list(
action.assign_change_groups.values_list(
"pk",
flat=True,
),
)
if action.assign_custom_fields is not None:
action_overrides.custom_field_ids = list(
action.assign_custom_fields.values_list(
"pk",
flat=True,
),
)
overrides.update(action_overrides)
elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
# Removal actions overwrite the current overrides
if action.remove_all_tags:
overrides.tag_ids = []
elif overrides.tag_ids:
for tag in action.remove_custom_fields.filter(
pk__in=overrides.tag_ids,
):
overrides.tag_ids.remove(tag.pk)
if action.remove_all_correspondents or (
overrides.correspondent_id is not None
and action.remove_correspondents.filter(
pk=overrides.correspondent_id,
).exists()
):
overrides.correspondent_id = None
if action.remove_all_document_types or (
overrides.document_type_id is not None
and action.remove_document_types.filter(
pk=overrides.document_type_id,
).exists()
):
overrides.document_type_id = None
if action.remove_all_storage_paths or (
overrides.storage_path_id is not None
and action.remove_storage_paths.filter(
pk=overrides.storage_path_id,
).exists()
):
overrides.storage_path_id = None
if action.remove_all_custom_fields:
overrides.custom_field_ids = []
elif overrides.custom_field_ids:
for field in action.remove_custom_fields.filter(
pk__in=overrides.custom_field_ids,
):
overrides.custom_field_ids.remove(field.pk)
if action.remove_all_owners or (
overrides.owner_id is not None
and action.remove_owners.filter(
pk=overrides.owner_id,
).exists()
):
overrides.owner_id = None
if action.remove_all_permissions:
overrides.view_users = []
overrides.view_groups = []
overrides.change_users = []
overrides.change_groups = []
else:
if overrides.view_users:
for user in action.remove_view_users.filter(
pk__in=overrides.view_users,
):
overrides.view_users.remove(user.pk)
if overrides.change_users:
for user in action.remove_change_users.filter(
pk__in=overrides.change_users,
):
overrides.change_users.remove(user.pk)
if overrides.view_groups:
for user in action.remove_view_groups.filter(
pk__in=overrides.view_groups,
):
overrides.view_groups.remove(user.pk)
if overrides.change_groups:
for user in action.remove_change_groups.filter(
pk__in=overrides.change_groups,
):
overrides.change_groups.remove(user.pk)
self.metadata.update(overrides) self.metadata.update(overrides)
return msg
class ConsumerError(Exception): class ConsumerError(Exception):
@ -948,47 +794,3 @@ class ConsumerPlugin(
copy_basic_file_stats(source, target) copy_basic_file_stats(source, target)
except Exception: # pragma: no cover except Exception: # pragma: no cover
pass pass
def parse_doc_title_w_placeholders(
title: str,
correspondent_name: str,
doc_type_name: str,
owner_username: str,
local_added: datetime.datetime,
original_filename: str,
created: datetime.datetime | None = None,
) -> str:
"""
Available title placeholders for Workflows depend on what has already been assigned,
e.g. for pre-consumption triggers created will not have been parsed yet, but it will
for added / updated triggers
"""
formatting = {
"correspondent": correspondent_name,
"document_type": doc_type_name,
"added": local_added.isoformat(),
"added_year": local_added.strftime("%Y"),
"added_year_short": local_added.strftime("%y"),
"added_month": local_added.strftime("%m"),
"added_month_name": local_added.strftime("%B"),
"added_month_name_short": local_added.strftime("%b"),
"added_day": local_added.strftime("%d"),
"added_time": local_added.strftime("%H:%M"),
"owner_username": owner_username,
"original_filename": Path(original_filename).stem,
}
if created is not None:
formatting.update(
{
"created": created.isoformat(),
"created_year": created.strftime("%Y"),
"created_year_short": created.strftime("%y"),
"created_month": created.strftime("%m"),
"created_month_name": created.strftime("%B"),
"created_month_name_short": created.strftime("%b"),
"created_day": created.strftime("%d"),
"created_time": created.strftime("%H:%M"),
},
)
return title.format(**formatting).strip()

View File

@ -1,4 +1,6 @@
import datetime
import os import os
from pathlib import Path
from django.conf import settings from django.conf import settings
@ -146,3 +148,47 @@ def generate_filename(
filename += ".gpg" filename += ".gpg"
return filename return filename
def parse_doc_title_w_placeholders(
title: str,
correspondent_name: str,
doc_type_name: str,
owner_username: str,
local_added: datetime.datetime,
original_filename: str,
created: datetime.datetime | None = None,
) -> str:
"""
Available title placeholders for Workflows depend on what has already been assigned,
e.g. for pre-consumption triggers created will not have been parsed yet, but it will
for added / updated triggers
"""
formatting = {
"correspondent": correspondent_name,
"document_type": doc_type_name,
"added": local_added.isoformat(),
"added_year": local_added.strftime("%Y"),
"added_year_short": local_added.strftime("%y"),
"added_month": local_added.strftime("%m"),
"added_month_name": local_added.strftime("%B"),
"added_month_name_short": local_added.strftime("%b"),
"added_day": local_added.strftime("%d"),
"added_time": local_added.strftime("%H:%M"),
"owner_username": owner_username,
"original_filename": Path(original_filename).stem,
}
if created is not None:
formatting.update(
{
"created": created.isoformat(),
"created_year": created.strftime("%Y"),
"created_year_short": created.strftime("%y"),
"created_month": created.strftime("%m"),
"created_month_name": created.strftime("%B"),
"created_month_name_short": created.strftime("%b"),
"created_day": created.strftime("%d"),
"created_time": created.strftime("%H:%M"),
},
)
return title.format(**formatting).strip()

View File

@ -24,10 +24,12 @@ from guardian.shortcuts import remove_perm
from documents import matching from documents import matching
from documents.caching import clear_document_caches from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier from documents.classifier import DocumentClassifier
from documents.consumer import parse_doc_title_w_placeholders from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.file_handling import create_source_path_directory from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename from documents.file_handling import generate_unique_filename
from documents.file_handling import parse_doc_title_w_placeholders
from documents.models import CustomFieldInstance from documents.models import CustomFieldInstance
from documents.models import Document from documents.models import Document
from documents.models import MatchingModel from documents.models import MatchingModel
@ -529,55 +531,79 @@ def run_workflow_updated(sender, document: Document, logging_group=None, **kwarg
def run_workflow( def run_workflow(
trigger_type: WorkflowTrigger.WorkflowTriggerType, trigger_type: WorkflowTrigger.WorkflowTriggerType,
document: Document, document: Document | ConsumableDocument,
logging_group=None, logging_group=None,
): overrides: DocumentMetadataOverrides = None,
def assignment_action(): ) -> None | DocumentMetadataOverrides:
"""
Run the workflow for the given document and trigger type.
If overrides is provided, the document will not be saved, and an updated DocumentMetadataOverrides object will be returned.
"""
def assignment_action(
document: Document,
overrides: DocumentMetadataOverrides | None = None,
):
if action.assign_tags.all().count() > 0: if action.assign_tags.all().count() > 0:
doc_tag_ids.extend( doc_tag_ids.extend(
list(action.assign_tags.all().values_list("pk", flat=True)), list(action.assign_tags.all().values_list("pk", flat=True)),
) )
if action.assign_correspondent is not None: if action.assign_correspondent is not None:
document.correspondent = action.assign_correspondent if overrides is not None:
overrides.correspondent_id = action.assign_correspondent.pk
else:
document.correspondent = action.assign_correspondent
if action.assign_document_type is not None: if action.assign_document_type is not None:
document.document_type = action.assign_document_type if overrides is not None:
overrides.document_type_id = action.assign_document_type.pk
else:
document.document_type = action.assign_document_type
if action.assign_storage_path is not None: if action.assign_storage_path is not None:
document.storage_path = action.assign_storage_path if overrides is not None:
overrides.storage_path_id = action.assign_storage_path.pk
else:
document.storage_path = action.assign_storage_path
if action.assign_owner is not None: if action.assign_owner is not None:
document.owner = action.assign_owner if overrides is not None:
overrides.owner_id = action.assign_owner.pk
else:
document.owner = action.assign_owner
if action.assign_title is not None: if action.assign_title is not None:
try: if overrides is not None:
document.title = parse_doc_title_w_placeholders( overrides.title = action.assign_title
action.assign_title, else:
( try:
document.correspondent.name document.title = parse_doc_title_w_placeholders(
if document.correspondent is not None action.assign_title,
else "" (
), document.correspondent.name
( if document.correspondent is not None
document.document_type.name else ""
if document.document_type is not None ),
else "" (
), document.document_type.name
(document.owner.username if document.owner is not None else ""), if document.document_type is not None
timezone.localtime(document.added), else ""
( ),
document.original_filename (document.owner.username if document.owner is not None else ""),
if document.original_filename is not None timezone.localtime(document.added),
else "" (
), document.original_filename
timezone.localtime(document.created), if document.original_filename is not None
) else ""
except Exception: ),
logger.exception( timezone.localtime(document.created),
f"Error occurred parsing title assignment '{action.assign_title}', falling back to original", )
extra={"group": logging_group}, except Exception:
) logger.exception(
f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
extra={"group": logging_group},
)
if ( if (
( (
@ -619,28 +645,42 @@ def run_workflow(
or [], or [],
}, },
} }
set_permissions_for_object( if overrides is not None:
permissions=permissions, overrides.view_users = permissions["view"]["users"]
object=document, overrides.view_groups = permissions["view"]["groups"]
merge=True, overrides.change_users = permissions["change"]["users"]
) overrides.change_groups = permissions["change"]["groups"]
else:
set_permissions_for_object(
permissions=permissions,
object=document,
merge=True,
)
if action.assign_custom_fields is not None: if action.assign_custom_fields is not None:
for field in action.assign_custom_fields.all(): if overrides is not None:
if ( overrides.custom_field_ids = list(
CustomFieldInstance.objects.filter( action.assign_custom_fields.all().values_list("pk", flat=True),
field=field, )
document=document, else:
).count() for field in action.assign_custom_fields.all():
== 0 if (
): CustomFieldInstance.objects.filter(
# can be triggered on existing docs, so only add the field if it doesn't already exist field=field,
CustomFieldInstance.objects.create( document=document,
field=field, ).count()
document=document, == 0
) ):
# can be triggered on existing docs, so only add the field if it doesn't already exist
CustomFieldInstance.objects.create(
field=field,
document=document,
)
def removal_action(): def removal_action(
document: Document,
overrides: DocumentMetadataOverrides | None = None,
):
if action.remove_all_tags: if action.remove_all_tags:
doc_tag_ids.clear() doc_tag_ids.clear()
else: else:
@ -649,41 +689,72 @@ def run_workflow(
).all(): ).all():
doc_tag_ids.remove(tag.pk) doc_tag_ids.remove(tag.pk)
corresspondent_id = (
document.correspondent.pk
if (overrides is None and document.correspondent)
else overrides.correspondent_id
)
if action.remove_all_correspondents or ( if action.remove_all_correspondents or (
document.correspondent corresspondent_id
and ( and (
action.remove_correspondents.filter( action.remove_correspondents.filter(
pk=document.correspondent.pk, pk=corresspondent_id,
).exists() ).exists()
) )
): ):
document.correspondent = None if overrides is not None:
overrides.correspondent_id = None
else:
document.correspondent = None
document_type_id = (
document.document_type.pk
if (overrides is None and document.document_type)
else overrides.document_type_id
)
if action.remove_all_document_types or ( if action.remove_all_document_types or (
document.document_type document_type_id
and ( and (
action.remove_document_types.filter( action.remove_document_types.filter(
pk=document.document_type.pk, pk=document_type_id,
).exists() ).exists()
) )
): ):
document.document_type = None if overrides is not None:
overrides.document_type_id = None
else:
document.document_type = None
storage_path_id = (
document.storage_path.pk
if (overrides is None and document.storage_path)
else overrides.storage_path_id
)
if action.remove_all_storage_paths or ( if action.remove_all_storage_paths or (
document.storage_path storage_path_id
and ( and (
action.remove_storage_paths.filter( action.remove_storage_paths.filter(
pk=document.storage_path.pk, pk=storage_path_id,
).exists() ).exists()
) )
): ):
document.storage_path = None if overrides is not None:
overrides.storage_path_id = None
else:
document.storage_path = None
owner_id = (
document.owner.pk
if (overrides is None and document.owner)
else overrides.owner_id
)
if action.remove_all_owners or ( if action.remove_all_owners or (
document.owner owner_id and (action.remove_owners.filter(pk=owner_id).exists())
and (action.remove_owners.filter(pk=document.owner.pk).exists())
): ):
document.owner = None if overrides is not None:
overrides.owner_id = None
else:
document.owner = None
if action.remove_all_permissions: if action.remove_all_permissions:
permissions = { permissions = {
@ -696,29 +767,72 @@ def run_workflow(
"groups": [], "groups": [],
}, },
} }
set_permissions_for_object( if overrides is not None:
permissions=permissions, overrides.view_users = []
object=document, overrides.view_groups = []
merge=False, overrides.change_users = []
) overrides.change_groups = []
else:
set_permissions_for_object(
permissions=permissions,
object=document,
merge=False,
)
elif ( elif (
(action.remove_view_users.all().count() > 0) (action.remove_view_users.all().count() > 0)
or (action.remove_view_groups.all().count() > 0) or (action.remove_view_groups.all().count() > 0)
or (action.remove_change_users.all().count() > 0) or (action.remove_change_users.all().count() > 0)
or (action.remove_change_groups.all().count() > 0) or (action.remove_change_groups.all().count() > 0)
): ):
for user in action.remove_view_users.all(): if overrides is not None:
remove_perm("view_document", user, document) if (
for user in action.remove_change_users.all(): action.remove_view_users.all().count() > 0
remove_perm("change_document", user, document) and overrides.view_users is not None
for group in action.remove_view_groups.all(): ):
remove_perm("view_document", group, document) overrides.view_users.remove(
for group in action.remove_change_groups.all(): *action.remove_view_users.all().values_list("id", flat=True),
remove_perm("change_document", group, document) )
if (
action.remove_view_groups.all().count() > 0
and overrides.view_groups is not None
):
overrides.view_groups.remove(
*action.remove_view_groups.all().values_list("id", flat=True),
)
if (
action.remove_change_users.all().count() > 0
and overrides.change_users is not None
):
overrides.change_users.remove(
*action.remove_change_users.all().values_list("id", flat=True),
)
if (
action.remove_change_groups.all().count() > 0
and overrides.change_groups is not None
):
overrides.change_groups.remove(
*action.remove_change_groups.all().values_list("id", flat=True),
)
else:
for user in action.remove_view_users.all():
remove_perm("view_document", user, document)
for user in action.remove_change_users.all():
remove_perm("change_document", user, document)
for group in action.remove_view_groups.all():
remove_perm("view_document", group, document)
for group in action.remove_change_groups.all():
remove_perm("change_document", group, document)
if action.remove_all_custom_fields: if action.remove_all_custom_fields:
CustomFieldInstance.objects.filter(document=document).delete() if overrides is not None:
overrides.custom_field_ids = []
else:
CustomFieldInstance.objects.filter(document=document).delete()
elif action.remove_custom_fields.all().count() > 0: elif action.remove_custom_fields.all().count() > 0:
if overrides is not None and overrides.custom_field_ids is not None:
overrides.custom_field_ids.remove(
*action.remove_custom_fields.all().values_list("id", flat=True),
)
CustomFieldInstance.objects.filter( CustomFieldInstance.objects.filter(
field__in=action.remove_custom_fields.all(), field__in=action.remove_custom_fields.all(),
document=document, document=document,
@ -744,11 +858,14 @@ def run_workflow(
.prefetch_related("triggers") .prefetch_related("triggers")
.order_by("order") .order_by("order")
): ):
# This can be called from bulk_update_documents, which may be running multiple times if overrides is None:
# Refresh this so the matching data is fresh and instance fields are re-freshed # This can be called from bulk_update_documents, which may be running multiple times
# Otherwise, this instance might be behind and overwrite the work another process did # Refresh this so the matching data is fresh and instance fields are re-freshed
document.refresh_from_db() # Otherwise, this instance might be behind and overwrite the work another process did
doc_tag_ids = list(document.tags.all().values_list("pk", flat=True)) document.refresh_from_db()
doc_tag_ids = list(document.tags.all().values_list("pk", flat=True))
else:
doc_tag_ids = overrides.tag_ids or []
if matching.document_matches_workflow( if matching.document_matches_workflow(
document, document,
workflow, workflow,
@ -762,14 +879,17 @@ def run_workflow(
) )
if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT: if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
assignment_action() assignment_action(document, overrides)
elif action.type == WorkflowAction.WorkflowActionType.REMOVAL: elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
removal_action() removal_action(document, overrides)
if overrides is not None:
# save first before setting tags overrides.tag_ids = doc_tag_ids
document.save() return overrides
document.tags.set(doc_tag_ids) else:
# save first before setting tags
document.save()
document.tags.set(doc_tag_ids)
@before_task_publish.connect @before_task_publish.connect