Revert "Lets just see"

This reverts commit 362b801721d9476ae2088453953d0c3ffba604e6.
This commit is contained in:
shamoon 2024-10-08 15:31:21 -07:00
parent dd8dafc6e7
commit c649654fab
3 changed files with 298 additions and 266 deletions

View File

@ -4,6 +4,7 @@ import os
import tempfile
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING
import magic
from django.conf import settings
@ -19,8 +20,8 @@ from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
from documents.file_handling import parse_doc_title_w_placeholders
from documents.loggers import LoggingMixin
from documents.matching import document_matches_workflow
from documents.models import Correspondent
from documents.models import CustomField
from documents.models import CustomFieldInstance
@ -29,6 +30,8 @@ from documents.models import DocumentType
from documents.models import FileInfo
from documents.models import StoragePath
from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser
from documents.parsers import ParseError
@ -43,7 +46,6 @@ from documents.plugins.helpers import ProgressManager
from documents.plugins.helpers import ProgressStatusOptions
from documents.signals import document_consumption_finished
from documents.signals import document_consumption_started
from documents.signals.handlers import run_workflow
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
@ -57,16 +59,168 @@ class WorkflowTriggerPlugin(
):
NAME: str = "WorkflowTriggerPlugin"
def run(self) -> str | None:
    """
    Get overrides from matching workflows

    Walks every enabled workflow in ascending ``order``. For each workflow
    that ``document_matches_workflow`` accepts for the CONSUMPTION trigger,
    its actions are applied to a ``DocumentMetadataOverrides`` accumulator:
    assignment actions are merged in through ``overrides.update``, while
    removal actions strip values directly from the accumulated overrides.
    The accumulated overrides are finally merged into ``self.metadata``.

    Returns one "Applying <action> from <workflow>" line for every action of
    every matching workflow, or an empty string when nothing matched.
    """

    def _drop_matching(ids, removal_manager) -> None:
        # Remove (in place) every pk from ``ids`` that is also present in
        # the action's removal relation. Nothing is queried when ``ids`` is
        # empty or unset.
        if not ids:
            return
        for obj in removal_manager.filter(pk__in=ids):
            ids.remove(obj.pk)

    applied: list[str] = []
    overrides = DocumentMetadataOverrides()

    workflows = (
        Workflow.objects.filter(enabled=True)
        .prefetch_related(
            "actions",
            "actions__assign_view_users",
            "actions__assign_view_groups",
            "actions__assign_change_users",
            "actions__assign_change_groups",
            "actions__assign_custom_fields",
            "actions__remove_tags",
            "actions__remove_correspondents",
            "actions__remove_document_types",
            "actions__remove_storage_paths",
            "actions__remove_custom_fields",
            "actions__remove_owners",
            "triggers",
        )
        .order_by("order")
    )

    for workflow in workflows:
        # Shared by all assignment actions of this workflow, so a later
        # assignment action re-applies what earlier ones already set.
        action_overrides = DocumentMetadataOverrides()

        if not document_matches_workflow(
            self.input_doc,
            workflow,
            WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
        ):
            continue

        for action in workflow.actions.all():
            if TYPE_CHECKING:
                assert isinstance(action, WorkflowAction)
            applied.append(f"Applying {action} from {workflow}\n")

            if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
                if action.assign_title is not None:
                    action_overrides.title = action.assign_title
                if action.assign_tags is not None:
                    action_overrides.tag_ids = list(
                        action.assign_tags.values_list("pk", flat=True),
                    )
                if action.assign_correspondent is not None:
                    action_overrides.correspondent_id = (
                        action.assign_correspondent.pk
                    )
                if action.assign_document_type is not None:
                    action_overrides.document_type_id = (
                        action.assign_document_type.pk
                    )
                if action.assign_storage_path is not None:
                    action_overrides.storage_path_id = (
                        action.assign_storage_path.pk
                    )
                if action.assign_owner is not None:
                    action_overrides.owner_id = action.assign_owner.pk
                if action.assign_view_users is not None:
                    action_overrides.view_users = list(
                        action.assign_view_users.values_list("pk", flat=True),
                    )
                if action.assign_view_groups is not None:
                    action_overrides.view_groups = list(
                        action.assign_view_groups.values_list("pk", flat=True),
                    )
                if action.assign_change_users is not None:
                    action_overrides.change_users = list(
                        action.assign_change_users.values_list("pk", flat=True),
                    )
                if action.assign_change_groups is not None:
                    action_overrides.change_groups = list(
                        action.assign_change_groups.values_list("pk", flat=True),
                    )
                if action.assign_custom_fields is not None:
                    action_overrides.custom_field_ids = list(
                        action.assign_custom_fields.values_list("pk", flat=True),
                    )
                overrides.update(action_overrides)

            elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
                # Removal actions overwrite the current overrides
                if action.remove_all_tags:
                    overrides.tag_ids = []
                elif overrides.tag_ids:
                    # Fixed: this previously filtered remove_custom_fields,
                    # so the tags selected for removal were never dropped.
                    _drop_matching(overrides.tag_ids, action.remove_tags)

                if action.remove_all_correspondents or (
                    overrides.correspondent_id is not None
                    and action.remove_correspondents.filter(
                        pk=overrides.correspondent_id,
                    ).exists()
                ):
                    overrides.correspondent_id = None

                if action.remove_all_document_types or (
                    overrides.document_type_id is not None
                    and action.remove_document_types.filter(
                        pk=overrides.document_type_id,
                    ).exists()
                ):
                    overrides.document_type_id = None

                if action.remove_all_storage_paths or (
                    overrides.storage_path_id is not None
                    and action.remove_storage_paths.filter(
                        pk=overrides.storage_path_id,
                    ).exists()
                ):
                    overrides.storage_path_id = None

                if action.remove_all_custom_fields:
                    overrides.custom_field_ids = []
                elif overrides.custom_field_ids:
                    _drop_matching(
                        overrides.custom_field_ids,
                        action.remove_custom_fields,
                    )

                if action.remove_all_owners or (
                    overrides.owner_id is not None
                    and action.remove_owners.filter(
                        pk=overrides.owner_id,
                    ).exists()
                ):
                    overrides.owner_id = None

                if action.remove_all_permissions:
                    overrides.view_users = []
                    overrides.view_groups = []
                    overrides.change_users = []
                    overrides.change_groups = []
                else:
                    _drop_matching(overrides.view_users, action.remove_view_users)
                    _drop_matching(
                        overrides.change_users,
                        action.remove_change_users,
                    )
                    _drop_matching(
                        overrides.view_groups,
                        action.remove_view_groups,
                    )
                    _drop_matching(
                        overrides.change_groups,
                        action.remove_change_groups,
                    )

    self.metadata.update(overrides)
    return "".join(applied)
class ConsumerError(Exception):
@ -794,3 +948,47 @@ class ConsumerPlugin(
copy_basic_file_stats(source, target)
except Exception: # pragma: no cover
pass
def parse_doc_title_w_placeholders(
title: str,
correspondent_name: str,
doc_type_name: str,
owner_username: str,
local_added: datetime.datetime,
original_filename: str,
created: datetime.datetime | None = None,
) -> str:
"""
Available title placeholders for Workflows depend on what has already been assigned,
e.g. for pre-consumption triggers created will not have been parsed yet, but it will
for added / updated triggers
"""
formatting = {
"correspondent": correspondent_name,
"document_type": doc_type_name,
"added": local_added.isoformat(),
"added_year": local_added.strftime("%Y"),
"added_year_short": local_added.strftime("%y"),
"added_month": local_added.strftime("%m"),
"added_month_name": local_added.strftime("%B"),
"added_month_name_short": local_added.strftime("%b"),
"added_day": local_added.strftime("%d"),
"added_time": local_added.strftime("%H:%M"),
"owner_username": owner_username,
"original_filename": Path(original_filename).stem,
}
if created is not None:
formatting.update(
{
"created": created.isoformat(),
"created_year": created.strftime("%Y"),
"created_year_short": created.strftime("%y"),
"created_month": created.strftime("%m"),
"created_month_name": created.strftime("%B"),
"created_month_name_short": created.strftime("%b"),
"created_day": created.strftime("%d"),
"created_time": created.strftime("%H:%M"),
},
)
return title.format(**formatting).strip()

View File

@ -1,6 +1,4 @@
import datetime
import os
from pathlib import Path
from django.conf import settings
@ -148,47 +146,3 @@ def generate_filename(
filename += ".gpg"
return filename
def parse_doc_title_w_placeholders(
title: str,
correspondent_name: str,
doc_type_name: str,
owner_username: str,
local_added: datetime.datetime,
original_filename: str,
created: datetime.datetime | None = None,
) -> str:
"""
Available title placeholders for Workflows depend on what has already been assigned,
e.g. for pre-consumption triggers created will not have been parsed yet, but it will
for added / updated triggers
"""
formatting = {
"correspondent": correspondent_name,
"document_type": doc_type_name,
"added": local_added.isoformat(),
"added_year": local_added.strftime("%Y"),
"added_year_short": local_added.strftime("%y"),
"added_month": local_added.strftime("%m"),
"added_month_name": local_added.strftime("%B"),
"added_month_name_short": local_added.strftime("%b"),
"added_day": local_added.strftime("%d"),
"added_time": local_added.strftime("%H:%M"),
"owner_username": owner_username,
"original_filename": Path(original_filename).stem,
}
if created is not None:
formatting.update(
{
"created": created.isoformat(),
"created_year": created.strftime("%Y"),
"created_year_short": created.strftime("%y"),
"created_month": created.strftime("%m"),
"created_month_name": created.strftime("%B"),
"created_month_name_short": created.strftime("%b"),
"created_day": created.strftime("%d"),
"created_time": created.strftime("%H:%M"),
},
)
return title.format(**formatting).strip()

View File

@ -24,12 +24,10 @@ from guardian.shortcuts import remove_perm
from documents import matching
from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.consumer import parse_doc_title_w_placeholders
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename
from documents.file_handling import parse_doc_title_w_placeholders
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import MatchingModel
@ -531,79 +529,55 @@ def run_workflow_updated(sender, document: Document, logging_group=None, **kwarg
def run_workflow(
trigger_type: WorkflowTrigger.WorkflowTriggerType,
document: Document | ConsumableDocument,
document: Document,
logging_group=None,
overrides: DocumentMetadataOverrides = None,
) -> None | DocumentMetadataOverrides:
"""
Run the workflow for the given document and trigger type.
If overrides is provided, the document will not be saved, and an updated DocumentMetadataOverrides object will be returned.
"""
def assignment_action(
document: Document,
overrides: DocumentMetadataOverrides | None = None,
):
):
def assignment_action():
if action.assign_tags.all().count() > 0:
doc_tag_ids.extend(
list(action.assign_tags.all().values_list("pk", flat=True)),
)
if action.assign_correspondent is not None:
if overrides is not None:
overrides.correspondent_id = action.assign_correspondent.pk
else:
document.correspondent = action.assign_correspondent
document.correspondent = action.assign_correspondent
if action.assign_document_type is not None:
if overrides is not None:
overrides.document_type_id = action.assign_document_type.pk
else:
document.document_type = action.assign_document_type
document.document_type = action.assign_document_type
if action.assign_storage_path is not None:
if overrides is not None:
overrides.storage_path_id = action.assign_storage_path.pk
else:
document.storage_path = action.assign_storage_path
document.storage_path = action.assign_storage_path
if action.assign_owner is not None:
if overrides is not None:
overrides.owner_id = action.assign_owner.pk
else:
document.owner = action.assign_owner
document.owner = action.assign_owner
if action.assign_title is not None:
if overrides is not None:
overrides.title = action.assign_title
else:
try:
document.title = parse_doc_title_w_placeholders(
action.assign_title,
(
document.correspondent.name
if document.correspondent is not None
else ""
),
(
document.document_type.name
if document.document_type is not None
else ""
),
(document.owner.username if document.owner is not None else ""),
timezone.localtime(document.added),
(
document.original_filename
if document.original_filename is not None
else ""
),
timezone.localtime(document.created),
)
except Exception:
logger.exception(
f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
extra={"group": logging_group},
)
try:
document.title = parse_doc_title_w_placeholders(
action.assign_title,
(
document.correspondent.name
if document.correspondent is not None
else ""
),
(
document.document_type.name
if document.document_type is not None
else ""
),
(document.owner.username if document.owner is not None else ""),
timezone.localtime(document.added),
(
document.original_filename
if document.original_filename is not None
else ""
),
timezone.localtime(document.created),
)
except Exception:
logger.exception(
f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
extra={"group": logging_group},
)
if (
(
@ -645,42 +619,28 @@ def run_workflow(
or [],
},
}
if overrides is not None:
overrides.view_users = permissions["view"]["users"]
overrides.view_groups = permissions["view"]["groups"]
overrides.change_users = permissions["change"]["users"]
overrides.change_groups = permissions["change"]["groups"]
else:
set_permissions_for_object(
permissions=permissions,
object=document,
merge=True,
)
set_permissions_for_object(
permissions=permissions,
object=document,
merge=True,
)
if action.assign_custom_fields is not None:
if overrides is not None:
overrides.custom_field_ids = list(
action.assign_custom_fields.all().values_list("pk", flat=True),
)
else:
for field in action.assign_custom_fields.all():
if (
CustomFieldInstance.objects.filter(
field=field,
document=document,
).count()
== 0
):
# can be triggered on existing docs, so only add the field if it doesn't already exist
CustomFieldInstance.objects.create(
field=field,
document=document,
)
for field in action.assign_custom_fields.all():
if (
CustomFieldInstance.objects.filter(
field=field,
document=document,
).count()
== 0
):
# can be triggered on existing docs, so only add the field if it doesn't already exist
CustomFieldInstance.objects.create(
field=field,
document=document,
)
def removal_action(
document: Document,
overrides: DocumentMetadataOverrides | None = None,
):
def removal_action():
if action.remove_all_tags:
doc_tag_ids.clear()
else:
@ -689,72 +649,41 @@ def run_workflow(
).all():
doc_tag_ids.remove(tag.pk)
corresspondent_id = (
document.correspondent.pk
if (overrides is None and document.correspondent)
else overrides.correspondent_id
)
if action.remove_all_correspondents or (
corresspondent_id
document.correspondent
and (
action.remove_correspondents.filter(
pk=corresspondent_id,
pk=document.correspondent.pk,
).exists()
)
):
if overrides is not None:
overrides.correspondent_id = None
else:
document.correspondent = None
document.correspondent = None
document_type_id = (
document.document_type.pk
if (overrides is None and document.document_type)
else overrides.document_type_id
)
if action.remove_all_document_types or (
document_type_id
document.document_type
and (
action.remove_document_types.filter(
pk=document_type_id,
pk=document.document_type.pk,
).exists()
)
):
if overrides is not None:
overrides.document_type_id = None
else:
document.document_type = None
document.document_type = None
storage_path_id = (
document.storage_path.pk
if (overrides is None and document.storage_path)
else overrides.storage_path_id
)
if action.remove_all_storage_paths or (
storage_path_id
document.storage_path
and (
action.remove_storage_paths.filter(
pk=storage_path_id,
pk=document.storage_path.pk,
).exists()
)
):
if overrides is not None:
overrides.storage_path_id = None
else:
document.storage_path = None
document.storage_path = None
owner_id = (
document.owner.pk
if (overrides is None and document.owner)
else overrides.owner_id
)
if action.remove_all_owners or (
owner_id and (action.remove_owners.filter(pk=owner_id).exists())
document.owner
and (action.remove_owners.filter(pk=document.owner.pk).exists())
):
if overrides is not None:
overrides.owner_id = None
else:
document.owner = None
document.owner = None
if action.remove_all_permissions:
permissions = {
@ -767,72 +696,29 @@ def run_workflow(
"groups": [],
},
}
if overrides is not None:
overrides.view_users = []
overrides.view_groups = []
overrides.change_users = []
overrides.change_groups = []
else:
set_permissions_for_object(
permissions=permissions,
object=document,
merge=False,
)
set_permissions_for_object(
permissions=permissions,
object=document,
merge=False,
)
elif (
(action.remove_view_users.all().count() > 0)
or (action.remove_view_groups.all().count() > 0)
or (action.remove_change_users.all().count() > 0)
or (action.remove_change_groups.all().count() > 0)
):
if overrides is not None:
if (
action.remove_view_users.all().count() > 0
and overrides.view_users is not None
):
overrides.view_users.remove(
*action.remove_view_users.all().values_list("id", flat=True),
)
if (
action.remove_view_groups.all().count() > 0
and overrides.view_groups is not None
):
overrides.view_groups.remove(
*action.remove_view_groups.all().values_list("id", flat=True),
)
if (
action.remove_change_users.all().count() > 0
and overrides.change_users is not None
):
overrides.change_users.remove(
*action.remove_change_users.all().values_list("id", flat=True),
)
if (
action.remove_change_groups.all().count() > 0
and overrides.change_groups is not None
):
overrides.change_groups.remove(
*action.remove_change_groups.all().values_list("id", flat=True),
)
else:
for user in action.remove_view_users.all():
remove_perm("view_document", user, document)
for user in action.remove_change_users.all():
remove_perm("change_document", user, document)
for group in action.remove_view_groups.all():
remove_perm("view_document", group, document)
for group in action.remove_change_groups.all():
remove_perm("change_document", group, document)
for user in action.remove_view_users.all():
remove_perm("view_document", user, document)
for user in action.remove_change_users.all():
remove_perm("change_document", user, document)
for group in action.remove_view_groups.all():
remove_perm("view_document", group, document)
for group in action.remove_change_groups.all():
remove_perm("change_document", group, document)
if action.remove_all_custom_fields:
if overrides is not None:
overrides.custom_field_ids = []
else:
CustomFieldInstance.objects.filter(document=document).delete()
CustomFieldInstance.objects.filter(document=document).delete()
elif action.remove_custom_fields.all().count() > 0:
if overrides is not None and overrides.custom_field_ids is not None:
overrides.custom_field_ids.remove(
*action.remove_custom_fields.all().values_list("id", flat=True),
)
CustomFieldInstance.objects.filter(
field__in=action.remove_custom_fields.all(),
document=document,
@ -858,14 +744,11 @@ def run_workflow(
.prefetch_related("triggers")
.order_by("order")
):
if overrides is None:
# This can be called from bulk_update_documents, which may be running multiple times.
# Refresh this so the matching data is fresh and instance fields are refreshed.
# Otherwise, this instance might be behind and overwrite the work another process did.
document.refresh_from_db()
doc_tag_ids = list(document.tags.all().values_list("pk", flat=True))
else:
doc_tag_ids = overrides.tag_ids or []
# This can be called from bulk_update_documents, which may be running multiple times.
# Refresh this so the matching data is fresh and instance fields are refreshed.
# Otherwise, this instance might be behind and overwrite the work another process did.
document.refresh_from_db()
doc_tag_ids = list(document.tags.all().values_list("pk", flat=True))
if matching.document_matches_workflow(
document,
workflow,
@ -879,17 +762,14 @@ def run_workflow(
)
if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
assignment_action(document, overrides)
assignment_action()
elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
removal_action(document, overrides)
if overrides is not None:
overrides.tag_ids = doc_tag_ids
return overrides
else:
# save first before setting tags
document.save()
document.tags.set(doc_tag_ids)
removal_action()
# save first before setting tags
document.save()
document.tags.set(doc_tag_ids)
@before_task_publish.connect