Initial implementation of document added trigger

This commit is contained in:
shamoon 2023-12-25 23:41:46 -08:00
parent b4023d3aae
commit 84199a6894
5 changed files with 267 additions and 79 deletions

View File

@ -11,6 +11,7 @@ class DocumentsConfig(AppConfig):
from documents.signals import document_consumption_finished
from documents.signals.handlers import add_inbox_tags
from documents.signals.handlers import add_to_index
from documents.signals.handlers import run_workflows
from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_log_entry
@ -24,5 +25,6 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_storage_path)
document_consumption_finished.connect(set_log_entry)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_workflows)
AppConfig.ready(self)

View File

@ -666,10 +666,6 @@ class Consumer(LoggingMixin):
return overrides
def _parse_title_placeholders(self, title: str) -> str:
"""
Consumption template title placeholders can only include items that are
assigned as part of this template (since auto-matching hasnt happened yet)
"""
local_added = timezone.localtime(timezone.now())
correspondent_name = (
@ -688,20 +684,14 @@ class Consumer(LoggingMixin):
else None
)
return title.format(
correspondent=correspondent_name,
document_type=doc_type_name,
added=local_added.isoformat(),
added_year=local_added.strftime("%Y"),
added_year_short=local_added.strftime("%y"),
added_month=local_added.strftime("%m"),
added_month_name=local_added.strftime("%B"),
added_month_name_short=local_added.strftime("%b"),
added_day=local_added.strftime("%d"),
owner_username=owner_username,
original_filename=Path(self.filename).stem,
added_time=local_added.strftime("%H:%M"),
).strip()
return parse_doc_title_w_placeholders(
title,
correspondent_name,
doc_type_name,
owner_username,
local_added,
self.filename,
)
def _store(
self,
@ -854,3 +844,47 @@ class Consumer(LoggingMixin):
self.log.warning("Script stderr:")
for line in stderr_str:
self.log.warning(line)
def parse_doc_title_w_placeholders(
    title: str,
    correspondent_name: str,
    doc_type_name: str,
    owner_username: str,
    local_added: datetime.datetime,
    original_filename: Optional[str],
    created: Optional[datetime.datetime] = None,
) -> str:
    """
    Fill the placeholders of a workflow title template and strip the result.

    Always available placeholders: {correspondent}, {document_type},
    {owner_username}, {original_filename} (file name without its extension),
    {added} (ISO format) and {added_year}, {added_year_short}, {added_month},
    {added_month_name}, {added_month_name_short}, {added_day}, {added_time}.

    When `created` is given, the same set is also available with the
    "created" prefix ({created}, {created_year}, ...). If `created` is None
    those placeholders are not defined, so a template that uses them raises
    KeyError, as does any other unknown placeholder.

    The name arguments are inserted as passed; callers decide what a missing
    correspondent / document type / owner should look like in the title.
    """

    def _date_placeholders(prefix: str, moment: datetime.datetime) -> dict:
        # Same set of date parts for both the "added" and "created" dates
        return {
            prefix: moment.isoformat(),
            f"{prefix}_year": moment.strftime("%Y"),
            f"{prefix}_year_short": moment.strftime("%y"),
            f"{prefix}_month": moment.strftime("%m"),
            f"{prefix}_month_name": moment.strftime("%B"),
            f"{prefix}_month_name_short": moment.strftime("%b"),
            f"{prefix}_day": moment.strftime("%d"),
            f"{prefix}_time": moment.strftime("%H:%M"),
        }

    formatting = {
        "correspondent": correspondent_name,
        "document_type": doc_type_name,
        "owner_username": owner_username,
        # A document may have no original filename; Path(None) would raise
        # TypeError, so treat a missing name as empty
        "original_filename": Path(original_filename or "").stem,
    }
    formatting.update(_date_placeholders("added", local_added))
    if created is not None:
        formatting.update(_date_placeholders("created", created))

    return title.format(**formatting).strip()

View File

@ -239,7 +239,7 @@ def _split_match(matching_model):
def document_matches_workflow(
document: ConsumableDocument,
document: ConsumableDocument | Document,
workflow: Workflow,
trigger_type: WorkflowTrigger.WorkflowTriggerType,
) -> bool:
@ -258,6 +258,9 @@ def document_matches_workflow(
trigger_matched = False
else:
for trigger in triggers:
if trigger_type is WorkflowTrigger.WorkflowTriggerType.CONSUMPTION:
# document is type ConsumableDocument
# Document source vs template source
if document.source not in [int(x) for x in list(trigger.sources)]:
log_match_failure(
@ -305,6 +308,25 @@ def document_matches_workflow(
)
trigger_matched = False
elif trigger_type is WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED:
# document is type Document
# Document filename vs template filename
if (
trigger.filter_filename is not None
and len(trigger.filter_filename) > 0
and document.original_filename is not None
and not fnmatch(
document.original_filename.lower(),
trigger.filter_filename.lower(),
)
):
log_match_failure(
f"Document filename {document.original_filename} does not match"
f" {trigger.filter_filename.lower()}",
)
trigger_matched = False
if trigger_matched:
logger.info(f"Document matched {trigger} from {workflow}")
return True

View File

@ -24,14 +24,19 @@ from filelock import FileLock
from documents import matching
from documents.classifier import DocumentClassifier
from documents.consumer import parse_doc_title_w_placeholders
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import MatchingModel
from documents.models import PaperlessTask
from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowTrigger
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import set_permissions_for_object
logger = logging.getLogger("paperless.handlers")
@ -514,6 +519,76 @@ def add_to_index(sender, document, **kwargs):
index.add_or_update_document(document)
def _workflow_action_permissions(action):
    """
    Build the permissions dict for a workflow action, or return None when the
    action assigns no view/change users or groups at all.
    """
    view_users = list(action.assign_view_users.values_list("id", flat=True))
    view_groups = list(action.assign_view_groups.values_list("id", flat=True))
    change_users = list(action.assign_change_users.values_list("id", flat=True))
    change_groups = list(action.assign_change_groups.values_list("id", flat=True))
    if not (view_users or view_groups or change_users or change_groups):
        return None
    return {
        "view": {"users": view_users, "groups": view_groups},
        "change": {"users": change_users, "groups": change_groups},
    }


def run_workflows(sender, document: Document, logging_group=None, **kwargs):
    """
    Signal handler: apply every workflow that has a "document added" trigger
    matching `document`, in ascending workflow `order`.

    For each matching workflow all of its actions are applied to the document
    (tags, correspondent, document type, storage path, owner, title,
    permissions, custom fields) and the document is then saved.

    `sender`, `logging_group` and any extra keyword arguments are accepted
    for signal compatibility and are not used here.
    """
    trigger_type = WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED

    # Filtering across the triggers relation yields one row per matching
    # trigger; distinct() keeps a workflow from being applied more than once
    workflows = (
        Workflow.objects.filter(triggers__type=trigger_type)
        .order_by("order")
        .distinct()
    )

    for workflow in workflows:
        if not matching.document_matches_workflow(document, workflow, trigger_type):
            continue

        for action in workflow.actions.all():
            tags = list(action.assign_tags.all())
            if tags:
                document.tags.add(*tags)

            if action.assign_correspondent is not None:
                document.correspondent = action.assign_correspondent

            if action.assign_document_type is not None:
                document.document_type = action.assign_document_type

            if action.assign_storage_path is not None:
                document.storage_path = action.assign_storage_path

            if action.assign_owner is not None:
                document.owner = action.assign_owner

            if action.assign_title is not None:
                # Any of these relations may be unset on the document, so do
                # not dereference them blindly
                correspondent = document.correspondent
                document_type = document.document_type
                owner = document.owner
                document.title = parse_doc_title_w_placeholders(
                    action.assign_title,
                    correspondent.name if correspondent is not None else "",
                    document_type.name if document_type is not None else "",
                    owner.username if owner is not None else "",
                    document.added,
                    document.original_filename or "",
                    document.created,
                )

            # The assign_* permission attributes are related managers and thus
            # never None; only touch permissions when something is assigned
            permissions = _workflow_action_permissions(action)
            if permissions is not None:
                set_permissions_for_object(permissions=permissions, object=document)

            for field in action.assign_custom_fields.all():
                # Several actions may assign the same field, and the document
                # may already carry it: only add the field once
                CustomFieldInstance.objects.get_or_create(
                    field=field,
                    document=document,
                )

        document.save()
@before_task_publish.connect
def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs):
"""

View File

@ -1,3 +1,4 @@
from datetime import timedelta
from pathlib import Path
from unittest import TestCase
from unittest import mock
@ -5,18 +6,21 @@ from unittest import mock
import pytest
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.utils import timezone
from documents import tasks
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource
from documents.models import Correspondent
from documents.models import CustomField
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
from documents.signals import document_consumption_finished
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless_mail.models import MailAccount
@ -567,32 +571,35 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIn(expected_str, cm.output[1])
@mock.patch("documents.consumer.Consumer.try_consume_file")
def test_consumption_template_repeat_custom_fields(self, m):
def test_workflow_repeat_custom_fields(self, m):
"""
GIVEN:
- Existing consumption templates which assign the same custom field
- Existing workflows which assign the same custom field
WHEN:
- File that matches is consumed
THEN:
- Custom field is added the first time successfully
"""
ct = ConsumptionTemplate.objects.create(
name="Template 1",
order=0,
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}",
filter_filename="*simple*",
)
ct.assign_custom_fields.add(self.cf1.pk)
ct.save()
action1 = WorkflowAction.objects.create()
action1.assign_custom_fields.add(self.cf1.pk)
action1.save()
ct2 = ConsumptionTemplate.objects.create(
name="Template 2",
order=1,
sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}",
filter_filename="*simple*",
action2 = WorkflowAction.objects.create()
action2.assign_custom_fields.add(self.cf1.pk)
action2.save()
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
ct2.assign_custom_fields.add(self.cf1.pk)
ct2.save()
w.triggers.add(trigger)
w.actions.add(action1, action2)
w.save()
test_file = self.SAMPLE_DIR / "simple.pdf"
@ -612,7 +619,55 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
[self.cf1.pk],
)
expected_str = f"Document matched template {ct}"
expected_str = f"Document matched {trigger} from {w}"
self.assertIn(expected_str, cm.output[0])
expected_str = f"Document matched template {ct2}"
self.assertIn(expected_str, cm.output[1])
def test_document_added_workflow(self):
    """
    GIVEN:
        - Existing workflow with a document added trigger and an action
          that assigns title, correspondent, document type, storage path,
          owner, tags, permissions and custom fields
    WHEN:
        - The consumption finished signal is sent for a document whose
          original filename matches the trigger filter
    THEN:
        - The action's assignments are applied to the document
    """
    trigger = WorkflowTrigger.objects.create(
        type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}",
        filter_filename="*sample*",
    )
    action = WorkflowAction.objects.create(
        assign_title="Doc created in {created_year}",
        assign_correspondent=self.c2,
        assign_document_type=self.dt,
        assign_storage_path=self.sp,
        assign_owner=self.user2,
    )
    action.assign_tags.add(self.t1)
    action.assign_tags.add(self.t2)
    action.assign_tags.add(self.t3)
    action.assign_view_users.add(self.user3.pk)
    action.assign_view_groups.add(self.group1.pk)
    action.assign_change_users.add(self.user3.pk)
    action.assign_change_groups.add(self.group1.pk)
    action.assign_custom_fields.add(self.cf1.pk)
    action.assign_custom_fields.add(self.cf2.pk)
    action.save()

    w = Workflow.objects.create(
        name="Workflow 1",
        order=0,
    )
    w.triggers.add(trigger)
    w.actions.add(action)
    w.save()

    now = timezone.localtime(timezone.now())
    # Roughly ten years earlier, so the created year differs from the added year
    created = now - timedelta(weeks=520)
    doc = Document.objects.create(
        title="sample test",
        correspondent=self.c,
        original_filename="sample.pdf",
        added=now,
        created=created,
    )

    document_consumption_finished.send(
        sender=self.__class__,
        document=doc,
    )

    # The handler mutates the instance passed with the signal
    self.assertEqual(doc.correspondent, self.c2)
    self.assertEqual(doc.document_type, self.dt)
    self.assertEqual(doc.storage_path, self.sp)
    self.assertEqual(doc.owner, self.user2)
    self.assertEqual(doc.title, f"Doc created in {created.year}")