From 46c8f743166ad8d68b2bd158882b57a604c13d9b Mon Sep 17 00:00:00 2001
From: Trenton Holmes <797416+stumpylog@users.noreply.github.com>
Date: Wed, 20 Sep 2023 18:47:00 -0700
Subject: [PATCH] Re-works document to template matching so it can log a more
 detailed reason

---
Review amendments to the added code (the post-image blob ids on the index
lines of data_models.py and matching.py are stale as a result):
 * DocumentMetadataOverrides.update no longer raises TypeError when a list
   override is set on self but is None on other
 * document_matches_template logs the first filter which failed rather than
   always the path reason, and no longer dereferences a None filter_mailrule
   or filter_filename while building that reason

 src/documents/consumer.py    | 77 ++----------------------------------
 src/documents/data_models.py | 48 ++++++++++++++++++++++
 src/documents/matching.py    | 67 +++++++++++++++++++++++++++++++
 src/documents/tasks.py       |  3 +-
 4 files changed, 119 insertions(+), 76 deletions(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index c7ac7d813..c3ef86ef3 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -4,7 +4,6 @@ import os
 import tempfile
 import uuid
 from enum import Enum
-from fnmatch import fnmatch
 from pathlib import Path
 from subprocess import CompletedProcess
 from subprocess import run
@@ -23,6 +22,7 @@ from rest_framework.reverse import reverse
 
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
+from documents.matching import document_matches_template
 from documents.permissions import set_permissions_for_object
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
@@ -606,28 +606,7 @@ class Consumer(LoggingMixin):
         for template in ConsumptionTemplate.objects.all().order_by("order"):
             template_overrides = DocumentMetadataOverrides()
 
-            if (
-                int(input_doc.source) in [int(x) for x in list(template.sources)]
-                and (
-                    input_doc.mailrule_id is None
-                    or input_doc.mailrule_id == template.filter_mailrule.pk
-                )
-            ) and (
-                (
-                    template.filter_filename is None
-                    or len(template.filter_filename) == 0
-                    or fnmatch(
-                        input_doc.original_file.name.lower(),
-                        template.filter_filename.lower(),
-                    )
-                )
-                and (
-                    template.filter_path is None
-                    or len(template.filter_path) == 0
-                    or input_doc.original_file.match(template.filter_path)
-                )
-            ):
-                self.log.info(f"Document matched consumption template {template.name}")
+            if document_matches_template(input_doc, template):
                 if template.assign_title is not None:
                     template_overrides.title = template.assign_title
                 if template.assign_tags is not None:
@@ -662,10 +641,7 @@ class Consumer(LoggingMixin):
                     template_overrides.change_groups = [
                         group.pk for group in template.assign_change_groups.all()
                     ]
-                overrides = merge_overrides(
-                    overridesA=overrides,
-                    overridesB=template_overrides,
-                )
+                overrides.update(template_overrides)
         return overrides
 
     def _parse_title_placeholders(self, title: str) -> str:
@@ -848,50 +824,3 @@ class Consumer(LoggingMixin):
             self.log.warning("Script stderr:")
             for line in stderr_str:
                 self.log.warning(line)
-
-
-def merge_overrides(
-    overridesA: DocumentMetadataOverrides,
-    overridesB: DocumentMetadataOverrides,
-) -> DocumentMetadataOverrides:
-    """
-    Merges two DocumentMetadataOverrides objects such that object B's overrides
-    are only applied if the property is empty in object A or merged if multiple
-    are accepted
-    """
-    # only if empty
-    if overridesA.title is None:
-        overridesA.title = overridesB.title
-    if overridesA.correspondent_id is None:
-        overridesA.correspondent_id = overridesB.correspondent_id
-    if overridesA.document_type_id is None:
-        overridesA.document_type_id = overridesB.document_type_id
-    if overridesA.storage_path_id is None:
-        overridesA.storage_path_id = overridesB.storage_path_id
-    if overridesA.owner_id is None:
-        overridesA.owner_id = overridesB.owner_id
-    # merge
-    if overridesA.tag_ids is None:
-        overridesA.tag_ids = overridesB.tag_ids
-    else:
-        overridesA.tag_ids = [*overridesA.tag_ids, *overridesB.tag_ids]
-    if overridesA.view_users is None:
-        overridesA.view_users = overridesB.view_users
-    else:
-        overridesA.view_users = [*overridesA.view_users, *overridesB.view_users]
-    if overridesA.view_groups is None:
-        overridesA.view_groups = overridesB.view_groups
-    else:
-        overridesA.view_groups = [*overridesA.view_groups, *overridesB.view_groups]
-    if overridesA.change_users is None:
-        overridesA.change_users = overridesB.change_users
-    else:
-        overridesA.change_users = [*overridesA.change_users, *overridesB.change_users]
-    if overridesA.change_groups is None:
-        overridesA.change_groups = overridesB.change_groups
-    else:
-        overridesA.change_groups = [
-            *overridesA.change_groups,
-            *overridesB.change_groups,
-        ]
-    return overridesA
diff --git a/src/documents/data_models.py b/src/documents/data_models.py
index 99cf92457..b768cc814 100644
--- a/src/documents/data_models.py
+++ b/src/documents/data_models.py
@@ -29,6 +29,54 @@ class DocumentMetadataOverrides:
     change_users: Optional[list[int]] = None
     change_groups: Optional[list[int]] = None
 
+    def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
+        """
+        Merges the overrides of other into this object such that the overrides
+        of other are only applied if the property is empty in this object, or
+        merged if multiple values are accepted.
+
+        A list property which is None in other has nothing to contribute and
+        leaves the value of this object unchanged.
+
+        The update is an in-place modification of self
+        """
+        # only if empty
+        if self.title is None:
+            self.title = other.title
+        if self.correspondent_id is None:
+            self.correspondent_id = other.correspondent_id
+        if self.document_type_id is None:
+            self.document_type_id = other.document_type_id
+        if self.storage_path_id is None:
+            self.storage_path_id = other.storage_path_id
+        if self.owner_id is None:
+            self.owner_id = other.owner_id
+        # merge, skipping lists which are unset in other
+        if self.tag_ids is None:
+            self.tag_ids = other.tag_ids
+        elif other.tag_ids is not None:
+            self.tag_ids = [*self.tag_ids, *other.tag_ids]
+        if self.view_users is None:
+            self.view_users = other.view_users
+        elif other.view_users is not None:
+            self.view_users = [*self.view_users, *other.view_users]
+        if self.view_groups is None:
+            self.view_groups = other.view_groups
+        elif other.view_groups is not None:
+            self.view_groups = [*self.view_groups, *other.view_groups]
+        if self.change_users is None:
+            self.change_users = other.change_users
+        elif other.change_users is not None:
+            self.change_users = [*self.change_users, *other.change_users]
+        if self.change_groups is None:
+            self.change_groups = other.change_groups
+        elif other.change_groups is not None:
+            self.change_groups = [
+                *self.change_groups,
+                *other.change_groups,
+            ]
+        return self
+
 
 class DocumentSource(IntEnum):
     """
diff --git a/src/documents/matching.py b/src/documents/matching.py
index eb0f4f8b5..072de801b 100644
--- a/src/documents/matching.py
+++ b/src/documents/matching.py
@@ -1,7 +1,10 @@
 import logging
 import re
+from fnmatch import fnmatch
 
 from documents.classifier import DocumentClassifier
+from documents.data_models import ConsumableDocument
+from documents.models import ConsumptionTemplate
 from documents.models import Correspondent
 from documents.models import Document
 from documents.models import DocumentType
@@ -231,3 +234,67 @@ def _split_match(matching_model):
         re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+")
         for t in findterms(matching_model.match)
     ]
+
+
+def document_matches_template(
+    document: ConsumableDocument,
+    template: ConsumptionTemplate,
+) -> bool:
+    """
+    Returns True if the incoming document matches all filters and
+    settings from the template, False otherwise
+
+    Filters are checked in order (source, mail rule, filename, path) and
+    the first one which fails is logged at debug level as the reason
+    """
+
+    # Logs the rejection and returns False so each check can bail out directly
+    def log_match_failure(reason: str) -> bool:
+        logger.info(f"Document did not match template {template.name}")
+        logger.debug(reason)
+        return False
+
+    # Document source vs template source
+    if document.source not in [int(x) for x in list(template.sources)]:
+        return log_match_failure(
+            f"Document source {document.source} not in {template.sources}",
+        )
+
+    # Document mail rule vs template mail rule
+    if (
+        document.mailrule_id is not None
+        and template.filter_mailrule is not None
+        and document.mailrule_id != template.filter_mailrule.pk
+    ):
+        return log_match_failure(
+            f"Document mail rule {document.mailrule_id} "
+            f"!= {template.filter_mailrule.pk}",
+        )
+
+    # Document filename vs template filename
+    if (
+        template.filter_filename is not None
+        and len(template.filter_filename) > 0
+        and not fnmatch(
+            document.original_file.name.lower(),
+            template.filter_filename.lower(),
+        )
+    ):
+        return log_match_failure(
+            f"Document filename {document.original_file.name} "
+            f"does not match {template.filter_filename.lower()}",
+        )
+
+    # Document path vs template path
+    if (
+        template.filter_path is not None
+        and len(template.filter_path) > 0
+        and not document.original_file.match(template.filter_path)
+    ):
+        return log_match_failure(
+            f"Document path {document.original_file} "
+            f"does not match {template.filter_path}",
+        )
+
+    logger.info(f"Document did match template {template.name}")
+    return True
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index b21d608b8..8aea56eaa 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -23,7 +23,6 @@ from documents.classifier import DocumentClassifier
 from documents.classifier import load_classifier
 from documents.consumer import Consumer
 from documents.consumer import ConsumerError
-from documents.consumer import merge_overrides
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
 from documents.double_sided import collate
@@ -158,7 +157,7 @@ def consume_file(
         input_doc=input_doc,
     )
 
-    overrides = merge_overrides(overridesA=overrides, overridesB=template_overrides)
+    overrides.update(template_overrides)
 
     # continue with consumption if no barcode was found
     document = Consumer().try_consume_file(