Reworks document-to-template matching so it can log a more detailed reason

This commit is contained in:
Trenton Holmes 2023-09-20 18:47:00 -07:00 committed by shamoon
parent 4f391e4711
commit 46c8f74316
4 changed files with 118 additions and 76 deletions

View File

@ -4,7 +4,6 @@ import os
import tempfile
import uuid
from enum import Enum
from fnmatch import fnmatch
from pathlib import Path
from subprocess import CompletedProcess
from subprocess import run
@ -23,6 +22,7 @@ from rest_framework.reverse import reverse
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.matching import document_matches_template
from documents.permissions import set_permissions_for_object
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
@ -606,28 +606,7 @@ class Consumer(LoggingMixin):
for template in ConsumptionTemplate.objects.all().order_by("order"):
template_overrides = DocumentMetadataOverrides()
if (
int(input_doc.source) in [int(x) for x in list(template.sources)]
and (
input_doc.mailrule_id is None
or input_doc.mailrule_id == template.filter_mailrule.pk
)
) and (
(
template.filter_filename is None
or len(template.filter_filename) == 0
or fnmatch(
input_doc.original_file.name.lower(),
template.filter_filename.lower(),
)
)
and (
template.filter_path is None
or len(template.filter_path) == 0
or input_doc.original_file.match(template.filter_path)
)
):
self.log.info(f"Document matched consumption template {template.name}")
if document_matches_template(input_doc, template):
if template.assign_title is not None:
template_overrides.title = template.assign_title
if template.assign_tags is not None:
@ -662,10 +641,7 @@ class Consumer(LoggingMixin):
template_overrides.change_groups = [
group.pk for group in template.assign_change_groups.all()
]
overrides = merge_overrides(
overridesA=overrides,
overridesB=template_overrides,
)
overrides.update(template_overrides)
return overrides
def _parse_title_placeholders(self, title: str) -> str:
@ -848,50 +824,3 @@ class Consumer(LoggingMixin):
self.log.warning("Script stderr:")
for line in stderr_str:
self.log.warning(line)
def merge_overrides(
    overridesA: DocumentMetadataOverrides,
    overridesB: DocumentMetadataOverrides,
) -> DocumentMetadataOverrides:
    """
    Merges overridesB into overridesA, modifying and returning overridesA.

    Single-valued fields (title, correspondent, document type, storage path,
    owner) keep the value already present in overridesA and are only taken
    from overridesB when overridesA has none.

    List-valued fields (tags, view/change users and groups) are concatenated,
    overridesA's entries first. A list which is unset (None) in overridesB
    leaves the list in overridesA untouched.
    """
    # only if empty
    if overridesA.title is None:
        overridesA.title = overridesB.title
    if overridesA.correspondent_id is None:
        overridesA.correspondent_id = overridesB.correspondent_id
    if overridesA.document_type_id is None:
        overridesA.document_type_id = overridesB.document_type_id
    if overridesA.storage_path_id is None:
        overridesA.storage_path_id = overridesB.storage_path_id
    if overridesA.owner_id is None:
        overridesA.owner_id = overridesB.owner_id

    # merge
    # The "is not None" guard on B matters: unpacking None with * raises
    # a TypeError, which used to happen whenever only A had a list set
    if overridesA.tag_ids is None:
        overridesA.tag_ids = overridesB.tag_ids
    elif overridesB.tag_ids is not None:
        overridesA.tag_ids = [*overridesA.tag_ids, *overridesB.tag_ids]

    if overridesA.view_users is None:
        overridesA.view_users = overridesB.view_users
    elif overridesB.view_users is not None:
        overridesA.view_users = [*overridesA.view_users, *overridesB.view_users]

    if overridesA.view_groups is None:
        overridesA.view_groups = overridesB.view_groups
    elif overridesB.view_groups is not None:
        overridesA.view_groups = [*overridesA.view_groups, *overridesB.view_groups]

    if overridesA.change_users is None:
        overridesA.change_users = overridesB.change_users
    elif overridesB.change_users is not None:
        overridesA.change_users = [*overridesA.change_users, *overridesB.change_users]

    if overridesA.change_groups is None:
        overridesA.change_groups = overridesB.change_groups
    elif overridesB.change_groups is not None:
        overridesA.change_groups = [
            *overridesA.change_groups,
            *overridesB.change_groups,
        ]

    return overridesA

View File

@ -29,6 +29,51 @@ class DocumentMetadataOverrides:
change_users: Optional[list[int]] = None
change_groups: Optional[list[int]] = None
def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
    """
    Merges the overrides from other into this object.

    Single-valued fields (title, correspondent, document type, storage path,
    owner) keep the value already set on self and are only taken from other
    when self has none.

    List-valued fields (tags, view/change users and groups) are concatenated,
    self's entries first. A list which is unset (None) on other leaves the
    list on self untouched.

    The update is an in-place modification of self, which is also returned
    """
    # only if empty
    for name in (
        "title",
        "correspondent_id",
        "document_type_id",
        "storage_path_id",
        "owner_id",
    ):
        if getattr(self, name) is None:
            setattr(self, name, getattr(other, name))

    # merge
    for name in (
        "tag_ids",
        "view_users",
        "view_groups",
        "change_users",
        "change_groups",
    ):
        incoming = getattr(other, name)
        if incoming is None:
            # Nothing to merge in; unpacking None below would raise TypeError
            continue
        current = getattr(self, name)
        if current is None:
            setattr(self, name, incoming)
        else:
            setattr(self, name, [*current, *incoming])

    return self
class DocumentSource(IntEnum):
"""

View File

@ -1,7 +1,10 @@
import logging
import re
from fnmatch import fnmatch
from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument
from documents.models import ConsumptionTemplate
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
@ -231,3 +234,69 @@ def _split_match(matching_model):
re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+")
for t in findterms(matching_model.match)
]
def document_matches_template(
    document: ConsumableDocument,
    template: ConsumptionTemplate,
) -> bool:
    """
    Returns True if the incoming document matches all filters and
    settings from the template, False otherwise.

    The filters are checked in a fixed order (source, mail rule, filename,
    path) and checking stops at the first filter which rejects the document,
    so the reason logged at debug level names the filter that actually failed.
    Filters which are unset on the template (None or empty) always pass.
    """
    reason = None

    # Only the first failing filter sets the reason. Each branch touches a
    # template attribute only after confirming it is set, so an unrelated
    # failure never dereferences a filter which is None.

    # Document source vs template source
    if document.source not in [int(x) for x in template.sources]:
        reason = f"Document source {document.source} not in {template.sources}"

    # Document mail rule vs template mail rule
    elif (
        document.mailrule_id is not None
        and template.filter_mailrule is not None
        and document.mailrule_id != template.filter_mailrule.pk
    ):
        reason = (
            f"Document mail rule {document.mailrule_id} "
            f"!= {template.filter_mailrule.pk}"
        )

    # Document filename vs template filename, compared case-insensitively
    elif template.filter_filename and not fnmatch(
        document.original_file.name.lower(),
        template.filter_filename.lower(),
    ):
        reason = (
            f"Document filename {document.original_file.name} "
            f"does not match {template.filter_filename.lower()}"
        )

    # Document path vs template path
    elif template.filter_path and not document.original_file.match(
        template.filter_path,
    ):
        reason = (
            f"Document path {document.original_file} "
            f"does not match {template.filter_path}"
        )

    match = reason is None

    logger.info(
        f"Document {'did' if match else 'did not'} match template {template.name}",
    )
    if not match:
        logger.debug(reason)

    return match

View File

@ -23,7 +23,6 @@ from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier
from documents.consumer import Consumer
from documents.consumer import ConsumerError
from documents.consumer import merge_overrides
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.double_sided import collate
@ -158,7 +157,7 @@ def consume_file(
input_doc=input_doc,
)
overrides = merge_overrides(overridesA=overrides, overridesB=template_overrides)
overrides.update(template_overrides)
# continue with consumption if no barcode was found
document = Consumer().try_consume_file(