Re-works document to template matching so it can log a more detailed reason

This commit is contained in:
Trenton Holmes 2023-09-20 18:47:00 -07:00 committed by shamoon
parent 4f391e4711
commit 46c8f74316
4 changed files with 118 additions and 76 deletions

View File

@ -4,7 +4,6 @@ import os
import tempfile import tempfile
import uuid import uuid
from enum import Enum from enum import Enum
from fnmatch import fnmatch
from pathlib import Path from pathlib import Path
from subprocess import CompletedProcess from subprocess import CompletedProcess
from subprocess import run from subprocess import run
@ -23,6 +22,7 @@ from rest_framework.reverse import reverse
from documents.data_models import ConsumableDocument from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides from documents.data_models import DocumentMetadataOverrides
from documents.matching import document_matches_template
from documents.permissions import set_permissions_for_object from documents.permissions import set_permissions_for_object
from documents.utils import copy_basic_file_stats from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
@ -606,28 +606,7 @@ class Consumer(LoggingMixin):
for template in ConsumptionTemplate.objects.all().order_by("order"): for template in ConsumptionTemplate.objects.all().order_by("order"):
template_overrides = DocumentMetadataOverrides() template_overrides = DocumentMetadataOverrides()
if ( if document_matches_template(input_doc, template):
int(input_doc.source) in [int(x) for x in list(template.sources)]
and (
input_doc.mailrule_id is None
or input_doc.mailrule_id == template.filter_mailrule.pk
)
) and (
(
template.filter_filename is None
or len(template.filter_filename) == 0
or fnmatch(
input_doc.original_file.name.lower(),
template.filter_filename.lower(),
)
)
and (
template.filter_path is None
or len(template.filter_path) == 0
or input_doc.original_file.match(template.filter_path)
)
):
self.log.info(f"Document matched consumption template {template.name}")
if template.assign_title is not None: if template.assign_title is not None:
template_overrides.title = template.assign_title template_overrides.title = template.assign_title
if template.assign_tags is not None: if template.assign_tags is not None:
@ -662,10 +641,7 @@ class Consumer(LoggingMixin):
template_overrides.change_groups = [ template_overrides.change_groups = [
group.pk for group in template.assign_change_groups.all() group.pk for group in template.assign_change_groups.all()
] ]
overrides = merge_overrides( overrides.update(template_overrides)
overridesA=overrides,
overridesB=template_overrides,
)
return overrides return overrides
def _parse_title_placeholders(self, title: str) -> str: def _parse_title_placeholders(self, title: str) -> str:
@ -848,50 +824,3 @@ class Consumer(LoggingMixin):
self.log.warning("Script stderr:") self.log.warning("Script stderr:")
for line in stderr_str: for line in stderr_str:
self.log.warning(line) self.log.warning(line)
def merge_overrides(
    overridesA: DocumentMetadataOverrides,
    overridesB: DocumentMetadataOverrides,
) -> DocumentMetadataOverrides:
    """
    Merges two DocumentMetadataOverrides objects such that object B's overrides
    are only applied if the property is empty in object A or merged if multiple
    are accepted.

    overridesA is modified in place and also returned.
    """
    # only if empty: A's value wins whenever it is set
    for field_name in (
        "title",
        "correspondent_id",
        "document_type_id",
        "storage_path_id",
        "owner_id",
    ):
        if getattr(overridesA, field_name) is None:
            setattr(overridesA, field_name, getattr(overridesB, field_name))

    # merge: list valued fields are concatenated, A's entries first
    for field_name in (
        "tag_ids",
        "view_users",
        "view_groups",
        "change_users",
        "change_groups",
    ):
        values_a = getattr(overridesA, field_name)
        values_b = getattr(overridesB, field_name)
        if values_a is None:
            setattr(overridesA, field_name, values_b)
        elif values_b is not None:
            # B may legitimately carry no value for this field; unpacking
            # None would raise TypeError, so only merge when B has a list
            setattr(overridesA, field_name, [*values_a, *values_b])

    return overridesA

View File

@ -29,6 +29,51 @@ class DocumentMetadataOverrides:
change_users: Optional[list[int]] = None change_users: Optional[list[int]] = None
change_groups: Optional[list[int]] = None change_groups: Optional[list[int]] = None
def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
    """
    Merges another DocumentMetadataOverrides object into this one such that
    the other object's overrides are only applied if the property is empty
    here, or merged if multiple values are accepted.

    The update is an in-place modification of self, which is also returned
    """
    # only if empty: a value already set on self always wins
    for field_name in (
        "title",
        "correspondent_id",
        "document_type_id",
        "storage_path_id",
        "owner_id",
    ):
        if getattr(self, field_name) is None:
            setattr(self, field_name, getattr(other, field_name))

    # merge: list valued fields are concatenated, self's entries first
    for field_name in (
        "tag_ids",
        "view_users",
        "view_groups",
        "change_users",
        "change_groups",
    ):
        own_values = getattr(self, field_name)
        other_values = getattr(other, field_name)
        if own_values is None:
            setattr(self, field_name, other_values)
        elif other_values is not None:
            # other may not set this field at all; unpacking None would
            # raise TypeError, so only merge when there is a list to add
            setattr(self, field_name, [*own_values, *other_values])

    return self
class DocumentSource(IntEnum): class DocumentSource(IntEnum):
""" """

View File

@ -1,7 +1,10 @@
import logging import logging
import re import re
from fnmatch import fnmatch
from documents.classifier import DocumentClassifier from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument
from documents.models import ConsumptionTemplate
from documents.models import Correspondent from documents.models import Correspondent
from documents.models import Document from documents.models import Document
from documents.models import DocumentType from documents.models import DocumentType
@ -231,3 +234,69 @@ def _split_match(matching_model):
re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+") re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+")
for t in findterms(matching_model.match) for t in findterms(matching_model.match)
] ]
def document_matches_template(
    document: ConsumableDocument,
    template: ConsumptionTemplate,
) -> bool:
    """
    Returns True if the incoming document matches all filters and
    settings from the template, False otherwise.

    The filters are checked in order (source, mail rule, filename, path) and
    evaluation stops at the first one which fails, so the reason logged at
    debug level always names the filter that actually rejected the document.
    """
    reason = None

    if document.source not in [int(x) for x in template.sources]:
        # Document source vs template source
        reason = f"Document source {document.source} not in {template.sources}"
    elif (
        document.mailrule_id is not None
        and template.filter_mailrule is not None
        and document.mailrule_id != template.filter_mailrule.pk
    ):
        # Document mail rule vs template mail rule, only compared when both
        # the document and the template actually carry a mail rule
        reason = (
            f"Document mail rule {document.mailrule_id} "
            f"!= {template.filter_mailrule.pk}"
        )
    elif template.filter_filename and not fnmatch(
        document.original_file.name.lower(),
        template.filter_filename.lower(),
    ):
        # Document filename vs template filename, an unset or empty filter
        # matches everything.  The comparison is case insensitive
        reason = (
            f"Document filename {document.original_file.name} "
            f"does not match {template.filter_filename.lower()}"
        )
    elif template.filter_path and not document.original_file.match(
        template.filter_path,
    ):
        # Document path vs template path, an unset or empty filter
        # matches everything
        reason = (
            f"Document path {document.original_file} "
            f"does not match {template.filter_path}"
        )

    match = reason is None

    logger.info(
        f"Document {'did' if match else 'did not'} match template {template.name}",
    )
    if not match:
        logger.debug(reason)

    return match

View File

@ -23,7 +23,6 @@ from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier from documents.classifier import load_classifier
from documents.consumer import Consumer from documents.consumer import Consumer
from documents.consumer import ConsumerError from documents.consumer import ConsumerError
from documents.consumer import merge_overrides
from documents.data_models import ConsumableDocument from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides from documents.data_models import DocumentMetadataOverrides
from documents.double_sided import collate from documents.double_sided import collate
@ -158,7 +157,7 @@ def consume_file(
input_doc=input_doc, input_doc=input_doc,
) )
overrides = merge_overrides(overridesA=overrides, overridesB=template_overrides) overrides.update(template_overrides)
# continue with consumption if no barcode was found # continue with consumption if no barcode was found
document = Consumer().try_consume_file( document = Consumer().try_consume_file(