Adds new filtering to exclude attachments from processing

This commit is contained in:
Trenton H 2023-11-28 10:19:31 -08:00
parent 5a20c8e512
commit 72a13f41ec
9 changed files with 148 additions and 25 deletions

View File

@ -5769,7 +5769,7 @@ class TestApiConsumptionTemplates(DirectoriesMixin, APITestCase):
filter_to="someone@somewhere.com",
filter_subject="subject",
filter_body="body",
filter_attachment_filename="file.pdf",
filter_attachment_filename_include="file.pdf",
maximum_age=30,
action=MailRule.MailAction.MARK_READ,
assign_title_from=MailRule.TitleSource.FROM_SUBJECT,

View File

@ -54,7 +54,7 @@ class TestConsumptionTemplates(DirectoriesMixin, FileSystemAssertsMixin, TestCas
filter_to="someone@somewhere.com",
filter_subject="subject",
filter_body="body",
filter_attachment_filename="file.pdf",
filter_attachment_filename_include="file.pdf",
maximum_age=30,
action=MailRule.MailAction.MARK_READ,
assign_title_from=MailRule.TitleSource.NONE,

View File

@ -68,7 +68,8 @@ class MailRuleAdmin(GuardedModelAdmin):
"filter_to",
"filter_subject",
"filter_body",
"filter_attachment_filename",
"filter_attachment_filename_include",
"filter_attachment_filename_exclude",
"maximum_age",
"consumption_scope",
"attachment_type",

View File

@ -668,12 +668,29 @@ class MailAccountHandler(LoggingMixin):
)
continue
if rule.filter_attachment_filename and not fnmatch(
if rule.filter_attachment_filename_include and not fnmatch(
att.filename.lower(),
rule.filter_attachment_filename.lower(),
rule.filter_attachment_filename_include.lower(),
):
# Force the filename and pattern to the lowercase
# as this is system dependent otherwise
self.log.debug(
f"Rule {rule}: "
f"Skipping attachment {att.filename} "
f"does not match pattern {rule.filter_attachment_filename_include}",
)
continue
elif rule.filter_attachment_filename_exclude and fnmatch(
att.filename.lower(),
rule.filter_attachment_filename_exclude.lower(),
):
# Force the filename and pattern to the lowercase
# as this is system dependent otherwise
self.log.debug(
f"Rule {rule}: "
f"Skipping attachment {att.filename} "
f"does match pattern {rule.filter_attachment_filename_exclude}",
)
continue
correspondent = self._get_correspondent(message, rule)

View File

@ -0,0 +1,29 @@
# Generated by Django 4.2.7 on 2023-11-28 17:47
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Split the mail rule attachment filename filter into include/exclude.

    Renames ``MailRule.filter_attachment_filename`` to
    ``filter_attachment_filename_include`` and adds the new optional
    ``filter_attachment_filename_exclude`` field.
    """

    # Must be applied after migration 0022 of the paperless_mail app.
    dependencies = [
        ("paperless_mail", "0022_mailrule_assign_owner_from_rule_and_more"),
    ]
    operations = [
        # The existing filter field is renamed (not dropped and re-created);
        # it becomes the "include" pattern.
        migrations.RenameField(
            model_name="mailrule",
            old_name="filter_attachment_filename",
            new_name="filter_attachment_filename_include",
        ),
        # New "exclude" pattern. blank=True / null=True make it optional, so
        # no default value has to be supplied for existing rows.
        migrations.AddField(
            model_name="mailrule",
            name="filter_attachment_filename_exclude",
            field=models.CharField(
                blank=True,
                help_text="Do not consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive.",
                max_length=256,
                null=True,
                verbose_name="filter attachment filename exclusive",
            ),
        ),
    ]

View File

@ -139,8 +139,8 @@ class MailRule(document_models.ModelWithOwner):
blank=True,
)
filter_attachment_filename = models.CharField(
_("filter attachment filename"),
filter_attachment_filename_include = models.CharField(
_("filter attachment filename inclusive"),
max_length=256,
null=True,
blank=True,
@ -151,6 +151,18 @@ class MailRule(document_models.ModelWithOwner):
),
)
filter_attachment_filename_exclude = models.CharField(
_("filter attachment filename exclusive"),
max_length=256,
null=True,
blank=True,
help_text=_(
"Do not consume documents which entirely match this "
"filename if specified. Wildcards such as *.pdf or "
"*invoice* are allowed. Case insensitive.",
),
)
maximum_age = models.PositiveIntegerField(
_("maximum age"),
default=30,

View File

@ -79,7 +79,8 @@ class MailRuleSerializer(OwnedObjectSerializer):
"filter_to",
"filter_subject",
"filter_body",
"filter_attachment_filename",
"filter_attachment_filename_include",
"filter_attachment_filename_exclude",
"maximum_age",
"action",
"action_parameter",

View File

@ -377,7 +377,7 @@ class TestAPIMailRules(DirectoriesMixin, APITestCase):
filter_to="someone@somewhere.com",
filter_subject="subject",
filter_body="body",
filter_attachment_filename="file.pdf",
filter_attachment_filename_include="file.pdf",
maximum_age=30,
action=MailRule.MailAction.MARK_READ,
assign_title_from=MailRule.TitleSource.FROM_SUBJECT,
@ -400,8 +400,8 @@ class TestAPIMailRules(DirectoriesMixin, APITestCase):
self.assertEqual(returned_rule1["filter_subject"], rule1.filter_subject)
self.assertEqual(returned_rule1["filter_body"], rule1.filter_body)
self.assertEqual(
returned_rule1["filter_attachment_filename"],
rule1.filter_attachment_filename,
returned_rule1["filter_attachment_filename_include"],
rule1.filter_attachment_filename_include,
)
self.assertEqual(returned_rule1["maximum_age"], rule1.maximum_age)
self.assertEqual(returned_rule1["action"], rule1.action)
@ -453,7 +453,7 @@ class TestAPIMailRules(DirectoriesMixin, APITestCase):
"filter_to": "aperson@aplace.com",
"filter_subject": "subject",
"filter_body": "body",
"filter_attachment_filename": "file.pdf",
"filter_attachment_filename_include": "file.pdf",
"maximum_age": 30,
"action": MailRule.MailAction.MARK_READ,
"assign_title_from": MailRule.TitleSource.FROM_SUBJECT,
@ -488,8 +488,8 @@ class TestAPIMailRules(DirectoriesMixin, APITestCase):
self.assertEqual(returned_rule1["filter_subject"], rule1["filter_subject"])
self.assertEqual(returned_rule1["filter_body"], rule1["filter_body"])
self.assertEqual(
returned_rule1["filter_attachment_filename"],
rule1["filter_attachment_filename"],
returned_rule1["filter_attachment_filename_include"],
rule1["filter_attachment_filename_include"],
)
self.assertEqual(returned_rule1["maximum_age"], rule1["maximum_age"])
self.assertEqual(returned_rule1["action"], rule1["action"])
@ -545,7 +545,7 @@ class TestAPIMailRules(DirectoriesMixin, APITestCase):
filter_from="from@example.com",
filter_subject="subject",
filter_body="body",
filter_attachment_filename="file.pdf",
filter_attachment_filename_include="file.pdf",
maximum_age=30,
action=MailRule.MailAction.MARK_READ,
assign_title_from=MailRule.TitleSource.FROM_SUBJECT,
@ -589,7 +589,7 @@ class TestAPIMailRules(DirectoriesMixin, APITestCase):
filter_from="from@example.com",
filter_subject="subject",
filter_body="body",
filter_attachment_filename="file.pdf",
filter_attachment_filename_include="file.pdf",
maximum_age=30,
action=MailRule.MailAction.MARK_READ,
assign_title_from=MailRule.TitleSource.FROM_SUBJECT,

View File

@ -526,6 +526,16 @@ class TestMail(
)
def test_filename_filter(self):
"""
GIVEN:
- Email with multiple similarly named attachments
- Rule with inclusive and exclusive filters
WHEN:
- Mail action filtering is checked
THEN:
- Mail action should not be performed for files excluded
- Mail action should be performed for files included
"""
message = self.create_message(
attachments=[
_AttachmentDef(filename="f1.pdf"),
@ -537,15 +547,67 @@ class TestMail(
],
)
# One scenario for the attachment filename filter test: the include/exclude
# patterns to put on the mail rule and the attachments expected to pass.
@dataclasses.dataclass(frozen=True)
class FilterTestCase:
    # Human-readable label; used as the subTest message.
    name: str
    # Value assigned to the rule's filter_attachment_filename_include.
    include_pattern: Optional[str]
    # Value assigned to the rule's filter_attachment_filename_exclude
    # (None in the cases that only exercise the include pattern).
    exclude_pattern: Optional[str]
    # Attachment filenames expected as "override_filename" in the queued
    # consumption tasks after the rule has handled the message.
    expected_matches: list[str]
tests = [
("*.pdf", ["f1.pdf", "f2.pdf", "f3.pdf", "file.PDf", "f1.Pdf"]),
("f1.pdf", ["f1.pdf", "f1.Pdf"]),
("*", ["f1.pdf", "f2.pdf", "f3.pdf", "f2.png", "file.PDf", "f1.Pdf"]),
("*.png", ["f2.png"]),
FilterTestCase(
"PDF Wildcard",
include_pattern="*.pdf",
exclude_pattern=None,
expected_matches=["f1.pdf", "f2.pdf", "f3.pdf", "file.PDf", "f1.Pdf"],
),
FilterTestCase(
"F1 PDF Only",
include_pattern="f1.pdf",
exclude_pattern=None,
expected_matches=["f1.pdf", "f1.Pdf"],
),
FilterTestCase(
"All Files",
include_pattern="*",
exclude_pattern=None,
expected_matches=[
"f1.pdf",
"f2.pdf",
"f3.pdf",
"f2.png",
"file.PDf",
"f1.Pdf",
],
),
FilterTestCase(
"PNG Only",
include_pattern="*.png",
exclude_pattern=None,
expected_matches=["f2.png"],
),
FilterTestCase(
"PDF Files without f1",
include_pattern="*.pdf",
exclude_pattern="f1*",
expected_matches=["f2.pdf", "f3.pdf", "file.PDf"],
),
FilterTestCase(
"All Files, no PNG",
include_pattern="*",
exclude_pattern="*.png",
expected_matches=[
"f1.pdf",
"f2.pdf",
"f3.pdf",
"file.PDf",
"f1.Pdf",
],
),
]
for pattern, matches in tests:
with self.subTest(msg=pattern):
for test_case in tests:
with self.subTest(msg=test_case.name):
self._queue_consumption_tasks_mock.reset_mock()
account = MailAccount(name=str(uuid.uuid4()))
account.save()
@ -553,14 +615,15 @@ class TestMail(
name=str(uuid.uuid4()),
assign_title_from=MailRule.TitleSource.FROM_FILENAME,
account=account,
filter_attachment_filename=pattern,
filter_attachment_filename_include=test_case.include_pattern,
filter_attachment_filename_exclude=test_case.exclude_pattern,
)
rule.save()
self.mail_account_handler._handle_message(message, rule)
self.assert_queue_consumption_tasks_call_args(
[
[{"override_filename": m} for m in matches],
[{"override_filename": m} for m in test_case.expected_matches],
],
)
@ -593,7 +656,7 @@ class TestMail(
name=str(uuid.uuid4()),
assign_title_from=MailRule.TitleSource.FROM_FILENAME,
account=account,
filter_attachment_filename="*.pdf",
filter_attachment_filename_include="*.pdf",
attachment_type=MailRule.AttachmentProcessing.EVERYTHING,
action=MailRule.MailAction.DELETE,
)