Add content matching to workflow trigger
This commit is contained in:
parent
c67747ff9f
commit
0e716d0566
@ -320,7 +320,8 @@ Workflows allow you to filter by:
|
||||
- File path, including wildcards. Note that enabling `PAPERLESS_CONSUMER_RECURSIVE` would allow, for
|
||||
example, automatically assigning documents to different owners based on the upload directory.
|
||||
- Mail rule. Choosing this option will force 'mail fetch' to be the workflow source.
|
||||
- Tags (`Added` and `Updated` triggers only). Will filter for documents with any of the specified tags
|
||||
- Content matching (`Added` and `Updated` triggers only). Filter for documents whose content matches the configured matching algorithm and pattern.
|
||||
- Tags (`Added` and `Updated` triggers only). Filter for documents with any of the specified tags.
|
||||
- Document type (`Added` and `Updated` triggers only). Filter for documents with this document type.
|
||||
- Correspondent (`Added` and `Updated` triggers only). Filter for documents with this correspondent.
|
||||
|
||||
|
@ -181,6 +181,15 @@
|
||||
<pngx-input-text i18n-title title="Filter path" formControlName="filter_path" i18n-hint hint="Apply to documents that match this path. Wildcards specified as * are allowed. Case insensitive." [error]="error?.filter_path"></pngx-input-text>
|
||||
<pngx-input-select i18n-title title="Filter mail rule" [items]="mailRules" [allowNull]="true" formControlName="filter_mailrule" i18n-hint hint="Apply to documents consumed via this mail rule." [error]="error?.filter_mailrule"></pngx-input-select>
|
||||
}
|
||||
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) {
|
||||
<pngx-input-select i18n-title title="Content matching algorithm" [items]="getMatchingAlgorithms()" formControlName="matching_algorithm"></pngx-input-select>
|
||||
@if (patternRequired) {
|
||||
<pngx-input-text i18n-title title="Content matching pattern" formControlName="match" [error]="error?.match"></pngx-input-text>
|
||||
}
|
||||
@if (patternRequired) {
|
||||
<pngx-input-check i18n-title title="Case insensitive" formControlName="is_insensitive"></pngx-input-check>
|
||||
}
|
||||
}
|
||||
</div>
|
||||
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) {
|
||||
<div class="col-md-6">
|
||||
|
@ -37,6 +37,7 @@ import {
|
||||
WorkflowAction,
|
||||
WorkflowActionType,
|
||||
} from 'src/app/data/workflow-action'
|
||||
import { MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model'
|
||||
|
||||
const workflow: Workflow = {
|
||||
name: 'Workflow 1',
|
||||
@ -216,4 +217,10 @@ describe('ConsumptionTemplateEditDialogComponent', () => {
|
||||
expect(action1.id).toBeNull()
|
||||
expect(action2.id).toBeNull()
|
||||
})
|
||||
|
||||
it('should not include auto matching in algorithms', () => {
|
||||
expect(component.getMatchingAlgorithms()).not.toContain(
|
||||
MATCHING_ALGORITHMS.find((a) => a.id === MATCH_AUTO)
|
||||
)
|
||||
})
|
||||
})
|
||||
|
@ -26,6 +26,11 @@ import {
|
||||
WorkflowActionType,
|
||||
} from 'src/app/data/workflow-action'
|
||||
import { CdkDragDrop, moveItemInArray } from '@angular/cdk/drag-drop'
|
||||
import {
|
||||
MATCHING_ALGORITHMS,
|
||||
MATCH_AUTO,
|
||||
MATCH_NONE,
|
||||
} from 'src/app/data/matching-model'
|
||||
|
||||
export const DOCUMENT_SOURCE_OPTIONS = [
|
||||
{
|
||||
@ -64,6 +69,10 @@ export const WORKFLOW_ACTION_OPTIONS = [
|
||||
},
|
||||
]
|
||||
|
||||
const TRIGGER_MATCHING_ALGORITHMS = MATCHING_ALGORITHMS.filter(
|
||||
(a) => a.id !== MATCH_AUTO
|
||||
)
|
||||
|
||||
@Component({
|
||||
selector: 'pngx-workflow-edit-dialog',
|
||||
templateUrl: './workflow-edit-dialog.component.html',
|
||||
@ -141,6 +150,11 @@ export class WorkflowEditDialogComponent
|
||||
})
|
||||
}
|
||||
|
||||
getMatchingAlgorithms() {
|
||||
// No auto matching
|
||||
return TRIGGER_MATCHING_ALGORITHMS
|
||||
}
|
||||
|
||||
ngOnInit(): void {
|
||||
super.ngOnInit()
|
||||
this.updateTriggerActionFields()
|
||||
@ -165,6 +179,9 @@ export class WorkflowEditDialogComponent
|
||||
filter_filename: new FormControl(trigger.filter_filename),
|
||||
filter_path: new FormControl(trigger.filter_path),
|
||||
filter_mailrule: new FormControl(trigger.filter_mailrule),
|
||||
matching_algorithm: new FormControl(MATCH_NONE),
|
||||
match: new FormControl(''),
|
||||
is_insensitive: new FormControl(true),
|
||||
filter_has_tags: new FormControl(trigger.filter_has_tags),
|
||||
filter_has_correspondent: new FormControl(
|
||||
trigger.filter_has_correspondent
|
||||
|
@ -23,6 +23,12 @@ export interface WorkflowTrigger extends ObjectWithId {
|
||||
|
||||
filter_mailrule?: number // MailRule.id
|
||||
|
||||
match?: string
|
||||
|
||||
matching_algorithm?: number
|
||||
|
||||
is_insensitive?: boolean
|
||||
|
||||
filter_has_tags?: number[] // Tag.id[]
|
||||
|
||||
filter_has_correspondent?: number // Correspondent.id
|
||||
|
@ -21,9 +21,13 @@ logger = logging.getLogger("paperless.matching")
|
||||
|
||||
def log_reason(matching_model: MatchingModel, document: Document, reason: str):
|
||||
class_name = type(matching_model).__name__
|
||||
name = (
|
||||
matching_model.name
|
||||
if hasattr(matching_model, "name")
|
||||
else matching_model.__str__()
|
||||
)
|
||||
logger.debug(
|
||||
f"{class_name} {matching_model.name} matched on document "
|
||||
f"{document} because {reason}",
|
||||
f"{class_name} {name} matched on document {document} because {reason}",
|
||||
)
|
||||
|
||||
|
||||
@ -318,6 +322,15 @@ def document_matches_workflow(
|
||||
):
|
||||
# document is type Document
|
||||
|
||||
if (
|
||||
trigger.matching_algorithm > MatchingModel.MATCH_NONE
|
||||
and not matches(trigger, document)
|
||||
):
|
||||
log_match_failure(
|
||||
f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match",
|
||||
)
|
||||
trigger_matched = False
|
||||
|
||||
# Document has_tags vs document tags
|
||||
if (
|
||||
trigger.filter_has_tags.all().count() > 0
|
||||
|
@ -407,6 +407,29 @@ class Migration(migrations.Migration):
|
||||
verbose_name="filter documents from this mail rule",
|
||||
),
|
||||
),
|
||||
(
|
||||
"matching_algorithm",
|
||||
models.PositiveIntegerField(
|
||||
choices=[
|
||||
(0, "None"),
|
||||
(1, "Any word"),
|
||||
(2, "All words"),
|
||||
(3, "Exact match"),
|
||||
(4, "Regular expression"),
|
||||
(5, "Fuzzy word"),
|
||||
],
|
||||
default=0,
|
||||
verbose_name="matching algorithm",
|
||||
),
|
||||
),
|
||||
(
|
||||
"match",
|
||||
models.CharField(blank=True, max_length=256, verbose_name="match"),
|
||||
),
|
||||
(
|
||||
"is_insensitive",
|
||||
models.BooleanField(default=True, verbose_name="is insensitive"),
|
||||
),
|
||||
(
|
||||
"filter_has_tags",
|
||||
models.ManyToManyField(
|
||||
|
@ -889,6 +889,15 @@ if settings.AUDIT_LOG_ENABLED:
|
||||
|
||||
|
||||
class WorkflowTrigger(models.Model):
|
||||
class WorkflowTriggerMatching(models.IntegerChoices):
|
||||
# No auto matching
|
||||
NONE = MatchingModel.MATCH_NONE, _("None")
|
||||
ANY = MatchingModel.MATCH_ANY, _("Any word")
|
||||
ALL = MatchingModel.MATCH_ALL, _("All words")
|
||||
LITERAL = MatchingModel.MATCH_LITERAL, _("Exact match")
|
||||
REGEX = MatchingModel.MATCH_REGEX, _("Regular expression")
|
||||
FUZZY = MatchingModel.MATCH_FUZZY, _("Fuzzy word")
|
||||
|
||||
class WorkflowTriggerType(models.IntegerChoices):
|
||||
CONSUMPTION = 1, _("Consumption")
|
||||
DOCUMENT_ADDED = 2, _("Document Added")
|
||||
@ -943,6 +952,16 @@ class WorkflowTrigger(models.Model):
|
||||
verbose_name=_("filter documents from this mail rule"),
|
||||
)
|
||||
|
||||
match = models.CharField(_("match"), max_length=256, blank=True)
|
||||
|
||||
matching_algorithm = models.PositiveIntegerField(
|
||||
_("matching algorithm"),
|
||||
choices=WorkflowTriggerMatching.choices,
|
||||
default=WorkflowTriggerMatching.NONE,
|
||||
)
|
||||
|
||||
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
|
||||
|
||||
filter_has_tags = models.ManyToManyField(
|
||||
Tag,
|
||||
blank=True,
|
||||
|
@ -1302,6 +1302,9 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
|
||||
"filter_path",
|
||||
"filter_filename",
|
||||
"filter_mailrule",
|
||||
"matching_algorithm",
|
||||
"match",
|
||||
"is_insensitive",
|
||||
"filter_has_tags",
|
||||
"filter_has_correspondent",
|
||||
"filter_has_document_type",
|
||||
|
@ -16,6 +16,7 @@ from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import Workflow
|
||||
@ -742,6 +743,81 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
expected_str = f"Document filename {doc.original_filename} does not match"
|
||||
self.assertIn(expected_str, cm.output[1])
|
||||
|
||||
def test_document_added_match_content_matching(self):
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
|
||||
matching_algorithm=MatchingModel.MATCH_LITERAL,
|
||||
match="foo",
|
||||
is_insensitive=True,
|
||||
)
|
||||
action = WorkflowAction.objects.create(
|
||||
assign_title="Doc content matching worked",
|
||||
assign_owner=self.user2,
|
||||
)
|
||||
w = Workflow.objects.create(
|
||||
name="Workflow 1",
|
||||
order=0,
|
||||
)
|
||||
w.triggers.add(trigger)
|
||||
w.actions.add(action)
|
||||
w.save()
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="sample test",
|
||||
correspondent=self.c,
|
||||
original_filename="sample.pdf",
|
||||
content="Hello world foo bar",
|
||||
)
|
||||
|
||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__,
|
||||
document=doc,
|
||||
)
|
||||
expected_str = f"WorkflowTrigger {trigger} matched on document"
|
||||
expected_str2 = 'because it contains this string: "foo"'
|
||||
self.assertIn(expected_str, cm.output[0])
|
||||
self.assertIn(expected_str2, cm.output[0])
|
||||
expected_str = f"Document matched {trigger} from {w}"
|
||||
self.assertIn(expected_str, cm.output[1])
|
||||
|
||||
def test_document_added_no_match_content_matching(self):
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
|
||||
matching_algorithm=MatchingModel.MATCH_LITERAL,
|
||||
match="foo",
|
||||
is_insensitive=True,
|
||||
)
|
||||
action = WorkflowAction.objects.create(
|
||||
assign_title="Doc content matching worked",
|
||||
assign_owner=self.user2,
|
||||
)
|
||||
action.save()
|
||||
w = Workflow.objects.create(
|
||||
name="Workflow 1",
|
||||
order=0,
|
||||
)
|
||||
w.triggers.add(trigger)
|
||||
w.actions.add(action)
|
||||
w.save()
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="sample test",
|
||||
correspondent=self.c,
|
||||
original_filename="sample.pdf",
|
||||
content="Hello world bar",
|
||||
)
|
||||
|
||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__,
|
||||
document=doc,
|
||||
)
|
||||
expected_str = f"Document did not match {w}"
|
||||
self.assertIn(expected_str, cm.output[0])
|
||||
expected_str = f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match"
|
||||
self.assertIn(expected_str, cm.output[1])
|
||||
|
||||
def test_document_added_no_match_tags(self):
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
|
||||
@ -751,7 +827,6 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
assign_title="Doc assign owner",
|
||||
assign_owner=self.user2,
|
||||
)
|
||||
action.save()
|
||||
w = Workflow.objects.create(
|
||||
name="Workflow 1",
|
||||
order=0,
|
||||
|
Loading…
x
Reference in New Issue
Block a user