Compare commits

...

8 Commits

Author SHA1 Message Date
shamoon
7b75333819
custom field regex matching
[ci skip]
2025-03-20 23:17:57 -07:00
shamoon
71fdc2a36d
limit the frontend algo select too 2025-03-20 23:17:44 -07:00
shamoon
dbe58672ed
limit matching options in serializer
[ci skip]
2025-03-20 21:54:05 -07:00
shamoon
8a907c2868
Fix some merge stuff
[ci skip]
2025-03-20 21:54:05 -07:00
shamoon
6dc6c6c7bb
Add to handler, matching, retagger 2025-03-20 21:54:05 -07:00
shamoon
a632b6b711
Add custom fields to classifier 2025-03-20 16:21:34 -07:00
shamoon
b8c618abbe
Make frontend list a generic management list 2025-03-20 16:21:14 -07:00
shamoon
7a46806643
Migrate to matching model 2025-03-20 16:20:16 -07:00
21 changed files with 467 additions and 256 deletions

View File

@ -372,17 +372,19 @@ currently-imported docs. This problem is common enough that there are
tools for it.
```
document_retagger [-h] [-c] [-T] [-t] [-i] [--id-range] [--use-first] [-f]
document_retagger [-h] [-c] [-T] [-t] [-s] [-cf] [-i] [--id-range] [--use-first] [-f] [--suggest]
optional arguments:
-c, --correspondent
-T, --tags
-t, --document_type
-s, --storage_path
-cf, --custom_fields
-i, --inbox-only
--id-range
--use-first
-f, --overwrite
--suggest
```
Run this after changing or adding matching rules. It'll loop over all
@ -408,6 +410,8 @@ to override this behavior and just use the first correspondent or type
it finds. This option does not apply to tags, since any amount of tags
can be applied to a document.
If you want to suggest changes but not apply them, specify `--suggest`.
Finally, `-f` specifies that you wish to overwrite already assigned
correspondents, types and/or tags. The default behavior is to not assign
correspondents and types to documents that have this data already

View File

@ -12,7 +12,7 @@ import { DocumentAsnComponent } from './components/document-asn/document-asn.com
import { DocumentDetailComponent } from './components/document-detail/document-detail.component'
import { DocumentListComponent } from './components/document-list/document-list.component'
import { CorrespondentListComponent } from './components/manage/correspondent-list/correspondent-list.component'
import { CustomFieldsComponent } from './components/manage/custom-fields/custom-fields.component'
import { CustomFieldsListComponent } from './components/manage/custom-fields-list/custom-fields-list.component'
import { DocumentTypeListComponent } from './components/manage/document-type-list/document-type-list.component'
import { MailComponent } from './components/manage/mail/mail.component'
import { SavedViewsComponent } from './components/manage/saved-views/saved-views.component'
@ -239,7 +239,7 @@ export const routes: Routes = [
},
{
path: 'customfields',
component: CustomFieldsComponent,
component: CustomFieldsListComponent,
canActivate: [PermissionsGuard],
data: {
requiredPermission: {

View File

@ -11,7 +11,7 @@
<pngx-input-text i18n-title title="Name" formControlName="name" [error]="error?.name" autocomplete="off"></pngx-input-text>
<pngx-input-select i18n-title title="Data type" [items]="getDataTypes()" formControlName="data_type"></pngx-input-select>
@if (typeFieldDisabled) {
<small class="d-block mt-n2" i18n>Data type cannot be changed after a field is created</small>
<small class="d-block mt-n2 fst-italic text-muted" i18n>Data type cannot be changed after a field is created</small>
}
<div [formGroup]="objectForm.controls.extra_data">
@switch (objectForm.get('data_type').value) {
@ -39,6 +39,14 @@
}
}
</div>
<hr/>
<pngx-input-select i18n-title title="Matching algorithm" [items]="getMatchingAlgorithms()" formControlName="matching_algorithm"></pngx-input-select>
@if (patternRequired) {
<pngx-input-text i18n-title title="Matching pattern" formControlName="match" [error]="error?.match"></pngx-input-text>
}
@if (patternRequired) {
<pngx-input-check i18n-title title="Case insensitive" formControlName="is_insensitive" novalidate></pngx-input-check>
}
</div>
<div class="modal-footer">
<button type="button" class="btn btn-outline-secondary" (click)="cancel()" i18n [disabled]="networkActive">Cancel</button>

View File

@ -21,6 +21,7 @@ import {
CustomFieldDataType,
DATA_TYPE_LABELS,
} from 'src/app/data/custom-field'
import { MATCH_NONE, MATCHING_ALGORITHMS } from 'src/app/data/matching-model'
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
import { UserService } from 'src/app/services/rest/user.service'
import { SettingsService } from 'src/app/services/settings.service'
@ -28,6 +29,27 @@ import { SelectComponent } from '../../input/select/select.component'
import { TextComponent } from '../../input/text/text.component'
import { EditDialogComponent, EditDialogMode } from '../edit-dialog.component'
// Custom field data types that are offered the extended list of matching
// algorithms (MATCHING_ALGORITHMS_FOR_DISCRETE_FIELDS) in the edit dialog.
const FIELDS_WITH_DISCRETE_MATCHING = [
CustomFieldDataType.Boolean,
CustomFieldDataType.Select,
]
// Matching algorithms offered for every custom field data type.
// NOTE(review): entries are picked by position in MATCHING_ALGORITHMS; the
// name comments below assume that array's current ordering — confirm against
// src/app/data/matching-model.
const MATCHING_ALGORITHMS_FOR_ALL_FIELDS = [
// MATCH_NONE
MATCHING_ALGORITHMS[6],
// MATCH_REGEX
MATCHING_ALGORITHMS[4],
]
// Matching algorithms offered for the data types listed in
// FIELDS_WITH_DISCRETE_MATCHING: the common set plus the entry at index 0.
// NOTE(review): same positional-lookup assumption as above.
const MATCHING_ALGORITHMS_FOR_DISCRETE_FIELDS = [
// MATCH_NONE
MATCHING_ALGORITHMS[6],
// MATCH_AUTO
MATCHING_ALGORITHMS[0],
// MATCH_REGEX
MATCHING_ALGORITHMS[4],
]
@Component({
selector: 'pngx-custom-field-edit-dialog',
templateUrl: './custom-field-edit-dialog.component.html',
@ -107,6 +129,9 @@ export class CustomFieldEditDialogComponent
select_options: new FormArray([]),
default_currency: new FormControl(null),
}),
matching_algorithm: new FormControl(MATCH_NONE),
match: new FormControl(''),
is_insensitive: new FormControl(true),
})
}
@ -127,4 +152,15 @@ export class CustomFieldEditDialogComponent
public removeSelectOption(index: number) {
this.selectOptions.removeAt(index)
}
/**
 * Returns the matching algorithms selectable for the field being edited.
 *
 * The data type is read from the form first and, failing that, from the
 * object being edited. Types listed in FIELDS_WITH_DISCRETE_MATCHING get the
 * extended list; every other type gets the common list.
 */
public getMatchingAlgorithms() {
  const formDataType = this.getForm().value.data_type
  if (FIELDS_WITH_DISCRETE_MATCHING.includes(formDataType)) {
    return MATCHING_ALGORITHMS_FOR_DISCRETE_FIELDS
  }
  return FIELDS_WITH_DISCRETE_MATCHING.includes(this.object?.data_type)
    ? MATCHING_ALGORITHMS_FOR_DISCRETE_FIELDS
    : MATCHING_ALGORITHMS_FOR_ALL_FIELDS
}
}

View File

@ -28,7 +28,7 @@ import { ToastService } from 'src/app/services/toast.service'
import { ConfirmDialogComponent } from '../../common/confirm-dialog/confirm-dialog.component'
import { CustomFieldEditDialogComponent } from '../../common/edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component'
import { PageHeaderComponent } from '../../common/page-header/page-header.component'
import { CustomFieldsComponent } from './custom-fields.component'
import { CustomFieldsListComponent } from './custom-fields-list.component'
const fields: CustomField[] = [
{
@ -43,9 +43,9 @@ const fields: CustomField[] = [
},
]
describe('CustomFieldsComponent', () => {
let component: CustomFieldsComponent
let fixture: ComponentFixture<CustomFieldsComponent>
describe('CustomFieldsListComponent', () => {
let component: CustomFieldsListComponent
let fixture: ComponentFixture<CustomFieldsListComponent>
let customFieldsService: CustomFieldsService
let modalService: NgbModal
let toastService: ToastService
@ -61,7 +61,7 @@ describe('CustomFieldsComponent', () => {
NgbModalModule,
NgbPopoverModule,
NgxBootstrapIconsModule.pick(allIcons),
CustomFieldsComponent,
CustomFieldsListComponent,
IfPermissionsDirective,
PageHeaderComponent,
ConfirmDialogComponent,
@ -94,7 +94,7 @@ describe('CustomFieldsComponent', () => {
settingsService = TestBed.inject(SettingsService)
settingsService.currentUser = { id: 0, username: 'test' }
fixture = TestBed.createComponent(CustomFieldsComponent)
fixture = TestBed.createComponent(CustomFieldsListComponent)
component = fixture.componentInstance
fixture.detectChanges()
jest.useFakeTimers()
@ -106,7 +106,7 @@ describe('CustomFieldsComponent', () => {
modalService.activeInstances.subscribe((m) => (modal = m[m.length - 1]))
const toastErrorSpy = jest.spyOn(toastService, 'showError')
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
const reloadSpy = jest.spyOn(component, 'reload')
const reloadSpy = jest.spyOn(component, 'reloadData')
const createButton = fixture.debugElement.queryAll(By.css('button'))[1]
createButton.triggerEventHandler('click')
@ -131,7 +131,7 @@ describe('CustomFieldsComponent', () => {
modalService.activeInstances.subscribe((m) => (modal = m[m.length - 1]))
const toastErrorSpy = jest.spyOn(toastService, 'showError')
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
const reloadSpy = jest.spyOn(component, 'reload')
const reloadSpy = jest.spyOn(component, 'reloadData')
const editButton = fixture.debugElement.queryAll(By.css('button'))[2]
editButton.triggerEventHandler('click')
@ -156,7 +156,7 @@ describe('CustomFieldsComponent', () => {
modalService.activeInstances.subscribe((m) => (modal = m[m.length - 1]))
const toastErrorSpy = jest.spyOn(toastService, 'showError')
const deleteSpy = jest.spyOn(customFieldsService, 'delete')
const reloadSpy = jest.spyOn(component, 'reload')
const reloadSpy = jest.spyOn(component, 'reloadData')
const deleteButton = fixture.debugElement.queryAll(By.css('button'))[5]
deleteButton.triggerEventHandler('click')

View File

@ -0,0 +1,96 @@
import { NgClass, TitleCasePipe } from '@angular/common'
import { Component } from '@angular/core'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import {
NgbDropdownModule,
NgbModal,
NgbPaginationModule,
} from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { CustomField, DATA_TYPE_LABELS } from 'src/app/data/custom-field'
import {
CustomFieldQueryLogicalOperator,
CustomFieldQueryOperator,
} from 'src/app/data/custom-field-query'
import { FILTER_CUSTOM_FIELDS_QUERY } from 'src/app/data/filter-rule-type'
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
import { SortableDirective } from 'src/app/directives/sortable.directive'
import { SafeHtmlPipe } from 'src/app/pipes/safehtml.pipe'
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
import {
PermissionsService,
PermissionType,
} from 'src/app/services/permissions.service'
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
import { ToastService } from 'src/app/services/toast.service'
import { CustomFieldEditDialogComponent } from '../../common/edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component'
import { PageHeaderComponent } from '../../common/page-header/page-header.component'
import { ManagementListComponent } from '../management-list/management-list.component'
/**
 * Management list for custom fields.
 *
 * Reuses the generic management-list template and styles, adds a "Data Type"
 * column and replaces the document filter with a custom-field query.
 */
@Component({
  selector: 'pngx-custom-fields-list',
  templateUrl: './../management-list/management-list.component.html',
  styleUrls: ['./../management-list/management-list.component.scss'],
  imports: [
    SortableDirective,
    PageHeaderComponent,
    TitleCasePipe,
    IfPermissionsDirective,
    SafeHtmlPipe,
    FormsModule,
    ReactiveFormsModule,
    NgClass,
    NgbDropdownModule,
    NgbPaginationModule,
    NgxBootstrapIconsModule,
  ],
})
export class CustomFieldsListComponent extends ManagementListComponent<CustomField> {
  // Read by the shared template to hide the bulk "Permissions" button.
  permissionsDisabled = true

  constructor(
    customFieldsService: CustomFieldsService,
    modalService: NgbModal,
    toastService: ToastService,
    documentListViewService: DocumentListViewService,
    permissionsService: PermissionsService
  ) {
    super(
      customFieldsService,
      modalService,
      CustomFieldEditDialogComponent,
      toastService,
      documentListViewService,
      permissionsService,
      0, // see filterDocuments override below
      $localize`custom field`,
      $localize`custom fields`,
      PermissionType.CustomField,
      [
        {
          key: 'data_type',
          name: $localize`Data Type`,
          // Optional chaining: a data type without a label entry renders as
          // empty instead of throwing while the list is being drawn.
          valueFn: (field: CustomField) => {
            return DATA_TYPE_LABELS.find((l) => l.id === field.data_type)?.name
          },
        },
      ]
    )
  }

  /**
   * Quick-filters the document list with a custom-field query that requires
   * the given field to exist on the document.
   */
  filterDocuments(field: CustomField) {
    this.documentListViewService.quickFilter([
      {
        rule_type: FILTER_CUSTOM_FIELDS_QUERY,
        value: JSON.stringify([
          CustomFieldQueryLogicalOperator.Or,
          [[field.id, CustomFieldQueryOperator.Exists, true]],
        ]),
      },
    ])
  }

  /** Returns the localized confirmation text for deleting the given field. */
  getDeleteMessage(object: CustomField) {
    return $localize`Do you really want to delete the field "${object.name}"?`
  }
}

View File

@ -1,72 +0,0 @@
<pngx-page-header
title="Custom Fields"
i18n-title
info="Customize the data fields that can be attached to documents."
i18n-info
infoLink="usage/#custom-fields"
>
<button type="button" class="btn btn-sm btn-outline-primary" (click)="editField()" *pngxIfPermissions="{ action: PermissionAction.Add, type: PermissionType.CustomField }">
<i-bs name="plus-circle"></i-bs>&nbsp;<ng-container i18n>Add Field</ng-container>
</button>
</pngx-page-header>
<ul class="list-group">
<li class="list-group-item">
<div class="row">
<div class="col" i18n>Name</div>
<div class="col" i18n>Data Type</div>
<div class="col" i18n>Actions</div>
</div>
</li>
@if (loading) {
<li class="list-group-item">
<div class="spinner-border spinner-border-sm me-2" role="status"></div>
<ng-container i18n>Loading...</ng-container>
</li>
}
@for (field of fields; track field) {
<li class="list-group-item">
<div class="row fade" [class.show]="show">
<div class="col d-flex align-items-center"><button class="btn btn-link p-0 text-start" type="button" (click)="editField(field)" [disabled]="!permissionsService.currentUserCan(PermissionAction.Change, PermissionType.CustomField)">{{field.name}}</button></div>
<div class="col d-flex align-items-center">{{getDataType(field)}}</div>
<div class="col">
<div class="btn-group d-block d-sm-none">
<div ngbDropdown container="body" class="d-inline-block">
<button type="button" class="btn btn-link" id="actionsMenuMobile" (click)="$event.stopPropagation()" ngbDropdownToggle>
<i-bs name="three-dots-vertical"></i-bs>
</button>
<div ngbDropdownMenu aria-labelledby="actionsMenuMobile">
<button (click)="editField(field)" *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.CustomField }" ngbDropdownItem i18n>Edit</button>
<button class="text-danger" (click)="deleteField(field)" *pngxIfPermissions="{ action: PermissionAction.Delete, type: PermissionType.CustomField }" ngbDropdownItem i18n>Delete</button>
@if (field.document_count > 0) {
<button (click)="filterDocuments(field)" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Document }" ngbDropdownItem i18n>Filter Documents ({{ field.document_count }})</button>
}
</div>
</div>
</div>
<div class="btn-group d-none d-sm-inline-block">
<button *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.CustomField }" class="btn btn-sm btn-outline-secondary" type="button" (click)="editField(field)">
<i-bs width="1em" height="1em" name="pencil"></i-bs>&nbsp;<ng-container i18n>Edit</ng-container>
</button>
<button *pngxIfPermissions="{ action: PermissionAction.Delete, type: PermissionType.CustomField }" class="btn btn-sm btn-outline-danger" type="button" (click)="deleteField(field)">
<i-bs width="1em" height="1em" name="trash"></i-bs>&nbsp;<ng-container i18n>Delete</ng-container>
</button>
</div>
@if (field.document_count > 0) {
<div class="btn-group d-none d-sm-inline-block ms-2">
<button class="btn btn-sm btn-outline-secondary" type="button" (click)="filterDocuments(field)">
<i-bs width="1em" height="1em" name="filter"></i-bs>&nbsp;<ng-container i18n>Documents</ng-container><span class="badge bg-light text-secondary ms-2">{{ field.document_count }}</span>
</button>
</div>
}
</div>
</div>
</li>
}
@if (!loading && fields.length === 0) {
<li class="list-group-item" i18n>No fields defined.</li>
}
</ul>

View File

@ -1,4 +0,0 @@
// hide caret on mobile dropdown
.d-block.d-sm-none .dropdown-toggle::after {
display: none;
}

View File

@ -1,148 +0,0 @@
import { Component, OnInit } from '@angular/core'
import {
NgbDropdownModule,
NgbModal,
NgbPaginationModule,
} from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { delay, takeUntil, tap } from 'rxjs'
import { CustomField, DATA_TYPE_LABELS } from 'src/app/data/custom-field'
import {
CustomFieldQueryLogicalOperator,
CustomFieldQueryOperator,
} from 'src/app/data/custom-field-query'
import { FILTER_CUSTOM_FIELDS_QUERY } from 'src/app/data/filter-rule-type'
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
import { PermissionsService } from 'src/app/services/permissions.service'
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
import { DocumentService } from 'src/app/services/rest/document.service'
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
import { SettingsService } from 'src/app/services/settings.service'
import { ToastService } from 'src/app/services/toast.service'
import { ConfirmDialogComponent } from '../../common/confirm-dialog/confirm-dialog.component'
import { CustomFieldEditDialogComponent } from '../../common/edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component'
import { EditDialogMode } from '../../common/edit-dialog/edit-dialog.component'
import { PageHeaderComponent } from '../../common/page-header/page-header.component'
import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
@Component({
selector: 'pngx-custom-fields',
templateUrl: './custom-fields.component.html',
styleUrls: ['./custom-fields.component.scss'],
imports: [
PageHeaderComponent,
IfPermissionsDirective,
NgbDropdownModule,
NgbPaginationModule,
NgxBootstrapIconsModule,
],
})
export class CustomFieldsComponent
extends LoadingComponentWithPermissions
implements OnInit
{
public fields: CustomField[] = []
constructor(
private customFieldsService: CustomFieldsService,
public permissionsService: PermissionsService,
private modalService: NgbModal,
private toastService: ToastService,
private documentListViewService: DocumentListViewService,
private settingsService: SettingsService,
private documentService: DocumentService,
private savedViewService: SavedViewService
) {
super()
}
ngOnInit() {
this.reload()
}
reload() {
this.customFieldsService
.listAll()
.pipe(
takeUntil(this.unsubscribeNotifier),
tap((r) => {
this.fields = r.results
}),
delay(100)
)
.subscribe(() => {
this.show = true
this.loading = false
})
}
editField(field: CustomField) {
const modal = this.modalService.open(CustomFieldEditDialogComponent)
modal.componentInstance.dialogMode = field
? EditDialogMode.EDIT
: EditDialogMode.CREATE
modal.componentInstance.object = field
modal.componentInstance.succeeded
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe((newField) => {
this.toastService.showInfo($localize`Saved field "${newField.name}".`)
this.customFieldsService.clearCache()
this.settingsService.initializeDisplayFields()
this.documentService.reload()
this.reload()
})
modal.componentInstance.failed
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe((e) => {
this.toastService.showError($localize`Error saving field.`, e)
})
}
deleteField(field: CustomField) {
const modal = this.modalService.open(ConfirmDialogComponent, {
backdrop: 'static',
})
modal.componentInstance.title = $localize`Confirm delete field`
modal.componentInstance.messageBold = $localize`This operation will permanently delete this field.`
modal.componentInstance.message = $localize`This operation cannot be undone.`
modal.componentInstance.btnClass = 'btn-danger'
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.confirmClicked.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.customFieldsService.delete(field).subscribe({
next: () => {
modal.close()
this.toastService.showInfo($localize`Deleted field "${field.name}"`)
this.customFieldsService.clearCache()
this.settingsService.initializeDisplayFields()
this.documentService.reload()
this.savedViewService.reload()
this.reload()
},
error: (e) => {
this.toastService.showError(
$localize`Error deleting field "${field.name}".`,
e
)
},
})
})
}
getDataType(field: CustomField): string {
return DATA_TYPE_LABELS.find((l) => l.id === field.data_type).name
}
filterDocuments(field: CustomField) {
this.documentListViewService.quickFilter([
{
rule_type: FILTER_CUSTOM_FIELDS_QUERY,
value: JSON.stringify([
CustomFieldQueryLogicalOperator.Or,
[[field.id, CustomFieldQueryOperator.Exists, true]],
]),
},
])
}
}

View File

@ -2,7 +2,7 @@
<button class="btn btn-sm btn-outline-secondary" (click)="clearSelection()" [hidden]="selectedObjects.size === 0">
<i-bs name="x"></i-bs>&nbsp;<ng-container i18n>Clear selection</ng-container>
</button>
<button type="button" class="btn btn-sm btn-outline-primary" (click)="setPermissions()" [disabled]="!userCanBulkEdit(PermissionAction.Change) || selectedObjects.size === 0">
<!-- Built-in control flow instead of *ngIf: components reusing this template do not all import NgIf/CommonModule. -->
@if (!permissionsDisabled) {
<button type="button" class="btn btn-sm btn-outline-primary" (click)="setPermissions()" [disabled]="!userCanBulkEdit(PermissionAction.Change) || selectedObjects.size === 0">
<i-bs name="person-fill-lock"></i-bs>&nbsp;<ng-container i18n>Permissions</ng-container>
</button>
}
<button type="button" class="btn btn-sm btn-outline-danger" (click)="delete()" [disabled]="!userCanBulkEdit(PermissionAction.Delete) || selectedObjects.size === 0">

View File

@ -64,7 +64,7 @@ export abstract class ManagementListComponent<T extends MatchingModel>
private modalService: NgbModal,
private editDialogComponent: any,
private toastService: ToastService,
private documentListViewService: DocumentListViewService,
protected documentListViewService: DocumentListViewService,
private permissionsService: PermissionsService,
protected filterRuleType: number,
public typeName: string,
@ -93,6 +93,8 @@ export abstract class ManagementListComponent<T extends MatchingModel>
public selectedObjects: Set<number> = new Set()
public togggleAll: boolean = false
protected permissionsDisabled: boolean = false
ngOnInit(): void {
this.reloadData()

View File

@ -1,4 +1,4 @@
import { ObjectWithId } from './object-with-id'
import { MatchingModel } from './matching-model'
export enum CustomFieldDataType {
String = 'string',
@ -51,13 +51,11 @@ export const DATA_TYPE_LABELS = [
},
]
export interface CustomField extends ObjectWithId {
export interface CustomField extends MatchingModel {
data_type: CustomFieldDataType
name: string
created?: Date
extra_data?: {
select_options?: Array<{ label: string; id: string }>
default_currency?: string
}
document_count?: number
}

View File

@ -1,12 +1,12 @@
import { HttpClient } from '@angular/common/http'
import { Injectable } from '@angular/core'
import { CustomField } from 'src/app/data/custom-field'
import { AbstractPaperlessService } from './abstract-paperless-service'
import { AbstractNameFilterService } from './abstract-name-filter-service'
@Injectable({
providedIn: 'root',
})
export class CustomFieldsService extends AbstractPaperlessService<CustomField> {
export class CustomFieldsService extends AbstractNameFilterService<CustomField> {
constructor(http: HttpClient) {
super(http, 'custom_fields')
}

View File

@ -15,6 +15,7 @@ class DocumentsConfig(AppConfig):
from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated
from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_custom_fields
from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_storage_path
from documents.signals.handlers import set_tags
@ -24,6 +25,7 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_document_type)
document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_storage_path)
document_consumption_finished.connect(set_custom_fields)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_workflows_added)
document_updated.connect(run_workflows_updated)

View File

@ -97,6 +97,8 @@ class DocumentClassifier:
self.correspondent_classifier = None
self.document_type_classifier = None
self.storage_path_classifier = None
self.custom_fields_binarizer = None
self.custom_fields_classifier = None
self._stemmer = None
self._stop_words = None
@ -120,11 +122,12 @@ class DocumentClassifier:
self.data_vectorizer = pickle.load(f)
self.tags_binarizer = pickle.load(f)
self.tags_classifier = pickle.load(f)
self.correspondent_classifier = pickle.load(f)
self.document_type_classifier = pickle.load(f)
self.storage_path_classifier = pickle.load(f)
self.custom_fields_binarizer = pickle.load(f)
self.custom_fields_classifier = pickle.load(f)
except Exception as err:
raise ClassifierModelCorruptError from err
@ -162,6 +165,9 @@ class DocumentClassifier:
pickle.dump(self.document_type_classifier, f)
pickle.dump(self.storage_path_classifier, f)
pickle.dump(self.custom_fields_binarizer, f)
pickle.dump(self.custom_fields_classifier, f)
target_file_temp.rename(target_file)
def train(self) -> bool:
@ -183,6 +189,7 @@ class DocumentClassifier:
labels_correspondent = []
labels_document_type = []
labels_storage_path = []
labels_custom_fields = []
# Step 1: Extract and preprocess training data from the database.
logger.debug("Gathering data from database...")
@ -218,13 +225,25 @@ class DocumentClassifier:
hasher.update(y.to_bytes(4, "little", signed=True))
labels_storage_path.append(y)
labels_tags_unique = {tag for tags in labels_tags for tag in tags}
# Label the document with the ids of its auto-matching custom *fields*.
# doc.custom_fields yields per-document instances, so the instance pk would be
# a different label for every document; the related field's id is the label
# that match_custom_fields() later compares predictions against.
custom_fields = sorted(
    cf.field_id
    for cf in doc.custom_fields.filter(
        field__matching_algorithm=MatchingModel.MATCH_AUTO,
    )
)
# Fold the labels into the hash that is stored as last_auto_type_hash.
for cf in custom_fields:
    hasher.update(cf.to_bytes(4, "little", signed=True))
labels_custom_fields.append(custom_fields)
labels_tags_unique = {tag for tags in labels_tags for tag in tags}
num_tags = len(labels_tags_unique)
labels_custom_fields_unique = {cf for cfs in labels_custom_fields for cf in cfs}
num_custom_fields = len(labels_custom_fields_unique)
# Check if retraining is actually required.
# A document has been updated since the classifier was trained
# New auto tags, types, correspondent, storage paths exist
# New auto tags, types, correspondent, storage paths or custom fields exist
latest_doc_change = docs_queryset.latest("modified").modified
if (
self.last_doc_change_time is not None
@ -253,7 +272,8 @@ class DocumentClassifier:
logger.debug(
f"{docs_queryset.count()} documents, {num_tags} tag(s), {num_correspondents} correspondent(s), "
f"{num_document_types} document type(s). {num_storage_paths} storage path(s)",
f"{num_document_types} document type(s), {num_storage_paths} storage path(s), "
f"{num_custom_fields} custom field(s)",
)
from sklearn.feature_extraction.text import CountVectorizer
@ -345,6 +365,39 @@ class DocumentClassifier:
"There are no storage paths. Not training storage path classifier.",
)
if num_custom_fields > 0:
logger.debug("Training custom fields classifier...")
if num_custom_fields == 1:
# Special case where only one custom field has auto:
# Fallback to binary classification.
labels_custom_fields = [
label[0] if len(label) == 1 else -1
for label in labels_custom_fields
]
self.custom_fields_binarizer = LabelBinarizer()
labels_custom_fields_vectorized = (
self.custom_fields_binarizer.fit_transform(
labels_custom_fields,
).ravel()
)
else:
self.custom_fields_binarizer = MultiLabelBinarizer()
labels_custom_fields_vectorized = (
self.custom_fields_binarizer.fit_transform(labels_custom_fields)
)
self.custom_fields_classifier = MLPClassifier(tol=0.01)
self.custom_fields_classifier.fit(
data_vectorized,
labels_custom_fields_vectorized,
)
else:
self.custom_fields_classifier = None
logger.debug(
"There are no custom fields. Not training custom fields classifier.",
)
self.last_doc_change_time = latest_doc_change
self.last_auto_type_hash = hasher.digest()
@ -472,3 +525,29 @@ class DocumentClassifier:
return None
else:
return None
def predict_custom_fields(self, content: str) -> list[int]:
    """
    Predict which auto-matching custom fields apply to the given content.

    Returns a list of predicted label ids, or an empty list when no custom
    fields classifier is available or nothing is predicted. Only the labels
    are predicted; values for the fields are not produced here.
    """
    # TODO: can this return the value?
    if not self.custom_fields_classifier:
        return []

    # Imported lazily so that the no-classifier path above does not need sklearn.
    from sklearn.utils.multiclass import type_of_target

    X = self.data_vectorizer.transform([self.preprocess_content(content)])
    y = self.custom_fields_classifier.predict(X)
    custom_fields_ids = self.custom_fields_binarizer.inverse_transform(y)[0]
    target_type = type_of_target(y)

    if target_type.startswith("multilabel"):
        # the usual case when there are multiple custom fields.
        return list(custom_fields_ids)
    if target_type == "binary" and custom_fields_ids != -1:
        # This is for when we have binary classification with only one
        # custom field and the result is to assign this custom field.
        return [custom_fields_ids]
    # Usually binary as well with -1 as the result, but we're
    # going to catch everything else here as well.
    return []

View File

@ -7,6 +7,7 @@ from documents.classifier import load_classifier
from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document
from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_custom_fields
from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_storage_path
from documents.signals.handlers import set_tags
@ -17,9 +18,9 @@ logger = logging.getLogger("paperless.management.retagger")
class Command(ProgressBarMixin, BaseCommand):
# Help text for the retagger management command.
help = (
    "Using the current classification model, assigns correspondents, tags, "
    "document types, storage paths and custom fields to all documents, effectively "
    "allowing you to back-tag all previously indexed documents with metadata created "
    "(or modified) after their initial import."
)
def add_arguments(self, parser):
@ -27,6 +28,12 @@ class Command(ProgressBarMixin, BaseCommand):
parser.add_argument("-T", "--tags", default=False, action="store_true")
parser.add_argument("-t", "--document_type", default=False, action="store_true")
parser.add_argument("-s", "--storage_path", default=False, action="store_true")
parser.add_argument(
"-cf",
"--custom_fields",
default=False,
action="store_true",
)
parser.add_argument("-i", "--inbox-only", default=False, action="store_true")
parser.add_argument(
"--use-first",
@ -134,3 +141,16 @@ class Command(ProgressBarMixin, BaseCommand):
stdout=self.stdout,
style_func=self.style,
)
if options["custom_fields"]:
set_custom_fields(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)

View File

@ -132,6 +132,50 @@ def match_storage_paths(document: Document, classifier: DocumentClassifier, user
)
def match_custom_fields(
    document: Document,
    classifier: DocumentClassifier,
    user=None,
) -> dict:
    """
    Match the custom fields already attached to a document against its content.

    Custom fields work differently from the other matching models: the value
    for the match is needed as well, so a dict is returned instead of a list.

    Returns a dict keyed by custom field. The value is None for a field
    matched by the classifier (MATCH_AUTO) and the matched text for a field
    matched by its regular expression (MATCH_REGEX). A field whose pattern
    fails to compile is logged and skipped; it does not abort the other fields.

    `user` is accepted but not used here.
    """
    # TODO: this needs to return values as well
    predicted_custom_field_ids = (
        classifier.predict_custom_fields(document.content) if classifier else []
    )

    matched_fields = {}
    for instance in document.custom_fields.all():
        field = instance.field
        if field.matching_algorithm == MatchingModel.MATCH_AUTO:
            if field.pk in predicted_custom_field_ids:
                matched_fields[field] = None
        elif field.matching_algorithm == MatchingModel.MATCH_REGEX:
            # The field is itself the matching model, so its pattern and
            # case-sensitivity setting are read from it directly.
            flags = re.IGNORECASE if field.is_insensitive else 0
            try:
                match = re.search(
                    re.compile(field.match, flags),
                    document.content,
                )
            except re.error:
                logger.error(
                    f"Error while processing regular expression {field.match}",
                )
                continue
            if match:
                matched_fields[field] = match.group()
                log_reason(
                    field,
                    document,
                    f"the string {match.group()} matches the regular expression "
                    f"{field.match}",
                )

    return matched_fields
def matches(matching_model: MatchingModel, document: Document):
search_kwargs = {}

View File

@ -0,0 +1,55 @@
# Generated by Django 5.1.6 on 2025-03-20 23:37
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
# Schema migration that adds the matching-related columns (and an owner) to
# the "customfield" model of the "documents" app.
class Migration(migrations.Migration):
# Must run after documents migration 1065 and after the (swappable) user
# model exists, because the new "owner" column references it.
dependencies = [
("documents", "1065_workflowaction_assign_custom_fields_values"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
# Boolean flag "is_insensitive"; existing rows get True.
migrations.AddField(
model_name="customfield",
name="is_insensitive",
field=models.BooleanField(default=True, verbose_name="is insensitive"),
),
# Match pattern text, may be blank, at most 256 characters.
migrations.AddField(
model_name="customfield",
name="match",
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
),
# Which matching algorithm applies; defaults to 0 ("None").
migrations.AddField(
model_name="customfield",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=0,
verbose_name="matching algorithm",
),
),
# Optional owner; the column is set to NULL when the referenced user is
# deleted (on_delete=SET_NULL).
migrations.AddField(
model_name="customfield",
name="owner",
field=models.ForeignKey(
blank=True,
default=None,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
]

View File

@ -719,7 +719,7 @@ class ShareLink(SoftDeleteModel):
return f"Share Link for {self.document.title}"
class CustomField(models.Model):
class CustomField(MatchingModel):
"""
Defines the name and type of a custom field
"""
@ -760,6 +760,12 @@ class CustomField(models.Model):
),
)
matching_algorithm = models.PositiveIntegerField(
_("matching algorithm"),
choices=MatchingModel.MATCHING_ALGORITHMS,
default=MatchingModel.MATCH_NONE, # override with CustomField.FieldDataType.NONE
)
class Meta:
ordering = ("created",)
verbose_name = _("custom field")

View File

@ -582,7 +582,7 @@ class StoragePathField(serializers.PrimaryKeyRelatedField):
return StoragePath.objects.all()
class CustomFieldSerializer(serializers.ModelSerializer):
class CustomFieldSerializer(MatchingModelSerializer, serializers.ModelSerializer):
def __init__(self, *args, **kwargs):
context = kwargs.get("context")
self.api_version = int(
@ -597,8 +597,6 @@ class CustomFieldSerializer(serializers.ModelSerializer):
read_only=False,
)
document_count = serializers.IntegerField(read_only=True)
class Meta:
model = CustomField
fields = [
@ -607,6 +605,9 @@ class CustomFieldSerializer(serializers.ModelSerializer):
"data_type",
"extra_data",
"document_count",
"match",
"matching_algorithm",
"is_insensitive",
]
def validate(self, attrs):
@ -669,6 +670,19 @@ class CustomFieldSerializer(serializers.ModelSerializer):
raise serializers.ValidationError(
{"error": "extra_data.default_currency must be a 3-character string"},
)
if (
"matching_algorithm" in attrs
and attrs["matching_algorithm"] != CustomField.MATCH_REGEX
and "data_type" in attrs
and attrs["data_type"]
not in [
CustomField.FieldDataType.SELECT,
CustomField.FieldDataType.BOOL,
]
):
raise serializers.ValidationError(
{"error": "Only discrete data types support matching"},
)
return super().validate(attrs)
def to_internal_value(self, data):

View File

@ -318,6 +318,77 @@ def set_storage_path(
document.save(update_fields=("storage_path",))
def set_custom_fields(
    document: Document,
    logging_group=None,
    classifier: DocumentClassifier | None = None,
    base_url=None,
    stdout=None,
    style_func=None,
    *,
    replace=False,
    suggest=False,
    **kwargs,
):
    """
    Assign (or only suggest) custom fields that match the given document.

    :param document: the document to process.
    :param logging_group: passed as ``group`` in the log record's ``extra``.
    :param classifier: optional classifier forwarded to the matcher.
    :param base_url: when set, suggestions print a document URL built from it.
    :param stdout: writer used for suggestion output (needed if ``suggest``).
    :param style_func: style helper providing ``SUCCESS`` (needed if
        ``suggest``).
    :param replace: when true, existing field instances are deleted first,
        except those whose field has an empty ``match`` and is not
        ``MATCH_AUTO``.
    :param suggest: when true, nothing is created; matches are only printed.
    :param kwargs: ignored; absorbs extra keyword arguments from callers
        (e.g. ``sender``, ``use_first``).
    """
    if replace:
        CustomFieldInstance.objects.filter(document=document).exclude(
            Q(field__match="") & ~Q(field__matching_algorithm=CustomField.MATCH_AUTO),
        ).delete()

    current_fields = {instance.field for instance in document.custom_fields.all()}

    # The matcher maps field -> matched value; treat any falsy result as
    # "no matches" so a matching failure never crashes the handler.
    matched_fields_w_values: dict = (
        matching.match_custom_fields(document, classifier) or {}
    )
    matched_fields = set(matched_fields_w_values)

    relevant_fields = matched_fields - current_fields

    if suggest:
        # Fields currently on the document that automatic matching would not
        # have assigned.
        extra_fields = [
            f
            for f in current_fields - matched_fields
            if f.matching_algorithm == MatchingModel.MATCH_AUTO
        ]
        if not relevant_fields and not extra_fields:
            return
        doc_str = style_func.SUCCESS(str(document))
        if base_url:
            stdout.write(doc_str)
            stdout.write(f"{base_url}/documents/{document.pk}")
        else:
            stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
        if relevant_fields:
            stdout.write(
                "Suggest custom fields: "
                + ", ".join([f.name for f in relevant_fields]),
            )
        if extra_fields:
            stdout.write(
                "Extra custom fields: " + ", ".join([f.name for f in extra_fields]),
            )
    else:
        if not relevant_fields:
            return

        message = 'Assigning custom fields "{}" to "{}"'
        # Field names fill the first placeholder, the document the second.
        logger.info(
            message.format(", ".join([f.name for f in relevant_fields]), document),
            extra={"group": logging_group},
        )

        for field in relevant_fields:
            args = {
                "field": field,
                "document": document,
            }
            # The mapping is keyed by the field object itself, not its pk.
            value = matched_fields_w_values.get(field)
            if value is not None:
                # NOTE(review): the matched text is stored as-is; conversion
                # to the field's data type is presumably needed for
                # non-string types -- confirm.
                value_field_name = CustomFieldInstance.get_value_field_name(
                    data_type=field.data_type,
                )
                args[value_field_name] = value
            CustomFieldInstance.objects.create(
                **args,
            )
# see empty_trash in documents/tasks.py for signal handling
def cleanup_document_deletion(sender, instance, **kwargs):
with FileLock(settings.MEDIA_LOCK):