diff --git a/src-ui/src/app/components/document-detail/document-detail.component.spec.ts b/src-ui/src/app/components/document-detail/document-detail.component.spec.ts
index 658d3dd6e..ddb437910 100644
--- a/src-ui/src/app/components/document-detail/document-detail.component.spec.ts
+++ b/src-ui/src/app/components/document-detail/document-detail.component.spec.ts
@@ -88,6 +88,7 @@ const doc: Document = {
   correspondent: 11,
   document_type: 21,
   storage_path: 31,
+  warehouse: 51,
   tags: [41, 42, 43],
   content: 'text content',
   added: new Date('May 4, 2014 03:24:00'),
@@ -232,7 +233,7 @@ describe('DocumentDetailComponent', () => {
        of({
          results: [
            {
-              id: 41,
+              id: 51,
              name: 'Warehouse41',
            },
          ],
@@ -862,7 +863,8 @@ describe('DocumentDetailComponent', () => {
      id: 22,
      name: 'Warehouse22',
      type: 'Warehouse',
-      parent_warehouse: 22,
+      parent_warehouse: 23,
+      path: '345/346/347',
    } as Warehouse
    const qfSpy = jest.spyOn(documentListViewService, 'quickFilter')
    component.filterDocuments([object])
diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts
index 29d80c90f..469ae7cf8 100644
--- a/src-ui/src/app/components/document-detail/document-detail.component.ts
+++ b/src-ui/src/app/components/document-detail/document-detail.component.ts
@@ -1009,7 +1009,7 @@ export class DocumentDetailComponent
        rule_type: FILTER_STORAGE_PATH,
        value: (i as StoragePath).id.toString(),
      }
-    } else if (i.hasOwnProperty('type')) {
+    } else if (i.hasOwnProperty('path')) {
      // Warehouse
      return {
        rule_type: FILTER_WAREHOUSE,
diff --git a/src/documents/admin.py b/src/documents/admin.py
index 46a726400..1e1c6ddc5 100644
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -14,6 +14,7 @@ from documents.models import SavedViewFilterRule
 from documents.models import ShareLink
 from documents.models import StoragePath
 from documents.models import Warehouse
+from documents.models import Folder
 from documents.models import Tag
 
 if settings.AUDIT_LOG_ENABLED:
@@ -44,6 +45,11 @@ class WarehouseAdmin(GuardedModelAdmin):
     list_filter = ("matching_algorithm",)
     list_editable = ("match", "matching_algorithm")
 
+class FolderAdmin(GuardedModelAdmin):
+    list_display = ("name", "path", "parent_folder", "match", "matching_algorithm")
+    list_filter = ("matching_algorithm",)
+    list_editable = ("match", "matching_algorithm")
+
 class DocumentAdmin(GuardedModelAdmin):
     search_fields = ("correspondent__name", "title", "content", "tags__name")
     readonly_fields = (
@@ -194,6 +200,7 @@ admin.site.register(Correspondent, CorrespondentAdmin)
 admin.site.register(Tag, TagAdmin)
 admin.site.register(DocumentType, DocumentTypeAdmin)
 admin.site.register(Warehouse, WarehouseAdmin)
+admin.site.register(Folder, FolderAdmin)
 admin.site.register(Document, DocumentAdmin)
 admin.site.register(SavedView, SavedViewAdmin)
 admin.site.register(StoragePath, StoragePathAdmin)
diff --git a/src/documents/apps.py b/src/documents/apps.py
index 3a3a429f8..0af535281 100644
--- a/src/documents/apps.py
+++ b/src/documents/apps.py
@@ -16,6 +16,7 @@ class DocumentsConfig(AppConfig):
         from documents.signals.handlers import run_workflow_updated
         from documents.signals.handlers import set_correspondent
         from documents.signals.handlers import set_warehouse
+        from documents.signals.handlers import set_folder
         from documents.signals.handlers import set_document_type
         from documents.signals.handlers import set_log_entry
         from documents.signals.handlers import set_storage_path
@@ -23,6 +24,7 @@ class DocumentsConfig(AppConfig):
 
         document_consumption_finished.connect(add_inbox_tags)
         document_consumption_finished.connect(set_correspondent)
+        document_consumption_finished.connect(set_folder)
         document_consumption_finished.connect(set_warehouse)
         document_consumption_finished.connect(set_document_type)
         document_consumption_finished.connect(set_tags)
diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py
index 31b309c87..24e9aca5c 100644
--- a/src/documents/bulk_edit.py
+++ b/src/documents/bulk_edit.py
@@ -16,6 +16,7 @@ from documents.models import Document
 from documents.models import DocumentType
 from documents.models import StoragePath
 from documents.models import Warehouse
+from documents.models import Folder
 from documents.permissions import set_permissions_for_object
 from documents.tasks import bulk_update_documents
 from documents.tasks import consume_file
@@ -53,6 +54,22 @@ def set_storage_path(doc_ids, storage_path):
     return "OK"
 
 
+def set_folder(doc_ids, folder):
+    if folder:
+        folder = Folder.objects.get(id=folder)
+
+    qs = Document.objects.filter(
+        Q(id__in=doc_ids) & ~Q(folder=folder),
+    )
+    affected_docs = [doc.id for doc in qs]
+    qs.update(folder=folder)
+
+    bulk_update_documents.delay(
+        document_ids=affected_docs,
+    )
+
+    return "OK"
+
 def set_warehouse(doc_ids, warehouse):
     if warehouse:
         warehouse = Warehouse.objects.get(id=warehouse)
diff --git a/src/documents/classifier.py b/src/documents/classifier.py
index 02fbcda98..e4c0df8f1 100644
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -87,6 +87,7 @@ class DocumentClassifier:
         self.tags_classifier = None
         self.correspondent_classifier = None
         self.warehouse_classifier = None
+        self.folder_classifier = None
         self.document_type_classifier = None
         self.storage_path_classifier = None
@@ -114,6 +115,7 @@ class DocumentClassifier:
                 self.tags_classifier = pickle.load(f)
                 self.correspondent_classifier = pickle.load(f)
                 self.warehouse_classifier = pickle.load(f)
+                self.folder_classifier = pickle.load(f)
                 self.document_type_classifier = pickle.load(f)
                 self.storage_path_classifier = pickle.load(f)
         except Exception as err:
@@ -151,6 +153,7 @@ class DocumentClassifier:
 
             pickle.dump(self.correspondent_classifier, f)
             pickle.dump(self.warehouse_classifier, f)
+            pickle.dump(self.folder_classifier, f)
             pickle.dump(self.document_type_classifier, f)
             pickle.dump(self.storage_path_classifier, f)
 
@@ -169,6 +172,7 @@ class DocumentClassifier:
         labels_tags = []
         labels_correspondent = []
         labels_warehouse = []
+        labels_folder = []
         labels_document_type = []
         labels_storage_path = []
@@ -190,6 +194,13 @@ class DocumentClassifier:
                 hasher.update(y.to_bytes(4, "little", signed=True))
             labels_correspondent.append(y)
 
+            y = -1
+            fo = doc.folder
+            if fo and fo.matching_algorithm == MatchingModel.MATCH_AUTO:
+                y = fo.pk
+            hasher.update(y.to_bytes(4, "little", signed=True))
+            labels_folder.append(y)
+
             y = -1
             wh = doc.warehouse
             if wh and wh.matching_algorithm == MatchingModel.MATCH_AUTO:
@@ -246,10 +257,11 @@ class DocumentClassifier:
         num_correspondents = len(set(labels_correspondent) | {-1}) - 1
         num_document_types = len(set(labels_document_type) | {-1}) - 1
         num_warehouses = len(set(labels_warehouse) | {-1}) - 1
+        num_folders = len(set(labels_folder) | {-1}) - 1
         num_storage_paths = len(set(labels_storage_path) | {-1}) - 1
 
         logger.debug(
-            f"{docs_queryset.count()} documents, {num_tags} tag(s), {num_correspondents} correspondent(s), {num_warehouses} warehouse(s) "
+            f"{docs_queryset.count()} documents, {num_tags} tag(s), {num_correspondents} correspondent(s), {num_warehouses} warehouse(s), {num_folders} folder(s), "
             f"{num_document_types} document type(s). {num_storage_paths} storage path(es)",
         )
@@ -315,6 +327,17 @@ class DocumentClassifier:
                 "There are no correspondents. Not training correspondent "
                 "classifier.",
             )
+
+        if num_folders > 0:
+            logger.debug("Training folder classifier...")
+            self.folder_classifier = MLPClassifier(tol=0.01)
+            self.folder_classifier.fit(data_vectorized, labels_folder)
+        else:
+            self.folder_classifier = None
+            logger.debug(
+                "There are no folders. Not training folder "
+                "classifier.",
+            )
 
         if num_warehouses > 0:
             logger.debug("Training warehouse classifier...")
@@ -437,6 +460,17 @@ class DocumentClassifier:
                 return None
         else:
             return None
+
+    def predict_folder(self, content: str) -> Optional[int]:
+        if self.folder_classifier:
+            X = self.data_vectorizer.transform([self.preprocess_content(content)])
+            folder_id = self.folder_classifier.predict(X)
+            if folder_id != -1:
+                return folder_id
+            else:
+                return None
+        else:
+            return None
 
     def predict_warehouse(self, content: str) -> Optional[int]:
         if self.warehouse_classifier:
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index d39aa4881..767d05c55 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -33,6 +33,7 @@ from documents.models import DocumentType
 from documents.models import FileInfo
 from documents.models import StoragePath
 from documents.models import Warehouse
+from documents.models import Folder
 from documents.models import Tag
 from documents.models import Workflow
 from documents.models import WorkflowAction
@@ -300,6 +301,7 @@ class Consumer(LoggingMixin):
         self.override_title = None
         self.override_correspondent_id = None
         self.override_warehouse_id = None
+        self.override_folder_id = None
         self.override_tag_ids = None
         self.override_document_type_id = None
         self.override_asn = None
@@ -497,6 +499,7 @@ class Consumer(LoggingMixin):
         override_document_type_id=None,
         override_tag_ids=None,
         override_warehouse_id=None,
+        override_folder_id=None,
         override_storage_path_id=None,
         task_id=None,
         override_created=None,
@@ -519,6 +522,7 @@ class Consumer(LoggingMixin):
         self.override_document_type_id = override_document_type_id
         self.override_tag_ids = override_tag_ids
         self.override_warehouse_id = override_warehouse_id
+        self.override_folder_id = override_folder_id
         self.override_storage_path_id = override_storage_path_id
         self.task_id = task_id or str(uuid.uuid4())
         self.override_created = override_created
@@ -878,6 +882,11 @@ class Consumer(LoggingMixin):
                     pk=self.override_storage_path_id,
                 )
 
+            if self.override_folder_id:
+                document.folder = Folder.objects.get(
+                    pk=self.override_folder_id,
+                )
+
             if self.override_warehouse_id:
                 document.warehouse = Warehouse.objects.get(
                     pk=self.override_warehouse_id,
diff --git a/src/documents/data_models.py b/src/documents/data_models.py
index b2a31fd54..70bf90a61 100644
--- a/src/documents/data_models.py
+++ b/src/documents/data_models.py
@@ -24,6 +24,7 @@ class DocumentMetadataOverrides:
     tag_ids: Optional[list[int]] = None
     storage_path_id: Optional[int] = None
     warehouse_id: Optional[int] = None
+    folder_id: Optional[int] = None
     created: Optional[datetime.datetime] = None
     asn: Optional[int] = None
     owner_id: Optional[int] = None
@@ -51,6 +52,8 @@ class DocumentMetadataOverrides:
         if other.storage_path_id is not None:
             self.storage_path_id = other.storage_path_id
         if other.warehouse_id is not None:
             self.warehouse_id = other.warehouse_id
+        if other.folder_id is not None:
+            self.folder_id = other.folder_id
         if other.owner_id is not None:
             self.owner_id = other.owner_id
@@ -104,6 +107,7 @@ class DocumentMetadataOverrides:
         overrides.document_type_id = doc.document_type.id if doc.document_type else None
         overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
         overrides.warehouse_id = doc.warehouse.id if doc.warehouse else None
+        overrides.folder_id = doc.folder.id if doc.folder else None
         overrides.owner_id = doc.owner.id if doc.owner else None
         overrides.tag_ids = list(doc.tags.values_list("id", flat=True))
diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py
index 9daa64de9..01493c666 100644
--- a/src/documents/file_handling.py
+++ b/src/documents/file_handling.py
@@ -175,14 +175,6 @@ def generate_filename(
         else:
             document_type = no_value_default
 
-        if doc.warehouse:
-            warehouse = pathvalidate.sanitize_filename(
-                doc.warehouse.name,
-                replacement_text="-",
-            )
-        else:
-            warehouse = no_value_default
-
         if doc.archive_serial_number:
             asn = str(doc.archive_serial_number)
         else:
@@ -207,7 +199,6 @@ def generate_filename(
             title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
             correspondent=correspondent,
             document_type=document_type,
-            warehouse=warehouse,
             created=local_created.isoformat(),
             created_year=local_created.strftime("%Y"),
             created_year_short=local_created.strftime("%y"),
diff --git a/src/documents/filters.py b/src/documents/filters.py
index 771d5e784..86c7fc579 100644
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
@@ -20,6 +20,7 @@ from documents.models import ShareLink
 from documents.models import StoragePath
 from documents.models import Tag
 from documents.models import Warehouse
+from documents.models import Folder
 
 CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
 ID_KWARGS = ["in", "exact"]
@@ -193,6 +194,8 @@ class DocumentFilterSet(FilterSet):
     storage_path__id__none = ObjectFilter(field_name="storage_path", exclude=True)
 
     warehouse__id__none = ObjectFilter(field_name="warehouse", exclude=True)
+
+    folder__id__none = ObjectFilter(field_name="folder", exclude=True)
 
     is_in_inbox = InboxFilter()
@@ -230,6 +233,9 @@ class DocumentFilterSet(FilterSet):
             "warehouse": ["isnull"],
             "warehouse__id": ID_KWARGS,
             "warehouse__name": CHAR_KWARGS,
+            "folder": ["isnull"],
+            "folder__id": ID_KWARGS,
+            "folder__name": CHAR_KWARGS,
             "owner": ["isnull"],
             "owner__id": ID_KWARGS,
             "custom_fields": ["icontains"],
@@ -275,4 +281,15 @@ class WarehouseFilterSet(FilterSet):
             "name": CHAR_KWARGS,
             "type": CHAR_KWARGS,
             "parent_warehouse": ID_KWARGS,
+            "path": CHAR_KWARGS,
+        }
+
+class FolderFilterSet(FilterSet):
+    class Meta:
+        model = Folder
+        fields = {
+            "id": ID_KWARGS,
+            "name": CHAR_KWARGS,
+            "parent_folder": ID_KWARGS,
+            "path": CHAR_KWARGS,
         }
\ No newline at end of file
diff --git a/src/documents/index.py b/src/documents/index.py
index b699243d5..96889af08 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -63,6 +63,9 @@ def get_schema():
         warehouse=TEXT(sortable=True),
         warehouse_id=NUMERIC(),
         has_warehouse=BOOLEAN(),
+        folder=TEXT(sortable=True),
+        folder_id=NUMERIC(),
+        has_folder=BOOLEAN(),
         created=DATETIME(sortable=True),
         modified=DATETIME(sortable=True),
         added=DATETIME(sortable=True),
@@ -161,6 +164,9 @@ def update_document(writer: AsyncWriter, doc: Document):
         warehouse=doc.warehouse.name if doc.warehouse else None,
         warehouse_id=doc.warehouse.id if doc.warehouse else None,
         has_warehouse=doc.warehouse is not None,
+        folder=doc.folder.name if doc.folder else None,
+        folder_id=doc.folder.id if doc.folder else None,
+        has_folder=doc.folder is not None,
         created=doc.created,
         added=doc.added,
         asn=asn,
@@ -204,6 +210,7 @@ class DelayedQuery:
     param_map = {
         "correspondent": ("correspondent", ["id", "id__in", "id__none", "isnull"]),
         "warehouse": ("warehouse", ["id", "id__in", "id__none", "isnull"]),
+        "folder": ("folder", ["id", "id__in", "id__none", "isnull"]),
         "document_type": ("type", ["id", "id__in", "id__none", "isnull"]),
         "storage_path": ("path", ["id", "id__in", "id__none", "isnull"]),
         "owner": ("owner", ["id", "id__in", "id__none", "isnull"]),
diff --git a/src/documents/matching.py b/src/documents/matching.py
index 90190b788..3ef71cd91 100644
--- a/src/documents/matching.py
+++ b/src/documents/matching.py
@@ -8,6 +8,7 @@ from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentSource
 from documents.models import Correspondent
 from documents.models import Warehouse
+from documents.models import Folder
 from documents.models import Document
 from documents.models import DocumentType
 from documents.models import MatchingModel
@@ -56,6 +57,29 @@ def match_correspondents(document: Document, classifier: DocumentClassifier, use
             correspondents,
         ),
     )
+
+def match_folders(document: Document, classifier: DocumentClassifier, user=None):
+    pred_id = classifier.predict_folder(document.content) if classifier else None
+
+    if user is None and document.owner is not None:
+        user = document.owner
+
+    if user is not None:
+        folders = get_objects_for_user_owner_aware(
+            user,
+            "documents.view_folder",
+            Folder,
+        )
+    else:
+        folders = Folder.objects.all()
+
+    return list(
+        filter(
+            lambda o: matches(o, document)
+            or (o.pk == pred_id and o.matching_algorithm == MatchingModel.MATCH_AUTO),
+            folders,
+        ),
+    )
 
 def match_warehouses(document: Document, classifier: DocumentClassifier, user=None):
     pred_id = classifier.predict_warehouse(document.content) if classifier else None
diff --git a/src/documents/migrations/1054_folder_document_folder_and_more.py b/src/documents/migrations/1054_folder_document_folder_and_more.py
new file mode 100644
index 000000000..fbf7c53cb
--- /dev/null
+++ b/src/documents/migrations/1054_folder_document_folder_and_more.py
@@ -0,0 +1,48 @@
+# Generated by Django 4.2.11 on 2024-06-05 03:01
+
+from django.conf import settings
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+        ('documents', '1053_remove_document_warehouses_document_warehouse'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Folder',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('name', models.CharField(max_length=128, verbose_name='name')),
+                ('match', models.CharField(blank=True, max_length=256, verbose_name='match')),
+                ('matching_algorithm', models.PositiveIntegerField(choices=[(0, 'None'), (1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm')),
+                ('is_insensitive', models.BooleanField(default=True, verbose_name='is insensitive')),
+                ('path', models.TextField(blank=True, null=True, verbose_name='path')),
+                ('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL, verbose_name='owner')),
+                ('parent_folder', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='documents.folder')),
+            ],
+            options={
+                'verbose_name': 'folder',
+                'verbose_name_plural': 'folders',
+                'ordering': ('name',),
+                'abstract': False,
+            },
+        ),
+        migrations.AddField(
+            model_name='document',
+            name='folder',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.folder', verbose_name='folder'),
+        ),
+        migrations.AddConstraint(
+            model_name='folder',
+            constraint=models.UniqueConstraint(fields=('name', 'owner'), name='documents_folder_unique_name_owner'),
+        ),
+        migrations.AddConstraint(
+            model_name='folder',
+            constraint=models.UniqueConstraint(condition=models.Q(('owner__isnull', True)), fields=('name',), name='documents_folder_name_uniq'),
+        ),
+    ]
diff --git a/src/documents/models.py b/src/documents/models.py
index c241acf52..ad8963f98 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -152,7 +152,17 @@ class Warehouse(MatchingModel):
 
     def __str__(self):
         return self.name
+
+class Folder(MatchingModel):
+    parent_folder = models.ForeignKey('self', on_delete=models.CASCADE, null=True, blank=True )
+    path = models.TextField(_("path"), null=True, blank=True)
 
+    class Meta(MatchingModel.Meta):
+        verbose_name = _("folder")
+        verbose_name_plural = _("folders")
 
+    def __str__(self):
+        return self.name
+
 class Document(ModelWithOwner):
     STORAGE_TYPE_UNENCRYPTED = "unencrypted"
     STORAGE_TYPE_GPG = "gpg"
@@ -179,6 +189,15 @@ class Document(ModelWithOwner):
         verbose_name=_("storage path"),
     )
 
+    folder = models.ForeignKey(
+        Folder,
+        blank=True,
+        null=True,
+        related_name="documents",
+        on_delete=models.SET_NULL,
+        verbose_name=_("folder"),
+    )
+
     warehouse = models.ForeignKey(
         Warehouse,
         blank=True,
diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py
index 314a8fde9..5b72a3ae9 100644
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -46,6 +46,7 @@ from documents.models import Workflow
 from documents.models import WorkflowAction
 from documents.models import WorkflowTrigger
 from documents.models import Warehouse
+from documents.models import Folder
 from documents.parsers import is_mime_type_supported
 from documents.permissions import get_groups_with_only_permission
 from documents.permissions import set_permissions_for_object
@@ -432,6 +433,10 @@ class WarehouseField(serializers.PrimaryKeyRelatedField):
     def get_queryset(self):
         return Warehouse.objects.all()
 
+class FolderField(serializers.PrimaryKeyRelatedField):
+    def get_queryset(self):
+        return Folder.objects.all()
+
 class DocumentTypeField(serializers.PrimaryKeyRelatedField):
     def get_queryset(self):
         return DocumentType.objects.all()
@@ -657,6 +662,7 @@ class DocumentSerializer(
     correspondent = CorrespondentField(allow_null=True)
     tags = TagsField(many=True)
     warehouse = WarehouseField(allow_null=True)
+    folder = FolderField(allow_null=True)
     document_type = DocumentTypeField(allow_null=True)
     storage_path = StoragePathField(allow_null=True)
 
@@ -776,6 +782,7 @@ class DocumentSerializer(
             "document_type",
             "storage_path",
             "warehouse",
+            "folder",
             "title",
             "content",
             "tags",
@@ -883,6 +890,7 @@ class BulkEditSerializer(
         "set_document_type",
         "set_storage_path",
         "set_warehouse"
+        "set_folder",
         "add_tag",
         "remove_tag",
         "modify_tags",
@@ -919,6 +927,8 @@ class BulkEditSerializer(
             return bulk_edit.set_storage_path
         elif method == "set_warehouse":
             return bulk_edit.set_warehouse
+        elif method == "set_folder":
+            return bulk_edit.set_folder
         elif method == "add_tag":
             return bulk_edit.add_tag
         elif method == "remove_tag":
@@ -974,6 +984,7 @@ class BulkEditSerializer(
                 raise serializers.ValidationError("Correspondent does not exist")
         else:
             raise serializers.ValidationError("correspondent not specified")
+
     def _validate_parameters_warehouse(self, parameters):
         if "warehouse" in parameters:
             warehouse_id = parameters["warehouse"]
@@ -985,7 +996,19 @@ class BulkEditSerializer(
                 raise serializers.ValidationError("Warehouse does not exist")
         else:
             raise serializers.ValidationError("warehouse not specified")
-
+
+    def _validate_parameters_folder(self, parameters):
+        if "folder" in parameters:
+            folder_id = parameters["folder"]
+            if folder_id is None:
+                return
+            try:
+                Folder.objects.get(id=folder_id)
+            except Folder.DoesNotExist:
+                raise serializers.ValidationError("Folder does not exist")
+        else:
+            raise serializers.ValidationError("folder not specified")
+
     def _validate_storage_path(self, parameters):
         if "storage_path" in parameters:
             storage_path_id = parameters["storage_path"]
@@ -1074,7 +1097,9 @@ class BulkEditSerializer(
         elif method == bulk_edit.set_storage_path:
             self._validate_storage_path(parameters)
         elif method == bulk_edit.set_warehouse:
-            self._validate_parameters_warehouse(parameters)
+            self._validate_parameters_warehouse(parameters)
+        elif method == bulk_edit.set_folder:
+            self._validate_parameters_folder(parameters)
         elif method == bulk_edit.set_permissions:
             self._validate_parameters_set_permissions(parameters)
         elif method == bulk_edit.rotate:
@@ -1124,6 +1149,14 @@ class PostDocumentSerializer(serializers.Serializer):
         required=False,
     )
 
+    folder = serializers.PrimaryKeyRelatedField(
+        queryset=Folder.objects.all(),
+        label="Folder",
+        allow_null=True,
+        write_only=True,
+        required=False,
+    )
+
     warehouse = serializers.PrimaryKeyRelatedField(
         queryset=Warehouse.objects.all(),
         label="Warehouse",
@@ -1192,6 +1225,12 @@ class PostDocumentSerializer(serializers.Serializer):
             return storage_path.id
         else:
             return None
+
+    def validate_folder(self, folder):
+        if folder:
+            return folder.id
+        else:
+            return None
 
     def validate_warehouse(self, warehouse):
         if warehouse:
@@ -1262,6 +1301,7 @@ class StoragePathSerializer(MatchingModelSerializer, OwnedObjectSerializer):
             title="title",
             correspondent="correspondent",
             document_type="document_type",
+            folder="folder",
             warehouse="warehouse",
             created="created",
             created_year="created_year",
@@ -1422,6 +1462,7 @@ class BulkEditObjectsSerializer(SerializerWithPerms, SetPermissionsMixin):
             "document_types",
             "storage_paths",
             "warehouses",
+            "folders",
         ],
         label="Object Type",
         write_only=True,
@@ -1468,6 +1509,8 @@ class BulkEditObjectsSerializer(SerializerWithPerms, SetPermissionsMixin):
             object_class = StoragePath
         elif object_type == "warehouses":
             object_class = Warehouse
+        elif object_type == "folders":
+            object_class = Folder
         return object_class
 
     def _validate_objects(self, objects, object_type):
@@ -1816,6 +1859,10 @@ class WarehouseSerializer(MatchingModelSerializer, OwnedObjectSerializer):
 
         return 0
 
+class FolderSerializer(MatchingModelSerializer, OwnedObjectSerializer):
-
+    class Meta:
+        model = Folder
+        fields = '__all__'
+
\ No newline at end of file
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index d9fc593e1..bbbe76f37 100644
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -129,6 +129,60 @@ def set_correspondent(
         document.correspondent = selected
         document.save(update_fields=("correspondent",))
 
+
+def set_folder(
+    sender,
+    document: Document,
+    logging_group=None,
+    classifier: Optional[DocumentClassifier] = None,
+    replace=False,
+    use_first=True,
+    suggest=False,
+    base_url=None,
+    stdout=None,
+    style_func=None,
+    **kwargs,
+):
+    if document.folder and not replace:
+        return
+
+    potential_folders = matching.match_folders(document, classifier)
+
+    potential_count = len(potential_folders)
+    selected = potential_folders[0] if potential_folders else None
+    if potential_count > 1:
+        if use_first:
+            logger.debug(
+                f"Detected {potential_count} potential folders, "
+                f"so we've opted for {selected}",
+                extra={"group": logging_group},
+            )
+        else:
+            logger.debug(
+                f"Detected {potential_count} potential folders, "
+                f"not assigning any folder",
+                extra={"group": logging_group},
+            )
+            return
+
+    if selected or replace:
+        if suggest:
+            _suggestion_printer(
+                stdout,
+                style_func,
+                "folder",
+                document,
+                selected,
+                base_url,
+            )
+        else:
+            logger.info(
+                f"Assigning folder {selected} to {document}",
+                extra={"group": logging_group},
+            )
+
+            document.folder = selected
+            document.save(update_fields=("folder",))
 
 def set_warehouse(
     sender,
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index c60832490..21fe96e2a 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -34,6 +34,7 @@ from documents.models import Document
 from documents.models import DocumentType
 from documents.models import StoragePath
 from documents.models import Warehouse
+from documents.models import Folder
 from documents.models import Tag
 from documents.parsers import DocumentParser
 from documents.parsers import get_parser_class_for_mime_type
@@ -75,6 +76,7 @@ def train_classifier():
         and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
         and not Correspondent.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
         and not Warehouse.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
+        and not Folder.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
         and not StoragePath.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
     ):
         logger.info("No automatic matching items, not training")
@@ -173,6 +175,7 @@ def consume_file(
             override_document_type_id=overrides.document_type_id,
             override_tag_ids=overrides.tag_ids,
             override_warehouse_id=overrides.warehouse_id,
+            override_folder_id=overrides.folder_id,
             override_storage_path_id=overrides.storage_path_id,
             override_created=overrides.created,
             override_asn=overrides.asn,
diff --git a/src/documents/views.py b/src/documents/views.py
index 3fdb961c3..08ad0cfd9 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -100,11 +100,13 @@ from documents.filters import ShareLinkFilterSet
 from documents.filters import StoragePathFilterSet
 from documents.filters import TagFilterSet
 from documents.filters import WarehouseFilterSet
+from documents.filters import FolderFilterSet
 from documents.matching import match_correspondents
 from documents.matching import match_document_types
 from documents.matching import match_storage_paths
 from documents.matching import match_warehouses
+from documents.matching import match_folders
 from documents.matching import match_tags
 from documents.models import Correspondent
 from documents.models import CustomField
@@ -121,6 +123,7 @@ from documents.models import Workflow
 from documents.models import WorkflowAction
 from documents.models import WorkflowTrigger
 from documents.models import Warehouse
+from documents.models import Folder
 from documents.parsers import get_parser_class_for_mime_type
 from documents.parsers import parse_date_generator
 
@@ -150,6 +153,7 @@ from documents.serialisers import WorkflowActionSerializer
 from documents.serialisers import WorkflowSerializer
 from documents.serialisers import WorkflowTriggerSerializer
 from documents.serialisers import WarehouseSerializer
+from documents.serialisers import FolderSerializer
 from documents.signals import document_updated
 from documents.tasks import consume_file
 
@@ -337,7 +341,7 @@ class DocumentViewSet(
         ObjectOwnedOrGrantedPermissionsFilter,
     )
     filterset_class = DocumentFilterSet
-    search_fields = ("title", "correspondent__name", "content", "warehouse")
+    search_fields = ("title", "correspondent__name", "content", "warehouse", "folder")
     ordering_fields = (
         "id",
         "title",
@@ -355,7 +359,7 @@ class DocumentViewSet(
         return (
             Document.objects.distinct()
             .annotate(num_notes=Count("notes"))
-            .select_related("correspondent", "storage_path", "document_type","warehouse", "owner")
+            .select_related("correspondent", "storage_path", "document_type","warehouse", "folder", "owner")
             .prefetch_related("tags", "custom_fields", "notes")
         )
 
@@ -533,6 +537,9 @@ class DocumentViewSet(
             "warehouses": [
                 wh.id for wh in match_warehouses(doc, classifier, request.user)
             ],
+            "folders": [
+                f.id for f in match_folders(doc, classifier, request.user)
+            ],
             "tags": [t.id for t in match_tags(doc, classifier, request.user)],
             "document_types": [
                 dt.id for dt in match_document_types(doc, classifier, request.user)
@@ -748,8 +755,11 @@ class DocumentViewSet(
     def get_queryset(self):
         queryset = self.queryset
         warehouse_id = self.request.query_params.get('warehouse_id', None)
+        # folder_id = self.request.query_param.get('folder_id', None)
         if warehouse_id is not None:
             queryset = self.get_warehouse(warehouse_id)
+        # if folder_id is not None:
+        #     queryset = self.get_folder(folder_id)
         return queryset
 
     def get_warehouse(self, warehouse_id):
@@ -778,6 +788,7 @@ class SearchResultSerializer(DocumentSerializer, PassUserMixin):
                 "storage_path",
                 "document_type",
                 "warehouse",
+                "folder",
                 "owner",
             )
             .prefetch_related("tags", "custom_fields", "notes")
@@ -967,6 +978,7 @@ class PostDocumentView(GenericAPIView):
         document_type_id = serializer.validated_data.get("document_type")
         storage_path_id = serializer.validated_data.get("storage_path")
         warehouse_id = serializer.validated_data.get("warehouse")
+        folder_id = serializer.validated_data.get("folder")
         tag_ids = serializer.validated_data.get("tags")
         title = serializer.validated_data.get("title")
         created = serializer.validated_data.get("created")
@@ -996,6 +1008,7 @@ class PostDocumentView(GenericAPIView):
             document_type_id=document_type_id,
             storage_path_id=storage_path_id,
             warehouse_id=warehouse_id,
+            folder_id=folder_id,
             tag_ids=tag_ids,
             created=created,
             asn=archive_serial_number,
@@ -1051,6 +1064,12 @@ class SelectionDataView(GenericAPIView):
                 Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
             ),
         )
+
+        folders = Folder.objects.annotate(
+            document_count=Count(
+                Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
+            ),
+        )
 
         r = Response(
             {
@@ -1067,6 +1086,9 @@ class SelectionDataView(GenericAPIView):
                 "selected_warehouses": [
                     {"id": t.id, "document_count": t.document_count} for t in warehouses
                 ],
+                "selected_folders": [
+                    {"id": t.id, "document_count": t.document_count} for t in folders
+                ],
                 "selected_storage_paths": [
                     {"id": t.id, "document_count": t.document_count}
                     for t in storage_paths
@@ -1162,6 +1184,19 @@ class StatisticsView(APIView):
                 ),
             )
         )
+
+        folder_count = (
+            Folder.objects.count()
+            if user is None
+            else len(
+                get_objects_for_user_owner_aware(
+                    user,
+                    "documents.view_folder",
+                    Folder,
+                ),
+            )
+        )
+
         storage_path_count = (
             StoragePath.objects.count()
             if user is None
             else len(
@@ -1212,6 +1247,7 @@ class StatisticsView(APIView):
                 "document_type_count": document_type_count,
                 "storage_path_count": storage_path_count,
                 "warehouse_count": warehouse_count,
+                "folder_count": folder_count,
             },
         )
 
@@ -1599,7 +1635,24 @@ class BulkEditObjectsView(PassUserMixin):
                 boxcases.delete()
                 shelves.delete()
                 warehouse.delete()
+
+        elif operation == "delete" and object_type == "folders":
+            for folder_id in object_ids:
+                folder = Folder.objects.get(id=int(folder_id))
+
+                def delete_folder_hierarchy(folder_instance):
+                    documents = Document.objects.filter(folder=folder_instance)
+                    documents.delete()
+                    child_folders = Folder.objects.filter(parent_folder=folder_instance)
+                    for child_folder in child_folders:
+                        delete_folder_hierarchy(child_folder)
+
+                    folder_instance.delete()
+
+                delete_folder_hierarchy(folder)
+
+            return Response(status=status.HTTP_200_OK)
 
         elif operation == "delete":
@@ -1773,6 +1826,9 @@ class SystemStatusView(PassUserMixin):
                     or Warehouse.objects.filter(
                         matching_algorithm=Tag.MATCH_AUTO,
                     ).exists()
+                    or Folder.objects.filter(
+                        matching_algorithm=Tag.MATCH_AUTO,
+                    ).exists()
                     or StoragePath.objects.filter(
                         matching_algorithm=Tag.MATCH_AUTO,
                     ).exists()
@@ -1903,13 +1959,14 @@ class WarehouseViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
 
         if old_parent_warehouse != instance.parent_warehouse:
 
-            if instance.type == Warehouse.SHELF:
+            if instance.type == Warehouse.SHELF and getattr(instance.parent_warehouse, 'type', "") == Warehouse.WAREHOUSE :
                 instance.path = f"{instance.parent_warehouse.path}/{instance.id}"
-            elif instance.type == Warehouse.BOXCASE:
+            elif instance.type == Warehouse.BOXCASE and getattr(instance.parent_warehouse, 'type', "") == Warehouse.SHELF :
                 instance.path = f"{instance.parent_warehouse.path}/{instance.id}"
-            else:
-
+            elif instance.type == Warehouse.WAREHOUSE and not instance.parent_warehouse:
                 instance.path = str(instance.id)
+            else:
+                return Response(status=status.HTTP_400_BAD_REQUEST)
             instance.save()
 
             boxcase_warehouses = Warehouse.objects.filter(type=Warehouse.BOXCASE, parent_warehouse=instance)
@@ -1920,6 +1977,36 @@ class WarehouseViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
 
         return Response(serializer.data)
 
+    def partial_update(self, request, *args, **kwargs):
+        partial = kwargs.pop('partial', True)
+        instance = self.get_object()
+        serializer = self.get_serializer(instance, data=request.data, partial=partial)
+        serializer.is_valid(raise_exception=True)
+
+
+        old_parent_warehouse = instance.parent_warehouse
+
+        self.perform_update(serializer)
+
+        if old_parent_warehouse != instance.parent_warehouse:
+
+            if instance.type == Warehouse.SHELF and getattr(instance.parent_warehouse, 'type', "") == Warehouse.WAREHOUSE :
+                instance.path = f"{instance.parent_warehouse.path}/{instance.id}"
+            elif instance.type == Warehouse.BOXCASE and getattr(instance.parent_warehouse, 'type', "") == Warehouse.SHELF :
+                instance.path = f"{instance.parent_warehouse.path}/{instance.id}"
+            elif instance.type == Warehouse.WAREHOUSE and not instance.parent_warehouse:
+                instance.path = str(instance.id)
+            else:
+                return Response(status=status.HTTP_400_BAD_REQUEST)
+            instance.save()
+
+            boxcase_warehouses = Warehouse.objects.filter(type=Warehouse.BOXCASE, parent_warehouse=instance)
+            for boxcase_warehouse in boxcase_warehouses:
+                boxcase_warehouse.path = f"{instance.path}/{boxcase_warehouse.id}"
+                boxcase_warehouse.save()
+
+
+        return Response(serializer.data)
 
     def destroy(self, request, pk, *args, **kwargs):
         warehouse = Warehouse.objects.get(id=pk)
@@ -1944,3 +2031,115 @@ class WarehouseViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
 
         return Response(status=status.HTTP_204_NO_CONTENT)
+
+
+class FolderViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
+    model = Folder
+
+    queryset = Folder.objects.select_related("owner").order_by(
+        Lower("name"),
+    )
+
+    serializer_class = FolderSerializer
+    pagination_class = StandardPagination
+    permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
+    filter_backends = (
+        DjangoFilterBackend,
+        OrderingFilter,
+        ObjectOwnedOrGrantedPermissionsFilter,
+    )
+    filterset_class = FolderFilterSet
+    ordering_fields = ("name", "path", "parent_folder", "document_count")
+
+    def create(self, request, *args, **kwargs):
+        # try:
+        serializer = FolderSerializer(data=request.data)
+        parent_folder = None
+        if serializer.is_valid(raise_exception=True):
+            parent_folder = serializer.validated_data.get('parent_folder',None)
+
+            parent_folder = Folder.objects.filter(id=parent_folder.id if parent_folder else 0).first()
+
+            if parent_folder == None:
+                folder = serializer.save()
+                folder.path = str(folder.id)
+                folder.save()
+            elif parent_folder:
+                folder = serializer.save(parent_folder=parent_folder)
+                folder.path = f"{parent_folder.path}/{folder.id}"
+                folder.save()
+            else:
+                return Response(status=status.HTTP_400_BAD_REQUEST)
+
+        return Response(serializer.data,status=status.HTTP_201_CREATED)
+
+    def update(self, request, *args, **kwargs):
+        partial = kwargs.pop('partial', False)
+        instance = self.get_object()
+        serializer = self.get_serializer(instance, data=request.data, partial=partial)
+        serializer.is_valid(raise_exception=True)
+
+        old_parent_folder = instance.parent_folder
+
+        self.perform_update(serializer)
+
+        if old_parent_folder != instance.parent_folder:
+            if instance.parent_folder:
+                instance.path = f"{instance.parent_folder.path}/{instance.id}"
+            else:
+                instance.path = f"{instance.id}"
+            instance.save()
+
+            self.update_child_folder_paths(instance)
+
+        return Response(serializer.data)
+
+    def partial_update(self, request, *args, **kwargs):
+        partial = kwargs.pop('partial', True)
+        instance = self.get_object()
+        serializer = self.get_serializer(instance, data=request.data, partial=partial)
+        serializer.is_valid(raise_exception=True)
+
+        old_parent_folder = instance.parent_folder
+
+        self.perform_update(serializer)
+
+        if old_parent_folder != instance.parent_folder:
+            if instance.parent_folder:
+                instance.path = f"{instance.parent_folder.path}/{instance.id}"
+            else:
+                instance.path = f"{instance.id}"
+            instance.save()
+
+            self.update_child_folder_paths(instance)
+
+        return Response(serializer.data)
+
+    def update_child_folder_paths(self, folder):
+        child_folders = Folder.objects.filter(parent_folder=folder)
+        for child_folder in child_folders:
+            if folder.path:
+                child_folder.path = f"{folder.path}/{child_folder.id}"
+            else:
+                child_folder.path = f"{child_folder.id}"
+            child_folder.save()
+            self.update_child_folder_paths(child_folder)
+
+
+    def destroy(self, request, pk, *args, **kwargs):
+        folder = Folder.objects.get(id=pk)
+
+        def delete_folder_hierarchy(folder_instance):
+            documents = Document.objects.filter(folder=folder_instance)
+            documents.delete()
+
+            child_folders = Folder.objects.filter(parent_folder=folder_instance)
+            for child_folder in child_folders:
+                delete_folder_hierarchy(child_folder)
+
+            folder_instance.delete()
+
+        delete_folder_hierarchy(folder)
+
+        return Response(status=status.HTTP_200_OK)
+
diff --git a/src/paperless/urls.py b/src/paperless/urls.py
index f22fd040d..5c7c89489 100644
--- a/src/paperless/urls.py
+++ b/src/paperless/urls.py
@@ -41,6 +41,7 @@ from documents.views import WorkflowActionViewSet
 from documents.views import WorkflowTriggerViewSet
 from documents.views import WorkflowViewSet
 from documents.views import WarehouseViewSet
+from documents.views import FolderViewSet
 from paperless.consumers import StatusConsumer
 from paperless.views import ApplicationConfigurationViewSet
 from paperless.views import DisconnectSocialAccountView
@@ -75,6 +76,7 @@ api_router.register(r"workflows", WorkflowViewSet)
 api_router.register(r"custom_fields", CustomFieldViewSet)
 api_router.register(r"config", ApplicationConfigurationViewSet)
 api_router.register(r"warehouses", WarehouseViewSet)
+api_router.register(r"folders", FolderViewSet)
 
 urlpatterns = [