This commit is contained in:
shamoon
2024-03-02 16:26:54 -08:00
parent c9dd407cbe
commit d5eedbab0d
11 changed files with 357 additions and 4 deletions

View File

@@ -2,15 +2,20 @@ import hashlib
import itertools
import logging
import os
from typing import Optional
from django.db.models import Q
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.permissions import set_permissions_for_object
from documents.tasks import bulk_update_documents
from documents.tasks import consume_file
from documents.tasks import update_document_archive_file
from paperless import settings
@@ -179,3 +184,67 @@ def rotate(doc_ids: list[int], degrees: int):
bulk_update_documents.delay(document_ids=affected_docs)
return "OK"
def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
    """Merge the original PDFs of the given documents into one file and queue it for consumption.

    Pages are appended in the order given by ``doc_ids`` (not queryset order).
    Documents whose id is unknown or whose PDF cannot be opened are skipped
    with a logged error instead of aborting the whole merge.

    Args:
        doc_ids: Ids of the documents to merge, in the desired page order.
        metadata_document_id: Optional id of the document whose title,
            correspondent, document type, storage path and tags are copied
            onto the merged result.

    Returns:
        "OK" after the consume task has been dispatched.
    """
    import pikepdf

    qs = Document.objects.filter(id__in=doc_ids)

    merged_pdf = pikepdf.new()
    # Iterate doc_ids rather than the queryset to preserve the requested order.
    for doc_id in doc_ids:
        # .first() returns None for an unknown id; .get() would raise
        # Document.DoesNotExist and abort the entire merge.
        doc = qs.filter(id=doc_id).first()
        if doc is None:
            continue
        path = os.path.join(settings.ORIGINALS_DIR, str(doc.filename))
        try:
            # Read-only open: we never save back to the source file, so
            # allow_overwriting_input is not needed.
            with pikepdf.open(path) as pdf:
                merged_pdf.pages.extend(pdf.pages)
        except Exception:
            # logger.exception already records the active traceback; passing
            # the exception as an extra arg would break %-style formatting.
            logger.exception(
                f"Error merging document {doc.id}, it will not be included in the merge",
            )

    # Cap the id-joined suffix at 100 chars so the filename stays filesystem-safe.
    filepath = os.path.join(
        settings.CONSUMPTION_DIR,
        f"merged_{('_'.join([str(doc_id) for doc_id in doc_ids]))[:100]}.pdf",
    )
    merged_pdf.save(filepath)

    overrides = DocumentMetadataOverrides()
    if metadata_document_id:
        metadata_document = qs.filter(id=metadata_document_id).first()
        if metadata_document is not None:
            overrides.title = metadata_document.title + " (merged)"
            overrides.correspondent_id = (
                metadata_document.correspondent.pk
                if metadata_document.correspondent
                else None
            )
            overrides.document_type_id = (
                metadata_document.document_type.pk
                if metadata_document.document_type
                else None
            )
            overrides.storage_path_id = (
                metadata_document.storage_path.pk
                if metadata_document.storage_path
                else None
            )
            overrides.tag_ids = list(
                metadata_document.tags.values_list("id", flat=True),
            )
    # NOTE(review): owner and permissions are intentionally not copied yet.

    logger.info("Adding merged document to the task queue.")
    consume_file.delay(
        ConsumableDocument(
            source=DocumentSource.ConsumeFolder,
            original_file=filepath,
        ),
        overrides,
    )
    return "OK"

View File

@@ -870,6 +870,7 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
"redo_ocr",
"set_permissions",
"rotate",
"merge",
],
label="Method",
write_only=True,
@@ -909,6 +910,8 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
return bulk_edit.set_permissions
elif method == "rotate":
return bulk_edit.rotate
elif method == "merge":
return bulk_edit.merge
else:
raise serializers.ValidationError("Unsupported method.")

View File

@@ -836,3 +836,26 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
m.assert_not_called()
@mock.patch("documents.serialisers.bulk_edit.merge")
def test_merge(self, m):
    """The bulk-edit endpoint accepts the "merge" method and forwards the
    document ids and ``metadata_document_id`` to ``bulk_edit.merge``."""
    m.return_value = "OK"

    payload = {
        "documents": [self.doc2.id, self.doc3.id],
        "method": "merge",
        "parameters": {"metadata_document_id": self.doc3.id},
    }
    response = self.client.post(
        "/api/documents/bulk_edit/",
        json.dumps(payload),
        content_type="application/json",
    )

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    m.assert_called_once()
    call_args, call_kwargs = m.call_args
    # Order of ids is not part of the contract here, only membership.
    self.assertCountEqual(call_args[0], [self.doc2.id, self.doc3.id])
    self.assertEqual(call_kwargs["metadata_document_id"], self.doc3.id)