Merge
This commit is contained in:
@@ -2,15 +2,20 @@ import hashlib
|
||||
import itertools
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from django.db.models import Q
|
||||
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.data_models import DocumentSource
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import StoragePath
|
||||
from documents.permissions import set_permissions_for_object
|
||||
from documents.tasks import bulk_update_documents
|
||||
from documents.tasks import consume_file
|
||||
from documents.tasks import update_document_archive_file
|
||||
from paperless import settings
|
||||
|
||||
@@ -179,3 +184,67 @@ def rotate(doc_ids: list[int], degrees: int):
|
||||
bulk_update_documents.delay(document_ids=affected_docs)
|
||||
|
||||
return "OK"
|
||||
|
||||
|
||||
def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
    """
    Merge the original PDFs of several documents into one new PDF and queue
    that file for consumption.

    Pages are appended in the order given by ``doc_ids``. Ids that do not
    match an existing document, and documents whose file cannot be opened or
    read, are skipped (and logged) instead of aborting the whole merge. If no
    document contributes any pages, nothing is written or queued.

    Args:
        doc_ids: Ids of the documents to merge, in the desired page order.
        metadata_document_id: Optional id of one of the documents in
            ``doc_ids``. When it resolves to a document, that document's
            title (suffixed with " (merged)"), correspondent, document type,
            storage path and tags are passed as overrides for the new
            document.

    Returns:
        The string "OK".
    """
    import pikepdf

    # Fetch every requested document with a single query. Ids that do not
    # exist are simply absent from the mapping, so lookups below can use
    # .get() instead of QuerySet.get(), which raises DoesNotExist.
    documents_by_id = Document.objects.in_bulk(doc_ids)

    merged_pdf = pikepdf.new()
    merged_doc_ids = []
    try:
        # Iterate doc_ids (not the query result) to preserve the requested order
        for doc_id in doc_ids:
            doc = documents_by_id.get(doc_id)
            if doc is None:
                logger.warning(
                    "Document %s does not exist, it will not be included in the merge",
                    doc_id,
                )
                continue
            path = os.path.join(settings.ORIGINALS_DIR, str(doc.filename))
            try:
                with pikepdf.open(path, allow_overwriting_input=True) as pdf:
                    merged_pdf.pages.extend(pdf.pages)
            except Exception:
                # Deliberately best-effort: one unreadable source must not
                # prevent the remaining documents from being merged.
                # logger.exception() already records the active traceback.
                logger.exception(
                    "Error merging document %s, it will not be included in the merge",
                    doc.id,
                )
            else:
                merged_doc_ids.append(doc.id)

        if not merged_doc_ids:
            # Avoid saving and queueing an empty PDF.
            logger.warning("No documents were merged")
            return "OK"

        # The id list is truncated to keep the file name at a sane length.
        joined_ids = "_".join(str(doc_id) for doc_id in doc_ids)[:100]
        # NOTE(review): the file is written into CONSUMPTION_DIR and is also
        # queued explicitly below; if a watcher monitors that directory the
        # file might be picked up twice - confirm.
        filepath = os.path.join(
            settings.CONSUMPTION_DIR,
            f"merged_{joined_ids}.pdf",
        )
        merged_pdf.save(filepath)
    finally:
        # Release the in-memory PDF on every exit path.
        merged_pdf.close()

    overrides = DocumentMetadataOverrides()

    if metadata_document_id:
        # Only documents that are part of the merge are eligible as the
        # metadata source; an unknown id just leaves the overrides empty.
        metadata_document = documents_by_id.get(metadata_document_id)
        if metadata_document is not None:
            overrides.title = metadata_document.title + " (merged)"
            overrides.correspondent_id = (
                metadata_document.correspondent.pk
                if metadata_document.correspondent
                else None
            )
            overrides.document_type_id = (
                metadata_document.document_type.pk
                if metadata_document.document_type
                else None
            )
            overrides.storage_path_id = (
                metadata_document.storage_path.pk
                if metadata_document.storage_path
                else None
            )
            overrides.tag_ids = list(
                metadata_document.tags.values_list("id", flat=True),
            )
            # Include owner and permissions?

    logger.info("Adding merged document to the task queue.")
    consume_file.delay(
        ConsumableDocument(
            source=DocumentSource.ConsumeFolder,
            original_file=filepath,
        ),
        overrides,
    )

    return "OK"
|
||||
|
||||
@@ -870,6 +870,7 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
|
||||
"redo_ocr",
|
||||
"set_permissions",
|
||||
"rotate",
|
||||
"merge",
|
||||
],
|
||||
label="Method",
|
||||
write_only=True,
|
||||
@@ -909,6 +910,8 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
|
||||
return bulk_edit.set_permissions
|
||||
elif method == "rotate":
|
||||
return bulk_edit.rotate
|
||||
elif method == "merge":
|
||||
return bulk_edit.merge
|
||||
else:
|
||||
raise serializers.ValidationError("Unsupported method.")
|
||||
|
||||
|
||||
@@ -836,3 +836,26 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
def test_merge(self, m):
    """
    GIVEN:
        - Two existing documents
    WHEN:
        - The bulk edit API is called with method "merge" and a
          metadata_document_id parameter
    THEN:
        - The request succeeds
        - bulk_edit.merge is called exactly once with both document ids
          and the metadata document id as a keyword argument
    """
    m.return_value = "OK"

    # Build the request body separately so the POST call stays readable.
    payload = {
        "documents": [self.doc2.id, self.doc3.id],
        "method": "merge",
        "parameters": {"metadata_document_id": self.doc3.id},
    }
    response = self.client.post(
        "/api/documents/bulk_edit/",
        json.dumps(payload),
        content_type="application/json",
    )

    self.assertEqual(response.status_code, status.HTTP_200_OK)

    m.assert_called_once()
    # The document ids arrive positionally, the parameters as keywords.
    call_args, call_kwargs = m.call_args
    self.assertCountEqual(call_args[0], [self.doc2.id, self.doc3.id])
    self.assertEqual(call_kwargs["metadata_document_id"], self.doc3.id)
|
||||
|
||||
Reference in New Issue
Block a user