Split
This commit is contained in:
@@ -212,31 +212,13 @@ def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
|
||||
)
|
||||
merged_pdf.save(filepath)
|
||||
|
||||
overrides = DocumentMetadataOverrides()
|
||||
|
||||
if metadata_document_id:
|
||||
metadata_document = qs.get(id=metadata_document_id)
|
||||
if metadata_document is not None:
|
||||
overrides = DocumentMetadataOverrides.from_document(metadata_document)
|
||||
overrides.title = metadata_document.title + " (merged)"
|
||||
overrides.correspondent_id = (
|
||||
metadata_document.correspondent.pk
|
||||
if metadata_document.correspondent
|
||||
else None
|
||||
)
|
||||
overrides.document_type_id = (
|
||||
metadata_document.document_type.pk
|
||||
if metadata_document.document_type
|
||||
else None
|
||||
)
|
||||
overrides.storage_path_id = (
|
||||
metadata_document.storage_path.pk
|
||||
if metadata_document.storage_path
|
||||
else None
|
||||
)
|
||||
overrides.tag_ids = list(
|
||||
metadata_document.tags.values_list("id", flat=True),
|
||||
)
|
||||
# Include owner and permissions?
|
||||
else:
|
||||
overrides = DocumentMetadataOverrides()
|
||||
|
||||
logger.info("Adding merged document to the task queue.")
|
||||
consume_file.delay(
|
||||
@@ -248,3 +230,42 @@ def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
|
||||
)
|
||||
|
||||
return "OK"
|
||||
|
||||
|
||||
def split(doc_ids: list[int], pages: list[list[int]]):
    """
    Split one document into several new documents by page groups.

    For every entry in ``pages`` a new PDF holding those pages is written to
    the consumption directory and queued for consumption. The queued file
    carries metadata overrides copied from the source document and a title
    suffixed with ``(split N)``.

    Args:
        doc_ids: IDs of the selected documents; only the first one is split.
        pages: One list of 1-based page numbers per document to create.

    Returns:
        The string ``"OK"``. Exceptions raised while reading, writing or
        queueing the split files are logged and not propagated.
    """
    logger.info(
        f"Attempting to split document {doc_ids[0]} into {len(pages)} documents",
    )
    doc = Document.objects.get(id=doc_ids[0])
    import pikepdf

    path = os.path.join(settings.ORIGINALS_DIR, str(doc.filename))
    # Use only the final path component for the output name so directory
    # parts of the stored file name cannot leak into the consumption path.
    # NOTE(review): assumes doc.filename may contain sub-directories - confirm.
    base_name = os.path.basename(str(doc.filename))
    try:
        # The source is only read from, never saved over, so it does not need
        # to be opened with allow_overwriting_input.
        with pikepdf.open(path) as pdf:
            for idx, split_doc in enumerate(pages):
                dst = pikepdf.new()
                for page in split_doc:
                    # Page numbers are 1-based, pikepdf indexes from 0.
                    dst.pages.append(pdf.pages[page - 1])
                filepath = os.path.join(
                    settings.CONSUMPTION_DIR,
                    f"{base_name}_{split_doc[0]}-{split_doc[-1]}.pdf",
                )

                dst.save(filepath)

                # from_document is a static factory; no instance is needed.
                overrides = DocumentMetadataOverrides.from_document(doc)
                overrides.title = f"{doc.title} (split {idx + 1})"
                logger.info(
                    f"Adding split document with pages {split_doc} to the task queue.",
                )
                consume_file.delay(
                    ConsumableDocument(
                        source=DocumentSource.ConsumeFolder,
                        original_file=filepath,
                    ),
                    overrides,
                )
    except Exception as e:
        # The error goes into the message itself: passing it as an extra
        # positional argument would be treated as a %-format argument and
        # break the log record. logger.exception still adds the traceback.
        logger.exception(f"Error splitting document {doc.id}: {e}")

    return "OK"
|
||||
|
||||
@@ -5,6 +5,8 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import magic
|
||||
from guardian.shortcuts import get_groups_with_perms
|
||||
from guardian.shortcuts import get_users_with_perms
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
@@ -88,6 +90,40 @@ class DocumentMetadataOverrides:
|
||||
|
||||
return self
|
||||
|
||||
@staticmethod
def from_document(doc) -> "DocumentMetadataOverrides":
    """
    Fills in the overrides from a document object

    Copies the title, the ids of the correspondent, document type, storage
    path and owner (``None`` where unset), the tag ids, the ids of users and
    groups holding the ``view_document`` / ``change_document`` object
    permission, and the ids from ``doc.custom_fields``.

    Args:
        doc: The document whose metadata should be copied.

    Returns:
        A new DocumentMetadataOverrides instance.
    """
    overrides = DocumentMetadataOverrides()
    overrides.title = doc.title
    overrides.correspondent_id = doc.correspondent.id if doc.correspondent else None
    overrides.document_type_id = doc.document_type.id if doc.document_type else None
    overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
    overrides.owner_id = doc.owner.id if doc.owner else None
    overrides.tag_ids = list(doc.tags.values_list("id", flat=True))

    # Materialise the id lists right away so every field holds a plain list,
    # consistent with tag_ids and custom_field_ids, rather than a lazy
    # queryset.
    overrides.view_users = list(
        get_users_with_perms(
            doc,
            only_with_perms_in=["view_document"],
        ).values_list("id", flat=True),
    )
    overrides.change_users = list(
        get_users_with_perms(
            doc,
            only_with_perms_in=["change_document"],
        ).values_list("id", flat=True),
    )

    # get_groups_with_perms() has no only_with_perms_in filter; ask for the
    # group -> permission codenames mapping once and filter it here.
    groups_with_perms = get_groups_with_perms(doc, attach_perms=True)
    overrides.view_groups = [
        group.id
        for group, perms in groups_with_perms.items()
        if "view_document" in perms
    ]
    overrides.change_groups = [
        group.id
        for group, perms in groups_with_perms.items()
        if "change_document" in perms
    ]

    # NOTE(review): these are the ids of the objects behind doc.custom_fields;
    # confirm that is what custom_field_ids is expected to contain.
    overrides.custom_field_ids = list(
        doc.custom_fields.values_list("id", flat=True),
    )

    return overrides
|
||||
|
||||
|
||||
class DocumentSource(IntEnum):
|
||||
"""
|
||||
|
||||
@@ -871,6 +871,7 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
|
||||
"set_permissions",
|
||||
"rotate",
|
||||
"merge",
|
||||
"split",
|
||||
],
|
||||
label="Method",
|
||||
write_only=True,
|
||||
@@ -912,6 +913,8 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
|
||||
return bulk_edit.rotate
|
||||
elif method == "merge":
|
||||
return bulk_edit.merge
|
||||
elif method == "split":
|
||||
return bulk_edit.split
|
||||
else:
|
||||
raise serializers.ValidationError("Unsupported method.")
|
||||
|
||||
@@ -1000,6 +1003,29 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
|
||||
except ValueError:
|
||||
raise serializers.ValidationError("invalid rotation degrees")
|
||||
|
||||
def _validate_parameters_split(self, parameters):
    """
    Validate and normalise the ``pages`` parameter of the split method.

    ``parameters["pages"]`` must be a comma separated string of 1-based page
    numbers and/or inclusive ``first-last`` ranges, e.g. ``"1,2-4,5-6,7"``.
    On success it is replaced in place by one list of page numbers per part,
    e.g. ``[[1], [2, 3, 4], [5, 6], [7]]``.

    Args:
        parameters: The bulk edit parameters; modified in place.

    Raises:
        serializers.ValidationError: If ``pages`` is missing, is not a
            string, contains a non-numeric or malformed part, a page number
            below 1, or a range whose end precedes its start.
    """
    if "pages" not in parameters:
        raise serializers.ValidationError("pages not specified")
    try:
        pages = []
        for part in parameters["pages"].split(","):
            if "-" in part:
                # Exactly two bounds are required; anything else ("1-2-3",
                # "-3", "4-") fails the unpacking or the int conversion.
                first, last = (int(bound) for bound in part.split("-"))
            else:
                first = last = int(part)
            # Pages are 1-based, and a reversed range would produce an empty
            # document; reject both instead of passing them to the task.
            if first < 1 or last < first:
                raise ValueError(f"invalid page range: {part}")
            pages.append(list(range(first, last + 1)))
        parameters["pages"] = pages
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: "pages" was not sent as a string.
        raise serializers.ValidationError("invalid pages specified")
|
||||
|
||||
def validate(self, attrs):
|
||||
method = attrs["method"]
|
||||
parameters = attrs["parameters"]
|
||||
@@ -1018,6 +1044,12 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
|
||||
self._validate_parameters_set_permissions(parameters)
|
||||
elif method == bulk_edit.rotate:
|
||||
self._validate_parameters_rotate(parameters)
|
||||
elif method == bulk_edit.split:
|
||||
if len(attrs["documents"]) > 1:
|
||||
raise serializers.ValidationError(
|
||||
"Split method only supports one document",
|
||||
)
|
||||
self._validate_parameters_split(parameters)
|
||||
|
||||
return attrs
|
||||
|
||||
|
||||
@@ -859,3 +859,75 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
|
||||
self.assertEqual(kwargs["metadata_document_id"], self.doc3.id)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.split")
def test_split(self, m):
    """
    GIVEN:
        - A single document id and a valid page specification
    WHEN:
        - The bulk edit API is called with the split method
    THEN:
        - The request succeeds
        - bulk_edit.split is called once with the document id and the
          page specification parsed into lists of page numbers
    """
    m.return_value = "OK"

    payload = {
        "documents": [self.doc2.id],
        "method": "split",
        "parameters": {"pages": "1,2-4,5-6,7"},
    }
    response = self.client.post(
        "/api/documents/bulk_edit/",
        json.dumps(payload),
        content_type="application/json",
    )

    self.assertEqual(response.status_code, status.HTTP_200_OK)

    m.assert_called_once()
    call_args, call_kwargs = m.call_args
    self.assertCountEqual(call_args[0], [self.doc2.id])
    self.assertEqual(call_kwargs["pages"], [[1], [2, 3, 4], [5, 6], [7]])
|
||||
|
||||
def test_split_invalid_params(self):
    """
    GIVEN:
        - Split requests that are missing pages, use a malformed page
          specification, or select more than one document
    WHEN:
        - The bulk edit API is called with the split method
    THEN:
        - Each request is rejected with HTTP 400 and the matching message
    """
    cases = [
        # pages not specified
        (
            {
                "documents": [self.doc2.id],
                "method": "split",
                "parameters": {},
            },
            b"pages not specified",
        ),
        # wrong format
        (
            {
                "documents": [self.doc2.id],
                "method": "split",
                "parameters": {"pages": "1:7"},
            },
            b"invalid pages specified",
        ),
        # pages are well-formed, but only one document is supported
        (
            {
                "documents": [self.doc1.id, self.doc2.id],
                "method": "split",
                "parameters": {"pages": "1-2,3-7"},
            },
            b"Split method only supports one document",
        ),
    ]

    for payload, expected_message in cases:
        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(payload),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(expected_message, response.content)
|
||||
|
||||
Reference in New Issue
Block a user