Add JPG-merge functionality

Source files in JPEG format previously could not be merged. This change preprocesses JPEG files by converting them to PDF first, which makes them mergeable.
This commit is contained in:
g3gg0.de 2024-04-24 18:31:10 +02:00 committed by GitHub
parent 7a0334f353
commit dfdeaaa4fe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -208,6 +208,7 @@ def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
qs = Document.objects.filter(id__in=doc_ids) qs = Document.objects.filter(id__in=doc_ids)
affected_docs = [] affected_docs = []
import pikepdf import pikepdf
from PIL import Image
merged_pdf = pikepdf.new() merged_pdf = pikepdf.new()
version = merged_pdf.pdf_version version = merged_pdf.pdf_version
@ -215,7 +216,20 @@ def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
for doc_id in doc_ids: for doc_id in doc_ids:
doc = qs.get(id=doc_id) doc = qs.get(id=doc_id)
try: try:
with pikepdf.open(str(doc.source_path)) as pdf: doc_path = str(doc.source_path)
if doc_path.lower().endswith('.jpg'):
# Convert JPG to PDF before merging
image = Image.open(doc_path)
image_pdf_path = doc_path + '.pdf'
# Convert image to PDF. If image is RGB, convert to RGBA to handle transparency.
if image.mode == 'RGB':
image = image.convert('RGBA')
image.save(image_pdf_path, 'PDF', resolution=100.0)
pdf_path = image_pdf_path
else:
pdf_path = doc_path
with pikepdf.open(pdf_path) as pdf:
version = max(version, pdf.pdf_version) version = max(version, pdf.pdf_version)
merged_pdf.pages.extend(pdf.pages) merged_pdf.pages.extend(pdf.pages)
affected_docs.append(doc.id) affected_docs.append(doc.id)