From 8bd40e2c3eac333a4b9bf7c31ea13f1e9b05a37d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Czy=C5=BC?=
Date: Wed, 13 Dec 2023 16:12:25 +0100
Subject: [PATCH] Added merge PDF writer as alternative to Zipfile

---
 src/documents/bulk_download.py | 36 +++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/documents/bulk_download.py b/src/documents/bulk_download.py
index ecabd4515..ba2646de8 100644
--- a/src/documents/bulk_download.py
+++ b/src/documents/bulk_download.py
@@ -1,12 +1,30 @@
 import os
-from zipfile import ZipFile
 
 from documents.models import Document
+from documents.parsers import merge_pdfs
+
+
+class MergedPdfFile:
+    def __init__(self, output_file_path):
+        self._output_file_path = output_file_path
+        self._input_file_paths = []
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        merge_pdfs(self._input_file_paths, self._output_file_path)
+
+    def namelist(self):
+        return self._input_file_paths
+
+    def write(self, document_path, _):
+        self._input_file_paths.append(document_path)
 
 
 class BulkArchiveStrategy:
-    def __init__(self, zipf: ZipFile, follow_formatting: bool = False):
-        self.zipf = zipf
+    def __init__(self, writer, follow_formatting: bool = False):
+        self.writer = writer
         if follow_formatting:
             self.make_unique_filename = self._formatted_filepath
         else:
@@ -27,7 +45,7 @@ class BulkArchiveStrategy:
         counter = 0
         while True:
             filename = folder + doc.get_public_filename(archive, counter)
-            if filename in self.zipf.namelist():
+            if filename in self.writer.namelist():
                 counter += 1
             else:
                 return filename
@@ -57,29 +75,29 @@ class BulkArchiveStrategy:
 
 class OriginalsOnlyStrategy(BulkArchiveStrategy):
     def add_document(self, doc: Document):
-        self.zipf.write(doc.source_path, self.make_unique_filename(doc))
+        self.writer.write(doc.source_path, self.make_unique_filename(doc))
 
 
 class ArchiveOnlyStrategy(BulkArchiveStrategy):
     def add_document(self, doc: Document):
         if doc.has_archive_version:
-            self.zipf.write(
+            self.writer.write(
                 doc.archive_path,
                 self.make_unique_filename(doc, archive=True),
             )
         else:
-            self.zipf.write(doc.source_path, self.make_unique_filename(doc))
+            self.writer.write(doc.source_path, self.make_unique_filename(doc))
 
 
 class OriginalAndArchiveStrategy(BulkArchiveStrategy):
     def add_document(self, doc: Document):
         if doc.has_archive_version:
-            self.zipf.write(
+            self.writer.write(
                 doc.archive_path,
                 self.make_unique_filename(doc, archive=True, folder="archive/"),
             )
 
-        self.zipf.write(
+        self.writer.write(
             doc.source_path,
             self.make_unique_filename(doc, folder="originals/"),
         )