From 6a63e771354539a97dd7bf5b4a198a98f5dd411b Mon Sep 17 00:00:00 2001 From: Dominik Bruhn Date: Fri, 7 Jun 2024 21:07:09 +0200 Subject: [PATCH] Move deletion of files after consumption of merged file --- src/documents/bulk_edit.py | 11 +++- src/documents/tasks.py | 11 ++++ src/documents/tests/test_bulk_edit.py | 90 +++++++-------------------- src/documents/tests/test_tasks.py | 22 +++++++ 4 files changed, 62 insertions(+), 72 deletions(-) diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py index 9ff73d7fd..9db319b80 100644 --- a/src/documents/bulk_edit.py +++ b/src/documents/bulk_edit.py @@ -4,6 +4,7 @@ import logging import os from typing import Optional +from celery import chain from celery import chord from django.conf import settings from django.db.models import Q @@ -19,6 +20,7 @@ from documents.models import StoragePath from documents.permissions import set_permissions_for_object from documents.tasks import bulk_update_documents from documents.tasks import consume_file +from documents.tasks import delete_documents from documents.tasks import update_document_archive_file logger = logging.getLogger("paperless.bulk_edit") @@ -281,7 +283,8 @@ def merge( overrides = DocumentMetadataOverrides() logger.info("Adding merged document to the task queue.") - consume_file.delay( + + consume_task = consume_file.s( ConsumableDocument( source=DocumentSource.ConsumeFolder, original_file=filepath, @@ -290,8 +293,10 @@ def merge( ) if delete_originals: - logger.info("Removing original documents after merge") - delete(affected_docs) + logger.info("Removing original documents after consumption of merged document") + chain(consume_task, delete_documents.si(affected_docs)).delay() + else: + consume_task.delay() return "OK" diff --git a/src/documents/tasks.py b/src/documents/tasks.py index e3b2df990..7933ac37f 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -292,3 +292,14 @@ def update_document_archive_file(document_id): ) finally: parser.cleanup() + + +@shared_task +def delete_documents(doc_ids: list[int]): + Document.objects.filter(id__in=doc_ids).delete() + + from documents import index + + with index.open_index_writer() as writer: + for id in doc_ids: + index.remove_document_by_id(writer, id) diff --git a/src/documents/tests/test_bulk_edit.py b/src/documents/tests/test_bulk_edit.py index 1f5aca584..5c5e7ef5b 100644 --- a/src/documents/tests/test_bulk_edit.py +++ b/src/documents/tests/test_bulk_edit.py @@ -376,45 +376,6 @@ class TestPDFActions(DirectoriesMixin, TestCase): / "0000003.pdf", sample3, ) - - sample4 = self.dirs.scratch_dir / "sample4.pdf" - shutil.copy( - Path(__file__).parent - / "samples" - / "documents" - / "originals" - / "0000001.pdf", - sample4, - ) - sample4_archive = self.dirs.archive_dir / "sample4_archive.pdf" - shutil.copy( - Path(__file__).parent - / "samples" - / "documents" - / "originals" - / "0000001.pdf", - sample4_archive, - ) - - sample5 = self.dirs.scratch_dir / "sample5.pdf" - shutil.copy( - Path(__file__).parent - / "samples" - / "documents" - / "originals" - / "0000002.pdf", - sample5, - ) - sample5_archive = self.dirs.archive_dir / "sample5_archive.pdf" - shutil.copy( - Path(__file__).parent - / "samples" - / "documents" - / "originals" - / "0000002.pdf", - sample5_archive, - ) - self.doc1 = Document.objects.create( checksum="A", title="A", @@ -449,25 +410,7 @@ class TestPDFActions(DirectoriesMixin, TestCase): mime_type="image/jpeg", ) - self.doc1_delete_after_merge = Document.objects.create( - checksum="Ad", - title="Adelete", - filename=sample4, - mime_type="application/pdf", - ) - self.doc1_delete_after_merge.archive_filename = sample4_archive - self.doc1_delete_after_merge.save() - - self.doc2_delete_after_merge = Document.objects.create( - checksum="Bd", - title="Bdelete", - filename=sample5, - mime_type="application/pdf", - ) - self.doc2_delete_after_merge.archive_filename = sample5_archive - self.doc2_delete_after_merge.save() - - @mock.patch("documents.tasks.consume_file.delay") + @mock.patch("documents.tasks.consume_file.s") def test_merge(self, mock_consume_file): """ GIVEN: @@ -501,18 +444,25 @@ class TestPDFActions(DirectoriesMixin, TestCase): self.assertEqual(result, "OK") - @mock.patch("documents.tasks.consume_file.delay") - def test_merge_and_delete_originals(self, mock_consume_file): + @mock.patch("documents.tasks.delete_documents.si") + @mock.patch("documents.tasks.consume_file.s") + @mock.patch("documents.bulk_edit.chain") + def test_merge_and_delete_originals( + self, + mock_chain, + mock_consume_file, + mock_delete_documents, + ): """ GIVEN: - Existing documents WHEN: - - Merge action with deleting documents is called with 2 documents + - Merge action with deleting documents is called with 3 documents THEN: - - Consume file should be called - - Documents should be deleted + - Consume file task should be called + - Document deletion task should be called """ - doc_ids = [self.doc1_delete_after_merge.id, self.doc2_delete_after_merge.id] + doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id] result = bulk_edit.merge(doc_ids, delete_originals=True) self.assertEqual(result, "OK") @@ -522,6 +472,8 @@ class TestPDFActions(DirectoriesMixin, TestCase): ) mock_consume_file.assert_called() + mock_delete_documents.assert_called() + mock_chain.assert_called_once() consume_file_args, _ = mock_consume_file.call_args self.assertEqual( @@ -530,11 +482,11 @@ class TestPDFActions(DirectoriesMixin, TestCase): ) self.assertEqual(consume_file_args[1].title, None) - with self.assertRaises(Document.DoesNotExist): - Document.objects.get(id=self.doc1_delete_after_merge.id) - - with self.assertRaises(Document.DoesNotExist): - Document.objects.get(id=self.doc2_delete_after_merge.id) + delete_documents_args, _ = mock_delete_documents.call_args + self.assertEqual( + delete_documents_args[0], + doc_ids, + ) @mock.patch("documents.tasks.consume_file.delay") @mock.patch("pikepdf.open") diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index 334345b6a..6d0ed37a5 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -150,3 +150,25 @@ class TestBulkUpdate(DirectoriesMixin, TestCase): ) tasks.bulk_update_documents([doc1.pk]) + + +class TestDeleteDocuments(DirectoriesMixin, TestCase): + def setUp(self): + super().setUp() + self.doc1 = Document.objects.create( + checksum="Ad", + title="Adelete", + ) + self.doc2 = Document.objects.create( + checksum="Bd", + title="Bdelete", + ) + + def test(self): + tasks.delete_documents([self.doc1.id, self.doc2.id]) + + with self.assertRaises(Document.DoesNotExist): + Document.objects.get(id=self.doc1.id) + + with self.assertRaises(Document.DoesNotExist): + Document.objects.get(id=self.doc2.id)