Move deletion of files after consumption of merged file

This commit is contained in:
Dominik Bruhn 2024-06-07 21:07:09 +02:00
parent 7e9a6e9ab9
commit 6a63e77135
4 changed files with 62 additions and 72 deletions

View File

@ -4,6 +4,7 @@ import logging
import os import os
from typing import Optional from typing import Optional
from celery import chain
from celery import chord from celery import chord
from django.conf import settings from django.conf import settings
from django.db.models import Q from django.db.models import Q
@ -19,6 +20,7 @@ from documents.models import StoragePath
from documents.permissions import set_permissions_for_object from documents.permissions import set_permissions_for_object
from documents.tasks import bulk_update_documents from documents.tasks import bulk_update_documents
from documents.tasks import consume_file from documents.tasks import consume_file
from documents.tasks import delete_documents
from documents.tasks import update_document_archive_file from documents.tasks import update_document_archive_file
logger = logging.getLogger("paperless.bulk_edit") logger = logging.getLogger("paperless.bulk_edit")
@ -281,7 +283,8 @@ def merge(
overrides = DocumentMetadataOverrides() overrides = DocumentMetadataOverrides()
logger.info("Adding merged document to the task queue.") logger.info("Adding merged document to the task queue.")
consume_file.delay(
consume_task = consume_file.s(
ConsumableDocument( ConsumableDocument(
source=DocumentSource.ConsumeFolder, source=DocumentSource.ConsumeFolder,
original_file=filepath, original_file=filepath,
@ -290,8 +293,10 @@ def merge(
) )
if delete_originals: if delete_originals:
logger.info("Removing original documents after merge") logger.info("Removing original documents after consumption of merged document")
delete(affected_docs) chain(consume_task, delete_documents.si(affected_docs)).delay()
else:
consume_task.delay()
return "OK" return "OK"

View File

@ -292,3 +292,14 @@ def update_document_archive_file(document_id):
) )
finally: finally:
parser.cleanup() parser.cleanup()
@shared_task
def delete_documents(doc_ids: list[int]):
    """
    Delete the documents with the given ids and drop them from the index.

    The database rows are removed first with a single queryset delete;
    afterwards every id in ``doc_ids`` is passed to
    ``index.remove_document_by_id`` using one shared index writer.
    """
    doomed = Document.objects.filter(id__in=doc_ids)
    doomed.delete()

    # NOTE(review): imported locally rather than at module level, presumably
    # to avoid a circular import - confirm before hoisting it.
    from documents import index

    with index.open_index_writer() as index_writer:
        for doc_id in doc_ids:
            index.remove_document_by_id(index_writer, doc_id)

View File

@ -376,45 +376,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
/ "0000003.pdf", / "0000003.pdf",
sample3, sample3,
) )
sample4 = self.dirs.scratch_dir / "sample4.pdf"
shutil.copy(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000001.pdf",
sample4,
)
sample4_archive = self.dirs.archive_dir / "sample4_archive.pdf"
shutil.copy(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000001.pdf",
sample4_archive,
)
sample5 = self.dirs.scratch_dir / "sample5.pdf"
shutil.copy(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000002.pdf",
sample5,
)
sample5_archive = self.dirs.archive_dir / "sample5_archive.pdf"
shutil.copy(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000002.pdf",
sample5_archive,
)
self.doc1 = Document.objects.create( self.doc1 = Document.objects.create(
checksum="A", checksum="A",
title="A", title="A",
@ -449,25 +410,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
mime_type="image/jpeg", mime_type="image/jpeg",
) )
self.doc1_delete_after_merge = Document.objects.create( @mock.patch("documents.tasks.consume_file.s")
checksum="Ad",
title="Adelete",
filename=sample4,
mime_type="application/pdf",
)
self.doc1_delete_after_merge.archive_filename = sample4_archive
self.doc1_delete_after_merge.save()
self.doc2_delete_after_merge = Document.objects.create(
checksum="Bd",
title="Bdelete",
filename=sample5,
mime_type="application/pdf",
)
self.doc2_delete_after_merge.archive_filename = sample5_archive
self.doc2_delete_after_merge.save()
@mock.patch("documents.tasks.consume_file.delay")
def test_merge(self, mock_consume_file): def test_merge(self, mock_consume_file):
""" """
GIVEN: GIVEN:
@ -501,18 +444,25 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(result, "OK") self.assertEqual(result, "OK")
@mock.patch("documents.tasks.consume_file.delay") @mock.patch("documents.tasks.delete_documents.si")
def test_merge_and_delete_originals(self, mock_consume_file): @mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.chain")
def test_merge_and_delete_originals(
self,
mock_chain,
mock_consume_file,
mock_delete_documents,
):
""" """
GIVEN: GIVEN:
- Existing documents - Existing documents
WHEN: WHEN:
- Merge action with deleting documents is called with 2 documents - Merge action with deleting documents is called with 3 documents
THEN: THEN:
- Consume file should be called - Consume file task should be called
- Documents should be deleted - Document deletion task should be called
""" """
doc_ids = [self.doc1_delete_after_merge.id, self.doc2_delete_after_merge.id] doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
result = bulk_edit.merge(doc_ids, delete_originals=True) result = bulk_edit.merge(doc_ids, delete_originals=True)
self.assertEqual(result, "OK") self.assertEqual(result, "OK")
@ -522,6 +472,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
) )
mock_consume_file.assert_called() mock_consume_file.assert_called()
mock_delete_documents.assert_called()
mock_chain.assert_called_once()
consume_file_args, _ = mock_consume_file.call_args consume_file_args, _ = mock_consume_file.call_args
self.assertEqual( self.assertEqual(
@ -530,11 +482,11 @@ class TestPDFActions(DirectoriesMixin, TestCase):
) )
self.assertEqual(consume_file_args[1].title, None) self.assertEqual(consume_file_args[1].title, None)
with self.assertRaises(Document.DoesNotExist): delete_documents_args, _ = mock_delete_documents.call_args
Document.objects.get(id=self.doc1_delete_after_merge.id) self.assertEqual(
delete_documents_args[0],
with self.assertRaises(Document.DoesNotExist): doc_ids,
Document.objects.get(id=self.doc2_delete_after_merge.id) )
@mock.patch("documents.tasks.consume_file.delay") @mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.open") @mock.patch("pikepdf.open")

View File

@ -150,3 +150,25 @@ class TestBulkUpdate(DirectoriesMixin, TestCase):
) )
tasks.bulk_update_documents([doc1.pk]) tasks.bulk_update_documents([doc1.pk])
class TestDeleteDocuments(DirectoriesMixin, TestCase):
    """Checks that the ``delete_documents`` task removes documents from the database."""

    def setUp(self):
        """Create the two documents that the task will be asked to delete."""
        super().setUp()
        self.doc1 = Document.objects.create(checksum="Ad", title="Adelete")
        self.doc2 = Document.objects.create(checksum="Bd", title="Bdelete")

    def test(self):
        """
        GIVEN:
            - Two existing documents
        WHEN:
            - The delete_documents task is run with both ids
        THEN:
            - Neither document can be fetched from the database anymore
        """
        doomed_ids = [self.doc1.id, self.doc2.id]
        tasks.delete_documents(doomed_ids)

        for doc_id in doomed_ids:
            with self.assertRaises(Document.DoesNotExist):
                Document.objects.get(id=doc_id)