Move deletion of files after consumption of merged file

This commit is contained in:
Dominik Bruhn 2024-06-07 21:07:09 +02:00
parent 7e9a6e9ab9
commit 6a63e77135
4 changed files with 62 additions and 72 deletions

View File

@ -4,6 +4,7 @@ import logging
import os
from typing import Optional
from celery import chain
from celery import chord
from django.conf import settings
from django.db.models import Q
@ -19,6 +20,7 @@ from documents.models import StoragePath
from documents.permissions import set_permissions_for_object
from documents.tasks import bulk_update_documents
from documents.tasks import consume_file
from documents.tasks import delete_documents
from documents.tasks import update_document_archive_file
logger = logging.getLogger("paperless.bulk_edit")
@ -281,7 +283,8 @@ def merge(
overrides = DocumentMetadataOverrides()
logger.info("Adding merged document to the task queue.")
consume_file.delay(
consume_task = consume_file.s(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
@ -290,8 +293,10 @@ def merge(
)
if delete_originals:
logger.info("Removing original documents after merge")
delete(affected_docs)
logger.info("Removing original documents after consumption of merged document")
chain(consume_task, delete_documents.si(affected_docs)).delay()
else:
consume_task.delay()
return "OK"

View File

@ -292,3 +292,14 @@ def update_document_archive_file(document_id):
)
finally:
parser.cleanup()
@shared_task
def delete_documents(doc_ids: list[int]):
Document.objects.filter(id__in=doc_ids).delete()
from documents import index
with index.open_index_writer() as writer:
for id in doc_ids:
index.remove_document_by_id(writer, id)

View File

@ -376,45 +376,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
/ "0000003.pdf",
sample3,
)
sample4 = self.dirs.scratch_dir / "sample4.pdf"
shutil.copy(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000001.pdf",
sample4,
)
sample4_archive = self.dirs.archive_dir / "sample4_archive.pdf"
shutil.copy(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000001.pdf",
sample4_archive,
)
sample5 = self.dirs.scratch_dir / "sample5.pdf"
shutil.copy(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000002.pdf",
sample5,
)
sample5_archive = self.dirs.archive_dir / "sample5_archive.pdf"
shutil.copy(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000002.pdf",
sample5_archive,
)
self.doc1 = Document.objects.create(
checksum="A",
title="A",
@ -449,25 +410,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
mime_type="image/jpeg",
)
self.doc1_delete_after_merge = Document.objects.create(
checksum="Ad",
title="Adelete",
filename=sample4,
mime_type="application/pdf",
)
self.doc1_delete_after_merge.archive_filename = sample4_archive
self.doc1_delete_after_merge.save()
self.doc2_delete_after_merge = Document.objects.create(
checksum="Bd",
title="Bdelete",
filename=sample5,
mime_type="application/pdf",
)
self.doc2_delete_after_merge.archive_filename = sample5_archive
self.doc2_delete_after_merge.save()
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("documents.tasks.consume_file.s")
def test_merge(self, mock_consume_file):
"""
GIVEN:
@ -501,18 +444,25 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(result, "OK")
@mock.patch("documents.tasks.consume_file.delay")
def test_merge_and_delete_originals(self, mock_consume_file):
@mock.patch("documents.tasks.delete_documents.si")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.chain")
def test_merge_and_delete_originals(
self,
mock_chain,
mock_consume_file,
mock_delete_documents,
):
"""
GIVEN:
- Existing documents
WHEN:
- Merge action with deleting documents is called with 2 documents
- Merge action with deleting documents is called with 3 documents
THEN:
- Consume file should be called
- Documents should be deleted
- Consume file task should be called
- Document deletion task should be called
"""
doc_ids = [self.doc1_delete_after_merge.id, self.doc2_delete_after_merge.id]
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
result = bulk_edit.merge(doc_ids, delete_originals=True)
self.assertEqual(result, "OK")
@ -522,6 +472,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
)
mock_consume_file.assert_called()
mock_delete_documents.assert_called()
mock_chain.assert_called_once()
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(
@ -530,11 +482,11 @@ class TestPDFActions(DirectoriesMixin, TestCase):
)
self.assertEqual(consume_file_args[1].title, None)
with self.assertRaises(Document.DoesNotExist):
Document.objects.get(id=self.doc1_delete_after_merge.id)
with self.assertRaises(Document.DoesNotExist):
Document.objects.get(id=self.doc2_delete_after_merge.id)
delete_documents_args, _ = mock_delete_documents.call_args
self.assertEqual(
delete_documents_args[0],
doc_ids,
)
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.open")

View File

@ -150,3 +150,25 @@ class TestBulkUpdate(DirectoriesMixin, TestCase):
)
tasks.bulk_update_documents([doc1.pk])
class TestDeleteDocuments(DirectoriesMixin, TestCase):
def setUp(self):
super().setUp()
self.doc1 = Document.objects.create(
checksum="Ad",
title="Adelete",
)
self.doc2 = Document.objects.create(
checksum="Bd",
title="Bdelete",
)
def test(self):
tasks.delete_documents([self.doc1.id, self.doc2.id])
with self.assertRaises(Document.DoesNotExist):
Document.objects.get(id=self.doc1.id)
with self.assertRaises(Document.DoesNotExist):
Document.objects.get(id=self.doc2.id)