Working on test updates

This commit is contained in:
Trenton H 2024-04-09 11:14:03 -07:00
parent 3d307adf82
commit c77f01b750
3 changed files with 189 additions and 114 deletions

View File

@ -14,6 +14,10 @@
{ {
"path": "./.github", "path": "./.github",
"name": "CI/CD" "name": "CI/CD"
},
{
"path": "./docs",
"name": "Documentation"
} }
], ],
@ -27,7 +31,6 @@
"**/.venv": true, "**/.venv": true,
"**/.coverage": true, "**/.coverage": true,
"**/coverage.json": true "**/coverage.json": true
}, }
"python.defaultInterpreterPath": "./.venv/bin/python3.exe",
} }
} }

View File

@ -42,7 +42,6 @@ from documents.plugins.base import AlwaysRunPluginMixin
from documents.plugins.base import ConsumeTaskPlugin from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import NoCleanupPluginMixin from documents.plugins.base import NoCleanupPluginMixin
from documents.plugins.base import NoSetupPluginMixin from documents.plugins.base import NoSetupPluginMixin
from documents.plugins.helpers import ProgressStatusOptions
from documents.signals import document_consumption_finished from documents.signals import document_consumption_finished
from documents.signals import document_consumption_started from documents.signals import document_consumption_started
from documents.utils import copy_basic_file_stats from documents.utils import copy_basic_file_stats
@ -266,15 +265,15 @@ class ConsumerPlugin(AlwaysRunPluginMixin, ConsumeTaskPlugin, LoggingMixin):
current_progress: int, current_progress: int,
max_progress: int, max_progress: int,
status: ConsumerFilePhase, status: ConsumerFilePhase,
message: Optional[ConsumerStatusShortMessage] = None, message: Optional[ConsumerStatusShortMessage | str] = None,
document_id=None, document_id=None,
): # pragma: no cover ): # pragma: no cover
self.status_mgr.send_progress( self.status_mgr.send_progress(
ProgressStatusOptions.WORKING, status,
message, message,
current_progress, current_progress,
max_progress, max_progress,
{ extra_args={
"document_id": document_id, "document_id": document_id,
"owner_id": self.override_owner_id if self.override_owner_id else None, "owner_id": self.override_owner_id if self.override_owner_id else None,
}, },
@ -282,7 +281,7 @@ class ConsumerPlugin(AlwaysRunPluginMixin, ConsumeTaskPlugin, LoggingMixin):
def _fail( def _fail(
self, self,
message: ConsumerStatusShortMessage, message: ConsumerStatusShortMessage | str,
log_message: Optional[str] = None, log_message: Optional[str] = None,
exc_info=None, exc_info=None,
exception: Optional[Exception] = None, exception: Optional[Exception] = None,
@ -556,9 +555,6 @@ class ConsumerPlugin(AlwaysRunPluginMixin, ConsumeTaskPlugin, LoggingMixin):
self.log.debug(f"Parser: {type(document_parser).__name__}") self.log.debug(f"Parser: {type(document_parser).__name__}")
# However, this already created working directories which we have to
# clean up.
# Parse the document. This may take some time. # Parse the document. This may take some time.
text = None text = None

View File

@ -6,6 +6,9 @@ import stat
import tempfile import tempfile
import uuid import uuid
import zoneinfo import zoneinfo
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
from unittest import mock from unittest import mock
from unittest.mock import MagicMock from unittest.mock import MagicMock
@ -18,9 +21,12 @@ from django.test import override_settings
from django.utils import timezone from django.utils import timezone
from guardian.core import ObjectPermissionChecker from guardian.core import ObjectPermissionChecker
from documents.consumer import Consumer
from documents.consumer import ConsumerError from documents.consumer import ConsumerError
from documents.consumer import ConsumerFilePhase from documents.consumer import ConsumerFilePhase
from documents.consumer import ConsumerPlugin
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Correspondent from documents.models import Correspondent
from documents.models import CustomField from documents.models import CustomField
from documents.models import Document from documents.models import Document
@ -32,6 +38,7 @@ from documents.parsers import DocumentParser
from documents.parsers import ParseError from documents.parsers import ParseError
from documents.tasks import sanity_check from documents.tasks import sanity_check
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import DummyProgressManager
from documents.tests.utils import FileSystemAssertsMixin from documents.tests.utils import FileSystemAssertsMixin
@ -256,19 +263,18 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
last_progress=100, last_progress=100,
last_progress_max=100, last_progress_max=100,
): ):
self._send_progress.assert_called() self.assertGreaterEqual(len(self.status.payloads), 2)
args, kwargs = self._send_progress.call_args_list[0] payload = self.status.payloads[0]
self.assertEqual(args[0], first_progress) self.assertEqual(payload["data"]["current_progress"], first_progress)
self.assertEqual(args[1], first_progress_max) self.assertEqual(payload["data"]["max_progress"], first_progress_max)
self.assertEqual(args[2], first_status) self.assertEqual(payload["data"]["status"], first_status)
args, kwargs = self._send_progress.call_args_list[ payload = self.status.payloads[-1]
len(self._send_progress.call_args_list) - 1
] self.assertEqual(payload["data"]["current_progress"], last_progress)
self.assertEqual(args[0], last_progress) self.assertEqual(payload["data"]["max_progress"], last_progress_max)
self.assertEqual(args[1], last_progress_max) self.assertEqual(payload["data"]["status"], last_status)
self.assertEqual(args[2], last_status)
def make_dummy_parser(self, logging_group, progress_callback=None): def make_dummy_parser(self, logging_group, progress_callback=None):
return DummyParser( return DummyParser(
@ -287,6 +293,26 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
): ):
return FaultyGenericExceptionParser(logging_group, self.dirs.scratch_dir) return FaultyGenericExceptionParser(logging_group, self.dirs.scratch_dir)
@contextmanager
def get_consumer(
self,
filepath: Path,
overrides: DocumentMetadataOverrides | None = None,
source: DocumentSource = DocumentSource.ConsumeFolder,
) -> Generator[ConsumerPlugin, None, None]:
# Store this for verification
self.status = DummyProgressManager(filepath.name, None)
reader = ConsumerPlugin(
ConsumableDocument(source, original_file=filepath),
overrides or DocumentMetadataOverrides(),
self.status, # type: ignore
self.dirs.scratch_dir,
"task-id",
)
reader.setup()
yield reader
reader.cleanup()
def setUp(self): def setUp(self):
super().setUp() super().setUp()
@ -304,34 +330,23 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
] ]
self.addCleanup(patcher.stop) self.addCleanup(patcher.stop)
# this prevents websocket message reports during testing.
patcher = mock.patch("documents.consumer.Consumer._send_progress")
self._send_progress = patcher.start()
self.addCleanup(patcher.stop)
self.consumer = Consumer()
def get_test_file(self): def get_test_file(self):
src = os.path.join( src = (
os.path.dirname(__file__), Path(__file__).parent
"samples", / "samples"
"documents", / "documents"
"originals", / "originals"
"0000001.pdf", / "0000001.pdf"
) )
dst = os.path.join(self.dirs.scratch_dir, "sample.pdf") dst = self.dirs.scratch_dir / "sample.pdf"
shutil.copy(src, dst) shutil.copy(src, dst)
return dst return dst
def get_test_archive_file(self): def get_test_archive_file(self):
src = os.path.join( src = (
os.path.dirname(__file__), Path(__file__).parent / "samples" / "documents" / "archive" / "0000001.pdf"
"samples",
"documents",
"archive",
"0000001.pdf",
) )
dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf") dst = self.dirs.scratch_dir / "sample_archive.pdf"
shutil.copy(src, dst) shutil.copy(src, dst)
return dst return dst
@ -343,8 +358,12 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Roughly equal to file modification time # Roughly equal to file modification time
rough_create_date_local = timezone.localtime(timezone.now()) rough_create_date_local = timezone.localtime(timezone.now())
# Consume the file with self.get_consumer(filename) as consumer:
document = self.consumer.try_consume_file(filename) consumer.run()
document = Document.objects.first()
self.assertIsNotNone(document)
self.assertEqual(document.content, "The Text") self.assertEqual(document.content, "The Text")
self.assertEqual( self.assertEqual(
@ -395,7 +414,12 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsFile(shadow_file) self.assertIsFile(shadow_file)
document = self.consumer.try_consume_file(filename) with self.get_consumer(filename) as consumer:
consumer.run()
document = Document.objects.first()
self.assertIsNotNone(document)
self.assertIsFile(document.source_path) self.assertIsFile(document.source_path)
@ -406,29 +430,48 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
filename = self.get_test_file() filename = self.get_test_file()
override_filename = "Statement for November.pdf" override_filename = "Statement for November.pdf"
document = self.consumer.try_consume_file( with self.get_consumer(
filename, filename,
override_filename=override_filename, DocumentMetadataOverrides(filename=override_filename),
) ) as consumer:
consumer.run()
document = Document.objects.first()
self.assertIsNotNone(document)
self.assertEqual(document.title, "Statement for November") self.assertEqual(document.title, "Statement for November")
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
def testOverrideTitle(self): def testOverrideTitle(self):
document = self.consumer.try_consume_file(
with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_title="Override Title", DocumentMetadataOverrides(title="Override Title"),
) ) as consumer:
consumer.run()
document = Document.objects.first()
self.assertIsNotNone(document)
self.assertEqual(document.title, "Override Title") self.assertEqual(document.title, "Override Title")
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
def testOverrideTitleInvalidPlaceholders(self): def testOverrideTitleInvalidPlaceholders(self):
with self.assertLogs("paperless.consumer", level="ERROR") as cm: with self.assertLogs("paperless.consumer", level="ERROR") as cm:
document = self.consumer.try_consume_file(
with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_title="Override {correspondent]", DocumentMetadataOverrides(title="Override {correspondent]"),
) ) as consumer:
consumer.run()
document = Document.objects.first()
self.assertIsNotNone(document)
self.assertEqual(document.title, "sample") self.assertEqual(document.title, "sample")
expected_str = "Error occurred parsing title override 'Override {correspondent]', falling back to original" expected_str = "Error occurred parsing title override 'Override {correspondent]', falling back to original"
self.assertIn(expected_str, cm.output[0]) self.assertIn(expected_str, cm.output[0])
@ -436,30 +479,44 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def testOverrideCorrespondent(self): def testOverrideCorrespondent(self):
c = Correspondent.objects.create(name="test") c = Correspondent.objects.create(name="test")
document = self.consumer.try_consume_file( with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_correspondent_id=c.pk, DocumentMetadataOverrides(correspondent_id=c.pk),
) ) as consumer:
consumer.run()
document = Document.objects.first()
self.assertIsNotNone(document)
self.assertEqual(document.correspondent.id, c.id) self.assertEqual(document.correspondent.id, c.id)
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
def testOverrideDocumentType(self): def testOverrideDocumentType(self):
dt = DocumentType.objects.create(name="test") dt = DocumentType.objects.create(name="test")
document = self.consumer.try_consume_file( with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_document_type_id=dt.pk, DocumentMetadataOverrides(document_type_id=dt.pk),
) ) as consumer:
consumer.run()
document = Document.objects.first()
self.assertEqual(document.document_type.id, dt.id) self.assertEqual(document.document_type.id, dt.id)
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
def testOverrideStoragePath(self): def testOverrideStoragePath(self):
sp = StoragePath.objects.create(name="test") sp = StoragePath.objects.create(name="test")
document = self.consumer.try_consume_file( with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_storage_path_id=sp.pk, DocumentMetadataOverrides(storage_path_id=sp.pk),
) ) as consumer:
consumer.run()
document = Document.objects.first()
self.assertEqual(document.storage_path.id, sp.id) self.assertEqual(document.storage_path.id, sp.id)
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
@ -467,10 +524,14 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
t1 = Tag.objects.create(name="t1") t1 = Tag.objects.create(name="t1")
t2 = Tag.objects.create(name="t2") t2 = Tag.objects.create(name="t2")
t3 = Tag.objects.create(name="t3") t3 = Tag.objects.create(name="t3")
document = self.consumer.try_consume_file(
with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_tag_ids=[t1.id, t3.id], DocumentMetadataOverrides(tag_ids=[t1.id, t3.id]),
) ) as consumer:
consumer.run()
document = Document.objects.first()
self.assertIn(t1, document.tags.all()) self.assertIn(t1, document.tags.all())
self.assertNotIn(t2, document.tags.all()) self.assertNotIn(t2, document.tags.all())
@ -487,10 +548,14 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
name="Custom Field 3", name="Custom Field 3",
data_type="url", data_type="url",
) )
document = self.consumer.try_consume_file(
with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_custom_field_ids=[cf1.id, cf3.id], DocumentMetadataOverrides(custom_field_ids=[cf1.id, cf3.id]),
) ) as consumer:
consumer.run()
document = Document.objects.first()
fields_used = [ fields_used = [
field_instance.field for field_instance in document.custom_fields.all() field_instance.field for field_instance in document.custom_fields.all()
@ -501,10 +566,15 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
def testOverrideAsn(self): def testOverrideAsn(self):
document = self.consumer.try_consume_file(
with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_asn=123, DocumentMetadataOverrides(asn=123),
) ) as consumer:
consumer.run()
document = Document.objects.first()
self.assertEqual(document.archive_serial_number, 123) self.assertEqual(document.archive_serial_number, 123)
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
@ -512,33 +582,51 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
c = Correspondent.objects.create(name="Correspondent Name") c = Correspondent.objects.create(name="Correspondent Name")
dt = DocumentType.objects.create(name="DocType Name") dt = DocumentType.objects.create(name="DocType Name")
document = self.consumer.try_consume_file( with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_correspondent_id=c.pk, DocumentMetadataOverrides(
override_document_type_id=dt.pk, correspondent_id=c.pk,
override_title="{correspondent}{document_type} {added_month}-{added_year_short}", document_type_id=dt.pk,
) title="{correspondent}{document_type} {added_month}-{added_year_short}",
),
) as consumer:
consumer.run()
document = Document.objects.first()
now = timezone.now() now = timezone.now()
self.assertEqual(document.title, f"{c.name}{dt.name} {now.strftime('%m-%y')}") self.assertEqual(document.title, f"{c.name}{dt.name} {now.strftime('%m-%y')}")
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
def testOverrideOwner(self): def testOverrideOwner(self):
testuser = User.objects.create(username="testuser") testuser = User.objects.create(username="testuser")
document = self.consumer.try_consume_file(
with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_owner_id=testuser.pk, DocumentMetadataOverrides(owner_id=testuser.pk),
) ) as consumer:
consumer.run()
document = Document.objects.first()
self.assertEqual(document.owner, testuser) self.assertEqual(document.owner, testuser)
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
def testOverridePermissions(self): def testOverridePermissions(self):
testuser = User.objects.create(username="testuser") testuser = User.objects.create(username="testuser")
testgroup = Group.objects.create(name="testgroup") testgroup = Group.objects.create(name="testgroup")
document = self.consumer.try_consume_file(
with self.get_consumer(
self.get_test_file(), self.get_test_file(),
override_view_users=[testuser.pk], DocumentMetadataOverrides(
override_view_groups=[testgroup.pk], view_users=[testuser.pk],
) view_groups=[testgroup.pk],
),
) as consumer:
consumer.run()
document = Document.objects.first()
user_checker = ObjectPermissionChecker(testuser) user_checker = ObjectPermissionChecker(testuser)
self.assertTrue(user_checker.has_perm("view_document", document)) self.assertTrue(user_checker.has_perm("view_document", document))
group_checker = ObjectPermissionChecker(testgroup) group_checker = ObjectPermissionChecker(testgroup)
@ -546,42 +634,37 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
def testNotAFile(self): def testNotAFile(self):
self.assertRaisesMessage(
ConsumerError,
"File not found",
self.consumer.try_consume_file,
"non-existing-file",
)
with self.get_consumer(Path("non-existing-file")) as consumer:
with self.assertRaisesMessage(ConsumerError, "File not found"):
consumer.run()
self._assert_first_last_send_progress(last_status="FAILED") self._assert_first_last_send_progress(last_status="FAILED")
def testDuplicates1(self): def testDuplicates1(self):
self.consumer.try_consume_file(self.get_test_file()) with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self.assertRaisesMessage( with self.get_consumer(self.get_test_file()) as consumer:
ConsumerError, with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
"It is a duplicate", consumer.run()
self.consumer.try_consume_file,
self.get_test_file(),
)
self._assert_first_last_send_progress(last_status="FAILED") self._assert_first_last_send_progress(last_status="FAILED")
def testDuplicates2(self): def testDuplicates2(self):
self.consumer.try_consume_file(self.get_test_file()) with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self.assertRaisesMessage( with self.get_consumer(self.get_test_archive_file()) as consumer:
ConsumerError, with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
"It is a duplicate", consumer.run()
self.consumer.try_consume_file,
self.get_test_archive_file(),
)
self._assert_first_last_send_progress(last_status="FAILED") self._assert_first_last_send_progress(last_status="FAILED")
def testDuplicates3(self): def testDuplicates3(self):
self.consumer.try_consume_file(self.get_test_archive_file()) with self.get_consumer(self.get_test_archive_file()) as consumer:
self.consumer.try_consume_file(self.get_test_file()) consumer.run()
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
@mock.patch("documents.parsers.document_consumer_declaration.send") @mock.patch("documents.parsers.document_consumer_declaration.send")
def testNoParsers(self, m): def testNoParsers(self, m):
@ -809,13 +892,6 @@ class TestConsumerCreatedDate(DirectoriesMixin, TestCase):
def setUp(self): def setUp(self):
super().setUp() super().setUp()
# this prevents websocket message reports during testing.
patcher = mock.patch("documents.consumer.Consumer._send_progress")
self._send_progress = patcher.start()
self.addCleanup(patcher.stop)
self.consumer = Consumer()
def test_consume_date_from_content(self): def test_consume_date_from_content(self):
""" """
GIVEN: GIVEN: