diff --git a/src/documents/classifier.py b/src/documents/classifier.py index eb1f72191..d11b5f9cf 100644 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -52,7 +52,7 @@ def load_classifier() -> Optional["DocumentClassifier"]: except OSError: logger.exception("IO error while loading document classification model") classifier = None - except Exception: # pragma: nocover + except Exception: # pragma: no cover logger.exception("Unknown error while loading document classification model") classifier = None @@ -318,7 +318,7 @@ class DocumentClassifier: return True - def preprocess_content(self, content: str) -> str: # pragma: nocover + def preprocess_content(self, content: str) -> str: # pragma: no cover """ Process to contents of a document, distilling it down into words which are meaningful to the content diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index 1342a67f8..191f604de 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -26,7 +26,7 @@ from documents.tasks import consume_file try: from inotifyrecursive import INotify from inotifyrecursive import flags -except ImportError: # pragma: nocover +except ImportError: # pragma: no cover INotify = flags = None logger = logging.getLogger("paperless.management.consumer") diff --git a/src/documents/management/commands/loaddata_stdin.py b/src/documents/management/commands/loaddata_stdin.py index c3eced6e4..f6feb2e8d 100644 --- a/src/documents/management/commands/loaddata_stdin.py +++ b/src/documents/management/commands/loaddata_stdin.py @@ -5,7 +5,7 @@ from django.core.management.commands.loaddata import Command as LoadDataCommand # This class is used to migrate data between databases # That's difficult to test -class Command(LoadDataCommand): # pragma: nocover +class Command(LoadDataCommand): # pragma: no cover """ Allow the loading of data from 
standard in. Sourced originally from: https://gist.github.com/bmispelon/ad5a2c333443b3a1d051 (MIT licensed) diff --git a/src/documents/parsers.py b/src/documents/parsers.py index cb28c4298..3215d49a6 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -333,7 +333,7 @@ class DocumentParser(LoggingMixin): if self.progress_callback: self.progress_callback(current_progress, max_progress) - def get_settings(self): + def get_settings(self): # pragma: no cover """ A parser must implement this """ diff --git a/src/paperless/models.py b/src/paperless/models.py index 74db4726a..133668dd6 100644 --- a/src/paperless/models.py +++ b/src/paperless/models.py @@ -169,5 +169,5 @@ class ApplicationConfiguration(AbstractSingletonModel): class Meta: verbose_name = _("paperless application settings") - def __str__(self) -> str: + def __str__(self) -> str: # pragma: no cover return "ApplicationConfiguration" diff --git a/src/paperless/settings.py b/src/paperless/settings.py index ae021f6b1..b6e679dc7 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -51,6 +51,9 @@ def __get_boolean(key: str, default: str = "NO") -> bool: def __get_optional_boolean(key: str) -> Optional[bool]: + """ + Returns None if the environment key is not present, otherwise a boolean + """ if key in os.environ: return __get_boolean(key) return None @@ -64,6 +67,9 @@ def __get_int(key: str, default: int) -> int: def __get_optional_int(key: str) -> Optional[int]: + """ + Returns None if the environment key is not present, otherwise an integer + """ if key in os.environ: return __get_int(key, -1) return None @@ -76,26 +82,26 @@ def __get_float(key: str, default: float) -> float: return float(os.getenv(key, default)) -def __get_optional_float(key: str) -> Optional[float]: - if key in os.environ: - return __get_float(key, -1) - return None - - def __get_path( key: str, - default: Optional[Union[PathLike, str]] = None, -) -> Optional[Path]: + default: Union[PathLike, str], +) -> 
Path: """ Return a normalized, absolute path based on the environment variable or a default, - if provided. If not set and no default, returns None + if the variable is not set """ if key in os.environ: return Path(os.environ[key]).resolve() - elif default is not None: - return Path(default).resolve() - else: - return None + return Path(default).resolve() + + +def __get_optional_path(key: str) -> Optional[Path]: + """ + Returns None if the environment key is not present, otherwise a resolved, absolute Path + """ + if key in os.environ: + return __get_path(key, "") + return None def __get_list( @@ -345,7 +351,7 @@ MIDDLEWARE = [ ] # Optional to enable compression -if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: nocover +if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware") ROOT_URLCONF = "paperless.urls" @@ -513,7 +519,7 @@ CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken" SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid" LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language" -EMAIL_CERTIFICATE_FILE = __get_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION") +EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION") ############################################################################### diff --git a/src/paperless/views.py b/src/paperless/views.py index da8cddb22..16423671a 100644 --- a/src/paperless/views.py +++ b/src/paperless/views.py @@ -73,7 +73,7 @@ class StandardPagination(PageNumberPagination): class FaviconView(View): - def get(self, request, *args, **kwargs): # pragma: nocover + def get(self, request, *args, **kwargs): # pragma: no cover favicon = os.path.join( os.path.dirname(__file__), "static", diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index 0a237439c..e8a9104f6 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -92,7 +92,7 @@ class BaseMailAction: M: MailBox, message_uid: str, 
parameter: str, - ): # pragma: nocover + ): # pragma: no cover """ Perform mail action on the given mail uid in the mailbox. """ @@ -171,7 +171,7 @@ class TagMailAction(BaseMailAction): return AND(NOT(gmail_label=self.keyword), no_keyword=self.keyword) else: return {"no_keyword": self.keyword} - else: # pragma: nocover + else: # pragma: no cover raise ValueError("This should never happen.") def post_consume(self, M: MailBox, message_uid: str, parameter: str): @@ -361,7 +361,7 @@ def get_rule_action(rule: MailRule, supports_gmail_labels: bool) -> BaseMailActi elif rule.action == MailRule.MailAction.TAG: return TagMailAction(rule.action_parameter, supports_gmail_labels) else: - raise NotImplementedError("Unknown action.") # pragma: nocover + raise NotImplementedError("Unknown action.") # pragma: no cover def make_criterias(rule: MailRule, supports_gmail_labels: bool): @@ -397,7 +397,7 @@ def get_mailbox(server, port, security) -> MailBox: Returns the correct MailBox instance for the given configuration. 
""" ssl_context = ssl.create_default_context() - if settings.EMAIL_CERTIFICATE_FILE is not None: # pragma: nocover + if settings.EMAIL_CERTIFICATE_FILE is not None: # pragma: no cover ssl_context.load_verify_locations(cafile=settings.EMAIL_CERTIFICATE_FILE) if security == MailAccount.ImapSecurity.NONE: @@ -407,7 +407,7 @@ def get_mailbox(server, port, security) -> MailBox: elif security == MailAccount.ImapSecurity.SSL: mailbox = MailBox(server, port, ssl_context=ssl_context) else: - raise NotImplementedError("Unknown IMAP security") # pragma: nocover + raise NotImplementedError("Unknown IMAP security") # pragma: no cover return mailbox @@ -450,7 +450,7 @@ class MailAccountHandler(LoggingMixin): else: raise NotImplementedError( "Unknown title selector.", - ) # pragma: nocover + ) # pragma: no cover def _get_correspondent( self, @@ -478,7 +478,7 @@ class MailAccountHandler(LoggingMixin): else: raise NotImplementedError( "Unknown correspondent selector", - ) # pragma: nocover + ) # pragma: no cover def handle_mail_account(self, account: MailAccount): """ diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 4172a5752..f6d3847c0 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -213,7 +213,7 @@ class RasterisedDocumentParser(DocumentParser): ocrmypdf_args["skip_text"] = True elif self.settings.mode == ModeChoices.REDO: ocrmypdf_args["redo_ocr"] = True - else: + else: # pragma: no cover raise ParseError(f"Invalid ocr mode: {self.settings.mode}") if self.settings.clean == CleanChoices.CLEAN: diff --git a/src/paperless_tesseract/tests/test_parser_custom_settings.py b/src/paperless_tesseract/tests/test_parser_custom_settings.py index 90f198fa0..7621092dc 100644 --- a/src/paperless_tesseract/tests/test_parser_custom_settings.py +++ b/src/paperless_tesseract/tests/test_parser_custom_settings.py @@ -1,3 +1,5 @@ +import json + from django.test import TestCase from django.test import 
override_settings @@ -14,6 +16,9 @@ from paperless_tesseract.parsers import RasterisedDocumentParser class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase): @staticmethod def get_params(): + """ + Helper to get just the OCRMyPDF parameters from the parser + """ return RasterisedDocumentParser(None).construct_ocrmypdf_parameters( input_file="input.pdf", output_file="output.pdf", @@ -23,6 +28,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas ) def test_db_settings_ocr_pages(self): + """ + GIVEN: + - Django settings defines different value for OCR_PAGES than + configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ with override_settings(OCR_PAGES=10): instance = ApplicationConfiguration.objects.all().first() instance.pages = 5 @@ -32,6 +46,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas self.assertEqual(params["pages"], "1-5") def test_db_settings_ocr_language(self): + """ + GIVEN: + - Django settings defines different value for OCR_LANGUAGE than + configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ with override_settings(OCR_LANGUAGE="eng+deu"): instance = ApplicationConfiguration.objects.all().first() instance.language = "fra+ita" @@ -41,7 +64,16 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas self.assertEqual(params["language"], "fra+ita") def test_db_settings_ocr_output_type(self): - with override_settings(OCR_LANGUAGE="pdfa-3"): + """ + GIVEN: + - Django settings defines different value for OCR_OUTPUT_TYPE than + configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ + with override_settings(OCR_OUTPUT_TYPE="pdfa-3"): instance = ApplicationConfiguration.objects.all().first() instance.output_type = OutputTypeChoices.PDF_A 
instance.save() @@ -50,6 +82,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas self.assertEqual(params["output_type"], "pdfa") def test_db_settings_ocr_mode(self): + """ + GIVEN: + - Django settings defines different value for OCR_MODE than + configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ with override_settings(OCR_MODE="redo"): instance = ApplicationConfiguration.objects.all().first() instance.mode = ModeChoices.SKIP @@ -61,6 +102,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas self.assertNotIn("force_ocr", params) def test_db_settings_ocr_clean(self): + """ + GIVEN: + - Django settings defines different value for OCR_CLEAN than + configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ with override_settings(OCR_CLEAN="clean-final"): instance = ApplicationConfiguration.objects.all().first() instance.unpaper_clean = CleanChoices.CLEAN @@ -80,6 +130,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas self.assertNotIn("clean", params) def test_db_settings_ocr_deskew(self): + """ + GIVEN: + - Django settings defines different value for OCR_DESKEW than + configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ with override_settings(OCR_DESKEW=False): instance = ApplicationConfiguration.objects.all().first() instance.deskew = True @@ -89,6 +148,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas self.assertTrue(params["deskew"]) def test_db_settings_ocr_rotate(self): + """ + GIVEN: + - Django settings defines different value for OCR_ROTATE_PAGES + and OCR_ROTATE_PAGES_THRESHOLD than configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ with 
override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0): instance = ApplicationConfiguration.objects.all().first() instance.rotate_pages = True @@ -100,6 +168,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas self.assertAlmostEqual(params["rotate_pages_threshold"], 15.0) def test_db_settings_ocr_max_pixels(self): + """ + GIVEN: + - Django settings defines different value for OCR_MAX_IMAGE_PIXELS than + configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0): instance = ApplicationConfiguration.objects.all().first() instance.max_image_pixels = 1_000_000.0 @@ -109,6 +186,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas self.assertAlmostEqual(params["max_image_mpixels"], 1.0) def test_db_settings_ocr_color_convert(self): + """ + GIVEN: + - Django settings defines different value for OCR_COLOR_CONVERSION_STRATEGY than + configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"): instance = ApplicationConfiguration.objects.all().first() instance.color_conversion_strategy = ColorConvertChoices.INDEPENDENT @@ -119,3 +205,28 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas params["color_conversion_strategy"], "UseDeviceIndependentColor", ) + + def test_ocr_user_args(self): + """ + GIVEN: + - Django settings defines different value for OCR_USER_ARGS than + configuration object + WHEN: + - OCR parameters are constructed + THEN: + - Configuration from database is utilized + """ + with override_settings( + OCR_USER_ARGS=json.dumps({"continue_on_soft_render_error": True}), + ): + instance = ApplicationConfiguration.objects.all().first() + instance.user_args = {"unpaper_args": "--pre-rotate 90"} 
+ instance.save() + + params = self.get_params() + + self.assertIn("unpaper_args", params) + self.assertEqual( + params["unpaper_args"], + "--pre-rotate 90", + ) diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index 4c07b5de3..1b4609bdc 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -65,7 +65,7 @@ class TikaDocumentParser(DocumentParser): document_path.read_bytes(), mime_type, ) - else: # pragma: nocover + else: # pragma: no cover raise except Exception as err: raise ParseError(