Cleaning up the testing and coverage more

This commit is contained in:
Trenton H 2023-12-28 12:23:47 -08:00
parent 8ea6bb770b
commit 4ec578f3b6
11 changed files with 149 additions and 32 deletions

View File

@ -333,7 +333,7 @@ class DocumentParser(LoggingMixin):
if self.progress_callback: if self.progress_callback:
self.progress_callback(current_progress, max_progress) self.progress_callback(current_progress, max_progress)
def get_settings(self): def get_settings(self): # pragma: no cover
""" """
A parser must implement this A parser must implement this
""" """

View File

@ -169,5 +169,5 @@ class ApplicationConfiguration(AbstractSingletonModel):
class Meta: class Meta:
verbose_name = _("paperless application settings") verbose_name = _("paperless application settings")
def __str__(self) -> str: def __str__(self) -> str: # pragma: no cover
return "ApplicationConfiguration" return "ApplicationConfiguration"

View File

@ -51,6 +51,9 @@ def __get_boolean(key: str, default: str = "NO") -> bool:
def __get_optional_boolean(key: str) -> Optional[bool]: def __get_optional_boolean(key: str) -> Optional[bool]:
"""
Returns None if the environment key is not present, otherwise a boolean
"""
if key in os.environ: if key in os.environ:
return __get_boolean(key) return __get_boolean(key)
return None return None
@ -64,6 +67,9 @@ def __get_int(key: str, default: int) -> int:
def __get_optional_int(key: str) -> Optional[int]: def __get_optional_int(key: str) -> Optional[int]:
"""
Returns None if the environment key is not present, otherwise an integer
"""
if key in os.environ: if key in os.environ:
return __get_int(key, -1) return __get_int(key, -1)
return None return None
@ -76,25 +82,25 @@ def __get_float(key: str, default: float) -> float:
return float(os.getenv(key, default)) return float(os.getenv(key, default))
def __get_optional_float(key: str) -> Optional[float]:
if key in os.environ:
return __get_float(key, -1)
return None
def __get_path( def __get_path(
key: str, key: str,
default: Optional[Union[PathLike, str]] = None, default: Union[PathLike, str],
) -> Optional[Path]: ) -> Path:
""" """
Return a normalized, absolute path based on the environment variable or a default, Return a normalized, absolute path based on the environment variable or a default,
if provided. If not set and no default, returns None if provided
""" """
if key in os.environ: if key in os.environ:
return Path(os.environ[key]).resolve() return Path(os.environ[key]).resolve()
elif default is not None:
return Path(default).resolve() return Path(default).resolve()
else:
def __get_optional_path(key: str) -> Optional[Path]:
"""
Returns None if the environment key is not present, otherwise a normalized, absolute path
"""
if key in os.environ:
return __get_path(key, "")
return None return None
@ -513,7 +519,7 @@ CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid" SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language" LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
EMAIL_CERTIFICATE_FILE = __get_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION") EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
############################################################################### ###############################################################################

View File

@ -213,7 +213,7 @@ class RasterisedDocumentParser(DocumentParser):
ocrmypdf_args["skip_text"] = True ocrmypdf_args["skip_text"] = True
elif self.settings.mode == ModeChoices.REDO: elif self.settings.mode == ModeChoices.REDO:
ocrmypdf_args["redo_ocr"] = True ocrmypdf_args["redo_ocr"] = True
else: else: # pragma: no cover
raise ParseError(f"Invalid ocr mode: {self.settings.mode}") raise ParseError(f"Invalid ocr mode: {self.settings.mode}")
if self.settings.clean == CleanChoices.CLEAN: if self.settings.clean == CleanChoices.CLEAN:

View File

@ -1,3 +1,5 @@
import json
from django.test import TestCase from django.test import TestCase
from django.test import override_settings from django.test import override_settings
@ -14,6 +16,9 @@ from paperless_tesseract.parsers import RasterisedDocumentParser
class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase): class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@staticmethod @staticmethod
def get_params(): def get_params():
"""
Helper to get just the OCRMyPDF parameters from the parser
"""
return RasterisedDocumentParser(None).construct_ocrmypdf_parameters( return RasterisedDocumentParser(None).construct_ocrmypdf_parameters(
input_file="input.pdf", input_file="input.pdf",
output_file="output.pdf", output_file="output.pdf",
@ -23,6 +28,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
) )
def test_db_settings_ocr_pages(self): def test_db_settings_ocr_pages(self):
"""
GIVEN:
- Django settings define a different value for OCR_PAGES than the
configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(OCR_PAGES=10): with override_settings(OCR_PAGES=10):
instance = ApplicationConfiguration.objects.all().first() instance = ApplicationConfiguration.objects.all().first()
instance.pages = 5 instance.pages = 5
@ -32,6 +46,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
self.assertEqual(params["pages"], "1-5") self.assertEqual(params["pages"], "1-5")
def test_db_settings_ocr_language(self): def test_db_settings_ocr_language(self):
"""
GIVEN:
- Django settings define a different value for OCR_LANGUAGE than the
configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(OCR_LANGUAGE="eng+deu"): with override_settings(OCR_LANGUAGE="eng+deu"):
instance = ApplicationConfiguration.objects.all().first() instance = ApplicationConfiguration.objects.all().first()
instance.language = "fra+ita" instance.language = "fra+ita"
@ -41,7 +64,16 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
self.assertEqual(params["language"], "fra+ita") self.assertEqual(params["language"], "fra+ita")
def test_db_settings_ocr_output_type(self): def test_db_settings_ocr_output_type(self):
with override_settings(OCR_LANGUAGE="pdfa-3"): """
GIVEN:
- Django settings define a different value for OCR_OUTPUT_TYPE than the
configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(OCR_OUTPUT_TYPE="pdfa-3"):
instance = ApplicationConfiguration.objects.all().first() instance = ApplicationConfiguration.objects.all().first()
instance.output_type = OutputTypeChoices.PDF_A instance.output_type = OutputTypeChoices.PDF_A
instance.save() instance.save()
@ -50,6 +82,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
self.assertEqual(params["output_type"], "pdfa") self.assertEqual(params["output_type"], "pdfa")
def test_db_settings_ocr_mode(self): def test_db_settings_ocr_mode(self):
"""
GIVEN:
- Django settings define a different value for OCR_MODE than the
configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(OCR_MODE="redo"): with override_settings(OCR_MODE="redo"):
instance = ApplicationConfiguration.objects.all().first() instance = ApplicationConfiguration.objects.all().first()
instance.mode = ModeChoices.SKIP instance.mode = ModeChoices.SKIP
@ -61,6 +102,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
self.assertNotIn("force_ocr", params) self.assertNotIn("force_ocr", params)
def test_db_settings_ocr_clean(self): def test_db_settings_ocr_clean(self):
"""
GIVEN:
- Django settings define a different value for OCR_CLEAN than the
configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(OCR_CLEAN="clean-final"): with override_settings(OCR_CLEAN="clean-final"):
instance = ApplicationConfiguration.objects.all().first() instance = ApplicationConfiguration.objects.all().first()
instance.unpaper_clean = CleanChoices.CLEAN instance.unpaper_clean = CleanChoices.CLEAN
@ -80,6 +130,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
self.assertNotIn("clean", params) self.assertNotIn("clean", params)
def test_db_settings_ocr_deskew(self): def test_db_settings_ocr_deskew(self):
"""
GIVEN:
- Django settings define a different value for OCR_DESKEW than the
configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(OCR_DESKEW=False): with override_settings(OCR_DESKEW=False):
instance = ApplicationConfiguration.objects.all().first() instance = ApplicationConfiguration.objects.all().first()
instance.deskew = True instance.deskew = True
@ -89,6 +148,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
self.assertTrue(params["deskew"]) self.assertTrue(params["deskew"])
def test_db_settings_ocr_rotate(self): def test_db_settings_ocr_rotate(self):
"""
GIVEN:
- Django settings define different values for OCR_ROTATE_PAGES
and OCR_ROTATE_PAGES_THRESHOLD than the configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0): with override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0):
instance = ApplicationConfiguration.objects.all().first() instance = ApplicationConfiguration.objects.all().first()
instance.rotate_pages = True instance.rotate_pages = True
@ -100,6 +168,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
self.assertAlmostEqual(params["rotate_pages_threshold"], 15.0) self.assertAlmostEqual(params["rotate_pages_threshold"], 15.0)
def test_db_settings_ocr_max_pixels(self): def test_db_settings_ocr_max_pixels(self):
"""
GIVEN:
- Django settings define a different value for OCR_MAX_IMAGE_PIXELS than the
configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0): with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0):
instance = ApplicationConfiguration.objects.all().first() instance = ApplicationConfiguration.objects.all().first()
instance.max_image_pixels = 1_000_000.0 instance.max_image_pixels = 1_000_000.0
@ -109,6 +186,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
self.assertAlmostEqual(params["max_image_mpixels"], 1.0) self.assertAlmostEqual(params["max_image_mpixels"], 1.0)
def test_db_settings_ocr_color_convert(self): def test_db_settings_ocr_color_convert(self):
"""
GIVEN:
- Django settings define a different value for OCR_COLOR_CONVERSION_STRATEGY than the
configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"): with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"):
instance = ApplicationConfiguration.objects.all().first() instance = ApplicationConfiguration.objects.all().first()
instance.color_conversion_strategy = ColorConvertChoices.INDEPENDENT instance.color_conversion_strategy = ColorConvertChoices.INDEPENDENT
@ -119,3 +205,28 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
params["color_conversion_strategy"], params["color_conversion_strategy"],
"UseDeviceIndependentColor", "UseDeviceIndependentColor",
) )
def test_ocr_user_args(self):
"""
GIVEN:
- Django settings define a different value for OCR_USER_ARGS than the
configuration object
WHEN:
- OCR parameters are constructed
THEN:
- Configuration from database is utilized
"""
with override_settings(
OCR_USER_ARGS=json.dumps({"continue_on_soft_render_error": True}),
):
instance = ApplicationConfiguration.objects.all().first()
instance.user_args = {"unpaper_args": "--pre-rotate 90"}
instance.save()
params = self.get_params()
self.assertIn("unpaper_args", params)
self.assertEqual(
params["unpaper_args"],
"--pre-rotate 90",
)