Cleaning up the testing and coverage more
This commit is contained in:
parent
8ea6bb770b
commit
4ec578f3b6
@ -52,7 +52,7 @@ def load_classifier() -> Optional["DocumentClassifier"]:
|
||||
except OSError:
|
||||
logger.exception("IO error while loading document classification model")
|
||||
classifier = None
|
||||
except Exception: # pragma: nocover
|
||||
except Exception: # pragma: no cover
|
||||
logger.exception("Unknown error while loading document classification model")
|
||||
classifier = None
|
||||
|
||||
@ -318,7 +318,7 @@ class DocumentClassifier:
|
||||
|
||||
return True
|
||||
|
||||
def preprocess_content(self, content: str) -> str: # pragma: nocover
|
||||
def preprocess_content(self, content: str) -> str: # pragma: no cover
|
||||
"""
|
||||
Process the contents of a document, distilling it down into
|
||||
words which are meaningful to the content
|
||||
|
@ -26,7 +26,7 @@ from documents.tasks import consume_file
|
||||
try:
|
||||
from inotifyrecursive import INotify
|
||||
from inotifyrecursive import flags
|
||||
except ImportError: # pragma: nocover
|
||||
except ImportError: # pragma: no cover
|
||||
INotify = flags = None
|
||||
|
||||
logger = logging.getLogger("paperless.management.consumer")
|
||||
|
@ -5,7 +5,7 @@ from django.core.management.commands.loaddata import Command as LoadDataCommand
|
||||
|
||||
# This class is used to migrate data between databases
|
||||
# That's difficult to test
|
||||
class Command(LoadDataCommand): # pragma: nocover
|
||||
class Command(LoadDataCommand): # pragma: no cover
|
||||
"""
|
||||
Allow the loading of data from standard in. Sourced originally from:
|
||||
https://gist.github.com/bmispelon/ad5a2c333443b3a1d051 (MIT licensed)
|
||||
|
@ -333,7 +333,7 @@ class DocumentParser(LoggingMixin):
|
||||
if self.progress_callback:
|
||||
self.progress_callback(current_progress, max_progress)
|
||||
|
||||
def get_settings(self):
|
||||
def get_settings(self): # pragma: no cover
|
||||
"""
|
||||
A parser must implement this
|
||||
"""
|
||||
|
@ -169,5 +169,5 @@ class ApplicationConfiguration(AbstractSingletonModel):
|
||||
class Meta:
|
||||
verbose_name = _("paperless application settings")
|
||||
|
||||
def __str__(self) -> str:
|
||||
def __str__(self) -> str: # pragma: no cover
|
||||
return "ApplicationConfiguration"
|
||||
|
@ -51,6 +51,9 @@ def __get_boolean(key: str, default: str = "NO") -> bool:
|
||||
|
||||
|
||||
def __get_optional_boolean(key: str) -> Optional[bool]:
|
||||
"""
|
||||
Returns None if the environment key is not present, otherwise a boolean
|
||||
"""
|
||||
if key in os.environ:
|
||||
return __get_boolean(key)
|
||||
return None
|
||||
@ -64,6 +67,9 @@ def __get_int(key: str, default: int) -> int:
|
||||
|
||||
|
||||
def __get_optional_int(key: str) -> Optional[int]:
|
||||
"""
|
||||
Returns None if the environment key is not present, otherwise an integer
|
||||
"""
|
||||
if key in os.environ:
|
||||
return __get_int(key, -1)
|
||||
return None
|
||||
@ -76,25 +82,25 @@ def __get_float(key: str, default: float) -> float:
|
||||
return float(os.getenv(key, default))
|
||||
|
||||
|
||||
def __get_optional_float(key: str) -> Optional[float]:
|
||||
if key in os.environ:
|
||||
return __get_float(key, -1)
|
||||
return None
|
||||
|
||||
|
||||
def __get_path(
|
||||
key: str,
|
||||
default: Optional[Union[PathLike, str]] = None,
|
||||
) -> Optional[Path]:
|
||||
default: Union[PathLike, str],
|
||||
) -> Path:
|
||||
"""
|
||||
Return a normalized, absolute path based on the environment variable or a default,
|
||||
if provided. If not set and no default, returns None
|
||||
if provided
|
||||
"""
|
||||
if key in os.environ:
|
||||
return Path(os.environ[key]).resolve()
|
||||
elif default is not None:
|
||||
return Path(default).resolve()
|
||||
else:
|
||||
|
||||
|
||||
def __get_optional_path(key: str) -> Optional[Path]:
|
||||
"""
|
||||
Returns None if the environment key is not present, otherwise a normalized, absolute Path
|
||||
"""
|
||||
if key in os.environ:
|
||||
return __get_path(key, "")
|
||||
return None
|
||||
|
||||
|
||||
@ -345,7 +351,7 @@ MIDDLEWARE = [
|
||||
]
|
||||
|
||||
# Optional to enable compression
|
||||
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: nocover
|
||||
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
|
||||
MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
|
||||
|
||||
ROOT_URLCONF = "paperless.urls"
|
||||
@ -513,7 +519,7 @@ CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
|
||||
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
|
||||
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
|
||||
|
||||
EMAIL_CERTIFICATE_FILE = __get_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
|
||||
EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
@ -73,7 +73,7 @@ class StandardPagination(PageNumberPagination):
|
||||
|
||||
|
||||
class FaviconView(View):
|
||||
def get(self, request, *args, **kwargs): # pragma: nocover
|
||||
def get(self, request, *args, **kwargs): # pragma: no cover
|
||||
favicon = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"static",
|
||||
|
@ -92,7 +92,7 @@ class BaseMailAction:
|
||||
M: MailBox,
|
||||
message_uid: str,
|
||||
parameter: str,
|
||||
): # pragma: nocover
|
||||
): # pragma: no cover
|
||||
"""
|
||||
Perform mail action on the given mail uid in the mailbox.
|
||||
"""
|
||||
@ -171,7 +171,7 @@ class TagMailAction(BaseMailAction):
|
||||
return AND(NOT(gmail_label=self.keyword), no_keyword=self.keyword)
|
||||
else:
|
||||
return {"no_keyword": self.keyword}
|
||||
else: # pragma: nocover
|
||||
else: # pragma: no cover
|
||||
raise ValueError("This should never happen.")
|
||||
|
||||
def post_consume(self, M: MailBox, message_uid: str, parameter: str):
|
||||
@ -361,7 +361,7 @@ def get_rule_action(rule: MailRule, supports_gmail_labels: bool) -> BaseMailActi
|
||||
elif rule.action == MailRule.MailAction.TAG:
|
||||
return TagMailAction(rule.action_parameter, supports_gmail_labels)
|
||||
else:
|
||||
raise NotImplementedError("Unknown action.") # pragma: nocover
|
||||
raise NotImplementedError("Unknown action.") # pragma: no cover
|
||||
|
||||
|
||||
def make_criterias(rule: MailRule, supports_gmail_labels: bool):
|
||||
@ -397,7 +397,7 @@ def get_mailbox(server, port, security) -> MailBox:
|
||||
Returns the correct MailBox instance for the given configuration.
|
||||
"""
|
||||
ssl_context = ssl.create_default_context()
|
||||
if settings.EMAIL_CERTIFICATE_FILE is not None: # pragma: nocover
|
||||
if settings.EMAIL_CERTIFICATE_FILE is not None: # pragma: no cover
|
||||
ssl_context.load_verify_locations(cafile=settings.EMAIL_CERTIFICATE_FILE)
|
||||
|
||||
if security == MailAccount.ImapSecurity.NONE:
|
||||
@ -407,7 +407,7 @@ def get_mailbox(server, port, security) -> MailBox:
|
||||
elif security == MailAccount.ImapSecurity.SSL:
|
||||
mailbox = MailBox(server, port, ssl_context=ssl_context)
|
||||
else:
|
||||
raise NotImplementedError("Unknown IMAP security") # pragma: nocover
|
||||
raise NotImplementedError("Unknown IMAP security") # pragma: no cover
|
||||
return mailbox
|
||||
|
||||
|
||||
@ -450,7 +450,7 @@ class MailAccountHandler(LoggingMixin):
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
"Unknown title selector.",
|
||||
) # pragma: nocover
|
||||
) # pragma: no cover
|
||||
|
||||
def _get_correspondent(
|
||||
self,
|
||||
@ -478,7 +478,7 @@ class MailAccountHandler(LoggingMixin):
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
"Unknown correspondent selector",
|
||||
) # pragma: nocover
|
||||
) # pragma: no cover
|
||||
|
||||
def handle_mail_account(self, account: MailAccount):
|
||||
"""
|
||||
|
@ -213,7 +213,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
ocrmypdf_args["skip_text"] = True
|
||||
elif self.settings.mode == ModeChoices.REDO:
|
||||
ocrmypdf_args["redo_ocr"] = True
|
||||
else:
|
||||
else: # pragma: no cover
|
||||
raise ParseError(f"Invalid ocr mode: {self.settings.mode}")
|
||||
|
||||
if self.settings.clean == CleanChoices.CLEAN:
|
||||
|
@ -1,3 +1,5 @@
|
||||
import json
|
||||
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
|
||||
@ -14,6 +16,9 @@ from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||
class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
@staticmethod
|
||||
def get_params():
|
||||
"""
|
||||
Helper to get just the OCRMyPDF parameters from the parser
|
||||
"""
|
||||
return RasterisedDocumentParser(None).construct_ocrmypdf_parameters(
|
||||
input_file="input.pdf",
|
||||
output_file="output.pdf",
|
||||
@ -23,6 +28,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
)
|
||||
|
||||
def test_db_settings_ocr_pages(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines a different value for OCR_PAGES than
|
||||
the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_PAGES=10):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.pages = 5
|
||||
@ -32,6 +46,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
self.assertEqual(params["pages"], "1-5")
|
||||
|
||||
def test_db_settings_ocr_language(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines a different value for OCR_LANGUAGE than
|
||||
the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_LANGUAGE="eng+deu"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.language = "fra+ita"
|
||||
@ -41,7 +64,16 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
self.assertEqual(params["language"], "fra+ita")
|
||||
|
||||
def test_db_settings_ocr_output_type(self):
|
||||
with override_settings(OCR_LANGUAGE="pdfa-3"):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines a different value for OCR_OUTPUT_TYPE than
|
||||
the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_OUTPUT_TYPE="pdfa-3"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.output_type = OutputTypeChoices.PDF_A
|
||||
instance.save()
|
||||
@ -50,6 +82,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
self.assertEqual(params["output_type"], "pdfa")
|
||||
|
||||
def test_db_settings_ocr_mode(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines a different value for OCR_MODE than
|
||||
the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_MODE="redo"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.mode = ModeChoices.SKIP
|
||||
@ -61,6 +102,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
self.assertNotIn("force_ocr", params)
|
||||
|
||||
def test_db_settings_ocr_clean(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines a different value for OCR_CLEAN than
|
||||
the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_CLEAN="clean-final"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.unpaper_clean = CleanChoices.CLEAN
|
||||
@ -80,6 +130,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
self.assertNotIn("clean", params)
|
||||
|
||||
def test_db_settings_ocr_deskew(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines a different value for OCR_DESKEW than
|
||||
the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_DESKEW=False):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.deskew = True
|
||||
@ -89,6 +148,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
self.assertTrue(params["deskew"])
|
||||
|
||||
def test_db_settings_ocr_rotate(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different values for OCR_ROTATE_PAGES
|
||||
and OCR_ROTATE_PAGES_THRESHOLD than the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.rotate_pages = True
|
||||
@ -100,6 +168,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
self.assertAlmostEqual(params["rotate_pages_threshold"], 15.0)
|
||||
|
||||
def test_db_settings_ocr_max_pixels(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines a different value for OCR_MAX_IMAGE_PIXELS than
|
||||
the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.max_image_pixels = 1_000_000.0
|
||||
@ -109,6 +186,15 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
self.assertAlmostEqual(params["max_image_mpixels"], 1.0)
|
||||
|
||||
def test_db_settings_ocr_color_convert(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines a different value for OCR_COLOR_CONVERSION_STRATEGY than
|
||||
the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.color_conversion_strategy = ColorConvertChoices.INDEPENDENT
|
||||
@ -119,3 +205,28 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
|
||||
params["color_conversion_strategy"],
|
||||
"UseDeviceIndependentColor",
|
||||
)
|
||||
|
||||
def test_ocr_user_args(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines a different value for OCR_USER_ARGS than
|
||||
the configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(
|
||||
OCR_USER_ARGS=json.dumps({"continue_on_soft_render_error": True}),
|
||||
):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.user_args = {"unpaper_args": "--pre-rotate 90"}
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
|
||||
self.assertIn("unpaper_args", params)
|
||||
self.assertEqual(
|
||||
params["unpaper_args"],
|
||||
"--pre-rotate 90",
|
||||
)
|
||||
|
@ -65,7 +65,7 @@ class TikaDocumentParser(DocumentParser):
|
||||
document_path.read_bytes(),
|
||||
mime_type,
|
||||
)
|
||||
else: # pragma: nocover
|
||||
else: # pragma: no cover
|
||||
raise
|
||||
except Exception as err:
|
||||
raise ParseError(
|
||||
|
Loading…
x
Reference in New Issue
Block a user