121 lines
4.4 KiB
Python
121 lines
4.4 KiB
Python
from django.test import TestCase
|
|
from django.test import override_settings
|
|
|
|
from documents.tests.utils import DirectoriesMixin
|
|
from documents.tests.utils import FileSystemAssertsMixin
|
|
from paperless.models import CommonSettings
|
|
from paperless.models import OcrSettings
|
|
from paperless_tesseract.parsers import RasterisedDocumentParser
|
|
|
|
|
|
class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|
@staticmethod
|
|
def get_params():
|
|
return RasterisedDocumentParser(None).construct_ocrmypdf_parameters(
|
|
input_file="input.pdf",
|
|
output_file="output.pdf",
|
|
sidecar_file="sidecar.txt",
|
|
mime_type="application/pdf",
|
|
safe_fallback=False,
|
|
)
|
|
|
|
def test_db_settings_ocr_pages(self):
|
|
with override_settings(OCR_PAGES=10):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.pages = 5
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertEqual(params["pages"], "1-5")
|
|
|
|
def test_db_settings_ocr_language(self):
|
|
with override_settings(OCR_LANGUAGE="eng+deu"):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.language = "fra+ita"
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertEqual(params["language"], "fra+ita")
|
|
|
|
def test_db_settings_ocr_output_type(self):
|
|
with override_settings(OCR_LANGUAGE="pdfa-3"):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.output_type = CommonSettings.OutputTypeChoices.PDF_A
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertEqual(params["output_type"], "pdfa")
|
|
|
|
def test_db_settings_ocr_mode(self):
|
|
with override_settings(OCR_MODE="redo"):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.mode = OcrSettings.ModeChoices.SKIP
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertTrue(params["skip_text"])
|
|
self.assertNotIn("redo_ocr", params)
|
|
self.assertNotIn("force_ocr", params)
|
|
|
|
def test_db_settings_ocr_clean(self):
|
|
with override_settings(OCR_CLEAN="clean-final"):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.unpaper_clean = OcrSettings.CleanChoices.CLEAN
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertTrue(params["clean"])
|
|
self.assertNotIn("clean_final", params)
|
|
|
|
with override_settings(OCR_CLEAN="clean-final"):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.unpaper_clean = OcrSettings.CleanChoices.FINAL
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertTrue(params["clean_final"])
|
|
self.assertNotIn("clean", params)
|
|
|
|
def test_db_settings_ocr_deskew(self):
|
|
with override_settings(OCR_DESKEW=False):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.deskew = True
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertTrue(params["deskew"])
|
|
|
|
def test_db_settings_ocr_rotate(self):
|
|
with override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.rotate_pages = True
|
|
instance.rotate_pages_threshold = 15.0
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertTrue(params["rotate_pages"])
|
|
self.assertAlmostEqual(params["rotate_pages_threshold"], 15.0)
|
|
|
|
def test_db_settings_ocr_max_pixels(self):
|
|
with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.max_image_pixels = 1_000_000.0
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertAlmostEqual(params["max_image_mpixels"], 1.0)
|
|
|
|
def test_db_settings_ocr_color_convert(self):
|
|
with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"):
|
|
instance = OcrSettings.objects.all().first()
|
|
instance.color_conversion_strategy = (
|
|
OcrSettings.ColorConvertChoices.INDEPENDENT
|
|
)
|
|
instance.save()
|
|
|
|
params = self.get_params()
|
|
self.assertEqual(
|
|
params["color_conversion_strategy"],
|
|
"UseDeviceIndependentColor",
|
|
)
|