More fixes and work
This commit is contained in:
parent
a7753b1f89
commit
25cc7ada6b
@ -41,6 +41,8 @@ from documents.settings import EXPORTER_THUMBNAIL_NAME
|
|||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from paperless import version
|
from paperless import version
|
||||||
from paperless.db import GnuPG
|
from paperless.db import GnuPG
|
||||||
|
from paperless.models import CommonSettings
|
||||||
|
from paperless.models import OcrSettings
|
||||||
from paperless_mail.models import MailAccount
|
from paperless_mail.models import MailAccount
|
||||||
from paperless_mail.models import MailRule
|
from paperless_mail.models import MailRule
|
||||||
|
|
||||||
@ -291,6 +293,14 @@ class Command(BaseCommand):
|
|||||||
serializers.serialize("json", CustomField.objects.all()),
|
serializers.serialize("json", CustomField.objects.all()),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
manifest += json.loads(
|
||||||
|
serializers.serialize("json", CommonSettings.objects.all()),
|
||||||
|
)
|
||||||
|
|
||||||
|
manifest += json.loads(
|
||||||
|
serializers.serialize("json", OcrSettings.objects.all()),
|
||||||
|
)
|
||||||
|
|
||||||
# These are treated specially and included in the per-document manifest
|
# These are treated specially and included in the per-document manifest
|
||||||
# if that setting is enabled. Otherwise, they are just exported to the bulk
|
# if that setting is enabled. Otherwise, they are just exported to the bulk
|
||||||
# manifest
|
# manifest
|
||||||
|
@ -9,7 +9,7 @@ def _create_singleton(apps, schema_editor):
|
|||||||
"""
|
"""
|
||||||
Creates the first and only instance of the settings models
|
Creates the first and only instance of the settings models
|
||||||
"""
|
"""
|
||||||
for model_name in ["CommonSettings", "OcrSettings", "TextSettings", "TikaSettings"]:
|
for model_name in ["CommonSettings", "OcrSettings"]:
|
||||||
settings_model = apps.get_model("paperless", model_name)
|
settings_model = apps.get_model("paperless", model_name)
|
||||||
settings_model.objects.create()
|
settings_model.objects.create()
|
||||||
|
|
||||||
@ -189,60 +189,5 @@ class Migration(migrations.Migration):
|
|||||||
"verbose_name": "ocr settings",
|
"verbose_name": "ocr settings",
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
|
||||||
name="TextSettings",
|
|
||||||
fields=[
|
|
||||||
(
|
|
||||||
"id",
|
|
||||||
models.AutoField(
|
|
||||||
auto_created=True,
|
|
||||||
primary_key=True,
|
|
||||||
serialize=False,
|
|
||||||
verbose_name="ID",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"thumbnail_font_name",
|
|
||||||
models.CharField(
|
|
||||||
blank=True,
|
|
||||||
max_length=64,
|
|
||||||
null=True,
|
|
||||||
verbose_name="Sets the output PDF type",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
],
|
|
||||||
options={
|
|
||||||
"abstract": False,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
migrations.CreateModel(
|
|
||||||
name="TikaSettings",
|
|
||||||
fields=[
|
|
||||||
(
|
|
||||||
"id",
|
|
||||||
models.AutoField(
|
|
||||||
auto_created=True,
|
|
||||||
primary_key=True,
|
|
||||||
serialize=False,
|
|
||||||
verbose_name="ID",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"tika_url",
|
|
||||||
models.URLField(blank=True, null=True, verbose_name="Tika URL"),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"gotenberg_url",
|
|
||||||
models.URLField(
|
|
||||||
blank=True,
|
|
||||||
null=True,
|
|
||||||
verbose_name="Gotenberg URL",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
],
|
|
||||||
options={
|
|
||||||
"abstract": False,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
migrations.RunPython(_create_singleton, migrations.RunPython.noop),
|
migrations.RunPython(_create_singleton, migrations.RunPython.noop),
|
||||||
]
|
]
|
||||||
|
@ -146,40 +146,3 @@ class OcrSettings(AbstractSingletonModel):
|
|||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return "OcrSettings"
|
return "OcrSettings"
|
||||||
|
|
||||||
|
|
||||||
class TextSettings(AbstractSingletonModel):
|
|
||||||
"""
|
|
||||||
Settings for the text parser
|
|
||||||
"""
|
|
||||||
|
|
||||||
thumbnail_font_name = models.CharField(
|
|
||||||
verbose_name=_("Sets the output PDF type"),
|
|
||||||
null=True,
|
|
||||||
blank=True,
|
|
||||||
max_length=64,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TikaSettings(AbstractSingletonModel):
|
|
||||||
"""
|
|
||||||
Settings for the Tika parser
|
|
||||||
"""
|
|
||||||
|
|
||||||
tika_url = models.URLField(
|
|
||||||
verbose_name=_("Tika URL"),
|
|
||||||
null=True,
|
|
||||||
blank=True,
|
|
||||||
)
|
|
||||||
gotenberg_url = models.URLField(
|
|
||||||
verbose_name=_("Gotenberg URL"),
|
|
||||||
null=True,
|
|
||||||
blank=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ConsumerSettings(AbstractSingletonModel):
|
|
||||||
delete_duplicates = models.BooleanField(
|
|
||||||
verbose_name=_("Delete duplicate consumer files"),
|
|
||||||
null=True,
|
|
||||||
)
|
|
||||||
|
@ -3,6 +3,7 @@ from django.contrib.auth.models import Permission
|
|||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
|
|
||||||
|
from paperless.models import CommonSettings
|
||||||
from paperless.models import OcrSettings
|
from paperless.models import OcrSettings
|
||||||
|
|
||||||
|
|
||||||
@ -117,6 +118,12 @@ class ProfileSerializer(serializers.ModelSerializer):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CommonSettingsSerializer(serializers.ModelSerializer):
    """
    Serializer for the CommonSettings model which exposes all of its fields.
    """

    class Meta:
        model = CommonSettings
        # DRF's "every model field" sentinel is the string "__all__".  A list
        # such as ["all"] is interpreted as one field literally named "all",
        # which fails when the serializer's fields are resolved.
        fields = "__all__"
|
||||||
|
|
||||||
|
|
||||||
class OcrSettingsSerializer(serializers.ModelSerializer):
|
class OcrSettingsSerializer(serializers.ModelSerializer):
|
||||||
class Meta:
|
class Meta:
|
||||||
model = OcrSettings
|
model = OcrSettings
|
||||||
|
@ -34,9 +34,11 @@ from documents.views import TasksViewSet
|
|||||||
from documents.views import UiSettingsView
|
from documents.views import UiSettingsView
|
||||||
from documents.views import UnifiedSearchViewSet
|
from documents.views import UnifiedSearchViewSet
|
||||||
from paperless.consumers import StatusConsumer
|
from paperless.consumers import StatusConsumer
|
||||||
|
from paperless.views import CommonSettingsViewSet
|
||||||
from paperless.views import FaviconView
|
from paperless.views import FaviconView
|
||||||
from paperless.views import GenerateAuthTokenView
|
from paperless.views import GenerateAuthTokenView
|
||||||
from paperless.views import GroupViewSet
|
from paperless.views import GroupViewSet
|
||||||
|
from paperless.views import OcrSettingsViewSet
|
||||||
from paperless.views import ProfileView
|
from paperless.views import ProfileView
|
||||||
from paperless.views import UserViewSet
|
from paperless.views import UserViewSet
|
||||||
from paperless_mail.views import MailAccountTestView
|
from paperless_mail.views import MailAccountTestView
|
||||||
@ -59,6 +61,8 @@ api_router.register(r"mail_rules", MailRuleViewSet)
|
|||||||
api_router.register(r"share_links", ShareLinkViewSet)
|
api_router.register(r"share_links", ShareLinkViewSet)
|
||||||
api_router.register(r"consumption_templates", ConsumptionTemplateViewSet)
|
api_router.register(r"consumption_templates", ConsumptionTemplateViewSet)
|
||||||
api_router.register(r"custom_fields", CustomFieldViewSet)
|
api_router.register(r"custom_fields", CustomFieldViewSet)
|
||||||
|
api_router.register(r"common_settings", CommonSettingsViewSet)
|
||||||
|
api_router.register(r"ocr_settings", OcrSettingsViewSet)
|
||||||
|
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
|
@ -18,7 +18,9 @@ from rest_framework.viewsets import ModelViewSet
|
|||||||
from documents.permissions import PaperlessObjectPermissions
|
from documents.permissions import PaperlessObjectPermissions
|
||||||
from paperless.filters import GroupFilterSet
|
from paperless.filters import GroupFilterSet
|
||||||
from paperless.filters import UserFilterSet
|
from paperless.filters import UserFilterSet
|
||||||
|
from paperless.models import CommonSettings
|
||||||
from paperless.models import OcrSettings
|
from paperless.models import OcrSettings
|
||||||
|
from paperless.serialisers import CommonSettingsSerializer
|
||||||
from paperless.serialisers import GroupSerializer
|
from paperless.serialisers import GroupSerializer
|
||||||
from paperless.serialisers import OcrSettingsSerializer
|
from paperless.serialisers import OcrSettingsSerializer
|
||||||
from paperless.serialisers import ProfileSerializer
|
from paperless.serialisers import ProfileSerializer
|
||||||
@ -164,6 +166,15 @@ class GenerateAuthTokenView(GenericAPIView):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CommonSettingsViewSet(ModelViewSet):
    """
    Model viewset for CommonSettings, restricted to authenticated users.
    """

    # Access control and (de)serialization
    permission_classes = (IsAuthenticated,)
    serializer_class = CommonSettingsSerializer

    # Backing model and the manager used as the queryset source
    model = CommonSettings
    queryset = CommonSettings.objects
|
||||||
|
|
||||||
|
|
||||||
class OcrSettingsViewSet(ModelViewSet):
|
class OcrSettingsViewSet(ModelViewSet):
|
||||||
model = OcrSettings
|
model = OcrSettings
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ import re
|
|||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@ -11,7 +12,7 @@ from PIL import Image
|
|||||||
from documents.parsers import DocumentParser
|
from documents.parsers import DocumentParser
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from documents.parsers import make_thumbnail_from_pdf
|
from documents.parsers import make_thumbnail_from_pdf
|
||||||
from paperless_tesseract.models import OcrSettings as OcrSettingModel
|
from paperless.models import OcrSettings as OcrSettingModel
|
||||||
from paperless_tesseract.setting_schema import OcrSetting
|
from paperless_tesseract.setting_schema import OcrSetting
|
||||||
from paperless_tesseract.setting_schema import get_ocr_settings
|
from paperless_tesseract.setting_schema import get_ocr_settings
|
||||||
|
|
||||||
@ -71,7 +72,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
self.logging_group,
|
self.logging_group,
|
||||||
)
|
)
|
||||||
|
|
||||||
def is_image(self, mime_type):
|
def is_image(self, mime_type) -> bool:
|
||||||
return mime_type in [
|
return mime_type in [
|
||||||
"image/png",
|
"image/png",
|
||||||
"image/jpeg",
|
"image/jpeg",
|
||||||
@ -81,7 +82,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
"image/webp",
|
"image/webp",
|
||||||
]
|
]
|
||||||
|
|
||||||
def has_alpha(self, image):
|
def has_alpha(self, image) -> bool:
|
||||||
with Image.open(image) as im:
|
with Image.open(image) as im:
|
||||||
return im.mode in ("RGBA", "LA")
|
return im.mode in ("RGBA", "LA")
|
||||||
|
|
||||||
@ -96,7 +97,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_dpi(self, image):
|
def get_dpi(self, image) -> Optional[int]:
|
||||||
try:
|
try:
|
||||||
with Image.open(image) as im:
|
with Image.open(image) as im:
|
||||||
x, y = im.info["dpi"]
|
x, y = im.info["dpi"]
|
||||||
@ -105,7 +106,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
self.log.warning(f"Error while getting DPI from image {image}: {e}")
|
self.log.warning(f"Error while getting DPI from image {image}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def calculate_a4_dpi(self, image):
|
def calculate_a4_dpi(self, image) -> Optional[int]:
|
||||||
try:
|
try:
|
||||||
with Image.open(image) as im:
|
with Image.open(image) as im:
|
||||||
width, height = im.size
|
width, height = im.size
|
||||||
@ -118,7 +119,11 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
self.log.warning(f"Error while calculating DPI for image {image}: {e}")
|
self.log.warning(f"Error while calculating DPI for image {image}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path):
|
def extract_text(
|
||||||
|
self,
|
||||||
|
sidecar_file: Optional[Path],
|
||||||
|
pdf_file: Path,
|
||||||
|
) -> Optional[str]:
|
||||||
# When re-doing OCR, the sidecar contains ONLY the new text, not
|
# When re-doing OCR, the sidecar contains ONLY the new text, not
|
||||||
# the whole text, so do not utilize it in that case
|
# the whole text, so do not utilize it in that case
|
||||||
if (
|
if (
|
||||||
@ -179,6 +184,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
sidecar_file,
|
sidecar_file,
|
||||||
safe_fallback=False,
|
safe_fallback=False,
|
||||||
):
|
):
|
||||||
|
if TYPE_CHECKING:
|
||||||
assert isinstance(self.parser_settings, OcrSetting)
|
assert isinstance(self.parser_settings, OcrSetting)
|
||||||
ocrmypdf_args = {
|
ocrmypdf_args = {
|
||||||
"input_file": input_file,
|
"input_file": input_file,
|
||||||
|
@ -4,7 +4,8 @@ from typing import Optional
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
from paperless_tesseract.models import OcrSettings as OcrSettingModel
|
from paperless.models import CommonSettings
|
||||||
|
from paperless.models import OcrSettings as OcrSettingModel
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
@ -25,16 +26,20 @@ class OcrSetting:
|
|||||||
|
|
||||||
|
|
||||||
def get_ocr_settings() -> OcrSetting:
|
def get_ocr_settings() -> OcrSetting:
|
||||||
db_settings = OcrSettingModel.objects.all().first()
|
ocr_db_settings = OcrSettingModel.objects.all().first()
|
||||||
# Workaround for a test where the migration hasn't run to create the single model
|
# Workaround for a test where the migration hasn't run to create the single model
|
||||||
if db_settings is None:
|
if ocr_db_settings is None:
|
||||||
OcrSettingModel.objects.create()
|
OcrSettingModel.objects.create()
|
||||||
db_settings = OcrSettingModel.objects.all().first()
|
ocr_db_settings = OcrSettingModel.objects.all().first()
|
||||||
assert db_settings is not None
|
|
||||||
|
cmn_db_settings = CommonSettings.objects.all().first()
|
||||||
|
if cmn_db_settings is None:
|
||||||
|
CommonSettings.objects.create()
|
||||||
|
cmn_db_settings = CommonSettings.objects.all().first()
|
||||||
|
|
||||||
user_args = None
|
user_args = None
|
||||||
if db_settings.user_args:
|
if ocr_db_settings.user_args:
|
||||||
user_args = db_settings.user_args
|
user_args = ocr_db_settings.user_args
|
||||||
elif settings.OCR_USER_ARGS is not None:
|
elif settings.OCR_USER_ARGS is not None:
|
||||||
try:
|
try:
|
||||||
user_args = json.loads(settings.OCR_USER_ARGS)
|
user_args = json.loads(settings.OCR_USER_ARGS)
|
||||||
@ -42,23 +47,25 @@ def get_ocr_settings() -> OcrSetting:
|
|||||||
user_args = {}
|
user_args = {}
|
||||||
|
|
||||||
return OcrSetting(
|
return OcrSetting(
|
||||||
pages=db_settings.pages or settings.OCR_PAGES,
|
pages=ocr_db_settings.pages or settings.OCR_PAGES,
|
||||||
language=db_settings.language or settings.OCR_LANGUAGE,
|
language=ocr_db_settings.language or settings.OCR_LANGUAGE,
|
||||||
output_type=db_settings.output_type or settings.OCR_OUTPUT_TYPE,
|
output_type=cmn_db_settings.output_type or settings.OCR_OUTPUT_TYPE,
|
||||||
mode=db_settings.mode or settings.OCR_MODE,
|
mode=ocr_db_settings.mode or settings.OCR_MODE,
|
||||||
skip_archive_file=(
|
skip_archive_file=(
|
||||||
db_settings.skip_archive_file or settings.OCR_SKIP_ARCHIVE_FILE
|
ocr_db_settings.skip_archive_file or settings.OCR_SKIP_ARCHIVE_FILE
|
||||||
),
|
),
|
||||||
image_dpi=db_settings.image_dpi or settings.OCR_IMAGE_DPI,
|
image_dpi=ocr_db_settings.image_dpi or settings.OCR_IMAGE_DPI,
|
||||||
clean=db_settings.unpaper_clean or settings.OCR_CLEAN,
|
clean=ocr_db_settings.unpaper_clean or settings.OCR_CLEAN,
|
||||||
deskew=db_settings.deskew or settings.OCR_DESKEW,
|
deskew=ocr_db_settings.deskew or settings.OCR_DESKEW,
|
||||||
rotate=db_settings.rotate_pages or settings.OCR_ROTATE_PAGES,
|
rotate=ocr_db_settings.rotate_pages or settings.OCR_ROTATE_PAGES,
|
||||||
rotate_threshold=(
|
rotate_threshold=(
|
||||||
db_settings.rotate_pages_threshold or settings.OCR_ROTATE_PAGES_THRESHOLD
|
ocr_db_settings.rotate_pages_threshold
|
||||||
|
or settings.OCR_ROTATE_PAGES_THRESHOLD
|
||||||
),
|
),
|
||||||
max_image_pixel=db_settings.max_image_pixels or settings.OCR_MAX_IMAGE_PIXELS,
|
max_image_pixel=ocr_db_settings.max_image_pixels
|
||||||
|
or settings.OCR_MAX_IMAGE_PIXELS,
|
||||||
color_conversion_strategy=(
|
color_conversion_strategy=(
|
||||||
db_settings.color_conversion_strategy
|
ocr_db_settings.color_conversion_strategy
|
||||||
or settings.OCR_COLOR_CONVERSION_STRATEGY
|
or settings.OCR_COLOR_CONVERSION_STRATEGY
|
||||||
),
|
),
|
||||||
user_args=user_args,
|
user_args=user_args,
|
||||||
|
@ -2,7 +2,6 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
from contextlib import AbstractContextManager
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
@ -17,28 +16,6 @@ from documents.tests.utils import FileSystemAssertsMixin
|
|||||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||||
from paperless_tesseract.parsers import post_process_text
|
from paperless_tesseract.parsers import post_process_text
|
||||||
|
|
||||||
image_to_string_calls = []
|
|
||||||
|
|
||||||
|
|
||||||
def fake_convert(input_file, output_file, **kwargs):
|
|
||||||
with open(input_file) as f:
|
|
||||||
lines = f.readlines()
|
|
||||||
|
|
||||||
for i, line in enumerate(lines):
|
|
||||||
with open(output_file % i, "w") as f2:
|
|
||||||
f2.write(line.strip())
|
|
||||||
|
|
||||||
|
|
||||||
class FakeImageFile(AbstractContextManager):
|
|
||||||
def __init__(self, fname):
|
|
||||||
self.fname = fname
|
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
return os.path.basename(self.fname)
|
|
||||||
|
|
||||||
|
|
||||||
class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||||
SAMPLE_FILES = Path(__file__).resolve().parent / "samples"
|
SAMPLE_FILES = Path(__file__).resolve().parent / "samples"
|
||||||
|
120
src/paperless_tesseract/tests/test_parser_custom_settings.py
Normal file
120
src/paperless_tesseract/tests/test_parser_custom_settings.py
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
from django.test import TestCase
|
||||||
|
from django.test import override_settings
|
||||||
|
|
||||||
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
from documents.tests.utils import FileSystemAssertsMixin
|
||||||
|
from paperless.models import CommonSettings
|
||||||
|
from paperless.models import OcrSettings
|
||||||
|
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||||
|
|
||||||
|
|
||||||
|
class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    """
    Verifies that values stored in the settings models are reflected in the
    parameters the parser builds for OCRmyPDF, even when the corresponding
    Django setting is overridden with a different value.
    """

    @staticmethod
    def get_params():
        """
        Build the OCRmyPDF parameters from a freshly constructed parser using
        fixed placeholder file names.
        """
        return RasterisedDocumentParser(None).construct_ocrmypdf_parameters(
            input_file="input.pdf",
            output_file="output.pdf",
            sidecar_file="sidecar.txt",
            mime_type="application/pdf",
            safe_fallback=False,
        )

    def test_db_settings_ocr_pages(self):
        """The database page count is used instead of OCR_PAGES."""
        with override_settings(OCR_PAGES=10):
            instance = OcrSettings.objects.all().first()
            instance.pages = 5
            instance.save()

            params = self.get_params()
        self.assertEqual(params["pages"], "1-5")

    def test_db_settings_ocr_language(self):
        """The database language is used instead of OCR_LANGUAGE."""
        with override_settings(OCR_LANGUAGE="eng+deu"):
            instance = OcrSettings.objects.all().first()
            instance.language = "fra+ita"
            instance.save()

            params = self.get_params()
        self.assertEqual(params["language"], "fra+ita")

    def test_db_settings_ocr_output_type(self):
        """The database output type is used instead of OCR_OUTPUT_TYPE."""
        # The output type is read from CommonSettings, so both the overridden
        # Django setting and the model instance must target the output type;
        # otherwise the database override is never exercised.
        with override_settings(OCR_OUTPUT_TYPE="pdfa-3"):
            instance = CommonSettings.objects.all().first()
            instance.output_type = CommonSettings.OutputTypeChoices.PDF_A
            instance.save()

            params = self.get_params()
        self.assertEqual(params["output_type"], "pdfa")

    def test_db_settings_ocr_mode(self):
        """The database OCR mode is used instead of OCR_MODE."""
        with override_settings(OCR_MODE="redo"):
            instance = OcrSettings.objects.all().first()
            instance.mode = OcrSettings.ModeChoices.SKIP
            instance.save()

            params = self.get_params()
        self.assertTrue(params["skip_text"])
        self.assertNotIn("redo_ocr", params)
        self.assertNotIn("force_ocr", params)

    def test_db_settings_ocr_clean(self):
        """The database unpaper clean choice is used instead of OCR_CLEAN."""
        with override_settings(OCR_CLEAN="clean-final"):
            instance = OcrSettings.objects.all().first()
            instance.unpaper_clean = OcrSettings.CleanChoices.CLEAN
            instance.save()

            params = self.get_params()
        self.assertTrue(params["clean"])
        self.assertNotIn("clean_final", params)

        with override_settings(OCR_CLEAN="clean-final"):
            instance = OcrSettings.objects.all().first()
            instance.unpaper_clean = OcrSettings.CleanChoices.FINAL
            instance.save()

            params = self.get_params()
        self.assertTrue(params["clean_final"])
        self.assertNotIn("clean", params)

    def test_db_settings_ocr_deskew(self):
        """The database deskew flag is used instead of OCR_DESKEW."""
        with override_settings(OCR_DESKEW=False):
            instance = OcrSettings.objects.all().first()
            instance.deskew = True
            instance.save()

            params = self.get_params()
        self.assertTrue(params["deskew"])

    def test_db_settings_ocr_rotate(self):
        """The database rotation flag and threshold override the settings."""
        with override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0):
            instance = OcrSettings.objects.all().first()
            instance.rotate_pages = True
            instance.rotate_pages_threshold = 15.0
            instance.save()

            params = self.get_params()
        self.assertTrue(params["rotate_pages"])
        self.assertAlmostEqual(params["rotate_pages_threshold"], 15.0)

    def test_db_settings_ocr_max_pixels(self):
        """The database pixel limit is used instead of OCR_MAX_IMAGE_PIXELS."""
        with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0):
            instance = OcrSettings.objects.all().first()
            instance.max_image_pixels = 1_000_000.0
            instance.save()

            params = self.get_params()
        self.assertAlmostEqual(params["max_image_mpixels"], 1.0)

    def test_db_settings_ocr_color_convert(self):
        """The database color conversion strategy overrides the setting."""
        with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"):
            instance = OcrSettings.objects.all().first()
            instance.color_conversion_strategy = (
                OcrSettings.ColorConvertChoices.INDEPENDENT
            )
            instance.save()

            params = self.get_params()
        self.assertEqual(
            params["color_conversion_strategy"],
            "UseDeviceIndependentColor",
        )
|
@ -18,6 +18,7 @@ omit =
|
|||||||
exclude_also =
|
exclude_also =
|
||||||
if settings.AUDIT_LOG_ENABLED:
|
if settings.AUDIT_LOG_ENABLED:
|
||||||
if AUDIT_LOG_ENABLED:
|
if AUDIT_LOG_ENABLED:
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
|
||||||
[mypy]
|
[mypy]
|
||||||
plugins = mypy_django_plugin.main, mypy_drf_plugin.main, numpy.typing.mypy_plugin
|
plugins = mypy_django_plugin.main, mypy_drf_plugin.main, numpy.typing.mypy_plugin
|
||||||
|
Loading…
x
Reference in New Issue
Block a user