Problems with migration testing still need to be figured out

This commit is contained in:
Trenton H
2023-12-11 17:12:56 -08:00
parent 30281bd593
commit 5266bd1590
11 changed files with 175 additions and 38 deletions

View File

@@ -1,4 +1,4 @@
# Generated by Django 4.2.7 on 2023-12-07 22:52
# Generated by Django 4.2.7 on 2023-12-11 19:59
import django.core.validators
from django.db import migrations
@@ -28,8 +28,23 @@ class Migration(migrations.Migration):
verbose_name="ID",
),
),
("pages", models.PositiveIntegerField(blank=True, null=True)),
("language", models.CharField(blank=True, max_length=32, null=True)),
(
"pages",
models.PositiveIntegerField(
blank=True,
null=True,
verbose_name="Do OCR from page 1 to this value",
),
),
(
"language",
models.CharField(
blank=True,
max_length=32,
null=True,
verbose_name="Do OCR using these languages",
),
),
(
"output_type",
models.CharField(
@@ -43,6 +58,7 @@ class Migration(migrations.Migration):
],
max_length=8,
null=True,
verbose_name="Sets the output PDF type",
),
),
(
@@ -57,6 +73,7 @@ class Migration(migrations.Migration):
],
max_length=8,
null=True,
verbose_name="Sets the OCR mode",
),
),
(
@@ -70,9 +87,16 @@ class Migration(migrations.Migration):
],
max_length=16,
null=True,
verbose_name="Controls the generation of an archive file",
),
),
(
"image_dpi",
models.PositiveIntegerField(
null=True,
verbose_name="Sets image DPI fallback value",
),
),
("image_dpi", models.PositiveIntegerField(null=True)),
(
"unpaper_clean",
models.CharField(
@@ -84,15 +108,26 @@ class Migration(migrations.Migration):
],
max_length=16,
null=True,
verbose_name="Controls the unpaper cleaning",
),
),
(
"deskew",
models.BooleanField(null=True, verbose_name="Enables deskew"),
),
(
"rotate_pages",
models.BooleanField(
null=True,
verbose_name="Enables page rotation",
),
),
("deskew", models.BooleanField(null=True)),
("rotate_pages", models.BooleanField(null=True)),
(
"rotate_pages_threshold",
models.FloatField(
null=True,
validators=[django.core.validators.MinValueValidator(0.0)],
verbose_name="Sets the threshold for rotation of pages",
),
),
(
@@ -102,6 +137,7 @@ class Migration(migrations.Migration):
validators=[
django.core.validators.MinValueValidator(1000000.0),
],
verbose_name="Sets the maximum image for decompression",
),
),
(
@@ -117,9 +153,16 @@ class Migration(migrations.Migration):
],
max_length=32,
null=True,
verbose_name="Sets the Ghostscript color conversion strategy",
),
),
(
"user_args",
models.JSONField(
null=True,
verbose_name="Adds additional user arguments for OCRMyPDF",
),
),
("user_args", models.JSONField(blank=True, null=True)),
],
options={
"verbose_name": "ocr settings",

View File

@@ -37,11 +37,21 @@ class OcrSettings(models.Model):
GRAY = ("Gray", _("Gray"))
CMYK = ("CMYK", _("CMYK"))
pages = models.PositiveIntegerField(null=True, blank=True)
pages = models.PositiveIntegerField(
verbose_name=_("Do OCR from page 1 to this value"),
null=True,
blank=True,
)
language = models.CharField(null=True, blank=True, max_length=32)
language = models.CharField(
verbose_name=_("Do OCR using these languages"),
null=True,
blank=True,
max_length=32,
)
output_type = models.CharField(
verbose_name=_("Sets the output PDF type"),
null=True,
blank=True,
max_length=8,
@@ -49,6 +59,7 @@ class OcrSettings(models.Model):
)
mode = models.CharField(
verbose_name=_("Sets the OCR mode"),
null=True,
blank=True,
max_length=8,
@@ -56,43 +67,58 @@ class OcrSettings(models.Model):
)
skip_archive_file = models.CharField(
verbose_name=_("Controls the generation of an archive file"),
null=True,
blank=True,
max_length=16,
choices=ArchiveFileChoices.choices,
)
image_dpi = models.PositiveIntegerField(null=True)
image_dpi = models.PositiveIntegerField(
verbose_name=_("Sets image DPI fallback value"),
null=True,
)
# Can't call it clean, that's a model method
unpaper_clean = models.CharField(
verbose_name=_("Controls the unpaper cleaning"),
null=True,
blank=True,
max_length=16,
choices=CleanChoices.choices,
)
deskew = models.BooleanField(null=True)
deskew = models.BooleanField(verbose_name=_("Enables deskew"), null=True)
rotate_pages = models.BooleanField(null=True)
rotate_pages = models.BooleanField(
verbose_name=_("Enables page rotation"),
null=True,
)
rotate_pages_threshold = models.FloatField(
verbose_name=_("Sets the threshold for rotation of pages"),
null=True,
validators=[MinValueValidator(0.0)],
)
max_image_pixels = models.FloatField(
verbose_name=_("Sets the maximum image for decompression"),
null=True,
validators=[MinValueValidator(1_000_000.0)],
)
color_conversion_strategy = models.CharField(
verbose_name=_("Sets the Ghostscript color conversion strategy"),
blank=True,
null=True,
max_length=32,
choices=ColorConvertChoices.choices,
)
user_args = models.JSONField(null=True)
user_args = models.JSONField(
verbose_name=_("Adds additional user arguments for OCRMyPDF"),
null=True,
)
class Meta:
verbose_name = _("ocr settings")
@@ -105,7 +131,7 @@ class OcrSettings(models.Model):
# Without the self.pk check, this error would also be raised
# when updating the existing model instance
raise ValidationError(
"There is can be only one JuicerBaseSettings instance",
"There is can be only one OcrSettings instance",
)
return super().save(*args, **kwargs)

View File

@@ -0,0 +1,9 @@
from rest_framework import serializers
from paperless_tesseract.models import OcrSettings
class OcrSettingsSerializer(serializers.ModelSerializer):
    """Serializes every field of the OcrSettings model."""

    class Meta:
        model = OcrSettings
        # "__all__" is the ModelSerializer sentinel meaning "every model
        # field". A list such as ["all"] would instead be looked up as a
        # single field literally named "all", which the model does not have.
        fields = "__all__"

View File

@@ -26,33 +26,50 @@ class OcrSetting:
def get_ocr_settings() -> OcrSetting:
    """
    Build the effective OCR settings.

    The first OcrSettingModel row, if one exists, supplies the values. Any
    value that is not configured there (no row at all, or the column is
    NULL) falls back to the matching ``settings.OCR_*`` value.

    Returns:
        An OcrSetting populated from the database with settings fallbacks.
    """
    db_settings = OcrSettingModel.objects.all().first()

    def _db_or_default(field_name, default):
        # Compare against None instead of relying on truthiness, so an
        # explicit False or 0 stored in the database still takes priority
        # over the value from settings.
        if db_settings is None:
            return default
        db_value = getattr(db_settings, field_name)
        return default if db_value is None else db_value

    # A non-empty database value wins; otherwise the settings value, when
    # present, is parsed as JSON.
    user_args = None
    if db_settings is not None and db_settings.user_args:
        user_args = db_settings.user_args
    elif settings.OCR_USER_ARGS is not None:
        user_args = json.loads(settings.OCR_USER_ARGS)

    return OcrSetting(
        pages=_db_or_default("pages", settings.OCR_PAGES),
        language=_db_or_default("language", settings.OCR_LANGUAGE),
        output_type=_db_or_default("output_type", settings.OCR_OUTPUT_TYPE),
        mode=_db_or_default("mode", settings.OCR_MODE),
        skip_archive_file=_db_or_default(
            "skip_archive_file",
            settings.OCR_SKIP_ARCHIVE_FILE,
        ),
        image_dpi=_db_or_default("image_dpi", settings.OCR_IMAGE_DPI),
        clean=_db_or_default("unpaper_clean", settings.OCR_CLEAN),
        deskew=_db_or_default("deskew", settings.OCR_DESKEW),
        rotate=_db_or_default("rotate_pages", settings.OCR_ROTATE_PAGES),
        rotate_threshold=_db_or_default(
            "rotate_pages_threshold",
            settings.OCR_ROTATE_PAGES_THRESHOLD,
        ),
        max_image_pixel=_db_or_default(
            "max_image_pixels",
            settings.OCR_MAX_IMAGE_PIXELS,
        ),
        color_conversion_strategy=_db_or_default(
            "color_conversion_strategy",
            settings.OCR_COLOR_CONVERSION_STRATEGY,
        ),
        user_args=user_args,
    )

View File

@@ -0,0 +1,14 @@
from rest_framework.permissions import IsAuthenticated
from rest_framework.viewsets import ModelViewSet
from paperless_tesseract.models import OcrSettings
from paperless_tesseract.serialisers import OcrSettingsSerializer
class OcrSettingsViewSet(ModelViewSet):
    # ModelViewSet over the OcrSettings model. Described with comments
    # rather than a docstring so the class attributes stay exactly as the
    # framework saw them before.

    # Only authenticated requests are allowed through.
    permission_classes = (IsAuthenticated,)

    # Request/response bodies are handled by the OcrSettings serializer.
    serializer_class = OcrSettingsSerializer

    queryset = OcrSettings.objects
    model = OcrSettings