Allows a user to set the maximum image pixel limit to 0 to disable it during OCR operations

This commit is contained in:
Trenton H 2024-03-03 18:31:14 -08:00
parent 6379e7b54f
commit 6e9a90b201
4 changed files with 10 additions and 14 deletions

View File

@ -437,7 +437,7 @@ with Prometheus, as it exports metrics. For details on its capabilities,
refer to the [Flower](https://flower.readthedocs.io/en/latest/index.html)
documentation.
Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration/#PAPERLESS_ENABLE_FLOWER).
Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration.md#PAPERLESS_ENABLE_FLOWER).
To configure Flower further, create a `flowerconfig.py` and
place it into the `src/paperless` directory. For a Docker
installation, you can use volumes to accomplish this:

View File

@ -766,6 +766,8 @@ but could result in missing text content.
If unset, will default to the value determined by
[Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS).
Setting this value to 0 will entirely disable the limit. See the warning below.
!!! note
Increasing this limit could cause Paperless to consume additional

View File

@ -151,7 +151,7 @@ class ApplicationConfiguration(AbstractSingletonModel):
max_image_pixels = models.FloatField(
verbose_name=_("Sets the maximum image size for decompression"),
null=True,
validators=[MinValueValidator(1_000_000.0)],
validators=[MinValueValidator(0.0)],
)
color_conversion_strategy = models.CharField(

View File

@ -293,20 +293,14 @@ class RasterisedDocumentParser(DocumentParser):
f"they will not be used. Error: {e}",
)
if self.settings.max_image_pixel is not None:
if (
self.settings.max_image_pixel is not None
and self.settings.max_image_pixel >= 0
):
# Convert pixels to mega-pixels and provide to ocrmypdf
max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0
if max_pixels_mpixels > 0:
self.log.debug(
f"Calculated {max_pixels_mpixels} megapixels for OCR",
)
self.log.debug(f"Calculated {max_pixels_mpixels} megapixels for OCR")
ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
else:
self.log.warning(
"There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, "
"this value must be at least 1 megapixel if set",
)
return ocrmypdf_args