From 6e9a90b2014b1f937cd5aae92907d5ceb300252c Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Sun, 3 Mar 2024 18:31:14 -0800 Subject: [PATCH] Allows a user to set 0 to disable the limit of maximum pixels during OCR operations --- docs/advanced_usage.md | 2 +- docs/configuration.md | 2 ++ src/paperless/models.py | 2 +- src/paperless_tesseract/parsers.py | 18 ++++++------------ 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index d4ff80f87..863be639b 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -437,7 +437,7 @@ with Prometheus, as it exports metrics. For details on its capabilities, refer to the [Flower](https://flower.readthedocs.io/en/latest/index.html) documentation. -Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration/#PAPERLESS_ENABLE_FLOWER). +Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration.md#PAPERLESS_ENABLE_FLOWER). To configure Flower further, create a `flowerconfig.py` and place it into the `src/paperless` directory. For a Docker installation, you can use volumes to accomplish this: diff --git a/docs/configuration.md b/docs/configuration.md index 5fd14caf1..831476d41 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -766,6 +766,8 @@ but could result in missing text content. If unset, will default to the value determined by [Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS). + Setting this value to 0 will entirely disable the limit. See the below warning. + !!! note Increasing this limit could cause Paperless to consume additional diff --git a/src/paperless/models.py b/src/paperless/models.py index 72805dc56..1f6cfbced 100644 --- a/src/paperless/models.py +++ b/src/paperless/models.py @@ -151,7 +151,7 @@ class ApplicationConfiguration(AbstractSingletonModel): max_image_pixels = models.FloatField( verbose_name=_("Sets the maximum image size for decompression"), null=True, - validators=[MinValueValidator(1_000_000.0)], + validators=[MinValueValidator(0.0)], ) color_conversion_strategy = models.CharField( diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 09086585e..84570709e 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -293,20 +293,14 @@ class RasterisedDocumentParser(DocumentParser): f"they will not be used. Error: {e}", ) - if self.settings.max_image_pixel is not None: + if ( + self.settings.max_image_pixel is not None + and self.settings.max_image_pixel >= 0 + ): # Convert pixels to mega-pixels and provide to ocrmypdf max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0 - if max_pixels_mpixels > 0: - self.log.debug( - f"Calculated {max_pixels_mpixels} megapixels for OCR", - ) - - ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels - else: - self.log.warning( - "There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, " - "this value must be at least 1 megapixel if set", - ) + self.log.debug(f"Calculated {max_pixels_mpixels} megapixels for OCR") + ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels return ocrmypdf_args