Allows a user to set 0 to disable the limit of maximum pixels during OCR operations

2024-03-03 18:31:14 -08:00 · 2024-03-03 18:31:14 -08:00 · 6e9a90b201
commit 6e9a90b201
parent 6379e7b54f
4 changed files with 10 additions and 14 deletions
--- a/docs/advanced_usage.md
+++ b/docs/advanced_usage.md
@@ -437,7 +437,7 @@ with Prometheus, as it exports metrics. For details on its capabilities,
 refer to the [Flower](https://flower.readthedocs.io/en/latest/index.html)
 documentation.
-Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration/#PAPERLESS_ENABLE_FLOWER).
+Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration.md#PAPERLESS_ENABLE_FLOWER).
 To configure Flower further, create a `flowerconfig.py` and
 place it into the `src/paperless` directory. For a Docker
 installation, you can use volumes to accomplish this:
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -766,6 +766,8 @@ but could result in missing text content.
    If unset, will default to the value determined by
    [Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS).
    Setting this value to 0 will entirely disable the limit. See the warning below.
    !!! note
        Increasing this limit could cause Paperless to consume additional
--- a/src/paperless/models.py
+++ b/src/paperless/models.py
@@ -151,7 +151,7 @@ class ApplicationConfiguration(AbstractSingletonModel):
    max_image_pixels = models.FloatField(
        verbose_name=_("Sets the maximum image size for decompression"),
        null=True,
-        validators=[MinValueValidator(1_000_000.0)],
+        validators=[MinValueValidator(0.0)],
    )
    color_conversion_strategy = models.CharField(
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -293,20 +293,14 @@ class RasterisedDocumentParser(DocumentParser):
                     f"they will not be used. Error: {e}",
                 )
-        if self.settings.max_image_pixel is not None:
+        if (
+            self.settings.max_image_pixel is not None
+            and self.settings.max_image_pixel >= 0
+        ):
             # Convert pixels to mega-pixels and provide to ocrmypdf
             max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0
-            if max_pixels_mpixels > 0:
+            self.log.debug(f"Calculated {max_pixels_mpixels} megapixels for OCR")
-                self.log.debug(
+            ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
-                    f"Calculated {max_pixels_mpixels} megapixels for OCR",
-                )
-                ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
-            else:
-                self.log.warning(
-                    "There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, "
-                    "this value must be at least 1 megapixel if set",
-                )
         return ocrmypdf_args