From 26e95ea11783aedac08a9efbb8fef901ef53f59e Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Mon, 4 Mar 2024 09:39:42 -0800 Subject: [PATCH] Suggestions from review --- docs/configuration.md | 2 +- src/paperless_tesseract/parsers.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 831476d41..c7b710c66 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -777,7 +777,7 @@ but could result in missing text content. !!! warning The limit is intended to prevent malicious files from consuming - system resources and causing crashes and other errors. Only increase + system resources and causing crashes and other errors. Only change this value if you are certain your documents are not malicious and you need the text which was not OCRed diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 84570709e..3e19bd8b5 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -299,7 +299,11 @@ class RasterisedDocumentParser(DocumentParser): ): # Convert pixels to mega-pixels and provide to ocrmypdf max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0 - self.log.debug(f"Calculated {max_pixels_mpixels} megapixels for OCR") + if max_pixels_mpixels == 0: + msg = "OCR pixel limit is disabled!" + else: + msg = f"Calculated {max_pixels_mpixels} megapixels for OCR" + self.log.debug(msg) ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels return ocrmypdf_args