From 0fcd6c826d5be9d3ae553b83e168ad955b13dd86 Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Thu, 28 Dec 2023 20:03:07 -0800
Subject: [PATCH] Couple more commenting and style fixes

---
 src/paperless/config.py            | 2 +-
 src/paperless_tesseract/parsers.py | 3 +++
 src/paperless_tika/parsers.py      | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/paperless/config.py b/src/paperless/config.py
index c5c1b753c..55d6dc3d3 100644
--- a/src/paperless/config.py
+++ b/src/paperless/config.py
@@ -33,7 +33,7 @@ class OutputTypeConfig:
 @dataclasses.dataclass
 class OcrConfig(OutputTypeConfig):
     """
-    Specific settings for the Tesseract based parser. Options generall
+    Specific settings for the Tesseract based parser. Options generally
     correspond almost directly to the OCRMyPDF options
     """
 
diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index f6d3847c0..047a171b2 100644
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -35,6 +35,9 @@ class RasterisedDocumentParser(DocumentParser):
     logging_name = "paperless.parsing.tesseract"
 
     def get_settings(self) -> OcrConfig:
+        """
+        This parser uses the OCR configuration settings to parse documents
+        """
         return OcrConfig()
 
     def extract_metadata(self, document_path, mime_type):
diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py
index 1b4609bdc..a6ef4b14a 100644
--- a/src/paperless_tika/parsers.py
+++ b/src/paperless_tika/parsers.py
@@ -119,6 +119,6 @@ class TikaDocumentParser(DocumentParser):
 
     def get_settings(self) -> OutputTypeConfig:
         """
-        This parser does not implement additional settings yet
+        This parser only uses the PDF output type configuration currently
         """
         return OutputTypeConfig()