diff --git a/src/paperless/config.py b/src/paperless/config.py
index c5c1b753c..55d6dc3d3 100644
--- a/src/paperless/config.py
+++ b/src/paperless/config.py
@@ -33,7 +33,7 @@ class OutputTypeConfig:
 @dataclasses.dataclass
 class OcrConfig(OutputTypeConfig):
     """
-    Specific settings for the Tesseract based parser. Options generall
+    Specific settings for the Tesseract based parser. Options generally
     correspond almost directly to the OCRMyPDF options
     """
 
diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index f6d3847c0..047a171b2 100644
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -35,6 +35,9 @@ class RasterisedDocumentParser(DocumentParser):
     logging_name = "paperless.parsing.tesseract"
 
     def get_settings(self) -> OcrConfig:
+        """
+        This parser uses the OCR configuration settings to parse documents
+        """
         return OcrConfig()
 
     def extract_metadata(self, document_path, mime_type):
diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py
index 1b4609bdc..a6ef4b14a 100644
--- a/src/paperless_tika/parsers.py
+++ b/src/paperless_tika/parsers.py
@@ -119,6 +119,6 @@ class TikaDocumentParser(DocumentParser):
 
     def get_settings(self) -> OutputTypeConfig:
         """
-        This parser does not implement additional settings yet
+        This parser only uses the PDF output type configuration currently
         """
         return OutputTypeConfig()