Couple more commenting and style fixes

This commit is contained in:
Trenton H 2023-12-28 20:03:07 -08:00
parent 8b3360c462
commit 0fcd6c826d
3 changed files with 5 additions and 2 deletions

View File

@@ -33,7 +33,7 @@ class OutputTypeConfig:
@dataclasses.dataclass @dataclasses.dataclass
class OcrConfig(OutputTypeConfig): class OcrConfig(OutputTypeConfig):
""" """
Specific settings for the Tesseract based parser. Options generall Specific settings for the Tesseract based parser. Options generally
correspond almost directly to the OCRMyPDF options correspond almost directly to the OCRMyPDF options
""" """

View File

@@ -35,6 +35,9 @@ class RasterisedDocumentParser(DocumentParser):
logging_name = "paperless.parsing.tesseract" logging_name = "paperless.parsing.tesseract"
def get_settings(self) -> OcrConfig: def get_settings(self) -> OcrConfig:
"""
This parser uses the OCR configuration settings to parse documents
"""
return OcrConfig() return OcrConfig()
def extract_metadata(self, document_path, mime_type): def extract_metadata(self, document_path, mime_type):

View File

@@ -119,6 +119,6 @@ class TikaDocumentParser(DocumentParser):
def get_settings(self) -> OutputTypeConfig: def get_settings(self) -> OutputTypeConfig:
""" """
This parser does not implement additional settings yet This parser only uses the PDF output type configuration currently
""" """
return OutputTypeConfig() return OutputTypeConfig()