Couple more commenting and style fixes
This commit is contained in:
parent
8b3360c462
commit
0fcd6c826d
@ -33,7 +33,7 @@ class OutputTypeConfig:
|
|||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class OcrConfig(OutputTypeConfig):
|
class OcrConfig(OutputTypeConfig):
|
||||||
"""
|
"""
|
||||||
Specific settings for the Tesseract based parser. Options generall
|
Specific settings for the Tesseract based parser. Options generally
|
||||||
correspond almost directly to the OCRMyPDF options
|
correspond almost directly to the OCRMyPDF options
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -35,6 +35,9 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
logging_name = "paperless.parsing.tesseract"
|
logging_name = "paperless.parsing.tesseract"
|
||||||
|
|
||||||
def get_settings(self) -> OcrConfig:
|
def get_settings(self) -> OcrConfig:
|
||||||
|
"""
|
||||||
|
This parser uses the OCR configuration settings to parse documents
|
||||||
|
"""
|
||||||
return OcrConfig()
|
return OcrConfig()
|
||||||
|
|
||||||
def extract_metadata(self, document_path, mime_type):
|
def extract_metadata(self, document_path, mime_type):
|
||||||
|
@ -119,6 +119,6 @@ class TikaDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
def get_settings(self) -> OutputTypeConfig:
|
def get_settings(self) -> OutputTypeConfig:
|
||||||
"""
|
"""
|
||||||
This parser does not implement additional settings yet
|
This parser only uses the PDF output type configuration currently
|
||||||
"""
|
"""
|
||||||
return OutputTypeConfig()
|
return OutputTypeConfig()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user