At least partially working for the tesseract parser
This commit is contained in:
@@ -420,7 +420,7 @@ class Consumer(LoggingMixin):
|
||||
|
||||
document_parser: DocumentParser = parser_class(
|
||||
self.logging_group,
|
||||
progress_callback,
|
||||
progress_callback=progress_callback,
|
||||
)
|
||||
|
||||
self.log.debug(f"Parser: {type(document_parser).__name__}")
|
||||
|
||||
@@ -125,8 +125,10 @@ def get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentPar
|
||||
if not options:
|
||||
return None
|
||||
|
||||
best_parser = sorted(options, key=lambda _: _["weight"], reverse=True)[0]
|
||||
|
||||
# Return the parser with the highest weight.
|
||||
return sorted(options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
||||
return best_parser["parser"]
|
||||
|
||||
|
||||
def run_convert(
|
||||
@@ -318,6 +320,7 @@ class DocumentParser(LoggingMixin):
|
||||
def __init__(self, logging_group, progress_callback=None):
|
||||
super().__init__()
|
||||
self.logging_group = logging_group
|
||||
self.parser_settings = self.get_settings()
|
||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||
self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
|
||||
@@ -330,6 +333,10 @@ class DocumentParser(LoggingMixin):
|
||||
if self.progress_callback:
|
||||
self.progress_callback(current_progress, max_progress)
|
||||
|
||||
def get_settings(self):
|
||||
# return None
|
||||
raise NotImplementedError
|
||||
|
||||
def read_file_handle_unicode_errors(self, filepath: Path) -> str:
|
||||
"""
|
||||
Helper utility for reading from a file, and handling a problem with its
|
||||
|
||||
Reference in New Issue
Block a user