From 8b023b19c6fdb5fe6a902ac958582265462f075f Mon Sep 17 00:00:00 2001
From: IKS
Date: Mon, 11 Dec 2023 21:00:52 +0100
Subject: [PATCH] fix: non-english charset filenames fed to gotenberg

---
 src/paperless_tika/parsers.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py
index c9056d90d..e7b656c27 100644
--- a/src/paperless_tika/parsers.py
+++ b/src/paperless_tika/parsers.py
@@ -1,4 +1,7 @@
+import functools
+import shutil
+import tempfile
 from pathlib import Path
 
 import httpx
 from django.conf import settings
@@ -12,6 +15,33 @@ from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
 
 
+def dummy_filename(func):
+    """
+    Decorator that runs the wrapped parser method on a neutrally named copy
+    of the document, so that the original (possibly non-English) file name
+    is never passed on by the method.
+
+    The wrapped callable must take the document path as its first argument
+    after self; all other arguments are forwarded unchanged.
+    """
+
+    @functools.wraps(func)
+    def inner(self, document_path: Path, *args, **kwargs):
+        # A private scratch directory avoids the fixed "tempname" colliding
+        # with other files or concurrent parses, and it is removed as soon as
+        # the wrapped call returns or raises.
+        # NOTE(review): assumes func does not keep using the path after it
+        # returns - confirm against parse().
+        with tempfile.TemporaryDirectory() as scratch_dir:
+            # Only the final extension is kept: Path.suffixes would carry
+            # every dotted part of the original name into the new name.
+            safe_path = Path(scratch_dir) / f"tempname{document_path.suffix}"
+            shutil.copyfile(document_path, safe_path)
+            return func(self, safe_path, *args, **kwargs)
+
+    return inner
+
+
 class TikaDocumentParser(DocumentParser):
     """
     This parser sends documents to a local tika server
@@ -48,6 +78,7 @@ class TikaDocumentParser(DocumentParser):
             )
             return []
 
+    @dummy_filename
     def parse(self, document_path: Path, mime_type: str, file_name=None):
         self.log.info(f"Sending {document_path} to Tika server")
 