fix: non-english charset filenames fed to gotenberg

This commit is contained in:
IKS 2023-12-11 21:00:52 +01:00
parent 7e12bd1bef
commit 8b023b19c6

View File

@ -1,4 +1,5 @@
from pathlib import Path from pathlib import Path
import os, shutil
import httpx import httpx
from django.conf import settings from django.conf import settings
@ -12,6 +13,15 @@ from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf from documents.parsers import make_thumbnail_from_pdf
def dummy_filename(func):
    """Decorate a method so it receives an ASCII-named copy of the document.

    The wrapped callable must take the document path as its first argument
    after ``self``/``cls``. On each call the document is copied to
    ``tempname<suffixes>`` inside a fresh temporary directory, the wrapped
    callable is invoked with that copy's path instead of the original, and
    the copy is deleted once the callable returns or raises.

    Only ASCII suffixes of the original name are kept, so the resulting file
    name is always pure ASCII (the commit introducing this helper states it
    is meant for non-English file names being sent to Gotenberg).

    NOTE(review): the copy is removed as soon as the wrapped callable
    returns, so the callable must not keep the path for later use.

    Args:
        func: The method to wrap; called as
            ``func(cls, document_path, *args, **kwargs)``.

    Returns:
        A wrapper with the same signature and return value as ``func``.
    """
    # Imported locally so this helper does not rely on changes to the
    # module-level import list.
    import functools
    import tempfile

    @functools.wraps(func)
    def inner(cls, document_path: Path, *args, **kwargs):
        document_path = Path(document_path)
        # ``suffixes`` splits on every dot, so a name such as
        # "dök.größe.txt" yields a non-ASCII ".größe" segment; drop those so
        # the dummy name stays ASCII while real extensions are preserved.
        str_suffixes = "".join(
            suffix for suffix in document_path.suffixes if suffix.isascii()
        )
        # A private temporary directory avoids clobbering a sibling file that
        # happens to be called "tempname.*", avoids clashes between
        # concurrent calls, and guarantees the copy is cleaned up afterwards.
        with tempfile.TemporaryDirectory() as scratch_dir:
            new_document_path = Path(scratch_dir) / f"tempname{str_suffixes}"
            shutil.copyfile(str(document_path), str(new_document_path))
            return func(cls, new_document_path, *args, **kwargs)

    return inner
class TikaDocumentParser(DocumentParser): class TikaDocumentParser(DocumentParser):
""" """
This parser sends documents to a local tika server This parser sends documents to a local tika server
@ -48,6 +58,7 @@ class TikaDocumentParser(DocumentParser):
) )
return [] return []
@dummy_filename
def parse(self, document_path: Path, mime_type: str, file_name=None): def parse(self, document_path: Path, mime_type: str, file_name=None):
self.log.info(f"Sending {document_path} to Tika server") self.log.info(f"Sending {document_path} to Tika server")