fix: non-english charset filenames fed to gotenberg
This commit is contained in:
parent
7e12bd1bef
commit
8b023b19c6
@ -1,4 +1,5 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import os, shutil
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@ -12,6 +13,15 @@ from documents.parsers import ParseError
|
|||||||
from documents.parsers import make_thumbnail_from_pdf
|
from documents.parsers import make_thumbnail_from_pdf
|
||||||
|
|
||||||
|
|
||||||
|
def dummy_filename(func):
    """Decorate a parser method so it is handed an ASCII-named copy of the document.

    The wrapped callable is invoked with a path of the form
    ``<scratch dir>/tempname<suffixes>`` instead of the original
    ``document_path``, so the original (possibly non-English) file name is
    never exposed to whatever ``func`` forwards the path to.

    Args:
        func: Callable taking ``(receiver, document_path, *args, **kwargs)``.

    Returns:
        A wrapper with the same call signature and metadata as ``func``.

    Note:
        The copy lives in a private temporary directory that is deleted as
        soon as ``func`` returns or raises, so ``func`` must not keep the
        path it receives for later use.
    """
    # Local imports keep this block self-contained; the rest of the module
    # only needs to provide ``shutil`` and ``Path``.
    import functools
    import tempfile

    @functools.wraps(func)  # keep the wrapped method's __name__ / __doc__
    def inner(cls, document_path: Path, *args, **kwargs):
        # ``cls`` is simply the receiver (first positional argument) of the
        # decorated callable; it is passed through untouched.
        document_path = Path(document_path)

        # Path.suffixes splits on every dot, so a name such as
        # "文档.版本.txt" yields a non-ASCII ".版本" piece. Dropping those
        # pieces is what makes the dummy name really ASCII-only.
        ascii_suffixes = "".join(
            suffix for suffix in document_path.suffixes if suffix.isascii()
        )

        # A private scratch directory avoids three problems of copying next
        # to the original: name collisions between documents in the same
        # directory, SameFileError when the source is already called
        # "tempname<suffixes>", and stray copies left behind afterwards.
        with tempfile.TemporaryDirectory(prefix="dummy-filename-") as scratch_dir:
            new_document_path = Path(scratch_dir) / f"tempname{ascii_suffixes}"
            shutil.copyfile(str(document_path), str(new_document_path))
            return func(cls, new_document_path, *args, **kwargs)

    return inner
|
||||||
|
|
||||||
|
|
||||||
class TikaDocumentParser(DocumentParser):
|
class TikaDocumentParser(DocumentParser):
|
||||||
"""
|
"""
|
||||||
This parser sends documents to a local tika server
|
This parser sends documents to a local tika server
|
||||||
@ -48,6 +58,7 @@ class TikaDocumentParser(DocumentParser):
|
|||||||
)
|
)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
@dummy_filename
|
||||||
def parse(self, document_path: Path, mime_type: str, file_name=None):
|
def parse(self, document_path: Path, mime_type: str, file_name=None):
|
||||||
self.log.info(f"Sending {document_path} to Tika server")
|
self.log.info(f"Sending {document_path} to Tika server")
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user