37 lines
1.0 KiB
Python
37 lines
1.0 KiB
Python
import os
|
|
|
|
from django.conf import settings
|
|
from PIL import Image
|
|
from PIL import ImageDraw
|
|
from PIL import ImageFont
|
|
|
|
from documents.parsers import DocumentParser
|
|
|
|
|
|
class TextDocumentParser(DocumentParser):
|
|
"""
|
|
This parser directly parses a text document (.txt, .md, or .csv)
|
|
"""
|
|
|
|
logging_name = "paperless.parsing.text"
|
|
|
|
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
|
text = self.read_file_handle_unicode_errors(document_path)
|
|
|
|
img = Image.new("RGB", (500, 700), color="white")
|
|
draw = ImageDraw.Draw(img)
|
|
font = ImageFont.truetype(
|
|
font=settings.THUMBNAIL_FONT_NAME,
|
|
size=20,
|
|
layout_engine=ImageFont.Layout.BASIC,
|
|
)
|
|
draw.text((5, 5), text, font=font, fill="black")
|
|
|
|
out_path = os.path.join(self.tempdir, "thumb.webp")
|
|
img.save(out_path, format="WEBP")
|
|
|
|
return out_path
|
|
|
|
def parse(self, document_path, mime_type, file_name=None):
|
|
self.text = self.read_file_handle_unicode_errors(document_path)
|