mime type handling

This commit is contained in:
Jonas Winkler
2020-11-20 13:31:03 +01:00
parent cee010cc21
commit f976a0b4ba
19 changed files with 163 additions and 146 deletions

View File

@@ -1,5 +1,3 @@
import re
from .parsers import RasterisedDocumentParser
@@ -7,12 +5,9 @@ def tesseract_consumer_declaration(sender, **kwargs):
return {
"parser": RasterisedDocumentParser,
"weight": 0,
"test": tesseract_consumer_test
"mime_types": [
"application/pdf",
"image/jpeg",
"image/png"
]
}
# File-name pattern for the document/image extensions handled here.
# The pattern itself is lower-case only; callers are expected to lower-case
# the name before matching (tesseract_consumer_test does this).
MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")


def tesseract_consumer_test(doc):
    """Check whether *doc* ends in one of the supported file extensions.

    The name is lower-cased before matching, so the check is
    case-insensitive with respect to the extension.

    NOTE(review): ``doc`` is presumably a file name or path string; it only
    needs to provide ``.lower()`` returning a ``str`` -- confirm with callers.

    Returns:
        The ``re.Match`` object (truthy) when the name matches
        ``MATCHING_FILES``, otherwise ``None``.
    """
    return MATCHING_FILES.match(doc.lower())