update feature ocr

This commit is contained in:
otxtan@gmail.com 2024-05-22 14:17:12 +07:00
parent fcd079b2fe
commit 572506720f
15 changed files with 149 additions and 84 deletions

5
.env
View File

@ -10,8 +10,7 @@ PAPERLESS_DBNAME=tc_edoc
PAPERLESS_DBUSER=tc_edoc PAPERLESS_DBUSER=tc_edoc
PAPERLESS_DBPASS=27M2MV58Re2Y PAPERLESS_DBPASS=27M2MV58Re2Y
PAPERLESS_DBSSLMODE=prefer PAPERLESS_DBSSLMODE=prefer
OCR_CUSTOM_USERNAME = test
OCR_CUSTOM_PASSWORD = test
URL_LOGIN = https://ocr-core-api.tcgroup.vn/token
URL_UPLOAD_FILE = https://ocr-core-api.tcgroup.vn/api/v1/file/upload URL_UPLOAD_FILE = https://ocr-core-api.tcgroup.vn/api/v1/file/upload
URL_OCR_BY_FILEID = https://ocr-core-api.tcgroup.vn/api/v1/ocr/general URL_OCR_BY_FILEID = https://ocr-core-api.tcgroup.vn/api/v1/ocr/general

View File

@ -425,13 +425,6 @@
<context context-type="linenumber">22</context> <context context-type="linenumber">22</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="7991430199894172363" datatype="html">
<source>Read the documentation about this setting</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/config/config.component.html</context>
<context context-type="linenumber">25</context>
</context-group>
</trans-unit>
<trans-unit id="2180291763949669799" datatype="html"> <trans-unit id="2180291763949669799" datatype="html">
<source>Enable</source> <source>Enable</source>
<context-group purpose="location"> <context-group purpose="location">
@ -7217,18 +7210,25 @@
<context context-type="linenumber">164</context> <context context-type="linenumber">164</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="4301951240854951353" datatype="html">
<source>OCR Key</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
<context context-type="linenumber">171</context>
</context-group>
</trans-unit>
<trans-unit id="7106327322456204362" datatype="html"> <trans-unit id="7106327322456204362" datatype="html">
<source>Application Logo</source> <source>Application Logo</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/data/paperless-config.ts</context> <context context-type="sourcefile">src/app/data/paperless-config.ts</context>
<context context-type="linenumber">171</context> <context context-type="linenumber">178</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="2684743776608068095" datatype="html"> <trans-unit id="2684743776608068095" datatype="html">
<source>Application Title</source> <source>Application Title</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/data/paperless-config.ts</context> <context context-type="sourcefile">src/app/data/paperless-config.ts</context>
<context context-type="linenumber">178</context> <context context-type="linenumber">185</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="5948496158474272829" datatype="html"> <trans-unit id="5948496158474272829" datatype="html">

View File

@ -22,9 +22,9 @@
<div class="card-title"> <div class="card-title">
<h6> <h6>
{{option.title}} {{option.title}}
<a class="btn btn-sm btn-link" title="Read the documentation about this setting" i18n-title [href]="getDocsUrl(option.config_key)" target="_blank" referrerpolicy="no-referrer"> <!-- <a class="btn btn-sm btn-link" title="Read the documentation about this setting" i18n-title [href]="getDocsUrl(option.config_key)" target="_blank" referrerpolicy="no-referrer">
<i-bs name="info-circle"></i-bs> <i-bs name="info-circle"></i-bs>
</a> </a> -->
</h6> </h6>
</div> </div>
<div class="mb-n3"> <div class="mb-n3">

View File

@ -166,6 +166,13 @@ export const PaperlessConfigOptions: ConfigOption[] = [
config_key: 'PAPERLESS_OCR_USER_ARGS', config_key: 'PAPERLESS_OCR_USER_ARGS',
category: ConfigCategory.OCR, category: ConfigCategory.OCR,
}, },
{
  key: 'ocr_key',
  title: $localize`OCR Key`,
  type: ConfigOptionType.String,
  // Must be unique per option: 'PAPERLESS_APP_TITLE' already belongs to the
  // `app_title` entry. Named after the other PAPERLESS_OCR_* settings.
  config_key: 'PAPERLESS_OCR_KEY',
  category: ConfigCategory.OCR,
},
{ {
key: 'app_logo', key: 'app_logo',
title: $localize`Application Logo`, title: $localize`Application Logo`,
@ -196,6 +203,7 @@ export interface PaperlessConfig extends ObjectWithId {
max_image_pixels: number max_image_pixels: number
color_conversion_strategy: ColorConvertConfig color_conversion_strategy: ColorConvertConfig
user_args: object user_args: object
ocr_key: string
app_logo: string app_logo: string
app_title: string app_title: string
} }

View File

@ -497,7 +497,7 @@
<context context-type="sourcefile">src/app/components/admin/config/config.component.html</context> <context context-type="sourcefile">src/app/components/admin/config/config.component.html</context>
<context context-type="linenumber">34</context> <context context-type="linenumber">34</context>
</context-group> </context-group>
<target state="needs-translation">Enable</target> <target state="needs-translation">Cho phép</target>
</trans-unit> </trans-unit>
<trans-unit id="3823219296477075982" datatype="html"> <trans-unit id="3823219296477075982" datatype="html">
<source>Discard</source> <source>Discard</source>
@ -4587,7 +4587,7 @@
<context context-type="sourcefile">src/app/components/common/input/switch/switch.component.html</context> <context context-type="sourcefile">src/app/components/common/input/switch/switch.component.html</context>
<context context-type="linenumber">39</context> <context context-type="linenumber">39</context>
</context-group> </context-group>
<target state="needs-translation">Note: value has not yet been set and will not apply until explicitly changed</target> <target state="needs-translation">Lưu ý: giá trị chưa được đặt và sẽ không áp dụng cho đến khi thay đổi rõ ràng</target>
</trans-unit> </trans-unit>
<trans-unit id="6560126119609945418" datatype="html"> <trans-unit id="6560126119609945418" datatype="html">
<source>Add tag</source> <source>Add tag</source>
@ -5970,7 +5970,7 @@
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">724</context> <context context-type="linenumber">724</context>
</context-group> </context-group>
<target state="needs-translation">Do you really want to delete document "<x id="PH" equiv-text="this.document.title"/>"?</target> <target state="needs-translation">Bạn có thực sự muốn xóa tài liệu "<x id="PH" equiv-text="this.document.title"/>"?</target>
</trans-unit> </trans-unit>
<trans-unit id="6691075929777935948" datatype="html"> <trans-unit id="6691075929777935948" datatype="html">
<source>The files for this document will be deleted permanently. This operation cannot be undone.</source> <source>The files for this document will be deleted permanently. This operation cannot be undone.</source>
@ -5978,7 +5978,7 @@
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">725</context> <context context-type="linenumber">725</context>
</context-group> </context-group>
<target state="needs-translation">The files for this document will be deleted permanently. This operation cannot be undone.</target> <target state="needs-translation">Các tập tin cho tài liệu này sẽ bị xóa vĩnh viễn. Không thể hoàn tác thao tác này</target>
</trans-unit> </trans-unit>
<trans-unit id="719892092227206532" datatype="html"> <trans-unit id="719892092227206532" datatype="html">
<source>Delete document</source> <source>Delete document</source>
@ -5986,7 +5986,7 @@
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">727</context> <context context-type="linenumber">727</context>
</context-group> </context-group>
<target state="needs-translation">Delete document</target> <target state="needs-translation">Xóa tài liệu</target>
</trans-unit> </trans-unit>
<trans-unit id="7295637485862454066" datatype="html"> <trans-unit id="7295637485862454066" datatype="html">
<source>Error deleting document</source> <source>Error deleting document</source>
@ -6724,7 +6724,7 @@
<context context-type="sourcefile">src/app/components/document-list/document-list.component.html</context> <context context-type="sourcefile">src/app/components/document-list/document-list.component.html</context>
<context context-type="linenumber">146</context> <context context-type="linenumber">146</context>
</context-group> </context-group>
<target state="needs-translation">Sort by correspondent</target> <target state="needs-translation">Sắp xếp theo người biên tập</target>
</trans-unit> </trans-unit>
<trans-unit id="2066713941761361709" datatype="html"> <trans-unit id="2066713941761361709" datatype="html">
<source>Sort by title</source> <source>Sort by title</source>
@ -7132,7 +7132,7 @@
<context context-type="sourcefile">src/app/components/manage/correspondent-list/correspondent-list.component.ts</context> <context context-type="sourcefile">src/app/components/manage/correspondent-list/correspondent-list.component.ts</context>
<context context-type="linenumber">67</context> <context context-type="linenumber">67</context>
</context-group> </context-group>
<target state="needs-translation">Do you really want to delete the correspondent "<x id="PH" equiv-text="object.name"/>"?</target> <target state="needs-translation">Bạn có thực sự muốn xóa người biên tập "<x id="PH" equiv-text="object.name"/>"?</target>
</trans-unit> </trans-unit>
<trans-unit id="8384138406252790442" datatype="html"> <trans-unit id="8384138406252790442" datatype="html">
<source>Customize the data fields that can be attached to documents.</source> <source>Customize the data fields that can be attached to documents.</source>
@ -7564,7 +7564,7 @@
<context context-type="sourcefile">src/app/components/manage/management-list/management-list.component.ts</context> <context context-type="sourcefile">src/app/components/manage/management-list/management-list.component.ts</context>
<context context-type="linenumber">180</context> <context context-type="linenumber">180</context>
</context-group> </context-group>
<target state="needs-translation">Successfully updated <x id="PH" equiv-text="this.typeName"/>.</target> <target state="needs-translation">Đã cập nhật thành công<x id="PH" equiv-text="this.typeName"/>.</target>
</trans-unit> </trans-unit>
<trans-unit id="6442673774206210733" datatype="html"> <trans-unit id="6442673774206210733" datatype="html">
<source>Error occurred while saving <x id="PH" equiv-text="this.typeName"/>.</source> <source>Error occurred while saving <x id="PH" equiv-text="this.typeName"/>.</source>
@ -7668,7 +7668,7 @@
<context context-type="sourcefile">src/app/components/manage/tag-list/tag-list.component.ts</context> <context context-type="sourcefile">src/app/components/manage/tag-list/tag-list.component.ts</context>
<context context-type="linenumber">53</context> <context context-type="linenumber">53</context>
</context-group> </context-group>
<target state="needs-translation">Do you really want to delete the tag "<x id="PH" equiv-text="object.name"/>"?</target> <target state="needs-translation">Bạn có thực sự muốn xóa thẻ "<x id="PH" equiv-text="object.name"/>"?</target>
</trans-unit> </trans-unit>
<trans-unit id="1229748338333965418" datatype="html"> <trans-unit id="1229748338333965418" datatype="html">
<source>Use workflows to customize the behavior of TC GROUP when events &apos;trigger&apos; a workflow.</source> <source>Use workflows to customize the behavior of TC GROUP when events &apos;trigger&apos; a workflow.</source>
@ -7932,7 +7932,7 @@
<context context-type="sourcefile">src/app/data/paperless-config.ts</context> <context context-type="sourcefile">src/app/data/paperless-config.ts</context>
<context context-type="linenumber">50</context> <context context-type="linenumber">50</context>
</context-group> </context-group>
<target state="needs-translation">General Settings</target> <target state="needs-translation">Cài đặt chung</target>
</trans-unit> </trans-unit>
<trans-unit id="2762851116637676072" datatype="html"> <trans-unit id="2762851116637676072" datatype="html">
<source>OCR Settings</source> <source>OCR Settings</source>

View File

@ -36,7 +36,7 @@ from documents.models import Tag
from documents.models import Workflow from documents.models import Workflow
from documents.models import WorkflowAction from documents.models import WorkflowAction
from documents.models import WorkflowTrigger from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser from documents.parsers import DocumentParser, custom_get_parser_class_for_mime_type
from documents.parsers import ParseError from documents.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import parse_date from documents.parsers import parse_date
@ -557,7 +557,7 @@ class Consumer(LoggingMixin):
self.log.debug(f"Detected mime type: {mime_type}") self.log.debug(f"Detected mime type: {mime_type}")
# Based on the mime type, get the parser for that type # Based on the mime type, get the parser for that type
parser_class: Optional[type[DocumentParser]] = get_parser_class_for_mime_type( parser_class: Optional[type[DocumentParser]] = custom_get_parser_class_for_mime_type(
mime_type, mime_type,
) )
if not parser_class: if not parser_class:

View File

@ -1,30 +0,0 @@
# Generated by Django 4.2.11 on 2024-05-15 04:18
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
# Creates the Warehouse table: a uniquely named node with an optional owner
# and an optional self-referencing parent.
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('documents', '1046_workflowaction_remove_all_correspondents_and_more'),
]
operations = [
migrations.CreateModel(
name='Warehouse',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=256, unique=True, verbose_name='name')),
# NOTE(review): choices and default are integers on a CharField; confirm
# whether string keys or an integer field were intended.
('type', models.CharField(blank=True, choices=[(1, 'Warehouse'), (2, 'Shelf'), (3, 'Boxcase')], default=1, max_length=20, null=True)),
# Deleting the owning user keeps the row and nulls the owner.
('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL, verbose_name='owner')),
# Deleting a parent cascades to every row that points at it.
('parent_warehouse', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='parent_warehouses', to='documents.warehouse')),
],
options={
'verbose_name': 'warehouse',
'verbose_name_plural': 'warehouses',
},
),
]

View File

@ -14,11 +14,13 @@ from typing import Optional
from django.conf import settings from django.conf import settings
from django.utils import timezone from django.utils import timezone
import requests
from documents.loggers import LoggingMixin from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration from documents.signals import document_consumer_declaration
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess from documents.utils import run_subprocess
from paperless.models import ApplicationConfiguration
# This regular expression will try to find dates in the document at # This regular expression will try to find dates in the document at
# hand and will match the following formats: # hand and will match the following formats:
@ -129,6 +131,38 @@ def get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentPar
# Return the parser with the highest weight. # Return the parser with the highest weight.
return best_parser["parser"] return best_parser["parser"]
def custom_get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentParser"]]:
    """
    Pick the parser for ``mime_type``, gated on the configured OCR key.

    Every parser declaration that supports ``mime_type`` is ranked by weight.
    The highest-weight parser is returned only when an OCR key is configured
    and the OCR service does not reject it (HTTP 401). Otherwise the
    runner-up parser is returned; if there is only one candidate, that
    candidate is returned either way.

    :param mime_type: detected mime type of the document being consumed
    :return: the selected parser, or None if no parser supports the mime type
    """
    options = [
        declaration
        for _, declaration in document_consumer_declaration.send(None)
        if mime_type in declaration["mime_types"]
    ]
    if not options:
        return None

    ranked = sorted(options, key=lambda declaration: declaration["weight"], reverse=True)
    preferred = ranked[0]
    # With a single candidate there is no runner-up; indexing [1] would raise
    # IndexError, so fall back to the only parser available.
    fallback = ranked[1] if len(ranked) > 1 else preferred

    # The configuration row may not exist yet and ocr_key is nullable, so
    # treat a missing row, None and '' all as "no key configured".
    config = ApplicationConfiguration.objects.filter().first()
    ocr_key = config.ocr_key if config is not None else None
    if not ocr_key:
        logger.debug("No OCR key configured, using parser %s", fallback["parser"])
        return fallback["parser"]

    url_ocr_pdf_by_fileid = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_OCR_BY_FILEID"]
    try:
        # Probe request: only the auth outcome matters. The timeout keeps a
        # stalled OCR service from blocking document consumption forever.
        response_ocr = requests.post(
            url_ocr_pdf_by_fileid,
            headers={"Authorization": f"Bearer {ocr_key}"},
            timeout=10,
        )
    except requests.RequestException as exc:
        logger.warning("OCR key check failed (%s), using parser %s", exc, fallback["parser"])
        return fallback["parser"]

    logger.debug(f'status code: {response_ocr.status_code}')
    # Any answer other than 401 means the key was accepted; the empty probe
    # body itself is not expected to produce a successful OCR response.
    if response_ocr.status_code != 401:
        logger.debug('Successful key authentication ...')
        return preferred["parser"]

    logger.debug('Fail key authentication ... %s', fallback["parser"])
    return fallback["parser"]
def run_convert( def run_convert(
input_file, input_file,

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.11 on 2024-05-22 02:52
from django.db import migrations, models
# Adds the optional `ocr_key` column to the application configuration table.
class Migration(migrations.Migration):
dependencies = [
('paperless', '0003_alter_applicationconfiguration_max_image_pixels'),
]
operations = [
migrations.AddField(
model_name='applicationconfiguration',
name='ocr_key',
# Nullable and blank-able, so existing rows need no value; limited to 48 characters.
field=models.CharField(blank=True, max_length=48, null=True, verbose_name='Sets key for advanced version'),
),
]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.11 on 2024-05-22 07:01
from django.db import migrations, models
# Alters `ocr_key` on the application configuration; the field is redefined with max_length=100.
class Migration(migrations.Migration):
dependencies = [
('paperless', '0004_applicationconfiguration_ocr_key'),
]
operations = [
migrations.AlterField(
model_name='applicationconfiguration',
name='ocr_key',
# Still nullable and blank-able.
field=models.CharField(blank=True, max_length=100, null=True, verbose_name='Sets key for advanced version'),
),
]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.11 on 2024-05-22 07:03
from django.db import migrations, models
# Alters `ocr_key` on the application configuration; the field is redefined with max_length=200.
class Migration(migrations.Migration):
dependencies = [
('paperless', '0005_alter_applicationconfiguration_ocr_key'),
]
operations = [
migrations.AlterField(
model_name='applicationconfiguration',
name='ocr_key',
# Still nullable and blank-able.
field=models.CharField(blank=True, max_length=200, null=True, verbose_name='Sets key for advanced version'),
),
]

View File

@ -184,6 +184,13 @@ class ApplicationConfiguration(AbstractSingletonModel):
upload_to="logo/", upload_to="logo/",
) )
ocr_key = models.CharField(
verbose_name=_("Sets key for advanced version"),
null=True,
blank=True,
max_length=200,
)
class Meta: class Meta:
verbose_name = _("paperless application settings") verbose_name = _("paperless application settings")

View File

@ -294,7 +294,7 @@ INSTALLED_APPS = [
"django_extensions", "django_extensions",
"paperless", "paperless",
"documents.apps.DocumentsConfig", "documents.apps.DocumentsConfig",
# "paperless_tesseract.apps.PaperlessTesseractConfig", "paperless_tesseract.apps.PaperlessTesseractConfig",
"paperless_ocr_custom.apps.PaperlessTesseractConfig", "paperless_ocr_custom.apps.PaperlessTesseractConfig",
"paperless_text.apps.PaperlessTextConfig", "paperless_text.apps.PaperlessTextConfig",
"paperless_mail.apps.PaperlessMailConfig", "paperless_mail.apps.PaperlessMailConfig",
@ -419,12 +419,7 @@ CHANNEL_LAYERS = {
# PAPERLESS_OCR_CUSTOM # PAPERLESS_OCR_CUSTOM
TCGROUP_OCR_CUSTOM = { TCGROUP_OCR_CUSTOM = {
"ACCOUNT": {
"OCR_CUSTOM_USERNAME": os.getenv("OCR_CUSTOM_USERNAME", "test"),
"OCR_CUSTOM_PASSWORD": os.getenv("OCR_CUSTOM_PASSWORD", "test"),
},
"URL": { "URL": {
"URL_LOGIN": os.getenv("URL_LOGIN","https://ocr-core-api.tcgroup.vn/token"),
"URL_UPLOAD_FILE": os.getenv("URL_UPLOAD_FILE","https://ocr-core-api.tcgroup.vn/api/v1/file/upload"), "URL_UPLOAD_FILE": os.getenv("URL_UPLOAD_FILE","https://ocr-core-api.tcgroup.vn/api/v1/file/upload"),
"URL_OCR_BY_FILEID": os.getenv("URL_OCR_BY_FILEID","https://ocr-core-api.tcgroup.vn/api/v1/ocr/general"), "URL_OCR_BY_FILEID": os.getenv("URL_OCR_BY_FILEID","https://ocr-core-api.tcgroup.vn/api/v1/ocr/general"),
} }

View File

@ -9,20 +9,16 @@ from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from typing import Optional from typing import Optional
import PyPDF2
from django.conf import settings from django.conf import settings
import requests import requests
from PyPDF2 import PdfFileWriter, PdfFileReader, PdfReader, PdfWriter from PyPDF2 import PdfReader
from reportlab.pdfgen import canvas from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter from reportlab.lib.pagesizes import letter
from PIL import Image,ImageDraw,ImageFont from PIL import Image
from reportlab.pdfgen.canvas import Canvas
from reportlab.pdfbase.ttfonts import TTFont from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase import pdfmetrics
from pdf2image import convert_from_path from pdf2image import convert_from_path
from reportlab.lib.utils import ImageReader from reportlab.lib.utils import ImageReader
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph
from documents.parsers import DocumentParser from documents.parsers import DocumentParser
from documents.parsers import ParseError from documents.parsers import ParseError
@ -30,7 +26,7 @@ from documents.parsers import make_thumbnail_from_pdf
from documents.utils import maybe_override_pixel_limit from documents.utils import maybe_override_pixel_limit
from documents.utils import run_subprocess from documents.utils import run_subprocess
from paperless.config import OcrConfig from paperless.config import OcrConfig
from paperless.models import ArchiveFileChoices from paperless.models import ApplicationConfiguration, ArchiveFileChoices
from paperless.models import CleanChoices from paperless.models import CleanChoices
from paperless.models import ModeChoices from paperless.models import ModeChoices
@ -155,21 +151,23 @@ class RasterisedDocumentParser(DocumentParser):
# get ocr file img/pdf # get ocr file img/pdf
def ocr_file(self,path_file): def ocr_file(self,path_file):
# get text from api # get text from api
ocr_custom_username = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_USERNAME"] # ocr_custom_username = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_USERNAME"]
ocr_custom_password = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_PASSWORD"] # ocr_custom_password = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_PASSWORD"]
url_login = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_LOGIN"] # url_login = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_LOGIN"]
data = { # data = {
'username': ocr_custom_username, # 'username': ocr_custom_username,
'password': ocr_custom_password # 'password': ocr_custom_password
} # }
response_login = requests.post(url_login, data=data) # response_login = requests.post(url_login, data=data)
access_token = '' # access_token = ''
if response_login.status_code == 200: # if response_login.status_code == 200:
response_data = response_login.json() # response_data = response_login.json()
access_token = response_data.get('access_token','') # access_token = response_data.get('access_token','')
else: # else:
logging.error('login: ', response_login.status_code) # logging.error('login: ', response_login.status_code)
k = ApplicationConfiguration.objects.filter().first()
access_token = k.ocr_key
# upload file # upload file
get_file_id = '' get_file_id = ''
url_upload_file = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_UPLOAD_FILE"] url_upload_file = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_UPLOAD_FILE"]

View File

@ -7,7 +7,7 @@ def get_parser(*args, **kwargs):
def tesseract_consumer_declaration(sender, **kwargs): def tesseract_consumer_declaration(sender, **kwargs):
return { return {
"parser": get_parser, "parser": get_parser,
"weight": 0, "weight": 1,
"mime_types": { "mime_types": {
"application/pdf": ".pdf", "application/pdf": ".pdf",
"image/jpeg": ".jpg", "image/jpeg": ".jpg",