Merge pull request #10 from tienthienhd/feature/ocr-custom
update feature ocr
This commit is contained in:
commit
771d9fb5f3
5
.env
5
.env
@ -10,8 +10,7 @@ PAPERLESS_DBNAME=tc_edoc
|
|||||||
PAPERLESS_DBUSER=tc_edoc
|
PAPERLESS_DBUSER=tc_edoc
|
||||||
PAPERLESS_DBPASS=27M2MV58Re2Y
|
PAPERLESS_DBPASS=27M2MV58Re2Y
|
||||||
PAPERLESS_DBSSLMODE=prefer
|
PAPERLESS_DBSSLMODE=prefer
|
||||||
OCR_CUSTOM_USERNAME = test
|
|
||||||
OCR_CUSTOM_PASSWORD = test
|
|
||||||
URL_LOGIN = https://ocr-core-api.tcgroup.vn/token
|
|
||||||
URL_UPLOAD_FILE = https://ocr-core-api.tcgroup.vn/api/v1/file/upload
|
URL_UPLOAD_FILE = https://ocr-core-api.tcgroup.vn/api/v1/file/upload
|
||||||
URL_OCR_BY_FILEID = https://ocr-core-api.tcgroup.vn/api/v1/ocr/general
|
URL_OCR_BY_FILEID = https://ocr-core-api.tcgroup.vn/api/v1/ocr/general
|
||||||
|
@ -425,13 +425,6 @@
|
|||||||
<context context-type="linenumber">22</context>
|
<context context-type="linenumber">22</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="7991430199894172363" datatype="html">
|
|
||||||
<source>Read the documentation about this setting</source>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/admin/config/config.component.html</context>
|
|
||||||
<context context-type="linenumber">25</context>
|
|
||||||
</context-group>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="2180291763949669799" datatype="html">
|
<trans-unit id="2180291763949669799" datatype="html">
|
||||||
<source>Enable</source>
|
<source>Enable</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
@ -7217,18 +7210,25 @@
|
|||||||
<context context-type="linenumber">164</context>
|
<context context-type="linenumber">164</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
|
<trans-unit id="4301951240854951353" datatype="html">
|
||||||
|
<source>OCR Key</source>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
|
||||||
|
<context context-type="linenumber">171</context>
|
||||||
|
</context-group>
|
||||||
|
</trans-unit>
|
||||||
<trans-unit id="7106327322456204362" datatype="html">
|
<trans-unit id="7106327322456204362" datatype="html">
|
||||||
<source>Application Logo</source>
|
<source>Application Logo</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
|
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
|
||||||
<context context-type="linenumber">171</context>
|
<context context-type="linenumber">178</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="2684743776608068095" datatype="html">
|
<trans-unit id="2684743776608068095" datatype="html">
|
||||||
<source>Application Title</source>
|
<source>Application Title</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
|
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
|
||||||
<context context-type="linenumber">178</context>
|
<context context-type="linenumber">185</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="5948496158474272829" datatype="html">
|
<trans-unit id="5948496158474272829" datatype="html">
|
||||||
|
@ -22,9 +22,9 @@
|
|||||||
<div class="card-title">
|
<div class="card-title">
|
||||||
<h6>
|
<h6>
|
||||||
{{option.title}}
|
{{option.title}}
|
||||||
<a class="btn btn-sm btn-link" title="Read the documentation about this setting" i18n-title [href]="getDocsUrl(option.config_key)" target="_blank" referrerpolicy="no-referrer">
|
<!-- <a class="btn btn-sm btn-link" title="Read the documentation about this setting" i18n-title [href]="getDocsUrl(option.config_key)" target="_blank" referrerpolicy="no-referrer">
|
||||||
<i-bs name="info-circle"></i-bs>
|
<i-bs name="info-circle"></i-bs>
|
||||||
</a>
|
</a> -->
|
||||||
</h6>
|
</h6>
|
||||||
</div>
|
</div>
|
||||||
<div class="mb-n3">
|
<div class="mb-n3">
|
||||||
|
@ -166,6 +166,13 @@ export const PaperlessConfigOptions: ConfigOption[] = [
|
|||||||
config_key: 'PAPERLESS_OCR_USER_ARGS',
|
config_key: 'PAPERLESS_OCR_USER_ARGS',
|
||||||
category: ConfigCategory.OCR,
|
category: ConfigCategory.OCR,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
key: 'ocr_key',
|
||||||
|
title: $localize`OCR Key`,
|
||||||
|
type: ConfigOptionType.String,
|
||||||
|
config_key: 'PAPERLESS_APP_TITLE',
|
||||||
|
category: ConfigCategory.OCR,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
key: 'app_logo',
|
key: 'app_logo',
|
||||||
title: $localize`Application Logo`,
|
title: $localize`Application Logo`,
|
||||||
@ -196,6 +203,7 @@ export interface PaperlessConfig extends ObjectWithId {
|
|||||||
max_image_pixels: number
|
max_image_pixels: number
|
||||||
color_conversion_strategy: ColorConvertConfig
|
color_conversion_strategy: ColorConvertConfig
|
||||||
user_args: object
|
user_args: object
|
||||||
|
ocr_key: string
|
||||||
app_logo: string
|
app_logo: string
|
||||||
app_title: string
|
app_title: string
|
||||||
}
|
}
|
||||||
|
@ -497,7 +497,7 @@
|
|||||||
<context context-type="sourcefile">src/app/components/admin/config/config.component.html</context>
|
<context context-type="sourcefile">src/app/components/admin/config/config.component.html</context>
|
||||||
<context context-type="linenumber">34</context>
|
<context context-type="linenumber">34</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">Enable</target>
|
<target state="needs-translation">Cho phép</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="3823219296477075982" datatype="html">
|
<trans-unit id="3823219296477075982" datatype="html">
|
||||||
<source>Discard</source>
|
<source>Discard</source>
|
||||||
@ -4587,7 +4587,7 @@
|
|||||||
<context context-type="sourcefile">src/app/components/common/input/switch/switch.component.html</context>
|
<context context-type="sourcefile">src/app/components/common/input/switch/switch.component.html</context>
|
||||||
<context context-type="linenumber">39</context>
|
<context context-type="linenumber">39</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">Note: value has not yet been set and will not apply until explicitly changed</target>
|
<target state="needs-translation">Lưu ý: giá trị chưa được đặt và sẽ không áp dụng cho đến khi thay đổi rõ ràng</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="6560126119609945418" datatype="html">
|
<trans-unit id="6560126119609945418" datatype="html">
|
||||||
<source>Add tag</source>
|
<source>Add tag</source>
|
||||||
@ -5970,7 +5970,7 @@
|
|||||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||||
<context context-type="linenumber">724</context>
|
<context context-type="linenumber">724</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">Do you really want to delete document "<x id="PH" equiv-text="this.document.title"/>"?</target>
|
<target state="needs-translation">Bạn có thực sự muốn xóa tài liệu "<x id="PH" equiv-text="this.document.title"/>"?</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="6691075929777935948" datatype="html">
|
<trans-unit id="6691075929777935948" datatype="html">
|
||||||
<source>The files for this document will be deleted permanently. This operation cannot be undone.</source>
|
<source>The files for this document will be deleted permanently. This operation cannot be undone.</source>
|
||||||
@ -5978,7 +5978,7 @@
|
|||||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||||
<context context-type="linenumber">725</context>
|
<context context-type="linenumber">725</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">The files for this document will be deleted permanently. This operation cannot be undone.</target>
|
<target state="needs-translation">Các tập tin cho tài liệu này sẽ bị xóa vĩnh viễn. Không thể hoàn tác thao tác này.</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="719892092227206532" datatype="html">
|
<trans-unit id="719892092227206532" datatype="html">
|
||||||
<source>Delete document</source>
|
<source>Delete document</source>
|
||||||
@ -5986,7 +5986,7 @@
|
|||||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||||
<context context-type="linenumber">727</context>
|
<context context-type="linenumber">727</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">Delete document</target>
|
<target state="needs-translation">Xóa tài liệu</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="7295637485862454066" datatype="html">
|
<trans-unit id="7295637485862454066" datatype="html">
|
||||||
<source>Error deleting document</source>
|
<source>Error deleting document</source>
|
||||||
@ -6724,7 +6724,7 @@
|
|||||||
<context context-type="sourcefile">src/app/components/document-list/document-list.component.html</context>
|
<context context-type="sourcefile">src/app/components/document-list/document-list.component.html</context>
|
||||||
<context context-type="linenumber">146</context>
|
<context context-type="linenumber">146</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">Sort by correspondent</target>
|
<target state="needs-translation">Sắp xếp theo người biên tập</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="2066713941761361709" datatype="html">
|
<trans-unit id="2066713941761361709" datatype="html">
|
||||||
<source>Sort by title</source>
|
<source>Sort by title</source>
|
||||||
@ -7132,7 +7132,7 @@
|
|||||||
<context context-type="sourcefile">src/app/components/manage/correspondent-list/correspondent-list.component.ts</context>
|
<context context-type="sourcefile">src/app/components/manage/correspondent-list/correspondent-list.component.ts</context>
|
||||||
<context context-type="linenumber">67</context>
|
<context context-type="linenumber">67</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">Do you really want to delete the correspondent "<x id="PH" equiv-text="object.name"/>"?</target>
|
<target state="needs-translation">Bạn có thực sự muốn xóa người biên tập "<x id="PH" equiv-text="object.name"/>"?</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="8384138406252790442" datatype="html">
|
<trans-unit id="8384138406252790442" datatype="html">
|
||||||
<source>Customize the data fields that can be attached to documents.</source>
|
<source>Customize the data fields that can be attached to documents.</source>
|
||||||
@ -7564,7 +7564,7 @@
|
|||||||
<context context-type="sourcefile">src/app/components/manage/management-list/management-list.component.ts</context>
|
<context context-type="sourcefile">src/app/components/manage/management-list/management-list.component.ts</context>
|
||||||
<context context-type="linenumber">180</context>
|
<context context-type="linenumber">180</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">Successfully updated <x id="PH" equiv-text="this.typeName"/>.</target>
|
<target state="needs-translation">Đã cập nhật thành công <x id="PH" equiv-text="this.typeName"/>.</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="6442673774206210733" datatype="html">
|
<trans-unit id="6442673774206210733" datatype="html">
|
||||||
<source>Error occurred while saving <x id="PH" equiv-text="this.typeName"/>.</source>
|
<source>Error occurred while saving <x id="PH" equiv-text="this.typeName"/>.</source>
|
||||||
@ -7668,7 +7668,7 @@
|
|||||||
<context context-type="sourcefile">src/app/components/manage/tag-list/tag-list.component.ts</context>
|
<context context-type="sourcefile">src/app/components/manage/tag-list/tag-list.component.ts</context>
|
||||||
<context context-type="linenumber">53</context>
|
<context context-type="linenumber">53</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">Do you really want to delete the tag "<x id="PH" equiv-text="object.name"/>"?</target>
|
<target state="needs-translation">Bạn có thực sự muốn xóa thẻ "<x id="PH" equiv-text="object.name"/>"?</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="1229748338333965418" datatype="html">
|
<trans-unit id="1229748338333965418" datatype="html">
|
||||||
<source>Use workflows to customize the behavior of TC GROUP when events 'trigger' a workflow.</source>
|
<source>Use workflows to customize the behavior of TC GROUP when events 'trigger' a workflow.</source>
|
||||||
@ -7932,7 +7932,7 @@
|
|||||||
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
|
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
|
||||||
<context context-type="linenumber">50</context>
|
<context context-type="linenumber">50</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<target state="needs-translation">General Settings</target>
|
<target state="needs-translation">Cài đặt chung</target>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="2762851116637676072" datatype="html">
|
<trans-unit id="2762851116637676072" datatype="html">
|
||||||
<source>OCR Settings</source>
|
<source>OCR Settings</source>
|
||||||
|
@ -36,7 +36,7 @@ from documents.models import Tag
|
|||||||
from documents.models import Workflow
|
from documents.models import Workflow
|
||||||
from documents.models import WorkflowAction
|
from documents.models import WorkflowAction
|
||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
from documents.parsers import DocumentParser
|
from documents.parsers import DocumentParser, custom_get_parser_class_for_mime_type
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
from documents.parsers import get_parser_class_for_mime_type
|
||||||
from documents.parsers import parse_date
|
from documents.parsers import parse_date
|
||||||
@ -557,7 +557,7 @@ class Consumer(LoggingMixin):
|
|||||||
self.log.debug(f"Detected mime type: {mime_type}")
|
self.log.debug(f"Detected mime type: {mime_type}")
|
||||||
|
|
||||||
# Based on the mime type, get the parser for that type
|
# Based on the mime type, get the parser for that type
|
||||||
parser_class: Optional[type[DocumentParser]] = get_parser_class_for_mime_type(
|
parser_class: Optional[type[DocumentParser]] = custom_get_parser_class_for_mime_type(
|
||||||
mime_type,
|
mime_type,
|
||||||
)
|
)
|
||||||
if not parser_class:
|
if not parser_class:
|
||||||
|
@ -1,30 +0,0 @@
|
|||||||
# Generated by Django 4.2.11 on 2024-05-15 04:18
|
|
||||||
|
|
||||||
from django.conf import settings
|
|
||||||
from django.db import migrations, models
|
|
||||||
import django.db.models.deletion
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
|
||||||
('documents', '1046_workflowaction_remove_all_correspondents_and_more'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.CreateModel(
|
|
||||||
name='Warehouse',
|
|
||||||
fields=[
|
|
||||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
|
||||||
('name', models.CharField(max_length=256, unique=True, verbose_name='name')),
|
|
||||||
('type', models.CharField(blank=True, choices=[(1, 'Warehouse'), (2, 'Shelf'), (3, 'Boxcase')], default=1, max_length=20, null=True)),
|
|
||||||
('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL, verbose_name='owner')),
|
|
||||||
('parent_warehouse', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='parent_warehouses', to='documents.warehouse')),
|
|
||||||
],
|
|
||||||
options={
|
|
||||||
'verbose_name': 'warehouse',
|
|
||||||
'verbose_name_plural': 'warehouses',
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
@ -14,11 +14,13 @@ from typing import Optional
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
import requests
|
||||||
|
|
||||||
from documents.loggers import LoggingMixin
|
from documents.loggers import LoggingMixin
|
||||||
from documents.signals import document_consumer_declaration
|
from documents.signals import document_consumer_declaration
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
|
from paperless.models import ApplicationConfiguration
|
||||||
|
|
||||||
# This regular expression will try to find dates in the document at
|
# This regular expression will try to find dates in the document at
|
||||||
# hand and will match the following formats:
|
# hand and will match the following formats:
|
||||||
@ -129,6 +131,38 @@ def get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentPar
|
|||||||
# Return the parser with the highest weight.
|
# Return the parser with the highest weight.
|
||||||
return best_parser["parser"]
|
return best_parser["parser"]
|
||||||
|
|
||||||
|
def custom_get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentParser"]]:
    """
    Returns the parser to use for the given mimetype, or None if no parser
    declares support for it.

    Candidates are ranked by their declared "weight" (highest first). The
    highest-weighted parser is returned only when an OCR key is stored in the
    application configuration and the OCR endpoint does not answer the key
    check with HTTP 401. In every other case (no configuration row, empty
    key, rejected key, endpoint unreachable) the second-ranked parser is
    returned, or the only candidate when just one parser supports the
    mimetype.

    :param mime_type: the detected mime type of the document being consumed
    :return: the "parser" entry of the selected parser declaration, or None
    """

    # Each signal response is a (receiver, declaration) pair; keep only the
    # declarations that list this mime type.
    options = [
        parser_declaration
        for _, parser_declaration in document_consumer_declaration.send(None)
        if mime_type in parser_declaration["mime_types"]
    ]

    if not options:
        return None

    # Sort once and reuse the ranking for both the preferred and the
    # fallback choice.
    ranked = sorted(options, key=lambda option: option["weight"], reverse=True)
    preferred_parser = ranked[0]
    # Mime types served by a single parser have no runner-up; indexing [1]
    # unconditionally would raise IndexError for them.
    fallback_parser = ranked[1] if len(ranked) > 1 else ranked[0]

    # The configuration row may not exist yet and ocr_key is nullable, so
    # treat a missing row, None and '' alike as "no key configured".
    config = ApplicationConfiguration.objects.filter().first()
    ocr_key = config.ocr_key if config is not None else None
    if not ocr_key:
        logger.debug(
            "No OCR key configured, using parser %s",
            fallback_parser["parser"],
        )
        return fallback_parser["parser"]

    headers = {
        'Authorization': f'Bearer {ocr_key}'
    }
    url_ocr_pdf_by_fileid = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_OCR_BY_FILEID"]

    try:
        # The request carries no payload; only the status code is inspected
        # to learn whether the key is accepted. The timeout keeps an
        # unresponsive endpoint from blocking consumption indefinitely.
        response_ocr = requests.post(
            url_ocr_pdf_by_fileid,
            headers=headers,
            timeout=10,
        )
    except requests.RequestException as exc:
        logger.warning(
            "OCR key check failed (%s), using parser %s",
            exc,
            fallback_parser["parser"],
        )
        return fallback_parser["parser"]

    logger.debug(f'status code: {response_ocr.status_code}')

    # NOTE(review): any status other than 401 is treated as an accepted key,
    # as in the original implementation — confirm against the OCR API.
    if response_ocr.status_code == 401:
        logger.debug(
            "Fail key authentication ..., using parser %s",
            fallback_parser["parser"],
        )
        return fallback_parser["parser"]

    logger.debug('Successful key authentication ...')
    # Return the parser with the highest weight.
    return preferred_parser["parser"]
|
||||||
|
|
||||||
def run_convert(
|
def run_convert(
|
||||||
input_file,
|
input_file,
|
||||||
|
@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 4.2.11 on 2024-05-22 02:52
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('paperless', '0003_alter_applicationconfiguration_max_image_pixels'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='applicationconfiguration',
|
||||||
|
name='ocr_key',
|
||||||
|
field=models.CharField(blank=True, max_length=48, null=True, verbose_name='Sets key for advanced version'),
|
||||||
|
),
|
||||||
|
]
|
@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 4.2.11 on 2024-05-22 07:01
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('paperless', '0004_applicationconfiguration_ocr_key'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='applicationconfiguration',
|
||||||
|
name='ocr_key',
|
||||||
|
field=models.CharField(blank=True, max_length=100, null=True, verbose_name='Sets key for advanced version'),
|
||||||
|
),
|
||||||
|
]
|
@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 4.2.11 on 2024-05-22 07:03
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('paperless', '0005_alter_applicationconfiguration_ocr_key'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='applicationconfiguration',
|
||||||
|
name='ocr_key',
|
||||||
|
field=models.CharField(blank=True, max_length=200, null=True, verbose_name='Sets key for advanced version'),
|
||||||
|
),
|
||||||
|
]
|
@ -184,6 +184,13 @@ class ApplicationConfiguration(AbstractSingletonModel):
|
|||||||
upload_to="logo/",
|
upload_to="logo/",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ocr_key = models.CharField(
|
||||||
|
verbose_name=_("Sets key for advanced version"),
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
max_length=200,
|
||||||
|
)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
verbose_name = _("paperless application settings")
|
verbose_name = _("paperless application settings")
|
||||||
|
|
||||||
|
@ -294,7 +294,7 @@ INSTALLED_APPS = [
|
|||||||
"django_extensions",
|
"django_extensions",
|
||||||
"paperless",
|
"paperless",
|
||||||
"documents.apps.DocumentsConfig",
|
"documents.apps.DocumentsConfig",
|
||||||
# "paperless_tesseract.apps.PaperlessTesseractConfig",
|
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
||||||
"paperless_ocr_custom.apps.PaperlessTesseractConfig",
|
"paperless_ocr_custom.apps.PaperlessTesseractConfig",
|
||||||
"paperless_text.apps.PaperlessTextConfig",
|
"paperless_text.apps.PaperlessTextConfig",
|
||||||
"paperless_mail.apps.PaperlessMailConfig",
|
"paperless_mail.apps.PaperlessMailConfig",
|
||||||
@ -419,12 +419,7 @@ CHANNEL_LAYERS = {
|
|||||||
|
|
||||||
# PAPERLESS_OCR_CUSTOM
|
# PAPERLESS_OCR_CUSTOM
|
||||||
TCGROUP_OCR_CUSTOM = {
|
TCGROUP_OCR_CUSTOM = {
|
||||||
"ACCOUNT": {
|
|
||||||
"OCR_CUSTOM_USERNAME": os.getenv("OCR_CUSTOM_USERNAME", "test"),
|
|
||||||
"OCR_CUSTOM_PASSWORD": os.getenv("OCR_CUSTOM_PASSWORD", "test"),
|
|
||||||
},
|
|
||||||
"URL": {
|
"URL": {
|
||||||
"URL_LOGIN": os.getenv("URL_LOGIN","https://ocr-core-api.tcgroup.vn/token"),
|
|
||||||
"URL_UPLOAD_FILE": os.getenv("URL_UPLOAD_FILE","https://ocr-core-api.tcgroup.vn/api/v1/file/upload"),
|
"URL_UPLOAD_FILE": os.getenv("URL_UPLOAD_FILE","https://ocr-core-api.tcgroup.vn/api/v1/file/upload"),
|
||||||
"URL_OCR_BY_FILEID": os.getenv("URL_OCR_BY_FILEID","https://ocr-core-api.tcgroup.vn/api/v1/ocr/general"),
|
"URL_OCR_BY_FILEID": os.getenv("URL_OCR_BY_FILEID","https://ocr-core-api.tcgroup.vn/api/v1/ocr/general"),
|
||||||
}
|
}
|
||||||
|
@ -9,20 +9,16 @@ from pathlib import Path
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import PyPDF2
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
import requests
|
import requests
|
||||||
from PyPDF2 import PdfFileWriter, PdfFileReader, PdfReader, PdfWriter
|
from PyPDF2 import PdfReader
|
||||||
from reportlab.pdfgen import canvas
|
from reportlab.pdfgen import canvas
|
||||||
from reportlab.lib.pagesizes import letter
|
from reportlab.lib.pagesizes import letter
|
||||||
from PIL import Image,ImageDraw,ImageFont
|
from PIL import Image
|
||||||
from reportlab.pdfgen.canvas import Canvas
|
|
||||||
from reportlab.pdfbase.ttfonts import TTFont
|
from reportlab.pdfbase.ttfonts import TTFont
|
||||||
from reportlab.pdfbase import pdfmetrics
|
from reportlab.pdfbase import pdfmetrics
|
||||||
from pdf2image import convert_from_path
|
from pdf2image import convert_from_path
|
||||||
from reportlab.lib.utils import ImageReader
|
from reportlab.lib.utils import ImageReader
|
||||||
from reportlab.lib.styles import getSampleStyleSheet
|
|
||||||
from reportlab.platypus import Paragraph
|
|
||||||
|
|
||||||
from documents.parsers import DocumentParser
|
from documents.parsers import DocumentParser
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
@ -30,7 +26,7 @@ from documents.parsers import make_thumbnail_from_pdf
|
|||||||
from documents.utils import maybe_override_pixel_limit
|
from documents.utils import maybe_override_pixel_limit
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
from paperless.config import OcrConfig
|
from paperless.config import OcrConfig
|
||||||
from paperless.models import ArchiveFileChoices
|
from paperless.models import ApplicationConfiguration, ArchiveFileChoices
|
||||||
from paperless.models import CleanChoices
|
from paperless.models import CleanChoices
|
||||||
from paperless.models import ModeChoices
|
from paperless.models import ModeChoices
|
||||||
|
|
||||||
@ -155,21 +151,23 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
# get ocr file img/pdf
|
# get ocr file img/pdf
|
||||||
def ocr_file(self,path_file):
|
def ocr_file(self,path_file):
|
||||||
# get text from api
|
# get text from api
|
||||||
ocr_custom_username = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_USERNAME"]
|
# ocr_custom_username = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_USERNAME"]
|
||||||
ocr_custom_password = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_PASSWORD"]
|
# ocr_custom_password = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_PASSWORD"]
|
||||||
url_login = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_LOGIN"]
|
# url_login = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_LOGIN"]
|
||||||
data = {
|
# data = {
|
||||||
'username': ocr_custom_username,
|
# 'username': ocr_custom_username,
|
||||||
'password': ocr_custom_password
|
# 'password': ocr_custom_password
|
||||||
}
|
# }
|
||||||
response_login = requests.post(url_login, data=data)
|
# response_login = requests.post(url_login, data=data)
|
||||||
access_token = ''
|
# access_token = ''
|
||||||
if response_login.status_code == 200:
|
# if response_login.status_code == 200:
|
||||||
response_data = response_login.json()
|
# response_data = response_login.json()
|
||||||
access_token = response_data.get('access_token','')
|
# access_token = response_data.get('access_token','')
|
||||||
else:
|
# else:
|
||||||
logging.error('login: ', response_login.status_code)
|
# logging.error('login: ', response_login.status_code)
|
||||||
|
|
||||||
|
k = ApplicationConfiguration.objects.filter().first()
|
||||||
|
access_token = k.ocr_key
|
||||||
# upload file
|
# upload file
|
||||||
get_file_id = ''
|
get_file_id = ''
|
||||||
url_upload_file = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_UPLOAD_FILE"]
|
url_upload_file = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_UPLOAD_FILE"]
|
||||||
|
@ -7,7 +7,7 @@ def get_parser(*args, **kwargs):
|
|||||||
def tesseract_consumer_declaration(sender, **kwargs):
|
def tesseract_consumer_declaration(sender, **kwargs):
|
||||||
return {
|
return {
|
||||||
"parser": get_parser,
|
"parser": get_parser,
|
||||||
"weight": 0,
|
"weight": 1,
|
||||||
"mime_types": {
|
"mime_types": {
|
||||||
"application/pdf": ".pdf",
|
"application/pdf": ".pdf",
|
||||||
"image/jpeg": ".jpg",
|
"image/jpeg": ".jpg",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user