diff --git a/src-ui/src/app/components/admin/config/config.component.html b/src-ui/src/app/components/admin/config/config.component.html
index 03ca04b7b..71bb63e76 100644
--- a/src-ui/src/app/components/admin/config/config.component.html
+++ b/src-ui/src/app/components/admin/config/config.component.html
@@ -22,9 +22,9 @@
diff --git a/src-ui/src/app/data/paperless-config.ts b/src-ui/src/app/data/paperless-config.ts
index 3ae485ff2..f6c203983 100644
--- a/src-ui/src/app/data/paperless-config.ts
+++ b/src-ui/src/app/data/paperless-config.ts
@@ -166,6 +166,13 @@ export const PaperlessConfigOptions: ConfigOption[] = [
config_key: 'PAPERLESS_OCR_USER_ARGS',
category: ConfigCategory.OCR,
},
+ {
+ key: 'ocr_key',
+ title: $localize`OCR Key`,
+ type: ConfigOptionType.String,
+ // NOTE(review): previously 'PAPERLESS_APP_TITLE', which does not describe
+ // this option; confirm 'PAPERLESS_OCR_KEY' matches the backend/docs name.
+ config_key: 'PAPERLESS_OCR_KEY',
+ category: ConfigCategory.OCR,
+ },
{
key: 'app_logo',
title: $localize`Application Logo`,
@@ -196,6 +203,7 @@ export interface PaperlessConfig extends ObjectWithId {
max_image_pixels: number
color_conversion_strategy: ColorConvertConfig
user_args: object
+ ocr_key: string
app_logo: string
app_title: string
}
diff --git a/src-ui/src/locale/messages.vi_VN.xlf b/src-ui/src/locale/messages.vi_VN.xlf
index 327cf13da..7c3ed0e08 100644
--- a/src-ui/src/locale/messages.vi_VN.xlf
+++ b/src-ui/src/locale/messages.vi_VN.xlf
@@ -497,7 +497,7 @@
src/app/components/admin/config/config.component.html
34
- Enable
+ Cho phép
Discard
@@ -4587,7 +4587,7 @@
src/app/components/common/input/switch/switch.component.html
39
- Note: value has not yet been set and will not apply until explicitly changed
+ Lưu ý: giá trị chưa được đặt và sẽ không áp dụng cho đến khi thay đổi rõ ràng
Add tag
@@ -5970,7 +5970,7 @@
src/app/components/document-detail/document-detail.component.ts
724
- Do you really want to delete document ""?
+ Bạn có thực sự muốn xóa tài liệu ""?
The files for this document will be deleted permanently. This operation cannot be undone.
@@ -5978,7 +5978,7 @@
src/app/components/document-detail/document-detail.component.ts
725
- The files for this document will be deleted permanently. This operation cannot be undone.
+ Các tập tin cho tài liệu này sẽ bị xóa vĩnh viễn. Không thể hoàn tác thao tác này.
Delete document
@@ -5986,7 +5986,7 @@
src/app/components/document-detail/document-detail.component.ts
727
- Delete document
+ Xóa tài liệu
Error deleting document
@@ -6724,7 +6724,7 @@
src/app/components/document-list/document-list.component.html
146
- Sort by correspondent
+ Sắp xếp theo người biên tập
Sort by title
@@ -7132,7 +7132,7 @@
src/app/components/manage/correspondent-list/correspondent-list.component.ts
67
- Do you really want to delete the correspondent ""?
+ Bạn có thực sự muốn xóa người biên tập ""?
Customize the data fields that can be attached to documents.
@@ -7564,7 +7564,7 @@
src/app/components/manage/management-list/management-list.component.ts
180
- Successfully updated .
+ Đã cập nhật thành công.
Error occurred while saving .
@@ -7668,7 +7668,7 @@
src/app/components/manage/tag-list/tag-list.component.ts
53
- Do you really want to delete the tag ""?
+ Bạn có thực sự muốn xóa thẻ ""?
Use workflows to customize the behavior of TC GROUP when events 'trigger' a workflow.
@@ -7932,7 +7932,7 @@
src/app/data/paperless-config.ts
50
- General Settings
+ Cài đặt chung
OCR Settings
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index c735ed4c8..fa3bf2e75 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -36,7 +36,7 @@ from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
-from documents.parsers import DocumentParser
+from documents.parsers import DocumentParser, custom_get_parser_class_for_mime_type
from documents.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import parse_date
@@ -557,7 +557,7 @@ class Consumer(LoggingMixin):
self.log.debug(f"Detected mime type: {mime_type}")
# Based on the mime type, get the parser for that type
- parser_class: Optional[type[DocumentParser]] = get_parser_class_for_mime_type(
+ parser_class: Optional[type[DocumentParser]] = custom_get_parser_class_for_mime_type(
mime_type,
)
if not parser_class:
diff --git a/src/documents/migrations/1047_warehouse.py b/src/documents/migrations/1047_warehouse.py
deleted file mode 100644
index 1ac590460..000000000
--- a/src/documents/migrations/1047_warehouse.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Generated by Django 4.2.11 on 2024-05-15 04:18
-
-from django.conf import settings
-from django.db import migrations, models
-import django.db.models.deletion
-
-
-class Migration(migrations.Migration):
-
- dependencies = [
- migrations.swappable_dependency(settings.AUTH_USER_MODEL),
- ('documents', '1046_workflowaction_remove_all_correspondents_and_more'),
- ]
-
- operations = [
- migrations.CreateModel(
- name='Warehouse',
- fields=[
- ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
- ('name', models.CharField(max_length=256, unique=True, verbose_name='name')),
- ('type', models.CharField(blank=True, choices=[(1, 'Warehouse'), (2, 'Shelf'), (3, 'Boxcase')], default=1, max_length=20, null=True)),
- ('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL, verbose_name='owner')),
- ('parent_warehouse', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='parent_warehouses', to='documents.warehouse')),
- ],
- options={
- 'verbose_name': 'warehouse',
- 'verbose_name_plural': 'warehouses',
- },
- ),
- ]
diff --git a/src/documents/parsers.py b/src/documents/parsers.py
index d781ddb9f..3a43886d1 100644
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -14,11 +14,13 @@ from typing import Optional
from django.conf import settings
from django.utils import timezone
+import requests
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
+from paperless.models import ApplicationConfiguration
# This regular expression will try to find dates in the document at
# hand and will match the following formats:
@@ -129,6 +131,38 @@ def get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentPar
# Return the parser with the highest weight.
return best_parser["parser"]
+def custom_get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentParser"]]:
+    """
+    Pick the parser class for ``mime_type``, gated by the configured OCR key.
+
+    Parsers declaring support for ``mime_type`` are ranked by weight. The
+    top-ranked one is returned only when an OCR key is configured and the OCR
+    endpoint does not answer HTTP 401 for it; otherwise the runner-up is
+    returned. A single candidate is returned as-is without any key check.
+
+    Returns None if no parser supports ``mime_type``.
+    """
+    options = [
+        response[1]
+        for response in document_consumer_declaration.send(None)
+        if mime_type in response[1]["mime_types"]
+    ]
+    if not options:
+        return None
+
+    ranked = sorted(options, key=lambda option: option["weight"], reverse=True)
+    if len(ranked) == 1:
+        # Nothing to choose between; indexing the runner-up would raise IndexError.
+        return ranked[0]["parser"]
+
+    config = ApplicationConfiguration.objects.filter().first()
+    # The row may not exist and the column is nullable: treat None like "".
+    ocr_key = (config.ocr_key if config is not None else None) or ""
+    key_accepted = False
+    if ocr_key:
+        url_ocr_pdf_by_fileid = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_OCR_BY_FILEID"]
+        try:
+            response_ocr = requests.post(
+                url_ocr_pdf_by_fileid,
+                headers={"Authorization": f"Bearer {ocr_key}"},
+                timeout=30,  # never let a stalled OCR endpoint hang consumption
+            )
+        except requests.RequestException as e:
+            logger.warning(f"OCR key check failed, using fallback parser: {e}")
+        else:
+            logger.debug(f"status code: {response_ocr.status_code}")
+            # Any answer other than 401 counts as an accepted key.
+            key_accepted = response_ocr.status_code != 401
+
+    if key_accepted:
+        best_parser = ranked[0]
+        logger.debug("Successful key authentication ...")
+    else:
+        best_parser = ranked[1]
+        logger.debug(f"Fail key authentication ... {best_parser['parser']}")
+    return best_parser["parser"]
def run_convert(
input_file,
diff --git a/src/paperless/migrations/0004_applicationconfiguration_ocr_key.py b/src/paperless/migrations/0004_applicationconfiguration_ocr_key.py
new file mode 100644
index 000000000..a1a115f73
--- /dev/null
+++ b/src/paperless/migrations/0004_applicationconfiguration_ocr_key.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.11 on 2024-05-22 02:52
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Schema migration for the `paperless` app: adds the `ocr_key` column to
+ # the `applicationconfiguration` model.
+
+ # Applied after the migration named below.
+ dependencies = [
+ ('paperless', '0003_alter_applicationconfiguration_max_image_pixels'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='applicationconfiguration',
+ name='ocr_key',
+ # Optional string (null and blank both allowed), limited to 48 characters.
+ field=models.CharField(blank=True, max_length=48, null=True, verbose_name='Sets key for advanced version'),
+ ),
+ ]
diff --git a/src/paperless/migrations/0005_alter_applicationconfiguration_ocr_key.py b/src/paperless/migrations/0005_alter_applicationconfiguration_ocr_key.py
new file mode 100644
index 000000000..1b33698ed
--- /dev/null
+++ b/src/paperless/migrations/0005_alter_applicationconfiguration_ocr_key.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.11 on 2024-05-22 07:01
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Schema migration for the `paperless` app: redefines the `ocr_key` field
+ # of `applicationconfiguration` with max_length=100.
+
+ # Applied after the migration named below.
+ dependencies = [
+ ('paperless', '0004_applicationconfiguration_ocr_key'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='applicationconfiguration',
+ name='ocr_key',
+ # Still optional (null and blank both allowed); limit is now 100 characters.
+ field=models.CharField(blank=True, max_length=100, null=True, verbose_name='Sets key for advanced version'),
+ ),
+ ]
diff --git a/src/paperless/migrations/0006_alter_applicationconfiguration_ocr_key.py b/src/paperless/migrations/0006_alter_applicationconfiguration_ocr_key.py
new file mode 100644
index 000000000..a3225215b
--- /dev/null
+++ b/src/paperless/migrations/0006_alter_applicationconfiguration_ocr_key.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.11 on 2024-05-22 07:03
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Schema migration for the `paperless` app: redefines the `ocr_key` field
+ # of `applicationconfiguration` with max_length=200.
+
+ # Applied after the migration named below.
+ dependencies = [
+ ('paperless', '0005_alter_applicationconfiguration_ocr_key'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='applicationconfiguration',
+ name='ocr_key',
+ # Still optional (null and blank both allowed); limit is now 200 characters.
+ field=models.CharField(blank=True, max_length=200, null=True, verbose_name='Sets key for advanced version'),
+ ),
+ ]
diff --git a/src/paperless/models.py b/src/paperless/models.py
index 1f6cfbced..03b74bbc9 100644
--- a/src/paperless/models.py
+++ b/src/paperless/models.py
@@ -184,6 +184,13 @@ class ApplicationConfiguration(AbstractSingletonModel):
upload_to="logo/",
)
+ ocr_key = models.CharField(
+ verbose_name=_("Sets key for advanced version"),
+ null=True,
+ blank=True,
+ max_length=200,
+ )
+
class Meta:
verbose_name = _("paperless application settings")
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index 33e882101..a63c8931b 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -294,7 +294,7 @@ INSTALLED_APPS = [
"django_extensions",
"paperless",
"documents.apps.DocumentsConfig",
- # "paperless_tesseract.apps.PaperlessTesseractConfig",
+ "paperless_tesseract.apps.PaperlessTesseractConfig",
"paperless_ocr_custom.apps.PaperlessTesseractConfig",
"paperless_text.apps.PaperlessTextConfig",
"paperless_mail.apps.PaperlessMailConfig",
@@ -419,12 +419,7 @@ CHANNEL_LAYERS = {
# PAPERLESS_OCR_CUSTOM
TCGROUP_OCR_CUSTOM = {
- "ACCOUNT": {
- "OCR_CUSTOM_USERNAME": os.getenv("OCR_CUSTOM_USERNAME", "test"),
- "OCR_CUSTOM_PASSWORD": os.getenv("OCR_CUSTOM_PASSWORD", "test"),
- },
"URL": {
- "URL_LOGIN": os.getenv("URL_LOGIN","https://ocr-core-api.tcgroup.vn/token"),
"URL_UPLOAD_FILE": os.getenv("URL_UPLOAD_FILE","https://ocr-core-api.tcgroup.vn/api/v1/file/upload"),
"URL_OCR_BY_FILEID": os.getenv("URL_OCR_BY_FILEID","https://ocr-core-api.tcgroup.vn/api/v1/ocr/general"),
}
diff --git a/src/paperless_ocr_custom/parsers.py b/src/paperless_ocr_custom/parsers.py
index de1d7e2c3..3448151a9 100644
--- a/src/paperless_ocr_custom/parsers.py
+++ b/src/paperless_ocr_custom/parsers.py
@@ -9,20 +9,16 @@ from pathlib import Path
from typing import TYPE_CHECKING
from typing import Optional
-import PyPDF2
from django.conf import settings
import requests
-from PyPDF2 import PdfFileWriter, PdfFileReader, PdfReader, PdfWriter
+from PyPDF2 import PdfReader
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
-from PIL import Image,ImageDraw,ImageFont
-from reportlab.pdfgen.canvas import Canvas
+from PIL import Image
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
from pdf2image import convert_from_path
from reportlab.lib.utils import ImageReader
-from reportlab.lib.styles import getSampleStyleSheet
-from reportlab.platypus import Paragraph
from documents.parsers import DocumentParser
from documents.parsers import ParseError
@@ -30,7 +26,7 @@ from documents.parsers import make_thumbnail_from_pdf
from documents.utils import maybe_override_pixel_limit
from documents.utils import run_subprocess
from paperless.config import OcrConfig
-from paperless.models import ArchiveFileChoices
+from paperless.models import ApplicationConfiguration, ArchiveFileChoices
from paperless.models import CleanChoices
from paperless.models import ModeChoices
@@ -155,21 +151,23 @@ class RasterisedDocumentParser(DocumentParser):
# get ocr file img/pdf
def ocr_file(self,path_file):
# get text from api
- ocr_custom_username = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_USERNAME"]
- ocr_custom_password = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_PASSWORD"]
- url_login = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_LOGIN"]
- data = {
- 'username': ocr_custom_username,
- 'password': ocr_custom_password
- }
- response_login = requests.post(url_login, data=data)
- access_token = ''
- if response_login.status_code == 200:
- response_data = response_login.json()
- access_token = response_data.get('access_token','')
- else:
- logging.error('login: ', response_login.status_code)
+ # ocr_custom_username = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_USERNAME"]
+ # ocr_custom_password = settings.TCGROUP_OCR_CUSTOM["ACCOUNT"]["OCR_CUSTOM_PASSWORD"]
+ # url_login = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_LOGIN"]
+ # data = {
+ # 'username': ocr_custom_username,
+ # 'password': ocr_custom_password
+ # }
+ # response_login = requests.post(url_login, data=data)
+ # access_token = ''
+ # if response_login.status_code == 200:
+ # response_data = response_login.json()
+ # access_token = response_data.get('access_token','')
+ # else:
+ # logging.error('login: ', response_login.status_code)
+ k = ApplicationConfiguration.objects.filter().first()
+ access_token = k.ocr_key
# upload file
get_file_id = ''
url_upload_file = settings.TCGROUP_OCR_CUSTOM["URL"]["URL_UPLOAD_FILE"]
diff --git a/src/paperless_ocr_custom/signals.py b/src/paperless_ocr_custom/signals.py
index d924e0439..d03450b5f 100644
--- a/src/paperless_ocr_custom/signals.py
+++ b/src/paperless_ocr_custom/signals.py
@@ -7,7 +7,7 @@ def get_parser(*args, **kwargs):
def tesseract_consumer_declaration(sender, **kwargs):
return {
"parser": get_parser,
- "weight": 0,
+ "weight": 1,
"mime_types": {
"application/pdf": ".pdf",
"image/jpeg": ".jpg",