Initial translation support using DeepL

This commit is contained in:
Yo'av Moshe 2024-04-11 23:05:38 +02:00
parent a47d36f5e5
commit 84d072b763
9 changed files with 74 additions and 2 deletions

View File

@ -199,6 +199,10 @@
<div> <div>
<textarea class="form-control" id="content" rows="20" formControlName='content' [class.rtl]="isRTL"></textarea> <textarea class="form-control" id="content" rows="20" formControlName='content' [class.rtl]="isRTL"></textarea>
</div> </div>
<h3>Translation</h3>
<div>
<textarea class="form-control" id="translation" rows="20" formControlName='translation' [class.rtl]="isRTL"></textarea>
</div>
</ng-template> </ng-template>
</li> </li>

View File

@ -86,6 +86,7 @@ const doc: Document = {
storage_path: 31, storage_path: 31,
tags: [41, 42, 43], tags: [41, 42, 43],
content: 'text content', content: 'text content',
translation: 'text content',
added: new Date('May 4, 2014 03:24:00'), added: new Date('May 4, 2014 03:24:00'),
created: new Date('May 4, 2014 03:24:00'), created: new Date('May 4, 2014 03:24:00'),
modified: new Date('May 4, 2014 03:24:00'), modified: new Date('May 4, 2014 03:24:00'),

View File

@ -138,6 +138,7 @@ export class DocumentDetailComponent
documentForm: FormGroup = new FormGroup({ documentForm: FormGroup = new FormGroup({
title: new FormControl(''), title: new FormControl(''),
content: new FormControl(''), content: new FormControl(''),
translation: new FormControl(''),
created_date: new FormControl(), created_date: new FormControl(),
correspondent: new FormControl(), correspondent: new FormControl(),
document_type: new FormControl(), document_type: new FormControl(),
@ -404,6 +405,7 @@ export class DocumentDetailComponent
this.store = new BehaviorSubject({ this.store = new BehaviorSubject({
title: doc.title, title: doc.title,
content: doc.content, content: doc.content,
translation: doc.translation,
created_date: doc.created_date, created_date: doc.created_date,
correspondent: doc.correspondent, correspondent: doc.correspondent,
document_type: doc.document_type, document_type: doc.document_type,

View File

@ -32,6 +32,8 @@ export interface Document extends ObjectWithPermissions {
content?: string content?: string
translation?: string
tags$?: Observable<Tag[]> tags$?: Observable<Tag[]>
tags?: number[] tags?: number[]

View File

@ -104,7 +104,11 @@ class InboxFilter(Filter):
class TitleContentFilter(Filter):
    """
    Filter that keeps rows whose title, translation or content contains the
    given value, compared case-insensitively.
    """

    def filter(self, qs, value):
        # A falsy value means no search term was supplied: leave the
        # queryset untouched.
        if not value:
            return qs

        in_title = Q(title__icontains=value)
        in_translation = Q(translation__icontains=value)
        in_content = Q(content__icontains=value)
        return qs.filter(in_title | in_translation | in_content)

View File

@ -0,0 +1,23 @@
# Generated by Django 4.2.11 on 2024-04-11 18:24
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the free-text ``translation`` column to the ``document`` model."""

    dependencies = [
        ("documents", "1046_workflowaction_remove_all_correspondents_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="translation",
            field=models.TextField(
                verbose_name="translation",
                blank=True,
                # Adjacent literals are joined at compile time, so the stored
                # help text is the same single sentence pair as before.
                help_text=(
                    "The translated version of the content field. "
                    "This field can also be used for searching."
                ),
            ),
        ),
    ]

View File

@ -176,6 +176,15 @@ class Document(ModelWithOwner):
), ),
) )
translation = models.TextField(
_("translation"),
blank=True,
help_text=_(
"The translated version of the content field. This field can "
"also be used for searching.",
),
)
mime_type = models.CharField(_("mime type"), max_length=256, editable=False) mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
tags = models.ManyToManyField( tags = models.ManyToManyField(

View File

@ -772,6 +772,7 @@ class DocumentSerializer(
"storage_path", "storage_path",
"title", "title",
"content", "content",
"translation",
"tags", "tags",
"created", "created",
"created_date", "created_date",

View File

@ -6,6 +6,7 @@ from pathlib import Path
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
from typing import Optional from typing import Optional
import requests
import tqdm import tqdm
from celery import Task from celery import Task
from celery import shared_task from celery import shared_task
@ -49,6 +50,29 @@ if settings.AUDIT_LOG_ENABLED:
logger = logging.getLogger("paperless.tasks") logger = logging.getLogger("paperless.tasks")
def translate_content(content, timeout=30):
    """
    Translate ``content`` with the DeepL API and return the translated text.

    The auth key is read from ``settings.DEEPL_TOKEN`` and the target language
    from ``settings.TRANSLATION_TARGET``. Translation is treated as an optional
    feature: an empty string is returned, without any network traffic, when
    either setting is missing or empty, or when there is no text to translate.

    Args:
        content: Text to translate. ``None``, empty and whitespace-only
            values yield ``""``.
        timeout: Seconds to wait for DeepL before giving up.

    Returns:
        The text of the first translation in the DeepL response, or ``""``
        when no request was made.

    Raises:
        requests.RequestException: on connection problems, timeouts, or an
            HTTP error status (4xx/5xx) returned by DeepL.
    """
    # Nothing to translate: avoid a pointless (and billable) API call.
    if not content or not content.strip():
        return ""

    # Read the settings defensively so installations that have not configured
    # DeepL keep working instead of failing on a missing attribute.
    token = getattr(settings, "DEEPL_TOKEN", None)
    target_lang = getattr(settings, "TRANSLATION_TARGET", None)
    if not token or not target_lang:
        return ""

    response = requests.post(
        "https://api-free.deepl.com/v2/translate",
        headers={
            "Authorization": "DeepL-Auth-Key " + token,
            "Content-Type": "application/json",
        },
        json={
            "text": [content],
            "target_lang": target_lang,
        },
        # Without a timeout, requests waits forever on a stalled connection.
        timeout=timeout,
    )
    # Surface auth/quota/server errors as HTTPError rather than as a
    # confusing KeyError while parsing an error payload below.
    response.raise_for_status()
    return response.json()["translations"][0]["text"]
@shared_task @shared_task
def index_optimize(): def index_optimize():
ix = index.open_index() ix = index.open_index()
@ -243,9 +267,11 @@ def update_document_archive_file(document_id):
archive_filename=True, archive_filename=True,
) )
oldDocument = Document.objects.get(pk=document.pk) oldDocument = Document.objects.get(pk=document.pk)
content = parser.get_text()
Document.objects.filter(pk=document.pk).update( Document.objects.filter(pk=document.pk).update(
archive_checksum=checksum, archive_checksum=checksum,
content=parser.get_text(), content=content,
translation=translate_content(content),
archive_filename=document.archive_filename, archive_filename=document.archive_filename,
) )
newDocument = Document.objects.get(pk=document.pk) newDocument = Document.objects.get(pk=document.pk)