Initial translation support using DeepL

This commit is contained in:
Yo'av Moshe 2024-04-11 23:05:38 +02:00
parent a47d36f5e5
commit 84d072b763
9 changed files with 74 additions and 2 deletions

View File

@ -199,6 +199,10 @@
<div>
<textarea class="form-control" id="content" rows="20" formControlName='content' [class.rtl]="isRTL"></textarea>
</div>
<h3>Translation</h3>
<div>
<textarea class="form-control" id="translation" rows="20" formControlName='translation' [class.rtl]="isRTL"></textarea>
</div>
</ng-template>
</li>

View File

@ -86,6 +86,7 @@ const doc: Document = {
storage_path: 31,
tags: [41, 42, 43],
content: 'text content',
translation: 'text content',
added: new Date('May 4, 2014 03:24:00'),
created: new Date('May 4, 2014 03:24:00'),
modified: new Date('May 4, 2014 03:24:00'),

View File

@ -138,6 +138,7 @@ export class DocumentDetailComponent
documentForm: FormGroup = new FormGroup({
title: new FormControl(''),
content: new FormControl(''),
translation: new FormControl(''),
created_date: new FormControl(),
correspondent: new FormControl(),
document_type: new FormControl(),
@ -404,6 +405,7 @@ export class DocumentDetailComponent
this.store = new BehaviorSubject({
title: doc.title,
content: doc.content,
translation: doc.translation,
created_date: doc.created_date,
correspondent: doc.correspondent,
document_type: doc.document_type,

View File

@ -32,6 +32,8 @@ export interface Document extends ObjectWithPermissions {
content?: string
translation?: string
tags$?: Observable<Tag[]>
tags?: number[]

View File

@ -104,7 +104,11 @@ class InboxFilter(Filter):
class TitleContentFilter(Filter):
    """Filter documents by a substring of their title, translation or content."""

    def filter(self, qs, value):
        """Narrow ``qs`` to rows whose title, translation or content contain ``value``.

        Args:
            qs: The queryset to filter.
            value: The text to search for (case-insensitive). A falsy value
                disables the filter.

        Returns:
            The filtered queryset, or ``qs`` unchanged when ``value`` is falsy.
        """
        if not value:
            return qs
        # A single return: the earlier title/content-only query returned
        # first and made the translation-aware lookup unreachable.
        return qs.filter(
            Q(title__icontains=value)
            | Q(translation__icontains=value)
            | Q(content__icontains=value),
        )

View File

@ -0,0 +1,23 @@
# Generated by Django 4.2.11 on 2024-04-11 18:24
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the ``translation`` text field to the ``document`` model."""

    # Must run after the migration named here.
    dependencies = [
        ("documents", "1046_workflowaction_remove_all_correspondents_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="translation",
            # Text column that may be left blank.
            field=models.TextField(
                verbose_name="translation",
                blank=True,
                help_text="The translated version of the content field. This field can also be used for searching.",
            ),
        ),
    ]

View File

@ -176,6 +176,15 @@ class Document(ModelWithOwner):
),
)
translation = models.TextField(
_("translation"),
blank=True,
help_text=_(
"The translated version of the content field. This field can "
"also be used for searching.",
),
)
mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
tags = models.ManyToManyField(

View File

@ -772,6 +772,7 @@ class DocumentSerializer(
"storage_path",
"title",
"content",
"translation",
"tags",
"created",
"created_date",

View File

@ -6,6 +6,7 @@ from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional
import requests
import tqdm
from celery import Task
from celery import shared_task
@ -49,6 +50,29 @@ if settings.AUDIT_LOG_ENABLED:
logger = logging.getLogger("paperless.tasks")
def translate_content(content, timeout=30):
    """Translate ``content`` via the DeepL API and return the translated text.

    Args:
        content: The text to translate. ``None``, empty and whitespace-only
            input yields an empty string without contacting DeepL.
        timeout: Seconds to wait for DeepL before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The translated text of the single submitted entry, or ``""`` when
        there is nothing to translate or translation is not configured
        (``DEEPL_TOKEN`` / ``TRANSLATION_TARGET`` missing or empty).

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx response from DeepL.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not content or not content.strip():
        return ""

    # Translation is optional; read the settings defensively so an
    # unconfigured install degrades to "no translation" instead of crashing.
    token = getattr(settings, "DEEPL_TOKEN", None)
    target_lang = getattr(settings, "TRANSLATION_TARGET", None)
    if not token or not target_lang:
        return ""

    response = requests.post(
        "https://api-free.deepl.com/v2/translate",
        headers={
            "Authorization": f"DeepL-Auth-Key {token}",
            "Content-Type": "application/json",
        },
        json={
            "text": [content],
            "target_lang": target_lang,
        },
        timeout=timeout,
    )
    # Surface HTTP errors (bad key, quota exceeded, ...) as HTTPError rather
    # than an opaque KeyError/JSON error when parsing the body below.
    response.raise_for_status()
    return response.json()["translations"][0]["text"]
@shared_task
def index_optimize():
ix = index.open_index()
@ -243,9 +267,11 @@ def update_document_archive_file(document_id):
archive_filename=True,
)
oldDocument = Document.objects.get(pk=document.pk)
content = parser.get_text()
Document.objects.filter(pk=document.pk).update(
archive_checksum=checksum,
content=parser.get_text(),
content=content,
translation=translate_content(content),
archive_filename=document.archive_filename,
)
newDocument = Document.objects.get(pk=document.pk)