Allow multiple translation models and skip translation when the content is already in the target language

This commit is contained in:
Yo'av Moshe 2024-04-20 11:49:56 +02:00
parent 2996430b66
commit 0e608c5871
2 changed files with 26 additions and 10 deletions

View File

@ -94,3 +94,6 @@ types-Pygments = "*"
types-colorama = "*"
types-psycopg2 = "*"
types-setuptools = "*"
[translation]
bergamot = "0.4.5"

View File

@ -13,6 +13,7 @@ from django.conf import settings
from django.db import transaction
from django.db.models.signals import post_save
from filelock import FileLock
from langdetect import detect
from whoosh.writing import AsyncWriter
from documents import index
@ -52,10 +53,21 @@ logger = logging.getLogger("paperless.tasks")
def translate_content(content):
import bergamot
models = settings.TRANSLATION_MODELS.split(",")
original_language = detect(content)
# Avoid translating if we already have the target language
if original_language == settings.TRANSLATION_TARGET_LANGUAGE:
return ""
for model in models:
# Find the configured model whose name contains the detected language code.
# bergamot model names usually end with "tiny" or "base", so strip those before matching.
if original_language in model.replace("base", "").replace("tiny", ""):
service = bergamot.Service(bergamot.ServiceConfig())
model = service.modelFromConfigPath(
bergamot.REPOSITORY.modelConfigPath("browsermt", settings.TRANSLATION_MODEL),
bergamot.REPOSITORY.modelConfigPath("browsermt", model),
)
result = service.translate(
model,
@ -63,6 +75,7 @@ def translate_content(content):
bergamot.ResponseOptions(),
)
return next(r.target.text for r in result)
return ""
@shared_task