Upgrades to testing and various small fixes

This commit is contained in:
Trenton H 2024-10-02 09:28:55 -07:00
parent 5b1cb50793
commit 733cdc71ce
4 changed files with 98 additions and 50 deletions

View File

@ -112,8 +112,33 @@ def generate_unique_filename(doc, archive_filename=False):
return new_filename return new_filename
def convert_to_django_template_format(old_format: str) -> str:
    """
    Rewrite legacy ``{name}`` placeholders as Django template ``{{ name }}`` variables.

    Placeholders which are already wrapped in double braces are left untouched.

    :param old_format: Legacy format string (e.g., "{title} by {author}")
    :return: The same string using Django template placeholders (e.g., "{{ title }} by {{ author }}")
    """
    # Only a lone "{word}" is matched: the lookbehind and lookahead reject any
    # brace which already has a neighbouring brace, so "{{word}}" never matches
    single_brace_placeholder = r"(?<!\{)\{(\w*)\}(?!\})"

    # "\1" is the captured name; an empty "{}" therefore becomes "{{  }}"
    return re.sub(single_brace_placeholder, r"{{ \1 }}", old_format)
def create_dummy_document(): def create_dummy_document():
"""Create a dummy Document instance with all possible fields filled, including tags and custom fields.""" """
Create a dummy Document instance with all possible fields filled
"""
# Populate the document with representative values for every field # Populate the document with representative values for every field
dummy_doc = Document( dummy_doc = Document(
pk=1, pk=1,
@ -139,6 +164,10 @@ def create_dummy_document():
def get_creation_date_context(document: Document) -> dict[str, str]: def get_creation_date_context(document: Document) -> dict[str, str]:
"""
Given a Document, localizes the creation date and builds a context dictionary with some common, shorthand
formatted values from it
"""
local_created = timezone.localdate(document.created) local_created = timezone.localdate(document.created)
return { return {
@ -153,6 +182,10 @@ def get_creation_date_context(document: Document) -> dict[str, str]:
def get_added_date_context(document: Document) -> dict[str, str]: def get_added_date_context(document: Document) -> dict[str, str]:
"""
Given a Document, localizes the added date and builds a context dictionary with some common, shorthand
formatted values from it
"""
local_added = timezone.localdate(document.added) local_added = timezone.localdate(document.added)
return { return {
@ -171,6 +204,12 @@ def get_basic_metadata_context(
*, *,
no_value_default: str, no_value_default: str,
) -> dict[str, str]: ) -> dict[str, str]:
"""
Given a Document, constructs some basic information about it. If certain values are not set,
they will be replaced with the no_value_default.
Regardless of set or not, the values will be sanitized
"""
return { return {
"title": pathvalidate.sanitize_filename( "title": pathvalidate.sanitize_filename(
document.title, document.title,
@ -201,7 +240,10 @@ def get_basic_metadata_context(
} }
def get_tags_context(tags: Iterable[Tag]) -> dict[str, str]: def get_tags_context(tags: Iterable[Tag]) -> dict[str, str | list[str]]:
"""
Given an Iterable of tags, constructs some context from them for usage
"""
return { return {
"tag_list": pathvalidate.sanitize_filename( "tag_list": pathvalidate.sanitize_filename(
",".join( ",".join(
@ -209,12 +251,18 @@ def get_tags_context(tags: Iterable[Tag]) -> dict[str, str]:
), ),
replacement_text="-", replacement_text="-",
), ),
# Assumed to be ordered, but a template could loop through to find what they want
"tag_name_list": [x.name for x in tags],
} }
def get_custom_fields_context( def get_custom_fields_context(
custom_fields: Iterable[CustomFieldInstance], custom_fields: Iterable[CustomFieldInstance],
) -> dict[str, dict[str, str]]: ) -> dict[str, dict[str, str]]:
"""
Given an Iterable of CustomFieldInstance, builds a dictionary mapping the field name
to its type and value
"""
return { return {
pathvalidate.sanitize_filename( pathvalidate.sanitize_filename(
field_instance.field.name, field_instance.field.name,
@ -225,7 +273,7 @@ def get_custom_fields_context(
replacement_text="-", replacement_text="-",
), ),
"value": pathvalidate.sanitize_filename( "value": pathvalidate.sanitize_filename(
field_instance.value, str(field_instance.value),
replacement_text="-", replacement_text="-",
), ),
} }
@ -274,7 +322,7 @@ def validate_template_and_render(
] ]
else: else:
# or use the real document information # or use the real document information
tags_list = document.tags.all() tags_list = document.tags.order_by("name").all()
custom_fields = document.custom_fields.all() custom_fields = document.custom_fields.all()
# Build the context dictionary # Build the context dictionary
@ -295,6 +343,8 @@ def validate_template_and_render(
) )
rendered_template = template.render(Context(context)) rendered_template = template.render(Context(context))
logger.info(rendered_template)
# Check for errors # Check for errors
undefined_vars = detect_undefined_variables(rendered_template) undefined_vars = detect_undefined_variables(rendered_template)
if undefined_vars: if undefined_vars:
@ -321,28 +371,6 @@ def generate_filename(
): ):
path = "" path = ""
def convert_to_django_template_format(old_format: str) -> str:
    """
    Convert old style ``{name}`` placeholders into Django template ``{{ name }}`` form.

    Text already written with double braces is not modified.

    :param old_format: The old style format string (e.g., "{title} by {author}")
    :return: The string with Django template style placeholders (e.g., "{{ title }} by {{ author }}")
    """

    def as_django_variable(found):
        # Group 1 holds the text between the single braces (possibly empty)
        return "{{ " + found.group(1) + " }}"

    # The lookbehind/lookahead skip braces which are already doubled
    return re.sub(r"(?<!\{)\{(\w*)\}(?!\})", as_django_variable, old_format)
def format_filename(document: Document, template_str: str) -> str | None: def format_filename(document: Document, template_str: str) -> str | None:
rendered_filename = validate_template_and_render(template_str, document) rendered_filename = validate_template_and_render(template_str, document)
if rendered_filename is None: if rendered_filename is None:

View File

@ -1,6 +1,5 @@
# Generated by Django 5.1.1 on 2024-10-01 20:42 # Generated by Django 5.1.1 on 2024-10-01 20:42
import re
from django.db import migrations from django.db import migrations
from django.db import transaction from django.db import transaction
@ -11,27 +10,7 @@ def convert_from_format_to_template(apps, schema_editor):
StoragePath = apps.get_model("documents", "StoragePath") StoragePath = apps.get_model("documents", "StoragePath")
def convert_to_django_template_format(old_format): from documents.file_handling import convert_to_django_template_format
"""
Converts old Python string format (with {}) to Django template style (with {{ }}),
while ignoring existing {{ ... }} placeholders.
:param old_format: The old style format string (e.g., "{title} by {author}")
:return: Converted string in Django Template style (e.g., "{{ title }} by {{ author }}")
"""
# Step 1: Match placeholders with single curly braces but not those with double braces
pattern = r"(?<!\{)\{(\w*)\}(?!\})" # Matches {var} but not {{var}}
# Step 2: Replace the placeholders with {{ var }} or {{ }}
def replace_with_django(match):
variable = match.group(1) # The variable inside the braces
return f"{{{{ {variable} }}}}" # Convert to {{ variable }}
# Apply the substitution
converted_format = re.sub(pattern, replace_with_django, old_format)
return converted_format
with transaction.atomic(): with transaction.atomic():
for storage_path in StoragePath.objects.all(): for storage_path in StoragePath.objects.all():

View File

@ -1,4 +1,5 @@
import datetime import datetime
import logging
import math import math
import re import re
import zoneinfo import zoneinfo
@ -28,6 +29,8 @@ from rest_framework import fields
from rest_framework import serializers from rest_framework import serializers
from rest_framework.fields import SerializerMethodField from rest_framework.fields import SerializerMethodField
from documents.file_handling import convert_to_django_template_format
if settings.AUDIT_LOG_ENABLED: if settings.AUDIT_LOG_ENABLED:
from auditlog.context import set_actor from auditlog.context import set_actor
@ -55,6 +58,8 @@ from documents.permissions import get_groups_with_only_permission
from documents.permissions import set_permissions_for_object from documents.permissions import set_permissions_for_object
from documents.validators import uri_validator from documents.validators import uri_validator
logger = logging.getLogger("paperless.serializers")
# https://www.django-rest-framework.org/api-guide/serializers/#example # https://www.django-rest-framework.org/api-guide/serializers/#example
class DynamicFieldsModelSerializer(serializers.ModelSerializer): class DynamicFieldsModelSerializer(serializers.ModelSerializer):
@ -1481,12 +1486,17 @@ class StoragePathSerializer(MatchingModelSerializer, OwnedObjectSerializer):
) )
def validate_path(self, path: str): def validate_path(self, path: str):
result = validate_template_and_render(path) converted_path = convert_to_django_template_format(path)
if converted_path != path:
logger.warning(
f"Storage path {path} is not using the new style format, consider updating",
)
result = validate_template_and_render(converted_path)
if result is None: if result is None:
raise serializers.ValidationError(_("Invalid variable detected.")) raise serializers.ValidationError(_("Invalid variable detected."))
return path return converted_path
def update(self, instance, validated_data): def update(self, instance, validated_data):
""" """

View File

@ -1,4 +1,5 @@
import datetime import datetime
import logging
import os import os
import tempfile import tempfile
from pathlib import Path from pathlib import Path
@ -1210,3 +1211,33 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
generate_filename(doc_a), generate_filename(doc_a),
"somepath/asn-201-400/asn-3xx/Does Matter.pdf", "somepath/asn-201-400/asn-3xx/Does Matter.pdf",
) )
@override_settings(
    FILENAME_FORMAT="{{creation_date}}/{title}",
)
def test_template_with_undefined_var(self):
    """
    GIVEN:
        - Filename format referencing the variable "creation_date"
    WHEN:
        - The filename is generated for a document
    THEN:
        - The generated filename is "0000002.pdf"
          (presumably the primary key based default, confirm against generate_filename)
        - Exactly two ERROR messages are logged, reporting the undefined variable
    """
    document = Document.objects.create(
        title="Does Matter",
        created=timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153)),
        added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
        mime_type="application/pdf",
        pk=2,
        checksum="2",
        archive_serial_number=25,
    )

    with self.assertLogs(level=logging.ERROR) as captured_logs:
        generated_name = generate_filename(document)
        self.assertEqual(generated_name, "0000002.pdf")

    logged_lines = captured_logs.output
    self.assertEqual(len(logged_lines), 2)

    # First line is the summary, second names the offending variable
    summary_line, detail_line = logged_lines[0], logged_lines[1]
    self.assertEqual(
        summary_line,
        "ERROR:paperless.filehandling:Template contained 1 undefined values:",
    )
    self.assertEqual(
        detail_line,
        "ERROR:paperless.filehandling: Variable 'creation_date' was undefined",
    )