diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index 1d21b1358..98f628634 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -7,7 +7,7 @@ from pathlib import PurePath import pathvalidate from django.conf import settings from django.template import Context -from django.template import Template +from django.template import Engine from django.utils import timezone from documents.models import Correspondent @@ -20,6 +20,14 @@ from documents.models import Tag logger = logging.getLogger("paperless.filehandling") +INVALID_VARIABLE_STR = "InvalidVarError" + +filepath_engine = Engine( + autoescape=False, + string_if_invalid=f"{INVALID_VARIABLE_STR}: %s", + libraries={"filepath": "documents.templatetags"}, +) + def create_source_path_directory(source_path): os.makedirs(os.path.dirname(source_path), exist_ok=True) @@ -235,6 +243,22 @@ def validate_template_and_render( Returns None if the string is not valid or an error occurred, otherwise """ + def detect_undefined_variables(rendered_string: str) -> list[str] | None: + """ + Checks the rendered template for variables which were not defined/invalid and returns a + listing of them or None if none were found. + + Used to provide context to the user, rather than mostly failing silently + + """ + pattern = rf"{INVALID_VARIABLE_STR}: (\w+)" + matches = re.findall(pattern, rendered_string) + + if matches: + return list(set(matches)) + else: + return None + # Create the dummy document object with all fields filled in for validation purposes if document is None: document = create_dummy_document() @@ -250,10 +274,10 @@ def validate_template_and_render( ] else: # or use the real document information - logger.info("Using real document") tags_list = document.tags.all() custom_fields = document.custom_fields.all() + # Build the context dictionary context = ( {"document": document} | get_basic_metadata_context(document, no_value_default="-none-") @@ -263,13 +287,23 @@ def validate_template_and_render( | get_custom_fields_context(custom_fields) ) - logger.info(context) - # Try rendering the template try: - template = Template(template_string) + # We load the custom tag used to remove spaces and newlines from the final string around the user string + template = filepath_engine.from_string( + "{% load filepath %}{% filepath %}" + template_string + "{% endfilepath %}", + ) rendered_template = template.render(Context(context)) - logger.info(f"Template is valid and rendered successfully: {rendered_template}") + + # Check for errors + undefined_vars = detect_undefined_variables(rendered_template) + if undefined_vars: + logger.error(f"Template contained {len(undefined_vars)} undefined values:") + for x in undefined_vars: + logger.error(f" Variable '{x}' was undefined") + return None + + # We're good! return rendered_template except Exception as e: logger.warning(f"Error in filename generation: {e}") @@ -287,7 +321,7 @@ def generate_filename( ): path = "" - def convert_to_django_template_format(old_format): + def convert_to_django_template_format(old_format: str) -> str: """ Converts old Python string format (with {}) to Django template style (with {{ }}), while ignoring existing {{ ... }} placeholders. @@ -314,8 +348,7 @@ def generate_filename( if rendered_filename is None: return None - logger.info(rendered_filename) - + # Apply this setting. It could become a filter in the future (or users could use |default) if settings.FILENAME_FORMAT_REMOVE_NONE: rendered_filename = rendered_filename.replace("/-none-/", "/") rendered_filename = rendered_filename.replace(" -none-", "") @@ -326,10 +359,6 @@ def generate_filename( "none", ) # backward compatibility - rendered_filename = ( - rendered_filename.strip(os.sep).replace("\n", "").replace("\r", "").strip() - ) - return rendered_filename # Determine the source of the format string @@ -346,6 +375,7 @@ def generate_filename( ) # Warn the user they should update + # TODO: Move this to system check if filename_format != settings.FILENAME_FORMAT: logger.warning( f"Filename format {settings.FILENAME_FORMAT} is using the old style, please update to use double curly brackets", diff --git a/src/documents/templatetags.py b/src/documents/templatetags.py new file mode 100644 index 000000000..9a2b4b969 --- /dev/null +++ b/src/documents/templatetags.py @@ -0,0 +1,43 @@ +import re + +from django import template + +register = template.Library() + + +class FilePathNode(template.Node): + """ + A custom tag to remove extra spaces before and after / as well as remove + any newlines from the resulting string. + + https://docs.djangoproject.com/en/5.1/howto/custom-template-tags/#parsing-until-another-block-tag + """ + + def __init__(self, nodelist): + self.nodelist = nodelist + + def render(self, context): + def clean_filepath(value): + """ + Clean up a filepath by: + 1. Removing newlines and carriage returns + 2. Removing extra spaces before and after forward slashes + 3. Preserving spaces in other parts of the path + """ + value = value.replace("\n", "").replace("\r", "") + value = re.sub(r"\s*/\s*", "/", value) + return value.strip() + + output = self.nodelist.render(context) + return clean_filepath(output) + + +@register.tag(name="filepath") +def construct_filepath(parser, token): + """ + The registered tag as {% filepath %}, which is always loaded around the user provided template string to + render everything as a single line, with minimal spaces + """ + nodelist = parser.parse(("endfilepath",)) + parser.delete_first_token() + return FilePathNode(nodelist) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 2a2cf5e4d..d63432967 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -1145,11 +1145,15 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase): def test_complex_template_strings(self): sp = StoragePath.objects.create( name="sp1", - path="""{% if document.checksum == \"2\" %} - {{created}} - {% else %} - {{added}} - {% endif %}""", + path=""" + somepath/ + {% if document.checksum == '2' %} + some where/{{created}} + {% else %} + {{added}} + {% endif %} + /{{ title }} + """, ) doc_a = Document.objects.create( @@ -1159,16 +1163,50 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase): mime_type="application/pdf", pk=2, checksum="2", - archive_serial_number=4, + archive_serial_number=25, storage_path=sp, ) - self.assertEqual(generate_filename(doc_a), "2020-06-25.pdf") + self.assertEqual( + generate_filename(doc_a), + "somepath/some where/2020-06-25/Does Matter.pdf", + ) doc_a.checksum = "5" - self.assertEqual(generate_filename(doc_a), "2024-10-01.pdf") + self.assertEqual( + generate_filename(doc_a), + "somepath/2024-10-01/Does Matter.pdf", + ) - sp.path = '{{ document.title|lower }}{{ document.asn|add:"-2" }}' + sp.path = ( + "{{ document.title|lower }}{{ document.archive_serial_number|add:'-2' }}" + ) sp.save() - self.assertEqual(generate_filename(doc_a), "does matter-2.pdf") + self.assertEqual(generate_filename(doc_a), "does matter23.pdf") + + sp.path = """ + somepath/ + {% if document.archive_serial_number >= 0 and document.archive_serial_number <= 200 %} + asn-000-200/{{title}} + {% elif document.archive_serial_number >= 201 and document.archive_serial_number <= 400 %} + asn-201-400 + {% if document.archive_serial_number >= 201 and document.archive_serial_number < 300 %} + /asn-2xx + {% elif document.archive_serial_number >= 300 and document.archive_serial_number < 400 %} + /asn-3xx + {% endif %} + {% endif %} + /{{ title }} + """ + sp.save() + self.assertEqual( + generate_filename(doc_a), + "somepath/asn-000-200/Does Matter/Does Matter.pdf", + ) + doc_a.archive_serial_number = 301 + doc_a.save() + self.assertEqual( + generate_filename(doc_a), + "somepath/asn-201-400/asn-3xx/Does Matter.pdf", + )