Creates a custom template tag to clean up whitespace and newlines in rendered file paths

This commit is contained in:
Trenton H 2024-10-02 08:52:27 -07:00
parent 109fd7d11e
commit 5b1cb50793
3 changed files with 134 additions and 23 deletions

View File

@ -7,7 +7,7 @@ from pathlib import PurePath
import pathvalidate import pathvalidate
from django.conf import settings from django.conf import settings
from django.template import Context from django.template import Context
from django.template import Template from django.template import Engine
from django.utils import timezone from django.utils import timezone
from documents.models import Correspondent from documents.models import Correspondent
@ -20,6 +20,14 @@ from documents.models import Tag
logger = logging.getLogger("paperless.filehandling") logger = logging.getLogger("paperless.filehandling")
# Marker text the template engine emits in place of any variable it cannot
# resolve; the rendered output is searched for it afterwards to report the
# offending names.
INVALID_VARIABLE_STR = "InvalidVarError"
# Template engine instance used for rendering file path templates
filepath_engine = Engine(
    # File paths are not HTML, so values must not be HTML-escaped
    autoescape=False,
    # The "%s" is where the name of the unresolved variable ends up
    string_if_invalid=f"{INVALID_VARIABLE_STR}: %s",
    # Exposes the custom tag module to templates as {% load filepath %}
    libraries={"filepath": "documents.templatetags"},
)
def create_source_path_directory(source_path): def create_source_path_directory(source_path):
os.makedirs(os.path.dirname(source_path), exist_ok=True) os.makedirs(os.path.dirname(source_path), exist_ok=True)
@ -235,6 +243,22 @@ def validate_template_and_render(
Returns None if the string is not valid or an error occurred, otherwise Returns None if the string is not valid or an error occurred, otherwise
""" """
def detect_undefined_variables(rendered_string: str) -> list[str] | None:
    """
    Finds the variables which the template engine could not resolve.

    The rendered text is scanned for the invalid-variable marker
    (INVALID_VARIABLE_STR followed by ": " and a name) so the user can be told
    which names were wrong, rather than mostly failing silently.

    Returns the distinct variable names in the order they first appear in the
    rendered text, or None if no marker was found.
    """
    # NOTE(review): \w+ stops at the first ".", so a dotted lookup is
    # presumably reported by its first segment only - confirm this is wanted
    pattern = rf"{INVALID_VARIABLE_STR}: (\w+)"
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # reported names come out in the same order on every run (a set gives no
    # such guarantee)
    unique_names = list(dict.fromkeys(re.findall(pattern, rendered_string)))
    return unique_names or None
# Create the dummy document object with all fields filled in for validation purposes # Create the dummy document object with all fields filled in for validation purposes
if document is None: if document is None:
document = create_dummy_document() document = create_dummy_document()
@ -250,10 +274,10 @@ def validate_template_and_render(
] ]
else: else:
# or use the real document information # or use the real document information
logger.info("Using real document")
tags_list = document.tags.all() tags_list = document.tags.all()
custom_fields = document.custom_fields.all() custom_fields = document.custom_fields.all()
# Build the context dictionary
context = ( context = (
{"document": document} {"document": document}
| get_basic_metadata_context(document, no_value_default="-none-") | get_basic_metadata_context(document, no_value_default="-none-")
@ -263,13 +287,23 @@ def validate_template_and_render(
| get_custom_fields_context(custom_fields) | get_custom_fields_context(custom_fields)
) )
logger.info(context)
# Try rendering the template # Try rendering the template
try: try:
template = Template(template_string) # We load the custom tag used to remove spaces and newlines from the final string around the user string
template = filepath_engine.from_string(
"{% load filepath %}{% filepath %}" + template_string + "{% endfilepath %}",
)
rendered_template = template.render(Context(context)) rendered_template = template.render(Context(context))
logger.info(f"Template is valid and rendered successfully: {rendered_template}")
# Check for errors
undefined_vars = detect_undefined_variables(rendered_template)
if undefined_vars:
logger.error(f"Template contained {len(undefined_vars)} undefined values:")
for x in undefined_vars:
logger.error(f" Variable '{x}' was undefined")
return None
# We're good!
return rendered_template return rendered_template
except Exception as e: except Exception as e:
logger.warning(f"Error in filename generation: {e}") logger.warning(f"Error in filename generation: {e}")
@ -287,7 +321,7 @@ def generate_filename(
): ):
path = "" path = ""
def convert_to_django_template_format(old_format): def convert_to_django_template_format(old_format: str) -> str:
""" """
Converts old Python string format (with {}) to Django template style (with {{ }}), Converts old Python string format (with {}) to Django template style (with {{ }}),
while ignoring existing {{ ... }} placeholders. while ignoring existing {{ ... }} placeholders.
@ -314,8 +348,7 @@ def generate_filename(
if rendered_filename is None: if rendered_filename is None:
return None return None
logger.info(rendered_filename) # Apply this setting. It could become a filter in the future (or users could use |default)
if settings.FILENAME_FORMAT_REMOVE_NONE: if settings.FILENAME_FORMAT_REMOVE_NONE:
rendered_filename = rendered_filename.replace("/-none-/", "/") rendered_filename = rendered_filename.replace("/-none-/", "/")
rendered_filename = rendered_filename.replace(" -none-", "") rendered_filename = rendered_filename.replace(" -none-", "")
@ -326,10 +359,6 @@ def generate_filename(
"none", "none",
) # backward compatibility ) # backward compatibility
rendered_filename = (
rendered_filename.strip(os.sep).replace("\n", "").replace("\r", "").strip()
)
return rendered_filename return rendered_filename
# Determine the source of the format string # Determine the source of the format string
@ -346,6 +375,7 @@ def generate_filename(
) )
# Warn the user they should update # Warn the user they should update
# TODO: Move this to system check
if filename_format != settings.FILENAME_FORMAT: if filename_format != settings.FILENAME_FORMAT:
logger.warning( logger.warning(
f"Filename format {settings.FILENAME_FORMAT} is using the old style, please update to use double curly brackets", f"Filename format {settings.FILENAME_FORMAT} is using the old style, please update to use double curly brackets",

View File

@ -0,0 +1,43 @@
import re
from django import template
# Library instance which the template tags defined in this module are registered on
register = template.Library()
class FilePathNode(template.Node):
    """
    Template node behind the {% filepath %} block tag.

    Renders everything enclosed by the tag pair, then tidies the text so it
    reads as a single line path:
      1. Newlines and carriage returns are removed
      2. Whitespace directly before or after a forward slash is removed
      3. Leading and trailing whitespace is trimmed
    Spaces anywhere else in the path are preserved.

    https://docs.djangoproject.com/en/5.1/howto/custom-template-tags/#parsing-until-another-block-tag
    """

    def __init__(self, nodelist):
        # The parsed nodes found between the opening and closing tags
        self.nodelist = nodelist

    def render(self, context):
        rendered = self.nodelist.render(context)
        # Line breaks are deleted outright, not turned into spaces
        without_newlines = rendered.replace("\n", "")
        single_line = without_newlines.replace("\r", "")
        # e.g. "some / where" becomes "some/where"
        tightened = re.sub(r"\s*/\s*", "/", single_line)
        return tightened.strip()
@register.tag(name="filepath")
def construct_filepath(parser, token):
    """
    Compilation function for the {% filepath %} ... {% endfilepath %} block tag.

    The tag is always wrapped around the user provided template string, so the
    final result is rendered as a single line with minimal spaces.

    Returns a FilePathNode holding every node found before the closing tag.
    """
    enclosed_nodes = parser.parse(("endfilepath",))
    # Discard the closing tag token, it has served its purpose
    parser.delete_first_token()
    return FilePathNode(enclosed_nodes)

View File

@ -1145,11 +1145,15 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
def test_complex_template_strings(self): def test_complex_template_strings(self):
sp = StoragePath.objects.create( sp = StoragePath.objects.create(
name="sp1", name="sp1",
path="""{% if document.checksum == \"2\" %} path="""
{{created}} somepath/
{% else %} {% if document.checksum == '2' %}
{{added}} some where/{{created}}
{% endif %}""", {% else %}
{{added}}
{% endif %}
/{{ title }}
""",
) )
doc_a = Document.objects.create( doc_a = Document.objects.create(
@ -1159,16 +1163,50 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
mime_type="application/pdf", mime_type="application/pdf",
pk=2, pk=2,
checksum="2", checksum="2",
archive_serial_number=4, archive_serial_number=25,
storage_path=sp, storage_path=sp,
) )
self.assertEqual(generate_filename(doc_a), "2020-06-25.pdf") self.assertEqual(
generate_filename(doc_a),
"somepath/some where/2020-06-25/Does Matter.pdf",
)
doc_a.checksum = "5" doc_a.checksum = "5"
self.assertEqual(generate_filename(doc_a), "2024-10-01.pdf") self.assertEqual(
generate_filename(doc_a),
"somepath/2024-10-01/Does Matter.pdf",
)
sp.path = '{{ document.title|lower }}{{ document.asn|add:"-2" }}' sp.path = (
"{{ document.title|lower }}{{ document.archive_serial_number|add:'-2' }}"
)
sp.save() sp.save()
self.assertEqual(generate_filename(doc_a), "does matter-2.pdf") self.assertEqual(generate_filename(doc_a), "does matter23.pdf")
sp.path = """
somepath/
{% if document.archive_serial_number >= 0 and document.archive_serial_number <= 200 %}
asn-000-200/{{title}}
{% elif document.archive_serial_number >= 201 and document.archive_serial_number <= 400 %}
asn-201-400
{% if document.archive_serial_number >= 201 and document.archive_serial_number < 300 %}
/asn-2xx
{% elif document.archive_serial_number >= 300 and document.archive_serial_number < 400 %}
/asn-3xx
{% endif %}
{% endif %}
/{{ title }}
"""
sp.save()
self.assertEqual(
generate_filename(doc_a),
"somepath/asn-000-200/Does Matter/Does Matter.pdf",
)
doc_a.archive_serial_number = 301
doc_a.save()
self.assertEqual(
generate_filename(doc_a),
"somepath/asn-201-400/asn-3xx/Does Matter.pdf",
)