Creates a custom tag to handle the whitespace business

This commit is contained in:
Trenton H 2024-10-02 08:52:27 -07:00
parent 109fd7d11e
commit 5b1cb50793
3 changed files with 134 additions and 23 deletions

View File

@ -7,7 +7,7 @@ from pathlib import PurePath
import pathvalidate
from django.conf import settings
from django.template import Context
from django.template import Template
from django.template import Engine
from django.utils import timezone
from documents.models import Correspondent
@ -20,6 +20,14 @@ from documents.models import Tag
logger = logging.getLogger("paperless.filehandling")
INVALID_VARIABLE_STR = "InvalidVarError"
filepath_engine = Engine(
autoescape=False,
string_if_invalid=f"{INVALID_VARIABLE_STR}: %s",
libraries={"filepath": "documents.templatetags"},
)
def create_source_path_directory(source_path):
os.makedirs(os.path.dirname(source_path), exist_ok=True)
@ -235,6 +243,22 @@ def validate_template_and_render(
Returns None if the string is not valid or an error occurred, otherwise
"""
def detect_undefined_variables(rendered_string: str) -> list[str] | None:
"""
Checks the rendered template for variables which were not defined/invalid and returns a
listing of them or None if none were found.
Used to provide context to the user, rather than mostly failing silently
"""
pattern = rf"{INVALID_VARIABLE_STR}: (\w+)"
matches = re.findall(pattern, rendered_string)
if matches:
return list(set(matches))
else:
return None
# Create the dummy document object with all fields filled in for validation purposes
if document is None:
document = create_dummy_document()
@ -250,10 +274,10 @@ def validate_template_and_render(
]
else:
# or use the real document information
logger.info("Using real document")
tags_list = document.tags.all()
custom_fields = document.custom_fields.all()
# Build the context dictionary
context = (
{"document": document}
| get_basic_metadata_context(document, no_value_default="-none-")
@ -263,13 +287,23 @@ def validate_template_and_render(
| get_custom_fields_context(custom_fields)
)
logger.info(context)
# Try rendering the template
try:
template = Template(template_string)
# We load the custom tag used to remove spaces and newlines from the final string around the user string
template = filepath_engine.from_string(
"{% load filepath %}{% filepath %}" + template_string + "{% endfilepath %}",
)
rendered_template = template.render(Context(context))
logger.info(f"Template is valid and rendered successfully: {rendered_template}")
# Check for errors
undefined_vars = detect_undefined_variables(rendered_template)
if undefined_vars:
logger.error(f"Template contained {len(undefined_vars)} undefined values:")
for x in undefined_vars:
logger.error(f" Variable '{x}' was undefined")
return None
# We're good!
return rendered_template
except Exception as e:
logger.warning(f"Error in filename generation: {e}")
@ -287,7 +321,7 @@ def generate_filename(
):
path = ""
def convert_to_django_template_format(old_format):
def convert_to_django_template_format(old_format: str) -> str:
"""
Converts old Python string format (with {}) to Django template style (with {{ }}),
while ignoring existing {{ ... }} placeholders.
@ -314,8 +348,7 @@ def generate_filename(
if rendered_filename is None:
return None
logger.info(rendered_filename)
# Apply this setting. It could become a filter in the future (or users could use |default)
if settings.FILENAME_FORMAT_REMOVE_NONE:
rendered_filename = rendered_filename.replace("/-none-/", "/")
rendered_filename = rendered_filename.replace(" -none-", "")
@ -326,10 +359,6 @@ def generate_filename(
"none",
) # backward compatibility
rendered_filename = (
rendered_filename.strip(os.sep).replace("\n", "").replace("\r", "").strip()
)
return rendered_filename
# Determine the source of the format string
@ -346,6 +375,7 @@ def generate_filename(
)
# Warn the user they should update
# TODO: Move this to system check
if filename_format != settings.FILENAME_FORMAT:
logger.warning(
f"Filename format {settings.FILENAME_FORMAT} is using the old style, please update to use double curly brackets",

View File

@ -0,0 +1,43 @@
import re
from django import template
register = template.Library()
class FilePathNode(template.Node):
"""
A custom tag to remove extra spaces before and after / as well as remove
any newlines from the resulting string.
https://docs.djangoproject.com/en/5.1/howto/custom-template-tags/#parsing-until-another-block-tag
"""
def __init__(self, nodelist):
self.nodelist = nodelist
def render(self, context):
def clean_filepath(value):
"""
Clean up a filepath by:
1. Removing newlines and carriage returns
2. Removing extra spaces before and after forward slashes
3. Preserving spaces in other parts of the path
"""
value = value.replace("\n", "").replace("\r", "")
value = re.sub(r"\s*/\s*", "/", value)
return value.strip()
output = self.nodelist.render(context)
return clean_filepath(output)
@register.tag(name="filepath")
def construct_filepath(parser, token):
"""
The registered tag as {% filepath %}, which is always loaded around the user provided template string to
render everything as a single line, with minimal spaces
"""
nodelist = parser.parse(("endfilepath",))
parser.delete_first_token()
return FilePathNode(nodelist)

View File

@ -1145,11 +1145,15 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
def test_complex_template_strings(self):
sp = StoragePath.objects.create(
name="sp1",
path="""{% if document.checksum == \"2\" %}
{{created}}
{% else %}
{{added}}
{% endif %}""",
path="""
somepath/
{% if document.checksum == '2' %}
some where/{{created}}
{% else %}
{{added}}
{% endif %}
/{{ title }}
""",
)
doc_a = Document.objects.create(
@ -1159,16 +1163,50 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
mime_type="application/pdf",
pk=2,
checksum="2",
archive_serial_number=4,
archive_serial_number=25,
storage_path=sp,
)
self.assertEqual(generate_filename(doc_a), "2020-06-25.pdf")
self.assertEqual(
generate_filename(doc_a),
"somepath/some where/2020-06-25/Does Matter.pdf",
)
doc_a.checksum = "5"
self.assertEqual(generate_filename(doc_a), "2024-10-01.pdf")
self.assertEqual(
generate_filename(doc_a),
"somepath/2024-10-01/Does Matter.pdf",
)
sp.path = '{{ document.title|lower }}{{ document.asn|add:"-2" }}'
sp.path = (
"{{ document.title|lower }}{{ document.archive_serial_number|add:'-2' }}"
)
sp.save()
self.assertEqual(generate_filename(doc_a), "does matter-2.pdf")
self.assertEqual(generate_filename(doc_a), "does matter23.pdf")
sp.path = """
somepath/
{% if document.archive_serial_number >= 0 and document.archive_serial_number <= 200 %}
asn-000-200/{{title}}
{% elif document.archive_serial_number >= 201 and document.archive_serial_number <= 400 %}
asn-201-400
{% if document.archive_serial_number >= 201 and document.archive_serial_number < 300 %}
/asn-2xx
{% elif document.archive_serial_number >= 300 and document.archive_serial_number < 400 %}
/asn-3xx
{% endif %}
{% endif %}
/{{ title }}
"""
sp.save()
self.assertEqual(
generate_filename(doc_a),
"somepath/asn-000-200/Does Matter/Does Matter.pdf",
)
doc_a.archive_serial_number = 301
doc_a.save()
self.assertEqual(
generate_filename(doc_a),
"somepath/asn-201-400/asn-3xx/Does Matter.pdf",
)