Feature: Enhanced templating for filename format (#7836)

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
Trenton H
2024-10-06 12:54:01 -07:00
committed by GitHub
parent e49ed58f1a
commit 7c11a37150
29 changed files with 1299 additions and 615 deletions

View File

@@ -1,21 +1,10 @@
import logging
import os
from collections import defaultdict
from pathlib import PurePath
import pathvalidate
from django.conf import settings
from django.template.defaultfilters import slugify
from django.utils import timezone
from documents.models import Document
logger = logging.getLogger("paperless.filehandling")
class defaultdictNoStr(defaultdict):
def __str__(self):
raise ValueError("Don't use {tags} directly.")
from documents.templating.filepath import validate_filepath_template_and_render
from documents.templating.utils import convert_format_str_to_template_format
def create_source_path_directory(source_path):
@@ -54,32 +43,6 @@ def delete_empty_directories(directory, root):
directory = os.path.normpath(os.path.dirname(directory))
def many_to_dictionary(field):
# Converts ManyToManyField to dictionary by assuming, that field
# entries contain an _ or - which will be used as a delimiter
mydictionary = dict()
for index, t in enumerate(field.all()):
# Populate tag names by index
mydictionary[index] = slugify(t.name)
# Find delimiter
delimiter = t.name.find("_")
if delimiter == -1:
delimiter = t.name.find("-")
if delimiter == -1:
continue
key = t.name[:delimiter]
value = t.name[delimiter + 1 :]
mydictionary[slugify(key)] = slugify(value)
return mydictionary
def generate_unique_filename(doc, archive_filename=False):
"""
Generates a unique filename for doc in settings.ORIGINALS_DIR.
@@ -134,116 +97,51 @@ def generate_filename(
archive_filename=False,
):
path = ""
filename_format = settings.FILENAME_FORMAT
try:
if doc.storage_path is not None:
logger.debug(
f"Document has storage_path {doc.storage_path.id} "
f"({doc.storage_path.path}) set",
)
filename_format = doc.storage_path.path
if filename_format is not None:
tags = defaultdictNoStr(
lambda: slugify(None),
many_to_dictionary(doc.tags),
)
tag_list = pathvalidate.sanitize_filename(
",".join(
sorted(tag.name for tag in doc.tags.all()),
),
replacement_text="-",
)
no_value_default = "-none-"
if doc.correspondent:
correspondent = pathvalidate.sanitize_filename(
doc.correspondent.name,
replacement_text="-",
)
else:
correspondent = no_value_default
if doc.document_type:
document_type = pathvalidate.sanitize_filename(
doc.document_type.name,
replacement_text="-",
)
else:
document_type = no_value_default
if doc.archive_serial_number:
asn = str(doc.archive_serial_number)
else:
asn = no_value_default
if doc.owner is not None:
owner_username_str = str(doc.owner.username)
else:
owner_username_str = no_value_default
if doc.original_filename is not None:
# No extension
original_name = PurePath(doc.original_filename).with_suffix("").name
else:
original_name = no_value_default
# Convert UTC database datetime to localized date
local_added = timezone.localdate(doc.added)
local_created = timezone.localdate(doc.created)
path = filename_format.format(
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
correspondent=correspondent,
document_type=document_type,
created=local_created.isoformat(),
created_year=local_created.strftime("%Y"),
created_year_short=local_created.strftime("%y"),
created_month=local_created.strftime("%m"),
created_month_name=local_created.strftime("%B"),
created_month_name_short=local_created.strftime("%b"),
created_day=local_created.strftime("%d"),
added=local_added.isoformat(),
added_year=local_added.strftime("%Y"),
added_year_short=local_added.strftime("%y"),
added_month=local_added.strftime("%m"),
added_month_name=local_added.strftime("%B"),
added_month_name_short=local_added.strftime("%b"),
added_day=local_added.strftime("%d"),
asn=asn,
tags=tags,
tag_list=tag_list,
owner_username=owner_username_str,
original_name=original_name,
doc_pk=f"{doc.pk:07}",
).strip()
if settings.FILENAME_FORMAT_REMOVE_NONE:
path = path.replace("/-none-/", "/") # remove empty directories
path = path.replace(" -none-", "") # remove when spaced, with space
path = path.replace("-none-", "") # remove rest of the occurrences
path = path.replace("-none-", "none") # backward compatibility
path = path.strip(os.sep)
except (ValueError, KeyError, IndexError):
logger.warning(
f"Invalid filename_format '{filename_format}', falling back to default",
def format_filename(document: Document, template_str: str) -> str | None:
rendered_filename = validate_filepath_template_and_render(
template_str,
document,
)
if rendered_filename is None:
return None
# Apply this setting. It could become a filter in the future (or users could use |default)
if settings.FILENAME_FORMAT_REMOVE_NONE:
rendered_filename = rendered_filename.replace("/-none-/", "/")
rendered_filename = rendered_filename.replace(" -none-", "")
rendered_filename = rendered_filename.replace("-none-", "")
rendered_filename = rendered_filename.replace(
"-none-",
"none",
) # backward compatibility
return rendered_filename
# Determine the source of the format string
if doc.storage_path is not None:
filename_format = doc.storage_path.path
elif settings.FILENAME_FORMAT is not None:
# Maybe convert old to new style
filename_format = convert_format_str_to_template_format(
settings.FILENAME_FORMAT,
)
else:
filename_format = None
# If we have one, render it
if filename_format is not None:
path = format_filename(doc, filename_format)
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type
if len(path) > 0:
if path:
filename = f"{path}{counter_str}{filetype_str}"
else:
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
# Append .gpg for encrypted files
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
filename += ".gpg"