Fix: Ensure export filenames fit within 143-character limit

The eCryptfs filesystem imposes a filename length limit of 143 characters.
This limit still applies on some systems, such as Synology and QNAP NAS devices when using encrypted folders.

When saving documents with long names, Paperless-ngx triggers a warning (`[Errno 36] File name too long`),
but it gracefully falls back to a default or truncated name.

However, the `document_exporter` crashes when encountering such filenames during export.

This change ensures that exported document base names are capped at 120 characters,
leaving room for extensions and suffixes and keeping the total filename length within 143 characters.

Running the document exporter with the "--delete" parameter will take care of any old files with long names, if the filesystem supported them.
This commit is contained in:
Antoine Merino 2025-03-07 00:12:45 +01:00
parent cf48f47a8c
commit b0d2cda34a
No known key found for this signature in database
GPG Key ID: 1B17E4011697A9FD
6 changed files with 61 additions and 5 deletions

View File

@ -324,9 +324,11 @@ must be provided to import. If this value is lost, the export cannot be imported
!!! warning
If exporting with the file name format, there may be errors due to
your operating system's maximum path lengths. Try adjusting the export
target or consider not using the filename format.
Some operating systems, particularly those using encrypted filesystems
(e.g., some Synology and QNAP NAS devices), impose strict limits
on file name lengths (143 characters).
To ensure compatibility, file names may be truncated to fit within this limit,
meaning the configured filename format may not be fully respected.
### Document importer {#importer}

View File

@ -96,7 +96,13 @@ def generate_filename(
counter=0,
append_gpg=True,
archive_filename=False,
basename_max_length=None,
):
if basename_max_length and basename_max_length < 20:
raise ValueError(
f"The base name length limit ({basename_max_length}) should be at least 20 characters",
)
path = ""
def format_filename(document: Document, template_str: str) -> str | None:
@ -135,6 +141,8 @@ def generate_filename(
# If we have one, render it
if filename_format is not None:
path = format_filename(doc, filename_format)
if basename_max_length:
path = path[:basename_max_length]
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type

View File

@ -427,9 +427,13 @@ class Command(CryptMixin, BaseCommand):
document,
counter=filename_counter,
append_gpg=False,
basename_max_length=120,
)
else:
base_name = document.get_public_filename(counter=filename_counter)
base_name = document.get_public_filename(
counter=filename_counter,
basename_max_length=120,
)
if base_name not in self.exported_files:
self.exported_files.add(base_name)

View File

@ -332,12 +332,27 @@ class Document(SoftDeleteModel, ModelWithOwner):
def archive_file(self):
return Path(self.archive_path).open("rb")
def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str:
def get_public_filename(
self,
*,
archive=False,
counter=0,
suffix=None,
basename_max_length=None,
) -> str:
"""
Returns a sanitized filename for the document, not including any paths.
"""
if basename_max_length and basename_max_length < 20:
raise ValueError(
f"The base name length limit ({basename_max_length}) should be at least 20 characters",
)
result = str(self)
if basename_max_length:
result = result[:basename_max_length]
if counter:
result += f"_{counter:02}"

View File

@ -4,6 +4,7 @@ import zoneinfo
from pathlib import Path
from unittest import mock
import pytest
from django.test import TestCase
from django.test import override_settings
from django.utils import timezone
@ -85,6 +86,17 @@ class TestDocument(TestCase):
)
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
def test_shorter_file_name_for_archive(self):
doc = Document(
mime_type="application/pdf",
title="This file has a very long filename that will exceed filename limits on some obscure filesystems, such as eCryptfs which accepts up to 143 characters",
created=timezone.datetime(2025, 3, 7, tzinfo=zoneinfo.ZoneInfo("UTC")),
)
self.assertEqual(len(doc.get_public_filename()), 163)
self.assertLessEqual(len(doc.get_public_filename(basename_max_length=120)), 143)
with pytest.raises(ValueError):
doc.get_public_filename(basename_max_length=19)
@override_settings(
TIME_ZONE="Europe/Berlin",
)

View File

@ -5,6 +5,7 @@ import tempfile
from pathlib import Path
from unittest import mock
import pytest
from auditlog.context import disable_auditlog
from django.conf import settings
from django.contrib.auth.models import User
@ -1130,6 +1131,20 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(owned_doc), "user1/The Title.pdf")
self.assertEqual(generate_filename(no_owner_doc), "none/does matter.pdf")
@override_settings(
FILENAME_FORMAT="{title}",
)
def test_generate_shorted_file_name(self):
doc = Document.objects.create(
mime_type="application/pdf",
title="This file has a very long filename that will exceed filename limits on some obscure filesystems, such as eCryptfs which accepts up to 143 characters",
checksum="3",
)
self.assertEqual(len(generate_filename(doc)), 152)
self.assertLessEqual(len(generate_filename(doc, basename_max_length=120)), 143)
with pytest.raises(ValueError):
generate_filename(doc, basename_max_length=19)
@override_settings(
FILENAME_FORMAT="{original_name}",
)