From b0d2cda34ae077777fbd050db54baeed4dea9e5e Mon Sep 17 00:00:00 2001 From: Antoine Merino Date: Fri, 7 Mar 2025 00:12:45 +0100 Subject: [PATCH] Fix: Ensure export filenames fit within 143-character limit The eCryptfs filesystem imposes a filename length limit of 143 characters. This limit still applies on some systems, such as Synology and QNAP NAS devices when using encrypted folders. When saving documents with long names, Paperless-ngx triggers a warning (`[Errno 36] File name too long`), but it gracefully falls back to a default or truncated name. However, the `document_exporter` crashes when encountering such filenames during export. This change ensures that exported document base names are capped at 120 characters, leaving room for extensions and suffixes and keeping the total filename length within 143 characters. Running the document exporter with the "--delete" parameter will clean up any old files that were previously exported with a longer name, if the filesystem supported them. --- docs/administration.md | 8 +++++--- src/documents/file_handling.py | 8 ++++++++ .../management/commands/document_exporter.py | 6 +++++- src/documents/models.py | 17 ++++++++++++++++- src/documents/tests/test_document_model.py | 12 ++++++++++++ src/documents/tests/test_file_handling.py | 15 +++++++++++++++ 6 files changed, 61 insertions(+), 5 deletions(-) diff --git a/docs/administration.md b/docs/administration.md index 8e646b326..edf9aa37e 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -324,9 +324,11 @@ must be provided to import. If this value is lost, the export cannot be imported !!! warning - If exporting with the file name format, there may be errors due to - your operating system's maximum path lengths. Try adjusting the export - target or consider not using the filename format. + Some operating systems, particularly those using encrypted filesystems + (e.g., some Synology and QNAP NAS devices), impose strict limits + on file name lengths (143 characters). 
+ To ensure compatibility, file names may be truncated to fit within this limit, + meaning the configured filename format may not be fully respected. ### Document importer {#importer} diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index 3d1a643df..021c49e30 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -96,7 +96,13 @@ def generate_filename( counter=0, append_gpg=True, archive_filename=False, + basename_max_length=None, ): + if basename_max_length and basename_max_length < 20: + raise ValueError( + f"The base name length limit ({basename_max_length}) should be at least 20 characters", + ) + path = "" def format_filename(document: Document, template_str: str) -> str | None: @@ -135,6 +141,8 @@ def generate_filename( # If we have one, render it if filename_format is not None: path = format_filename(doc, filename_format) + if basename_max_length: + path = path[:basename_max_length] counter_str = f"_{counter:02}" if counter else "" filetype_str = ".pdf" if archive_filename else doc.file_type diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index 6dc89479e..3d90f7f57 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -427,9 +427,13 @@ class Command(CryptMixin, BaseCommand): document, counter=filename_counter, append_gpg=False, + basename_max_length=120, ) else: - base_name = document.get_public_filename(counter=filename_counter) + base_name = document.get_public_filename( + counter=filename_counter, + basename_max_length=120, + ) if base_name not in self.exported_files: self.exported_files.add(base_name) diff --git a/src/documents/models.py b/src/documents/models.py index 4b3f97e50..123f14a5d 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -332,12 +332,27 @@ class Document(SoftDeleteModel, ModelWithOwner): def 
archive_file(self): return Path(self.archive_path).open("rb") - def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str: + def get_public_filename( + self, + *, + archive=False, + counter=0, + suffix=None, + basename_max_length=None, + ) -> str: """ Returns a sanitized filename for the document, not including any paths. """ + if basename_max_length and basename_max_length < 20: + raise ValueError( + f"The base name length limit ({basename_max_length}) should be at least 20 characters", + ) + result = str(self) + if basename_max_length: + result = result[:basename_max_length] + if counter: result += f"_{counter:02}" diff --git a/src/documents/tests/test_document_model.py b/src/documents/tests/test_document_model.py index eca08f82a..5798c0c0b 100644 --- a/src/documents/tests/test_document_model.py +++ b/src/documents/tests/test_document_model.py @@ -4,6 +4,7 @@ import zoneinfo from pathlib import Path from unittest import mock +import pytest from django.test import TestCase from django.test import override_settings from django.utils import timezone @@ -85,6 +86,17 @@ class TestDocument(TestCase): ) self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf") + def test_shorter_file_name_for_archive(self): + doc = Document( + mime_type="application/pdf", + title="This file has a very long filename that will exceed filename limits on some obscure filesystems, such as eCryptfs which accepts up to 143 characters", + created=timezone.datetime(2025, 3, 7, tzinfo=zoneinfo.ZoneInfo("UTC")), + ) + self.assertEqual(len(doc.get_public_filename()), 163) + self.assertLessEqual(len(doc.get_public_filename(basename_max_length=120)), 143) + with pytest.raises(ValueError): + doc.get_public_filename(basename_max_length=19) + @override_settings( TIME_ZONE="Europe/Berlin", ) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 6d2d396fc..983b1656c 100644 --- a/src/documents/tests/test_file_handling.py +++ 
b/src/documents/tests/test_file_handling.py @@ -5,6 +5,7 @@ import tempfile from pathlib import Path from unittest import mock +import pytest from auditlog.context import disable_auditlog from django.conf import settings from django.contrib.auth.models import User @@ -1130,6 +1131,20 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase): self.assertEqual(generate_filename(owned_doc), "user1/The Title.pdf") self.assertEqual(generate_filename(no_owner_doc), "none/does matter.pdf") + @override_settings( + FILENAME_FORMAT="{title}", + ) + def test_generate_shorted_file_name(self): + doc = Document.objects.create( + mime_type="application/pdf", + title="This file has a very long filename that will exceed filename limits on some obscure filesystems, such as eCryptfs which accepts up to 143 characters", + checksum="3", + ) + self.assertEqual(len(generate_filename(doc)), 152) + self.assertLessEqual(len(generate_filename(doc, basename_max_length=120)), 143) + with pytest.raises(ValueError): + generate_filename(doc, basename_max_length=19) + @override_settings( FILENAME_FORMAT="{original_name}", )