mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-14 21:54:22 -06:00
Normalize filenames and titles to NFC
This commit is contained in:
@@ -46,6 +46,7 @@ from documents.signals.handlers import run_workflows
|
|||||||
from documents.templating.workflows import parse_w_workflow_placeholders
|
from documents.templating.workflows import parse_w_workflow_placeholders
|
||||||
from documents.utils import copy_basic_file_stats
|
from documents.utils import copy_basic_file_stats
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
|
from documents.utils import normalize_nfc
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
from paperless_mail.parsers import MailDocumentParser
|
from paperless_mail.parsers import MailDocumentParser
|
||||||
|
|
||||||
@@ -111,7 +112,12 @@ class ConsumerPluginMixin:
|
|||||||
|
|
||||||
self.renew_logging_group()
|
self.renew_logging_group()
|
||||||
|
|
||||||
self.filename = self.metadata.filename or self.input_doc.original_file.name
|
self.metadata.filename = normalize_nfc(self.metadata.filename)
|
||||||
|
self.metadata.title = normalize_nfc(self.metadata.title)
|
||||||
|
|
||||||
|
self.filename = normalize_nfc(
|
||||||
|
self.metadata.filename or self.input_doc.original_file.name,
|
||||||
|
)
|
||||||
|
|
||||||
def _send_progress(
|
def _send_progress(
|
||||||
self,
|
self,
|
||||||
@@ -652,6 +658,8 @@ class ConsumerPlugin(
|
|||||||
f"Error occurred parsing title override '{self.metadata.title}', falling back to original. Exception: {e}",
|
f"Error occurred parsing title override '{self.metadata.title}', falling back to original. Exception: {e}",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
title = normalize_nfc(title)
|
||||||
|
|
||||||
file_for_checksum = (
|
file_for_checksum = (
|
||||||
self.unmodified_original
|
self.unmodified_original
|
||||||
if self.unmodified_original is not None
|
if self.unmodified_original is not None
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from django.conf import settings
|
|||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.templating.filepath import validate_filepath_template_and_render
|
from documents.templating.filepath import validate_filepath_template_and_render
|
||||||
from documents.templating.utils import convert_format_str_to_template_format
|
from documents.templating.utils import convert_format_str_to_template_format
|
||||||
|
from documents.utils import normalize_nfc
|
||||||
|
|
||||||
|
|
||||||
def create_source_path_directory(source_path: Path) -> None:
|
def create_source_path_directory(source_path: Path) -> None:
|
||||||
@@ -55,11 +56,11 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
|
|||||||
"""
|
"""
|
||||||
if archive_filename:
|
if archive_filename:
|
||||||
old_filename: Path | None = (
|
old_filename: Path | None = (
|
||||||
Path(doc.archive_filename) if doc.archive_filename else None
|
Path(normalize_nfc(doc.archive_filename)) if doc.archive_filename else None
|
||||||
)
|
)
|
||||||
root = settings.ARCHIVE_DIR
|
root = settings.ARCHIVE_DIR
|
||||||
else:
|
else:
|
||||||
old_filename = Path(doc.filename) if doc.filename else None
|
old_filename = Path(normalize_nfc(doc.filename)) if doc.filename else None
|
||||||
root = settings.ORIGINALS_DIR
|
root = settings.ORIGINALS_DIR
|
||||||
|
|
||||||
# If generating archive filenames, try to make a name that is similar to
|
# If generating archive filenames, try to make a name that is similar to
|
||||||
@@ -91,7 +92,7 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
|
|||||||
)
|
)
|
||||||
if new_filename == old_filename:
|
if new_filename == old_filename:
|
||||||
# still the same as before.
|
# still the same as before.
|
||||||
return new_filename
|
return Path(normalize_nfc(str(new_filename)))
|
||||||
|
|
||||||
if (root / new_filename).exists():
|
if (root / new_filename).exists():
|
||||||
counter += 1
|
counter += 1
|
||||||
@@ -119,7 +120,7 @@ def format_filename(document: Document, template_str: str) -> str | None:
|
|||||||
"none",
|
"none",
|
||||||
) # backward compatibility
|
) # backward compatibility
|
||||||
|
|
||||||
return rendered_filename
|
return normalize_nfc(rendered_filename)
|
||||||
|
|
||||||
|
|
||||||
def generate_filename(
|
def generate_filename(
|
||||||
@@ -174,4 +175,4 @@ def generate_filename(
|
|||||||
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
|
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
|
||||||
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
|
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
|
||||||
|
|
||||||
return full_path
|
return Path(normalize_nfc(str(full_path)))
|
||||||
|
|||||||
@@ -290,6 +290,23 @@ class TestConsumer(
|
|||||||
|
|
||||||
self._assert_first_last_send_progress()
|
self._assert_first_last_send_progress()
|
||||||
|
|
||||||
|
def test_override_filename_normalized(self):
    """
    GIVEN:
        - A filename override spelled with a decomposed umlaut
          ("u" followed by the combining diaeresis U+0308)
    WHEN:
        - The test file is consumed with that override
    THEN:
        - The stored original filename and title use the composed "ü"
    """
    source_file = self.get_test_file()
    decomposed_name = "Inhaltsu\u0308bersicht.pdf"
    overrides = DocumentMetadataOverrides(filename=decomposed_name)

    with self.get_consumer(source_file, overrides) as consumer:
        consumer.run()

    stored = Document.objects.first()

    self.assertIsNotNone(stored)
    self.assertEqual(stored.original_filename, "Inhaltsübersicht.pdf")
    self.assertEqual(stored.title, "Inhaltsübersicht")
    self._assert_first_last_send_progress()
|
||||||
|
|
||||||
def testOverrideTitle(self):
|
def testOverrideTitle(self):
|
||||||
with self.get_consumer(
|
with self.get_consumer(
|
||||||
self.get_test_file(),
|
self.get_test_file(),
|
||||||
@@ -304,6 +321,25 @@ class TestConsumer(
|
|||||||
self.assertEqual(document.title, "Override Title")
|
self.assertEqual(document.title, "Override Title")
|
||||||
self._assert_first_last_send_progress()
|
self._assert_first_last_send_progress()
|
||||||
|
|
||||||
|
@override_settings(FILENAME_FORMAT="{{ title }}")
def test_filename_format_normalized(self):
    """
    GIVEN:
        - A filename format that consists only of the document title
        - A title override spelled with a decomposed umlaut
          ("u" followed by the combining diaeresis U+0308)
    WHEN:
        - The test file is consumed with that override
    THEN:
        - The stored title and the generated filename use the composed "ü"
        - The file exists at the document's source path
    """
    source_file = self.get_test_file()
    decomposed_title = "Inhaltsu\u0308bersicht Faszination"
    overrides = DocumentMetadataOverrides(title=decomposed_title)

    with self.get_consumer(source_file, overrides) as consumer:
        consumer.run()

    stored = Document.objects.first()

    self.assertIsNotNone(stored)
    self.assertEqual(stored.title, "Inhaltsübersicht Faszination")
    self.assertEqual(stored.filename, "Inhaltsübersicht Faszination.pdf")
    self.assertIsFile(stored.source_path)
    self._assert_first_last_send_progress()
|
||||||
|
|
||||||
def testOverrideCorrespondent(self):
|
def testOverrideCorrespondent(self):
|
||||||
c = Correspondent.objects.create(name="test")
|
c = Correspondent.objects.create(name="test")
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
|
import unicodedata
|
||||||
|
from os import PathLike
|
||||||
from os import utime
|
from os import utime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from subprocess import CompletedProcess
|
from subprocess import CompletedProcess
|
||||||
@@ -16,6 +18,14 @@ def _coerce_to_path(
|
|||||||
return Path(source).resolve(), Path(dest).resolve()
|
return Path(source).resolve(), Path(dest).resolve()
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_nfc(value: str | PathLike[str] | None) -> str | None:
|
||||||
|
"""Return NFC-normalized string for filesystem-safe comparisons."""
|
||||||
|
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
return unicodedata.normalize("NFC", str(value))
|
||||||
|
|
||||||
|
|
||||||
def copy_basic_file_stats(source: Path | str, dest: Path | str) -> None:
|
def copy_basic_file_stats(source: Path | str, dest: Path | str) -> None:
|
||||||
"""
|
"""
|
||||||
Copies only the m_time and a_time attributes from source to destination.
|
Copies only the m_time and a_time attributes from source to destination.
|
||||||
|
|||||||
Reference in New Issue
Block a user