Mirror of https://github.com/paperless-ngx/paperless-ngx.git, synced 2026-01-12 21:44:21 -06:00

Compare commits (1 commit): feature-un ... feature-re

| Author | SHA1 | Date |
|---|---|---|
|  | 5fe6c73e34 |  |
@@ -32,7 +32,7 @@ RUN set -eux \
 # Purpose: Installs s6-overlay and rootfs
 # Comments:
 # - Don't leave anything extra in here either
-FROM ghcr.io/astral-sh/uv:0.9.15-python3.12-trixie-slim AS s6-overlay-base
+FROM ghcr.io/astral-sh/uv:0.9-python3.12-trixie-slim AS s6-overlay-base
 
 WORKDIR /usr/src/s6
@@ -46,7 +46,6 @@ from documents.signals.handlers import run_workflows
 from documents.templating.workflows import parse_w_workflow_placeholders
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
-from documents.utils import normalize_nfc
 from documents.utils import run_subprocess
 from paperless_mail.parsers import MailDocumentParser
 
@@ -112,12 +111,7 @@ class ConsumerPluginMixin:
 
         self.renew_logging_group()
 
-        self.metadata.filename = normalize_nfc(self.metadata.filename)
-        self.metadata.title = normalize_nfc(self.metadata.title)
-
-        self.filename = normalize_nfc(
-            self.metadata.filename or self.input_doc.original_file.name,
-        )
+        self.filename = self.metadata.filename or self.input_doc.original_file.name
 
     def _send_progress(
         self,
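
For context, the reverted lines above composed Unicode before storing metadata. A minimal standalone sketch of what NFC normalization does (the sample filename is taken from the tests removed later in this diff):

import unicodedata

# "u" + U+0308 COMBINING DIAERESIS is how e.g. macOS filesystems report "ü"
decomposed = "Inhaltsu\u0308bersicht.pdf"
composed = unicodedata.normalize("NFC", decomposed)

print(decomposed == "Inhaltsübersicht.pdf")  # False: different code points
print(composed == "Inhaltsübersicht.pdf")    # True: precomposed after NFC
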
@@ -658,8 +652,6 @@ class ConsumerPlugin(
                     f"Error occurred parsing title override '{self.metadata.title}', falling back to original. Exception: {e}",
                 )
 
-        title = normalize_nfc(title)
-
         file_for_checksum = (
             self.unmodified_original
             if self.unmodified_original is not None
@@ -6,7 +6,6 @@ from django.conf import settings
 from documents.models import Document
 from documents.templating.filepath import validate_filepath_template_and_render
 from documents.templating.utils import convert_format_str_to_template_format
-from documents.utils import normalize_nfc
 
 
 def create_source_path_directory(source_path: Path) -> None:
@@ -56,11 +55,11 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
     """
     if archive_filename:
         old_filename: Path | None = (
-            Path(normalize_nfc(doc.archive_filename)) if doc.archive_filename else None
+            Path(doc.archive_filename) if doc.archive_filename else None
         )
         root = settings.ARCHIVE_DIR
     else:
-        old_filename = Path(normalize_nfc(doc.filename)) if doc.filename else None
+        old_filename = Path(doc.filename) if doc.filename else None
         root = settings.ORIGINALS_DIR
 
     # If generating archive filenames, try to make a name that is similar to
@@ -92,7 +91,7 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
         )
         if new_filename == old_filename:
             # still the same as before.
-            return Path(normalize_nfc(str(new_filename)))
+            return new_filename
 
         if (root / new_filename).exists():
             counter += 1
@@ -120,7 +119,7 @@ def format_filename(document: Document, template_str: str) -> str | None:
         "none",
     )  # backward compatibility
 
-    return normalize_nfc(rendered_filename)
+    return rendered_filename
 
 
 def generate_filename(
@@ -175,4 +174,4 @@ def generate_filename(
     if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
         full_path = full_path.with_suffix(full_path.suffix + ".gpg")
 
-    return Path(normalize_nfc(str(full_path)))
+    return full_path
@@ -41,7 +41,6 @@ from documents.models import PaperlessTask
 from documents.models import ShareLink
 from documents.models import StoragePath
 from documents.models import Tag
-from documents.utils import normalize_nfc
 
 if TYPE_CHECKING:
     from collections.abc import Callable
@@ -163,11 +162,7 @@ class TitleContentFilter(Filter):
     def filter(self, qs, value):
         value = value.strip() if isinstance(value, str) else value
         if value:
-            normalized = normalize_nfc(value) or ""
-            folded = normalized.casefold()
-            return qs.filter(
-                Q(title__icontains=folded) | Q(content__icontains=folded),
-            )
+            return qs.filter(Q(title__icontains=value) | Q(content__icontains=value))
         else:
             return qs
 
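
Aside on the hunk above: the dropped casefold() step is stronger than plain case-insensitive matching. A quick illustration (sample strings, not from this commit):

# casefold() folds beyond lower(): German "ß" becomes "ss"
print("Straße".lower())     # straße
print("Straße".casefold())  # strasse
print("STRASSE".casefold() == "Straße".casefold())  # True
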
@@ -3,7 +3,6 @@ from __future__ import annotations
 import logging
 import math
 import re
-import unicodedata
 from collections import Counter
 from contextlib import contextmanager
 from datetime import datetime
@@ -59,14 +58,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger("paperless.index")
 
 
-def _normalize_for_index(value: str | None) -> str | None:
-    """Normalize text to NFC for consistent search/index matching."""
-
-    if value is None:
-        return None
-    return unicodedata.normalize("NFC", value)
-
-
 def get_schema() -> Schema:
     return Schema(
         id=NUMERIC(stored=True, unique=True),
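
Why the removed helper mattered for the Whoosh index: NFC and NFD spellings of the same word are distinct code-point sequences, so the stored form must match the query form exactly. A small standalone illustration (the sample word comes from the test removed later in this diff):

import unicodedata

nfd = "certida\u0303o"                   # "a" + combining tilde (decomposed)
nfc = unicodedata.normalize("NFC", nfd)  # precomposed "ã"

print(nfd == nfc)  # False: the raw strings differ
print(unicodedata.normalize("NFC", nfc) == unicodedata.normalize("NFC", nfd))  # True
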
@@ -172,41 +163,37 @@ def update_document(writer: AsyncWriter, doc: Document) -> None:
     viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
     writer.update_document(
         id=doc.pk,
-        title=_normalize_for_index(doc.title),
-        content=_normalize_for_index(doc.content),
-        correspondent=_normalize_for_index(
-            doc.correspondent.name if doc.correspondent else None,
-        ),
+        title=doc.title,
+        content=doc.content,
+        correspondent=doc.correspondent.name if doc.correspondent else None,
         correspondent_id=doc.correspondent.id if doc.correspondent else None,
         has_correspondent=doc.correspondent is not None,
-        tag=_normalize_for_index(tags) if tags else None,
+        tag=tags if tags else None,
         tag_id=tags_ids if tags_ids else None,
         has_tag=len(tags) > 0,
-        type=_normalize_for_index(
-            doc.document_type.name if doc.document_type else None,
-        ),
+        type=doc.document_type.name if doc.document_type else None,
         type_id=doc.document_type.id if doc.document_type else None,
         has_type=doc.document_type is not None,
         created=datetime.combine(doc.created, time.min),
         added=doc.added,
         asn=asn,
         modified=doc.modified,
-        path=_normalize_for_index(doc.storage_path.name if doc.storage_path else None),
+        path=doc.storage_path.name if doc.storage_path else None,
         path_id=doc.storage_path.id if doc.storage_path else None,
         has_path=doc.storage_path is not None,
-        notes=_normalize_for_index(notes),
+        notes=notes,
         num_notes=len(notes),
-        custom_fields=_normalize_for_index(custom_fields),
+        custom_fields=custom_fields,
         custom_field_count=len(doc.custom_fields.all()),
         has_custom_fields=len(custom_fields) > 0,
         custom_fields_id=custom_fields_ids if custom_fields_ids else None,
-        owner=_normalize_for_index(doc.owner.username if doc.owner else None),
+        owner=doc.owner.username if doc.owner else None,
         owner_id=doc.owner.id if doc.owner else None,
         has_owner=doc.owner is not None,
         viewer_id=viewer_ids if viewer_ids else None,
         checksum=doc.checksum,
         page_count=doc.page_count,
-        original_filename=_normalize_for_index(doc.original_filename),
+        original_filename=doc.original_filename,
         is_shared=len(viewer_ids) > 0,
     )
     logger.debug(f"Index updated for document {doc.pk}.")
@@ -434,7 +421,7 @@ class LocalDateParser(English):
 
 class DelayedFullTextQuery(DelayedQuery):
     def _get_query(self) -> tuple:
-        q_str = _normalize_for_index(self.query_params["query"]) or ""
+        q_str = self.query_params["query"]
         q_str = rewrite_natural_date_keywords(q_str)
         qp = MultifieldParser(
             [
@@ -473,12 +460,7 @@ class DelayedFullTextQuery(DelayedQuery):
 class DelayedMoreLikeThisQuery(DelayedQuery):
     def _get_query(self) -> tuple:
         more_like_doc_id = int(self.query_params["more_like_id"])
-        content = (
-            _normalize_for_index(
-                Document.objects.get(id=more_like_doc_id).content,
-            )
-            or ""
-        )
+        content = Document.objects.get(id=more_like_doc_id).content
 
         docnum = self.searcher.document_number(id=more_like_doc_id)
         kts = self.searcher.key_terms_from_text(
@@ -506,7 +488,6 @@ def autocomplete(
     Mimics whoosh.reading.IndexReader.most_distinctive_terms with permissions
     and without scoring
     """
-    term = _normalize_for_index(term) or ""
     terms = []
 
     with ix.searcher(weighting=TF_IDF()) as s:
@@ -2,12 +2,10 @@ from __future__ import annotations
 
 import logging
 import re
-import unicodedata
 from fnmatch import fnmatch
 from fnmatch import translate as fnmatch_translate
 from typing import TYPE_CHECKING
 
-from django.db.models import Q
 from rest_framework import serializers
 
 from documents.data_models import ConsumableDocument
@@ -23,7 +21,6 @@ from documents.models import Workflow
 from documents.models import WorkflowTrigger
 from documents.permissions import get_objects_for_user_owner_aware
 from documents.regex import safe_regex_search
-from documents.utils import normalize_nfc
 
 if TYPE_CHECKING:
     from django.db.models import QuerySet
@@ -33,34 +30,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger("paperless.matching")
 
 
-def _normalize_glob_value(value: str) -> str:
-    """Normalize strings for glob-style matching (case-insensitive)."""
-
-    return (normalize_nfc(value) or "").casefold()
-
-
-def _normalized_fnmatch(name: str, pattern: str) -> bool:
-    """Canonicalize Unicode and compare using fnmatch semantics."""
-
-    return fnmatch(_normalize_glob_value(name), _normalize_glob_value(pattern))
-
-
-def _glob_regex_variants(pattern: str) -> list[str]:
-    """
-    Build regex patterns that match both NFC and NFD forms of a glob pattern.
-    Using both forms lets DB prefilters remain Unicode-normalization agnostic.
-    """
-
-    regexes = set()
-    for normalized in {
-        normalize_nfc(pattern) or "",
-        unicodedata.normalize("NFD", pattern),
-    }:
-        regex = fnmatch_translate(normalized).lstrip("^").rstrip("$")
-        regexes.add(regex)
-    return list(regexes)
-
-
 def log_reason(
     matching_model: MatchingModel | WorkflowTrigger,
     document: Document,
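
The removed _glob_regex_variants boils down to the following standalone sketch (the glob is illustrative; it reuses the pattern from the removed workflow tests):

import unicodedata
from fnmatch import translate as fnmatch_translate

pattern = "*račun*"
for form in ("NFC", "NFD"):
    variant = unicodedata.normalize(form, pattern)
    # strip anchors so the fragment can be embedded in an __iregex lookup
    print(form, fnmatch_translate(variant).lstrip("^").rstrip("$"))
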
@@ -336,9 +305,9 @@ def consumable_document_matches_workflow(
     if (
         trigger.filter_filename is not None
         and len(trigger.filter_filename) > 0
-        and not _normalized_fnmatch(
-            document.original_file.name,
-            trigger.filter_filename,
+        and not fnmatch(
+            document.original_file.name.lower(),
+            trigger.filter_filename.lower(),
         )
     ):
         reason = (
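
Behavioral note on the revert above: fnmatch compares code points, and lower() does not normalize Unicode, so a decomposed filename no longer matches a precomposed pattern. Illustration using the same strings as the tests removed later in this diff:

from fnmatch import fnmatch

print(fnmatch("račun.pdf".lower(), "*račun*".lower()))        # True: same form
print(fnmatch("rac\u030cun.pdf".lower(), "*račun*".lower()))  # False: NFD vs NFC
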
@@ -359,7 +328,7 @@ def consumable_document_matches_workflow(
     if (
         trigger.filter_path is not None
         and len(trigger.filter_path) > 0
-        and not _normalized_fnmatch(
+        and not fnmatch(
             match_against,
             trigger.filter_path,
         )
@@ -523,9 +492,9 @@ def existing_document_matches_workflow(
         trigger.filter_filename is not None
         and len(trigger.filter_filename) > 0
         and document.original_filename is not None
-        and not _normalized_fnmatch(
-            document.original_filename,
-            trigger.filter_filename,
+        and not fnmatch(
+            document.original_filename.lower(),
+            trigger.filter_filename.lower(),
         )
     ):
         return (
@@ -604,11 +573,8 @@ def prefilter_documents_by_workflowtrigger(
         documents = documents.annotate(**annotations).filter(custom_field_q)
 
     if trigger.filter_filename:
-        regexes = _glob_regex_variants(trigger.filter_filename)
-        filename_q = Q()
-        for regex in regexes:
-            filename_q |= Q(original_filename__iregex=regex)
-        documents = documents.filter(filename_q)
+        regex = fnmatch_translate(trigger.filter_filename).lstrip("^").rstrip("$")
+        documents = documents.filter(original_filename__iregex=regex)
 
     return documents
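
The reinstated single-regex prefilter leans on fnmatch.translate; a quick look at its output (the glob is illustrative):

from fnmatch import translate as fnmatch_translate

print(fnmatch_translate("*invoice*.pdf"))
# (?s:.*invoice.*\.pdf)\Z on Python 3.12; the exact wrapper varies by version.
# The lstrip("^")/rstrip("$") in the hunk defensively strips anchors so the
# fragment can sit inside a case-insensitive __iregex database lookup.
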
@@ -89,23 +89,6 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
         self.assertEqual(len(results), 0)
         self.assertCountEqual(response.data["all"], [])
 
-    def test_search_handles_diacritics_normalization(self):
-        doc = Document.objects.create(
-            title="certida\u0303o de nascimento",
-            content="birth record without keyword",
-            checksum="D",
-            pk=10,
-        )
-        with AsyncWriter(index.open_index()) as writer:
-            index.update_document(writer, doc)
-
-        response = self.client.get("/api/documents/?query=certidão")
-        self.assertEqual(response.status_code, status.HTTP_200_OK)
-        results = response.data["results"]
-        self.assertEqual(response.data["count"], 1)
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]["id"], doc.id)
-
     def test_search_custom_field_ordering(self):
         custom_field = CustomField.objects.create(
             name="Sortable field",
@@ -290,23 +290,6 @@ class TestConsumer(
 
         self._assert_first_last_send_progress()
 
-    def test_override_filename_normalized(self):
-        filename = self.get_test_file()
-        override_filename = "Inhaltsu\u0308bersicht.pdf"
-
-        with self.get_consumer(
-            filename,
-            DocumentMetadataOverrides(filename=override_filename),
-        ) as consumer:
-            consumer.run()
-
-        document = Document.objects.first()
-
-        self.assertIsNotNone(document)
-        self.assertEqual(document.original_filename, "Inhaltsübersicht.pdf")
-        self.assertEqual(document.title, "Inhaltsübersicht")
-        self._assert_first_last_send_progress()
-
     def testOverrideTitle(self):
         with self.get_consumer(
             self.get_test_file(),
@@ -321,25 +304,6 @@ class TestConsumer(
         self.assertEqual(document.title, "Override Title")
         self._assert_first_last_send_progress()
 
-    @override_settings(FILENAME_FORMAT="{{ title }}")
-    def test_filename_format_normalized(self):
-        filename = self.get_test_file()
-        title = "Inhaltsu\u0308bersicht Faszination"
-
-        with self.get_consumer(
-            filename,
-            DocumentMetadataOverrides(title=title),
-        ) as consumer:
-            consumer.run()
-
-        document = Document.objects.first()
-
-        self.assertIsNotNone(document)
-        self.assertEqual(document.title, "Inhaltsübersicht Faszination")
-        self.assertEqual(document.filename, "Inhaltsübersicht Faszination.pdf")
-        self.assertIsFile(document.source_path)
-        self._assert_first_last_send_progress()
-
     def testOverrideCorrespondent(self):
         c = Correspondent.objects.create(name="test")
 
@@ -557,50 +557,6 @@ class TestWorkflows(
         expected_str = f"Document filename {test_file.name} does not match"
         self.assertIn(expected_str, cm.output[1])
 
-    def test_workflow_match_filename_diacritics_normalized(self):
-        """
-        GIVEN:
-            - Consumption workflow filtering on filename with diacritics
-        WHEN:
-            - File with decomposed Unicode filename is consumed
-        THEN:
-            - Workflow still matches and applies overrides
-        """
-        trigger = WorkflowTrigger.objects.create(
-            type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
-            sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}",
-            filter_filename="*račun*",
-        )
-        action = WorkflowAction.objects.create(
-            assign_title="Diacritics matched",
-        )
-        action.save()
-
-        w = Workflow.objects.create(
-            name="Workflow 1",
-            order=0,
-        )
-        w.triggers.add(trigger)
-        w.actions.add(action)
-        w.save()
-
-        decomposed_name = "rac\u030cun.pdf"
-        test_file = shutil.copy(
-            self.SAMPLE_DIR / "simple.pdf",
-            self.dirs.scratch_dir / decomposed_name,
-        )
-
-        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
-            tasks.consume_file(
-                ConsumableDocument(
-                    source=DocumentSource.ConsumeFolder,
-                    original_file=test_file,
-                ),
-                None,
-            )
-        document = Document.objects.first()
-        self.assertEqual(document.title, "Diacritics matched")
-
     def test_workflow_no_match_path(self):
         """
         GIVEN:
@@ -990,35 +946,6 @@ class TestWorkflows(
         self.assertEqual(doc.correspondent, self.c2)
         self.assertEqual(doc.title, f"Doc created in {created.year}")
 
-    def test_document_added_filename_diacritics_normalized(self):
-        trigger = WorkflowTrigger.objects.create(
-            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
-            filter_filename="*račun*",
-        )
-        action = WorkflowAction.objects.create(
-            assign_title="Matched diacritics",
-        )
-        w = Workflow.objects.create(
-            name="Workflow 1",
-            order=0,
-        )
-        w.triggers.add(trigger)
-        w.actions.add(action)
-        w.save()
-
-        doc = Document.objects.create(
-            title="sample test",
-            correspondent=self.c,
-            original_filename="rac\u030cun.pdf",
-        )
-
-        document_consumption_finished.send(
-            sender=self.__class__,
-            document=doc,
-        )
-
-        self.assertEqual(doc.title, "Matched diacritics")
-
     def test_document_added_no_match_filename(self):
         trigger = WorkflowTrigger.objects.create(
             type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
@@ -1,7 +1,5 @@
 import logging
 import shutil
-import unicodedata
-from os import PathLike
 from os import utime
 from pathlib import Path
 from subprocess import CompletedProcess
@@ -18,14 +16,6 @@ def _coerce_to_path(
     return Path(source).resolve(), Path(dest).resolve()
 
 
-def normalize_nfc(value: str | PathLike[str] | None) -> str | None:
-    """Return NFC-normalized string for filesystem-safe comparisons."""
-
-    if value is None:
-        return None
-    return unicodedata.normalize("NFC", str(value))
-
-
 def copy_basic_file_stats(source: Path | str, dest: Path | str) -> None:
     """
     Copies only the m_time and a_time attributes from source to destination.
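
For reference, the removed helper in isolation, runnable standalone; the usage lines below are illustrative additions, not part of the diff:

import unicodedata
from os import PathLike


def normalize_nfc(value: str | PathLike[str] | None) -> str | None:
    """Return NFC-normalized string for filesystem-safe comparisons."""
    if value is None:
        return None
    return unicodedata.normalize("NFC", str(value))


print(normalize_nfc("rac\u030cun.pdf") == "račun.pdf")  # True once composed
print(normalize_nfc(None) is None)                      # True: None passes through
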