Fix: include application config language settings for dateparser auto-detection (#10722)

2026-01-30 23:08:59 -06:00 · 2025-08-31 15:22:39 -07:00
parent 107374af71
commit cb927c5b22
6 changed files with 223 additions and 186 deletions
--- a/src/paperless/tests/test_settings.py
+++ b/src/paperless/tests/test_settings.py
@@ -6,7 +6,6 @@ from unittest import mock
 import pytest
 from celery.schedules import crontab

-from paperless.settings import _ocr_to_dateparser_languages
 from paperless.settings import _parse_base_paths
 from paperless.settings import _parse_beat_schedule
 from paperless.settings import _parse_dateparser_languages
@@ -476,33 +475,6 @@ class TestPathSettings(TestCase):
        self.assertEqual("/foobar/", base_paths[4])  # LOGOUT_REDIRECT_URL


-@pytest.mark.parametrize(
-    ("ocr_language", "expected"),
-    [
-        # One language
-        ("eng", ["en"]),
-        # Multiple languages
-        ("fra+ita+lao", ["fr", "it", "lo"]),
-        # Languages that don't have a two-letter equivalent
-        ("fil", ["fil"]),
-        # Languages with a script part supported by dateparser
-        ("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
-        # Languages with a script part not supported by dateparser
-        # In this case, default to the language without script
-        ("deu_frak", ["de"]),
-        # Traditional and simplified chinese don't have the same name in dateparser,
-        # so they're converted to the general chinese language
-        ("chi_tra+chi_sim", ["zh"]),
-        # If a language is not supported by dateparser, fallback to the supported ones
-        ("eng+unsupported_language+por", ["en", "pt"]),
-        # If no language is supported, fallback to default
-        ("unsupported1+unsupported2", []),
-    ],
-)
-def test_ocr_to_dateparser_languages(ocr_language, expected):
-    assert sorted(_ocr_to_dateparser_languages(ocr_language)) == sorted(expected)
-
-
@pytest.mark.parametrize(
    ("languages", "expected"),
    [
--- a/src/paperless/tests/test_utils.py
+++ b/src/paperless/tests/test_utils.py
@@ -0,0 +1,52 @@
+import logging
+
+import pytest
+
+from paperless import utils
+from paperless.utils import ocr_to_dateparser_languages
+
+
+@pytest.mark.parametrize(
+    ("ocr_language", "expected"),
+    [
+        # A single language
+        ("eng", ["en"]),
+        # Multiple languages joined with "+"
+        ("fra+ita+lao", ["fr", "it", "lo"]),
+        # Languages without a two-letter equivalent keep their code
+        ("fil", ["fil"]),
+        # Languages with a script part supported by dateparser
+        ("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
+        # Languages with a script part not supported by dateparser:
+        # in this case, fall back to the language without the script
+        ("deu_frak", ["de"]),
+        # Traditional and Simplified Chinese don't have the same name in dateparser,
+        # so both are converted to the general Chinese language
+        ("chi_tra+chi_sim", ["zh"]),
+        # Languages not supported by dateparser are skipped; supported ones remain
+        ("eng+unsupported_language+por", ["en", "pt"]),
+        # If no language is supported, the result is an empty list
+        ("unsupported1+unsupported2", []),
+        # Duplicate languages appear only once in the result
+        ("eng+eng", ["en"]),
+        # Language with a script part that is not mapped: the script is dropped
+        ("ita_unknownscript", ["it"]),
+    ],
+)
+def test_ocr_to_dateparser_languages(ocr_language, expected):
+    assert sorted(ocr_to_dateparser_languages(ocr_language)) == sorted(expected)
+
+
+def test_ocr_to_dateparser_languages_exception(monkeypatch, caplog):
+    # Replace LocaleDataLoader with a stub whose get_locale_map always raises
+    class DummyLoader:
+        def get_locale_map(self, locales=None):
+            raise RuntimeError("Simulated error")
+
+    with caplog.at_level(logging.WARNING):
+        monkeypatch.setattr(utils, "LocaleDataLoader", lambda: DummyLoader())
+        result = utils.ocr_to_dateparser_languages("eng+fra")
+        assert result == []
+        assert (
+            "Set PAPERLESS_DATE_PARSER_LANGUAGES parameter to avoid this" in caplog.text
+        )