Compare commits

...

4 Commits

Author SHA1 Message Date
GitHub Actions
a6e41b4145 Auto translate strings 2025-08-31 22:25:05 +00:00
shamoon
cb927c5b22 Fix: include application config language settings for dateparser auto-detection (#10722) 2025-08-31 15:22:39 -07:00
GitHub Actions
107374af71 Auto translate strings 2025-08-30 16:11:17 +00:00
shamoon
a77141e133 Fix: ensure title gets marked as dirty 2025-08-30 09:09:43 -07:00
9 changed files with 291 additions and 253 deletions

View File

@@ -2544,11 +2544,11 @@
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1017</context> <context context-type="linenumber">1018</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1382</context> <context context-type="linenumber">1383</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -3156,7 +3156,7 @@
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">970</context> <context context-type="linenumber">971</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -6579,7 +6579,7 @@
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1381</context> <context context-type="linenumber">1382</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="6490688569532630280" datatype="html"> <trans-unit id="6490688569532630280" datatype="html">
@@ -6904,21 +6904,21 @@
<source>Next document</source> <source>Next document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">573</context> <context context-type="linenumber">574</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="651985345816518480" datatype="html"> <trans-unit id="651985345816518480" datatype="html">
<source>Previous document</source> <source>Previous document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">583</context> <context context-type="linenumber">584</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="2885986061416655600" datatype="html"> <trans-unit id="2885986061416655600" datatype="html">
<source>Close document</source> <source>Close document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">591</context> <context context-type="linenumber">592</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/services/open-documents.service.ts</context> <context context-type="sourcefile">src/app/services/open-documents.service.ts</context>
@@ -6929,67 +6929,67 @@
<source>Save document</source> <source>Save document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">598</context> <context context-type="linenumber">599</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="1784543155727940353" datatype="html"> <trans-unit id="1784543155727940353" datatype="html">
<source>Save and close / next</source> <source>Save and close / next</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">607</context> <context context-type="linenumber">608</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="5758784066858623886" datatype="html"> <trans-unit id="5758784066858623886" datatype="html">
<source>Error retrieving metadata</source> <source>Error retrieving metadata</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">659</context> <context context-type="linenumber">660</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="3456881259945295697" datatype="html"> <trans-unit id="3456881259945295697" datatype="html">
<source>Error retrieving suggestions.</source> <source>Error retrieving suggestions.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">688</context> <context context-type="linenumber">689</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="2194092841814123758" datatype="html"> <trans-unit id="2194092841814123758" datatype="html">
<source>Document &quot;<x id="PH" equiv-text="newValues.title"/>&quot; saved successfully.</source> <source>Document &quot;<x id="PH" equiv-text="newValues.title"/>&quot; saved successfully.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">860</context> <context context-type="linenumber">861</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">884</context> <context context-type="linenumber">885</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="6626387786259219838" datatype="html"> <trans-unit id="6626387786259219838" datatype="html">
<source>Error saving document &quot;<x id="PH" equiv-text="this.document.title"/>&quot;</source> <source>Error saving document &quot;<x id="PH" equiv-text="this.document.title"/>&quot;</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">890</context> <context context-type="linenumber">891</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="448882439049417053" datatype="html"> <trans-unit id="448882439049417053" datatype="html">
<source>Error saving document</source> <source>Error saving document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">939</context> <context context-type="linenumber">940</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="8410796510716511826" datatype="html"> <trans-unit id="8410796510716511826" datatype="html">
<source>Do you really want to move the document &quot;<x id="PH" equiv-text="this.document.title"/>&quot; to the trash?</source> <source>Do you really want to move the document &quot;<x id="PH" equiv-text="this.document.title"/>&quot; to the trash?</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">971</context> <context context-type="linenumber">972</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="282586936710748252" datatype="html"> <trans-unit id="282586936710748252" datatype="html">
<source>Documents can be restored prior to permanent deletion.</source> <source>Documents can be restored prior to permanent deletion.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">972</context> <context context-type="linenumber">973</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7000,7 +7000,7 @@
<source>Move to trash</source> <source>Move to trash</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">974</context> <context context-type="linenumber">975</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7011,14 +7011,14 @@
<source>Error deleting document</source> <source>Error deleting document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">993</context> <context context-type="linenumber">994</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="619486176823357521" datatype="html"> <trans-unit id="619486176823357521" datatype="html">
<source>Reprocess confirm</source> <source>Reprocess confirm</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1013</context> <context context-type="linenumber">1014</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7029,67 +7029,67 @@
<source>This operation will permanently recreate the archive file for this document.</source> <source>This operation will permanently recreate the archive file for this document.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1014</context> <context context-type="linenumber">1015</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="302054111564709516" datatype="html"> <trans-unit id="302054111564709516" datatype="html">
<source>The archive file will be re-generated with the current settings.</source> <source>The archive file will be re-generated with the current settings.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1015</context> <context context-type="linenumber">1016</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="8251197608401006898" datatype="html"> <trans-unit id="8251197608401006898" datatype="html">
<source>Reprocess operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background. Close and re-open or reload this document after the operation has completed to see new content.</source> <source>Reprocess operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background. Close and re-open or reload this document after the operation has completed to see new content.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1025</context> <context context-type="linenumber">1026</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="4409560272830824468" datatype="html"> <trans-unit id="4409560272830824468" datatype="html">
<source>Error executing operation</source> <source>Error executing operation</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1036</context> <context context-type="linenumber">1037</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="6030453331794586802" datatype="html"> <trans-unit id="6030453331794586802" datatype="html">
<source>Error downloading document</source> <source>Error downloading document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1085</context> <context context-type="linenumber">1086</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="4458954481601077369" datatype="html"> <trans-unit id="4458954481601077369" datatype="html">
<source>Page Fit</source> <source>Page Fit</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1162</context> <context context-type="linenumber">1163</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="4663705961777238777" datatype="html"> <trans-unit id="4663705961777238777" datatype="html">
<source>PDF edit operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background.</source> <source>PDF edit operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1400</context> <context context-type="linenumber">1401</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="9043972994040261999" datatype="html"> <trans-unit id="9043972994040261999" datatype="html">
<source>Error executing PDF edit operation</source> <source>Error executing PDF edit operation</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1412</context> <context context-type="linenumber">1413</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="6085793215710522488" datatype="html"> <trans-unit id="6085793215710522488" datatype="html">
<source>An error occurred loading tiff: <x id="PH" equiv-text="err.toString()"/></source> <source>An error occurred loading tiff: <x id="PH" equiv-text="err.toString()"/></source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1479</context> <context context-type="linenumber">1480</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1483</context> <context context-type="linenumber">1484</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="4958946940233632319" datatype="html"> <trans-unit id="4958946940233632319" datatype="html">

View File

@@ -472,6 +472,7 @@ export class DocumentDetailComponent
if (titleValue !== this.titleInput.value) return if (titleValue !== this.titleInput.value) return
this.title = titleValue this.title = titleValue
this.documentForm.patchValue({ title: titleValue }) this.documentForm.patchValue({ title: titleValue })
this.documentForm.get('title').markAsDirty()
}) })
this.setupDirtyTracking(useDoc, doc) this.setupDirtyTracking(useDoc, doc)
}, },

View File

@@ -19,6 +19,8 @@ from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration from documents.signals import document_consumer_declaration
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess from documents.utils import run_subprocess
from paperless.config import OcrConfig
from paperless.utils import ocr_to_dateparser_languages
if TYPE_CHECKING: if TYPE_CHECKING:
import datetime import datetime
@@ -272,6 +274,11 @@ def parse_date_generator(filename, text) -> Iterator[datetime.datetime]:
""" """
import dateparser import dateparser
ocr_config = OcrConfig()
languages = settings.DATE_PARSER_LANGUAGES or ocr_to_dateparser_languages(
ocr_config.language,
)
return dateparser.parse( return dateparser.parse(
ds, ds,
settings={ settings={
@@ -280,7 +287,7 @@ def parse_date_generator(filename, text) -> Iterator[datetime.datetime]:
"RETURN_AS_TIMEZONE_AWARE": True, "RETURN_AS_TIMEZONE_AWARE": True,
"TIMEZONE": settings.TIME_ZONE, "TIMEZONE": settings.TIME_ZONE,
}, },
locales=settings.DATE_PARSER_LANGUAGES, locales=languages,
) )
def __filter(date: datetime.datetime) -> datetime.datetime | None: def __filter(date: datetime.datetime) -> datetime.datetime | None:

View File

@@ -1,12 +1,14 @@
import datetime import datetime
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
import pytest
from pytest_django.fixtures import SettingsWrapper from pytest_django.fixtures import SettingsWrapper
from documents.parsers import parse_date from documents.parsers import parse_date
from documents.parsers import parse_date_generator from documents.parsers import parse_date_generator
@pytest.mark.django_db()
class TestDate: class TestDate:
def test_date_format_1(self): def test_date_format_1(self):
text = "lorem ipsum 130218 lorem ipsum" text = "lorem ipsum 130218 lorem ipsum"
@@ -49,7 +51,7 @@ class TestDate:
settings: SettingsWrapper, settings: SettingsWrapper,
settings_timezone: ZoneInfo, settings_timezone: ZoneInfo,
): ):
settings.DATE_PARSER_LANGUAGES = [] settings.DATE_PARSER_LANGUAGES = ["de"]
text = "lorem ipsum\nMärz 2019\nlorem ipsum" text = "lorem ipsum\nMärz 2019\nlorem ipsum"
date = parse_date("", text) date = parse_date("", text)
assert date == datetime.datetime(2019, 3, 1, 0, 0, tzinfo=settings_timezone) assert date == datetime.datetime(2019, 3, 1, 0, 0, tzinfo=settings_timezone)

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr "" msgstr ""
"Project-Id-Version: paperless-ngx\n" "Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n" "Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-08-16 14:34+0000\n" "POT-Creation-Date: 2025-08-31 22:24+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n" "PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n" "Last-Translator: \n"
"Language-Team: English\n" "Language-Team: English\n"
@@ -1645,147 +1645,147 @@ msgstr ""
msgid "paperless application settings" msgid "paperless application settings"
msgstr "" msgstr ""
#: paperless/settings.py:774 #: paperless/settings.py:772
msgid "English (US)" msgid "English (US)"
msgstr "" msgstr ""
#: paperless/settings.py:775 #: paperless/settings.py:773
msgid "Arabic" msgid "Arabic"
msgstr "" msgstr ""
#: paperless/settings.py:776 #: paperless/settings.py:774
msgid "Afrikaans" msgid "Afrikaans"
msgstr "" msgstr ""
#: paperless/settings.py:777 #: paperless/settings.py:775
msgid "Belarusian" msgid "Belarusian"
msgstr "" msgstr ""
#: paperless/settings.py:778 #: paperless/settings.py:776
msgid "Bulgarian" msgid "Bulgarian"
msgstr "" msgstr ""
#: paperless/settings.py:779 #: paperless/settings.py:777
msgid "Catalan" msgid "Catalan"
msgstr "" msgstr ""
#: paperless/settings.py:780 #: paperless/settings.py:778
msgid "Czech" msgid "Czech"
msgstr "" msgstr ""
#: paperless/settings.py:781 #: paperless/settings.py:779
msgid "Danish" msgid "Danish"
msgstr "" msgstr ""
#: paperless/settings.py:782 #: paperless/settings.py:780
msgid "German" msgid "German"
msgstr "" msgstr ""
#: paperless/settings.py:783 #: paperless/settings.py:781
msgid "Greek" msgid "Greek"
msgstr "" msgstr ""
#: paperless/settings.py:784 #: paperless/settings.py:782
msgid "English (GB)" msgid "English (GB)"
msgstr "" msgstr ""
#: paperless/settings.py:785 #: paperless/settings.py:783
msgid "Spanish" msgid "Spanish"
msgstr "" msgstr ""
#: paperless/settings.py:786 #: paperless/settings.py:784
msgid "Persian" msgid "Persian"
msgstr "" msgstr ""
#: paperless/settings.py:787 #: paperless/settings.py:785
msgid "Finnish" msgid "Finnish"
msgstr "" msgstr ""
#: paperless/settings.py:788 #: paperless/settings.py:786
msgid "French" msgid "French"
msgstr "" msgstr ""
#: paperless/settings.py:789 #: paperless/settings.py:787
msgid "Hungarian" msgid "Hungarian"
msgstr "" msgstr ""
#: paperless/settings.py:790 #: paperless/settings.py:788
msgid "Italian" msgid "Italian"
msgstr "" msgstr ""
#: paperless/settings.py:791 #: paperless/settings.py:789
msgid "Japanese" msgid "Japanese"
msgstr "" msgstr ""
#: paperless/settings.py:792 #: paperless/settings.py:790
msgid "Korean" msgid "Korean"
msgstr "" msgstr ""
#: paperless/settings.py:793 #: paperless/settings.py:791
msgid "Luxembourgish" msgid "Luxembourgish"
msgstr "" msgstr ""
#: paperless/settings.py:794 #: paperless/settings.py:792
msgid "Norwegian" msgid "Norwegian"
msgstr "" msgstr ""
#: paperless/settings.py:795 #: paperless/settings.py:793
msgid "Dutch" msgid "Dutch"
msgstr "" msgstr ""
#: paperless/settings.py:796 #: paperless/settings.py:794
msgid "Polish" msgid "Polish"
msgstr "" msgstr ""
#: paperless/settings.py:797 #: paperless/settings.py:795
msgid "Portuguese (Brazil)" msgid "Portuguese (Brazil)"
msgstr "" msgstr ""
#: paperless/settings.py:798 #: paperless/settings.py:796
msgid "Portuguese" msgid "Portuguese"
msgstr "" msgstr ""
#: paperless/settings.py:799 #: paperless/settings.py:797
msgid "Romanian" msgid "Romanian"
msgstr "" msgstr ""
#: paperless/settings.py:800 #: paperless/settings.py:798
msgid "Russian" msgid "Russian"
msgstr "" msgstr ""
#: paperless/settings.py:801 #: paperless/settings.py:799
msgid "Slovak" msgid "Slovak"
msgstr "" msgstr ""
#: paperless/settings.py:802 #: paperless/settings.py:800
msgid "Slovenian" msgid "Slovenian"
msgstr "" msgstr ""
#: paperless/settings.py:803 #: paperless/settings.py:801
msgid "Serbian" msgid "Serbian"
msgstr "" msgstr ""
#: paperless/settings.py:804 #: paperless/settings.py:802
msgid "Swedish" msgid "Swedish"
msgstr "" msgstr ""
#: paperless/settings.py:805 #: paperless/settings.py:803
msgid "Turkish" msgid "Turkish"
msgstr "" msgstr ""
#: paperless/settings.py:806 #: paperless/settings.py:804
msgid "Ukrainian" msgid "Ukrainian"
msgstr "" msgstr ""
#: paperless/settings.py:807 #: paperless/settings.py:805
msgid "Vietnamese" msgid "Vietnamese"
msgstr "" msgstr ""
#: paperless/settings.py:808 #: paperless/settings.py:806
msgid "Chinese Simplified" msgid "Chinese Simplified"
msgstr "" msgstr ""
#: paperless/settings.py:809 #: paperless/settings.py:807
msgid "Chinese Traditional" msgid "Chinese Traditional"
msgstr "" msgstr ""

View File

@@ -17,8 +17,6 @@ from dateparser.languages.loader import LocaleDataLoader
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv from dotenv import load_dotenv
from paperless.utils import ocr_to_dateparser_languages
logger = logging.getLogger("paperless.settings") logger = logging.getLogger("paperless.settings")
# Tap paperless.conf if it's available # Tap paperless.conf if it's available
@@ -1184,61 +1182,6 @@ DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER") FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
def _ocr_to_dateparser_languages(ocr_languages: str) -> list[str]:
"""
Convert Tesseract OCR_LANGUAGE codes (ISO 639-2, e.g. "eng+fra", with optional scripts like "aze_Cyrl")
into a list of locales compatible with the `dateparser` library.
- If a script is provided (e.g., "aze_Cyrl"), attempts to use the full locale (e.g., "az-Cyrl").
Falls back to the base language (e.g., "az") if needed.
- If a language cannot be mapped or validated, it is skipped with a warning.
- Returns a list of valid locales, or an empty list if none could be converted.
"""
ocr_to_dateparser = ocr_to_dateparser_languages()
loader = LocaleDataLoader()
result = []
try:
for ocr_language in ocr_languages.split("+"):
# Split into language and optional script
ocr_lang_part, *script = ocr_language.split("_")
ocr_script_part = script[0] if script else None
language_part = ocr_to_dateparser.get(ocr_lang_part)
if language_part is None:
logger.debug(
f'Unable to map OCR language "{ocr_lang_part}" to dateparser locale. ',
)
continue
# Ensure base language is supported by dateparser
loader.get_locale_map(locales=[language_part])
# Try to add the script part if it's supported by dateparser
if ocr_script_part:
dateparser_language = f"{language_part}-{ocr_script_part.title()}"
try:
loader.get_locale_map(locales=[dateparser_language])
except Exception:
logger.info(
f"Language variant '{dateparser_language}' not supported by dateparser; falling back to base language '{language_part}'. You can manually set PAPERLESS_DATE_PARSER_LANGUAGES if needed.",
)
dateparser_language = language_part
else:
dateparser_language = language_part
if dateparser_language not in result:
result.append(dateparser_language)
except Exception as e:
logger.warning(
f"Error auto-configuring dateparser languages. Set PAPERLESS_DATE_PARSER_LANGUAGES parameter to avoid this. Detail: {e}",
)
return []
if not result:
logger.info(
"Unable to automatically determine dateparser languages from OCR_LANGUAGE, falling back to multi-language support.",
)
return result
def _parse_dateparser_languages(languages: str | None): def _parse_dateparser_languages(languages: str | None):
language_list = languages.split("+") if languages else [] language_list = languages.split("+") if languages else []
# There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib. # There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
@@ -1253,12 +1196,14 @@ def _parse_dateparser_languages(languages: str | None):
return list(LocaleDataLoader().get_locale_map(locales=language_list)) return list(LocaleDataLoader().get_locale_map(locales=language_list))
if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"): # If not set, we will infer it at runtime
DATE_PARSER_LANGUAGES = _parse_dateparser_languages( DATE_PARSER_LANGUAGES = (
_parse_dateparser_languages(
os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"), os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"),
) )
else: if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES")
DATE_PARSER_LANGUAGES = _ocr_to_dateparser_languages(OCR_LANGUAGE) else None
)
# Maximum number of dates taken from document start to end to show as suggestions for # Maximum number of dates taken from document start to end to show as suggestions for

View File

@@ -6,7 +6,6 @@ from unittest import mock
import pytest import pytest
from celery.schedules import crontab from celery.schedules import crontab
from paperless.settings import _ocr_to_dateparser_languages
from paperless.settings import _parse_base_paths from paperless.settings import _parse_base_paths
from paperless.settings import _parse_beat_schedule from paperless.settings import _parse_beat_schedule
from paperless.settings import _parse_dateparser_languages from paperless.settings import _parse_dateparser_languages
@@ -476,33 +475,6 @@ class TestPathSettings(TestCase):
self.assertEqual("/foobar/", base_paths[4]) # LOGOUT_REDIRECT_URL self.assertEqual("/foobar/", base_paths[4]) # LOGOUT_REDIRECT_URL
@pytest.mark.parametrize(
    ("ocr_language", "expected"),
    [
        # A single OCR language
        ("eng", ["en"]),
        # Several OCR languages joined with "+"
        ("fra+ita+lao", ["fr", "it", "lo"]),
        # A language whose dateparser code is also three letters long
        ("fil", ["fil"]),
        # Script suffixes that dateparser knows about are kept
        ("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
        # Script suffixes that dateparser does not know about are dropped,
        # leaving only the base language
        ("deu_frak", ["de"]),
        # Traditional and simplified Chinese are named differently in dateparser,
        # so both collapse into the generic Chinese language
        ("chi_tra+chi_sim", ["zh"]),
        # Unsupported entries are ignored, supported ones are still returned
        ("eng+unsupported_language+por", ["en", "pt"]),
        # Nothing supported at all yields an empty list
        ("unsupported1+unsupported2", []),
    ],
)
def test_ocr_to_dateparser_languages(ocr_language, expected):
    """Check the OCR language string -> dateparser locale list conversion."""
    actual = _ocr_to_dateparser_languages(ocr_language)
    # Both sides are sorted, so only the set of locales is compared, not their order.
    assert sorted(actual) == sorted(expected)
@pytest.mark.parametrize( @pytest.mark.parametrize(
("languages", "expected"), ("languages", "expected"),
[ [

View File

@@ -0,0 +1,52 @@
import logging
import pytest
from paperless import utils
from paperless.utils import ocr_to_dateparser_languages
@pytest.mark.parametrize(
    ("ocr_language", "expected"),
    [
        # A single OCR language
        ("eng", ["en"]),
        # Several OCR languages joined with "+"
        ("fra+ita+lao", ["fr", "it", "lo"]),
        # A language whose dateparser code is also three letters long
        ("fil", ["fil"]),
        # Script suffixes that dateparser knows about are kept
        ("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
        # Script suffixes that dateparser does not know about are dropped,
        # leaving only the base language
        ("deu_frak", ["de"]),
        # Traditional and simplified Chinese are named differently in dateparser,
        # so both collapse into the generic Chinese language
        ("chi_tra+chi_sim", ["zh"]),
        # Unsupported entries are ignored, supported ones are still returned
        ("eng+unsupported_language+por", ["en", "pt"]),
        # Nothing supported at all yields an empty list
        ("unsupported1+unsupported2", []),
        # The same language given twice appears only once in the result
        ("eng+eng", ["en"]),
        # Known language combined with a script suffix that is not recognised
        ("ita_unknownscript", ["it"]),
    ],
)
def test_ocr_to_dateparser_languages(ocr_language, expected):
    """Check the OCR language string -> dateparser locale list conversion."""
    actual = ocr_to_dateparser_languages(ocr_language)
    # Both sides are sorted, so only the set of locales is compared, not their order.
    assert sorted(actual) == sorted(expected)
def test_ocr_to_dateparser_languages_exception(monkeypatch, caplog):
    """
    When the locale loader raises, the conversion must return an empty list
    and log a message pointing the user at PAPERLESS_DATE_PARSER_LANGUAGES.
    """

    class FailingLoader:
        """Stand-in for LocaleDataLoader whose lookup always raises."""

        def get_locale_map(self, locales=None):
            raise RuntimeError("Simulated error")

    # The class itself serves as the zero-argument factory that
    # utils.ocr_to_dateparser_languages calls to obtain a loader.
    monkeypatch.setattr(utils, "LocaleDataLoader", FailingLoader)

    with caplog.at_level(logging.WARNING):
        converted = utils.ocr_to_dateparser_languages("eng+fra")

    assert converted == []
    hint = "Set PAPERLESS_DATE_PARSER_LANGUAGES parameter to avoid this"
    assert hint in caplog.text

View File

@@ -1,4 +1,10 @@
def ocr_to_dateparser_languages() -> dict[str, str]: import logging
from dateparser.languages.loader import LocaleDataLoader
logger = logging.getLogger("paperless.utils")
OCR_TO_DATEPARSER_LANGUAGES = {
""" """
Translation map from languages supported by Tesseract OCR Translation map from languages supported by Tesseract OCR
to languages supported by dateparser. to languages supported by dateparser.
@@ -14,97 +20,150 @@ def ocr_to_dateparser_languages() -> dict[str, str]:
# agq, asa, bez, brx, cgg, ckb, dav, dje, dyo, ebu, guz, jgo, jmc, kde, kea, khq, kln, # agq, asa, bez, brx, cgg, ckb, dav, dje, dyo, ebu, guz, jgo, jmc, kde, kea, khq, kln,
# ksb, ksf, ksh, lag, lkt, lrc, luy, mer, mfe, mgh, mgo, mua, mzn, naq, nmg, nnh, nus, # ksb, ksf, ksh, lag, lkt, lrc, luy, mer, mfe, mgh, mgo, mua, mzn, naq, nmg, nnh, nus,
# rof, rwk, saq, sbp, she, ses, shi, teo, twq, tzm, vun, wae, xog, yav, yue # rof, rwk, saq, sbp, she, ses, shi, teo, twq, tzm, vun, wae, xog, yav, yue
return { "afr": "af",
"afr": "af", "amh": "am",
"amh": "am", "ara": "ar",
"ara": "ar", "asm": "as",
"asm": "as", "ast": "ast",
"ast": "ast", "aze": "az",
"aze": "az", "bel": "be",
"bel": "be", "bul": "bg",
"bul": "bg", "ben": "bn",
"ben": "bn", "bod": "bo",
"bod": "bo", "bre": "br",
"bre": "br", "bos": "bs",
"bos": "bs", "cat": "ca",
"cat": "ca", "cher": "chr",
"cher": "chr", "ces": "cs",
"ces": "cs", "cym": "cy",
"cym": "cy", "dan": "da",
"dan": "da", "deu": "de",
"deu": "de", "dzo": "dz",
"dzo": "dz", "ell": "el",
"ell": "el", "eng": "en",
"eng": "en", "epo": "eo",
"epo": "eo", "spa": "es",
"spa": "es", "est": "et",
"est": "et", "eus": "eu",
"eus": "eu", "fas": "fa",
"fas": "fa", "fin": "fi",
"fin": "fi", "fil": "fil",
"fil": "fil", "fao": "fo", # codespell:ignore
"fao": "fo", # codespell:ignore "fra": "fr",
"fra": "fr", "fry": "fy",
"fry": "fy", "gle": "ga",
"gle": "ga", "gla": "gd",
"gla": "gd", "glg": "gl",
"glg": "gl", "guj": "gu",
"guj": "gu", "heb": "he",
"heb": "he", "hin": "hi",
"hin": "hi", "hrv": "hr",
"hrv": "hr", "hun": "hu",
"hun": "hu", "hye": "hy",
"hye": "hy", "ind": "id",
"ind": "id", "isl": "is",
"isl": "is", "ita": "it",
"ita": "it", "jpn": "ja",
"jpn": "ja", "kat": "ka",
"kat": "ka", "kaz": "kk",
"kaz": "kk", "khm": "km",
"khm": "km", "knda": "kn",
"knda": "kn", "kor": "ko",
"kor": "ko", "kir": "ky",
"kir": "ky", "ltz": "lb",
"ltz": "lb", "lao": "lo",
"lao": "lo", "lit": "lt",
"lit": "lt", "lav": "lv",
"lav": "lv", "mal": "ml",
"mal": "ml", "mon": "mn",
"mon": "mn", "mar": "mr",
"mar": "mr", "msa": "ms",
"msa": "ms", "mlt": "mt",
"mlt": "mt", "mya": "my",
"mya": "my", "nep": "ne",
"nep": "ne", "nld": "nl",
"nld": "nl", "ori": "or",
"ori": "or", "pan": "pa",
"pan": "pa", "pol": "pl",
"pol": "pl", "pus": "ps",
"pus": "ps", "por": "pt",
"por": "pt", "que": "qu",
"que": "qu", "ron": "ro",
"ron": "ro", "rus": "ru",
"rus": "ru", "sin": "si",
"sin": "si", "slk": "sk",
"slk": "sk", "slv": "sl",
"slv": "sl", "sqi": "sq",
"sqi": "sq", "srp": "sr",
"srp": "sr", "swe": "sv",
"swe": "sv", "swa": "sw",
"swa": "sw", "tam": "ta",
"tam": "ta", "tel": "te", # codespell:ignore
"tel": "te", # codespell:ignore "tha": "th", # codespell:ignore
"tha": "th", # codespell:ignore "tir": "ti",
"tir": "ti", "tgl": "tl",
"tgl": "tl", "ton": "to",
"ton": "to", "tur": "tr",
"tur": "tr", "uig": "ug",
"uig": "ug", "ukr": "uk",
"ukr": "uk", "urd": "ur",
"urd": "ur", "uzb": "uz",
"uzb": "uz", "via": "vi",
"via": "vi", "yid": "yi",
"yid": "yi", "yor": "yo",
"yor": "yo", "chi": "zh",
"chi": "zh", }
}
def ocr_to_dateparser_languages(ocr_languages: str) -> list[str]:
    """
    Translate a Tesseract OCR_LANGUAGE value into `dateparser` locales.

    The input is a "+"-separated list of Tesseract codes (e.g. "eng+fra"),
    each optionally followed by "_<script>" (e.g. "aze_Cyrl").

    - Codes missing from OCR_TO_DATEPARSER_LANGUAGES are skipped and reported
      at debug level.
    - When a script is given, the "<language>-<Script>" locale (e.g. "az-Cyrl")
      is tried first; if the loader rejects it, the base language (e.g. "az")
      is used instead and this is reported at info level.
    - Each locale appears at most once, in order of first occurrence.
    - Any other exception raised during the conversion is reported at warning
      level and an empty list is returned.

    Returns the list of locales, which may be empty.
    """
    locale_loader = LocaleDataLoader()
    collected = []

    try:
        for ocr_language in ocr_languages.split("+"):
            # First piece is the language code, second (if any) is the script;
            # anything after a second "_" is ignored.
            pieces = ocr_language.split("_")
            ocr_lang_part = pieces[0]
            script_part = pieces[1] if len(pieces) > 1 else None

            language_part = OCR_TO_DATEPARSER_LANGUAGES.get(ocr_lang_part)
            if language_part is None:
                logger.debug(
                    f'Unable to map OCR language "{ocr_lang_part}" to dateparser locale. ',
                )
                continue

            # Let the loader confirm the base language; if it raises here the
            # outer handler aborts the whole conversion.
            locale_loader.get_locale_map(locales=[language_part])

            dateparser_language = language_part
            if script_part:
                # Prefer the script-specific variant and keep it only if the
                # loader accepts it.
                dateparser_language = f"{language_part}-{script_part.title()}"
                try:
                    locale_loader.get_locale_map(locales=[dateparser_language])
                except Exception:
                    logger.info(
                        f"Language variant '{dateparser_language}' not supported by dateparser; falling back to base language '{language_part}'. You can manually set PAPERLESS_DATE_PARSER_LANGUAGES if needed.",
                    )
                    dateparser_language = language_part

            if dateparser_language in collected:
                continue
            collected.append(dateparser_language)
    except Exception as e:
        logger.warning(
            f"Error auto-configuring dateparser languages. Set PAPERLESS_DATE_PARSER_LANGUAGES parameter to avoid this. Detail: {e}",
        )
        return []

    if collected:
        return collected

    logger.info(
        "Unable to automatically determine dateparser languages from OCR_LANGUAGE, falling back to multi-language support.",
    )
    return collected