mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Compare commits
	
		
			4 Commits
		
	
	
		
			117dfb83fe
			...
			a6e41b4145
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | a6e41b4145 | ||
|   | cb927c5b22 | ||
|   | 107374af71 | ||
|   | a77141e133 | 
| @@ -2544,11 +2544,11 @@ | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1017</context> | ||||
|           <context context-type="linenumber">1018</context> | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1382</context> | ||||
|           <context context-type="linenumber">1383</context> | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> | ||||
| @@ -3156,7 +3156,7 @@ | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">970</context> | ||||
|           <context context-type="linenumber">971</context> | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> | ||||
| @@ -6579,7 +6579,7 @@ | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1381</context> | ||||
|           <context context-type="linenumber">1382</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="6490688569532630280" datatype="html"> | ||||
| @@ -6904,21 +6904,21 @@ | ||||
|         <source>Next document</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">573</context> | ||||
|           <context context-type="linenumber">574</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="651985345816518480" datatype="html"> | ||||
|         <source>Previous document</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">583</context> | ||||
|           <context context-type="linenumber">584</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="2885986061416655600" datatype="html"> | ||||
|         <source>Close document</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">591</context> | ||||
|           <context context-type="linenumber">592</context> | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/services/open-documents.service.ts</context> | ||||
| @@ -6929,67 +6929,67 @@ | ||||
|         <source>Save document</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">598</context> | ||||
|           <context context-type="linenumber">599</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="1784543155727940353" datatype="html"> | ||||
|         <source>Save and close / next</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">607</context> | ||||
|           <context context-type="linenumber">608</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="5758784066858623886" datatype="html"> | ||||
|         <source>Error retrieving metadata</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">659</context> | ||||
|           <context context-type="linenumber">660</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="3456881259945295697" datatype="html"> | ||||
|         <source>Error retrieving suggestions.</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">688</context> | ||||
|           <context context-type="linenumber">689</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="2194092841814123758" datatype="html"> | ||||
|         <source>Document "<x id="PH" equiv-text="newValues.title"/>" saved successfully.</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">860</context> | ||||
|           <context context-type="linenumber">861</context> | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">884</context> | ||||
|           <context context-type="linenumber">885</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="6626387786259219838" datatype="html"> | ||||
|         <source>Error saving document "<x id="PH" equiv-text="this.document.title"/>"</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">890</context> | ||||
|           <context context-type="linenumber">891</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="448882439049417053" datatype="html"> | ||||
|         <source>Error saving document</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">939</context> | ||||
|           <context context-type="linenumber">940</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="8410796510716511826" datatype="html"> | ||||
|         <source>Do you really want to move the document "<x id="PH" equiv-text="this.document.title"/>" to the trash?</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">971</context> | ||||
|           <context context-type="linenumber">972</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="282586936710748252" datatype="html"> | ||||
|         <source>Documents can be restored prior to permanent deletion.</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">972</context> | ||||
|           <context context-type="linenumber">973</context> | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> | ||||
| @@ -7000,7 +7000,7 @@ | ||||
|         <source>Move to trash</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">974</context> | ||||
|           <context context-type="linenumber">975</context> | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> | ||||
| @@ -7011,14 +7011,14 @@ | ||||
|         <source>Error deleting document</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">993</context> | ||||
|           <context context-type="linenumber">994</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="619486176823357521" datatype="html"> | ||||
|         <source>Reprocess confirm</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1013</context> | ||||
|           <context context-type="linenumber">1014</context> | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> | ||||
| @@ -7029,67 +7029,67 @@ | ||||
|         <source>This operation will permanently recreate the archive file for this document.</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1014</context> | ||||
|           <context context-type="linenumber">1015</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="302054111564709516" datatype="html"> | ||||
|         <source>The archive file will be re-generated with the current settings.</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1015</context> | ||||
|           <context context-type="linenumber">1016</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="8251197608401006898" datatype="html"> | ||||
|         <source>Reprocess operation for "<x id="PH" equiv-text="this.document.title"/>" will begin in the background. Close and re-open or reload this document after the operation has completed to see new content.</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1025</context> | ||||
|           <context context-type="linenumber">1026</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="4409560272830824468" datatype="html"> | ||||
|         <source>Error executing operation</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1036</context> | ||||
|           <context context-type="linenumber">1037</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="6030453331794586802" datatype="html"> | ||||
|         <source>Error downloading document</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1085</context> | ||||
|           <context context-type="linenumber">1086</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="4458954481601077369" datatype="html"> | ||||
|         <source>Page Fit</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1162</context> | ||||
|           <context context-type="linenumber">1163</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="4663705961777238777" datatype="html"> | ||||
|         <source>PDF edit operation for "<x id="PH" equiv-text="this.document.title"/>" will begin in the background.</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1400</context> | ||||
|           <context context-type="linenumber">1401</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="9043972994040261999" datatype="html"> | ||||
|         <source>Error executing PDF edit operation</source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1412</context> | ||||
|           <context context-type="linenumber">1413</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="6085793215710522488" datatype="html"> | ||||
|         <source>An error occurred loading tiff: <x id="PH" equiv-text="err.toString()"/></source> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1479</context> | ||||
|           <context context-type="linenumber">1480</context> | ||||
|         </context-group> | ||||
|         <context-group purpose="location"> | ||||
|           <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> | ||||
|           <context context-type="linenumber">1483</context> | ||||
|           <context context-type="linenumber">1484</context> | ||||
|         </context-group> | ||||
|       </trans-unit> | ||||
|       <trans-unit id="4958946940233632319" datatype="html"> | ||||
|   | ||||
| @@ -472,6 +472,7 @@ export class DocumentDetailComponent | ||||
|               if (titleValue !== this.titleInput.value) return | ||||
|               this.title = titleValue | ||||
|               this.documentForm.patchValue({ title: titleValue }) | ||||
|               this.documentForm.get('title').markAsDirty() | ||||
|             }) | ||||
|           this.setupDirtyTracking(useDoc, doc) | ||||
|         }, | ||||
|   | ||||
| @@ -19,6 +19,8 @@ from documents.loggers import LoggingMixin | ||||
| from documents.signals import document_consumer_declaration | ||||
| from documents.utils import copy_file_with_basic_stats | ||||
| from documents.utils import run_subprocess | ||||
| from paperless.config import OcrConfig | ||||
| from paperless.utils import ocr_to_dateparser_languages | ||||
|  | ||||
| if TYPE_CHECKING: | ||||
|     import datetime | ||||
| @@ -272,6 +274,11 @@ def parse_date_generator(filename, text) -> Iterator[datetime.datetime]: | ||||
|         """ | ||||
|         import dateparser | ||||
|  | ||||
|         ocr_config = OcrConfig() | ||||
|         languages = settings.DATE_PARSER_LANGUAGES or ocr_to_dateparser_languages( | ||||
|             ocr_config.language, | ||||
|         ) | ||||
|  | ||||
|         return dateparser.parse( | ||||
|             ds, | ||||
|             settings={ | ||||
| @@ -280,7 +287,7 @@ def parse_date_generator(filename, text) -> Iterator[datetime.datetime]: | ||||
|                 "RETURN_AS_TIMEZONE_AWARE": True, | ||||
|                 "TIMEZONE": settings.TIME_ZONE, | ||||
|             }, | ||||
|             locales=settings.DATE_PARSER_LANGUAGES, | ||||
|             locales=languages, | ||||
|         ) | ||||
|  | ||||
|     def __filter(date: datetime.datetime) -> datetime.datetime | None: | ||||
|   | ||||
| @@ -1,12 +1,14 @@ | ||||
| import datetime | ||||
| from zoneinfo import ZoneInfo | ||||
|  | ||||
| import pytest | ||||
| from pytest_django.fixtures import SettingsWrapper | ||||
|  | ||||
| from documents.parsers import parse_date | ||||
| from documents.parsers import parse_date_generator | ||||
|  | ||||
|  | ||||
| @pytest.mark.django_db() | ||||
| class TestDate: | ||||
|     def test_date_format_1(self): | ||||
|         text = "lorem ipsum 130218 lorem ipsum" | ||||
| @@ -49,7 +51,7 @@ class TestDate: | ||||
|         settings: SettingsWrapper, | ||||
|         settings_timezone: ZoneInfo, | ||||
|     ): | ||||
|         settings.DATE_PARSER_LANGUAGES = [] | ||||
|         settings.DATE_PARSER_LANGUAGES = ["de"] | ||||
|         text = "lorem ipsum\nMärz 2019\nlorem ipsum" | ||||
|         date = parse_date("", text) | ||||
|         assert date == datetime.datetime(2019, 3, 1, 0, 0, tzinfo=settings_timezone) | ||||
|   | ||||
| @@ -2,7 +2,7 @@ msgid "" | ||||
| msgstr "" | ||||
| "Project-Id-Version: paperless-ngx\n" | ||||
| "Report-Msgid-Bugs-To: \n" | ||||
| "POT-Creation-Date: 2025-08-16 14:34+0000\n" | ||||
| "POT-Creation-Date: 2025-08-31 22:24+0000\n" | ||||
| "PO-Revision-Date: 2022-02-17 04:17\n" | ||||
| "Last-Translator: \n" | ||||
| "Language-Team: English\n" | ||||
| @@ -1645,147 +1645,147 @@ msgstr "" | ||||
| msgid "paperless application settings" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:774 | ||||
| #: paperless/settings.py:772 | ||||
| msgid "English (US)" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:775 | ||||
| #: paperless/settings.py:773 | ||||
| msgid "Arabic" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:776 | ||||
| #: paperless/settings.py:774 | ||||
| msgid "Afrikaans" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:777 | ||||
| #: paperless/settings.py:775 | ||||
| msgid "Belarusian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:778 | ||||
| #: paperless/settings.py:776 | ||||
| msgid "Bulgarian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:779 | ||||
| #: paperless/settings.py:777 | ||||
| msgid "Catalan" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:780 | ||||
| #: paperless/settings.py:778 | ||||
| msgid "Czech" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:781 | ||||
| #: paperless/settings.py:779 | ||||
| msgid "Danish" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:782 | ||||
| #: paperless/settings.py:780 | ||||
| msgid "German" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:783 | ||||
| #: paperless/settings.py:781 | ||||
| msgid "Greek" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:784 | ||||
| #: paperless/settings.py:782 | ||||
| msgid "English (GB)" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:785 | ||||
| #: paperless/settings.py:783 | ||||
| msgid "Spanish" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:786 | ||||
| #: paperless/settings.py:784 | ||||
| msgid "Persian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:787 | ||||
| #: paperless/settings.py:785 | ||||
| msgid "Finnish" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:788 | ||||
| #: paperless/settings.py:786 | ||||
| msgid "French" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:789 | ||||
| #: paperless/settings.py:787 | ||||
| msgid "Hungarian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:790 | ||||
| #: paperless/settings.py:788 | ||||
| msgid "Italian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:791 | ||||
| #: paperless/settings.py:789 | ||||
| msgid "Japanese" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:792 | ||||
| #: paperless/settings.py:790 | ||||
| msgid "Korean" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:793 | ||||
| #: paperless/settings.py:791 | ||||
| msgid "Luxembourgish" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:794 | ||||
| #: paperless/settings.py:792 | ||||
| msgid "Norwegian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:795 | ||||
| #: paperless/settings.py:793 | ||||
| msgid "Dutch" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:796 | ||||
| #: paperless/settings.py:794 | ||||
| msgid "Polish" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:797 | ||||
| #: paperless/settings.py:795 | ||||
| msgid "Portuguese (Brazil)" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:798 | ||||
| #: paperless/settings.py:796 | ||||
| msgid "Portuguese" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:799 | ||||
| #: paperless/settings.py:797 | ||||
| msgid "Romanian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:800 | ||||
| #: paperless/settings.py:798 | ||||
| msgid "Russian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:801 | ||||
| #: paperless/settings.py:799 | ||||
| msgid "Slovak" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:802 | ||||
| #: paperless/settings.py:800 | ||||
| msgid "Slovenian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:803 | ||||
| #: paperless/settings.py:801 | ||||
| msgid "Serbian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:804 | ||||
| #: paperless/settings.py:802 | ||||
| msgid "Swedish" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:805 | ||||
| #: paperless/settings.py:803 | ||||
| msgid "Turkish" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:806 | ||||
| #: paperless/settings.py:804 | ||||
| msgid "Ukrainian" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:807 | ||||
| #: paperless/settings.py:805 | ||||
| msgid "Vietnamese" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:808 | ||||
| #: paperless/settings.py:806 | ||||
| msgid "Chinese Simplified" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:809 | ||||
| #: paperless/settings.py:807 | ||||
| msgid "Chinese Traditional" | ||||
| msgstr "" | ||||
|  | ||||
|   | ||||
| @@ -17,8 +17,6 @@ from dateparser.languages.loader import LocaleDataLoader | ||||
| from django.utils.translation import gettext_lazy as _ | ||||
| from dotenv import load_dotenv | ||||
|  | ||||
| from paperless.utils import ocr_to_dateparser_languages | ||||
|  | ||||
| logger = logging.getLogger("paperless.settings") | ||||
|  | ||||
| # Tap paperless.conf if it's available | ||||
| @@ -1184,61 +1182,6 @@ DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY") | ||||
| FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER") | ||||
|  | ||||
|  | ||||
| def _ocr_to_dateparser_languages(ocr_languages: str) -> list[str]: | ||||
|     """ | ||||
|     Convert Tesseract OCR_LANGUAGE codes (ISO 639-2, e.g. "eng+fra", with optional scripts like "aze_Cyrl") | ||||
|     into a list of locales compatible with the `dateparser` library. | ||||
|  | ||||
|     - If a script is provided (e.g., "aze_Cyrl"), attempts to use the full locale (e.g., "az-Cyrl"). | ||||
|     Falls back to the base language (e.g., "az") if needed. | ||||
|     - If a language cannot be mapped or validated, it is skipped with a warning. | ||||
|     - Returns a list of valid locales, or an empty list if none could be converted. | ||||
|     """ | ||||
|     ocr_to_dateparser = ocr_to_dateparser_languages() | ||||
|     loader = LocaleDataLoader() | ||||
|     result = [] | ||||
|     try: | ||||
|         for ocr_language in ocr_languages.split("+"): | ||||
|             # Split into language and optional script | ||||
|             ocr_lang_part, *script = ocr_language.split("_") | ||||
|             ocr_script_part = script[0] if script else None | ||||
|  | ||||
|             language_part = ocr_to_dateparser.get(ocr_lang_part) | ||||
|             if language_part is None: | ||||
|                 logger.debug( | ||||
|                     f'Unable to map OCR language "{ocr_lang_part}" to dateparser locale. ', | ||||
|                 ) | ||||
|                 continue | ||||
|  | ||||
|             # Ensure base language is supported by dateparser | ||||
|             loader.get_locale_map(locales=[language_part]) | ||||
|  | ||||
|             # Try to add the script part if it's supported by dateparser | ||||
|             if ocr_script_part: | ||||
|                 dateparser_language = f"{language_part}-{ocr_script_part.title()}" | ||||
|                 try: | ||||
|                     loader.get_locale_map(locales=[dateparser_language]) | ||||
|                 except Exception: | ||||
|                     logger.info( | ||||
|                         f"Language variant '{dateparser_language}' not supported by dateparser; falling back to base language '{language_part}'. You can manually set PAPERLESS_DATE_PARSER_LANGUAGES if needed.", | ||||
|                     ) | ||||
|                     dateparser_language = language_part | ||||
|             else: | ||||
|                 dateparser_language = language_part | ||||
|             if dateparser_language not in result: | ||||
|                 result.append(dateparser_language) | ||||
|     except Exception as e: | ||||
|         logger.warning( | ||||
|             f"Error auto-configuring dateparser languages. Set PAPERLESS_DATE_PARSER_LANGUAGES parameter to avoid this. Detail: {e}", | ||||
|         ) | ||||
|         return [] | ||||
|     if not result: | ||||
|         logger.info( | ||||
|             "Unable to automatically determine dateparser languages from OCR_LANGUAGE, falling back to multi-language support.", | ||||
|         ) | ||||
|     return result | ||||
|  | ||||
|  | ||||
| def _parse_dateparser_languages(languages: str | None): | ||||
|     language_list = languages.split("+") if languages else [] | ||||
|     # There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib. | ||||
| @@ -1253,12 +1196,14 @@ def _parse_dateparser_languages(languages: str | None): | ||||
|     return list(LocaleDataLoader().get_locale_map(locales=language_list)) | ||||
|  | ||||
|  | ||||
| if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"): | ||||
|     DATE_PARSER_LANGUAGES = _parse_dateparser_languages( | ||||
| # If not set, we will infer it at runtime | ||||
| DATE_PARSER_LANGUAGES = ( | ||||
|     _parse_dateparser_languages( | ||||
|         os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"), | ||||
|     ) | ||||
| else: | ||||
|     DATE_PARSER_LANGUAGES = _ocr_to_dateparser_languages(OCR_LANGUAGE) | ||||
|     if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES") | ||||
|     else None | ||||
| ) | ||||
|  | ||||
|  | ||||
| # Maximum number of dates taken from document start to end to show as suggestions for | ||||
|   | ||||
| @@ -6,7 +6,6 @@ from unittest import mock | ||||
| import pytest | ||||
| from celery.schedules import crontab | ||||
|  | ||||
| from paperless.settings import _ocr_to_dateparser_languages | ||||
| from paperless.settings import _parse_base_paths | ||||
| from paperless.settings import _parse_beat_schedule | ||||
| from paperless.settings import _parse_dateparser_languages | ||||
| @@ -476,33 +475,6 @@ class TestPathSettings(TestCase): | ||||
|         self.assertEqual("/foobar/", base_paths[4])  # LOGOUT_REDIRECT_URL | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("ocr_language", "expected"), | ||||
|     [ | ||||
|         # One language | ||||
|         ("eng", ["en"]), | ||||
|         # Multiple languages | ||||
|         ("fra+ita+lao", ["fr", "it", "lo"]), | ||||
|         # Languages that don't have a two-letter equivalent | ||||
|         ("fil", ["fil"]), | ||||
|         # Languages with a script part supported by dateparser | ||||
|         ("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]), | ||||
|         # Languages with a script part not supported by dateparser | ||||
|         # In this case, default to the language without script | ||||
|         ("deu_frak", ["de"]), | ||||
|         # Traditional and simplified chinese don't have the same name in dateparser, | ||||
|         # so they're converted to the general chinese language | ||||
|         ("chi_tra+chi_sim", ["zh"]), | ||||
|         # If a language is not supported by dateparser, fallback to the supported ones | ||||
|         ("eng+unsupported_language+por", ["en", "pt"]), | ||||
|         # If no language is supported, fallback to default | ||||
|         ("unsupported1+unsupported2", []), | ||||
|     ], | ||||
| ) | ||||
| def test_ocr_to_dateparser_languages(ocr_language, expected): | ||||
|     assert sorted(_ocr_to_dateparser_languages(ocr_language)) == sorted(expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("languages", "expected"), | ||||
|     [ | ||||
|   | ||||
							
								
								
									
										52
									
								
								src/paperless/tests/test_utils.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								src/paperless/tests/test_utils.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| import logging | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| from paperless import utils | ||||
| from paperless.utils import ocr_to_dateparser_languages | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    ("ocr_language", "expected"),
    [
        # A single Tesseract code
        ("eng", ["en"]),
        # Several codes joined with "+"
        ("fra+ita+lao", ["fr", "it", "lo"]),
        # A code whose dateparser name is also three letters long
        ("fil", ["fil"]),
        # Script suffixes that dateparser knows are kept as "<lang>-<Script>"
        ("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
        # Script suffixes that dateparser does not know are dropped,
        # leaving only the base language
        ("deu_frak", ["de"]),
        # Traditional and simplified Chinese are named differently in dateparser,
        # so both collapse into the general Chinese language
        ("chi_tra+chi_sim", ["zh"]),
        # Codes without a dateparser equivalent are left out; the rest are kept
        ("eng+unsupported_language+por", ["en", "pt"]),
        # Nothing convertible at all gives an empty list
        ("unsupported1+unsupported2", []),
        # The same language given twice appears only once in the result
        ("eng+eng", ["en"]),
        # An unrecognised script suffix falls back to the base language
        ("ita_unknownscript", ["it"]),
    ],
)
def test_ocr_to_dateparser_languages(ocr_language, expected):
    """Each OCR language string converts to the expected locales, in any order."""
    converted = ocr_to_dateparser_languages(ocr_language)
    assert sorted(converted) == sorted(expected)
|  | ||||
|  | ||||
def test_ocr_to_dateparser_languages_exception(monkeypatch, caplog):
    """
    When the locale lookup raises, the conversion returns an empty list and
    logs a warning that points to PAPERLESS_DATE_PARSER_LANGUAGES.
    """

    class FailingLoader:
        # Stand-in for LocaleDataLoader whose locale lookup always fails
        def get_locale_map(self, locales=None):
            raise RuntimeError("Simulated error")

    # Calling the patched name with no arguments yields a FailingLoader instance
    monkeypatch.setattr(utils, "LocaleDataLoader", FailingLoader)

    with caplog.at_level(logging.WARNING):
        converted = utils.ocr_to_dateparser_languages("eng+fra")

    assert converted == []
    expected_hint = "Set PAPERLESS_DATE_PARSER_LANGUAGES parameter to avoid this"
    assert expected_hint in caplog.text
| @@ -1,4 +1,10 @@ | ||||
| def ocr_to_dateparser_languages() -> dict[str, str]: | ||||
| import logging | ||||
|  | ||||
| from dateparser.languages.loader import LocaleDataLoader | ||||
|  | ||||
| logger = logging.getLogger("paperless.utils") | ||||
|  | ||||
| OCR_TO_DATEPARSER_LANGUAGES = { | ||||
|     """ | ||||
|     Translation map from languages supported by Tesseract OCR | ||||
|     to languages supported by dateparser. | ||||
| @@ -14,97 +20,150 @@ def ocr_to_dateparser_languages() -> dict[str, str]: | ||||
|     # agq, asa, bez, brx, cgg, ckb, dav, dje, dyo, ebu, guz, jgo, jmc, kde, kea, khq, kln, | ||||
|     # ksb, ksf, ksh, lag, lkt, lrc, luy, mer, mfe, mgh, mgo, mua, mzn, naq, nmg, nnh, nus, | ||||
|     # rof, rwk, saq, sbp, she, ses, shi, teo, twq, tzm, vun, wae, xog, yav, yue | ||||
|     return { | ||||
|         "afr": "af", | ||||
|         "amh": "am", | ||||
|         "ara": "ar", | ||||
|         "asm": "as", | ||||
|         "ast": "ast", | ||||
|         "aze": "az", | ||||
|         "bel": "be", | ||||
|         "bul": "bg", | ||||
|         "ben": "bn", | ||||
|         "bod": "bo", | ||||
|         "bre": "br", | ||||
|         "bos": "bs", | ||||
|         "cat": "ca", | ||||
|         "cher": "chr", | ||||
|         "ces": "cs", | ||||
|         "cym": "cy", | ||||
|         "dan": "da", | ||||
|         "deu": "de", | ||||
|         "dzo": "dz", | ||||
|         "ell": "el", | ||||
|         "eng": "en", | ||||
|         "epo": "eo", | ||||
|         "spa": "es", | ||||
|         "est": "et", | ||||
|         "eus": "eu", | ||||
|         "fas": "fa", | ||||
|         "fin": "fi", | ||||
|         "fil": "fil", | ||||
|         "fao": "fo",  # codespell:ignore | ||||
|         "fra": "fr", | ||||
|         "fry": "fy", | ||||
|         "gle": "ga", | ||||
|         "gla": "gd", | ||||
|         "glg": "gl", | ||||
|         "guj": "gu", | ||||
|         "heb": "he", | ||||
|         "hin": "hi", | ||||
|         "hrv": "hr", | ||||
|         "hun": "hu", | ||||
|         "hye": "hy", | ||||
|         "ind": "id", | ||||
|         "isl": "is", | ||||
|         "ita": "it", | ||||
|         "jpn": "ja", | ||||
|         "kat": "ka", | ||||
|         "kaz": "kk", | ||||
|         "khm": "km", | ||||
|         "knda": "kn", | ||||
|         "kor": "ko", | ||||
|         "kir": "ky", | ||||
|         "ltz": "lb", | ||||
|         "lao": "lo", | ||||
|         "lit": "lt", | ||||
|         "lav": "lv", | ||||
|         "mal": "ml", | ||||
|         "mon": "mn", | ||||
|         "mar": "mr", | ||||
|         "msa": "ms", | ||||
|         "mlt": "mt", | ||||
|         "mya": "my", | ||||
|         "nep": "ne", | ||||
|         "nld": "nl", | ||||
|         "ori": "or", | ||||
|         "pan": "pa", | ||||
|         "pol": "pl", | ||||
|         "pus": "ps", | ||||
|         "por": "pt", | ||||
|         "que": "qu", | ||||
|         "ron": "ro", | ||||
|         "rus": "ru", | ||||
|         "sin": "si", | ||||
|         "slk": "sk", | ||||
|         "slv": "sl", | ||||
|         "sqi": "sq", | ||||
|         "srp": "sr", | ||||
|         "swe": "sv", | ||||
|         "swa": "sw", | ||||
|         "tam": "ta", | ||||
|         "tel": "te",  # codespell:ignore | ||||
|         "tha": "th",  # codespell:ignore | ||||
|         "tir": "ti", | ||||
|         "tgl": "tl", | ||||
|         "ton": "to", | ||||
|         "tur": "tr", | ||||
|         "uig": "ug", | ||||
|         "ukr": "uk", | ||||
|         "urd": "ur", | ||||
|         "uzb": "uz", | ||||
|         "via": "vi", | ||||
|         "yid": "yi", | ||||
|         "yor": "yo", | ||||
|         "chi": "zh", | ||||
|     } | ||||
|     "afr": "af", | ||||
|     "amh": "am", | ||||
|     "ara": "ar", | ||||
|     "asm": "as", | ||||
|     "ast": "ast", | ||||
|     "aze": "az", | ||||
|     "bel": "be", | ||||
|     "bul": "bg", | ||||
|     "ben": "bn", | ||||
|     "bod": "bo", | ||||
|     "bre": "br", | ||||
|     "bos": "bs", | ||||
|     "cat": "ca", | ||||
|     "cher": "chr", | ||||
|     "ces": "cs", | ||||
|     "cym": "cy", | ||||
|     "dan": "da", | ||||
|     "deu": "de", | ||||
|     "dzo": "dz", | ||||
|     "ell": "el", | ||||
|     "eng": "en", | ||||
|     "epo": "eo", | ||||
|     "spa": "es", | ||||
|     "est": "et", | ||||
|     "eus": "eu", | ||||
|     "fas": "fa", | ||||
|     "fin": "fi", | ||||
|     "fil": "fil", | ||||
|     "fao": "fo",  # codespell:ignore | ||||
|     "fra": "fr", | ||||
|     "fry": "fy", | ||||
|     "gle": "ga", | ||||
|     "gla": "gd", | ||||
|     "glg": "gl", | ||||
|     "guj": "gu", | ||||
|     "heb": "he", | ||||
|     "hin": "hi", | ||||
|     "hrv": "hr", | ||||
|     "hun": "hu", | ||||
|     "hye": "hy", | ||||
|     "ind": "id", | ||||
|     "isl": "is", | ||||
|     "ita": "it", | ||||
|     "jpn": "ja", | ||||
|     "kat": "ka", | ||||
|     "kaz": "kk", | ||||
|     "khm": "km", | ||||
|     "knda": "kn", | ||||
|     "kor": "ko", | ||||
|     "kir": "ky", | ||||
|     "ltz": "lb", | ||||
|     "lao": "lo", | ||||
|     "lit": "lt", | ||||
|     "lav": "lv", | ||||
|     "mal": "ml", | ||||
|     "mon": "mn", | ||||
|     "mar": "mr", | ||||
|     "msa": "ms", | ||||
|     "mlt": "mt", | ||||
|     "mya": "my", | ||||
|     "nep": "ne", | ||||
|     "nld": "nl", | ||||
|     "ori": "or", | ||||
|     "pan": "pa", | ||||
|     "pol": "pl", | ||||
|     "pus": "ps", | ||||
|     "por": "pt", | ||||
|     "que": "qu", | ||||
|     "ron": "ro", | ||||
|     "rus": "ru", | ||||
|     "sin": "si", | ||||
|     "slk": "sk", | ||||
|     "slv": "sl", | ||||
|     "sqi": "sq", | ||||
|     "srp": "sr", | ||||
|     "swe": "sv", | ||||
|     "swa": "sw", | ||||
|     "tam": "ta", | ||||
|     "tel": "te",  # codespell:ignore | ||||
|     "tha": "th",  # codespell:ignore | ||||
|     "tir": "ti", | ||||
|     "tgl": "tl", | ||||
|     "ton": "to", | ||||
|     "tur": "tr", | ||||
|     "uig": "ug", | ||||
|     "ukr": "uk", | ||||
|     "urd": "ur", | ||||
|     "uzb": "uz", | ||||
|     "via": "vi", | ||||
|     "yid": "yi", | ||||
|     "yor": "yo", | ||||
|     "chi": "zh", | ||||
| } | ||||
|  | ||||
|  | ||||
def ocr_to_dateparser_languages(ocr_languages: str) -> list[str]:
    """
    Convert a Tesseract OCR_LANGUAGE value into locales usable by `dateparser`.

    `ocr_languages` is a "+"-separated list of Tesseract language codes
    (e.g. "eng+fra"), each optionally followed by "_<script>" (e.g. "aze_cyrl").

    - Codes that are missing from OCR_TO_DATEPARSER_LANGUAGES are skipped and
      reported at debug level.
    - When a script is given, the "<language>-<Script>" locale (e.g. "az-Cyrl")
      is used if dateparser accepts it; otherwise the base language (e.g. "az")
      is used and the fallback is reported at info level.
    - Duplicates are dropped; the first-seen order is kept.
    - Any other error, including dateparser rejecting a mapped base language,
      discards the partial result: a warning is logged and an empty list is
      returned.

    Returns the list of locales, or an empty list if none could be converted.
    """
    result = []
    try:
        # Built inside the try block so that a failure to set up the loader ends
        # in the same logged, empty-list fallback as every other error.
        loader = LocaleDataLoader()
        for ocr_language in ocr_languages.split("+"):
            # Split into language and optional script; only the first suffix
            # after the language code is treated as the script.
            ocr_lang_part, *script = ocr_language.split("_")
            ocr_script_part = script[0] if script else None

            language_part = OCR_TO_DATEPARSER_LANGUAGES.get(ocr_lang_part)
            if language_part is None:
                logger.debug(
                    f'Unable to map OCR language "{ocr_lang_part}" to dateparser locale. ',
                )
                continue

            # Ensure base language is supported by dateparser. This call is not
            # guarded on its own: if it raises, the outer handler below discards
            # everything collected so far.
            loader.get_locale_map(locales=[language_part])

            # Try to add the script part if it's supported by dateparser
            if ocr_script_part:
                dateparser_language = f"{language_part}-{ocr_script_part.title()}"
                try:
                    loader.get_locale_map(locales=[dateparser_language])
                except Exception:
                    logger.info(
                        f"Language variant '{dateparser_language}' not supported by dateparser; falling back to base language '{language_part}'. You can manually set PAPERLESS_DATE_PARSER_LANGUAGES if needed.",
                    )
                    dateparser_language = language_part
            else:
                dateparser_language = language_part
            if dateparser_language not in result:
                result.append(dateparser_language)
    except Exception as e:
        logger.warning(
            f"Error auto-configuring dateparser languages. Set PAPERLESS_DATE_PARSER_LANGUAGES parameter to avoid this. Detail: {e}",
        )
        return []
    if not result:
        logger.info(
            "Unable to automatically determine dateparser languages from OCR_LANGUAGE, falling back to multi-language support.",
        )
    return result
|   | ||||
		Reference in New Issue
	
	Block a user