From cf59853f34d775b4b6fcf621a9eaeb078fdc69f5 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Wed, 7 Jan 2026 14:27:13 -0800 Subject: [PATCH 1/8] Tweakhancement: use anchor element for management list quick filter buttons (#11692) --- .../correspondent-list.component.ts | 2 ++ .../custom-fields.component.html | 18 ++++++++++---- .../custom-fields.component.spec.ts | 24 +++++++++++++------ .../custom-fields/custom-fields.component.ts | 6 +++-- .../document-type-list.component.ts | 2 ++ .../management-list.component.html | 21 ++++++++++++---- .../management-list.component.spec.ts | 14 +++++++---- .../management-list.component.ts | 4 ++-- .../storage-path-list.component.ts | 2 ++ .../manage/tag-list/tag-list.component.ts | 2 ++ .../document-list-view.service.spec.ts | 21 ++++++++++++++++ .../services/document-list-view.service.ts | 14 ++++++++++- 12 files changed, 105 insertions(+), 25 deletions(-) diff --git a/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts b/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts index 0131ac992..957371e08 100644 --- a/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts +++ b/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts @@ -1,6 +1,7 @@ import { NgClass, NgTemplateOutlet, TitleCasePipe } from '@angular/common' import { Component, inject } from '@angular/core' import { FormsModule, ReactiveFormsModule } from '@angular/forms' +import { RouterModule } from '@angular/router' import { NgbDropdownModule, NgbPaginationModule, @@ -29,6 +30,7 @@ import { ManagementListComponent } from '../management-list/management-list.comp TitleCasePipe, FormsModule, ReactiveFormsModule, + RouterModule, NgClass, NgTemplateOutlet, NgbDropdownModule, diff --git a/src-ui/src/app/components/manage/custom-fields/custom-fields.component.html b/src-ui/src/app/components/manage/custom-fields/custom-fields.component.html index 185e9da35..0a6d80658 100644 --- a/src-ui/src/app/components/manage/custom-fields/custom-fields.component.html +++ b/src-ui/src/app/components/manage/custom-fields/custom-fields.component.html @@ -42,7 +42,13 @@ @if (field.document_count > 0) { - + Filter Documents ({{ field.document_count }}) } @@ -57,9 +63,13 @@ @if (field.document_count > 0) {
- + +  Documents{{ field.document_count }} +
} diff --git a/src-ui/src/app/components/manage/custom-fields/custom-fields.component.spec.ts b/src-ui/src/app/components/manage/custom-fields/custom-fields.component.spec.ts index e94470d64..b86d476f3 100644 --- a/src-ui/src/app/components/manage/custom-fields/custom-fields.component.spec.ts +++ b/src-ui/src/app/components/manage/custom-fields/custom-fields.component.spec.ts @@ -4,6 +4,7 @@ import { provideHttpClient, withInterceptorsFromDi } from '@angular/common/http' import { provideHttpClientTesting } from '@angular/common/http/testing' import { FormsModule, ReactiveFormsModule } from '@angular/forms' import { By } from '@angular/platform-browser' +import { RouterTestingModule } from '@angular/router/testing' import { NgbModal, NgbModalModule, @@ -61,6 +62,7 @@ describe('CustomFieldsComponent', () => { NgbModalModule, NgbPopoverModule, NgxBootstrapIconsModule.pick(allIcons), + RouterTestingModule, CustomFieldsComponent, IfPermissionsDirective, PageHeaderComponent, @@ -108,7 +110,9 @@ describe('CustomFieldsComponent', () => { const toastInfoSpy = jest.spyOn(toastService, 'showInfo') const reloadSpy = jest.spyOn(component, 'reload') - const createButton = fixture.debugElement.queryAll(By.css('button'))[1] + const createButton = fixture.debugElement + .queryAll(By.css('button')) + .find((btn) => btn.nativeElement.textContent.trim().includes('Add Field')) createButton.triggerEventHandler('click') expect(modal).not.toBeUndefined() @@ -133,7 +137,11 @@ describe('CustomFieldsComponent', () => { const toastInfoSpy = jest.spyOn(toastService, 'showInfo') const reloadSpy = jest.spyOn(component, 'reload') - const editButton = fixture.debugElement.queryAll(By.css('button'))[2] + const editButton = fixture.debugElement + .queryAll(By.css('button')) + .find((btn) => + btn.nativeElement.textContent.trim().includes(fields[0].name) + ) editButton.triggerEventHandler('click') expect(modal).not.toBeUndefined() @@ -158,7 +166,9 @@ describe('CustomFieldsComponent', () => { const deleteSpy = jest.spyOn(customFieldsService, 'delete') const reloadSpy = jest.spyOn(component, 'reload') - const deleteButton = fixture.debugElement.queryAll(By.css('button'))[5] + const deleteButton = fixture.debugElement + .queryAll(By.css('button')) + .find((btn) => btn.nativeElement.textContent.trim().includes('Delete')) deleteButton.triggerEventHandler('click') expect(modal).not.toBeUndefined() @@ -176,10 +186,10 @@ describe('CustomFieldsComponent', () => { expect(reloadSpy).toHaveBeenCalled() }) - it('should support filter documents', () => { - const filterSpy = jest.spyOn(listViewService, 'quickFilter') - component.filterDocuments(fields[0]) - expect(filterSpy).toHaveBeenCalledWith([ + it('should provide document filter url', () => { + const urlSpy = jest.spyOn(listViewService, 'getQuickFilterUrl') + component.getDocumentFilterUrl(fields[0]) + expect(urlSpy).toHaveBeenCalledWith([ { rule_type: FILTER_CUSTOM_FIELDS_QUERY, value: JSON.stringify([ diff --git a/src-ui/src/app/components/manage/custom-fields/custom-fields.component.ts b/src-ui/src/app/components/manage/custom-fields/custom-fields.component.ts index 9e7ecf78a..8ecd713ef 100644 --- a/src-ui/src/app/components/manage/custom-fields/custom-fields.component.ts +++ b/src-ui/src/app/components/manage/custom-fields/custom-fields.component.ts @@ -1,4 +1,5 @@ import { Component, OnInit, inject } from '@angular/core' +import { RouterModule } from '@angular/router' import { NgbDropdownModule, NgbModal, @@ -36,6 +37,7 @@ import { LoadingComponentWithPermissions } from '../../loading-component/loading NgbDropdownModule, NgbPaginationModule, NgxBootstrapIconsModule, + RouterModule, ], }) export class CustomFieldsComponent @@ -130,8 +132,8 @@ export class CustomFieldsComponent return DATA_TYPE_LABELS.find((l) => l.id === field.data_type).name } - filterDocuments(field: CustomField) { - this.documentListViewService.quickFilter([ + getDocumentFilterUrl(field: CustomField) { + return this.documentListViewService.getQuickFilterUrl([ { rule_type: FILTER_CUSTOM_FIELDS_QUERY, value: JSON.stringify([ diff --git a/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts b/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts index 21a4779e9..b561af2d1 100644 --- a/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts +++ b/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts @@ -1,6 +1,7 @@ import { NgClass, NgTemplateOutlet, TitleCasePipe } from '@angular/common' import { Component, inject } from '@angular/core' import { FormsModule, ReactiveFormsModule } from '@angular/forms' +import { RouterModule } from '@angular/router' import { NgbDropdownModule, NgbPaginationModule, @@ -27,6 +28,7 @@ import { ManagementListComponent } from '../management-list/management-list.comp IfPermissionsDirective, FormsModule, ReactiveFormsModule, + RouterModule, NgClass, NgTemplateOutlet, NgbDropdownModule, diff --git a/src-ui/src/app/components/manage/management-list/management-list.component.html b/src-ui/src/app/components/manage/management-list/management-list.component.html index 8fac6f44f..1cfb3aa0d 100644 --- a/src-ui/src/app/components/manage/management-list/management-list.component.html +++ b/src-ui/src/app/components/manage/management-list/management-list.component.html @@ -120,7 +120,14 @@ @if (getDocumentCount(object) > 0) { - + Filter Documents ({{ getDocumentCount(object) }}) } @@ -135,9 +142,15 @@ @if (getDocumentCount(object) > 0) {
- + +  Documents{{ getDocumentCount(object) }} +
} diff --git a/src-ui/src/app/components/manage/management-list/management-list.component.spec.ts b/src-ui/src/app/components/manage/management-list/management-list.component.spec.ts index 813c81148..a9f7a0626 100644 --- a/src-ui/src/app/components/manage/management-list/management-list.component.spec.ts +++ b/src-ui/src/app/components/manage/management-list/management-list.component.spec.ts @@ -13,6 +13,7 @@ import { } from '@angular/core/testing' import { FormsModule, ReactiveFormsModule } from '@angular/forms' import { By } from '@angular/platform-browser' +import { RouterLinkWithHref } from '@angular/router' import { RouterTestingModule } from '@angular/router/testing' import { NgbModal, @@ -230,12 +231,15 @@ describe('ManagementListComponent', () => { }) it('should support quick filter for objects', () => { - const qfSpy = jest.spyOn(documentListViewService, 'quickFilter') - const filterButton = fixture.debugElement.queryAll(By.css('button'))[9] - filterButton.triggerEventHandler('click') - expect(qfSpy).toHaveBeenCalledWith([ + const expectedUrl = documentListViewService.getQuickFilterUrl([ { rule_type: FILTER_HAS_TAGS_ALL, value: tags[0].id.toString() }, - ]) // subclasses set the filter rule type + ]) + const filterLink = fixture.debugElement.query( + By.css('a.btn-outline-secondary') + ) + expect(filterLink).toBeTruthy() + const routerLink = filterLink.injector.get(RouterLinkWithHref) + expect(routerLink.urlTree).toEqual(expectedUrl) }) it('should reload on sort', () => { diff --git a/src-ui/src/app/components/manage/management-list/management-list.component.ts b/src-ui/src/app/components/manage/management-list/management-list.component.ts index b1af1f1d1..e8e7a3bb3 100644 --- a/src-ui/src/app/components/manage/management-list/management-list.component.ts +++ b/src-ui/src/app/components/manage/management-list/management-list.component.ts @@ -230,8 +230,8 @@ export abstract class ManagementListComponent abstract getDeleteMessage(object: T) - filterDocuments(object: MatchingModel) { - this.documentListViewService.quickFilter([ + getDocumentFilterUrl(object: MatchingModel) { + return this.documentListViewService.getQuickFilterUrl([ { rule_type: this.filterRuleType, value: object.id.toString() }, ]) } diff --git a/src-ui/src/app/components/manage/storage-path-list/storage-path-list.component.ts b/src-ui/src/app/components/manage/storage-path-list/storage-path-list.component.ts index 413ccc33a..cac8637d7 100644 --- a/src-ui/src/app/components/manage/storage-path-list/storage-path-list.component.ts +++ b/src-ui/src/app/components/manage/storage-path-list/storage-path-list.component.ts @@ -1,6 +1,7 @@ import { NgClass, NgTemplateOutlet, TitleCasePipe } from '@angular/common' import { Component, inject } from '@angular/core' import { FormsModule, ReactiveFormsModule } from '@angular/forms' +import { RouterModule } from '@angular/router' import { NgbDropdownModule, NgbPaginationModule, @@ -27,6 +28,7 @@ import { ManagementListComponent } from '../management-list/management-list.comp IfPermissionsDirective, FormsModule, ReactiveFormsModule, + RouterModule, NgClass, NgTemplateOutlet, NgbDropdownModule, diff --git a/src-ui/src/app/components/manage/tag-list/tag-list.component.ts b/src-ui/src/app/components/manage/tag-list/tag-list.component.ts index 0ba0a0855..544e99b58 100644 --- a/src-ui/src/app/components/manage/tag-list/tag-list.component.ts +++ b/src-ui/src/app/components/manage/tag-list/tag-list.component.ts @@ -1,6 +1,7 @@ import { NgClass, NgTemplateOutlet, TitleCasePipe } from '@angular/common' import { Component, inject } from '@angular/core' import { FormsModule, ReactiveFormsModule } from '@angular/forms' +import { RouterModule } from '@angular/router' import { NgbDropdownModule, NgbPaginationModule, @@ -27,6 +28,7 @@ import { ManagementListComponent } from '../management-list/management-list.comp IfPermissionsDirective, FormsModule, ReactiveFormsModule, + RouterModule, NgClass, NgTemplateOutlet, NgbDropdownModule, diff --git a/src-ui/src/app/services/document-list-view.service.spec.ts b/src-ui/src/app/services/document-list-view.service.spec.ts index 82d3ac425..fdbfa2069 100644 --- a/src-ui/src/app/services/document-list-view.service.spec.ts +++ b/src-ui/src/app/services/document-list-view.service.spec.ts @@ -651,4 +651,25 @@ describe('DocumentListViewService', () => { documentListViewService.displayFields = customFields as any expect(documentListViewService.displayFields).toEqual(['custom_field_1']) }) + + it('should generate quick filter URL with filter rules', () => { + const routerSpy = jest.spyOn(router, 'createUrlTree') + const urlTree = documentListViewService.getQuickFilterUrl(filterRules) + expect(routerSpy).toHaveBeenCalledWith(['/documents'], { + queryParams: expect.objectContaining({ + tags__id__all: tags__id__all, + }), + }) + expect(urlTree).toBeDefined() + }) + + it('should generate quick filter URL preserving default state', () => { + documentListViewService.reload() + httpTestingController.expectOne( + `${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true` + ) + const urlTree = documentListViewService.getQuickFilterUrl(filterRules) + expect(urlTree).toBeDefined() + expect(router.createUrlTree).toBeDefined() + }) }) diff --git a/src-ui/src/app/services/document-list-view.service.ts b/src-ui/src/app/services/document-list-view.service.ts index 9c64a7641..0bc43b782 100644 --- a/src-ui/src/app/services/document-list-view.service.ts +++ b/src-ui/src/app/services/document-list-view.service.ts @@ -1,5 +1,5 @@ import { Injectable, inject } from '@angular/core' -import { ParamMap, Router } from '@angular/router' +import { ParamMap, Router, UrlTree } from '@angular/router' import { Observable, Subject, first, takeUntil } from 'rxjs' import { DEFAULT_DISPLAY_FIELDS, @@ -483,6 +483,18 @@ export class DocumentListViewService { this.router.navigate(['documents']) } + getQuickFilterUrl(filterRules: FilterRule[]): UrlTree { + const defaultState = { + ...this.defaultListViewState(), + ...this.listViewStates.get(null), + filterRules, + } + const params = paramsFromViewState(defaultState) + return this.router.createUrlTree(['/documents'], { + queryParams: params, + }) + } + getLastPage(): number { return Math.ceil(this.collectionSize / this.pageSize) } From 39d46bc2dff0cc04fd5f06dc342dee735e821b50 Mon Sep 17 00:00:00 2001 From: GitHub Actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 7 Jan 2026 22:29:36 +0000 Subject: [PATCH 2/8] Auto translate strings --- src-ui/messages.xlf | 88 ++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index bd1b943b3..c2ecbee24 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -328,23 +328,23 @@ src/app/components/manage/custom-fields/custom-fields.component.html - 61 + 70 src/app/components/manage/management-list/management-list.component.html - 139 + 151 src/app/components/manage/management-list/management-list.component.html - 139 + 151 src/app/components/manage/management-list/management-list.component.html - 139 + 151 src/app/components/manage/management-list/management-list.component.html - 139 + 151 @@ -2164,7 +2164,7 @@ src/app/components/manage/custom-fields/custom-fields.component.html - 55 + 61 src/app/components/manage/mail/mail.component.html @@ -2216,19 +2216,19 @@ src/app/components/manage/management-list/management-list.component.html - 133 + 140 src/app/components/manage/management-list/management-list.component.html - 133 + 140 src/app/components/manage/management-list/management-list.component.html - 133 + 140 src/app/components/manage/management-list/management-list.component.html - 133 + 140 src/app/components/manage/management-list/management-list.component.ts @@ -2300,7 +2300,7 @@ src/app/components/manage/custom-fields/custom-fields.component.ts - 104 + 106 src/app/components/manage/mail/mail.component.ts @@ -2483,7 +2483,7 @@ src/app/components/manage/custom-fields/custom-fields.component.html - 52 + 58 src/app/components/manage/mail/mail.component.html @@ -2519,19 +2519,19 @@ src/app/components/manage/management-list/management-list.component.html - 130 + 137 src/app/components/manage/management-list/management-list.component.html - 130 + 137 src/app/components/manage/management-list/management-list.component.html - 130 + 137 src/app/components/manage/management-list/management-list.component.html - 130 + 137 src/app/components/manage/workflows/workflows.component.html @@ -2627,7 +2627,7 @@ src/app/components/manage/custom-fields/custom-fields.component.ts - 106 + 108 src/app/components/manage/mail/mail.component.ts @@ -3340,7 +3340,7 @@ src/app/components/manage/custom-fields/custom-fields.component.ts - 85 + 87 @@ -3351,7 +3351,7 @@ src/app/components/manage/custom-fields/custom-fields.component.ts - 94 + 96 @@ -4361,7 +4361,7 @@ src/app/components/manage/storage-path-list/storage-path-list.component.ts - 49 + 51 @@ -4436,7 +4436,7 @@ src/app/components/manage/tag-list/tag-list.component.ts - 49 + 51 @@ -8500,28 +8500,28 @@ correspondent src/app/components/manage/correspondent-list/correspondent-list.component.ts - 47 + 49 correspondents src/app/components/manage/correspondent-list/correspondent-list.component.ts - 48 + 50 Last used src/app/components/manage/correspondent-list/correspondent-list.component.ts - 53 + 55 Do you really want to delete the correspondent ""? src/app/components/manage/correspondent-list/correspondent-list.component.ts - 78 + 80 @@ -8549,79 +8549,79 @@ Filter Documents () src/app/components/manage/custom-fields/custom-fields.component.html - 45 + 50 src/app/components/manage/management-list/management-list.component.html - 123 + 129 src/app/components/manage/management-list/management-list.component.html - 123 + 129 src/app/components/manage/management-list/management-list.component.html - 123 + 129 src/app/components/manage/management-list/management-list.component.html - 123 + 129 No fields defined. src/app/components/manage/custom-fields/custom-fields.component.html - 70 + 80 Confirm delete field src/app/components/manage/custom-fields/custom-fields.component.ts - 102 + 104 This operation will permanently delete this field. src/app/components/manage/custom-fields/custom-fields.component.ts - 103 + 105 Deleted field "" src/app/components/manage/custom-fields/custom-fields.component.ts - 112 + 114 Error deleting field "". src/app/components/manage/custom-fields/custom-fields.component.ts - 121 + 123 document type src/app/components/manage/document-type-list/document-type-list.component.ts - 43 + 45 document types src/app/components/manage/document-type-list/document-type-list.component.ts - 44 + 46 Do you really want to delete the document type ""? src/app/components/manage/document-type-list/document-type-list.component.ts - 49 + 51 @@ -9161,42 +9161,42 @@ storage path src/app/components/manage/storage-path-list/storage-path-list.component.ts - 43 + 45 storage paths src/app/components/manage/storage-path-list/storage-path-list.component.ts - 44 + 46 Do you really want to delete the storage path ""? src/app/components/manage/storage-path-list/storage-path-list.component.ts - 60 + 62 tag src/app/components/manage/tag-list/tag-list.component.ts - 43 + 45 tags src/app/components/manage/tag-list/tag-list.component.ts - 44 + 46 Do you really want to delete the tag ""? src/app/components/manage/tag-list/tag-list.component.ts - 60 + 62 From 4c2f5f3473d165f0b62e5720620958cdb1fd1950 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Wed, 7 Jan 2026 14:49:24 -0800 Subject: [PATCH 3/8] Fixhancement: add error handling and retry when opening index (#11731) --- src/documents/index.py | 34 +++++++-- src/documents/tests/test_index.py | 118 ++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 5 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index 6b994ac8c..ea26ea926 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -10,6 +10,7 @@ from datetime import time from datetime import timedelta from datetime import timezone from shutil import rmtree +from time import sleep from typing import TYPE_CHECKING from typing import Literal @@ -32,6 +33,7 @@ from whoosh.highlight import HtmlFormatter from whoosh.idsets import BitSet from whoosh.idsets import DocIdSet from whoosh.index import FileIndex +from whoosh.index import LockError from whoosh.index import create_in from whoosh.index import exists_in from whoosh.index import open_dir @@ -97,11 +99,33 @@ def get_schema() -> Schema: def open_index(*, recreate=False) -> FileIndex: - try: - if exists_in(settings.INDEX_DIR) and not recreate: - return open_dir(settings.INDEX_DIR, schema=get_schema()) - except Exception: - logger.exception("Error while opening the index, recreating.") + transient_exceptions = (FileNotFoundError, LockError) + max_retries = 3 + retry_delay = 0.1 + + for attempt in range(max_retries + 1): + try: + if exists_in(settings.INDEX_DIR) and not recreate: + return open_dir(settings.INDEX_DIR, schema=get_schema()) + break + except transient_exceptions as exc: + is_last_attempt = attempt == max_retries or recreate + if is_last_attempt: + logger.exception( + "Error while opening the index after retries, recreating.", + ) + break + + logger.warning( + "Transient error while opening the index (attempt %s/%s): %s. Retrying.", + attempt + 1, + max_retries + 1, + exc, + ) + sleep(retry_delay) + except Exception: + logger.exception("Error while opening the index, recreating.") + break # create_in doesn't handle corrupted indexes very well, remove the directory entirely first if settings.INDEX_DIR.is_dir(): diff --git a/src/documents/tests/test_index.py b/src/documents/tests/test_index.py index f216feedb..3167bb762 100644 --- a/src/documents/tests/test_index.py +++ b/src/documents/tests/test_index.py @@ -1,6 +1,7 @@ from datetime import datetime from unittest import mock +from django.conf import settings from django.contrib.auth.models import User from django.test import SimpleTestCase from django.test import TestCase @@ -251,3 +252,120 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase): result = self._rewrite_with_now("added:today", fixed_now) # Should convert to UTC properly self.assertIn("added:[20250719", result) + + +class TestIndexResilience(DirectoriesMixin, SimpleTestCase): + def _assert_recreate_called(self, mock_create_in): + mock_create_in.assert_called_once() + path_arg, schema_arg = mock_create_in.call_args.args + self.assertEqual(path_arg, settings.INDEX_DIR) + self.assertEqual(schema_arg.__class__.__name__, "Schema") + + def test_transient_missing_segment_does_not_force_recreate(self): + """ + GIVEN: + - Index directory exists + WHEN: + - open_index is called + - Opening the index raises FileNotFoundError once due to a + transient missing segment + THEN: + - Index is opened successfully on retry + - Index is not recreated + """ + file_marker = settings.INDEX_DIR / "file_marker.txt" + file_marker.write_text("keep") + expected_index = object() + + with ( + mock.patch("documents.index.exists_in", return_value=True), + mock.patch( + "documents.index.open_dir", + side_effect=[FileNotFoundError("missing"), expected_index], + ) as mock_open_dir, + mock.patch( + "documents.index.create_in", + ) as mock_create_in, + mock.patch( + "documents.index.rmtree", + ) as mock_rmtree, + ): + ix = index.open_index() + + self.assertIs(ix, expected_index) + self.assertGreaterEqual(mock_open_dir.call_count, 2) + mock_rmtree.assert_not_called() + mock_create_in.assert_not_called() + self.assertEqual(file_marker.read_text(), "keep") + + def test_transient_errors_exhaust_retries_and_recreate(self): + """ + GIVEN: + - Index directory exists + WHEN: + - open_index is called + - Opening the index raises FileNotFoundError multiple times due to + transient missing segments + THEN: + - Index is recreated after retries are exhausted + """ + recreated_index = object() + + with ( + self.assertLogs("paperless.index", level="ERROR") as cm, + mock.patch("documents.index.exists_in", return_value=True), + mock.patch( + "documents.index.open_dir", + side_effect=FileNotFoundError("missing"), + ) as mock_open_dir, + mock.patch("documents.index.rmtree") as mock_rmtree, + mock.patch( + "documents.index.create_in", + return_value=recreated_index, + ) as mock_create_in, + ): + ix = index.open_index() + + self.assertIs(ix, recreated_index) + self.assertEqual(mock_open_dir.call_count, 4) + mock_rmtree.assert_called_once_with(settings.INDEX_DIR) + self._assert_recreate_called(mock_create_in) + self.assertIn( + "Error while opening the index after retries, recreating.", + cm.output[0], + ) + + def test_non_transient_error_recreates_index(self): + """ + GIVEN: + - Index directory exists + WHEN: + - open_index is called + - Opening the index raises a "non-transient" error + THEN: + - Index is recreated + """ + recreated_index = object() + + with ( + self.assertLogs("paperless.index", level="ERROR") as cm, + mock.patch("documents.index.exists_in", return_value=True), + mock.patch( + "documents.index.open_dir", + side_effect=RuntimeError("boom"), + ), + mock.patch("documents.index.rmtree") as mock_rmtree, + mock.patch( + "documents.index.create_in", + return_value=recreated_index, + ) as mock_create_in, + ): + ix = index.open_index() + + self.assertIs(ix, recreated_index) + mock_rmtree.assert_called_once_with(settings.INDEX_DIR) + self._assert_recreate_called(mock_create_in) + self.assertIn( + "Error while opening the index, recreating.", + cm.output[0], + ) From f3e3ba49d13ff90e25e13c6ea07aa49a40c89554 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 8 Jan 2026 09:52:53 -0800 Subject: [PATCH 4/8] Fix: recurring workflow to respect latest run time (#11735) --- src/documents/tasks.py | 2 +- src/documents/tests/test_workflows.py | 62 +++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 606f278db..6c415ad69 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -493,7 +493,7 @@ def check_scheduled_workflows(): trigger.schedule_is_recurring and workflow_runs.exists() and ( - workflow_runs.last().run_at + workflow_runs.first().run_at > now - datetime.timedelta( days=trigger.schedule_recurring_interval_days, diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py index 249183b6e..deb40a165 100644 --- a/src/documents/tests/test_workflows.py +++ b/src/documents/tests/test_workflows.py @@ -2094,6 +2094,68 @@ class TestWorkflows( doc.refresh_from_db() self.assertIsNone(doc.owner) + def test_workflow_scheduled_recurring_respects_latest_run(self): + """ + GIVEN: + - Scheduled workflow marked as recurring with a 1-day interval + - Document that matches the trigger + - Two prior runs exist: one 2 days ago and one 1 hour ago + WHEN: + - Scheduled workflows are checked again + THEN: + - Workflow does not run because the most recent run is inside the interval + """ + trigger = WorkflowTrigger.objects.create( + type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED, + schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED, + schedule_is_recurring=True, + schedule_recurring_interval_days=1, + ) + action = WorkflowAction.objects.create( + assign_title="Doc assign owner", + assign_owner=self.user2, + ) + w = Workflow.objects.create( + name="Workflow 1", + order=0, + ) + w.triggers.add(trigger) + w.actions.add(action) + w.save() + + doc = Document.objects.create( + title="sample test", + correspondent=self.c, + original_filename="sample.pdf", + created=timezone.now().date() - timedelta(days=3), + ) + + WorkflowRun.objects.create( + workflow=w, + document=doc, + type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED, + run_at=timezone.now() - timedelta(days=2), + ) + WorkflowRun.objects.create( + workflow=w, + document=doc, + type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED, + run_at=timezone.now() - timedelta(hours=1), + ) + + tasks.check_scheduled_workflows() + + doc.refresh_from_db() + self.assertIsNone(doc.owner) + self.assertEqual( + WorkflowRun.objects.filter( + workflow=w, + document=doc, + type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED, + ).count(), + 2, + ) + def test_workflow_scheduled_trigger_negative_offset_customfield(self): """ GIVEN: From 5b1e66be918b203fa416e82da2cd94d1cb64bc21 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 8 Jan 2026 13:36:11 -0800 Subject: [PATCH 5/8] Feature: password removal action (#11656) --- docs/api.md | 7 + ...word-removal-confirm-dialog.component.html | 75 +++++++++ ...word-removal-confirm-dialog.component.scss | 0 ...d-removal-confirm-dialog.component.spec.ts | 53 +++++++ ...ssword-removal-confirm-dialog.component.ts | 38 +++++ .../document-detail.component.html | 6 + .../document-detail.component.spec.ts | 83 ++++++++++ .../document-detail.component.ts | 59 +++++++ src-ui/src/main.ts | 2 + src/documents/bulk_edit.py | 71 +++++++++ src/documents/serialisers.py | 11 ++ src/documents/tests/test_api_bulk_edit.py | 52 +++++++ src/documents/tests/test_bulk_edit.py | 145 ++++++++++++++++++ src/documents/views.py | 5 +- 14 files changed, 606 insertions(+), 1 deletion(-) create mode 100644 src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.html create mode 100644 src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.scss create mode 100644 src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.spec.ts create mode 100644 src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.ts diff --git a/docs/api.md b/docs/api.md index f7e12bf67..1ac634162 100644 --- a/docs/api.md +++ b/docs/api.md @@ -294,6 +294,13 @@ The following methods are supported: - `"delete_original": true` to delete the original documents after editing. - `"update_document": true` to update the existing document with the edited PDF. - `"include_metadata": true` to copy metadata from the original document to the edited document. +- `remove_password` + - Requires `parameters`: + - `"password": "PASSWORD_STRING"` The password to remove from the PDF documents. + - Optional `parameters`: + - `"update_document": true` to replace the existing document with the password-less PDF. + - `"delete_original": true` to delete the original document after editing. + - `"include_metadata": true` to copy metadata from the original document to the new password-less document. - `merge` - No additional `parameters` required. - The ordering of the merged document is determined by the list of IDs. diff --git a/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.html b/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.html new file mode 100644 index 000000000..fc866fe40 --- /dev/null +++ b/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.html @@ -0,0 +1,75 @@ + + + diff --git a/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.scss b/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.scss new file mode 100644 index 000000000..e69de29bb diff --git a/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.spec.ts b/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.spec.ts new file mode 100644 index 000000000..a1449511b --- /dev/null +++ b/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.spec.ts @@ -0,0 +1,53 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing' +import { By } from '@angular/platform-browser' +import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap' +import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons' +import { PasswordRemovalConfirmDialogComponent } from './password-removal-confirm-dialog.component' + +describe('PasswordRemovalConfirmDialogComponent', () => { + let component: PasswordRemovalConfirmDialogComponent + let fixture: ComponentFixture + + beforeEach(async () => { + await TestBed.configureTestingModule({ + providers: [NgbActiveModal], + imports: [ + NgxBootstrapIconsModule.pick(allIcons), + PasswordRemovalConfirmDialogComponent, + ], + }).compileComponents() + + fixture = TestBed.createComponent(PasswordRemovalConfirmDialogComponent) + component = fixture.componentInstance + fixture.detectChanges() + }) + + it('should default to replacing the document', () => { + expect(component.updateDocument).toBe(true) + expect( + fixture.debugElement.query(By.css('#removeReplace')).nativeElement.checked + ).toBe(true) + }) + + it('should allow creating a new document with metadata and delete toggle', () => { + component.onUpdateDocumentChange(false) + fixture.detectChanges() + + expect(component.updateDocument).toBe(false) + expect(fixture.debugElement.query(By.css('#copyMetaRemove'))).not.toBeNull() + + component.includeMetadata = false + component.deleteOriginal = true + component.onUpdateDocumentChange(true) + expect(component.updateDocument).toBe(true) + expect(component.includeMetadata).toBe(true) + expect(component.deleteOriginal).toBe(false) + }) + + it('should emit confirm when confirmed', () => { + let confirmed = false + component.confirmClicked.subscribe(() => (confirmed = true)) + component.confirm() + expect(confirmed).toBe(true) + }) +}) diff --git a/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.ts b/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.ts new file mode 100644 index 000000000..82444ad13 --- /dev/null +++ b/src-ui/src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.ts @@ -0,0 +1,38 @@ +import { Component, Input } from '@angular/core' +import { FormsModule } from '@angular/forms' +import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons' +import { ConfirmDialogComponent } from '../confirm-dialog.component' + +@Component({ + selector: 'pngx-password-removal-confirm-dialog', + templateUrl: './password-removal-confirm-dialog.component.html', + styleUrls: ['./password-removal-confirm-dialog.component.scss'], + imports: [FormsModule, NgxBootstrapIconsModule], +}) +export class PasswordRemovalConfirmDialogComponent extends ConfirmDialogComponent { + updateDocument: boolean = true + includeMetadata: boolean = true + deleteOriginal: boolean = false + + @Input() + override title = $localize`Remove password protection` + + @Input() + override message = + $localize`Create an unprotected copy or replace the existing file.` + + @Input() + override btnCaption = $localize`Start` + + constructor() { + super() + } + + onUpdateDocumentChange(updateDocument: boolean) { + this.updateDocument = updateDocument + if (this.updateDocument) { + this.deleteOriginal = false + this.includeMetadata = true + } + } +} diff --git a/src-ui/src/app/components/document-detail/document-detail.component.html b/src-ui/src/app/components/document-detail/document-detail.component.html index c3dbc4805..f8a942ba3 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.html +++ b/src-ui/src/app/components/document-detail/document-detail.component.html @@ -65,6 +65,12 @@ + + @if (userIsOwner && (requiresPassword || password)) { + + } diff --git a/src-ui/src/app/components/document-detail/document-detail.component.spec.ts b/src-ui/src/app/components/document-detail/document-detail.component.spec.ts index dada60074..b1b3650c6 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.spec.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.spec.ts @@ -66,6 +66,7 @@ import { SettingsService } from 'src/app/services/settings.service' import { ToastService } from 'src/app/services/toast.service' import { environment } from 'src/environments/environment' import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component' +import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component' import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component' import { DocumentDetailComponent, @@ -1209,6 +1210,88 @@ describe('DocumentDetailComponent', () => { expect(closeSpy).toHaveBeenCalled() }) + it('should support removing password protection from pdfs', () => { + let modal: NgbModalRef + modalService.activeInstances.subscribe((m) => (modal = m[0])) + initNormally() + component.password = 'secret' + component.removePassword() + const dialog = + modal.componentInstance as PasswordRemovalConfirmDialogComponent + dialog.updateDocument = false + dialog.includeMetadata = false + dialog.deleteOriginal = true + dialog.confirm() + const req = httpTestingController.expectOne( + `${environment.apiBaseUrl}documents/bulk_edit/` + ) + expect(req.request.body).toEqual({ + documents: [doc.id], + method: 'remove_password', + parameters: { + password: 'secret', + update_document: false, + include_metadata: false, + delete_original: true, + }, + }) + req.flush(true) + }) + + it('should require the current password before removing it', () => { + initNormally() + const errorSpy = jest.spyOn(toastService, 'showError') + component.requiresPassword = true + component.password = '' + + component.removePassword() + + expect(errorSpy).toHaveBeenCalled() + httpTestingController.expectNone( + `${environment.apiBaseUrl}documents/bulk_edit/` + ) + }) + + it('should handle failures when removing password protection', () => { + let modal: NgbModalRef + modalService.activeInstances.subscribe((m) => (modal = m[0])) + initNormally() + const errorSpy = jest.spyOn(toastService, 'showError') + component.password = 'secret' + + component.removePassword() + const dialog = + modal.componentInstance as PasswordRemovalConfirmDialogComponent + dialog.confirm() + const req = httpTestingController.expectOne( + `${environment.apiBaseUrl}documents/bulk_edit/` + ) + req.error(new ErrorEvent('failed')) + + expect(errorSpy).toHaveBeenCalled() + expect(component.networkActive).toBe(false) + expect(dialog.buttonsEnabled).toBe(true) + }) + + it('should refresh the document when removing password in update mode', () => { + let modal: NgbModalRef + modalService.activeInstances.subscribe((m) => (modal = m[0])) + const refreshSpy = jest.spyOn(openDocumentsService, 'refreshDocument') + initNormally() + component.password = 'secret' + + component.removePassword() + const dialog = + modal.componentInstance as PasswordRemovalConfirmDialogComponent + dialog.confirm() + const req = httpTestingController.expectOne( + `${environment.apiBaseUrl}documents/bulk_edit/` + ) + req.flush(true) + + expect(refreshSpy).toHaveBeenCalledWith(doc.id) + }) + it('should support keyboard shortcuts', () => { initNormally() diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index 9c0c84592..165cf0cef 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -83,6 +83,7 @@ import { getFilenameFromContentDisposition } from 'src/app/utils/http' import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter' import * as UTIF from 'utif' import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component' +import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component' import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component' import { CorrespondentEditDialogComponent } from '../common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component' import { DocumentTypeEditDialogComponent } from '../common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component' @@ -175,6 +176,7 @@ export enum ZoomSetting { NgxBootstrapIconsModule, PdfViewerModule, TextAreaComponent, + PasswordRemovalConfirmDialogComponent, ], }) export class DocumentDetailComponent @@ -1428,6 +1430,63 @@ export class DocumentDetailComponent }) } + removePassword() { + if (this.requiresPassword || !this.password) { + this.toastService.showError( + $localize`Please enter the current password before attempting to remove it.` + ) + return + } + const modal = this.modalService.open( + PasswordRemovalConfirmDialogComponent, + { + backdrop: 'static', + } + ) + modal.componentInstance.title = $localize`Remove password protection` + modal.componentInstance.message = $localize`Create an unprotected copy or replace the existing file.` + modal.componentInstance.btnCaption = $localize`Start` + + modal.componentInstance.confirmClicked + .pipe(takeUntil(this.unsubscribeNotifier)) + .subscribe(() => { + const dialog = + modal.componentInstance as PasswordRemovalConfirmDialogComponent + dialog.buttonsEnabled = false + this.networkActive = true + this.documentsService + .bulkEdit([this.document.id], 'remove_password', { + password: this.password, + update_document: dialog.updateDocument, + include_metadata: dialog.includeMetadata, + delete_original: dialog.deleteOriginal, + }) + .pipe(first(), takeUntil(this.unsubscribeNotifier)) + .subscribe({ + next: () => { + this.toastService.showInfo( + $localize`Password removal operation for "${this.document.title}" will begin in the background.` + ) + this.networkActive = false + modal.close() + if (!dialog.updateDocument && dialog.deleteOriginal) { + this.openDocumentService.closeDocument(this.document) + } else if (dialog.updateDocument) { + this.openDocumentService.refreshDocument(this.documentId) + } + }, + error: (error) => { + dialog.buttonsEnabled = true + this.networkActive = false + this.toastService.showError( + $localize`Error executing password removal operation`, + error + ) + }, + }) + }) + } + printDocument() { const printUrl = this.documentsService.getDownloadUrl( this.document.id, diff --git a/src-ui/src/main.ts b/src-ui/src/main.ts index b55faf227..f140536ec 100644 --- a/src-ui/src/main.ts +++ b/src-ui/src/main.ts @@ -132,6 +132,7 @@ import { threeDotsVertical, trash, uiRadios, + unlock, upcScan, windowStack, x, @@ -348,6 +349,7 @@ const icons = { threeDotsVertical, trash, uiRadios, + unlock, upcScan, windowStack, x, diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py index 219947d09..43cb13261 100644 --- a/src/documents/bulk_edit.py +++ b/src/documents/bulk_edit.py @@ -646,6 +646,77 @@ def edit_pdf( return "OK" +def remove_password( + doc_ids: list[int], + password: str, + *, + update_document: bool = False, + delete_original: bool = False, + include_metadata: bool = True, + user: User | None = None, +) -> Literal["OK"]: + """ + Remove password protection from PDF documents. + """ + import pikepdf + + for doc_id in doc_ids: + doc = Document.objects.get(id=doc_id) + try: + logger.info( + f"Attempting password removal from document {doc_ids[0]}", + ) + with pikepdf.open(doc.source_path, password=password) as pdf: + temp_path = doc.source_path.with_suffix(".tmp.pdf") + pdf.remove_unreferenced_resources() + pdf.save(temp_path) + + if update_document: + # replace the original document with the unprotected one + temp_path.replace(doc.source_path) + doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest() + doc.page_count = len(pdf.pages) + doc.save() + update_document_content_maybe_archive_file.delay(document_id=doc.id) + else: + consume_tasks = [] + overrides = ( + DocumentMetadataOverrides().from_document(doc) + if include_metadata + else DocumentMetadataOverrides() + ) + if user is not None: + overrides.owner_id = user.id + + filepath: Path = ( + Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR)) + / f"{doc.id}_unprotected.pdf" + ) + temp_path.replace(filepath) + consume_tasks.append( + consume_file.s( + ConsumableDocument( + source=DocumentSource.ConsumeFolder, + original_file=filepath, + ), + overrides, + ), + ) + + if delete_original: + chord(header=consume_tasks, body=delete.si([doc.id])).delay() + else: + group(consume_tasks).delay() + + except Exception as e: + logger.exception(f"Error removing password from document {doc.id}: {e}") + raise ValueError( + f"An error occurred while removing the password: {e}", + ) from e + + return "OK" + + def reflect_doclinks( document: Document, field: CustomField, diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index 5c71de9a9..6e2307c2e 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -1430,6 +1430,7 @@ class BulkEditSerializer( "split", "delete_pages", "edit_pdf", + "remove_password", ], label="Method", write_only=True, @@ -1505,6 +1506,8 @@ class BulkEditSerializer( return bulk_edit.delete_pages elif method == "edit_pdf": return bulk_edit.edit_pdf + elif method == "remove_password": + return bulk_edit.remove_password else: # pragma: no cover # This will never happen as it is handled by the ChoiceField raise serializers.ValidationError("Unsupported method.") @@ -1701,6 +1704,12 @@ class BulkEditSerializer( f"Page {op['page']} is out of bounds for document with {doc.page_count} pages.", ) + def validate_parameters_remove_password(self, parameters): + if "password" not in parameters: + raise serializers.ValidationError("password not specified") + if not isinstance(parameters["password"], str): + raise serializers.ValidationError("password must be a string") + def validate(self, attrs): method = attrs["method"] parameters = attrs["parameters"] @@ -1741,6 +1750,8 @@ class BulkEditSerializer( "Edit PDF method only supports one document", ) self._validate_parameters_edit_pdf(parameters, attrs["documents"][0]) + elif method == bulk_edit.remove_password: + self.validate_parameters_remove_password(parameters) return attrs diff --git a/src/documents/tests/test_api_bulk_edit.py b/src/documents/tests/test_api_bulk_edit.py index 945f06b67..2ba9f1af6 100644 --- a/src/documents/tests/test_api_bulk_edit.py +++ b/src/documents/tests/test_api_bulk_edit.py @@ -1582,6 +1582,58 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase): self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertIn(b"out of bounds", response.content) + @mock.patch("documents.serialisers.bulk_edit.remove_password") + def test_remove_password(self, m): + self.setup_mock(m, "remove_password") + response = self.client.post( + "/api/documents/bulk_edit/", + json.dumps( + { + "documents": [self.doc2.id], + "method": "remove_password", + "parameters": {"password": "secret", "update_document": True}, + }, + ), + content_type="application/json", + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + m.assert_called_once() + args, kwargs = m.call_args + self.assertCountEqual(args[0], [self.doc2.id]) + self.assertEqual(kwargs["password"], "secret") + self.assertTrue(kwargs["update_document"]) + self.assertEqual(kwargs["user"], self.user) + + def test_remove_password_invalid_params(self): + response = self.client.post( + "/api/documents/bulk_edit/", + json.dumps( + { + "documents": [self.doc2.id], + "method": "remove_password", + "parameters": {}, + }, + ), + content_type="application/json", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertIn(b"password not specified", response.content) + + response = self.client.post( + "/api/documents/bulk_edit/", + json.dumps( + { + "documents": [self.doc2.id], + "method": "remove_password", + "parameters": {"password": 123}, + }, + ), + content_type="application/json", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertIn(b"password must be a string", response.content) + @override_settings(AUDIT_LOG_ENABLED=True) def test_bulk_edit_audit_log_enabled_simple_field(self): """ diff --git a/src/documents/tests/test_bulk_edit.py b/src/documents/tests/test_bulk_edit.py index c220c1e9b..bf5033bdc 100644 --- a/src/documents/tests/test_bulk_edit.py +++ b/src/documents/tests/test_bulk_edit.py @@ -1,3 +1,4 @@ +import hashlib import shutil from datetime import date from pathlib import Path @@ -1066,3 +1067,147 @@ class TestPDFActions(DirectoriesMixin, TestCase): bulk_edit.edit_pdf(doc_ids, operations, update_document=True) mock_group.assert_not_called() mock_consume_file.assert_not_called() + + @mock.patch("documents.bulk_edit.update_document_content_maybe_archive_file.delay") + @mock.patch("pikepdf.open") + def test_remove_password_update_document(self, mock_open, mock_update_document): + doc = self.doc1 + original_checksum = doc.checksum + + fake_pdf = mock.MagicMock() + fake_pdf.pages = [mock.Mock(), mock.Mock(), mock.Mock()] + + def save_side_effect(target_path): + Path(target_path).write_bytes(b"new pdf content") + + fake_pdf.save.side_effect = save_side_effect + mock_open.return_value.__enter__.return_value = fake_pdf + + result = bulk_edit.remove_password( + [doc.id], + password="secret", + update_document=True, + ) + + self.assertEqual(result, "OK") + mock_open.assert_called_once_with(doc.source_path, password="secret") + fake_pdf.remove_unreferenced_resources.assert_called_once() + doc.refresh_from_db() + self.assertNotEqual(doc.checksum, original_checksum) + expected_checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest() + self.assertEqual(doc.checksum, expected_checksum) + self.assertEqual(doc.page_count, len(fake_pdf.pages)) + mock_update_document.assert_called_once_with(document_id=doc.id) + + @mock.patch("documents.bulk_edit.chord") + @mock.patch("documents.bulk_edit.group") + @mock.patch("documents.tasks.consume_file.s") + @mock.patch("documents.bulk_edit.tempfile.mkdtemp") + @mock.patch("pikepdf.open") + def test_remove_password_creates_consumable_document( + self, + mock_open, + mock_mkdtemp, + mock_consume_file, + mock_group, + mock_chord, + ): + doc = self.doc2 + temp_dir = self.dirs.scratch_dir / "remove-password" + temp_dir.mkdir(parents=True, exist_ok=True) + mock_mkdtemp.return_value = str(temp_dir) + + fake_pdf = mock.MagicMock() + fake_pdf.pages = [mock.Mock(), mock.Mock()] + + def save_side_effect(target_path): + Path(target_path).write_bytes(b"password removed") + + fake_pdf.save.side_effect = save_side_effect + mock_open.return_value.__enter__.return_value = fake_pdf + mock_group.return_value.delay.return_value = None + + user = User.objects.create(username="owner") + + result = bulk_edit.remove_password( + [doc.id], + password="secret", + include_metadata=False, + update_document=False, + delete_original=False, + user=user, + ) + + self.assertEqual(result, "OK") + mock_open.assert_called_once_with(doc.source_path, password="secret") + mock_consume_file.assert_called_once() + consume_args, _ = mock_consume_file.call_args + consumable_document = consume_args[0] + overrides = consume_args[1] + expected_path = temp_dir / f"{doc.id}_unprotected.pdf" + self.assertTrue(expected_path.exists()) + self.assertEqual( + Path(consumable_document.original_file).resolve(), + expected_path.resolve(), + ) + self.assertEqual(overrides.owner_id, user.id) + mock_group.assert_called_once_with([mock_consume_file.return_value]) + mock_group.return_value.delay.assert_called_once() + mock_chord.assert_not_called() + + @mock.patch("documents.bulk_edit.delete") + @mock.patch("documents.bulk_edit.chord") + @mock.patch("documents.bulk_edit.group") + @mock.patch("documents.tasks.consume_file.s") + @mock.patch("documents.bulk_edit.tempfile.mkdtemp") + @mock.patch("pikepdf.open") + def test_remove_password_deletes_original( + self, + mock_open, + mock_mkdtemp, + mock_consume_file, + mock_group, + mock_chord, + mock_delete, + ): + doc = self.doc2 + temp_dir = self.dirs.scratch_dir / "remove-password-delete" + temp_dir.mkdir(parents=True, exist_ok=True) + mock_mkdtemp.return_value = str(temp_dir) + + fake_pdf = mock.MagicMock() + fake_pdf.pages = [mock.Mock(), mock.Mock()] + + def save_side_effect(target_path): + Path(target_path).write_bytes(b"password removed") + + fake_pdf.save.side_effect = save_side_effect + mock_open.return_value.__enter__.return_value = fake_pdf + mock_chord.return_value.delay.return_value = None + + result = bulk_edit.remove_password( + [doc.id], + password="secret", + include_metadata=False, + update_document=False, + delete_original=True, + ) + + self.assertEqual(result, "OK") + mock_open.assert_called_once_with(doc.source_path, password="secret") + mock_consume_file.assert_called_once() + mock_group.assert_not_called() + mock_chord.assert_called_once() + mock_chord.return_value.delay.assert_called_once() + mock_delete.si.assert_called_once_with([doc.id]) + + @mock.patch("pikepdf.open") + def test_remove_password_open_failure(self, mock_open): + mock_open.side_effect = RuntimeError("wrong password") + + with self.assertLogs("paperless.bulk_edit", level="ERROR") as cm: + with self.assertRaises(ValueError) as exc: + bulk_edit.remove_password([self.doc1.id], password="secret") + + self.assertIn("wrong password", str(exc.exception)) + self.assertIn("Error removing password from document", cm.output[0]) diff --git a/src/documents/views.py b/src/documents/views.py index d5910497f..680600c4b 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -1504,6 +1504,7 @@ class BulkEditView(PassUserMixin): "merge": None, "edit_pdf": "checksum", "reprocess": "checksum", + "remove_password": "checksum", } permission_classes = (IsAuthenticated,) @@ -1522,6 +1523,7 @@ class BulkEditView(PassUserMixin): bulk_edit.split, bulk_edit.merge, bulk_edit.edit_pdf, + bulk_edit.remove_password, ]: parameters["user"] = user @@ -1550,6 +1552,7 @@ class BulkEditView(PassUserMixin): bulk_edit.rotate, bulk_edit.delete_pages, bulk_edit.edit_pdf, + bulk_edit.remove_password, ] ) or ( @@ -1566,7 +1569,7 @@ class BulkEditView(PassUserMixin): and ( method in [bulk_edit.split, bulk_edit.merge] or ( - method == bulk_edit.edit_pdf + method in [bulk_edit.edit_pdf, bulk_edit.remove_password] and not parameters["update_document"] ) ) From cb5f09c04e0fa2db32beb56de6ff89bf67bcd2f6 Mon Sep 17 00:00:00 2001 From: GitHub Actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 8 Jan 2026 21:38:45 +0000 Subject: [PATCH 6/8] Auto translate strings --- src-ui/messages.xlf | 239 +++++++++++++++++-------- src/locale/en_US/LC_MESSAGES/django.po | 14 +- 2 files changed, 171 insertions(+), 82 deletions(-) diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index c2ecbee24..a0692e794 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -385,7 +385,7 @@ src/app/components/document-detail/document-detail.component.html - 113 + 119 @@ -534,7 +534,7 @@ src/app/components/document-detail/document-detail.component.html - 374 + 380 @@ -593,7 +593,7 @@ src/app/components/document-detail/document-detail.component.html - 367 + 373 src/app/components/document-list/bulk-editor/custom-fields-bulk-edit-dialog/custom-fields-bulk-edit-dialog.component.html @@ -761,7 +761,7 @@ src/app/components/document-detail/document-detail.component.html - 387 + 393 src/app/components/document-list/document-list.component.html @@ -1234,7 +1234,7 @@ src/app/components/document-detail/document-detail.component.html - 343 + 349 src/app/components/document-list/bulk-editor/bulk-editor.component.html @@ -2607,11 +2607,11 @@ src/app/components/document-detail/document-detail.component.ts - 1028 + 1030 src/app/components/document-detail/document-detail.component.ts - 1393 + 1395 src/app/components/document-list/bulk-editor/bulk-editor.component.ts @@ -3223,7 +3223,7 @@ src/app/components/document-detail/document-detail.component.ts - 981 + 983 src/app/components/document-list/bulk-editor/bulk-editor.component.ts @@ -3292,6 +3292,67 @@ 39 + + Replace current document + + src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.html + 22 + + + + Create new document + + src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.html + 35 + + + + Copy metadata + + src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.html + 43,44 + + + + Delete original + + src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.html + 48 + + + + Remove password protection + + src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.ts + 18 + + + src/app/components/document-detail/document-detail.component.ts + 1446 + + + + Create an unprotected copy or replace the existing file. + + src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.ts + 22 + + + src/app/components/document-detail/document-detail.component.ts + 1447 + + + + Start + + src/app/components/common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component.ts + 25 + + + src/app/components/document-detail/document-detail.component.ts + 1448 + + Note that only PDFs will be rotated. @@ -3413,7 +3474,7 @@ src/app/components/document-detail/document-detail.component.html - 107 + 113 src/app/guards/dirty-saved-view.guard.ts @@ -4379,7 +4440,7 @@ src/app/components/document-detail/document-detail.component.html - 309 + 315 @@ -4490,7 +4551,7 @@ src/app/components/document-detail/document-detail.component.html - 92 + 98 src/app/components/document-list/bulk-editor/bulk-editor.component.html @@ -6210,7 +6271,7 @@ src/app/components/document-detail/document-detail.component.html - 88 + 94 @@ -6806,35 +6867,42 @@ src/app/components/document-detail/document-detail.component.ts - 1392 + 1394 + + + + Remove Password + + src/app/components/document-detail/document-detail.component.html + 71 Send src/app/components/document-detail/document-detail.component.html - 84 + 90 Previous src/app/components/document-detail/document-detail.component.html - 110 + 116 Details src/app/components/document-detail/document-detail.component.html - 123 + 129 Title src/app/components/document-detail/document-detail.component.html - 126 + 132 src/app/components/document-list/document-list.component.html @@ -6857,21 +6925,21 @@ Archive serial number src/app/components/document-detail/document-detail.component.html - 127 + 133 Date created src/app/components/document-detail/document-detail.component.html - 128 + 134 Correspondent src/app/components/document-detail/document-detail.component.html - 130 + 136 src/app/components/document-list/bulk-editor/bulk-editor.component.html @@ -6898,7 +6966,7 @@ Document type src/app/components/document-detail/document-detail.component.html - 132 + 138 src/app/components/document-list/bulk-editor/bulk-editor.component.html @@ -6925,7 +6993,7 @@ Storage path src/app/components/document-detail/document-detail.component.html - 134 + 140 src/app/components/document-list/bulk-editor/bulk-editor.component.html @@ -6948,7 +7016,7 @@ Default src/app/components/document-detail/document-detail.component.html - 135 + 141 src/app/components/manage/saved-views/saved-views.component.html @@ -6959,14 +7027,14 @@ Content src/app/components/document-detail/document-detail.component.html - 239 + 245 Metadata src/app/components/document-detail/document-detail.component.html - 248 + 254 src/app/components/document-detail/metadata-collapse/metadata-collapse.component.ts @@ -6977,175 +7045,175 @@ Date modified src/app/components/document-detail/document-detail.component.html - 255 + 261 Date added src/app/components/document-detail/document-detail.component.html - 259 + 265 Media filename src/app/components/document-detail/document-detail.component.html - 263 + 269 Original filename src/app/components/document-detail/document-detail.component.html - 267 + 273 Original MD5 checksum src/app/components/document-detail/document-detail.component.html - 271 + 277 Original file size src/app/components/document-detail/document-detail.component.html - 275 + 281 Original mime type src/app/components/document-detail/document-detail.component.html - 279 + 285 Archive MD5 checksum src/app/components/document-detail/document-detail.component.html - 284 + 290 Archive file size src/app/components/document-detail/document-detail.component.html - 290 + 296 Original document metadata src/app/components/document-detail/document-detail.component.html - 299 + 305 Archived document metadata src/app/components/document-detail/document-detail.component.html - 302 + 308 Notes src/app/components/document-detail/document-detail.component.html - 321,324 + 327,330 History src/app/components/document-detail/document-detail.component.html - 332 + 338 Save & next src/app/components/document-detail/document-detail.component.html - 369 + 375 Save & close src/app/components/document-detail/document-detail.component.html - 372 + 378 Document loading... src/app/components/document-detail/document-detail.component.html - 382 + 388 Enter Password src/app/components/document-detail/document-detail.component.html - 436 + 442 An error occurred loading content: src/app/components/document-detail/document-detail.component.ts - 416,418 + 418,420 Document changes detected src/app/components/document-detail/document-detail.component.ts - 450 + 452 The version of this document in your browser session appears older than the existing version. src/app/components/document-detail/document-detail.component.ts - 451 + 453 Saving the document here may overwrite other changes that were made. To restore the existing version, discard your changes or close the document. src/app/components/document-detail/document-detail.component.ts - 452 + 454 Ok src/app/components/document-detail/document-detail.component.ts - 454 + 456 Next document src/app/components/document-detail/document-detail.component.ts - 580 + 582 Previous document src/app/components/document-detail/document-detail.component.ts - 590 + 592 Close document src/app/components/document-detail/document-detail.component.ts - 598 + 600 src/app/services/open-documents.service.ts @@ -7156,67 +7224,67 @@ Save document src/app/components/document-detail/document-detail.component.ts - 605 + 607 Save and close / next src/app/components/document-detail/document-detail.component.ts - 614 + 616 Error retrieving metadata src/app/components/document-detail/document-detail.component.ts - 669 + 671 Error retrieving suggestions. src/app/components/document-detail/document-detail.component.ts - 698 + 700 Document "" saved successfully. src/app/components/document-detail/document-detail.component.ts - 870 + 872 src/app/components/document-detail/document-detail.component.ts - 894 + 896 Error saving document "" src/app/components/document-detail/document-detail.component.ts - 900 + 902 Error saving document src/app/components/document-detail/document-detail.component.ts - 950 + 952 Do you really want to move the document "" to the trash? src/app/components/document-detail/document-detail.component.ts - 982 + 984 Documents can be restored prior to permanent deletion. src/app/components/document-detail/document-detail.component.ts - 983 + 985 src/app/components/document-list/bulk-editor/bulk-editor.component.ts @@ -7227,7 +7295,7 @@ Move to trash src/app/components/document-detail/document-detail.component.ts - 985 + 987 src/app/components/document-list/bulk-editor/bulk-editor.component.ts @@ -7238,14 +7306,14 @@ Error deleting document src/app/components/document-detail/document-detail.component.ts - 1004 + 1006 Reprocess confirm src/app/components/document-detail/document-detail.component.ts - 1024 + 1026 src/app/components/document-list/bulk-editor/bulk-editor.component.ts @@ -7256,81 +7324,102 @@ This operation will permanently recreate the archive file for this document. src/app/components/document-detail/document-detail.component.ts - 1025 + 1027 The archive file will be re-generated with the current settings. src/app/components/document-detail/document-detail.component.ts - 1026 + 1028 Reprocess operation for "" will begin in the background. Close and re-open or reload this document after the operation has completed to see new content. src/app/components/document-detail/document-detail.component.ts - 1036 + 1038 Error executing operation src/app/components/document-detail/document-detail.component.ts - 1047 + 1049 Error downloading document src/app/components/document-detail/document-detail.component.ts - 1096 + 1098 Page Fit src/app/components/document-detail/document-detail.component.ts - 1173 + 1175 PDF edit operation for "" will begin in the background. src/app/components/document-detail/document-detail.component.ts - 1411 + 1413 Error executing PDF edit operation src/app/components/document-detail/document-detail.component.ts - 1423 + 1425 + + + + Please enter the current password before attempting to remove it. + + src/app/components/document-detail/document-detail.component.ts + 1436 + + + + Password removal operation for "" will begin in the background. + + src/app/components/document-detail/document-detail.component.ts + 1468 + + + + Error executing password removal operation + + src/app/components/document-detail/document-detail.component.ts + 1482 Print failed. src/app/components/document-detail/document-detail.component.ts - 1460 + 1519 Error loading document for printing. src/app/components/document-detail/document-detail.component.ts - 1472 + 1531 An error occurred loading tiff: src/app/components/document-detail/document-detail.component.ts - 1537 + 1596 src/app/components/document-detail/document-detail.component.ts - 1541 + 1600 diff --git a/src/locale/en_US/LC_MESSAGES/django.po b/src/locale/en_US/LC_MESSAGES/django.po index 75cd392ad..6f64a0b88 100644 --- a/src/locale/en_US/LC_MESSAGES/django.po +++ b/src/locale/en_US/LC_MESSAGES/django.po @@ -2,7 +2,7 @@ msgid "" msgstr "" "Project-Id-Version: paperless-ngx\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-01-06 17:11+0000\n" +"POT-Creation-Date: 2026-01-08 21:37+0000\n" "PO-Revision-Date: 2022-02-17 04:17\n" "Last-Translator: \n" "Language-Team: English\n" @@ -1223,31 +1223,31 @@ msgstr "" msgid "Invalid color." msgstr "" -#: documents/serialisers.py:1835 +#: documents/serialisers.py:1846 #, python-format msgid "File type %(type)s not supported" msgstr "" -#: documents/serialisers.py:1879 +#: documents/serialisers.py:1890 #, python-format msgid "Custom field id must be an integer: %(id)s" msgstr "" -#: documents/serialisers.py:1886 +#: documents/serialisers.py:1897 #, python-format msgid "Custom field with id %(id)s does not exist" msgstr "" -#: documents/serialisers.py:1903 documents/serialisers.py:1913 +#: documents/serialisers.py:1914 documents/serialisers.py:1924 msgid "" "Custom fields must be a list of integers or an object mapping ids to values." msgstr "" -#: documents/serialisers.py:1908 +#: documents/serialisers.py:1919 msgid "Some custom fields don't exist or were specified twice." msgstr "" -#: documents/serialisers.py:2023 +#: documents/serialisers.py:2034 msgid "Invalid variable detected." msgstr "" From 58d88440f182bfd8bd0362bc210a22469e196334 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 8 Jan 2026 13:49:17 -0800 Subject: [PATCH 7/8] Feature: Remote OCR (Azure AI) (#10320) --- docs/configuration.md | 20 +++ docs/index.md | 5 +- docs/usage.md | 15 ++ pyproject.toml | 2 + src/paperless/settings.py | 8 ++ src/paperless_remote/__init__.py | 4 + src/paperless_remote/apps.py | 14 ++ src/paperless_remote/checks.py | 17 +++ src/paperless_remote/parsers.py | 118 ++++++++++++++++ src/paperless_remote/signals.py | 18 +++ src/paperless_remote/tests/__init__.py | 0 .../tests/samples/simple-digital.pdf | Bin 0 -> 22926 bytes src/paperless_remote/tests/test_checks.py | 24 ++++ src/paperless_remote/tests/test_parser.py | 128 ++++++++++++++++++ uv.lock | 39 ++++++ 15 files changed, 410 insertions(+), 2 deletions(-) create mode 100644 src/paperless_remote/__init__.py create mode 100644 src/paperless_remote/apps.py create mode 100644 src/paperless_remote/checks.py create mode 100644 src/paperless_remote/parsers.py create mode 100644 src/paperless_remote/signals.py create mode 100644 src/paperless_remote/tests/__init__.py create mode 100644 src/paperless_remote/tests/samples/simple-digital.pdf create mode 100644 src/paperless_remote/tests/test_checks.py create mode 100644 src/paperless_remote/tests/test_parser.py diff --git a/docs/configuration.md b/docs/configuration.md index cd5b8cf0a..e1f6f6d4c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1804,3 +1804,23 @@ password. All of these options come from their similarly-named [Django settings] #### [`PAPERLESS_EMAIL_USE_SSL=`](#PAPERLESS_EMAIL_USE_SSL) {#PAPERLESS_EMAIL_USE_SSL} : Defaults to false. + +## Remote OCR + +#### [`PAPERLESS_REMOTE_OCR_ENGINE=`](#PAPERLESS_REMOTE_OCR_ENGINE) {#PAPERLESS_REMOTE_OCR_ENGINE} + +: The remote OCR engine to use. Currently only Azure AI is supported as "azureai". + + Defaults to None, which disables remote OCR. + +#### [`PAPERLESS_REMOTE_OCR_API_KEY=`](#PAPERLESS_REMOTE_OCR_API_KEY) {#PAPERLESS_REMOTE_OCR_API_KEY} + +: The API key to use for the remote OCR engine. + + Defaults to None. + +#### [`PAPERLESS_REMOTE_OCR_ENDPOINT=`](#PAPERLESS_REMOTE_OCR_ENDPOINT) {#PAPERLESS_REMOTE_OCR_ENDPOINT} + +: The endpoint to use for the remote OCR engine. This is required for Azure AI. + + Defaults to None. diff --git a/docs/index.md b/docs/index.md index c1c06eb2b..c84cd0ce4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -25,9 +25,10 @@ physical documents into a searchable online archive so you can keep, well, _less ## Features - **Organize and index** your scanned documents with tags, correspondents, types, and more. -- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way. +- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so. - Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images. -- Utilizes the open-source Tesseract engine to recognize more than 100 languages. + - Utilizes the open-source Tesseract engine to recognize more than 100 languages. + - _New!_ Supports remote OCR with Azure AI (opt-in). - Documents are saved as PDF/A format which is designed for long term storage, alongside the unaltered originals. - Uses machine-learning to automatically add tags, correspondents and document types to your documents. - Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more. diff --git a/docs/usage.md b/docs/usage.md index 339dbddde..a307db3cd 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -901,6 +901,21 @@ how regularly you intend to scan documents and use paperless. performed the task associated with the document, move it to the inbox. +## Remote OCR + +!!! important + + This feature is disabled by default and will always remain strictly "opt-in". + +Paperless-ngx supports performing OCR on documents using remote services. At the moment, this is limited to +[Microsoft's Azure "Document Intelligence" service](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence). +This is of course a paid service (with a free tier) which requires an Azure account and subscription. Azure AI is not affiliated with +Paperless-ngx in any way. When enabled, Paperless-ngx will automatically send appropriate documents to Azure for OCR processing, bypassing +the local OCR engine. See the [configuration](configuration.md#PAPERLESS_REMOTE_OCR_ENGINE) options for more details. + +Additionally, when using a commercial service with this feature, consider both potential costs as well as any associated file size +or page limitations (e.g. with a free tier). + ## Architecture Paperless-ngx consists of the following components: diff --git a/pyproject.toml b/pyproject.toml index f5c484ae4..fb47e55f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ classifiers = [ # This will allow testing to not install a webserver, mysql, etc dependencies = [ + "azure-ai-documentintelligence>=1.0.2", "babel>=2.17", "bleach~=6.3.0", "celery[redis]~=5.5.1", @@ -253,6 +254,7 @@ testpaths = [ "src/paperless_tesseract/tests/", "src/paperless_tika/tests", "src/paperless_text/tests/", + "src/paperless_remote/tests/", ] addopts = [ "--pythonwarnings=all", diff --git a/src/paperless/settings.py b/src/paperless/settings.py index ab2aef4ee..1cd357f86 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -321,6 +321,7 @@ INSTALLED_APPS = [ "paperless_tesseract.apps.PaperlessTesseractConfig", "paperless_text.apps.PaperlessTextConfig", "paperless_mail.apps.PaperlessMailConfig", + "paperless_remote.apps.PaperlessRemoteParserConfig", "django.contrib.admin", "rest_framework", "rest_framework.authtoken", @@ -1396,3 +1397,10 @@ WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean( "PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS", "true", ) + +############################################################################### +# Remote Parser # +############################################################################### +REMOTE_OCR_ENGINE = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE") +REMOTE_OCR_API_KEY = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY") +REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT") diff --git a/src/paperless_remote/__init__.py b/src/paperless_remote/__init__.py new file mode 100644 index 000000000..5380ea5ac --- /dev/null +++ b/src/paperless_remote/__init__.py @@ -0,0 +1,4 @@ +# this is here so that django finds the checks. +from paperless_remote.checks import check_remote_parser_configured + +__all__ = ["check_remote_parser_configured"] diff --git a/src/paperless_remote/apps.py b/src/paperless_remote/apps.py new file mode 100644 index 000000000..8cd3199f9 --- /dev/null +++ b/src/paperless_remote/apps.py @@ -0,0 +1,14 @@ +from django.apps import AppConfig + +from paperless_remote.signals import remote_consumer_declaration + + +class PaperlessRemoteParserConfig(AppConfig): + name = "paperless_remote" + + def ready(self): + from documents.signals import document_consumer_declaration + + document_consumer_declaration.connect(remote_consumer_declaration) + + AppConfig.ready(self) diff --git a/src/paperless_remote/checks.py b/src/paperless_remote/checks.py new file mode 100644 index 000000000..b9abb0592 --- /dev/null +++ b/src/paperless_remote/checks.py @@ -0,0 +1,17 @@ +from django.conf import settings +from django.core.checks import Error +from django.core.checks import register + + +@register() +def check_remote_parser_configured(app_configs, **kwargs): + if settings.REMOTE_OCR_ENGINE == "azureai" and not ( + settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY + ): + return [ + Error( + "Azure AI remote parser requires endpoint and API key to be configured.", + ), + ] + + return [] diff --git a/src/paperless_remote/parsers.py b/src/paperless_remote/parsers.py new file mode 100644 index 000000000..493b7d7bb --- /dev/null +++ b/src/paperless_remote/parsers.py @@ -0,0 +1,118 @@ +from pathlib import Path + +from django.conf import settings + +from paperless_tesseract.parsers import RasterisedDocumentParser + + +class RemoteEngineConfig: + def __init__( + self, + engine: str, + api_key: str | None = None, + endpoint: str | None = None, + ): + self.engine = engine + self.api_key = api_key + self.endpoint = endpoint + + def engine_is_valid(self): + valid = self.engine in ["azureai"] and self.api_key is not None + if self.engine == "azureai": + valid = valid and self.endpoint is not None + return valid + + +class RemoteDocumentParser(RasterisedDocumentParser): + """ + This parser uses a remote OCR engine to parse documents. Currently, it supports Azure AI Vision + as this is the only service that provides a remote OCR API with text-embedded PDF output. + """ + + logging_name = "paperless.parsing.remote" + + def get_settings(self) -> RemoteEngineConfig: + """ + Returns the configuration for the remote OCR engine, loaded from Django settings. + """ + return RemoteEngineConfig( + engine=settings.REMOTE_OCR_ENGINE, + api_key=settings.REMOTE_OCR_API_KEY, + endpoint=settings.REMOTE_OCR_ENDPOINT, + ) + + def supported_mime_types(self): + if self.settings.engine_is_valid(): + return { + "application/pdf": ".pdf", + "image/png": ".png", + "image/jpeg": ".jpg", + "image/tiff": ".tiff", + "image/bmp": ".bmp", + "image/gif": ".gif", + "image/webp": ".webp", + } + else: + return {} + + def azure_ai_vision_parse( + self, + file: Path, + ) -> str | None: + """ + Uses Azure AI Vision to parse the document and return the text content. + It requests a searchable PDF output with embedded text. + The PDF is saved to the archive_path attribute. + Returns the text content extracted from the document. + If the parsing fails, it returns None. + """ + from azure.ai.documentintelligence import DocumentIntelligenceClient + from azure.ai.documentintelligence.models import AnalyzeDocumentRequest + from azure.ai.documentintelligence.models import AnalyzeOutputOption + from azure.ai.documentintelligence.models import DocumentContentFormat + from azure.core.credentials import AzureKeyCredential + + client = DocumentIntelligenceClient( + endpoint=self.settings.endpoint, + credential=AzureKeyCredential(self.settings.api_key), + ) + + try: + with file.open("rb") as f: + analyze_request = AnalyzeDocumentRequest(bytes_source=f.read()) + poller = client.begin_analyze_document( + model_id="prebuilt-read", + body=analyze_request, + output_content_format=DocumentContentFormat.TEXT, + output=[AnalyzeOutputOption.PDF], # request searchable PDF output + content_type="application/json", + ) + + poller.wait() + result_id = poller.details["operation_id"] + result = poller.result() + + # Download the PDF with embedded text + self.archive_path = self.tempdir / "archive.pdf" + with self.archive_path.open("wb") as f: + for chunk in client.get_analyze_result_pdf( + model_id="prebuilt-read", + result_id=result_id, + ): + f.write(chunk) + return result.content + except Exception as e: + self.log.error(f"Azure AI Vision parsing failed: {e}") + finally: + client.close() + + return None + + def parse(self, document_path: Path, mime_type, file_name=None): + if not self.settings.engine_is_valid(): + self.log.warning( + "No valid remote parser engine is configured, content will be empty.", + ) + self.text = "" + elif self.settings.engine == "azureai": + self.text = self.azure_ai_vision_parse(document_path) diff --git a/src/paperless_remote/signals.py b/src/paperless_remote/signals.py new file mode 100644 index 000000000..81955a479 --- /dev/null +++ b/src/paperless_remote/signals.py @@ -0,0 +1,18 @@ +def get_parser(*args, **kwargs): + from paperless_remote.parsers import RemoteDocumentParser + + return RemoteDocumentParser(*args, **kwargs) + + +def get_supported_mime_types(): + from paperless_remote.parsers import RemoteDocumentParser + + return RemoteDocumentParser(None).supported_mime_types() + + +def remote_consumer_declaration(sender, **kwargs): + return { + "parser": get_parser, + "weight": 5, + "mime_types": get_supported_mime_types(), + } diff --git a/src/paperless_remote/tests/__init__.py b/src/paperless_remote/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/paperless_remote/tests/samples/simple-digital.pdf b/src/paperless_remote/tests/samples/simple-digital.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e450de48269ce43785b8344c63e233a1794abae6 GIT binary patch literal 22926 zcmeFZ1ymeg@;^!l!6mrE;Lb3(yL)iAVQ_bM2@U~*y9EgZNJ4OTf?IHc27(3mH{|=> z-S7V7z4P{+J?EYO***+?`*z*Bb*rj-daCNvG^&!)EFe~HWSZ{c?w0P)-Fe9D05*W5 znGLd_AW#wFVCiNB;DGk10i~_&+#oJMX**Llh$IB;Xbuq;Ms{^`ftcDOdu6l44>H?6H;@?p^vKWPs#P#ZGbM{;Bcf{BGr+ELKNlvYYRNcNgtX+7@aLXT zMQN!S?3XnMOGXzd6?Y;rsx^sOB+DXSS48V9%8C_*Nre0Ge09*fJ;tB)nym>uKSOw1 z2i!o0IGFz_FtqiwM&zfZJvBhw+)rnJ_woEU1@Qha3iwk&AOMJsm!0je>e%A*b|c=( zS#^}IgXn*zCa)u*nWXP(wA20EkL1j%VyEC?M)$S`K=_(QzmCRCLHrFiECjSQtYn`iKg zf}%nOaWK%_&+Ku&A#j>Q@-?@j>#2p9dZv4QKhun z=@em(Dge&env$D{x9Q_-*cI_>U>>Rgrg4#rb67eijW{P8;mu->2nuC92$yD~)|^om zof)g{JNi%po%qS2uXL^$$;LVc720v6ksjPB{pbm!yHQ(d{s&oogF>puBi3^YH8K8~ ztf=^&Z>QNYfr%PP%}Ba_X=avrD9bVAkH*pka_wzWhja;v5}TSXTYZnCH!OGA` z3&Wr_z7-7B5)oa9ALHmvT?5AkgZZZC23wcJ>T-OElbRVKU0r;Baq_`7Pq-kT3Z{JZ znzD>tk*w~s6hTUEMXXn7y`Gwr?fjkxs;nIJ_~l-gs<>$-h<Ro53Nw-;(BpU?f_z^C3`oT3wrR`6@gqyKrECgMXzc67xJHqs zAT-Dx8^>$LdmKT)E37b`Q9HMosc9RLS$SU}H%%K8sPn~!;@wJl8+r3Ni~|WNKE`!R z=<|F3*4*42fu`oqj{85Inim%J8su5@xg8h26nNi<@6U2i3+$78s?@4}*VfUtWWkk6 zeAe{ldtn!>Qb4X=udArd3&K0rj2b-D!=Pmdh8w@li!_F%!*}lAmIHJV5!u@`n11Hu z#F}Fagcv7kuQ4S`TnuB$$LG*7l+ct^QSXK;nPa<~;%{0m9&|Yq?448ky<0xS-kd^R z=`@)^j=-TZt1p0$iI!&iVt%=D96Ou<>au%fn$^mpvHOmuK3obBkAk|UuHVvh2G0bh zVd#_TTdGX6Jv;Kv{=TJ2sA4{=8zx zVGa>A?xEGeV@B7Swbkd+ z`Yz5K(Xo}_Tt4>o8W?%ftQ37A^?FYCHR{9eQ0jBvmGBcLV z7USBIYAT_SguJkPyK>eTf=DHgI?IA7lk=OMias-*WM_{oKsStX;f1tbxPT*rG)H@JdR-qiMbg%YftI!VPiy zZR^}EJtn@&S8k+jFr_tRn+KzvT{naNdgcjyzj@^-Cw4W{vM74GX3aG zA8%4J&>|DQ4h1z-uCB}oY#P?Uy|GYrt+1K%w)kn}x`2wFUsXPfXf!%W(eEb!UUN;={~aG}&ptOzqXF$UaFB(W2)RzJSXYod?!X>MwuK0cJ@kv?_Z)Wq0~* zGOg&X#OHioX*4tz8_S6BMI3fc-aPx9SV>#!LJ6SP0Y&o|8J_vzMoHtuuMdn&y(V1R zK3q=dZ`GNZv1=&=LdVU94vAbHoVU;;EGI@!=NH-SOr^m`dwB(Y2hn07Nsh@#q^8!b zog){pP5B33E|Gl)J?KO2>I$`2g4eMdGHjsV|+;o9-(THn7OA24?NE{GgWdW$|4i-A%#Om9y@vU~Gu zf_#`FM|CtNfv^t=Vv#jFC!namky9zp<6{Wl8^lNw%}gptv8L=)vGr7JU$w5d0xfO@ zO`Hb6y3uS@5GCb|O^vME)$Um$SdSk5l-cGS^vgLtmnCt;I?6gFaT1e^Kycs3X~0)8 z#@Ld>x3EadXZ=fh*Sy_b)t4{;X-ds?7e@fOpdJ~0__})@Tj!i~EmyhR zMIaQ*Gq&r}C!;53!hbq4PU6b(^$S5J$HvCwPj~NadHT7-)7`vvRWj>x(94OQT=S)QiT2GGZDghdV$l(WmRmJFIsV5<7Q&=*@b_>z0*3@5vvn##f4+iAtctFB4n-0 zwal!;jo%)3jY*cxR)?YS9BGm&4jLFzMgE%Zds9|GHgwt$G;dYa(PPb(`E&Rb*J(?S z_{*t4;H1me92Saibz9)2`y4aeaEOjeuoRE9t$Nj#&&W$5r|$}8Eg86;nv zY>xf(Dh_F-t`;Xnc;xxNV!5UqHMfq0Mn~fae3Tz`4iS{D8W|NQbS!2j1 zFH<*9e-3L`+3Q8VSR14DPu+Z%TC5kTag`HZQN$w}xA&Ek)xR!ydk{s_4Go>SZMbzn zL_!NQZ`ynqXsi}XRqLZv8&^~H(aUdJUdVX!Wb3r=2iHsE=MP+Ky3f0aysXerYl3sR z5~I>gd=d9wF?6mJ6Nf#spfYIGs^W}}v(3s@?XPuWV*1IXJ)gFtnL^COB6#`zXTs8I zjVGs@;mP!J+c-_!s)%4fjqBG1$)mOSoCb_f^J1>2>yVNogSmjmRHb3NgQCNlWix+| z$sg8^D~-jq)%N)JT%m5PZNtW+B61^Nnib_);Fa7z$&cGqY6z0urs1<5oo6tjMwHBh zLT5Uxy+ebokmIfyM`}Yfy!<3ZTCpOuLq1}?{DPAe)JsR}5mWMY)a^u$BWv&snUh|Z z@w5RenTyjt7*A*MW61mAPy2v&IL4vgK6m^sl*=XlPajm@ruzpjPB@b8&6E8!ZhOcJ zGVt6uW;rN|yGwpNKIh=k;PasC_;_3KzGd&CgJ!hc@&E-d)yGrgbLMkskg>WWifmaok5-jbv%Y8R!_ZR!a*c(d+@u|ReL8tA^_wR4s z(=t^yBG?}G5mo>c1}UuHA&WLhsuxu1Sd>%h>@wQo;x$#s>+9K^UCE#8x!0)JX4ePV z1sCD*+67yq^suZo1ogxA!0I2<=;p7$hP*h#OSg2PPx(e#C$Lc>?`kEZI6BfLi$SJO zjJt)G59v2|fqDU8FJ>>^UfrMHs7BPnfo3i=cG%(VWP5TAq^)XV21 z>6;rtTl(aT+79zB=gbYc&^^nVu<_A&2Xe&RJh8r#PMxAtj2=F z)%fPs)dVAg;B8O)`^^>5hk(N#67s$PyzgN`w1&2-_?TSfoYwM!!g0AwmnPyNNUxEU zGdjQ_KG?fTY-8g)N^Je5hwqTkvrHD?oUyNz02zSybtl5ozu44-iuMSpv>)lG6f4(H zCxLhD77fEc*}vi;X4N!6E_&~1A$gs;Yve||em_#RSFR3h6Yjd>=9CVaFK(>8{5wA! zKjaDD@8=nN@71b1-k<$AYee6E82M|*{myK$S7Dr2yYe_m6F!X?Kf@arw%S@Lck%O4NiFh)aHP0`uSyE} zzVqrj;R7Q2We;?xXxoRme}!`>0F&jz_Z z+4Z{~oZjNYI|?`TY}u5vk?2_o@$&Ar9*%ca``_lrUe4KB*;HE_UKP0{o(jKXVAguS zm??JaKRl`d!4R~I&*nvK@8{E#-!86{)6FQCt^CTf1*~W8O+HjJB&3v1?$@@eqAvj# zm4QzUQCjSS%UFWQaZ+DoVm5ZeGN8b*u$csVpm6H0!J?!S61UqK9D1U)Ta4)gZU=`a zU77grJahcUgf4TnJ1nQvW4@gP6LXRM6#^`!0#*5iP7e2R2vXlZDxQ&NV}PPF(@o>dC__(T55|`~t+14O#brDBA9x;pGlC zIt@R7J(&;skAWXW9<%BL2Mzt0YUGt3VXHfjly1aZ{T4%F3{r4IP9N?n$87sn305g* z7{!MgE!4V!RL?bXL?rn!f&C2#-is$IQOC-DS+C0ASC!-G1LXpdJ-XDFi0=5hTEAkP zv}<{5TvjT1XZ;GuxUBxJsX~b71ikv8*)L#9xQV1jh`h9Y+0p zbeVCkVQ>QJ4%^~<31eR8ncCGnt2xE^h0nnDQmmJWO-5eB|}CM)u=DRO#rwrrNJlnQ3GDQG*v`~ES$4E(xmH{pAOhk;6c~hK@@-&o1!BJ87^L2x~HgP5i7gB1bZi0mJ$O00#VQur~ zfSb@JH+r2cP*wTMj95A|U)|XFO)E;A=)!tYHa4t`*Tv&{yordhX1yQmk|wAt5%I)d zBNU>~X;9)}m=@!^yXE2bz)a03_R(GxS~8NNg@;U2MYQs82FveyQQ|F1oD#;+u#KBA z{v3$%W5!b|uu(*W3r*+MV&dlKlDiFqC__R}Vrk6=VpQPNtRU>V_{t^UiL*!jZ!czE zq|C;eS?-P3oeq1I-=8E-Ccu4S=bMN-NSK^es(qNGzG-kr5pu3dAP&XG3D%?F$ly(j zMqvh1=gQC0P+_~*_I#=DUshU>3ya;z>AIE2q7Xlk#yW~gzl26iMH!1Y|86GU+i>iS* zEN(?Bh%T-N$i;cDD8GDF`|8Xd?hKb$mrDJLDU4l=mf|JCiZNuP@~W92WyFODR1NPD zn7gh0EiRuXSd+h(mvd4M@1Cr`(<~?2;(QA*I4(8L*1O#G-@OI zDRO9j@%?0Hyb(7=Rmp^duPm~ZB^O*!*FUID4S!fc!V^pU0dY|%+!^y&JGCszlWIu2 zUh3S%QDHY?`YuOPV<_fF$wP_^dJsDNE=kmsd zBmG7+@)hcp)4s7n$s>+9{)o(NZfRrZEFbt^=*&#zyT8;qZmsoD5o9%3)&*@6r^ zm%bGXWtg1My4uO;e(bc1|6!wEj?}Bpzk&3b{9y3HGWM%}Q=yo}*OM1qdkZ`CPkW*O%DR3o%(%G_(6lTi~BdHmdw zjg=M}x*YEEBctv534OShxFd$-QFhG91mK3co1$-;JgM~o967uFLhrH;@SPz6OY&P` zr_AG;t`E5xE5Na@ltOWsJ?hN4)Rhp@VN;T8q1}S4+=8iguB9Lf|4}aewQu~e6tFKN zoYkbO*60LG(^0#$>IkX*6b5X2J&C`c86sp2at_%`-{AG|Q20|F=4knSURw^mWbvvz zR#uoU3d3Gie&Go)iXj?X$DUUzXOS&AI(Y5*y;;J$Iq9&gL7}sOBHrN$kWJ!3phqwv6>k} zay@rFY5dW+v!nd=>zH+E9`7C8>Z=W)}?i- zd;v3|?}A;cuAVvPhv+fq)dJdjD$9IfM+pYV^=bGypcaT7$$t`|JF@zEJzdHWM^gX z>vwzR5s>Bk=~F$pRddetz{x{!?!=O-|ny^XIrdD z2V@1aoNqLZ<`NW#H<+*9@NUDc$#j{E-$+Iek;fj6izKqU{`ox@mtp@Y{%wRL` z`oPTn8N)gHpf`d!JFi=6m&Uukv#8}xTxAj~k4Gs^#v_jt-^JS980*?D>Xp7`MVN8&@n+X(`cc64x)gt&rT ztexB(p_w1(p|Ytx1Sl@{v@t1Kn?YPm-K-rQ)FCd`7RXPjAxl>PC$s^j6c=~&(r4k~ z^wXGK3qV&`N3 zf5=@KCJj4u1E9GpzZb)9nAb_X7=v{ww6&X3a{A>c;~5#g z2n27yt~OOjgO%i(C}J0VHW*W0r^uTxs{5ju(-z)2C0J=o>_}p@9tgf5AX-d-FuZBd z$qLEa`kvp9BKUf<*Y>#2XzTmNeBYN=zRMTS=K<=kG}R=?Zw1{C8;JoFb9EE7c3my8 zZ$&z9e~ND7So+~|*`ynkb$DJN7b;H4zjZf6`fy{+!Zh#lW5#l)h%;uAlVw(OGux|P z!U%MfG}p6;>mrt2Hb6XUKA=CVnn${WvMI!|g)zeQi;Cl1urBS4ZZt1(Y{YUib4+Ne z=&+%GXZio4Gx? zj)sdnJUN4+>7Q*77cx-W+T6{`Ri6jyINVUXaA#VCWXl50t0r7G({3ri} z5Ksf+(6f|uM zlz>VMozfNXTj;-DKb-#%P#M7Sw+t|d>$hzG_bK4_nVK3}hPQ`1dMZ4EZl2IT&jvSH z&xW9nAppi4O8Kb#pdfnW*CEj%h>Y(L8R<;`@cHx~yxTp0p4?+xJld@y>_5cWjyBE; zRojxW!dxwP`*nj&`C+=KzMK7wJ@?8nH1Gxu2f@4xLL~@}ZE&Mtn4iG_LC1j~#vV;p^V6MRt~AjvP#y%j1EWc96Oe>Y zI!&G5bYebw%Co3(=P z^oDtP3WF+hcj>pu2cyA_aW?fh1GoPye-mchbEiF1p~05##4< zqOdrk&rv&`-F5m?kbRNl(3I{-81)mq>n+|<9tpAaLj9Is%_mh}BEiL@G8URg2^Vu*f1zhM!SL3?Q-x zj{sZ;u<9c000INg_~H1(SQD8x(Y?hZDI5pze?`I;0O;Y6Ln-rR>4~DlzZbyKBk{wQ zh|3hX(PIxqxD;%t3cJBFhDGO3se&|+QzdX!aW$ULh@GoGcY9_NqL&{tPV}U zkB`J&H|NC_Mz-wwb`0XhU=32~DqA=Ef?6F^xvuwx%pr()-QtSU52+2+vrBv3=nFYn zkYi`}F`^*yYGnU9(iKP$O(fKEJ?+@m3o`%#*v)h-bH#Co`+)A)wRnu)f^tM<0*4$d zwT4LzM;h|1Gt5NF3GfB81@T!JTS!Ers4Rs!CNd%6POV_jY z*G^(zs9IhMBL+&oq{P7tel6WYfrTma()u;3BpxMxQY5`74#g-y9y9edQID?V^Fqvt z5Gx5c06(TSrvK~x*IBPAdJxTUPGC+DPY6J9UJ>aME#l0SD-alfONSQ~zB+klu0h0z zofyV)o!&H`22};_Ong&FQ*=`VktkZhVg6wOSs_`Gg=+mf?RQesST--t zkH! zuxCm4xZ#)YMJeS*r{ci_prnNj3E$2+81xuUz<>?QVaYL zGY|Pmf!4c7H0C;u;5UlzZBmZ+AnmzW*MF3}r8ZbNS4Z^IXR zySAP0VP-k}#VtK5>qMoN=XHI#jGftUe_5u$?gP zE`cthbtco~<48nIkR(=8@PPCt4Kg;(YZpn}LcEDYE9H+g{Fp+o1A1PX;edkErAKJD zu~jgKqdxVV_Go>_H3K>a@rt*oWK#=MwNbXwRAbFWW%YAv_rKPr=qT18O-+-iCikt_Z_-+guRMNQ)`rSGsX7T>uBbS&*m@FWlJdv% z9~?grtRqpAK<4ZjQ6pm8bW;P9<}`J*--7I#w+=kpWOHG@M&wJ*mSLq#OR^or zAF$tKbrJDS6qFB;%%y0jZl|ev)BdJmpc#icT(m%Kp1uVKGa5%KsZea9Ed7-o!zd0= z9)>0xOGd81{M9dpGKSO?A9;?F&`Alx{8-gK1{HeO6saMEA^aiQEg)$Kx{3@mK)z9e zU65R=UN}|Ekzb!*U*=kJT7Xg-Q>aj=P$nR)EvYT7Ei)(U8Fk3GjMjwNgy5#`=IR#X z#^ko|rtj8#M(;x@L?wjK3e!r^O47>M%G!$5iq?wViti8bhx3Q^C-P^#m%CBB(Yukp zQ8?57fv_vH+yA5Nhw*rOiE+_t{-e|j4dzsFNWm{vsds7`G!=L=uWh7+B+v72)Vs@1 zra5iUKPBqPzc1ldTPzov&YEtXmYFV_)}E%>quaySBZr539t_<=#e&4L#d5`RI)gex zI)giNIuj^jDdH(oDIyCJ3sMT=3!(}_3)0r{y6L;&x?Lk)L|~xqqw1sH#u7*)Q=w1+ zi|C6`i;#<0ix`SnikONpz=&XGFfy1MOaW#9vw*R|2w?IJzy`qvxKFo_C$=(nl^~Eq zOwo&$5gQ>HCK)anHkme=A{jRsI~ia7r8cEn($Yh-d{XoP>Hbi`w%dZcZ{bmU}2CL;lqXI5?QVB!F78xrjB zOY;u7<~yZ4Wjy8CCdGq8NeWL2kC953%9IM1O6!g7&F#(U&7%pSNumj*$ty`Od0P^- z#XijS*7Pj`^AvN9&RgBK4|esiS|m)VsiB&Qn$emWnvpXpGx0MyGkIJgTuEHX=Ww=2 zwk)xWj0+m6qjpS$Bt=xRCYPwQf8hwE+Yh-()=G}qnMMb^{R zS=Qy503jm7D_twS!X3gx!h^#75ApXLx3ssy2=Sr8p)nF+5)l${-9g>a-5K4HRIyaK zR5?_6MPWtpMd3v;;2?0y2B;q;o+chhvY7pnE;u!7g^>0oEEd?yeckD+av&VSx`Q~^7T@Ia18{ZjZ8&De58SEKE7;qa@8w40684MbH zGmtV!HP~t7|5EL|yzSF-p1sd9M^~-n&=I#|@qK$8Z$oQs_B;31=bej9^zZRo${VB5 z`(^cp!`9(;Td#kDZ>DdqZ}_dir7;;QMrvfTOqfi(OlW_4e|&!qT_RlsUFr_63P^=i z1x1Btj3xJ0a7wVoTgyA(_~(h4H=$Rfw{;70%R-Aq3wn!D3q{LG3t`KPmaP`FmV=f# zFBva7uN|*6uQ~6)Q;KVPzn^~Me(ZjGe$sxQ{SN(t{Xl*)*T1giulKGOu6(W|uE|cm z{%YO0?YfL1A5>Yl+`<2zwm!GUv)!<9xrwozu%WU8Tm$Ynt{tr%?X-74ra#g=B0MOlK(lsF~2B3!6eM2&ZK&cuv4#7tCOsgqBAn2K13m8KSV&>ilX9W!%HRESF+jz zY6DUO#`N;^n)E8~Glx|@PceNBPus_T;L zBESfij;noZzwA$r71lqb15k(ktn}WmPy)3>Ph-Y zMo5uGwY>BO{!%j6#sLm7GXfY{Dp+kO_W(XRoCch_XX#e8ySft!g>#Ze(aKS6c!t=^ zg`J$paiO&G>iIt=nXRiois~`5glfrF7IIBBSxbDB`N+56yTG%MOCT1jsi>-`rKm8X zI3hQq!mP)v^fOE+T&GAUZJwppxmLFpQtN2u``ybK&soe_*O|rH)!EWH@Py#R?L_>9 z@r3ro;e_Hu<~y)Q5cL2S305IiG?ow6Ec9!w6AKCJ8&(<3sJy|O-86#K^tU=G zSCq9>QAy@0o5?9}rxQ6--BQkyW8Vrg>H>6WzA<}i{`$OGyva1kHORtHjMIwKkP?n%tyZa~KZ$7VW9?v#WX)v_{vk0bJo#!;rUa$9 zvRJ>^w^*jQtk_r~@XZ5D=v&j2^|Yk76DgoH=Om<*sN}&nd@Qb7rH0#0BSIOL>UF6AnQQD+vTM0APVbV0F7^Jsm(q(953TGsJHKbjkYNNHM z*;L6{bf#r3VQrjIHB!P{P*rVL@w1BLU3PwUr9p+CSfl>+=U*wm3V${Isz!blE+M5T zrP(XlD@P+rV?-lUBDvKfpEhKl%8)9R>Yi$-S;!b!@uecOqP(KsCf}xT4>ZF*BRxYl zBTz9_;i%bO{){V{C?PW?voy0Lvsv$*Ui1ReC!0?(HupA{GZr&i)1Rj&_7wNT_mKDC z_b&F@_CR~OdzRD0(*iS*GiB5GGwl^e)`?tSxTv`}xtO_@xVkvixR5z*I9aWi#tG)U z%1>TxG|(z1DNt#q2ZU9}y({TWX>MoK|aL3dbZSdK}R z>DB%ht#GZFS+4iKdi8otdW#DL_IUP~_7HopJ>7xm0ri34f&78iLBPT70`TM9$H4tH zw|Tc7w`sQ#Hv@NZcQrR;Hyif_cZWU2-Im$w!um3W*{#{s*_qi-vqDwGC0XUZ(w8j` z4Tq%5gv*fSJRf=?S|L**8X>R{T`QoKy4A1Mrj@((S!<-vsgLoQ$NAum;LYR>|IO!{ zl^f8_x0{lixSQ4+@J;;<(@i_76Z}v3@9=x@c<^NKnD9T~%g~5Wo6w|DEl}U0aidK{ zFh>+dR76-rI7FC6SVu@joJZtDG@zBCrJ)8A6~yYqO2%o#jl?d+cEwJ{Dih{#?6DJp zGC_LW3+#Oa^+Z##GjS}jLa|h_9I;~rc|3<8GOhrJ7Mr_8IL9t)O6x*%CwAD$DQClw-gLxi`NzrB@&(@2!;rw}P{Rl7g**Bh-`z!^p!iZ+TegSXOlc z8pQQ#btbB&-!s2&sWPi_sTz9Es)wedq{E`;-yp5Wp+8kM^ZtDmhJL@!j-IwogYN3b z+>Zga_ucUJ-8NJXMb=>O1{mAXb*XfrwA*69V!`5@#Uh^Nur-r?rR}~6u~n9xzLmb? zBE)yjW7gyAMGa|h{Hk*2==|sm>k?~s^?dbU_3B~4a>z1|Z?RCl&{w1Gm4%gl;T7Sr zw!VkDd;8n63#JPJbZP`61aAZ?1pG+0NViC{NZQDN$VhY)^i_-k^jb13kz)6Oi^Ru* zVu=@#qhx#hoIX~E<7E=Q@ftD|GPp9XGQfU2Me@FElDN);ildbzgm(eQSv^5IPvTUGT$n zfBne0r)XfKMJBIbB0(yFJXc3WS%pyLxeET6(-`xZ%NX|<#Telj?pV8#n~}It1@smv zY!qem!f2<(r-jU`_2MRgz(0NC!Nu9ynYUfD<#SMDt zleMXiPaXGwK0qztAYVd^LX1#sODrwK0YM1kh6EL%9!>#%A3*>?25tvV`-L$Q8Cn2t z3uXxVI%*OM2x7-4Fw#AR}D` ze+AQ?(X{yVn3XtZ;79q7?rx%PHg0lm`e#mOc4t1Q>=9JbtbNpdOtci+=8E8vxO6&D zCFr(xq!z38rnb6vqSm|iyw<-KyLPx%!1>I%(0Rc*-g(`b&zZ}4*%{;eN{@Yi@<#Xu z*9Pf#iH-X2mEQ$6j(UMTjx>o_aM;t>Ke5EI*s&S0tErb1bY$lhscDgEE9e$zBB(8B zwP-zPR}>6nm*gpFkZ7!_`KZHbAvBtF?leNOKFMcH8E=^1RHgn*CrQfAGDurWzN7bM zxE{ldpfZy=i~m9vCHy9^cwP1}shp~k0T50j1ub~!Wul=dp)cWsux)t{`7ko3GHE`2 z(TQ-ed}97Y<<99Yd~QhuQeso0`bxpV%tH5``#qgK<$?dg;j7Z3H!^4mJ|uA>KEJG{ zCOV785~|CJrW&VGr`}IJpPHItniQBiDr=W}%n{2I8y^4a^!4>1Z;~?w7_ed8x1(rI zt6geVs%ar=;bS3XVYz)j(vZ`rptcTW0Pg zr@hgB>&bUyw35}5;}_u<+-XdAfcn3pd=;UWGQ)*lsyU8Zax2y!ud z=&^WKBKlFZrsF-J4f=%r$)tEqv(tj2QT3y0&G>t^wqp%b+jZG>zx8doA-M&)*5pGv zY~@^G?~~1(14qDQ0u%KV<;fGLOZUL%5W zgGC)Epn<39QCL-O_vI(#n+0EqLDkZSrOG>5Z(VQ8Yp?5DWYx}B1v;kcn0n4tg#~8$ z-@SVqfnAZ8F}h|JdgP{~j$<7dWj#{G)Kilquy&F7k9m2H~Xl~NVw6+)HC zl?s(L8VTC%HP5)RxrVqrxYoJ6x#GAaxPZ3Ywm)oXY~pPFZ4+j-W-exxX96m^r%&dr zXB($`XL72WHF&fxs~s+x7j?tyxh*Be$V7Y}ey)8>v*g+Tu}`z7wCBG+wQsw3yH~b9 zH?}^Clrtr?6Ttez;78-AE5_J+P5_Y?(KHbWu^CY`(E?E)(F*s^TuXr(uayhGtLwW{ zr5(q-K>vr!7o{k5_{=~!VXD^`9hSFKS?)U)PG zn~Nzc8dKA&XCJMLGUgR+x$Q0-gcl5!m*xbg?mmT&Q!bc|yL_Q}Us6$018Lf_Wh&78 zIFmoq%=P|L)2ABu((`?(?@|j=-7&)W2}V_o^MNNndL8XIxof#MxZ6L+es20qY+r9L z!a8C2rp4TY;aKgW#5K*q(8t@lIj7dJbh%Eb_}2YzS+R^Cq63XSTGnU6kaFtL*a}ULK7dI_{G11uj4C@GrWj?#It< zRO1^TwFrCP52JOVEud`>KjU$6$ZiMuflvAlVfup>f=)%dM89>+1k*hy<{9Lnam-vr zZOrhjzuw;cTB3i2-jl&X3R68`lpO6&r9J7FXjN{kJ`K4*DDBWMrwZsJ5vn zsIIBrQ(a06FNrNs9s!7X`b<{S}vR-n)7RlwRk z_%iD#VI}Yp=3MkRxbt~C!Z!?7!7rz4%e;c4BGm$iLb-e^Y{G&M+Z0{{x2ih@bKPf= z=NOS>tinryJ*Tp_NjpVF38TV9ejksm97BA?ZM$5*@AiC)>Wm7HYL&~CJCLgxJR7_j zEEwz^yzg&Ga3>=V$UeDR+E`3jRTfkCQ}n$`iGQX8r$Vxr*_MFRhsig1@Et8 zPcpu9^jk)aG3K6&_@3D9@ZGwe?;MUz<>vVdpZPiCjr{t$_w{CUdxZOI{MYIc(;X{U z3)ceIa92{-WLIP3@zz9t(@V+=sUI;b_+!R*K7$u(2PKQc(`lB*=&gRI$UeP~Lu;vz zmEamZVI_-sY=Yv|Hjho?OStRnC^i)9rixGyRDM3&-x2TtDBfgX9L0 z6;P{sz|OD3f#wCz(4!URPK6K!X$3I_y0;5yv}xRL%b^eY@xyY%Gs8S-_$v}eFII@# zF#Pf!A0}_3E{VzCk>d#S2FO0Bp03YE_TqX;IrUo6c$JuxfFU+pw~QMVzbb}phW5VA ztk0BiO=s?Ae$%^JDF1Z$smykf{F|_i-`QRIb?wQ*7SyeOc6KW=1-pUIq0gGrJd2LbtBmbve)?DF{`hwezl6?wCoZdx z#@ZflI!`2SBgYb!$ngUOZ)#7H*UKhV=JKpXhyzyc8=K_jQ&v7MD$Z9ew0yd@Kxw3@ zgS6o`YPQTYoSWHtW;rgJ2v{{cHM{y+_}=381K%49x5wYx+;Uy-FFKa4R$QM`X1lk`0gJq8cws? z&$;keH27Wi9d?nmK*OAE_>J04bL~9s7|>g%(q;E$%;oKc@xjsK?NN^y#e=s&`il;V z7cenMFclTgUZTSUeSZjqVZ#?${htbHo@y(eYC?E9dH(@I2R&7T{Iya<#of)x-A(Ry z-3GMCLq1_J(Z0$R~wZv9k?C1(yT?69`5hPcqXLTf$%a#9l9AU)^?j}S@`6pv1a|_caiT}de)Y{tqH}|J=KnGW#wfS$6pbY<@=7(0>xCwIz0{?ml zo$v4G!Hy0VjxP454q%9|_}|I?+WRCO@JXD%i1+_OI4;s?Qh~h>G&m} zuaAJ%7HT*;+6imBSVLbhAqf1h{l69Y_fCi;^!*dU>}>38EFcgIh*tx|#m~;h&&kWg z2I6OfuH(N@{x2)}L)lP&g8Cou?+*B+@&7Zn|19MHZ+85TMgC)vf0G9O<68eF#s14| z|6`GVS#|%Wf&a0{ze%zGGTZ-Hn-u#m zv;9BEBL9e=LGg@2l%7w~vHvMz_!pq23KTQv4Gmx+16A!z!4P|B@DC_%=Loj_E9&fT z5yrov&i(+8N!dZ{p|DCQl+4t@+|S$s1FAXNL6l5?1K<8!22d3WB>i*pzc-$=0aVccy}W;(^f#kFCslU0chyH`1Ly<( zCY|A50;B&mBKw5hb4C7dfIVd3-w}Jz$m-uAd(c^d3fAVX`hRHl=}^Ma{Rueumnqo) zE2vTe8h5pGw1lo4I@ccvL1dtc6U0Fb{1oif|7|4zP)An};NWg&_s2grDDQt(-QO*s zXzE}|4{>18RA+$7Y3gDt2MtU@-QuYUG5-@D2%X_C!Dd(e-?3;lL+HBx2m}K_zj3#J zmHRJn$v;DSg3?mb(rj#8JZ#V^CpI>2KIonkx`)CtdDx-G_*)?IPdZMCKWMn2w7==0 zbez!f+}wYDpyRnYpdV;Il!T1~%9mXVx@Uv(`aLC79Z#V_dP!(Fkev+#0`Y>l*x7!^ z1?kw>=otPYu9pkM0%~S<4rC~&KQ{n37Z(Q?zyk1R9}gD?)ZkAWz~TQY+=_7iIzwS=we$U&?!S+P!s`D6h(o=UGn;zf@Ux^bR~{QiN51tfzOz8%?u_+(c;KQ zhSD}T@LJ)eXGF>TK0gtN`{OwrawtjqNr zu=Ty=!&Y~Iu{Jf(7qWchg&`<|%RL{t1SLLk>*XB}8ke?Pd;Gj%M2wgYKkM)?7506b z)Cc-o(|s#&`Rh0lg~C4S#JPxi2#hpVu65C6obhw`Ur2K@=FM_GeSBSizlWDuDCw-V kPGfa=d?`gW`dlS1xI->&7F}1o7(8T1_&&7T@9RN#cXxA{fdBvi literal 0 HcmV?d00001 diff --git a/src/paperless_remote/tests/test_checks.py b/src/paperless_remote/tests/test_checks.py new file mode 100644 index 000000000..8a257952e --- /dev/null +++ b/src/paperless_remote/tests/test_checks.py @@ -0,0 +1,24 @@ +from unittest import TestCase + +from django.test import override_settings + +from paperless_remote import check_remote_parser_configured + + +class TestChecks(TestCase): + @override_settings(REMOTE_OCR_ENGINE=None) + def test_no_engine(self): + msgs = check_remote_parser_configured(None) + self.assertEqual(len(msgs), 0) + + @override_settings(REMOTE_OCR_ENGINE="azureai") + @override_settings(REMOTE_OCR_API_KEY="somekey") + @override_settings(REMOTE_OCR_ENDPOINT=None) + def test_azure_no_endpoint(self): + msgs = check_remote_parser_configured(None) + self.assertEqual(len(msgs), 1) + self.assertTrue( + msgs[0].msg.startswith( + "Azure AI remote parser requires endpoint and API key to be configured.", + ), + ) diff --git a/src/paperless_remote/tests/test_parser.py b/src/paperless_remote/tests/test_parser.py new file mode 100644 index 000000000..793778ec3 --- /dev/null +++ b/src/paperless_remote/tests/test_parser.py @@ -0,0 +1,128 @@ +import uuid +from pathlib import Path +from unittest import mock + +from django.test import TestCase +from django.test import override_settings + +from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import FileSystemAssertsMixin +from paperless_remote.parsers import RemoteDocumentParser +from paperless_remote.signals import get_parser + + +class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): + SAMPLE_FILES = Path(__file__).resolve().parent / "samples" + + def assertContainsStrings(self, content: str, strings: list[str]): + # Asserts that all strings appear in content, in the given order. + indices = [] + for s in strings: + if s in content: + indices.append(content.index(s)) + else: + self.fail(f"'{s}' is not in '{content}'") + self.assertListEqual(indices, sorted(indices)) + + @mock.patch("paperless_tesseract.parsers.run_subprocess") + @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient") + def test_get_text_with_azure(self, mock_client_cls, mock_subprocess): + # Arrange mock Azure client + mock_client = mock.Mock() + mock_client_cls.return_value = mock_client + + # Simulate poller result and its `.details` + mock_poller = mock.Mock() + mock_poller.wait.return_value = None + mock_poller.details = {"operation_id": "fake-op-id"} + mock_client.begin_analyze_document.return_value = mock_poller + mock_poller.result.return_value.content = "This is a test document." + + # Return dummy PDF bytes + mock_client.get_analyze_result_pdf.return_value = [ + b"%PDF-", + b"1.7 ", + b"FAKEPDF", + ] + + # Simulate pdftotext by writing dummy text to sidecar file + def fake_run(cmd, *args, **kwargs): + with Path(cmd[-1]).open("w", encoding="utf-8") as f: + f.write("This is a test document.") + + mock_subprocess.side_effect = fake_run + + with override_settings( + REMOTE_OCR_ENGINE="azureai", + REMOTE_OCR_API_KEY="somekey", + REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com", + ): + parser = get_parser(uuid.uuid4()) + parser.parse( + self.SAMPLE_FILES / "simple-digital.pdf", + "application/pdf", + ) + + self.assertContainsStrings( + parser.text.strip(), + ["This is a test document."], + ) + + @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient") + def test_get_text_with_azure_error_logged_and_returns_none(self, mock_client_cls): + mock_client = mock.Mock() + mock_client.begin_analyze_document.side_effect = RuntimeError("fail") + mock_client_cls.return_value = mock_client + + with override_settings( + REMOTE_OCR_ENGINE="azureai", + REMOTE_OCR_API_KEY="somekey", + REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com", + ): + parser = get_parser(uuid.uuid4()) + with mock.patch.object(parser.log, "error") as mock_log_error: + parser.parse( + self.SAMPLE_FILES / "simple-digital.pdf", + "application/pdf", + ) + + self.assertIsNone(parser.text) + mock_client.begin_analyze_document.assert_called_once() + mock_client.close.assert_called_once() + mock_log_error.assert_called_once() + self.assertIn( + "Azure AI Vision parsing failed", + mock_log_error.call_args[0][0], + ) + + @override_settings( + REMOTE_OCR_ENGINE="azureai", + REMOTE_OCR_API_KEY="key", + REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com", + ) + def test_supported_mime_types_valid_config(self): + parser = RemoteDocumentParser(uuid.uuid4()) + expected_types = { + "application/pdf": ".pdf", + "image/png": ".png", + "image/jpeg": ".jpg", + "image/tiff": ".tiff", + "image/bmp": ".bmp", + "image/gif": ".gif", + "image/webp": ".webp", + } + self.assertEqual(parser.supported_mime_types(), expected_types) + + def test_supported_mime_types_invalid_config(self): + parser = get_parser(uuid.uuid4()) + self.assertEqual(parser.supported_mime_types(), {}) + + @override_settings( + REMOTE_OCR_ENGINE=None, + REMOTE_OCR_API_KEY=None, + REMOTE_OCR_ENDPOINT=None, + ) + def test_parse_with_invalid_config(self): + parser = get_parser(uuid.uuid4()) + parser.parse(self.SAMPLE_FILES / "simple-digital.pdf", "application/pdf") + self.assertEqual(parser.text, "") diff --git a/uv.lock b/uv.lock index d1cf11ee2..c621b203d 100644 --- a/uv.lock +++ b/uv.lock @@ -95,6 +95,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/ff/1175b0b7371e46244032d43a56862d0af455823b5280a50c63d99cc50f18/automat-25.4.16-py3-none-any.whl", hash = "sha256:04e9bce696a8d5671ee698005af6e5a9fa15354140a87f4870744604dcdd3ba1", size = 42842, upload-time = "2025-04-16T20:12:14.447Z" }, ] +[[package]] +name = "azure-ai-documentintelligence" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "isodate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005 }, +] + +[[package]] +name = "azure-core" +version = "1.33.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/75/aa/7c9db8edd626f1a7d99d09ef7926f6f4fb34d5f9fa00dc394afdfe8e2a80/azure_core-1.33.0.tar.gz", hash = "sha256:f367aa07b5e3005fec2c1e184b882b0b039910733907d001c20fb08ebb8c0eb9", size = 295633 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/b7/76b7e144aa53bd206bf1ce34fa75350472c3f69bf30e5c8c18bc9881035d/azure_core-1.33.0-py3-none-any.whl", hash = "sha256:9b5b6d0223a1d38c37500e6971118c1e0f13f54951e6893968b38910bc9cda8f", size = 207071 }, +] + [[package]] name = "babel" version = "2.17.0" @@ -1451,6 +1479,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/fc/4e5a141c3f7c7bed550ac1f69e599e92b6be449dd4677ec09f325cad0955/inotifyrecursive-0.3.5-py3-none-any.whl", hash = "sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f", size = 8009, upload-time = "2020-11-20T12:38:46.981Z" }, ] +[[package]] +name = "isodate" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -2118,6 +2155,7 @@ name = "paperless-ngx" version = "2.20.3" source = { virtual = "." } dependencies = [ + { name = "azure-ai-documentintelligence", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "celery", extra = ["redis"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, @@ -2255,6 +2293,7 @@ typing = [ [package.metadata] requires-dist = [ + { name = "azure-ai-documentintelligence", specifier = ">=1.0.2" }, { name = "babel", specifier = ">=2.17" }, { name = "bleach", specifier = "~=6.3.0" }, { name = "celery", extras = ["redis"], specifier = "~=5.5.1" }, From ba4d88c801e75b2cd754aa5213ad3180e92c087b Mon Sep 17 00:00:00 2001 From: GitHub Actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 8 Jan 2026 21:51:48 +0000 Subject: [PATCH 8/8] Auto translate strings --- src/locale/en_US/LC_MESSAGES/django.po | 76 +++++++++++++------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/src/locale/en_US/LC_MESSAGES/django.po b/src/locale/en_US/LC_MESSAGES/django.po index 6f64a0b88..850c20ed5 100644 --- a/src/locale/en_US/LC_MESSAGES/django.po +++ b/src/locale/en_US/LC_MESSAGES/django.po @@ -2,7 +2,7 @@ msgid "" msgstr "" "Project-Id-Version: paperless-ngx\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-01-08 21:37+0000\n" +"POT-Creation-Date: 2026-01-08 21:50+0000\n" "PO-Revision-Date: 2022-02-17 04:17\n" "Last-Translator: \n" "Language-Team: English\n" @@ -1702,151 +1702,151 @@ msgstr "" msgid "paperless application settings" msgstr "" -#: paperless/settings.py:767 +#: paperless/settings.py:768 msgid "English (US)" msgstr "" -#: paperless/settings.py:768 +#: paperless/settings.py:769 msgid "Arabic" msgstr "" -#: paperless/settings.py:769 +#: paperless/settings.py:770 msgid "Afrikaans" msgstr "" -#: paperless/settings.py:770 +#: paperless/settings.py:771 msgid "Belarusian" msgstr "" -#: paperless/settings.py:771 +#: paperless/settings.py:772 msgid "Bulgarian" msgstr "" -#: paperless/settings.py:772 +#: paperless/settings.py:773 msgid "Catalan" msgstr "" -#: paperless/settings.py:773 +#: paperless/settings.py:774 msgid "Czech" msgstr "" -#: paperless/settings.py:774 +#: paperless/settings.py:775 msgid "Danish" msgstr "" -#: paperless/settings.py:775 +#: paperless/settings.py:776 msgid "German" msgstr "" -#: paperless/settings.py:776 +#: paperless/settings.py:777 msgid "Greek" msgstr "" -#: paperless/settings.py:777 +#: paperless/settings.py:778 msgid "English (GB)" msgstr "" -#: paperless/settings.py:778 +#: paperless/settings.py:779 msgid "Spanish" msgstr "" -#: paperless/settings.py:779 +#: paperless/settings.py:780 msgid "Persian" msgstr "" -#: paperless/settings.py:780 +#: paperless/settings.py:781 msgid "Finnish" msgstr "" -#: paperless/settings.py:781 +#: paperless/settings.py:782 msgid "French" msgstr "" -#: paperless/settings.py:782 +#: paperless/settings.py:783 msgid "Hungarian" msgstr "" -#: paperless/settings.py:783 +#: paperless/settings.py:784 msgid "Indonesian" msgstr "" -#: paperless/settings.py:784 +#: paperless/settings.py:785 msgid "Italian" msgstr "" -#: paperless/settings.py:785 +#: paperless/settings.py:786 msgid "Japanese" msgstr "" -#: paperless/settings.py:786 +#: paperless/settings.py:787 msgid "Korean" msgstr "" -#: paperless/settings.py:787 +#: paperless/settings.py:788 msgid "Luxembourgish" msgstr "" -#: paperless/settings.py:788 +#: paperless/settings.py:789 msgid "Norwegian" msgstr "" -#: paperless/settings.py:789 +#: paperless/settings.py:790 msgid "Dutch" msgstr "" -#: paperless/settings.py:790 +#: paperless/settings.py:791 msgid "Polish" msgstr "" -#: paperless/settings.py:791 +#: paperless/settings.py:792 msgid "Portuguese (Brazil)" msgstr "" -#: paperless/settings.py:792 +#: paperless/settings.py:793 msgid "Portuguese" msgstr "" -#: paperless/settings.py:793 +#: paperless/settings.py:794 msgid "Romanian" msgstr "" -#: paperless/settings.py:794 +#: paperless/settings.py:795 msgid "Russian" msgstr "" -#: paperless/settings.py:795 +#: paperless/settings.py:796 msgid "Slovak" msgstr "" -#: paperless/settings.py:796 +#: paperless/settings.py:797 msgid "Slovenian" msgstr "" -#: paperless/settings.py:797 +#: paperless/settings.py:798 msgid "Serbian" msgstr "" -#: paperless/settings.py:798 +#: paperless/settings.py:799 msgid "Swedish" msgstr "" -#: paperless/settings.py:799 +#: paperless/settings.py:800 msgid "Turkish" msgstr "" -#: paperless/settings.py:800 +#: paperless/settings.py:801 msgid "Ukrainian" msgstr "" -#: paperless/settings.py:801 +#: paperless/settings.py:802 msgid "Vietnamese" msgstr "" -#: paperless/settings.py:802 +#: paperless/settings.py:803 msgid "Chinese Simplified" msgstr "" -#: paperless/settings.py:803 +#: paperless/settings.py:804 msgid "Chinese Traditional" msgstr ""