Compare commits

..

22 Commits

Author SHA1 Message Date
Trenton H
972f9a069c One more tuple here 2026-01-28 15:57:33 -08:00
Trenton H
bd99fb66cf Resolves Sonar issues 2026-01-28 15:50:11 -08:00
Trenton H
7704bc5399 To enable cleanup, use as a context manager 2026-01-28 15:45:27 -08:00
Trenton H
a055de0ce4 Restores environment 2026-01-28 15:25:14 -08:00
Trenton H
e0fdf1caa9 Adds example type checking configuration, with a default broad ignore and a tightly scoped check 2026-01-28 15:19:22 -08:00
Trenton H
f80ae51a7d Two more missed 2026-01-28 14:44:06 -08:00
Trenton H
e101019924 Got to update the tests too 2026-01-28 14:33:48 -08:00
Trenton H
7afc8ceb24 Change the contract, just take the actual filename, not the file path 2026-01-28 14:24:14 -08:00
Trenton H
dfe0012872 Forgot the marker again 2026-01-28 14:14:11 -08:00
Trenton H
32771391ad Hooks up the class and fixes up the old testing. Includes ocr to date parser conversion we now do 2026-01-28 14:13:29 -08:00
Trenton H
9b7ae1c8ea Copy over the code and tests, to see if this even works 2026-01-28 13:54:53 -08:00
Trenton H
66593ec660 Chore: Bulk backend updates (#11543) 2026-01-28 13:30:12 -08:00
GitHub Actions
5af0d1da26 Auto translate strings 2026-01-28 16:27:11 +00:00
shamoon
3281ec2401 Documentation: update duplicates note 2026-01-28 08:25:16 -08:00
shamoon
dc9061eb97 Chore: refactor zoom and editor mode to use enums 2026-01-28 08:25:16 -08:00
Trenton H
6859e7e3c2 Chore: Resolve more flaky tests (#11920) 2026-01-28 16:13:27 +00:00
Jan Kleine
3e645bd9e2 Tweak: increase minimum screen width before inserting padding (#11926) 2026-01-28 15:57:47 +00:00
GitHub Actions
09d39de200 Auto translate strings 2026-01-28 15:55:01 +00:00
Jan Kleine
94231dbb0f Enhancement: Add setting for default PDF Editor mode (#11927)
---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2026-01-28 15:53:14 +00:00
Trenton H
2f76350023 Chore: Push manually dispatched images to the registry (#11925) 2026-01-28 15:47:32 +00:00
Pierre Nédélec
4cbe56e3af Chore: Http interceptors refactor (#11923)
---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2026-01-28 07:18:48 -08:00
Trenton H
01b21377af Chore: Use a local http server instead of external to reduce flakiness (#11916) 2026-01-28 03:57:12 +00:00
47 changed files with 3791 additions and 2949 deletions

View File

@@ -46,14 +46,13 @@ jobs:
id: ref
run: |
ref_name="${GITHUB_HEAD_REF:-$GITHUB_REF_NAME}"
# Sanitize by replacing / with - for cache keys
cache_ref="${ref_name//\//-}"
# Sanitize by replacing / with - for use in tags and cache keys
sanitized_ref="${ref_name//\//-}"
echo "ref_name=${ref_name}"
echo "cache_ref=${cache_ref}"
echo "sanitized_ref=${sanitized_ref}"
echo "name=${ref_name}" >> $GITHUB_OUTPUT
echo "cache-ref=${cache_ref}" >> $GITHUB_OUTPUT
echo "name=${sanitized_ref}" >> $GITHUB_OUTPUT
- name: Check push permissions
id: check-push
env:
@@ -62,12 +61,14 @@ jobs:
# should-push: Should we push to GHCR?
# True for:
# 1. Pushes (tags/dev/beta) - filtered via the workflow triggers
# 2. Internal PRs where the branch name starts with 'feature-' - filtered here when a PR is synced
# 2. Manual dispatch - always push to GHCR
# 3. Internal PRs where the branch name starts with 'feature-' or 'fix-'
should_push="false"
if [[ "${{ github.event_name }}" == "push" ]]; then
should_push="true"
elif [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
should_push="true"
elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
if [[ "${REF_NAME}" == feature-* || "${REF_NAME}" == fix-* ]]; then
should_push="true"
@@ -139,9 +140,9 @@ jobs:
PNGX_TAG_VERSION=${{ steps.docker-meta.outputs.version }}
outputs: type=image,name=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ steps.check-push.outputs.should-push }}
cache-from: |
type=registry,ref=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}/cache/app:${{ steps.ref.outputs.cache-ref }}-${{ matrix.arch }}
type=registry,ref=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}/cache/app:${{ steps.ref.outputs.name }}-${{ matrix.arch }}
type=registry,ref=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}/cache/app:dev-${{ matrix.arch }}
cache-to: ${{ steps.check-push.outputs.should-push == 'true' && format('type=registry,mode=max,ref={0}/{1}/cache/app:{2}-{3}', env.REGISTRY, steps.repo.outputs.name, steps.ref.outputs.cache-ref, matrix.arch) || '' }}
cache-to: ${{ steps.check-push.outputs.should-push == 'true' && format('type=registry,mode=max,ref={0}/{1}/cache/app:{2}-{3}', env.REGISTRY, steps.repo.outputs.name, steps.ref.outputs.name, matrix.arch) || '' }}
- name: Export digest
if: steps.check-push.outputs.should-push == 'true'
run: |

View File

@@ -37,7 +37,7 @@ repos:
- json
# See https://github.com/prettier/prettier/issues/15742 for the fork reason
- repo: https://github.com/rbubley/mirrors-prettier
rev: 'v3.6.2'
rev: 'v3.8.1'
hooks:
- id: prettier
types_or:
@@ -49,7 +49,7 @@ repos:
- 'prettier-plugin-organize-imports@4.1.0'
# Python hooks
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.5
rev: v0.14.14
hooks:
- id: ruff-check
- id: ruff-format
@@ -76,7 +76,7 @@ repos:
hooks:
- id: shellcheck
- repo: https://github.com/google/yamlfmt
rev: v0.20.0
rev: v0.21.0
hooks:
- id: yamlfmt
exclude: "^src-ui/pnpm-lock.yaml"

View File

@@ -34,3 +34,13 @@ services:
ports:
- "3143:3143" # IMAP
restart: unless-stopped
nginx:
image: docker.io/nginx:1.29-alpine
hostname: nginx
container_name: nginx
ports:
- "8080:8080"
restart: unless-stopped
volumes:
- ../../docs/assets:/usr/share/nginx/html/assets:ro
- ./test-nginx.conf:/etc/nginx/conf.d/default.conf:ro

View File

@@ -0,0 +1,14 @@
server {
listen 8080;
server_name localhost;
root /usr/share/nginx/html;
# Enable CORS for test requests
add_header 'Access-Control-Allow-Origin' '*' always;
add_header 'Access-Control-Allow-Methods' 'GET, HEAD, OPTIONS' always;
location / {
try_files $uri $uri/ =404;
}
}

View File

@@ -582,7 +582,7 @@ document.
### Detecting duplicates {#fuzzy_duplicate}
Paperless already catches and prevents upload of exactly matching documents,
Paperless-ngx already catches and warns of exactly matching documents,
however a new scan of an existing document may not produce an exact bit for bit
duplicate. But the content should be exact or close, allowing detection.

View File

@@ -19,14 +19,14 @@ dependencies = [
"azure-ai-documentintelligence>=1.0.2",
"babel>=2.17",
"bleach~=6.3.0",
"celery[redis]~=5.5.1",
"celery[redis]~=5.6.2",
"channels~=4.2",
"channels-redis~=4.2",
"concurrent-log-handler~=0.9.25",
"dateparser~=1.2",
# WARNING: django does not use semver.
# Only patch versions are guaranteed to not introduce breaking changes.
"django~=5.2.5",
"django~=5.2.10",
"django-allauth[mfa,socialaccount]~=65.13.1",
"django-auditlog~=3.4.1",
"django-cachalot~=2.8.0",
@@ -79,7 +79,7 @@ dependencies = [
"torch~=2.9.1",
"tqdm~=4.67.1",
"watchfiles>=1.1.1",
"whitenoise~=6.9",
"whitenoise~=6.11",
"whoosh-reloaded>=2.7.5",
"zxing-cpp~=2.3.0",
]
@@ -88,13 +88,13 @@ optional-dependencies.mariadb = [
"mysqlclient~=2.2.7",
]
optional-dependencies.postgres = [
"psycopg[c,pool]==3.2.12",
"psycopg[c,pool]==3.3",
# Direct dependency for proper resolution of the pre-built wheels
"psycopg-c==3.2.12",
"psycopg-c==3.3",
"psycopg-pool==3.3",
]
optional-dependencies.webserver = [
"granian[uvloop]~=2.5.1",
"granian[uvloop]~=2.6.0",
]
[dependency-groups]
@@ -152,7 +152,7 @@ typing = [
]
[tool.uv]
required-version = ">=0.5.14"
required-version = ">=0.9.0"
package = false
environments = [
"sys_platform == 'darwin'",
@@ -162,8 +162,8 @@ environments = [
[tool.uv.sources]
# Markers are chosen to select these almost exclusively when building the Docker image
psycopg-c = [
{ url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-bookworm-3.2.12/psycopg_c-3.2.12-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine == 'x86_64' and python_version == '3.12'" },
{ url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-bookworm-3.2.12/psycopg_c-3.2.12-cp312-cp312-linux_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64' and python_version == '3.12'" },
{ url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-trixie-3.3.0/psycopg_c-3.3.0-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine == 'x86_64' and python_version == '3.12'" },
{ url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-trixie-3.3.0/psycopg_c-3.3.0-cp312-cp312-linux_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64' and python_version == '3.12'" },
]
zxing-cpp = [
{ url = "https://github.com/paperless-ngx/builder/releases/download/zxing-2.3.0/zxing_cpp-2.3.0-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine == 'x86_64' and python_version == '3.12'" },
@@ -300,6 +300,15 @@ norecursedirs = [ "src/locale/", ".venv/", "src-ui/" ]
DJANGO_SETTINGS_MODULE = "paperless.settings"
markers = [
"live: Integration tests requiring external services (Gotenberg, Tika, nginx, etc)",
"nginx: Tests that make HTTP requests to the local nginx service",
"gotenberg: Tests requiring Gotenberg service",
"tika: Tests requiring Tika service",
"greenmail: Tests requiring Greenmail service",
"date_parsing: Tests which cover date parsing from content or filename",
]
[tool.pytest_env]
PAPERLESS_DISABLE_DBHANDLER = "true"
PAPERLESS_CACHE_BACKEND = "django.core.cache.backends.locmem.LocMemCache"
@@ -324,6 +333,10 @@ exclude_also = [
[tool.mypy]
mypy_path = "src"
files = [
"src/documents/plugins/date_parsing",
"src/documents/tests/date_parsing",
]
plugins = [
"mypy_django_plugin.main",
"mypy_drf_plugin.main",
@@ -335,5 +348,28 @@ disallow_untyped_defs = true
warn_redundant_casts = true
warn_unused_ignores = true
# This prevents errors from imports, but allows type-checking logic to work
follow_imports = "silent"
[[tool.mypy.overrides]]
module = [
"documents.*",
"paperless.*",
"paperless_ai.*",
"paperless_mail.*",
"paperless_tesseract.*",
"paperless_remote.*",
"paperless_text.*",
"paperless_tika.*",
]
ignore_errors = true
[[tool.mypy.overrides]]
module = [
"documents.plugins.date_parsing.*",
"documents.tests.date_parsing.*",
]
ignore_errors = false
[tool.django-stubs]
django_settings_module = "paperless.settings"

View File

@@ -561,7 +561,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">386</context>
<context context-type="linenumber">400</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component.html</context>
@@ -1201,28 +1201,72 @@
<source>Bulk editing</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">263</context>
<context context-type="linenumber">264</context>
</context-group>
</trans-unit>
<trans-unit id="8158899674926420054" datatype="html">
<source>Show confirmation dialogs</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">266</context>
<context context-type="linenumber">267</context>
</context-group>
</trans-unit>
<trans-unit id="290238406234356122" datatype="html">
<source>Apply on close</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">267</context>
<context context-type="linenumber">268</context>
</context-group>
</trans-unit>
<trans-unit id="5084275925647254161" datatype="html">
<source>PDF Editor</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">272</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
<context context-type="linenumber">66</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1472</context>
</context-group>
</trans-unit>
<trans-unit id="1577733187050997705" datatype="html">
<source>Default editing mode</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">275</context>
</context-group>
</trans-unit>
<trans-unit id="7273640930165035289" datatype="html">
<source>Create new document(s)</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">279</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/pdf-editor/pdf-editor.component.html</context>
<context context-type="linenumber">82</context>
</context-group>
</trans-unit>
<trans-unit id="8035757452478567832" datatype="html">
<source>Update existing document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">280</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/pdf-editor/pdf-editor.component.html</context>
<context context-type="linenumber">87</context>
</context-group>
</trans-unit>
<trans-unit id="8104421162933956065" datatype="html">
<source>Notes</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">271</context>
<context context-type="linenumber">285</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/document-list.component.html</context>
@@ -1241,14 +1285,14 @@
<source>Enable notes</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">274</context>
<context context-type="linenumber">288</context>
</context-group>
</trans-unit>
<trans-unit id="7314814725704332646" datatype="html">
<source>Permissions</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">283</context>
<context context-type="linenumber">297</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/group-edit-dialog/group-edit-dialog.component.html</context>
@@ -1311,28 +1355,28 @@
<source>Default Permissions</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">286</context>
<context context-type="linenumber">300</context>
</context-group>
</trans-unit>
<trans-unit id="6544153565064275581" datatype="html">
<source> Settings apply to this user account for objects (Tags, Mail Rules, etc. but not documents) created via the web UI. </source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">290,292</context>
<context context-type="linenumber">304,306</context>
</context-group>
</trans-unit>
<trans-unit id="4292903881380648974" datatype="html">
<source>Default Owner</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">297</context>
<context context-type="linenumber">311</context>
</context-group>
</trans-unit>
<trans-unit id="734147282056744882" datatype="html">
<source>Objects without an owner can be viewed and edited by all users</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">301</context>
<context context-type="linenumber">315</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/input/permissions/permissions-form/permissions-form.component.html</context>
@@ -1343,18 +1387,18 @@
<source>Default View Permissions</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">306</context>
<context context-type="linenumber">320</context>
</context-group>
</trans-unit>
<trans-unit id="2191775412581217688" datatype="html">
<source>Users:</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">311</context>
<context context-type="linenumber">325</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">338</context>
<context context-type="linenumber">352</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
@@ -1385,11 +1429,11 @@
<source>Groups:</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">321</context>
<context context-type="linenumber">335</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">348</context>
<context context-type="linenumber">362</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
@@ -1420,14 +1464,14 @@
<source>Default Edit Permissions</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">333</context>
<context context-type="linenumber">347</context>
</context-group>
</trans-unit>
<trans-unit id="3728984448750213892" datatype="html">
<source>Edit permissions also grant viewing permissions</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">357</context>
<context context-type="linenumber">371</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
@@ -1446,7 +1490,7 @@
<source>Notifications</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">365</context>
<context context-type="linenumber">379</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/app-frame/toasts-dropdown/toasts-dropdown.component.html</context>
@@ -1457,49 +1501,49 @@
<source>Document processing</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">368</context>
<context context-type="linenumber">382</context>
</context-group>
</trans-unit>
<trans-unit id="3656786776644872398" datatype="html">
<source>Show notifications when new documents are detected</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">372</context>
<context context-type="linenumber">386</context>
</context-group>
</trans-unit>
<trans-unit id="6057053428592387613" datatype="html">
<source>Show notifications when document processing completes successfully</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">373</context>
<context context-type="linenumber">387</context>
</context-group>
</trans-unit>
<trans-unit id="370315664367425513" datatype="html">
<source>Show notifications when document processing fails</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">374</context>
<context context-type="linenumber">388</context>
</context-group>
</trans-unit>
<trans-unit id="6838309441164918531" datatype="html">
<source>Suppress notifications on dashboard</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">375</context>
<context context-type="linenumber">389</context>
</context-group>
</trans-unit>
<trans-unit id="2741919327232918179" datatype="html">
<source>This will suppress all messages about document processing status on the dashboard.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">375</context>
<context context-type="linenumber">389</context>
</context-group>
</trans-unit>
<trans-unit id="2159130950882492111" datatype="html">
<source>Cancel</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">385</context>
<context context-type="linenumber">399</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/confirm-dialog/confirm-dialog.component.ts</context>
@@ -1570,21 +1614,21 @@
<source>Use system language</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">78</context>
<context context-type="linenumber">79</context>
</context-group>
</trans-unit>
<trans-unit id="7729897675462249787" datatype="html">
<source>Use date format of display language</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">81</context>
<context context-type="linenumber">82</context>
</context-group>
</trans-unit>
<trans-unit id="1379170675585571971" datatype="html">
<source>Archive serial number</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">95</context>
<context context-type="linenumber">96</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
@@ -1595,7 +1639,7 @@
<source>Correspondent</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">97</context>
<context context-type="linenumber">98</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
@@ -1626,7 +1670,7 @@
<source>Document type</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">98</context>
<context context-type="linenumber">99</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
@@ -1657,7 +1701,7 @@
<source>Storage path</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">99</context>
<context context-type="linenumber">100</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
@@ -1684,7 +1728,7 @@
<source>Tags</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">100</context>
<context context-type="linenumber">101</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/app-frame/app-frame.component.html</context>
@@ -1723,7 +1767,7 @@
<source>Error retrieving users</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">248</context>
<context context-type="linenumber">252</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/users-groups/users-groups.component.ts</context>
@@ -1734,7 +1778,7 @@
<source>Error retrieving groups</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">267</context>
<context context-type="linenumber">271</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/users-groups/users-groups.component.ts</context>
@@ -1745,28 +1789,28 @@
<source>Settings were saved successfully.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">577</context>
<context context-type="linenumber">588</context>
</context-group>
</trans-unit>
<trans-unit id="525012668859298131" datatype="html">
<source>Settings were saved successfully. Reload is required to apply some changes.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">581</context>
<context context-type="linenumber">592</context>
</context-group>
</trans-unit>
<trans-unit id="8491974984518503778" datatype="html">
<source>Reload now</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">582</context>
<context context-type="linenumber">593</context>
</context-group>
</trans-unit>
<trans-unit id="3011185103048412841" datatype="html">
<source>An error occurred while saving settings.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.ts</context>
<context context-type="linenumber">592</context>
<context context-type="linenumber">603</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/app-frame/app-frame.component.ts</context>
@@ -2775,11 +2819,11 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1121</context>
<context context-type="linenumber">1108</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1486</context>
<context context-type="linenumber">1473</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -3370,7 +3414,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1074</context>
<context context-type="linenumber">1061</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -3475,7 +3519,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1537</context>
<context context-type="linenumber">1524</context>
</context-group>
</trans-unit>
<trans-unit id="6661109599266152398" datatype="html">
@@ -3486,7 +3530,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1538</context>
<context context-type="linenumber">1525</context>
</context-group>
</trans-unit>
<trans-unit id="5162686434580248853" datatype="html">
@@ -3497,7 +3541,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1539</context>
<context context-type="linenumber">1526</context>
</context-group>
</trans-unit>
<trans-unit id="8157388568390631653" datatype="html">
@@ -6012,20 +6056,6 @@
<context context-type="linenumber">70</context>
</context-group>
</trans-unit>
<trans-unit id="7273640930165035289" datatype="html">
<source>Create new document(s)</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/pdf-editor/pdf-editor.component.html</context>
<context context-type="linenumber">82</context>
</context-group>
</trans-unit>
<trans-unit id="8035757452478567832" datatype="html">
<source>Update existing document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/pdf-editor/pdf-editor.component.html</context>
<context context-type="linenumber">87</context>
</context-group>
</trans-unit>
<trans-unit id="7248454234750442816" datatype="html">
<source>Copy metadata</source>
<context-group purpose="location">
@@ -7373,17 +7403,6 @@
<context context-type="linenumber">69</context>
</context-group>
</trans-unit>
<trans-unit id="5084275925647254161" datatype="html">
<source>PDF Editor</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
<context context-type="linenumber">66</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1485</context>
</context-group>
</trans-unit>
<trans-unit id="2336375155355449543" datatype="html">
<source>Remove Password</source>
<context-group purpose="location">
@@ -7619,56 +7638,56 @@
<source>An error occurred loading content: <x id="PH" equiv-text="err.message ?? err.toString()"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">441,443</context>
<context context-type="linenumber">428,430</context>
</context-group>
</trans-unit>
<trans-unit id="3200733026060976258" datatype="html">
<source>Document changes detected</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">480</context>
<context context-type="linenumber">467</context>
</context-group>
</trans-unit>
<trans-unit id="2887155916749964" datatype="html">
<source>The version of this document in your browser session appears older than the existing version.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">481</context>
<context context-type="linenumber">468</context>
</context-group>
</trans-unit>
<trans-unit id="237142428785956348" datatype="html">
<source>Saving the document here may overwrite other changes that were made. To restore the existing version, discard your changes or close the document.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">482</context>
<context context-type="linenumber">469</context>
</context-group>
</trans-unit>
<trans-unit id="8720977247725652816" datatype="html">
<source>Ok</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">484</context>
<context context-type="linenumber">471</context>
</context-group>
</trans-unit>
<trans-unit id="6142395741265832184" datatype="html">
<source>Next document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">610</context>
<context context-type="linenumber">597</context>
</context-group>
</trans-unit>
<trans-unit id="651985345816518480" datatype="html">
<source>Previous document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">620</context>
<context context-type="linenumber">607</context>
</context-group>
</trans-unit>
<trans-unit id="2885986061416655600" datatype="html">
<source>Close document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">628</context>
<context context-type="linenumber">615</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/open-documents.service.ts</context>
@@ -7679,67 +7698,67 @@
<source>Save document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">635</context>
<context context-type="linenumber">622</context>
</context-group>
</trans-unit>
<trans-unit id="1784543155727940353" datatype="html">
<source>Save and close / next</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">644</context>
<context context-type="linenumber">631</context>
</context-group>
</trans-unit>
<trans-unit id="5758784066858623886" datatype="html">
<source>Error retrieving metadata</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">699</context>
<context context-type="linenumber">686</context>
</context-group>
</trans-unit>
<trans-unit id="3456881259945295697" datatype="html">
<source>Error retrieving suggestions.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">754</context>
<context context-type="linenumber">741</context>
</context-group>
</trans-unit>
<trans-unit id="2194092841814123758" datatype="html">
<source>Document &quot;<x id="PH" equiv-text="newValues.title"/>&quot; saved successfully.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">963</context>
<context context-type="linenumber">950</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">987</context>
<context context-type="linenumber">974</context>
</context-group>
</trans-unit>
<trans-unit id="6626387786259219838" datatype="html">
<source>Error saving document &quot;<x id="PH" equiv-text="this.document.title"/>&quot;</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">993</context>
<context context-type="linenumber">980</context>
</context-group>
</trans-unit>
<trans-unit id="448882439049417053" datatype="html">
<source>Error saving document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1043</context>
<context context-type="linenumber">1030</context>
</context-group>
</trans-unit>
<trans-unit id="8410796510716511826" datatype="html">
<source>Do you really want to move the document &quot;<x id="PH" equiv-text="this.document.title"/>&quot; to the trash?</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1075</context>
<context context-type="linenumber">1062</context>
</context-group>
</trans-unit>
<trans-unit id="282586936710748252" datatype="html">
<source>Documents can be restored prior to permanent deletion.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1076</context>
<context context-type="linenumber">1063</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7750,7 +7769,7 @@
<source>Move to trash</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1078</context>
<context context-type="linenumber">1065</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7761,14 +7780,14 @@
<source>Error deleting document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1097</context>
<context context-type="linenumber">1084</context>
</context-group>
</trans-unit>
<trans-unit id="619486176823357521" datatype="html">
<source>Reprocess confirm</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1117</context>
<context context-type="linenumber">1104</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7779,102 +7798,102 @@
<source>This operation will permanently recreate the archive file for this document.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1118</context>
<context context-type="linenumber">1105</context>
</context-group>
</trans-unit>
<trans-unit id="302054111564709516" datatype="html">
<source>The archive file will be re-generated with the current settings.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1119</context>
<context context-type="linenumber">1106</context>
</context-group>
</trans-unit>
<trans-unit id="8251197608401006898" datatype="html">
<source>Reprocess operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background. Close and re-open or reload this document after the operation has completed to see new content.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1129</context>
<context context-type="linenumber">1116</context>
</context-group>
</trans-unit>
<trans-unit id="4409560272830824468" datatype="html">
<source>Error executing operation</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1140</context>
<context context-type="linenumber">1127</context>
</context-group>
</trans-unit>
<trans-unit id="6030453331794586802" datatype="html">
<source>Error downloading document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1189</context>
<context context-type="linenumber">1176</context>
</context-group>
</trans-unit>
<trans-unit id="4458954481601077369" datatype="html">
<source>Page Fit</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1266</context>
<context context-type="linenumber">1253</context>
</context-group>
</trans-unit>
<trans-unit id="4663705961777238777" datatype="html">
<source>PDF edit operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1504</context>
<context context-type="linenumber">1491</context>
</context-group>
</trans-unit>
<trans-unit id="9043972994040261999" datatype="html">
<source>Error executing PDF edit operation</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1516</context>
<context context-type="linenumber">1503</context>
</context-group>
</trans-unit>
<trans-unit id="6172690334763056188" datatype="html">
<source>Please enter the current password before attempting to remove it.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1527</context>
<context context-type="linenumber">1514</context>
</context-group>
</trans-unit>
<trans-unit id="968660764814228922" datatype="html">
<source>Password removal operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1559</context>
<context context-type="linenumber">1546</context>
</context-group>
</trans-unit>
<trans-unit id="2282118435712883014" datatype="html">
<source>Error executing password removal operation</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1573</context>
<context context-type="linenumber">1560</context>
</context-group>
</trans-unit>
<trans-unit id="3740891324955700797" datatype="html">
<source>Print failed.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1610</context>
<context context-type="linenumber">1597</context>
</context-group>
</trans-unit>
<trans-unit id="6457245677384603573" datatype="html">
<source>Error loading document for printing.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1622</context>
<context context-type="linenumber">1609</context>
</context-group>
</trans-unit>
<trans-unit id="6085793215710522488" datatype="html">
<source>An error occurred loading tiff: <x id="PH" equiv-text="err.toString()"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1687</context>
<context context-type="linenumber">1674</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1691</context>
<context context-type="linenumber">1678</context>
</context-group>
</trans-unit>
<trans-unit id="4958946940233632319" datatype="html">

View File

@@ -259,6 +259,7 @@
</div>
</div>
</div>
<div class="col-xl-6 ps-xl-5">
<h5 class="mt-3" i18n>Bulk editing</h5>
<div class="row mb-3">
@@ -268,6 +269,19 @@
</div>
</div>
<h5 class="mt-3" i18n>PDF Editor</h5>
<div class="row">
<div class="col-md-3 col-form-label pt-0">
<span i18n>Default editing mode</span>
</div>
<div class="col">
<select class="form-select" formControlName="pdfEditorDefaultEditMode">
<option [ngValue]="PdfEditorEditMode.Create" i18n>Create new document(s)</option>
<option [ngValue]="PdfEditorEditMode.Update" i18n>Update existing document</option>
</select>
</div>
</div>
<h5 class="mt-3" i18n>Notes</h5>
<div class="row mb-3">
<div class="col">

View File

@@ -251,7 +251,7 @@ describe('SettingsComponent', () => {
expect(toastErrorSpy).toHaveBeenCalled()
expect(storeSpy).toHaveBeenCalled()
expect(appearanceSettingsSpy).not.toHaveBeenCalled()
expect(setSpy).toHaveBeenCalledTimes(31)
expect(setSpy).toHaveBeenCalledTimes(32)
// succeed
storeSpy.mockReturnValueOnce(of(true))

View File

@@ -64,8 +64,9 @@ import { PermissionsGroupComponent } from '../../common/input/permissions/permis
import { PermissionsUserComponent } from '../../common/input/permissions/permissions-user/permissions-user.component'
import { SelectComponent } from '../../common/input/select/select.component'
import { PageHeaderComponent } from '../../common/page-header/page-header.component'
import { PdfEditorEditMode } from '../../common/pdf-editor/pdf-editor-edit-mode'
import { SystemStatusDialogComponent } from '../../common/system-status-dialog/system-status-dialog.component'
import { ZoomSetting } from '../../document-detail/document-detail.component'
import { ZoomSetting } from '../../document-detail/zoom-setting'
import { ComponentWithPermissions } from '../../with-permissions/with-permissions.component'
enum SettingsNavIDs {
@@ -163,6 +164,7 @@ export class SettingsComponent
defaultPermsEditGroups: new FormControl(null),
useNativePdfViewer: new FormControl(null),
pdfViewerDefaultZoom: new FormControl(null),
pdfEditorDefaultEditMode: new FormControl(null),
documentEditingRemoveInboxTags: new FormControl(null),
documentEditingOverlayThumbnail: new FormControl(null),
documentDetailsHiddenFields: new FormControl([]),
@@ -196,6 +198,8 @@ export class SettingsComponent
public readonly ZoomSetting = ZoomSetting
public readonly PdfEditorEditMode = PdfEditorEditMode
public readonly documentDetailFieldOptions = documentDetailFieldOptions
get systemStatusHasErrors(): boolean {
@@ -314,6 +318,9 @@ export class SettingsComponent
pdfViewerDefaultZoom: this.settings.get(
SETTINGS_KEYS.PDF_VIEWER_ZOOM_SETTING
),
pdfEditorDefaultEditMode: this.settings.get(
SETTINGS_KEYS.PDF_EDITOR_DEFAULT_EDIT_MODE
),
displayLanguage: this.settings.getLanguage(),
dateLocale: this.settings.get(SETTINGS_KEYS.DATE_LOCALE),
dateFormat: this.settings.get(SETTINGS_KEYS.DATE_FORMAT),
@@ -483,6 +490,10 @@ export class SettingsComponent
SETTINGS_KEYS.PDF_VIEWER_ZOOM_SETTING,
this.settingsForm.value.pdfViewerDefaultZoom
)
this.settings.set(
SETTINGS_KEYS.PDF_EDITOR_DEFAULT_EDIT_MODE,
this.settingsForm.value.pdfEditorDefaultEditMode
)
this.settings.set(
SETTINGS_KEYS.DATE_LOCALE,
this.settingsForm.value.dateLocale

View File

@@ -248,7 +248,7 @@ main {
}
}
@media screen and (min-width: 366px) and (max-width: 768px) {
@media screen and (min-width: 376px) and (max-width: 768px) {
.navbar-toggler {
// compensate for 2 buttons on the right
margin-right: 45px;

View File

@@ -0,0 +1,4 @@
/**
 * Editing modes selectable in the PDF editor.
 *
 * The settings page presents `Create` as "Create new document(s)" and
 * `Update` as "Update existing document"; the selected member is what gets
 * stored under the PDF_EDITOR_DEFAULT_EDIT_MODE UI setting.
 */
export enum PdfEditorEditMode {
// Shown in the settings UI as "Update existing document".
Update = 'update',
// Shown in the settings UI as "Create new document(s)".
Create = 'create',
}

View File

@@ -8,8 +8,11 @@ import { FormsModule } from '@angular/forms'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { PDFDocumentProxy, PdfViewerModule } from 'ng2-pdf-viewer'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
import { DocumentService } from 'src/app/services/rest/document.service'
import { SettingsService } from 'src/app/services/settings.service'
import { ConfirmDialogComponent } from '../confirm-dialog/confirm-dialog.component'
import { PdfEditorEditMode } from './pdf-editor-edit-mode'
interface PageOperation {
page: number
@@ -19,11 +22,6 @@ interface PageOperation {
loaded?: boolean
}
export enum PdfEditorEditMode {
Update = 'update',
Create = 'create',
}
@Component({
selector: 'pngx-pdf-editor',
templateUrl: './pdf-editor.component.html',
@@ -39,12 +37,15 @@ export class PDFEditorComponent extends ConfirmDialogComponent {
public PdfEditorEditMode = PdfEditorEditMode
private documentService = inject(DocumentService)
private readonly settingsService = inject(SettingsService)
activeModal: NgbActiveModal = inject(NgbActiveModal)
documentID: number
pages: PageOperation[] = []
totalPages = 0
editMode: PdfEditorEditMode = PdfEditorEditMode.Create
editMode: PdfEditorEditMode = this.settingsService.get(
SETTINGS_KEYS.PDF_EDITOR_DEFAULT_EDIT_MODE
)
deleteOriginal: boolean = false
includeMetadata: boolean = true

View File

@@ -69,10 +69,8 @@ import { environment } from 'src/environments/environment'
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'
import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component'
import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component'
import {
DocumentDetailComponent,
ZoomSetting,
} from './document-detail.component'
import { DocumentDetailComponent } from './document-detail.component'
import { ZoomSetting } from './zoom-setting'
const doc: Document = {
id: 3,

View File

@@ -106,16 +106,15 @@ import { TextComponent } from '../common/input/text/text.component'
import { TextAreaComponent } from '../common/input/textarea/textarea.component'
import { UrlComponent } from '../common/input/url/url.component'
import { PageHeaderComponent } from '../common/page-header/page-header.component'
import {
PDFEditorComponent,
PdfEditorEditMode,
} from '../common/pdf-editor/pdf-editor.component'
import { PdfEditorEditMode } from '../common/pdf-editor/pdf-editor-edit-mode'
import { PDFEditorComponent } from '../common/pdf-editor/pdf-editor.component'
import { ShareLinksDialogComponent } from '../common/share-links-dialog/share-links-dialog.component'
import { SuggestionsDropdownComponent } from '../common/suggestions-dropdown/suggestions-dropdown.component'
import { DocumentHistoryComponent } from '../document-history/document-history.component'
import { DocumentNotesComponent } from '../document-notes/document-notes.component'
import { ComponentWithPermissions } from '../with-permissions/with-permissions.component'
import { MetadataCollapseComponent } from './metadata-collapse/metadata-collapse.component'
import { ZoomSetting } from './zoom-setting'
enum DocumentDetailNavIDs {
Details = 1,
@@ -137,18 +136,6 @@ enum ContentRenderType {
TIFF = 'tiff',
}
export enum ZoomSetting {
PageFit = 'page-fit',
PageWidth = 'page-width',
Quarter = '.25',
Half = '.5',
ThreeQuarters = '.75',
One = '1',
OneAndHalf = '1.5',
Two = '2',
Three = '3',
}
@Component({
selector: 'pngx-document-detail',
templateUrl: './document-detail.component.html',

View File

@@ -0,0 +1,11 @@
/**
 * Zoom options for the PDF viewer. A member of this enum is the value of the
 * PDF_VIEWER_ZOOM_SETTING UI setting.
 */
export enum ZoomSetting {
// Named fit modes.
PageFit = 'page-fit',
PageWidth = 'page-width',
// Numeric members are kept as strings; presumably scale factors
// (e.g. '1.5' = 150%) — NOTE(review): confirm against the viewer binding.
Quarter = '.25',
Half = '.5',
ThreeQuarters = '.75',
One = '1',
OneAndHalf = '1.5',
Two = '2',
Three = '3',
}

View File

@@ -1,3 +1,5 @@
import { PdfEditorEditMode } from '../components/common/pdf-editor/pdf-editor-edit-mode'
import { ZoomSetting } from '../components/document-detail/zoom-setting'
import { User } from './user'
export interface UiSettings {
@@ -74,6 +76,8 @@ export const SETTINGS_KEYS = {
'general-settings:document-details:hidden-fields',
SEARCH_DB_ONLY: 'general-settings:search:db-only',
SEARCH_FULL_TYPE: 'general-settings:search:more-link',
PDF_EDITOR_DEFAULT_EDIT_MODE:
'general-settings:document-editing:default-edit-mode',
EMPTY_TRASH_DELAY: 'trash_delay',
GMAIL_OAUTH_URL: 'gmail_oauth_url',
OUTLOOK_OAUTH_URL: 'outlook_oauth_url',
@@ -295,11 +299,16 @@ export const SETTINGS: UiSetting[] = [
{
key: SETTINGS_KEYS.PDF_VIEWER_ZOOM_SETTING,
type: 'string',
default: 'page-width', // ZoomSetting from 'document-detail.component'
default: ZoomSetting.PageWidth,
},
{
key: SETTINGS_KEYS.AI_ENABLED,
type: 'boolean',
default: false,
},
{
key: SETTINGS_KEYS.PDF_EDITOR_DEFAULT_EDIT_MODE,
type: 'string',
default: PdfEditorEditMode.Create,
},
]

View File

@@ -1,30 +1,41 @@
import { HttpEvent, HttpRequest } from '@angular/common/http'
import {
HttpClient,
provideHttpClient,
withInterceptors,
} from '@angular/common/http'
import {
HttpTestingController,
provideHttpClientTesting,
} from '@angular/common/http/testing'
import { TestBed } from '@angular/core/testing'
import { of } from 'rxjs'
import { environment } from 'src/environments/environment'
import { ApiVersionInterceptor } from './api-version.interceptor'
import { withApiVersionInterceptor } from './api-version.interceptor'
describe('ApiVersionInterceptor', () => {
let interceptor: ApiVersionInterceptor
let httpClient: HttpClient
let httpMock: HttpTestingController
beforeEach(() => {
TestBed.configureTestingModule({
providers: [ApiVersionInterceptor],
providers: [
provideHttpClient(withInterceptors([withApiVersionInterceptor])),
provideHttpClientTesting(),
],
})
interceptor = TestBed.inject(ApiVersionInterceptor)
httpClient = TestBed.inject(HttpClient)
httpMock = TestBed.inject(HttpTestingController)
})
it('should add api version to headers', () => {
interceptor.intercept(new HttpRequest('GET', 'https://example.com'), {
handle: (request) => {
const header = request.headers['lazyUpdate'][0]
expect(header.name).toEqual('Accept')
expect(header.value).toEqual(
`application/json; version=${environment.apiVersion}`
)
return of({} as HttpEvent<any>)
},
})
httpClient.get('https://example.com').subscribe()
const request = httpMock.expectOne('https://example.com')
const header = request.request.headers['lazyUpdate'][0]
expect(header.name).toEqual('Accept')
expect(header.value).toEqual(
`application/json; version=${environment.apiVersion}`
)
request.flush({})
})
})

View File

@@ -1,27 +1,20 @@
import {
HttpEvent,
HttpHandler,
HttpInterceptor,
HttpHandlerFn,
HttpInterceptorFn,
HttpRequest,
} from '@angular/common/http'
import { Injectable } from '@angular/core'
import { Observable } from 'rxjs'
import { environment } from 'src/environments/environment'
@Injectable()
export class ApiVersionInterceptor implements HttpInterceptor {
constructor() {}
intercept(
request: HttpRequest<unknown>,
next: HttpHandler
): Observable<HttpEvent<unknown>> {
request = request.clone({
setHeaders: {
Accept: `application/json; version=${environment.apiVersion}`,
},
})
return next.handle(request)
}
/**
 * Functional HTTP interceptor that sets the `Accept` header of each request
 * passing through it to `application/json; version=<environment.apiVersion>`.
 *
 * @param request the outgoing request; a clone carrying the header is forwarded
 * @param next the next handler in the interceptor chain
 * @returns the event stream produced by `next` for the cloned request
 */
export const withApiVersionInterceptor: HttpInterceptorFn = (
request: HttpRequest<unknown>,
next: HttpHandlerFn
): Observable<HttpEvent<unknown>> => {
// Rebind the local to a clone that carries the versioned Accept header.
request = request.clone({
setHeaders: {
Accept: `application/json; version=${environment.apiVersion}`,
},
})
return next(request)
}

View File

@@ -1,35 +1,52 @@
import { HttpEvent, HttpRequest } from '@angular/common/http'
import {
HttpClient,
provideHttpClient,
withInterceptors,
} from '@angular/common/http'
import {
HttpTestingController,
provideHttpClientTesting,
} from '@angular/common/http/testing'
import { TestBed } from '@angular/core/testing'
import { Meta } from '@angular/platform-browser'
import { CookieService } from 'ngx-cookie-service'
import { of } from 'rxjs'
import { CsrfInterceptor } from './csrf.interceptor'
import { withCsrfInterceptor } from './csrf.interceptor'
describe('CsrfInterceptor', () => {
let interceptor: CsrfInterceptor
let meta: Meta
let cookieService: CookieService
let httpClient: HttpClient
let httpMock: HttpTestingController
beforeEach(() => {
TestBed.configureTestingModule({
providers: [CsrfInterceptor, Meta, CookieService],
providers: [
Meta,
CookieService,
provideHttpClient(withInterceptors([withCsrfInterceptor])),
provideHttpClientTesting(),
],
})
meta = TestBed.inject(Meta)
cookieService = TestBed.inject(CookieService)
interceptor = TestBed.inject(CsrfInterceptor)
httpClient = TestBed.inject(HttpClient)
httpMock = TestBed.inject(HttpTestingController)
})
it('should get csrf token', () => {
meta.addTag({ name: 'cookie_prefix', content: 'ngx-' }, true)
const cookieServiceSpy = jest.spyOn(cookieService, 'get')
cookieServiceSpy.mockReturnValue('csrftoken')
interceptor.intercept(new HttpRequest('GET', 'https://example.com'), {
handle: (request) => {
expect(request.headers['lazyUpdate'][0]['name']).toEqual('X-CSRFToken')
return of({} as HttpEvent<any>)
},
})
httpClient.get('https://example.com').subscribe()
const request = httpMock.expectOne('https://example.com')
expect(request.request.headers['lazyUpdate'][0]['name']).toEqual(
'X-CSRFToken'
)
expect(cookieServiceSpy).toHaveBeenCalled()
request.flush({})
})
})

View File

@@ -1,36 +1,32 @@
import {
HttpEvent,
HttpHandler,
HttpInterceptor,
HttpHandlerFn,
HttpInterceptorFn,
HttpRequest,
} from '@angular/common/http'
import { inject, Injectable } from '@angular/core'
import { inject } from '@angular/core'
import { Meta } from '@angular/platform-browser'
import { CookieService } from 'ngx-cookie-service'
import { Observable } from 'rxjs'
@Injectable()
export class CsrfInterceptor implements HttpInterceptor {
private cookieService: CookieService = inject(CookieService)
private meta: Meta = inject(Meta)
export const withCsrfInterceptor: HttpInterceptorFn = (
request: HttpRequest<unknown>,
next: HttpHandlerFn
): Observable<HttpEvent<unknown>> => {
const cookieService: CookieService = inject(CookieService)
const meta: Meta = inject(Meta)
intercept(
request: HttpRequest<unknown>,
next: HttpHandler
): Observable<HttpEvent<unknown>> {
let prefix = ''
if (this.meta.getTag('name=cookie_prefix')) {
prefix = this.meta.getTag('name=cookie_prefix').content
}
let csrfToken = this.cookieService.get(`${prefix}csrftoken`)
if (csrfToken) {
request = request.clone({
setHeaders: {
'X-CSRFToken': csrfToken,
},
})
}
return next.handle(request)
let prefix = ''
if (meta.getTag('name=cookie_prefix')) {
prefix = meta.getTag('name=cookie_prefix').content
}
let csrfToken = cookieService.get(`${prefix}csrftoken`)
if (csrfToken) {
request = request.clone({
setHeaders: {
'X-CSRFToken': csrfToken,
},
})
}
return next(request)
}

View File

@@ -8,9 +8,9 @@ import {
import { DragDropModule } from '@angular/cdk/drag-drop'
import { DatePipe, registerLocaleData } from '@angular/common'
import {
HTTP_INTERCEPTORS,
provideHttpClient,
withFetch,
withInterceptors,
withInterceptorsFromDi,
} from '@angular/common/http'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
@@ -151,8 +151,8 @@ import { AppComponent } from './app/app.component'
import { DirtyDocGuard } from './app/guards/dirty-doc.guard'
import { DirtySavedViewGuard } from './app/guards/dirty-saved-view.guard'
import { PermissionsGuard } from './app/guards/permissions.guard'
import { ApiVersionInterceptor } from './app/interceptors/api-version.interceptor'
import { CsrfInterceptor } from './app/interceptors/csrf.interceptor'
import { withApiVersionInterceptor } from './app/interceptors/api-version.interceptor'
import { withCsrfInterceptor } from './app/interceptors/csrf.interceptor'
import { DocumentTitlePipe } from './app/pipes/document-title.pipe'
import { FilterPipe } from './app/pipes/filter.pipe'
import { UsernamePipe } from './app/pipes/username.pipe'
@@ -381,16 +381,6 @@ bootstrapApplication(AppComponent, {
provideAppInitializer(initializeApp),
DatePipe,
CookieService,
{
provide: HTTP_INTERCEPTORS,
useClass: CsrfInterceptor,
multi: true,
},
{
provide: HTTP_INTERCEPTORS,
useClass: ApiVersionInterceptor,
multi: true,
},
FilterPipe,
DocumentTitlePipe,
{ provide: NgbDateAdapter, useClass: ISODateAdapter },
@@ -402,6 +392,10 @@ bootstrapApplication(AppComponent, {
CorrespondentNamePipe,
DocumentTypeNamePipe,
StoragePathNamePipe,
provideHttpClient(withInterceptorsFromDi(), withFetch()),
provideHttpClient(
withInterceptorsFromDi(),
withInterceptors([withCsrfInterceptor, withApiVersionInterceptor]),
withFetch()
),
],
}).catch((err) => console.error(err))

View File

@@ -16,7 +16,6 @@ from pikepdf import Pdf
from documents.converters import convert_from_tiff_to_pdf
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.models import Document
from documents.models import Tag
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import StopConsumeTaskError
@@ -116,24 +115,6 @@ class BarcodePlugin(ConsumeTaskPlugin):
self._tiff_conversion_done = False
self.barcodes: list[Barcode] = []
def _apply_detected_asn(self, detected_asn: int) -> None:
    """
    Record an ASN read from a barcode on the metadata overrides.

    If ``self.metadata.skip_asn_if_exists`` is set and a document with the
    same archive serial number is found through ``Document.global_objects``,
    the ASN is not applied and only an informational log entry is written.
    Otherwise the ASN is stored in ``self.metadata.asn``.
    """
    # The database lookup only runs when the skip flag is set (short-circuit).
    already_taken = (
        self.metadata.skip_asn_if_exists
        and Document.global_objects.filter(
            archive_serial_number=detected_asn,
        ).exists()
    )

    if not already_taken:
        logger.info(f"Found ASN in barcode: {detected_asn}")
        self.metadata.asn = detected_asn
    else:
        logger.info(
            f"Found ASN in barcode {detected_asn} but skipping because it already exists.",
        )
def run(self) -> None:
# Some operations may use PIL, override pixel setting if needed
maybe_override_pixel_limit()
@@ -205,8 +186,13 @@ class BarcodePlugin(ConsumeTaskPlugin):
# Update/overwrite an ASN if possible
# After splitting, as otherwise each split document gets the same ASN
if self.settings.barcode_enable_asn and (located_asn := self.asn) is not None:
self._apply_detected_asn(located_asn)
if (
self.settings.barcode_enable_asn
and not self.metadata.skip_asn
and (located_asn := self.asn) is not None
):
logger.info(f"Found ASN in barcode: {located_asn}")
self.metadata.asn = located_asn
def cleanup(self) -> None:
self.temp_dir.cleanup()

View File

@@ -7,6 +7,7 @@ from pathlib import Path
from typing import TYPE_CHECKING
from typing import Literal
from celery import chain
from celery import chord
from celery import group
from celery import shared_task
@@ -37,42 +38,6 @@ if TYPE_CHECKING:
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
# Celery task wrapper around restore_archive_serial_numbers().
# In this file it is attached as a ``link_error`` callback (via ``.s(backup)``)
# to the consume tasks queued by merge/split/edit_pdf.
# NOTE(review): Celery may prepend its own positional arguments to error
# callbacks; confirm that ``backup`` really receives the backup map when the
# task is invoked that way.
@shared_task(bind=True)
def restore_archive_serial_numbers_task(
self,
backup: dict[int, int],
*args,
**kwargs,
) -> None:
# Any extra positional/keyword arguments are accepted and ignored;
# only the doc_id -> ASN backup map is forwarded.
restore_archive_serial_numbers(backup)
def release_archive_serial_numbers(doc_ids: list[int]) -> dict[int, int]:
    """
    Detach archive serial numbers from documents that are about to be replaced.

    Only documents from ``doc_ids`` that currently carry an ASN are touched.
    Their ASN is set to ``None`` so that replacement documents can take it over
    without violating uniqueness.

    Returns a mapping of document id -> previous ASN which can be handed to
    ``restore_archive_serial_numbers`` to undo the release.
    """
    documents_with_asn = Document.objects.filter(
        id__in=doc_ids,
        archive_serial_number__isnull=False,
    ).only("pk", "archive_serial_number")

    # Snapshot the current values before clearing them.
    previous_asns = dict(
        documents_with_asn.values_list("pk", "archive_serial_number"),
    )
    documents_with_asn.update(archive_serial_number=None)

    released_ids = list(previous_asns.keys())
    logger.info(f"Released archive serial numbers for documents {released_ids}")
    return previous_asns
def restore_archive_serial_numbers(backup: dict[int, int]) -> None:
    """
    Write previously released ASNs back onto their documents.

    ``backup`` maps document id -> ASN, as produced by
    ``release_archive_serial_numbers``. Each document is updated with its own
    query; this is meant as a rollback when replacement consumption fails.
    """
    for document_id in backup:
        previous_asn = backup[document_id]
        Document.objects.filter(pk=document_id).update(
            archive_serial_number=previous_asn,
        )

    restored_ids = list(backup)
    logger.info(f"Restored archive serial numbers for documents {restored_ids}")
def set_correspondent(
doc_ids: list[int],
correspondent: Correspondent,
@@ -421,7 +386,6 @@ def merge(
merged_pdf = pikepdf.new()
version: str = merged_pdf.pdf_version
handoff_asn: int | None = None
# use doc_ids to preserve order
for doc_id in doc_ids:
doc = qs.get(id=doc_id)
@@ -437,8 +401,6 @@ def merge(
version = max(version, pdf.pdf_version)
merged_pdf.pages.extend(pdf.pages)
affected_docs.append(doc.id)
if handoff_asn is None and doc.archive_serial_number is not None:
handoff_asn = doc.archive_serial_number
except Exception as e:
logger.exception(
f"Error merging document {doc.id}, it will not be included in the merge: {e}",
@@ -464,8 +426,6 @@ def merge(
DocumentMetadataOverrides.from_document(metadata_document)
)
overrides.title = metadata_document.title + " (merged)"
if metadata_document.archive_serial_number is not None:
handoff_asn = metadata_document.archive_serial_number
else:
overrides = DocumentMetadataOverrides()
else:
@@ -473,11 +433,8 @@ def merge(
if user is not None:
overrides.owner_id = user.id
if not delete_originals:
overrides.skip_asn_if_exists = True
if delete_originals and handoff_asn is not None:
overrides.asn = handoff_asn
# Avoid copying or detecting ASN from merged PDFs to prevent collision
overrides.skip_asn = True
logger.info("Adding merged document to the task queue.")
@@ -490,20 +447,12 @@ def merge(
)
if delete_originals:
backup = release_archive_serial_numbers(affected_docs)
logger.info(
"Queueing removal of original documents after consumption of merged document",
)
try:
consume_task.apply_async(
link=[delete.si(affected_docs)],
link_error=[restore_archive_serial_numbers_task.s(backup)],
)
except Exception:
restore_archive_serial_numbers(backup)
raise
else:
consume_task.delay()
chain(consume_task, delete.si(affected_docs)).delay()
else:
consume_task.delay()
return "OK"
@@ -545,8 +494,6 @@ def split(
overrides.title = f"{doc.title} (split {idx + 1})"
if user is not None:
overrides.owner_id = user.id
if not delete_originals:
overrides.skip_asn_if_exists = True
logger.info(
f"Adding split document with pages {split_doc} to the task queue.",
)
@@ -561,20 +508,10 @@ def split(
)
if delete_originals:
backup = release_archive_serial_numbers([doc.id])
logger.info(
"Queueing removal of original document after consumption of the split documents",
)
try:
chord(
header=consume_tasks,
body=delete.si([doc.id]),
).apply_async(
link_error=[restore_archive_serial_numbers_task.s(backup)],
)
except Exception:
restore_archive_serial_numbers(backup)
raise
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
else:
group(consume_tasks).delay()
@@ -677,10 +614,7 @@ def edit_pdf(
)
if user is not None:
overrides.owner_id = user.id
if not delete_original:
overrides.skip_asn_if_exists = True
if delete_original and len(pdf_docs) == 1:
overrides.asn = doc.archive_serial_number
for idx, pdf in enumerate(pdf_docs, start=1):
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
@@ -699,17 +633,7 @@ def edit_pdf(
)
if delete_original:
backup = release_archive_serial_numbers([doc.id])
try:
chord(
header=consume_tasks,
body=delete.si([doc.id]),
).apply_async(
link_error=[restore_archive_serial_numbers_task.s(backup)],
)
except Exception:
restore_archive_serial_numbers(backup)
raise
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
else:
group(consume_tasks).delay()

View File

@@ -32,12 +32,12 @@ from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import parse_date
from documents.permissions import set_permissions_for_object
from documents.plugins.base import AlwaysRunPluginMixin
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import NoCleanupPluginMixin
from documents.plugins.base import NoSetupPluginMixin
from documents.plugins.date_parsing import get_date_parser
from documents.plugins.helpers import ProgressManager
from documents.plugins.helpers import ProgressStatusOptions
from documents.signals import document_consumption_finished
@@ -426,7 +426,8 @@ class ConsumerPlugin(
ProgressStatusOptions.WORKING,
ConsumerStatusShortMessage.PARSE_DATE,
)
date = parse_date(self.filename, text)
with get_date_parser() as date_parser:
date = next(date_parser.parse(self.filename, text), None)
archive_path = document_parser.get_archive_path()
page_count = document_parser.get_page_count(self.working_copy, mime_type)
@@ -690,7 +691,7 @@ class ConsumerPlugin(
pk=self.metadata.storage_path_id,
)
if self.metadata.asn is not None:
if self.metadata.asn is not None and not self.metadata.skip_asn:
document.archive_serial_number = self.metadata.asn
if self.metadata.owner_id:
@@ -832,8 +833,8 @@ class ConsumerPreflightPlugin(
"""
Check that if override_asn is given, it is unique and within a valid range
"""
if self.metadata.asn is None:
# if ASN is None
if self.metadata.skip_asn or self.metadata.asn is None:
# if skip is set or ASN is None
return
# Validate the range is above zero and less than uint32_t max
# otherwise, Whoosh can't handle it in the index

View File

@@ -30,7 +30,7 @@ class DocumentMetadataOverrides:
change_users: list[int] | None = None
change_groups: list[int] | None = None
custom_fields: dict | None = None
skip_asn_if_exists: bool = False
skip_asn: bool = False
def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
"""
@@ -50,8 +50,8 @@ class DocumentMetadataOverrides:
self.storage_path_id = other.storage_path_id
if other.owner_id is not None:
self.owner_id = other.owner_id
if other.skip_asn_if_exists:
self.skip_asn_if_exists = True
if other.skip_asn:
self.skip_asn = True
# merge
if self.tag_ids is None:

View File

@@ -501,9 +501,22 @@ class Command(BaseCommand):
stability_timeout_ms = int(stability_delay * 1000)
testing_timeout_ms = int(self.testing_timeout_s * 1000)
# Start with no timeout (wait indefinitely for first event)
# unless in testing mode
timeout_ms = testing_timeout_ms if is_testing else 0
# Calculate appropriate timeout for watch loop
# In polling mode, rust_timeout must be significantly longer than poll_delay_ms
# to ensure poll cycles can complete before timing out
if is_testing:
if use_polling:
# For polling: timeout must be at least 3x the poll interval to allow
# multiple poll cycles. This prevents timeouts from interfering with
# the polling mechanism.
min_polling_timeout_ms = poll_delay_ms * 3
timeout_ms = max(min_polling_timeout_ms, testing_timeout_ms)
else:
# For native watching, use short timeout to check stop flag
timeout_ms = testing_timeout_ms
else:
# Not testing, wait indefinitely for first event
timeout_ms = 0
self.stop_flag.clear()
@@ -543,8 +556,14 @@ class Command(BaseCommand):
# Check pending files at stability interval
timeout_ms = stability_timeout_ms
elif is_testing:
# In testing, use short timeout to check stop flag
timeout_ms = testing_timeout_ms
# In testing, use appropriate timeout based on watch mode
if use_polling:
# For polling: ensure timeout allows polls to complete
min_polling_timeout_ms = poll_delay_ms * 3
timeout_ms = max(min_polling_timeout_ms, testing_timeout_ms)
else:
# For native watching, use short timeout to check stop flag
timeout_ms = testing_timeout_ms
else: # pragma: nocover
# No pending files, wait indefinitely
timeout_ms = 0

View File

@@ -9,22 +9,17 @@ import subprocess
import tempfile
from functools import lru_cache
from pathlib import Path
from re import Match
from typing import TYPE_CHECKING
from django.conf import settings
from django.utils import timezone
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
from paperless.config import OcrConfig
from paperless.utils import ocr_to_dateparser_languages
if TYPE_CHECKING:
import datetime
from collections.abc import Iterator
# This regular expression will try to find dates in the document at
# hand and will match the following formats:
@@ -259,75 +254,6 @@ def make_thumbnail_from_pdf(in_path: Path, temp_dir: Path, logging_group=None) -
return out_path
def parse_date(filename, text) -> datetime.datetime | None:
    """
    Return the first date produced by ``parse_date_generator`` for the given
    filename and text, or ``None`` when it yields nothing.
    """
    for first_hit in parse_date_generator(filename, text):
        return first_hit
    return None
def parse_date_generator(filename, text) -> Iterator[datetime.datetime]:
    """
    Yield every acceptable date found for a document.

    When ``settings.FILENAME_DATE_ORDER`` is set, the filename is searched
    first using that order; afterwards the text is searched using
    ``settings.DATE_ORDER``. Candidates are located with ``DATE_REGEX``,
    parsed with dateparser and dropped when they are unparsable, not after
    the year 1900, in the future, or listed in ``settings.IGNORE_DATES``.
    """

    def _to_datetime(raw: str, order: str) -> datetime.datetime:
        """
        Parse one candidate string with dateparser using the given date order.
        """
        import dateparser

        ocr_config = OcrConfig()
        # Explicitly configured languages win; otherwise derive them from OCR.
        locales = settings.DATE_PARSER_LANGUAGES or ocr_to_dateparser_languages(
            ocr_config.language,
        )
        parser_settings = {
            "DATE_ORDER": order,
            "PREFER_DAY_OF_MONTH": "first",
            "RETURN_AS_TIMEZONE_AWARE": True,
            "TIMEZONE": settings.TIME_ZONE,
        }
        return dateparser.parse(raw, settings=parser_settings, locales=locales)

    def _is_acceptable(candidate: datetime.datetime | None) -> bool:
        """
        Tell whether a parsed value passes the year/future/ignore-list checks.
        """
        return (
            candidate is not None
            and candidate.year > 1900
            and candidate <= timezone.now()
            and candidate.date() not in settings.IGNORE_DATES
        )

    def _scan(content: str, order: str) -> Iterator[datetime.datetime]:
        """
        Yield the acceptable dates for all regex matches in ``content``.
        """
        for hit in re.finditer(DATE_REGEX, content):
            raw = hit.group(0)
            try:
                candidate = _to_datetime(raw, order)
            except Exception:
                # Skip all matches that do not parse to a proper date
                candidate = None
            if _is_acceptable(candidate):
                yield candidate

    # if filename date parsing is enabled, search there first:
    if settings.FILENAME_DATE_ORDER:
        yield from _scan(filename, settings.FILENAME_DATE_ORDER)

    # Then go through the document text
    yield from _scan(text, settings.DATE_ORDER)
class ParseError(Exception):
    """Exception type exposed by this module for parsing failures."""

View File

@@ -0,0 +1,92 @@
import logging
from functools import lru_cache
from importlib.metadata import EntryPoint
from importlib.metadata import entry_points
from typing import Final
from django.conf import settings
from django.utils import timezone
from documents.plugins.date_parsing.base import DateParserConfig
from documents.plugins.date_parsing.base import DateParserPluginBase
from documents.plugins.date_parsing.regex_parser import RegexDateParserPlugin
from paperless.utils import ocr_to_dateparser_languages
logger = logging.getLogger(__name__)
DATE_PARSER_ENTRY_POINT_GROUP: Final = "paperless_ngx.date_parsers"
@lru_cache(maxsize=1)
def _discover_parser_class() -> type[DateParserPluginBase]:
    """
    Discover the date parser plugin class to use.

    Entry points registered under ``DATE_PARSER_ENTRY_POINT_GROUP`` are loaded
    once each; only classes deriving from ``DateParserPluginBase`` are kept.

    - If one or more valid plugins are found, they are sorted by entry point
      name and the first one is returned (a warning is logged when there is
      more than one).
    - If no valid plugin is found, or the entry points cannot be queried,
      the default ``RegexDateParserPlugin`` is returned.

    The result is cached, so discovery runs at most once per process unless
    the cache is cleared.
    """
    eps: tuple[EntryPoint, ...]
    try:
        eps = entry_points(group=DATE_PARSER_ENTRY_POINT_GROUP)
    except Exception as e:
        logger.warning(f"Could not query entry points for date parsers: {e}")
        eps = ()

    # Keep the loaded class next to its name so the winner does not have to be
    # loaded a second time outside of the error handling below.
    valid_plugins: list[tuple[str, type[DateParserPluginBase]]] = []
    for ep in eps:
        try:
            plugin_class = ep.load()
        except Exception as e:
            logger.error(f"Unable to load date parser plugin {ep.name}: {e}")
            continue

        # issubclass() raises TypeError for non-class objects, so make sure we
        # actually got a class before asking.
        if isinstance(plugin_class, type) and issubclass(
            plugin_class,
            DateParserPluginBase,
        ):
            valid_plugins.append((ep.name, plugin_class))
        else:
            logger.warning(f"Plugin {ep.name} does not subclass DateParser.")

    if not valid_plugins:
        return RegexDateParserPlugin

    # Sort on the name only; the sort is stable for duplicate names.
    valid_plugins.sort(key=lambda plugin: plugin[0])

    if len(valid_plugins) > 1:
        logger.warning(
            f"Multiple date parsers found: "
            f"{[name for name, _ in valid_plugins]}. "
            f"Using the first one by name: '{valid_plugins[0][0]}'.",
        )

    return valid_plugins[0][1]
def get_date_parser() -> DateParserPluginBase:
    """
    Factory function to get an initialized date parser instance.

    This function is responsible for:
    1. Discovering the correct parser class (plugin or default).
    2. Loading configuration from Django settings.
    3. Instantiating the parser with the configuration.

    A new instance (with a fresh ``reference_time``) is created on every call;
    only the class discovery is cached.
    """
    # 1. Discover the class (this is cached)
    parser_class = _discover_parser_class()

    # 2. Load configuration from settings
    # TODO: Decide whether the OCR language should be read from the OCR
    #       configuration object instead of settings.OCR_LANGUAGE.
    # Explicitly configured languages win; otherwise derive them from the
    # OCR language setting.
    languages = settings.DATE_PARSER_LANGUAGES or ocr_to_dateparser_languages(
        settings.OCR_LANGUAGE,
    )

    config = DateParserConfig(
        languages=languages,
        timezone_str=settings.TIME_ZONE,
        ignore_dates=settings.IGNORE_DATES,
        reference_time=timezone.now(),
        filename_date_order=settings.FILENAME_DATE_ORDER,
        content_date_order=settings.DATE_ORDER,
    )

    # 3. Instantiate the discovered class with the config
    return parser_class(config=config)

View File

@@ -0,0 +1,124 @@
import datetime
import logging
from abc import ABC
from abc import abstractmethod
from collections.abc import Iterator
from dataclasses import dataclass
from types import TracebackType
try:
from typing import Self
except ImportError:
from typing_extensions import Self
import dateparser
logger = logging.getLogger(__name__)
@dataclass(frozen=True, slots=True)
class DateParserConfig:
"""
Configuration for a DateParser instance.
This object is created by the factory and passed to the
parser's constructor, decoupling the parser from settings.
"""
languages: list[str]
timezone_str: str
ignore_dates: set[datetime.date]
# A "now" timestamp for filtering future dates.
# Passed in by the factory.
reference_time: datetime.datetime
# Settings for the default RegexDateParser
# Other plugins should use or consider these, but it is not required
filename_date_order: str | None
content_date_order: str
class DateParserPluginBase(ABC):
    """
    Common base for all date parsing strategies.

    A parser receives its complete configuration as a ``DateParserConfig``
    and can be used as a context manager so that subclasses have a place to
    acquire and release resources.
    """

    def __init__(self, config: DateParserConfig):
        """
        Store the configuration for later use by the helpers and ``parse``.
        """
        self.config = config

    def __enter__(self) -> Self:
        """
        Start the ``with`` block; the parser itself is the managed object.

        Subclasses can override this to acquire resources.
        """
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """
        Finish the ``with`` block.

        The base class has nothing to release. Returning ``None`` means an
        exception raised inside the block is propagated, not suppressed.
        Subclasses can override this to release resources.
        """
        return None

    def _parse_string(
        self,
        date_string: str,
        date_order: str,
    ) -> datetime.datetime | None:
        """
        Turn one date string into a datetime via dateparser.

        Timezone and locales come from ``self.config``. Any exception raised
        while parsing is logged as an error and results in ``None``.
        """
        try:
            parser_settings = {
                "DATE_ORDER": date_order,
                "PREFER_DAY_OF_MONTH": "first",
                "RETURN_AS_TIMEZONE_AWARE": True,
                "TIMEZONE": self.config.timezone_str,
            }
            return dateparser.parse(
                date_string,
                settings=parser_settings,
                locales=self.config.languages,
            )
        except Exception as e:
            logger.error(f"Error while parsing date string '{date_string}': {e}")
            return None

    def _filter_date(
        self,
        date: datetime.datetime | None,
    ) -> datetime.datetime | None:
        """
        Return ``date`` if it is acceptable according to ``self.config``,
        otherwise ``None``.

        A date is rejected when it is ``None``, its year is 1900 or earlier,
        it lies after ``reference_time``, or its calendar date is listed in
        ``ignore_dates``.
        """
        if date is None:
            return None
        if not date.year > 1900:
            return None
        if not date <= self.config.reference_time:
            return None
        if date.date() in self.config.ignore_dates:
            return None
        return date

    @abstractmethod
    def parse(self, filename: str, content: str) -> Iterator[datetime.datetime]:
        """
        Yield the valid datetimes found in a document's filename and content.
        """

View File

@@ -0,0 +1,65 @@
import datetime
import re
from collections.abc import Iterator
from re import Match
from documents.plugins.date_parsing.base import DateParserPluginBase
class RegexDateParserPlugin(DateParserPluginBase):
    """
    Default date parser: locates date-like substrings with one large regular
    expression and hands each of them to the base class helpers.

    All configuration comes from the DateParserConfig given to the
    constructor.
    """

    # NOTE(review): ``(?!=([_-]))`` is a negative lookahead for a literal "="
    # followed by "_" or "-". A lookbehind may have been intended; confirm
    # before changing, since the pattern is kept exactly as it is.
    DATE_REGEX = re.compile(
        r"(\b|(?!=([_-])))(\d{1,2})[\.\/-](\d{1,2})[\.\/-](\d{4}|\d{2})(\b|(?=([_-])))|"
        r"(\b|(?!=([_-])))(\d{4}|\d{2})[\.\/-](\d{1,2})[\.\/-](\d{1,2})(\b|(?=([_-])))|"
        r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|"
        r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{1,2}, (\d{4}))(\b|(?=([_-])))|"
        r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{4})(\b|(?=([_-])))|"
        r"(\b|(?!=([_-])))(\d{1,2}[^ 0-9]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
        r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞğü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))",
        re.IGNORECASE,
    )

    def _process_match(
        self,
        match: Match[str],
        date_order: str,
    ) -> datetime.datetime | None:
        """
        Parse the full text of one regex match and validate the result.
        """
        parsed = self._parse_string(match.group(0), date_order)
        return self._filter_date(parsed)

    def _process_content(
        self,
        content: str,
        date_order: str,
    ) -> Iterator[datetime.datetime]:
        """
        Lazily yield the valid dates for every regex match in ``content``.
        """
        candidates = (
            self._process_match(found, date_order)
            for found in re.finditer(self.DATE_REGEX, content)
        )
        yield from (
            candidate for candidate in candidates if candidate is not None
        )

    def parse(self, filename: str, content: str) -> Iterator[datetime.datetime]:
        """
        Yield dates from the filename first (only when a filename date order
        is configured) and then from the content.
        """
        filename_order = self.config.filename_date_order
        if filename_order:
            yield from self._process_content(filename, filename_order)

        yield from self._process_content(content, self.config.content_date_order)

View File

@@ -0,0 +1,82 @@
import datetime
from collections.abc import Generator
from typing import Any
import pytest
import pytest_django
from documents.plugins.date_parsing import _discover_parser_class
from documents.plugins.date_parsing.base import DateParserConfig
from documents.plugins.date_parsing.regex_parser import RegexDateParserPlugin
@pytest.fixture
def base_config() -> DateParserConfig:
    """
    Plain English/UTC parser configuration with an empty ignore list and a
    fixed reference time of 2024-01-15 12:00 UTC.
    """
    reference_time = datetime.datetime(
        year=2024,
        month=1,
        day=15,
        hour=12,
        minute=0,
        second=0,
        tzinfo=datetime.timezone.utc,
    )
    return DateParserConfig(
        languages=["en"],
        timezone_str="UTC",
        ignore_dates=set(),
        reference_time=reference_time,
        filename_date_order="YMD",
        content_date_order="DMY",
    )
@pytest.fixture
def config_with_ignore_dates() -> DateParserConfig:
    """
    English/German configuration in the America/New_York timezone that
    ignores 2024-01-01 and 2024-12-25, with a fixed reference time of
    2024-01-15 12:00 UTC.
    """
    ignored = {datetime.date(2024, 1, 1), datetime.date(2024, 12, 25)}
    reference_time = datetime.datetime(
        year=2024,
        month=1,
        day=15,
        hour=12,
        minute=0,
        second=0,
        tzinfo=datetime.timezone.utc,
    )
    return DateParserConfig(
        languages=["en", "de"],
        timezone_str="America/New_York",
        ignore_dates=ignored,
        reference_time=reference_time,
        filename_date_order="DMY",
        content_date_order="MDY",
    )
@pytest.fixture
def regex_parser(base_config: DateParserConfig) -> RegexDateParserPlugin:
    """A RegexDateParserPlugin built from the ``base_config`` fixture."""
    parser = RegexDateParserPlugin(base_config)
    return parser
@pytest.fixture
def clear_lru_cache() -> Generator[None, None, None]:
    """
    Reset the cache of ``_discover_parser_class`` both before the test runs
    and again once it has finished, so discovery results never leak between
    tests.
    """
    reset_cache = _discover_parser_class.cache_clear

    reset_cache()
    yield
    reset_cache()
@pytest.fixture
def mock_date_parser_settings(settings: pytest_django.fixtures.SettingsWrapper) -> Any:
    """
    Apply fixed date-parsing related Django settings through the
    pytest-django ``settings`` fixture and hand the wrapper back.
    """
    overrides = {
        "DATE_PARSER_LANGUAGES": ["en", "de"],
        "TIME_ZONE": "UTC",
        "IGNORE_DATES": [datetime.date(1900, 1, 1)],
        "FILENAME_DATE_ORDER": "YMD",
        "DATE_ORDER": "DMY",
    }
    # Assigned one by one, in the order listed above.
    for setting_name, value in overrides.items():
        setattr(settings, setting_name, value)
    return settings

View File

@@ -0,0 +1,228 @@
import datetime
import logging
from collections.abc import Iterator
from importlib.metadata import EntryPoint
import pytest
import pytest_mock
from django.utils import timezone
from documents.plugins.date_parsing import DATE_PARSER_ENTRY_POINT_GROUP
from documents.plugins.date_parsing import _discover_parser_class
from documents.plugins.date_parsing import get_date_parser
from documents.plugins.date_parsing.base import DateParserConfig
from documents.plugins.date_parsing.base import DateParserPluginBase
from documents.plugins.date_parsing.regex_parser import RegexDateParserPlugin
class AlphaParser(DateParserPluginBase):
    """Minimal concrete parser used as a stand-in plugin in these tests."""

    def parse(self, filename: str, content: str) -> Iterator[datetime.datetime]:
        """Yield the current time once, ignoring both arguments."""
        current_time = timezone.now()
        yield current_time
class BetaParser(DateParserPluginBase):
    """Second minimal concrete parser, used when two plugins are needed."""

    def parse(self, filename: str, content: str) -> Iterator[datetime.datetime]:
        """Yield the current time once, ignoring both arguments."""
        current_time = timezone.now()
        yield current_time
# Every test in this class runs with the clear_lru_cache fixture, so the
# cached result of _discover_parser_class() never carries over between tests.
# entry_points is always patched where the module under test looks it up.
@pytest.mark.date_parsing
@pytest.mark.usefixtures("clear_lru_cache")
class TestDiscoverParserClass:
"""Tests for the _discover_parser_class() function."""
# No entry points at all: the built-in regex parser must be returned.
def test_returns_default_when_no_plugins_found(
self,
mocker: pytest_mock.MockerFixture,
) -> None:
mocker.patch(
"documents.plugins.date_parsing.entry_points",
return_value=(),
)
result = _discover_parser_class()
assert result is RegexDateParserPlugin
# entry_points() itself raising: fall back to the default and log a warning.
def test_returns_default_when_entrypoint_query_fails(
self,
mocker: pytest_mock.MockerFixture,
caplog: pytest.LogCaptureFixture,
) -> None:
mocker.patch(
"documents.plugins.date_parsing.entry_points",
side_effect=RuntimeError("boom"),
)
result = _discover_parser_class()
assert result is RegexDateParserPlugin
assert "Could not query entry points" in caplog.text
# An entry point that loads something which is not a DateParserPluginBase
# subclass is rejected with a warning and the default is used.
def test_filters_out_invalid_plugins(
self,
mocker: pytest_mock.MockerFixture,
caplog: pytest.LogCaptureFixture,
) -> None:
fake_ep = mocker.MagicMock(spec=EntryPoint)
fake_ep.name = "bad_plugin"
fake_ep.load.return_value = object # not subclass of DateParser
mocker.patch(
"documents.plugins.date_parsing.entry_points",
return_value=(fake_ep,),
)
result = _discover_parser_class()
assert result is RegexDateParserPlugin
assert "does not subclass DateParser" in caplog.text
# An entry point whose load() raises is skipped, logged, and the default is used.
def test_skips_plugins_that_fail_to_load(
self,
mocker: pytest_mock.MockerFixture,
caplog: pytest.LogCaptureFixture,
) -> None:
fake_ep = mocker.MagicMock(spec=EntryPoint)
fake_ep.name = "failing_plugin"
fake_ep.load.side_effect = ImportError("cannot import")
mocker.patch(
"documents.plugins.date_parsing.entry_points",
return_value=(fake_ep,),
)
result = _discover_parser_class()
assert result is RegexDateParserPlugin
assert "Unable to load date parser plugin failing_plugin" in caplog.text
def test_returns_single_valid_plugin_without_warning(
self,
mocker: pytest_mock.MockerFixture,
caplog: pytest.LogCaptureFixture,
) -> None:
"""If exactly one valid plugin is discovered, it should be returned without logging a warning."""
ep = mocker.MagicMock(spec=EntryPoint)
ep.name = "alpha"
ep.load.return_value = AlphaParser
mock_entry_points = mocker.patch(
"documents.plugins.date_parsing.entry_points",
return_value=(ep,),
)
with caplog.at_level(
logging.WARNING,
logger="documents.plugins.date_parsing",
):
result = _discover_parser_class()
# It should have called entry_points with the correct group
mock_entry_points.assert_called_once_with(group=DATE_PARSER_ENTRY_POINT_GROUP)
# The discovered class should be exactly our AlphaParser
assert result is AlphaParser
# No warnings should have been logged
assert not any(
"Multiple date parsers found" in record.message for record in caplog.records
), "Unexpected warning logged when only one plugin was found"
# The entry points are supplied as (beta, alpha); "alpha" must still win,
# showing that selection is by name and not by discovery order.
def test_returns_first_valid_plugin_by_name(
self,
mocker: pytest_mock.MockerFixture,
) -> None:
ep_a = mocker.MagicMock(spec=EntryPoint)
ep_a.name = "alpha"
ep_a.load.return_value = AlphaParser
ep_b = mocker.MagicMock(spec=EntryPoint)
ep_b.name = "beta"
ep_b.load.return_value = BetaParser
mocker.patch(
"documents.plugins.date_parsing.entry_points",
return_value=(ep_b, ep_a),
)
result = _discover_parser_class()
assert result is AlphaParser
# Two valid plugins: the first by name is used and a warning is logged.
def test_logs_warning_if_multiple_plugins_found(
self,
mocker: pytest_mock.MockerFixture,
caplog: pytest.LogCaptureFixture,
) -> None:
ep1 = mocker.MagicMock(spec=EntryPoint)
ep1.name = "a"
ep1.load.return_value = AlphaParser
ep2 = mocker.MagicMock(spec=EntryPoint)
ep2.name = "b"
ep2.load.return_value = BetaParser
mocker.patch(
"documents.plugins.date_parsing.entry_points",
return_value=(ep1, ep2),
)
with caplog.at_level(
logging.WARNING,
logger="documents.plugins.date_parsing",
):
result = _discover_parser_class()
# Should select alphabetically first plugin ("a")
assert result is AlphaParser
# Should log a warning mentioning multiple parsers
assert any(
"Multiple date parsers found" in record.message for record in caplog.records
), "Expected a warning about multiple date parsers"
# Calling the function twice must query entry_points only once (lru_cache).
def test_cache_behavior_only_runs_once(
self,
mocker: pytest_mock.MockerFixture,
) -> None:
mock_entry_points = mocker.patch(
"documents.plugins.date_parsing.entry_points",
return_value=(),
)
# First call populates cache
_discover_parser_class()
# Second call should not re-invoke entry_points
_discover_parser_class()
mock_entry_points.assert_called_once()
# These tests run with the mock_date_parser_settings fixture and patch
# _discover_parser_class, so only the factory's own wiring is exercised.
@pytest.mark.date_parsing
@pytest.mark.usefixtures("mock_date_parser_settings")
class TestGetDateParser:
"""Tests for the get_date_parser() factory function."""
# The factory must instantiate whatever class discovery returns and fill the
# config from the (mocked) Django settings.
def test_returns_instance_of_discovered_class(
self,
mocker: pytest_mock.MockerFixture,
) -> None:
mocker.patch(
"documents.plugins.date_parsing._discover_parser_class",
return_value=AlphaParser,
)
parser = get_date_parser()
assert isinstance(parser, AlphaParser)
assert isinstance(parser.config, DateParserConfig)
assert parser.config.languages == ["en", "de"]
assert parser.config.timezone_str == "UTC"
# NOTE(review): compared against a list here, while DateParserConfig
# annotates ignore_dates as a set -- presumably mirrors the value the
# settings fixture assigns; confirm which type is intended.
assert parser.config.ignore_dates == [datetime.date(1900, 1, 1)]
assert parser.config.filename_date_order == "YMD"
assert parser.config.content_date_order == "DMY"
# Check reference_time near now
delta = abs((parser.config.reference_time - timezone.now()).total_seconds())
assert delta < 2
# With discovery returning the default class, a RegexDateParserPlugin is built.
def test_uses_default_regex_parser_when_no_plugins(
self,
mocker: pytest_mock.MockerFixture,
) -> None:
mocker.patch(
"documents.plugins.date_parsing._discover_parser_class",
return_value=RegexDateParserPlugin,
)
parser = get_date_parser()
assert isinstance(parser, RegexDateParserPlugin)

View File

@@ -0,0 +1,433 @@
import datetime
import logging
from typing import Any
import pytest
import pytest_mock
from documents.plugins.date_parsing.base import DateParserConfig
from documents.plugins.date_parsing.regex_parser import RegexDateParserPlugin
# Exercises the _parse_string helper through a RegexDateParserPlugin instance
# supplied by the regex_parser fixture.
@pytest.mark.date_parsing
class TestParseString:
"""Tests for DateParser._parse_string method via RegexDateParser."""
# Each case is (input string, date order, expected year); only the parsed
# year is asserted, not the full date.
@pytest.mark.parametrize(
("date_string", "date_order", "expected_year"),
[
pytest.param("15/01/2024", "DMY", 2024, id="dmy_slash"),
pytest.param("01/15/2024", "MDY", 2024, id="mdy_slash"),
pytest.param("2024/01/15", "YMD", 2024, id="ymd_slash"),
pytest.param("January 15, 2024", "DMY", 2024, id="month_name_comma"),
pytest.param("15 Jan 2024", "DMY", 2024, id="day_abbr_month_year"),
pytest.param("15.01.2024", "DMY", 2024, id="dmy_dot"),
pytest.param("2024-01-15", "YMD", 2024, id="ymd_dash"),
],
)
def test_parse_string_valid_formats(
self,
regex_parser: RegexDateParserPlugin,
date_string: str,
date_order: str,
expected_year: int,
) -> None:
"""Should correctly parse various valid date formats."""
result = regex_parser._parse_string(date_string, date_order)
assert result is not None
assert result.year == expected_year
# Strings that are not dates; all are parsed with the "DMY" order.
@pytest.mark.parametrize(
"invalid_string",
[
pytest.param("not a date", id="plain_text"),
pytest.param("32/13/2024", id="invalid_day_month"),
pytest.param("", id="empty_string"),
pytest.param("abc123xyz", id="alphanumeric_gibberish"),
pytest.param("99/99/9999", id="out_of_range"),
],
)
def test_parse_string_invalid_input(
self,
regex_parser: RegexDateParserPlugin,
invalid_string: str,
) -> None:
"""Should return None for invalid date strings."""
result = regex_parser._parse_string(invalid_string, "DMY")
assert result is None
def test_parse_string_handles_exceptions(
self,
caplog: pytest.LogCaptureFixture,
mocker: pytest_mock.MockerFixture,
regex_parser: RegexDateParserPlugin,
) -> None:
"""Should handle and log exceptions from dateparser gracefully."""
with caplog.at_level(
logging.ERROR,
logger="documents.plugins.date_parsing.base",
):
# We still need to mock dateparser.parse to force the exception
mocker.patch(
"documents.plugins.date_parsing.base.dateparser.parse",
side_effect=ValueError(
"Parsing error: 01/01/2024",
),
)
# 1. Execute the function under test
result = regex_parser._parse_string("01/01/2024", "DMY")
assert result is None
# Check if an error was logged
assert len(caplog.records) == 1
assert caplog.records[0].levelname == "ERROR"
# Check if the specific error message is present
assert "Error while parsing date string" in caplog.text
# Optional: Check for the exact exception message if it's included in the log
assert "Parsing error: 01/01/2024" in caplog.text
# Exercises the _filter_date helper. Most tests use the
# config_with_ignore_dates fixture (reference time 2024-01-15 12:00 UTC,
# ignoring 2024-01-01 and 2024-12-25).
@pytest.mark.date_parsing
class TestFilterDate:
"""Tests for DateParser._filter_date method via RegexDateParser."""
# Each case is (input datetime, expected result); None means "filtered out".
@pytest.mark.parametrize(
("date", "expected_output"),
[
# Valid Dates
pytest.param(
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
id="valid_past_date",
),
pytest.param(
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
id="exactly_at_reference",
),
pytest.param(
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
id="year_1901_valid",
),
# Date is > reference_time
pytest.param(
datetime.datetime(2024, 1, 16, tzinfo=datetime.timezone.utc),
None,
id="future_date_day_after",
),
# date.date() in ignore_dates
pytest.param(
datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
None,
id="ignored_date_midnight_jan1",
),
pytest.param(
datetime.datetime(2024, 1, 1, 10, 30, 0, tzinfo=datetime.timezone.utc),
None,
id="ignored_date_midday_jan1",
),
# NOTE(review): 2024-12-25 is also later than the fixture's reference time
# (2024-01-15 12:00 UTC), so this case would be rejected by the future-date
# rule even without the ignore list.
pytest.param(
datetime.datetime(2024, 12, 25, 15, 0, 0, tzinfo=datetime.timezone.utc),
None,
id="ignored_date_dec25_future",
),
# date.year <= 1900
pytest.param(
datetime.datetime(1899, 12, 31, tzinfo=datetime.timezone.utc),
None,
id="year_1899",
),
pytest.param(
datetime.datetime(1900, 1, 1, tzinfo=datetime.timezone.utc),
None,
id="year_1900_boundary",
),
# date is None
pytest.param(None, None, id="none_input"),
],
)
def test_filter_date_validation_rules(
self,
config_with_ignore_dates: DateParserConfig,
date: datetime.datetime | None,
expected_output: datetime.datetime | None,
) -> None:
"""Should correctly validate dates against various rules."""
parser = RegexDateParserPlugin(config_with_ignore_dates)
result = parser._filter_date(date)
assert result == expected_output
def test_filter_date_respects_ignore_dates(
self,
config_with_ignore_dates: DateParserConfig,
) -> None:
"""Should filter out dates in the ignore_dates set."""
parser = RegexDateParserPlugin(config_with_ignore_dates)
# On an ignored calendar date, at a time of day other than midnight.
ignored_date = datetime.datetime(
2024,
1,
1,
12,
0,
tzinfo=datetime.timezone.utc,
)
# NOTE(review): this date is also after the fixture's reference time, so its
# rejection does not prove that the ignore list was consulted.
another_ignored = datetime.datetime(
2024,
12,
25,
15,
30,
tzinfo=datetime.timezone.utc,
)
# The day after an ignored date, still before the reference time.
allowed_date = datetime.datetime(
2024,
1,
2,
12,
0,
tzinfo=datetime.timezone.utc,
)
assert parser._filter_date(ignored_date) is None
assert parser._filter_date(another_ignored) is None
assert parser._filter_date(allowed_date) == allowed_date
# Uses the regex_parser fixture instead of the ignore-dates configuration.
def test_filter_date_timezone_aware(
self,
regex_parser: RegexDateParserPlugin,
) -> None:
"""Should work with timezone-aware datetimes."""
date_utc = datetime.datetime(2024, 1, 10, 12, 0, tzinfo=datetime.timezone.utc)
result = regex_parser._filter_date(date_utc)
assert result is not None
assert result.tzinfo is not None
@pytest.mark.date_parsing
class TestRegexDateParser:
    """
    High-level tests for ``RegexDateParserPlugin.parse``.

    ``dateparser.parse`` is replaced by a local fake in every test that
    needs it, so results depend only on the substrings handed to the fake.
    """

    # Dotted path of the dateparser.parse reference patched by these tests.
    _DATEPARSER_PARSE = "documents.plugins.date_parsing.base.dateparser.parse"

    # Reference time used by the tests that build their own configuration.
    _REFERENCE_TIME = datetime.datetime(
        2024,
        1,
        15,
        12,
        0,
        0,
        tzinfo=datetime.timezone.utc,
    )

    @pytest.mark.parametrize(
        ("filename", "content", "expected"),
        [
            pytest.param(
                "report-2023-12-25.txt",
                "Event recorded on 25/12/2022.",
                [
                    datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc),
                    datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
                ],
                id="filename-y-m-d_and_content-d-m-y",
            ),
            pytest.param(
                "img_2023.01.02.jpg",
                "Taken on 01/02/2023",
                [
                    datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc),
                    datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc),
                ],
                id="ambiguous-dates-respect-orders",
            ),
            pytest.param(
                "notes.txt",
                "bad date 99/99/9999 and 25/12/2022",
                [
                    datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
                ],
                id="parse-exception-skips-bad-and-yields-good",
            ),
        ],
    )
    def test_parse_returns_expected_dates(
        self,
        base_config: DateParserConfig,
        mocker: pytest_mock.MockerFixture,
        filename: str,
        content: str,
        expected: list[datetime.datetime],
    ) -> None:
        """
        Check the dates yielded by ``parse`` for a filename/content pair.

        dateparser.parse is mocked so tests are deterministic.
        """
        utc = datetime.timezone.utc
        parser = RegexDateParserPlugin(base_config)

        def fake_parse(
            date_string: str,
            settings: dict[str, Any] | None = None,
            locales: None = None,
        ) -> datetime.datetime | None:
            date_order = settings.get("DATE_ORDER") if settings else None

            # Filename-style YYYY-MM-DD / YYYY.MM.DD
            if "2023-12-25" in date_string or "2023.12.25" in date_string:
                return datetime.datetime(2023, 12, 25, tzinfo=utc)

            # content DMY 25/12/2022
            if "25/12/2022" in date_string or "25-12-2022" in date_string:
                return datetime.datetime(2022, 12, 25, tzinfo=utc)

            # filename YMD 2023.01.02
            if "2023.01.02" in date_string or "2023-01-02" in date_string:
                return datetime.datetime(2023, 1, 2, tzinfo=utc)

            # ambiguous 01/02/2023 -> respect DATE_ORDER setting
            if "01/02/2023" in date_string:
                if date_order == "YMD":
                    return datetime.datetime(2023, 1, 2, tzinfo=utc)
                # "DMY" and any other order read the string as 1 February
                return datetime.datetime(2023, 2, 1, tzinfo=utc)

            # simulate parse failure for malformed input
            if "99/99/9999" in date_string or "bad date" in date_string:
                raise Exception("parse failed for malformed date")

            return None

        mocker.patch(self._DATEPARSER_PARSE, side_effect=fake_parse)

        results = list(parser.parse(filename, content))

        assert results == expected
        for dt in results:
            assert dt.tzinfo is not None

    def test_parse_filters_future_and_ignored_dates(
        self,
        mocker: pytest_mock.MockerFixture,
    ) -> None:
        """
        Ensure parser filters out:
        - dates after reference_time
        - dates whose .date() are in ignore_dates
        """
        utc = datetime.timezone.utc
        cfg = DateParserConfig(
            languages=["en"],
            timezone_str="UTC",
            ignore_dates={datetime.date(2023, 12, 10)},
            reference_time=self._REFERENCE_TIME,
            filename_date_order="YMD",
            content_date_order="DMY",
        )
        parser = RegexDateParserPlugin(cfg)

        def fake_parse(
            date_string: str,
            settings: dict[str, Any] | None = None,
            locales: None = None,
        ) -> datetime.datetime | None:
            if "10/12/2023" in date_string or "10-12-2023" in date_string:
                # ignored date
                return datetime.datetime(2023, 12, 10, tzinfo=utc)
            if "01/02/2024" in date_string or "01-02-2024" in date_string:
                # future relative to reference_time -> filtered
                return datetime.datetime(2024, 2, 1, tzinfo=utc)
            if "05/01/2023" in date_string or "05-01-2023" in date_string:
                # valid
                return datetime.datetime(2023, 1, 5, tzinfo=utc)
            return None

        mocker.patch(self._DATEPARSER_PARSE, side_effect=fake_parse)

        content = "Ignored: 10/12/2023, Future: 01/02/2024, Keep: 05/01/2023"
        results = list(parser.parse("whatever.txt", content))

        assert results == [datetime.datetime(2023, 1, 5, tzinfo=utc)]

    def test_parse_handles_no_matches_and_returns_empty_list(
        self,
        base_config: DateParserConfig,
    ) -> None:
        """
        When there are no matching date-like substrings, parse should yield nothing.
        """
        parser = RegexDateParserPlugin(base_config)
        results = list(
            parser.parse("no-dates.txt", "this has no dates whatsoever"),
        )
        assert results == []

    def test_parse_skips_filename_when_filename_date_order_none(
        self,
        mocker: pytest_mock.MockerFixture,
    ) -> None:
        """
        When filename_date_order is None the parser must not attempt to parse the filename.
        Only dates found in the content should be passed to dateparser.parse.
        """
        utc = datetime.timezone.utc
        cfg = DateParserConfig(
            languages=["en"],
            timezone_str="UTC",
            ignore_dates=set(),
            reference_time=self._REFERENCE_TIME,
            filename_date_order=None,
            content_date_order="DMY",
        )
        parser = RegexDateParserPlugin(cfg)

        def fake_parse(
            date_string: str,
            settings: dict[str, Any] | None = None,
            locales: None = None,
        ) -> datetime.datetime | None:
            # return distinct datetimes so we can tell which source was parsed
            if "25/12/2022" in date_string:
                return datetime.datetime(2022, 12, 25, tzinfo=utc)
            if "2023-12-25" in date_string:
                return datetime.datetime(2023, 12, 25, tzinfo=utc)
            return None

        # Keep a handle on the patch so the recorded calls can be inspected
        parse_mock = mocker.patch(self._DATEPARSER_PARSE, side_effect=fake_parse)

        filename = "report-2023-12-25.txt"
        content = "Event recorded on 25/12/2022."
        results = list(parser.parse(filename, content))

        # Only the content date should have been parsed -> one call
        assert parse_mock.call_count == 1

        # First call, first positional argument
        called_date_string = parse_mock.call_args_list[0].args[0]
        assert "25/12/2022" in called_date_string

        # And the parser should have yielded the corresponding datetime
        assert results == [
            datetime.datetime(2022, 12, 25, tzinfo=utc),
        ]

View File

@@ -1978,11 +1978,11 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@mock.patch("documents.parsers.parse_date_generator")
@mock.patch("documents.views.get_date_parser")
@override_settings(NUMBER_OF_SUGGESTED_DATES=0)
def test_get_suggestions_dates_disabled(
self,
parse_date_generator,
mock_get_date_parser: mock.MagicMock,
):
"""
GIVEN:
@@ -1999,7 +1999,8 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
)
self.client.get(f"/api/documents/{doc.pk}/suggestions/")
self.assertFalse(parse_date_generator.called)
mock_get_date_parser.assert_not_called()
def test_saved_views(self):
u1 = User.objects.create_superuser("user1")

View File

@@ -603,21 +603,23 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_filename,
)
self.assertEqual(consume_file_args[1].title, None)
# No metadata_document_id, delete_originals False, so ASN should be None
self.assertIsNone(consume_file_args[1].asn)
self.assertTrue(consume_file_args[1].skip_asn)
# With metadata_document_id overrides
result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(consume_file_args[1].title, "B (merged)")
self.assertEqual(consume_file_args[1].created, self.doc2.created)
self.assertTrue(consume_file_args[1].skip_asn)
self.assertEqual(result, "OK")
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.chain")
def test_merge_and_delete_originals(
self,
mock_chain,
mock_consume_file,
mock_delete_documents,
):
@@ -631,12 +633,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
- Document deletion task should be called
"""
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
self.doc1.archive_serial_number = 101
self.doc2.archive_serial_number = 102
self.doc3.archive_serial_number = 103
self.doc1.save()
self.doc2.save()
self.doc3.save()
result = bulk_edit.merge(doc_ids, delete_originals=True)
self.assertEqual(result, "OK")
@@ -647,8 +643,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
mock_consume_file.assert_called()
mock_delete_documents.assert_called()
consume_sig = mock_consume_file.return_value
consume_sig.apply_async.assert_called_once()
mock_chain.assert_called_once()
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(
@@ -656,7 +651,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_filename,
)
self.assertEqual(consume_file_args[1].title, None)
self.assertEqual(consume_file_args[1].asn, 101)
self.assertTrue(consume_file_args[1].skip_asn)
delete_documents_args, _ = mock_delete_documents.call_args
self.assertEqual(
@@ -664,92 +659,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
doc_ids,
)
self.doc1.refresh_from_db()
self.doc2.refresh_from_db()
self.doc3.refresh_from_db()
self.assertIsNone(self.doc1.archive_serial_number)
self.assertIsNone(self.doc2.archive_serial_number)
self.assertIsNone(self.doc3.archive_serial_number)
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
def test_merge_and_delete_originals_restore_on_failure(
    self,
    mock_consume_file,
    mock_delete_documents,
):
    """
    GIVEN:
        - A single existing document carrying an archive serial number
    WHEN:
        - Merge is requested with delete_originals=True
        - The consume signature raises when apply_async is invoked
    THEN:
        - The merge call raises
        - The document still has its archive serial number afterwards
    """
    original_asn = 111
    self.doc1.archive_serial_number = original_asn
    self.doc1.save()

    # Signature whose apply_async blows up as soon as it is called
    mock_consume_file.return_value = mock.Mock(
        **{"apply_async.side_effect": Exception("boom")},
    )

    self.assertRaises(
        Exception,
        bulk_edit.merge,
        [self.doc1.id],
        delete_originals=True,
    )

    self.doc1.refresh_from_db()
    self.assertEqual(self.doc1.archive_serial_number, original_asn)
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
def test_merge_and_delete_originals_metadata_handoff(
    self,
    mock_consume_file,
    mock_delete_documents,
):
    """
    GIVEN:
        - Two existing documents, each with its own ASN
    WHEN:
        - Merge is called with delete_originals=True and the second
          document chosen as metadata_document_id
    THEN:
        - The overrides passed to consume_file carry the ASN of the
          metadata document
    """
    for document, serial in ((self.doc1, 101), (self.doc2, 202)):
        document.archive_serial_number = serial
    self.doc1.save()
    self.doc2.save()

    outcome = bulk_edit.merge(
        [self.doc1.id, self.doc2.id],
        metadata_document_id=self.doc2.id,
        delete_originals=True,
    )
    self.assertEqual(outcome, "OK")

    positional_args, _ = mock_consume_file.call_args
    overrides = positional_args[1]
    self.assertEqual(overrides.asn, 202)
def test_restore_archive_serial_numbers_task(self):
    """
    GIVEN:
        - A document whose archive serial number was saved and then
          cleared directly in the database
    WHEN:
        - The restore task is called with a backup mapping of id to ASN
    THEN:
        - The document's archive serial number matches the backup again
    """
    saved_asn = 444
    self.doc1.archive_serial_number = saved_asn
    self.doc1.save()

    # Clear the value via a queryset update rather than through the instance
    Document.objects.filter(pk=self.doc1.id).update(archive_serial_number=None)

    bulk_edit.restore_archive_serial_numbers_task({self.doc1.id: saved_asn})

    self.doc1.refresh_from_db()
    self.assertEqual(self.doc1.archive_serial_number, saved_asn)
@mock.patch("documents.tasks.consume_file.s")
def test_merge_with_archive_fallback(self, mock_consume_file):
"""
@@ -818,7 +727,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(mock_consume_file.call_count, 2)
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(consume_file_args[1].title, "B (split 2)")
self.assertIsNone(consume_file_args[1].asn)
self.assertEqual(result, "OK")
@@ -843,8 +751,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
"""
doc_ids = [self.doc2.id]
pages = [[1, 2], [3]]
self.doc2.archive_serial_number = 200
self.doc2.save()
result = bulk_edit.split(doc_ids, pages, delete_originals=True)
self.assertEqual(result, "OK")
@@ -862,42 +768,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
doc_ids,
)
self.doc2.refresh_from_db()
self.assertIsNone(self.doc2.archive_serial_number)
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.chord")
def test_split_restore_on_failure(
    self,
    mock_chord,
    mock_consume_file,
    mock_delete_documents,
):
    """
    GIVEN:
        - An existing document with an archive serial number
    WHEN:
        - Split with delete_originals=True is called for that document
          with a single page group
        - The chord's apply_async is set up to raise
    THEN:
        - Split still reports "OK"
        - The document keeps its archive serial number
    """
    original_asn = 222
    self.doc2.archive_serial_number = original_asn
    self.doc2.save()

    # Chord object whose apply_async raises when invoked
    mock_chord.return_value = mock.Mock(
        **{"apply_async.side_effect": Exception("boom")},
    )

    outcome = bulk_edit.split([self.doc2.id], [[1, 2]], delete_originals=True)
    self.assertEqual(outcome, "OK")

    self.doc2.refresh_from_db()
    self.assertEqual(self.doc2.archive_serial_number, original_asn)
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.Pdf.save")
def test_split_with_errors(self, mock_save_pdf, mock_consume_file):
@@ -1098,49 +968,10 @@ class TestPDFActions(DirectoriesMixin, TestCase):
mock_chord.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
self.doc2.archive_serial_number = 250
self.doc2.save()
result = bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
self.assertEqual(result, "OK")
mock_chord.assert_called_once()
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(consume_file_args[1].asn, 250)
self.doc2.refresh_from_db()
self.assertIsNone(self.doc2.archive_serial_number)
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.chord")
def test_edit_pdf_restore_on_failure(
    self,
    mock_chord,
    mock_consume_file,
    mock_delete_documents,
):
    """
    GIVEN:
        - An existing document with an archive serial number
    WHEN:
        - edit_pdf is called with delete_original=True
        - The chord's apply_async raises
    THEN:
        - edit_pdf raises
        - The document keeps its archive serial number
    """
    original_asn = 333
    self.doc2.archive_serial_number = original_asn
    self.doc2.save()

    # Chord object whose apply_async raises when invoked
    mock_chord.return_value = mock.Mock(
        **{"apply_async.side_effect": Exception("boom")},
    )

    self.assertRaises(
        Exception,
        bulk_edit.edit_pdf,
        [self.doc2.id],
        [{"page": 1}],
        delete_original=True,
    )

    self.doc2.refresh_from_db()
    self.assertEqual(self.doc2.archive_serial_number, original_asn)
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
def test_edit_pdf_with_update_document(self, mock_update_document):

View File

@@ -14,7 +14,6 @@ from django.test import override_settings
from django.utils import timezone
from guardian.core import ObjectPermissionChecker
from documents.barcodes import BarcodePlugin
from documents.consumer import ConsumerError
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
@@ -413,6 +412,14 @@ class TestConsumer(
self.assertEqual(document.archive_serial_number, 123)
self._assert_first_last_send_progress()
def testMetadataOverridesSkipAsnPropagation(self):
    """Updating from overrides with skip_asn=True turns the flag on in the target."""
    target = DocumentMetadataOverrides()
    target.update(DocumentMetadataOverrides(skip_asn=True))
    self.assertTrue(target.skip_asn)
def testOverrideTitlePlaceholders(self):
c = Correspondent.objects.create(name="Correspondent Name")
dt = DocumentType.objects.create(name="DocType Name")
@@ -1264,46 +1271,3 @@ class PostConsumeTestCase(DirectoriesMixin, GetConsumerMixin, TestCase):
r"sample\.pdf: Error while executing post-consume script: Command '\[.*\]' returned non-zero exit status \d+\.",
):
consumer.run_post_consume_script(doc)
class TestMetadataOverrides(TestCase):
    """Checks for ``DocumentMetadataOverrides.update``."""

    def test_update_skip_asn_if_exists(self):
        """skip_asn_if_exists=True on the incoming overrides is taken over by update()."""
        target = DocumentMetadataOverrides()
        target.update(DocumentMetadataOverrides(skip_asn_if_exists=True))
        self.assertTrue(target.skip_asn_if_exists)
class TestBarcodeApplyDetectedASN(TestCase):
    """
    GIVEN:
        - An existing document with ASN 123
    WHEN:
        - A BarcodePlugin applies a detected ASN of 123
    THEN:
        - With skip_asn_if_exists set and the ASN in use, no ASN is set
        - With skip_asn_if_exists set and the ASN free, the ASN is set
    """

    def test_apply_detected_asn_skips_existing_when_flag_set(self):
        """The detected ASN is only applied once the conflicting document is gone."""
        detected_asn = 123
        existing = Document.objects.create(
            checksum="X1",
            title="D1",
            archive_serial_number=detected_asn,
        )
        plugin = BarcodePlugin(
            input_doc=mock.Mock(),
            metadata=DocumentMetadataOverrides(skip_asn_if_exists=True),
            status_mgr=mock.Mock(),
            base_tmp_dir=tempfile.gettempdir(),
            task_id="test-task",
        )

        # ASN is taken by the existing document -> nothing is applied
        plugin._apply_detected_asn(detected_asn)
        self.assertIsNone(plugin.metadata.asn)

        # After removing that document the same ASN is applied
        existing.hard_delete()
        plugin._apply_detected_asn(detected_asn)
        self.assertEqual(plugin.metadata.asn, detected_asn)

View File

@@ -1,538 +0,0 @@
import datetime
from zoneinfo import ZoneInfo
import pytest
from pytest_django.fixtures import SettingsWrapper
from documents.parsers import parse_date
from documents.parsers import parse_date_generator
@pytest.mark.django_db()
class TestDate:
    """Date extraction tests for ``parse_date`` and ``parse_date_generator``."""

    def test_date_format_1(self):
        assert parse_date("", "lorem ipsum 130218 lorem ipsum") is None

    def test_date_format_2(self):
        assert parse_date("", "lorem ipsum 2018 lorem ipsum") is None

    def test_date_format_3(self):
        assert parse_date("", "lorem ipsum 20180213 lorem ipsum") is None

    def test_date_format_4(self, settings_timezone: ZoneInfo):
        expected = datetime.datetime(2018, 2, 13, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", "lorem ipsum 13.02.2018 lorem ipsum") == expected

    def test_date_format_5(self, settings_timezone: ZoneInfo):
        content = "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem ipsum"
        expected = datetime.datetime(2018, 2, 13, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_6(self):
        content = (
            "lorem ipsum\n"
            "Wohnort\n"
            "3100\n"
            "IBAN\n"
            "AT87 4534\n"
            "1234\n"
            "1234 5678\n"
            "BIC\n"
            "lorem ipsum"
        )
        assert parse_date("", content) is None

    def test_date_format_7(
        self,
        settings: SettingsWrapper,
        settings_timezone: ZoneInfo,
    ):
        settings.DATE_PARSER_LANGUAGES = ["de"]
        expected = datetime.datetime(2019, 3, 1, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", "lorem ipsum\nMärz 2019\nlorem ipsum") == expected

    def test_date_format_8(
        self,
        settings: SettingsWrapper,
        settings_timezone: ZoneInfo,
    ):
        settings.DATE_PARSER_LANGUAGES = ["de"]
        content = (
            "lorem ipsum\n"
            "Wohnort\n"
            "3100\n"
            "IBAN\n"
            "AT87 4534\n"
            "1234\n"
            "1234 5678\n"
            "BIC\n"
            "lorem ipsum\n"
            "März 2020"
        )
        expected = datetime.datetime(2020, 3, 1, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_9(
        self,
        settings: SettingsWrapper,
        settings_timezone: ZoneInfo,
    ):
        settings.DATE_PARSER_LANGUAGES = ["de"]
        content = "lorem ipsum\n27. Nullmonth 2020\nMärz 2020\nlorem ipsum"
        expected = datetime.datetime(2020, 3, 1, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_10(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 22-MAR-2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 22, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_11(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 22 MAR 2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 22, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_12(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 22/MAR/2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 22, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_13(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 22.MAR.2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 22, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_14(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 22.MAR 2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 22, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_15(self):
        content = "Customer Number Currency 22.MAR.22 Credit Card 1934829304"
        assert parse_date("", content) is None

    def test_date_format_16(self):
        content = "Customer Number Currency 22.MAR,22 Credit Card 1934829304"
        assert parse_date("", content) is None

    def test_date_format_17(self):
        content = "Customer Number Currency 22,MAR,2022 Credit Card 1934829304"
        assert parse_date("", content) is None

    def test_date_format_18(self):
        content = "Customer Number Currency 22 MAR,2022 Credit Card 1934829304"
        assert parse_date("", content) is None

    def test_date_format_19(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 21st MAR 2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 21, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_20(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 22nd March 2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 22, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_21(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 2nd MAR 2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 2, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_22(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 23rd MAR 2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 23, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_23(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 24th MAR 2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 24, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_24(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 21-MAR-2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 21, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_25(self, settings_timezone: ZoneInfo):
        content = "Customer Number Currency 25TH MAR 2022 Credit Card 1934829304"
        expected = datetime.datetime(2022, 3, 25, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_date_format_26(self, settings_timezone: ZoneInfo):
        content = "CHASE 0 September 25, 2019 JPMorgan Chase Bank, NA. P0 Box 182051"
        expected = datetime.datetime(2019, 9, 25, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_crazy_date_past(self):
        assert parse_date("", "01-07-0590 00:00:00") is None

    def test_crazy_date_future(self):
        assert parse_date("", "01-07-2350 00:00:00") is None

    def test_crazy_date_with_spaces(self):
        assert parse_date("", "20 408000l 2475") is None

    def test_utf_month_names(
        self,
        settings: SettingsWrapper,
        settings_timezone: ZoneInfo,
    ):
        settings.DATE_PARSER_LANGUAGES = ["fr", "de", "hr", "cs", "pl", "tr"]
        # (content, (year, month, day)) pairs, checked in this order
        cases = (
            ("13 décembre 2023", (2023, 12, 13)),
            ("13 août 2022", (2022, 8, 13)),
            ("11 März 2020", (2020, 3, 11)),
            ("17. ožujka 2018.", (2018, 3, 17)),
            ("1. veljače 2016.", (2016, 2, 1)),
            ("15. února 1985", (1985, 2, 15)),
            ("30. září 2011", (2011, 9, 30)),
            ("28. května 1990", (1990, 5, 28)),
            ("1. grudzień 1997", (1997, 12, 1)),
            ("17 Şubat 2024", (2024, 2, 17)),
            ("30 Ağustos 2012", (2012, 8, 30)),
            ("17 Eylül 2000", (2000, 9, 17)),
            ("5. október 1992", (1992, 10, 5)),
        )
        for content, (year, month, day) in cases:
            assert parse_date("", content) == datetime.datetime(
                year,
                month,
                day,
                0,
                0,
                tzinfo=settings_timezone,
            )

    def test_multiple_dates(self, settings_timezone: ZoneInfo):
        content = """This text has multiple dates.
For example 02.02.2018, 22 July 2022 and December 2021.
But not 24-12-9999 because it's in the future..."""
        expected = [
            datetime.datetime(2018, 2, 2, 0, 0, tzinfo=settings_timezone),
            datetime.datetime(2022, 7, 22, 0, 0, tzinfo=settings_timezone),
            datetime.datetime(2021, 12, 1, 0, 0, tzinfo=settings_timezone),
        ]
        assert list(parse_date_generator("", content)) == expected

    def test_filename_date_parse_valid_ymd(
        self,
        settings: SettingsWrapper,
        settings_timezone: ZoneInfo,
    ):
        """
        GIVEN:
            - Filename date order is set to Year Month Day (YMD)
            - The filename contains a date in that order
            - The content contains no date
        THEN:
            - The date is taken from the filename
        """
        settings.FILENAME_DATE_ORDER = "YMD"
        expected = datetime.datetime(2022, 4, 1, 0, 0, tzinfo=settings_timezone)
        assert parse_date("/tmp/Scan-2022-04-01.pdf", "No date in here") == expected

    def test_filename_date_parse_valid_dmy(
        self,
        settings: SettingsWrapper,
        settings_timezone: ZoneInfo,
    ):
        """
        GIVEN:
            - Filename date order is set to Day Month Year (DMY)
            - The filename contains a date in that order
            - The content contains no date
        THEN:
            - The date is taken from the filename
        """
        settings.FILENAME_DATE_ORDER = "DMY"
        expected = datetime.datetime(2021, 1, 10, 0, 0, tzinfo=settings_timezone)
        assert parse_date("/tmp/Scan-10.01.2021.pdf", "No date in here") == expected

    def test_filename_date_parse_invalid(self, settings: SettingsWrapper):
        """
        GIVEN:
            - Filename date order is set to Year Month Day (YMD)
            - Neither the filename nor the content contains a date
        THEN:
            - No date is parsed
        """
        settings.FILENAME_DATE_ORDER = "YMD"
        assert parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here") is None

    def test_filename_date_ignored_use_content(
        self,
        settings: SettingsWrapper,
        settings_timezone: ZoneInfo,
    ):
        """
        GIVEN:
            - Filename date order is set to Year Month Day (YMD)
            - The filename contains a date in that order
            - That filename date is listed in IGNORE_DATES
            - The content contains another date
        THEN:
            - The date is taken from the content, not the filename
        """
        settings.FILENAME_DATE_ORDER = "YMD"
        settings.IGNORE_DATES = (datetime.date(2022, 4, 1),)
        expected = datetime.datetime(2022, 3, 24, 0, 0, tzinfo=settings_timezone)
        assert (
            parse_date("/tmp/Scan-2022-04-01.pdf", "The matching date is 24.03.2022")
            == expected
        )

    def test_ignored_dates_default_order(
        self,
        settings: SettingsWrapper,
        settings_timezone: ZoneInfo,
    ):
        """
        GIVEN:
            - IGNORE_DATES contains two dates
            - The content contains ignored dates and one non-ignored date
        THEN:
            - The non-ignored date is parsed from the content
        """
        settings.IGNORE_DATES = (datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
        content = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem ipsum"
        expected = datetime.datetime(2018, 2, 13, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

    def test_ignored_dates_order_ymd(
        self,
        settings: SettingsWrapper,
        settings_timezone: ZoneInfo,
    ):
        """
        GIVEN:
            - IGNORE_DATES contains two dates
            - Filename date order is set to Year Month Day (YMD)
            - The content contains ignored dates and one non-ignored date
        THEN:
            - The non-ignored date is parsed from the content
        """
        settings.FILENAME_DATE_ORDER = "YMD"
        settings.IGNORE_DATES = (datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
        content = "lorem ipsum 190311, 20200117 and lorem 13.02.2018 lorem ipsum"
        expected = datetime.datetime(2018, 2, 13, 0, 0, tzinfo=settings_timezone)
        assert parse_date("", content) == expected

View File

@@ -114,6 +114,30 @@ def mock_supported_extensions(mocker: MockerFixture) -> MagicMock:
)
def wait_for_mock_call(
    mock_obj: MagicMock,
    timeout_s: float = 5.0,
    poll_interval_s: float = 0.1,
) -> bool:
    """
    Actively wait for a mock to be called.

    The mock is always checked at least once, even with a zero timeout,
    and once more after the final sleep, so a call that lands right at the
    deadline is still reported.

    Args:
        mock_obj: The mock object to check (e.g., mock.delay)
        timeout_s: Maximum time to wait in seconds
        poll_interval_s: How often to check in seconds

    Returns:
        True if mock was called within timeout, False otherwise
    """
    deadline = monotonic() + timeout_s
    while not mock_obj.called:
        remaining = deadline - monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline so the timeout is honoured closely
        sleep(min(poll_interval_s, remaining))
    return True
class TestTrackedFile:
"""Tests for the TrackedFile dataclass."""
@@ -724,7 +748,7 @@ def start_consumer(
thread = ConsumerThread(consumption_dir, scratch_dir, **kwargs)
threads.append(thread)
thread.start()
sleep(0.5) # Give thread time to start
sleep(2.0) # Give thread time to start
return thread
try:
@@ -767,7 +791,8 @@ class TestCommandWatch:
target = consumption_dir / "document.pdf"
shutil.copy(sample_pdf, target)
sleep(0.5)
wait_for_mock_call(mock_consume_file_delay.delay, timeout_s=2.0)
if thread.exception:
raise thread.exception
@@ -788,9 +813,12 @@ class TestCommandWatch:
thread = start_consumer()
sleep(0.5)
target = consumption_dir / "document.pdf"
shutil.move(temp_location, target)
sleep(0.5)
wait_for_mock_call(mock_consume_file_delay.delay, timeout_s=2.0)
if thread.exception:
raise thread.exception
@@ -816,7 +844,7 @@ class TestCommandWatch:
f.flush()
sleep(0.05)
sleep(0.5)
wait_for_mock_call(mock_consume_file_delay.delay, timeout_s=2.0)
if thread.exception:
raise thread.exception
@@ -837,7 +865,7 @@ class TestCommandWatch:
(consumption_dir / "._document.pdf").write_bytes(b"test")
shutil.copy(sample_pdf, consumption_dir / "valid.pdf")
sleep(0.5)
wait_for_mock_call(mock_consume_file_delay.delay, timeout_s=2.0)
if thread.exception:
raise thread.exception
@@ -868,11 +896,10 @@ class TestCommandWatch:
assert not thread.is_alive()
@pytest.mark.django_db
class TestCommandWatchPolling:
"""Tests for polling mode."""
@pytest.mark.django_db
@pytest.mark.flaky(reruns=2)
def test_polling_mode_works(
self,
consumption_dir: Path,
@@ -882,7 +909,8 @@ class TestCommandWatchPolling:
) -> None:
"""
Test polling mode detects files.
Note: At times, there appears to be a timing issue, where delay has not yet been called, hence this is marked as flaky.
Uses active waiting with timeout to handle CI delays and polling timing.
"""
# Use shorter polling interval for faster test
thread = start_consumer(polling_interval=0.5, stability_delay=0.1)
@@ -890,9 +918,9 @@ class TestCommandWatchPolling:
target = consumption_dir / "document.pdf"
shutil.copy(sample_pdf, target)
# Wait for: poll interval + stability delay + another poll + margin
# CI can be slow, so use generous timeout
sleep(3.0)
# Actively wait for consumption
# Polling needs: interval (0.5s) + stability (0.1s) + next poll (0.5s) + margin
wait_for_mock_call(mock_consume_file_delay.delay, timeout_s=5.0)
if thread.exception:
raise thread.exception
@@ -919,7 +947,8 @@ class TestCommandWatchRecursive:
target = subdir / "document.pdf"
shutil.copy(sample_pdf, target)
sleep(0.5)
wait_for_mock_call(mock_consume_file_delay.delay, timeout_s=2.0)
if thread.exception:
raise thread.exception
@@ -948,7 +977,8 @@ class TestCommandWatchRecursive:
target = subdir / "document.pdf"
shutil.copy(sample_pdf, target)
sleep(0.5)
wait_for_mock_call(mock_consume_file_delay.delay, timeout_s=2.0)
if thread.exception:
raise thread.exception

View File

@@ -148,7 +148,6 @@ from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import parse_date_generator
from documents.permissions import AcknowledgeTasksPermissions
from documents.permissions import PaperlessAdminPermissions
from documents.permissions import PaperlessNotePermissions
@@ -158,6 +157,7 @@ from documents.permissions import get_document_count_filter_for_user
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import has_perms_owner_aware
from documents.permissions import set_permissions_for_object
from documents.plugins.date_parsing import get_date_parser
from documents.schema import generate_object_with_permissions_schema
from documents.serialisers import AcknowledgeTasksViewSerializer
from documents.serialisers import BulkDownloadSerializer
@@ -1023,16 +1023,17 @@ class DocumentViewSet(
dates = []
if settings.NUMBER_OF_SUGGESTED_DATES > 0:
gen = parse_date_generator(doc.filename, doc.content)
dates = sorted(
{
i
for i in itertools.islice(
gen,
settings.NUMBER_OF_SUGGESTED_DATES,
)
},
)
with get_date_parser() as date_parser:
gen = date_parser.parse(doc.filename, doc.content)
dates = sorted(
{
i
for i in itertools.islice(
gen,
settings.NUMBER_OF_SUGGESTED_DATES,
)
},
)
resp_data = {
"correspondents": [

View File

@@ -89,3 +89,11 @@ def greenmail_mail_account(db: None) -> Generator[MailAccount, None, None]:
@pytest.fixture()
def mail_account_handler() -> MailAccountHandler:
"""
Return a newly constructed MailAccountHandler for the requesting test.
"""
return MailAccountHandler()
@pytest.fixture(scope="session")
def nginx_base_url() -> Generator[str, None, None]:
"""
Provide the base URL of the nginx HTTP server the tests expect to be alive.

The fixture is session-scoped and yields a fixed, hard-coded address; it
does not start the server or check that it is reachable.
"""
yield "http://localhost:8080"

View File

@@ -55,7 +55,7 @@ Content-Transfer-Encoding: 7bit
<p>Some Text</p>
<p>
<img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
<img src="https://docs.paperless-ngx.com/assets/logo_full_white.svg" alt="This image should not be shown.">
<img src="http://localhost:8080/assets/logo_full_white.svg" alt="This image should not be shown.">
</p>
<p>and an embedded image.<br>

View File

@@ -6,7 +6,7 @@
<p>Some Text</p>
<p>
<img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
<img src="https://docs.paperless-ngx.com/assets/logo_full_white.svg" alt="This image should not be shown.">
<img src="http://localhost:8080/assets/logo_full_white.svg" alt="This image should not be shown.">
</p>
<p>and an embedded image.<br>

View File

@@ -6,6 +6,8 @@ from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
@pytest.mark.live
@pytest.mark.greenmail
@pytest.mark.django_db
class TestMailGreenmail:
"""

View File

@@ -17,7 +17,7 @@ from paperless_mail.parsers import MailDocumentParser
def extract_text(pdf_path: Path) -> str:
"""
Using pdftotext from poppler, extracts the text of a PDF into a file,
then reads the file contents and returns it
then reads the file contents and returns it.
"""
with tempfile.NamedTemporaryFile(
mode="w+",
@@ -38,71 +38,107 @@ def extract_text(pdf_path: Path) -> str:
class MailAttachmentMock:
def __init__(self, payload, content_id):
def __init__(self, payload: bytes, content_id: str) -> None:
self.payload = payload
self.content_id = content_id
self.content_type = "image/png"
@pytest.mark.live
@pytest.mark.nginx
@pytest.mark.skipif(
"PAPERLESS_CI_TEST" not in os.environ,
reason="No Gotenberg/Tika servers to test with",
)
class TestUrlCanary:
class TestNginxService:
"""
Verify certain URLs are still available so testing is valid still
Verify the local nginx server is responding correctly.
These tests validate that the test infrastructure is working properly
before running the actual parser tests that depend on HTTP resources.
"""
def test_online_image_exception_on_not_available(self):
def test_non_existent_resource_returns_404(
self,
nginx_base_url: str,
) -> None:
"""
GIVEN:
- Fresh start
- Local nginx server is running
WHEN:
- nonexistent image is requested
- A non-existent resource is requested
THEN:
- An exception shall be thrown
"""
"""
A public image is used in the html sample file. We have no control
whether this image stays online forever, so here we check if we can detect if is not
available anymore.
- An HTTP 404 status code shall be returned
"""
resp = httpx.get(
"https://docs.paperless-ngx.com/assets/non-existent.png",
f"{nginx_base_url}/assets/non-existent.png",
timeout=5.0,
)
with pytest.raises(httpx.HTTPStatusError) as exec_info:
resp.raise_for_status()
assert exec_info.value.response.status_code == httpx.codes.NOT_FOUND
def test_is_online_image_still_available(self):
def test_valid_resource_is_available(
self,
nginx_base_url: str,
) -> None:
"""
GIVEN:
- Fresh start
- Local nginx server is running
WHEN:
- A public image used in the html sample file is requested
- A valid test fixture resource is requested
THEN:
- No exception shall be thrown
- The resource shall be returned with HTTP 200 status code
- The response shall contain the expected content type
"""
"""
A public image is used in the html sample file. We have no control
whether this image stays online forever, so here we check if it is still there
"""
# Now check the URL used in samples/sample.html
resp = httpx.get(
"https://docs.paperless-ngx.com/assets/logo_full_white.svg",
f"{nginx_base_url}/assets/logo_full_white.svg",
timeout=5.0,
)
resp.raise_for_status()
assert resp.status_code == httpx.codes.OK
assert "svg" in resp.headers.get("content-type", "").lower()
def test_server_connectivity(
self,
nginx_base_url: str,
) -> None:
"""
GIVEN:
- Local test fixtures server should be running
WHEN:
- A request is made to the server root (following redirects)
THEN:
- The server shall respond without connection errors
- The response status code shall be one of 200, 403 or 404
"""
try:
resp = httpx.get(
nginx_base_url,
timeout=5.0,
follow_redirects=True,
)
# Reaching the server is the point of this test, so 200, 403 and 404
# are all accepted; any other status code fails the assertion
assert resp.status_code in {200, 404, 403}
except httpx.ConnectError as e:
# Report a refused/unreachable connection as a test failure that
# carries a hint about the expected test infrastructure
pytest.fail(
f"Cannot connect to nginx server at {nginx_base_url}. "
f"Ensure the nginx container is running via docker-compose.ci-test.yml. "
f"Error: {e}",
)
@pytest.mark.live
@pytest.mark.gotenberg
@pytest.mark.tika
@pytest.mark.nginx
@pytest.mark.skipif(
"PAPERLESS_CI_TEST" not in os.environ,
reason="No Gotenberg/Tika servers to test with",
)
class TestParserLive:
@staticmethod
def imagehash(file, hash_size=18):
def imagehash(file: Path, hash_size: int = 18) -> str:
return f"{average_hash(Image.open(file), hash_size)}"
def test_get_thumbnail(
@@ -112,14 +148,15 @@ class TestParserLive:
simple_txt_email_file: Path,
simple_txt_email_pdf_file: Path,
simple_txt_email_thumbnail_file: Path,
):
) -> None:
"""
GIVEN:
- Fresh start
- A simple text email file
- Mocked PDF generation returning a known PDF
WHEN:
- The Thumbnail is requested
- The thumbnail is requested
THEN:
- The returned thumbnail image file is as expected
- The returned thumbnail image file shall match the expected hash
"""
mock_generate_pdf = mocker.patch(
"paperless_mail.parsers.MailDocumentParser.generate_pdf",
@@ -134,22 +171,28 @@ class TestParserLive:
assert self.imagehash(thumb) == self.imagehash(
simple_txt_email_thumbnail_file,
), (
f"Created Thumbnail {thumb} differs from expected file {simple_txt_email_thumbnail_file}"
f"Created thumbnail {thumb} differs from expected file "
f"{simple_txt_email_thumbnail_file}"
)
def test_tika_parse_successful(self, mail_parser: MailDocumentParser):
def test_tika_parse_successful(self, mail_parser: MailDocumentParser) -> None:
"""
GIVEN:
- Fresh start
- HTML content to parse
- Tika server is running
WHEN:
- tika parsing is called
- Tika parsing is called
THEN:
- a web request to tika shall be done and the reply es returned
- A web request to Tika shall be made
- The parsed text content shall be returned
"""
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
html = (
'<html><head><meta http-equiv="content-type" '
'content="text/html; charset=UTF-8"></head>'
"<body><p>Some Text</p></body></html>"
)
expected_text = "Some Text"
# Check successful parsing
parsed = mail_parser.tika_parse(html)
assert expected_text == parsed.strip()
@@ -160,14 +203,17 @@ class TestParserLive:
html_email_file: Path,
merged_pdf_first: Path,
merged_pdf_second: Path,
):
) -> None:
"""
GIVEN:
- Intermediary pdfs to be merged
- Intermediary PDFs to be merged
- An HTML email file
WHEN:
- pdf generation is requested with html file requiring merging of pdfs
- PDF generation is requested with HTML file requiring merging
THEN:
- gotenberg is called to merge files and the resulting file is returned
- Gotenberg shall be called to merge files
- The resulting merged PDF shall be returned
- The merged PDF shall contain text from both source PDFs
"""
mock_generate_pdf_from_html = mocker.patch(
"paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html",
@@ -200,16 +246,17 @@ class TestParserLive:
html_email_file: Path,
html_email_pdf_file: Path,
html_email_thumbnail_file: Path,
):
) -> None:
"""
GIVEN:
- Fresh start
- An HTML email file
WHEN:
- pdf generation from simple eml file is requested
- PDF generation from the email file is requested
THEN:
- Gotenberg is called and the resulting file is returned and look as expected.
- Gotenberg shall be called to generate the PDF
- The archive PDF shall contain the expected content
- The generated thumbnail shall match the expected image hash
"""
util_call_with_backoff(mail_parser.parse, [html_email_file, "message/rfc822"])
# Check the archive PDF
@@ -217,7 +264,7 @@ class TestParserLive:
archive_text = extract_text(archive_path)
expected_archive_text = extract_text(html_email_pdf_file)
# Archive includes the HTML content, so use in
# Archive includes the HTML content
assert expected_archive_text in archive_text
# Check the thumbnail
@@ -227,9 +274,12 @@ class TestParserLive:
)
generated_thumbnail_hash = self.imagehash(generated_thumbnail)
# The created pdf is not reproducible. But the converted image should always look the same.
# The created PDF is not reproducible, but the converted image
# should always look the same
expected_hash = self.imagehash(html_email_thumbnail_file)
assert generated_thumbnail_hash == expected_hash, (
f"PDF looks different. Check if {generated_thumbnail} looks weird."
f"PDF thumbnail differs from expected. "
f"Generated: {generated_thumbnail}, "
f"Hash: {generated_thumbnail_hash} vs {expected_hash}"
)

3841
uv.lock generated

File diff suppressed because it is too large Load Diff