Compare commits


1 Commit

Author SHA1 Message Date
dependabot[bot]
5a0cbab975 Chore(deps-dev): Bump the frontend-eslint-dependencies group
Bumps the frontend-eslint-dependencies group in /src-ui with 3 updates: [@typescript-eslint/eslint-plugin](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin), [@typescript-eslint/parser](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/parser) and [@typescript-eslint/utils](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/utils).


Updates `@typescript-eslint/eslint-plugin` from 8.47.0 to 8.48.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/eslint-plugin/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.48.0/packages/eslint-plugin)

Updates `@typescript-eslint/parser` from 8.47.0 to 8.48.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/parser/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.48.0/packages/parser)

Updates `@typescript-eslint/utils` from 8.47.0 to 8.48.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/utils/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.48.0/packages/utils)

---
updated-dependencies:
- dependency-name: "@typescript-eslint/eslint-plugin"
  dependency-version: 8.48.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: frontend-eslint-dependencies
- dependency-name: "@typescript-eslint/parser"
  dependency-version: 8.48.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: frontend-eslint-dependencies
- dependency-name: "@typescript-eslint/utils"
  dependency-version: 8.48.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: frontend-eslint-dependencies
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-01 23:08:38 +00:00
40 changed files with 1209 additions and 1914 deletions

View File

@@ -41,56 +41,30 @@ updates:
- "backend"
- "dependencies"
groups:
# Development & CI/CD Tooling
development:
patterns:
- "*pytest*"
- "ruff"
- "mkdocs-material"
- "pre-commit*"
# Django & DRF Ecosystem
django-ecosystem:
django:
patterns:
- "*django*"
- "drf-*"
- "djangorestframework"
- "whitenoise"
- "bleach"
- "jinja2"
# Async, Task Queuing & Caching
async-tasks:
patterns:
- "celery*"
- "channels*"
- "flower"
- "redis"
# Document, PDF, and OCR Processing
document-processing:
patterns:
- "ocrmypdf"
- "pdf2image"
- "pyzbar"
- "zxing-cpp"
- "tika-client"
- "gotenberg-client"
- "python-magic"
- "python-gnupg"
# Data, NLP, and Search
data-nlp-search:
patterns:
- "nltk"
- "scikit-learn"
- "langdetect"
- "rapidfuzz"
- "whoosh-reloaded"
# Utilities (Patch Updates)
utilities-patch:
major-versions:
update-types:
- "patch"
# Utilities (Minor Updates)
utilities-minor:
- "major"
small-changes:
update-types:
- "minor"
- "patch"
exclude-patterns:
- "*django*"
- "drf-*"
pre-built:
patterns:
- psycopg*
- zxing-cpp
# Enable updates for GitHub Actions
- package-ecosystem: "github-actions"
target-branch: "dev"
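
For context on the grouping config being reshuffled above: Dependabot group `patterns` are shell-style globs matched against package names, while `update-types` and `exclude-patterns` further restrict which bumps land in a grouped PR. Below is a toy sketch of that matching, assuming fnmatch-equivalent glob semantics; the group contents mirror this diff, but the first-match-wins ordering is illustrative, not Dependabot's documented behavior.

```python
# Toy sketch of Dependabot-style glob grouping, NOT Dependabot itself.
# Assumes fnmatch-equivalent glob semantics; first matching group wins here.
from fnmatch import fnmatch

# Hypothetical groups mirroring the config in this diff.
GROUPS = {
    "django": ["*django*", "drf-*", "whitenoise", "bleach", "jinja2"],
    "async-tasks": ["celery*", "channels*", "flower", "redis"],
    "pre-built": ["psycopg*", "zxing-cpp"],
}

def group_for(package: str) -> str | None:
    """Return the first group whose patterns match the package name."""
    for name, patterns in GROUPS.items():
        if any(fnmatch(package, pattern) for pattern in patterns):
            return name
    return None

assert group_for("djangorestframework") == "django"  # via "*django*"
assert group_for("celery") == "async-tasks"
assert group_for("requests") is None  # ungrouped: gets its own PR
```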

View File

@@ -67,7 +67,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Install python
uses: actions/setup-python@v6
with:
@@ -81,7 +81,7 @@ jobs:
- pre-commit
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Set up Python
id: setup-python
uses: actions/setup-python@v6
@@ -131,7 +131,7 @@ jobs:
fail-fast: false
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Start containers
run: |
docker compose --file ${{ github.workspace }}/docker/compose/docker-compose.ci-test.yml pull --quiet
@@ -202,7 +202,7 @@ jobs:
needs:
- pre-commit
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
@@ -235,7 +235,7 @@ jobs:
shard-index: [1, 2, 3, 4]
shard-count: [4]
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
@@ -284,7 +284,7 @@ jobs:
shard-index: [1, 2]
shard-count: [2]
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
@@ -327,7 +327,7 @@ jobs:
- tests-frontend
- tests-frontend-e2e
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
@@ -424,7 +424,7 @@ jobs:
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
# If https://github.com/docker/buildx/issues/1044 is resolved,
# the append input with a native arm64 arch could be used to
# significantly speed up building
@@ -497,7 +497,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Set up Python
id: setup-python
uses: actions/setup-python@v6
@@ -643,7 +643,7 @@ jobs:
if: needs.publish-release.outputs.prerelease == 'false'
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
ref: main
- name: Set up Python

View File

@@ -34,7 +34,7 @@ jobs:
# Learn more about CodeQL language support at https://git.io/codeql-language-support
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v4

View File

@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
token: ${{ secrets.PNGX_BOT_PAT }}
- name: crowdin action

View File

@@ -11,7 +11,7 @@ jobs:
contents: write
steps:
- name: Checkout code
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
token: ${{ secrets.PNGX_BOT_PAT }}
ref: ${{ github.head_ref }}

View File

@@ -32,7 +32,7 @@ RUN set -eux \
# Purpose: Installs s6-overlay and rootfs
# Comments:
# - Don't leave anything extra in here either
FROM ghcr.io/astral-sh/uv:0.9.15-python3.12-trixie-slim AS s6-overlay-base
FROM ghcr.io/astral-sh/uv:0.9.14-python3.12-trixie-slim AS s6-overlay-base
WORKDIR /usr/src/s6

View File

@@ -1794,23 +1794,3 @@ password. All of these options come from their similarly-named [Django settings]
#### [`PAPERLESS_EMAIL_USE_SSL=<bool>`](#PAPERLESS_EMAIL_USE_SSL) {#PAPERLESS_EMAIL_USE_SSL}
: Defaults to false.
## Remote OCR
#### [`PAPERLESS_REMOTE_OCR_ENGINE=<str>`](#PAPERLESS_REMOTE_OCR_ENGINE) {#PAPERLESS_REMOTE_OCR_ENGINE}
: The remote OCR engine to use. Currently only Azure AI is supported as "azureai".
Defaults to None, which disables remote OCR.
#### [`PAPERLESS_REMOTE_OCR_API_KEY=<str>`](#PAPERLESS_REMOTE_OCR_API_KEY) {#PAPERLESS_REMOTE_OCR_API_KEY}
: The API key to use for the remote OCR engine.
Defaults to None.
#### [`PAPERLESS_REMOTE_OCR_ENDPOINT=<str>`](#PAPERLESS_REMOTE_OCR_ENDPOINT) {#PAPERLESS_REMOTE_OCR_ENDPOINT}
: The endpoint to use for the remote OCR engine. This is required for Azure AI.
Defaults to None.

View File

@@ -25,10 +25,9 @@ physical documents into a searchable online archive so you can keep, well, _less
## Features
- **Organize and index** your scanned documents with tags, correspondents, types, and more.
- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so.
- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way.
- Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images.
- Utilizes the open-source Tesseract engine to recognize more than 100 languages.
- _New!_ Supports remote OCR with Azure AI (opt-in).
- Utilizes the open-source Tesseract engine to recognize more than 100 languages.
- Documents are saved as PDF/A format which is designed for long term storage, alongside the unaltered originals.
- Uses machine-learning to automatically add tags, correspondents and document types to your documents.
- Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.

View File

@@ -892,21 +892,6 @@ how regularly you intend to scan documents and use paperless.
performed the task associated with the document, move it to the
inbox.
## Remote OCR
!!! important
This feature is disabled by default and will always remain strictly "opt-in".
Paperless-ngx supports performing OCR on documents using remote services. At the moment, this is limited to
[Microsoft's Azure "Document Intelligence" service](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence).
This is of course a paid service (with a free tier) which requires an Azure account and subscription. Azure AI is not affiliated with
Paperless-ngx in any way. When enabled, Paperless-ngx will automatically send appropriate documents to Azure for OCR processing, bypassing
the local OCR engine. See the [configuration](configuration.md#PAPERLESS_REMOTE_OCR_ENGINE) options for more details.
Additionally, when using a commercial service with this feature, consider both potential costs as well as any associated file size
or page limitations (e.g. with a free tier).
## Architecture
Paperless-ngx consists of the following components:

View File

@@ -16,7 +16,6 @@ classifiers = [
# This will allow testing to not install a webserver, mysql, etc
dependencies = [
"azure-ai-documentintelligence>=1.0.2",
"babel>=2.17",
"bleach~=6.3.0",
"celery[redis]~=5.5.1",
@@ -253,7 +252,6 @@ testpaths = [
"src/paperless_tesseract/tests/",
"src/paperless_tika/tests",
"src/paperless_text/tests/",
"src/paperless_remote/tests/",
]
addopts = [
"--pythonwarnings=all",

View File

@@ -5,14 +5,14 @@
<trans-unit id="ngb.alert.close" datatype="html">
<source>Close</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/alert/alert.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/alert/alert.ts</context>
<context context-type="linenumber">50</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.carousel.slide-number" datatype="html">
<source> Slide <x id="INTERPOLATION" equiv-text="ueryList&lt;NgbSli"/> of <x id="INTERPOLATION_1" equiv-text="EventSource = N"/> </source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/carousel/carousel.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/carousel/carousel.ts</context>
<context context-type="linenumber">131,135</context>
</context-group>
<note priority="1" from="description">Currently selected slide number read by screen reader</note>
@@ -20,212 +20,212 @@
<trans-unit id="ngb.carousel.previous" datatype="html">
<source>Previous</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/carousel/carousel.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/carousel/carousel.ts</context>
<context context-type="linenumber">157,159</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.carousel.next" datatype="html">
<source>Next</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/carousel/carousel.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/carousel/carousel.ts</context>
<context context-type="linenumber">198</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.datepicker.previous-month" datatype="html">
<source>Previous month</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="linenumber">83,85</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="linenumber">112</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.datepicker.next-month" datatype="html">
<source>Next month</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="linenumber">112</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="linenumber">112</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.HH" datatype="html">
<source>HH</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.toast.close-aria" datatype="html">
<source>Close</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.datepicker.select-month" datatype="html">
<source>Select month</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.first" datatype="html">
<source>««</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.hours" datatype="html">
<source>Hours</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.previous" datatype="html">
<source>«</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.MM" datatype="html">
<source>MM</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.next" datatype="html">
<source>»</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.datepicker.select-year" datatype="html">
<source>Select year</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.minutes" datatype="html">
<source>Minutes</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.last" datatype="html">
<source>»»</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.first-aria" datatype="html">
<source>First</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.increment-hours" datatype="html">
<source>Increment hours</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.previous-aria" datatype="html">
<source>Previous</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.decrement-hours" datatype="html">
<source>Decrement hours</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.next-aria" datatype="html">
<source>Next</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.increment-minutes" datatype="html">
<source>Increment minutes</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.last-aria" datatype="html">
<source>Last</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.decrement-minutes" datatype="html">
<source>Decrement minutes</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.SS" datatype="html">
<source>SS</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.seconds" datatype="html">
<source>Seconds</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.increment-seconds" datatype="html">
<source>Increment seconds</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.decrement-seconds" datatype="html">
<source>Decrement seconds</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.PM" datatype="html">
<source><x id="INTERPOLATION"/></source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
@@ -233,7 +233,7 @@
<source><x id="INTERPOLATION" equiv-text="barConfig);
pu"/></source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/progressbar/progressbar.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/progressbar/progressbar.ts</context>
<context context-type="linenumber">41,42</context>
</context-group>
</trans-unit>
@@ -816,7 +816,7 @@
<source>Jump to bottom</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/logs/logs.component.html</context>
<context context-type="linenumber">62</context>
<context context-type="linenumber">60</context>
</context-group>
</trans-unit>
<trans-unit id="1255048712725285892" datatype="html">

View File

@@ -12,14 +12,14 @@
"private": true,
"dependencies": {
"@angular/cdk": "^20.2.13",
"@angular/common": "~20.3.15",
"@angular/compiler": "~20.3.15",
"@angular/core": "~20.3.15",
"@angular/forms": "~20.3.15",
"@angular/localize": "~20.3.15",
"@angular/platform-browser": "~20.3.15",
"@angular/platform-browser-dynamic": "~20.3.15",
"@angular/router": "~20.3.15",
"@angular/common": "~20.3.14",
"@angular/compiler": "~20.3.12",
"@angular/core": "~20.3.12",
"@angular/forms": "~20.3.12",
"@angular/localize": "~20.3.12",
"@angular/platform-browser": "~20.3.12",
"@angular/platform-browser-dynamic": "~20.3.12",
"@angular/router": "~20.3.12",
"@ng-bootstrap/ng-bootstrap": "^19.0.1",
"@ng-select/ng-select": "^20.7.0",
"@ngneat/dirty-check-forms": "^3.0.3",
@@ -42,23 +42,23 @@
"devDependencies": {
"@angular-builders/custom-webpack": "^20.0.0",
"@angular-builders/jest": "^20.0.0",
"@angular-devkit/core": "^20.3.13",
"@angular-devkit/schematics": "^20.3.13",
"@angular-devkit/core": "^20.3.10",
"@angular-devkit/schematics": "^20.3.10",
"@angular-eslint/builder": "20.6.0",
"@angular-eslint/eslint-plugin": "20.6.0",
"@angular-eslint/eslint-plugin-template": "20.6.0",
"@angular-eslint/schematics": "20.6.0",
"@angular-eslint/template-parser": "20.6.0",
"@angular/build": "^20.3.13",
"@angular/cli": "~20.3.13",
"@angular/compiler-cli": "~20.3.15",
"@angular/build": "^20.3.10",
"@angular/cli": "~20.3.10",
"@angular/compiler-cli": "~20.3.12",
"@codecov/webpack-plugin": "^1.9.1",
"@playwright/test": "^1.57.0",
"@playwright/test": "^1.56.1",
"@types/jest": "^30.0.0",
"@types/node": "^24.10.1",
"@typescript-eslint/eslint-plugin": "^8.48.1",
"@typescript-eslint/parser": "^8.48.1",
"@typescript-eslint/utils": "^8.48.1",
"@typescript-eslint/eslint-plugin": "^8.48.0",
"@typescript-eslint/parser": "^8.48.0",
"@typescript-eslint/utils": "^8.48.0",
"eslint": "^9.39.1",
"jest": "30.2.0",
"jest-environment-jsdom": "^30.2.0",
@@ -68,7 +68,7 @@
"prettier-plugin-organize-imports": "^4.3.0",
"ts-node": "~10.9.1",
"typescript": "^5.8.3",
"webpack": "^5.103.0"
"webpack": "^5.102.1"
},
"packageManager": "pnpm@10.17.1",
"pnpm": {

src-ui/pnpm-lock.yaml (generated): 834 changed lines

File diff suppressed because it is too large

View File

@@ -48,9 +48,7 @@
<ng-container i18n>Loading...</ng-container>
</div>
} @else {
@for (log of logs; track log) {
<p class="m-0 p-0" [ngClass]="'log-entry-' + log.level">{{log.message}}</p>
}
<p *ngFor="let log of logs" class="m-0 p-0" [ngClass]="'log-entry-' + log.level">{{log.message}}</p>
}
</div>
<button

View File

@@ -26,7 +26,7 @@
@for (user of users; track user) {
<li class="list-group-item">
<div class="row">
<div class="col d-flex align-items-center" [class.opacity-50]="!user.is_active"><button class="btn btn-link p-0 text-start" type="button" (click)="editUser(user)" [disabled]="!permissionsService.currentUserCan(PermissionAction.Change, PermissionType.User)">{{user.username}}</button></div>
<div class="col d-flex align-items-center"><button class="btn btn-link p-0 text-start" type="button" (click)="editUser(user)" [disabled]="!permissionsService.currentUserCan(PermissionAction.Change, PermissionType.User)">{{user.username}}</button></div>
<div class="col d-flex align-items-center">{{user.first_name}} {{user.last_name}}</div>
<div class="col d-flex align-items-center">{{user.groups?.map(getGroupName, this).join(', ')}}</div>
<div class="col">

View File

@@ -61,22 +61,21 @@ def get_groups_with_only_permission(obj, codename):
return Group.objects.filter(id__in=group_object_perm_group_ids).distinct()
def set_permissions_for_object(permissions: dict, object, *, merge: bool = False):
def set_permissions_for_object(permissions: list[str], object, *, merge: bool = False):
"""
Set permissions for an object. The permissions are given as a mapping of actions
to a dict of user / group id lists, e.g.
{"view": {"users": [1], "groups": [2]}, "change": {"users": [], "groups": []}}.
Set permissions for an object. The permissions are given as a list of strings
in the format "action_modelname", e.g. "view_document".
If merge is True, the permissions are merged with the existing permissions and
no users or groups are removed. If False, the permissions are set to exactly
the given list of users and groups.
"""
for action, entry in permissions.items():
for action in permissions:
permission = f"{action}_{object.__class__.__name__.lower()}"
if "users" in entry:
if "users" in permissions[action]:
# users
users_to_add = User.objects.filter(id__in=entry["users"])
users_to_add = User.objects.filter(id__in=permissions[action]["users"])
users_to_remove = (
get_users_with_perms(
object,
@@ -86,12 +85,12 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False
if not merge
else User.objects.none()
)
if users_to_add.exists() and users_to_remove.exists():
if len(users_to_add) > 0 and len(users_to_remove) > 0:
users_to_remove = users_to_remove.exclude(id__in=users_to_add)
if users_to_remove.exists():
if len(users_to_remove) > 0:
for user in users_to_remove:
remove_perm(permission, user, object)
if users_to_add.exists():
if len(users_to_add) > 0:
for user in users_to_add:
assign_perm(permission, user, object)
if action == "change":
@@ -101,9 +100,9 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False
user,
object,
)
if "groups" in entry:
if "groups" in permissions[action]:
# groups
groups_to_add = Group.objects.filter(id__in=entry["groups"])
groups_to_add = Group.objects.filter(id__in=permissions[action]["groups"])
groups_to_remove = (
get_groups_with_only_permission(
object,
@@ -112,12 +111,12 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False
if not merge
else Group.objects.none()
)
if groups_to_add.exists() and groups_to_remove.exists():
if len(groups_to_add) > 0 and len(groups_to_remove) > 0:
groups_to_remove = groups_to_remove.exclude(id__in=groups_to_add)
if groups_to_remove.exists():
if len(groups_to_remove) > 0:
for group in groups_to_remove:
remove_perm(permission, group, object)
if groups_to_add.exists():
if len(groups_to_add) > 0:
for group in groups_to_add:
assign_perm(permission, group, object)
if action == "change":
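
Note on the two docstring variants interleaved above: the side being kept takes the nested mapping form. A short usage sketch of that shape, with illustrative ids; the module paths are the ones imported elsewhere in this diff.

```python
# Usage sketch of the mapping form described in the docstring above.
# Ids are illustrative; module paths are taken from this diff itself.
from documents.models import Document
from documents.permissions import set_permissions_for_object

document = Document.objects.get(pk=123)  # hypothetical document id

permissions = {
    "view": {"users": [1], "groups": [2]},
    "change": {"users": [], "groups": []},
}

# merge=True only adds the listed users/groups to the object's existing
# permissions; merge=False makes them exactly match the mapping, removing
# anyone not listed.
set_permissions_for_object(permissions=permissions, object=document, merge=True)
```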

View File

@@ -585,9 +585,6 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q)),
many=True,
user=self.user,
full_perms=self.full_perms,
all_fields=self.all_fields,
context=self.context,
)
return serializer.data

View File

@@ -1,10 +1,14 @@
from __future__ import annotations
import ipaddress
import logging
import shutil
import socket
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.parse import urlparse
import httpx
from celery import shared_task
from celery import states
from celery.signals import before_task_publish
@@ -23,15 +27,20 @@ from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
from filelock import FileLock
from guardian.shortcuts import remove_perm
from documents import matching
from documents.caching import clear_document_caches
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename
from documents.mail import EmailAttachment
from documents.mail import send_email
from documents.models import Correspondent
from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import PaperlessTask
from documents.models import SavedView
@@ -42,14 +51,8 @@ from documents.models import WorkflowAction
from documents.models import WorkflowRun
from documents.models import WorkflowTrigger
from documents.permissions import get_objects_for_user_owner_aware
from documents.workflows.actions import build_workflow_action_context
from documents.workflows.actions import execute_email_action
from documents.workflows.actions import execute_webhook_action
from documents.workflows.mutations import apply_assignment_to_document
from documents.workflows.mutations import apply_assignment_to_overrides
from documents.workflows.mutations import apply_removal_to_document
from documents.workflows.mutations import apply_removal_to_overrides
from documents.workflows.utils import get_workflows_for_trigger
from documents.permissions import set_permissions_for_object
from documents.templating.workflows import parse_w_workflow_placeholders
if TYPE_CHECKING:
from documents.classifier import DocumentClassifier
@@ -670,6 +673,92 @@ def run_workflows_updated(sender, document: Document, logging_group=None, **kwar
)
def _is_public_ip(ip: str) -> bool:
try:
obj = ipaddress.ip_address(ip)
return not (
obj.is_private
or obj.is_loopback
or obj.is_link_local
or obj.is_multicast
or obj.is_unspecified
)
except ValueError: # pragma: no cover
return False
def _resolve_first_ip(host: str) -> str | None:
try:
info = socket.getaddrinfo(host, None)
return info[0][4][0] if info else None
except Exception: # pragma: no cover
return None
@shared_task(
retry_backoff=True,
autoretry_for=(httpx.HTTPStatusError,),
max_retries=3,
throws=(httpx.HTTPError,),
)
def send_webhook(
url: str,
data: str | dict,
headers: dict,
files: dict,
*,
as_json: bool = False,
):
p = urlparse(url)
if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
logger.warning("Webhook blocked: invalid scheme/hostname")
raise ValueError("Invalid URL scheme or hostname.")
port = p.port or (443 if p.scheme == "https" else 80)
if (
len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
and port not in settings.WEBHOOKS_ALLOWED_PORTS
):
logger.warning("Webhook blocked: port not permitted")
raise ValueError("Destination port not permitted.")
ip = _resolve_first_ip(p.hostname)
if not ip or (
not _is_public_ip(ip) and not settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS
):
logger.warning("Webhook blocked: destination not allowed")
raise ValueError("Destination host is not allowed.")
try:
post_args = {
"url": url,
"headers": {
k: v for k, v in (headers or {}).items() if k.lower() != "host"
},
"files": files or None,
"timeout": 5.0,
"follow_redirects": False,
}
if as_json:
post_args["json"] = data
elif isinstance(data, dict):
post_args["data"] = data
else:
post_args["content"] = data
httpx.post(
**post_args,
).raise_for_status()
logger.info(
f"Webhook sent to {url}",
)
except Exception as e:
logger.error(
f"Failed attempt sending webhook to {url}: {e}",
)
raise e
def run_workflows(
trigger_type: WorkflowTrigger.WorkflowTriggerType,
document: Document | ConsumableDocument,
@@ -678,17 +767,573 @@ def run_workflows(
overrides: DocumentMetadataOverrides | None = None,
original_file: Path | None = None,
) -> tuple[DocumentMetadataOverrides, str] | None:
"""
Execute workflows matching a document for the given trigger. When `overrides` is provided
(consumption flow), actions mutate that object and the function returns `(overrides, messages)`.
Otherwise actions mutate the actual document and return nothing.
"""Run workflows which match a Document (or ConsumableDocument) for a specific trigger type or a single workflow if given.
Attachments for email/webhook actions use `original_file` when given, otherwise fall back to
`document.source_path` (Document) or `document.original_file` (ConsumableDocument).
Passing `workflow_to_run` skips the workflow query (currently only used by scheduled runs).
Assignment or removal actions are either applied directly to the document or an overrides object. If an overrides
object is provided, the function returns the object with the applied changes or None if no actions were applied and a string
of messages for each action. If no overrides object is provided, the changes are applied directly to the document and the
function returns None.
"""
def assignment_action():
if action.assign_tags.exists():
tag_ids_to_add: set[int] = set()
for tag in action.assign_tags.all():
tag_ids_to_add.add(tag.pk)
tag_ids_to_add.update(int(pk) for pk in tag.get_ancestors_pks())
if not use_overrides:
doc_tag_ids[:] = list(set(doc_tag_ids) | tag_ids_to_add)
else:
if overrides.tag_ids is None:
overrides.tag_ids = []
overrides.tag_ids = list(set(overrides.tag_ids) | tag_ids_to_add)
if action.assign_correspondent:
if not use_overrides:
document.correspondent = action.assign_correspondent
else:
overrides.correspondent_id = action.assign_correspondent.pk
if action.assign_document_type:
if not use_overrides:
document.document_type = action.assign_document_type
else:
overrides.document_type_id = action.assign_document_type.pk
if action.assign_storage_path:
if not use_overrides:
document.storage_path = action.assign_storage_path
else:
overrides.storage_path_id = action.assign_storage_path.pk
if action.assign_owner:
if not use_overrides:
document.owner = action.assign_owner
else:
overrides.owner_id = action.assign_owner.pk
if action.assign_title:
if not use_overrides:
try:
document.title = parse_w_workflow_placeholders(
action.assign_title,
document.correspondent.name if document.correspondent else "",
document.document_type.name if document.document_type else "",
document.owner.username if document.owner else "",
timezone.localtime(document.added),
document.original_filename or "",
document.filename or "",
document.created,
)
except Exception:
logger.exception(
f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
extra={"group": logging_group},
)
else:
overrides.title = action.assign_title
if any(
[
action.assign_view_users.exists(),
action.assign_view_groups.exists(),
action.assign_change_users.exists(),
action.assign_change_groups.exists(),
],
):
permissions = {
"view": {
"users": action.assign_view_users.values_list("id", flat=True),
"groups": action.assign_view_groups.values_list("id", flat=True),
},
"change": {
"users": action.assign_change_users.values_list("id", flat=True),
"groups": action.assign_change_groups.values_list("id", flat=True),
},
}
if not use_overrides:
set_permissions_for_object(
permissions=permissions,
object=document,
merge=True,
)
else:
overrides.view_users = list(
set(
(overrides.view_users or [])
+ list(permissions["view"]["users"]),
),
)
overrides.view_groups = list(
set(
(overrides.view_groups or [])
+ list(permissions["view"]["groups"]),
),
)
overrides.change_users = list(
set(
(overrides.change_users or [])
+ list(permissions["change"]["users"]),
),
)
overrides.change_groups = list(
set(
(overrides.change_groups or [])
+ list(permissions["change"]["groups"]),
),
)
if action.assign_custom_fields.exists():
if not use_overrides:
for field in action.assign_custom_fields.all():
value_field_name = CustomFieldInstance.get_value_field_name(
data_type=field.data_type,
)
args = {
value_field_name: action.assign_custom_fields_values.get(
str(field.pk),
None,
),
}
# for some reason update_or_create doesn't work here
instance = CustomFieldInstance.objects.filter(
field=field,
document=document,
).first()
if instance and args[value_field_name] is not None:
setattr(instance, value_field_name, args[value_field_name])
instance.save()
elif not instance:
CustomFieldInstance.objects.create(
**args,
field=field,
document=document,
)
else:
if overrides.custom_fields is None:
overrides.custom_fields = {}
overrides.custom_fields.update(
{
field.pk: action.assign_custom_fields_values.get(
str(field.pk),
None,
)
for field in action.assign_custom_fields.all()
},
)
def removal_action():
if action.remove_all_tags:
if not use_overrides:
doc_tag_ids.clear()
else:
overrides.tag_ids = None
else:
tag_ids_to_remove: set[int] = set()
for tag in action.remove_tags.all():
tag_ids_to_remove.add(tag.pk)
tag_ids_to_remove.update(int(pk) for pk in tag.get_descendants_pks())
if not use_overrides:
doc_tag_ids[:] = [t for t in doc_tag_ids if t not in tag_ids_to_remove]
elif overrides.tag_ids:
overrides.tag_ids = [
t for t in overrides.tag_ids if t not in tag_ids_to_remove
]
if not use_overrides and (
action.remove_all_correspondents
or (
document.correspondent
and action.remove_correspondents.filter(
pk=document.correspondent.pk,
).exists()
)
):
document.correspondent = None
elif use_overrides and (
action.remove_all_correspondents
or (
overrides.correspondent_id
and action.remove_correspondents.filter(
pk=overrides.correspondent_id,
).exists()
)
):
overrides.correspondent_id = None
if not use_overrides and (
action.remove_all_document_types
or (
document.document_type
and action.remove_document_types.filter(
pk=document.document_type.pk,
).exists()
)
):
document.document_type = None
elif use_overrides and (
action.remove_all_document_types
or (
overrides.document_type_id
and action.remove_document_types.filter(
pk=overrides.document_type_id,
).exists()
)
):
overrides.document_type_id = None
if not use_overrides and (
action.remove_all_storage_paths
or (
document.storage_path
and action.remove_storage_paths.filter(
pk=document.storage_path.pk,
).exists()
)
):
document.storage_path = None
elif use_overrides and (
action.remove_all_storage_paths
or (
overrides.storage_path_id
and action.remove_storage_paths.filter(
pk=overrides.storage_path_id,
).exists()
)
):
overrides.storage_path_id = None
if not use_overrides and (
action.remove_all_owners
or (
document.owner
and action.remove_owners.filter(pk=document.owner.pk).exists()
)
):
document.owner = None
elif use_overrides and (
action.remove_all_owners
or (
overrides.owner_id
and action.remove_owners.filter(pk=overrides.owner_id).exists()
)
):
overrides.owner_id = None
if action.remove_all_permissions:
if not use_overrides:
permissions = {
"view": {"users": [], "groups": []},
"change": {"users": [], "groups": []},
}
set_permissions_for_object(
permissions=permissions,
object=document,
merge=False,
)
else:
overrides.view_users = None
overrides.view_groups = None
overrides.change_users = None
overrides.change_groups = None
elif any(
[
action.remove_view_users.exists(),
action.remove_view_groups.exists(),
action.remove_change_users.exists(),
action.remove_change_groups.exists(),
],
):
if not use_overrides:
for user in action.remove_view_users.all():
remove_perm("view_document", user, document)
for user in action.remove_change_users.all():
remove_perm("change_document", user, document)
for group in action.remove_view_groups.all():
remove_perm("view_document", group, document)
for group in action.remove_change_groups.all():
remove_perm("change_document", group, document)
else:
if overrides.view_users:
for user in action.remove_view_users.filter(
pk__in=overrides.view_users,
):
overrides.view_users.remove(user.pk)
if overrides.change_users:
for user in action.remove_change_users.filter(
pk__in=overrides.change_users,
):
overrides.change_users.remove(user.pk)
if overrides.view_groups:
for group in action.remove_view_groups.filter(
pk__in=overrides.view_groups,
):
overrides.view_groups.remove(group.pk)
if overrides.change_groups:
for group in action.remove_change_groups.filter(
pk__in=overrides.change_groups,
):
overrides.change_groups.remove(group.pk)
if action.remove_all_custom_fields:
if not use_overrides:
CustomFieldInstance.objects.filter(document=document).hard_delete()
else:
overrides.custom_fields = None
elif action.remove_custom_fields.exists():
if not use_overrides:
CustomFieldInstance.objects.filter(
field__in=action.remove_custom_fields.all(),
document=document,
).hard_delete()
elif overrides.custom_fields:
for field in action.remove_custom_fields.filter(
pk__in=overrides.custom_fields.keys(),
):
overrides.custom_fields.pop(field.pk, None)
def email_action():
if not settings.EMAIL_ENABLED:
logger.error(
"Email backend has not been configured, cannot send email notifications",
extra={"group": logging_group},
)
return
if not use_overrides:
title = document.title
doc_url = (
f"{settings.PAPERLESS_URL}{settings.BASE_URL}documents/{document.pk}/"
)
correspondent = (
document.correspondent.name if document.correspondent else ""
)
document_type = (
document.document_type.name if document.document_type else ""
)
owner_username = document.owner.username if document.owner else ""
filename = document.original_filename or ""
current_filename = document.filename or ""
added = timezone.localtime(document.added)
created = document.created
else:
title = overrides.title if overrides.title else str(document.original_file)
doc_url = ""
correspondent = (
Correspondent.objects.filter(pk=overrides.correspondent_id).first()
if overrides.correspondent_id
else ""
)
document_type = (
DocumentType.objects.filter(pk=overrides.document_type_id).first().name
if overrides.document_type_id
else ""
)
owner_username = (
User.objects.filter(pk=overrides.owner_id).first().username
if overrides.owner_id
else ""
)
filename = document.original_file if document.original_file else ""
current_filename = filename
added = timezone.localtime(timezone.now())
created = overrides.created
subject = (
parse_w_workflow_placeholders(
action.email.subject,
correspondent,
document_type,
owner_username,
added,
filename,
current_filename,
created,
title,
doc_url,
)
if action.email.subject
else ""
)
body = (
parse_w_workflow_placeholders(
action.email.body,
correspondent,
document_type,
owner_username,
added,
filename,
current_filename,
created,
title,
doc_url,
)
if action.email.body
else ""
)
try:
attachments: list[EmailAttachment] = []
if action.email.include_document:
attachment: EmailAttachment | None = None
if trigger_type in [
WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
] and isinstance(document, Document):
friendly_name = (
Path(current_filename).name
if current_filename
else document.source_path.name
)
attachment = EmailAttachment(
path=document.source_path,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
elif original_file:
friendly_name = (
Path(current_filename).name
if current_filename
else original_file.name
)
attachment = EmailAttachment(
path=original_file,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
if attachment:
attachments = [attachment]
n_messages = send_email(
subject=subject,
body=body,
to=action.email.to.split(","),
attachments=attachments,
)
logger.debug(
f"Sent {n_messages} notification email(s) to {action.email.to}",
extra={"group": logging_group},
)
except Exception as e:
logger.exception(
f"Error occurred sending notification email: {e}",
extra={"group": logging_group},
)
def webhook_action():
if not use_overrides:
title = document.title
doc_url = (
f"{settings.PAPERLESS_URL}{settings.BASE_URL}documents/{document.pk}/"
)
correspondent = (
document.correspondent.name if document.correspondent else ""
)
document_type = (
document.document_type.name if document.document_type else ""
)
owner_username = document.owner.username if document.owner else ""
filename = document.original_filename or ""
current_filename = document.filename or ""
added = timezone.localtime(document.added)
created = document.created
else:
title = overrides.title if overrides.title else str(document.original_file)
doc_url = ""
correspondent = (
Correspondent.objects.filter(pk=overrides.correspondent_id).first()
if overrides.correspondent_id
else ""
)
document_type = (
DocumentType.objects.filter(pk=overrides.document_type_id).first().name
if overrides.document_type_id
else ""
)
owner_username = (
User.objects.filter(pk=overrides.owner_id).first().username
if overrides.owner_id
else ""
)
filename = document.original_file if document.original_file else ""
current_filename = filename
added = timezone.localtime(timezone.now())
created = overrides.created
try:
data = {}
if action.webhook.use_params:
if action.webhook.params:
try:
for key, value in action.webhook.params.items():
data[key] = parse_w_workflow_placeholders(
value,
correspondent,
document_type,
owner_username,
added,
filename,
current_filename,
created,
title,
doc_url,
)
except Exception as e:
logger.error(
f"Error occurred parsing webhook params: {e}",
extra={"group": logging_group},
)
elif action.webhook.body:
data = parse_w_workflow_placeholders(
action.webhook.body,
correspondent,
document_type,
owner_username,
added,
filename,
current_filename,
created,
title,
doc_url,
)
headers = {}
if action.webhook.headers:
try:
headers = {
str(k): str(v) for k, v in action.webhook.headers.items()
}
except Exception as e:
logger.error(
f"Error occurred parsing webhook headers: {e}",
extra={"group": logging_group},
)
files = None
if action.webhook.include_document:
with original_file.open("rb") as f:
files = {
"file": (
filename,
f.read(),
document.mime_type,
),
}
send_webhook.delay(
url=action.webhook.url,
data=data,
headers=headers,
files=files,
as_json=action.webhook.as_json,
)
logger.debug(
f"Webhook to {action.webhook.url} queued",
extra={"group": logging_group},
)
except Exception as e:
logger.exception(
f"Error occurred sending webhook: {e}",
extra={"group": logging_group},
)
use_overrides = overrides is not None
if original_file is None:
original_file = (
@@ -696,7 +1341,30 @@ def run_workflows(
)
messages = []
workflows = get_workflows_for_trigger(trigger_type, workflow_to_run)
workflows = (
(
Workflow.objects.filter(enabled=True, triggers__type=trigger_type)
.prefetch_related(
"actions",
"actions__assign_view_users",
"actions__assign_view_groups",
"actions__assign_change_users",
"actions__assign_change_groups",
"actions__assign_custom_fields",
"actions__remove_tags",
"actions__remove_correspondents",
"actions__remove_document_types",
"actions__remove_storage_paths",
"actions__remove_custom_fields",
"actions__remove_owners",
"triggers",
)
.order_by("order")
.distinct()
)
if workflow_to_run is None
else [workflow_to_run]
)
for workflow in workflows:
if not use_overrides:
@@ -716,39 +1384,13 @@ def run_workflows(
messages.append(message)
if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
if use_overrides and overrides:
apply_assignment_to_overrides(action, overrides)
else:
apply_assignment_to_document(
action,
document,
doc_tag_ids,
logging_group,
)
assignment_action()
elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
if use_overrides and overrides:
apply_removal_to_overrides(action, overrides)
else:
apply_removal_to_document(action, document, doc_tag_ids)
removal_action()
elif action.type == WorkflowAction.WorkflowActionType.EMAIL:
context = build_workflow_action_context(document, overrides)
execute_email_action(
action,
document,
context,
logging_group,
original_file,
trigger_type,
)
email_action()
elif action.type == WorkflowAction.WorkflowActionType.WEBHOOK:
context = build_workflow_action_context(document, overrides)
execute_webhook_action(
action,
document,
context,
logging_group,
original_file,
)
webhook_action()
if not use_overrides:
# limit title to 128 characters
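Both notification branches above push the same context values through parse_w_workflow_placeholders. A minimal standalone sketch of the expected brace-style substitution (a hypothetical stand-in, not the real helper from documents.templating.workflows):

def parse_placeholders_sketch(text: str, **context) -> str:
    # Hypothetical stand-in: substitute {name} tokens from the workflow
    # context, rendering unknown tokens as empty strings.
    class _Empty(dict):
        def __missing__(self, key):
            return ""
    return text.format_map(_Empty(context))

subject = parse_placeholders_sketch(
    "New {document_type} from {correspondent}: {title}",
    correspondent="ACME Corp",
    document_type="Invoice",
    title="ACME Invoice",
)
assert subject == "New Invoice from ACME Corp: ACME Invoice"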

View File

@@ -41,6 +41,7 @@ from documents.models import DocumentType
from documents.models import PaperlessTask
from documents.models import StoragePath
from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowRun
from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser
@@ -53,7 +54,6 @@ from documents.sanity_checker import SanityCheckFailedException
from documents.signals import document_updated
from documents.signals.handlers import cleanup_document_deletion
from documents.signals.handlers import run_workflows
from documents.workflows.utils import get_workflows_for_trigger
if settings.AUDIT_LOG_ENABLED:
from auditlog.models import LogEntry
@@ -400,8 +400,13 @@ def check_scheduled_workflows():
Once a document satisfies this condition, and recurring/non-recurring constraints are met, the workflow is run.
"""
scheduled_workflows = get_workflows_for_trigger(
WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
scheduled_workflows: list[Workflow] = (
Workflow.objects.filter(
triggers__type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
enabled=True,
)
.distinct()
.prefetch_related("triggers")
)
if scheduled_workflows:
logger.debug(f"Checking {len(scheduled_workflows)} scheduled workflows")

View File

@@ -1289,7 +1289,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
content_type__app_label="admin",
),
)
set_permissions([4, 5], set_permissions={}, owner=user2, merge=False)
set_permissions([4, 5], set_permissions=[], owner=user2, merge=False)
with index.open_index_writer() as writer:
index.update_document(writer, d1)

View File

@@ -26,7 +26,7 @@ from rest_framework.test import APITestCase
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
from documents.signals.handlers import run_workflows
from documents.workflows.webhooks import send_webhook
from documents.signals.handlers import send_webhook
if TYPE_CHECKING:
from django.db.models import QuerySet
@@ -2858,7 +2858,7 @@ class TestWorkflows(
mock_email_send.return_value = 1
with self.assertNoLogs("paperless.workflows", level="ERROR"):
with self.assertNoLogs("paperless.handlers", level="ERROR"):
run_workflows(
WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
consumable_document,
@@ -3096,7 +3096,7 @@ class TestWorkflows(
original_filename="sample.pdf",
)
with self.assertLogs("paperless.workflows.actions", level="ERROR") as cm:
with self.assertLogs("paperless.handlers", level="ERROR") as cm:
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
expected_str = "Email backend has not been configured"
@@ -3144,7 +3144,7 @@ class TestWorkflows(
original_filename="sample.pdf",
)
with self.assertLogs("paperless.workflows", level="ERROR") as cm:
with self.assertLogs("paperless.handlers", level="ERROR") as cm:
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
expected_str = "Error occurred sending email"
@@ -3215,7 +3215,7 @@ class TestWorkflows(
PAPERLESS_FORCE_SCRIPT_NAME="/paperless",
BASE_URL="/paperless/",
)
@mock.patch("documents.workflows.webhooks.send_webhook.delay")
@mock.patch("documents.signals.handlers.send_webhook.delay")
def test_workflow_webhook_action_body(self, mock_post):
"""
GIVEN:
@@ -3274,7 +3274,7 @@ class TestWorkflows(
@override_settings(
PAPERLESS_URL="http://localhost:8000",
)
@mock.patch("documents.workflows.webhooks.send_webhook.delay")
@mock.patch("documents.signals.handlers.send_webhook.delay")
def test_workflow_webhook_action_w_files(self, mock_post):
"""
GIVEN:
@@ -3377,7 +3377,7 @@ class TestWorkflows(
)
# fails because no file
with self.assertLogs("paperless.workflows", level="ERROR") as cm:
with self.assertLogs("paperless.handlers", level="ERROR") as cm:
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
expected_str = "Error occurred sending webhook"
@@ -3420,7 +3420,7 @@ class TestWorkflows(
original_filename="sample.pdf",
)
with self.assertLogs("paperless.workflows", level="ERROR") as cm:
with self.assertLogs("paperless.handlers", level="ERROR") as cm:
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
expected_str = "Error occurred parsing webhook params"
@@ -3436,7 +3436,7 @@ class TestWorkflows(
raise_for_status=mock.Mock(),
)
with self.assertLogs("paperless.workflows") as cm:
with self.assertLogs("paperless.handlers") as cm:
send_webhook(
url="http://paperless-ngx.com",
data="Test message",
@@ -3482,7 +3482,7 @@ class TestWorkflows(
),
)
with self.assertLogs("paperless.workflows") as cm:
with self.assertLogs("paperless.handlers") as cm:
with self.assertRaises(HTTPStatusError):
send_webhook(
url="http://paperless-ngx.com",
@@ -3498,7 +3498,7 @@ class TestWorkflows(
)
self.assertIn(expected_str, cm.output[0])
@mock.patch("documents.workflows.webhooks.send_webhook.delay")
@mock.patch("documents.signals.handlers.send_webhook.delay")
def test_workflow_webhook_action_consumption(self, mock_post):
"""
GIVEN:

View File

@@ -1,261 +0,0 @@
import logging
from pathlib import Path
from django.conf import settings
from django.contrib.auth.models import User
from django.utils import timezone
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.mail import EmailAttachment
from documents.mail import send_email
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
from documents.templating.workflows import parse_w_workflow_placeholders
from documents.workflows.webhooks import send_webhook
logger = logging.getLogger("paperless.workflows.actions")
def build_workflow_action_context(
document: Document | ConsumableDocument,
overrides: DocumentMetadataOverrides | None,
) -> dict:
"""
Build context dictionary for workflow action placeholder parsing.
"""
use_overrides = overrides is not None
if not use_overrides:
return {
"title": document.title,
"doc_url": f"{settings.PAPERLESS_URL}{settings.BASE_URL}documents/{document.pk}/",
"correspondent": document.correspondent.name
if document.correspondent
else "",
"document_type": document.document_type.name
if document.document_type
else "",
"owner_username": document.owner.username if document.owner else "",
"filename": document.original_filename or "",
"current_filename": document.filename or "",
"added": timezone.localtime(document.added),
"created": document.created,
}
correspondent_obj = (
Correspondent.objects.filter(pk=overrides.correspondent_id).first()
if overrides and overrides.correspondent_id
else None
)
document_type_obj = (
DocumentType.objects.filter(pk=overrides.document_type_id).first()
if overrides and overrides.document_type_id
else None
)
owner_obj = (
User.objects.filter(pk=overrides.owner_id).first()
if overrides and overrides.owner_id
else None
)
filename = document.original_file if document.original_file else ""
return {
"title": overrides.title
if overrides and overrides.title
else str(document.original_file),
"doc_url": "",
"correspondent": correspondent_obj.name if correspondent_obj else "",
"document_type": document_type_obj.name if document_type_obj else "",
"owner_username": owner_obj.username if owner_obj else "",
"filename": filename,
"current_filename": filename,
"added": timezone.localtime(timezone.now()),
"created": overrides.created if overrides else None,
}
def execute_email_action(
action: WorkflowAction,
document: Document | ConsumableDocument,
context: dict,
logging_group,
original_file: Path,
trigger_type: WorkflowTrigger.WorkflowTriggerType,
) -> None:
"""
Execute an email action for a workflow.
"""
if not settings.EMAIL_ENABLED:
logger.error(
"Email backend has not been configured, cannot send email notifications",
extra={"group": logging_group},
)
return
subject = (
parse_w_workflow_placeholders(
action.email.subject,
context["correspondent"],
context["document_type"],
context["owner_username"],
context["added"],
context["filename"],
context["current_filename"],
context["created"],
context["title"],
context["doc_url"],
)
if action.email.subject
else ""
)
body = (
parse_w_workflow_placeholders(
action.email.body,
context["correspondent"],
context["document_type"],
context["owner_username"],
context["added"],
context["filename"],
context["current_filename"],
context["created"],
context["title"],
context["doc_url"],
)
if action.email.body
else ""
)
try:
attachments: list[EmailAttachment] = []
if action.email.include_document:
attachment: EmailAttachment | None = None
if trigger_type in [
WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
] and isinstance(document, Document):
friendly_name = (
Path(context["current_filename"]).name
if context["current_filename"]
else document.source_path.name
)
attachment = EmailAttachment(
path=document.source_path,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
elif original_file:
friendly_name = (
Path(context["current_filename"]).name
if context["current_filename"]
else original_file.name
)
attachment = EmailAttachment(
path=original_file,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
if attachment:
attachments = [attachment]
n_messages = send_email(
subject=subject,
body=body,
to=action.email.to.split(","),
attachments=attachments,
)
logger.debug(
f"Sent {n_messages} notification email(s) to {action.email.to}",
extra={"group": logging_group},
)
except Exception as e:
logger.exception(
f"Error occurred sending notification email: {e}",
extra={"group": logging_group},
)
def execute_webhook_action(
action: WorkflowAction,
document: Document | ConsumableDocument,
context: dict,
logging_group,
original_file: Path,
):
try:
data = {}
if action.webhook.use_params:
if action.webhook.params:
try:
for key, value in action.webhook.params.items():
data[key] = parse_w_workflow_placeholders(
value,
context["correspondent"],
context["document_type"],
context["owner_username"],
context["added"],
context["filename"],
context["current_filename"],
context["created"],
context["title"],
context["doc_url"],
)
except Exception as e:
logger.error(
f"Error occurred parsing webhook params: {e}",
extra={"group": logging_group},
)
elif action.webhook.body:
data = parse_w_workflow_placeholders(
action.webhook.body,
context["correspondent"],
context["document_type"],
context["owner_username"],
context["added"],
context["filename"],
context["current_filename"],
context["created"],
context["title"],
context["doc_url"],
)
headers = {}
if action.webhook.headers:
try:
headers = {str(k): str(v) for k, v in action.webhook.headers.items()}
except Exception as e:
logger.error(
f"Error occurred parsing webhook headers: {e}",
extra={"group": logging_group},
)
files = None
if action.webhook.include_document:
with original_file.open("rb") as f:
files = {
"file": (
str(context["filename"])
if context["filename"]
else original_file.name,
f.read(),
document.mime_type,
),
}
send_webhook.delay(
url=action.webhook.url,
data=data,
headers=headers,
files=files,
as_json=action.webhook.as_json,
)
logger.debug(
f"Webhook to {action.webhook.url} queued",
extra={"group": logging_group},
)
except Exception as e:
logger.exception(
f"Error occurred sending webhook: {e}",
extra={"group": logging_group},
)
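Note the two lookup patterns in the overrides path: .first().name raises on a dangling id, while the resolve-then-guard form used by build_workflow_action_context degrades to "". A plain-dict sketch of the safe form:

CORRESPONDENTS = {1: "ACME Corp"}  # hypothetical pk -> name table

def resolve_name(table: dict[int, str], pk: int | None) -> str:
    # Mirrors .filter(pk=...).first() followed by a None guard.
    name = table.get(pk) if pk else None
    return name if name else ""

assert resolve_name(CORRESPONDENTS, 1) == "ACME Corp"
assert resolve_name(CORRESPONDENTS, 99) == ""    # dangling id
assert resolve_name(CORRESPONDENTS, None) == ""  # override not set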

View File

@@ -1,357 +0,0 @@
import logging
from django.utils import timezone
from guardian.shortcuts import remove_perm
from documents.data_models import DocumentMetadataOverrides
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import WorkflowAction
from documents.permissions import set_permissions_for_object
from documents.templating.workflows import parse_w_workflow_placeholders
logger = logging.getLogger("paperless.workflows.mutations")
def apply_assignment_to_document(
action: WorkflowAction,
document: Document,
doc_tag_ids: list[int],
logging_group,
):
"""
Apply assignment actions to a Document instance.
action: WorkflowAction, annotated with 'has_assign_*' boolean fields
"""
if action.has_assign_tags:
tag_ids_to_add: set[int] = set()
for tag in action.assign_tags.all():
tag_ids_to_add.add(tag.pk)
tag_ids_to_add.update(int(pk) for pk in tag.get_ancestors_pks())
doc_tag_ids[:] = list(set(doc_tag_ids) | tag_ids_to_add)
if action.assign_correspondent:
document.correspondent = action.assign_correspondent
if action.assign_document_type:
document.document_type = action.assign_document_type
if action.assign_storage_path:
document.storage_path = action.assign_storage_path
if action.assign_owner:
document.owner = action.assign_owner
if action.assign_title:
try:
document.title = parse_w_workflow_placeholders(
action.assign_title,
document.correspondent.name if document.correspondent else "",
document.document_type.name if document.document_type else "",
document.owner.username if document.owner else "",
timezone.localtime(document.added),
document.original_filename or "",
document.filename or "",
document.created,
)
except Exception: # pragma: no cover
logger.exception(
f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
extra={"group": logging_group},
)
if any(
[
action.has_assign_view_users,
action.has_assign_view_groups,
action.has_assign_change_users,
action.has_assign_change_groups,
],
):
permissions = {
"view": {
"users": action.assign_view_users.values_list("id", flat=True),
"groups": action.assign_view_groups.values_list("id", flat=True),
},
"change": {
"users": action.assign_change_users.values_list("id", flat=True),
"groups": action.assign_change_groups.values_list("id", flat=True),
},
}
set_permissions_for_object(
permissions=permissions,
object=document,
merge=True,
)
if action.has_assign_custom_fields:
for field in action.assign_custom_fields.all():
value_field_name = CustomFieldInstance.get_value_field_name(
data_type=field.data_type,
)
args = {
value_field_name: action.assign_custom_fields_values.get(
str(field.pk),
None,
),
}
# for some reason update_or_create doesn't work here
instance = CustomFieldInstance.objects.filter(
field=field,
document=document,
).first()
if instance and args[value_field_name] is not None:
setattr(instance, value_field_name, args[value_field_name])
instance.save()
elif not instance:
CustomFieldInstance.objects.create(
**args,
field=field,
document=document,
)
def apply_assignment_to_overrides(
action: WorkflowAction,
overrides: DocumentMetadataOverrides,
):
"""
Apply assignment actions to DocumentMetadataOverrides.
action: WorkflowAction, annotated with 'has_assign_*' boolean fields
"""
if action.has_assign_tags:
if overrides.tag_ids is None:
overrides.tag_ids = []
tag_ids_to_add: set[int] = set()
for tag in action.assign_tags.all():
tag_ids_to_add.add(tag.pk)
tag_ids_to_add.update(int(pk) for pk in tag.get_ancestors_pks())
overrides.tag_ids = list(set(overrides.tag_ids) | tag_ids_to_add)
if action.assign_correspondent:
overrides.correspondent_id = action.assign_correspondent.pk
if action.assign_document_type:
overrides.document_type_id = action.assign_document_type.pk
if action.assign_storage_path:
overrides.storage_path_id = action.assign_storage_path.pk
if action.assign_owner:
overrides.owner_id = action.assign_owner.pk
if action.assign_title:
overrides.title = action.assign_title
if any(
[
action.has_assign_view_users,
action.has_assign_view_groups,
action.has_assign_change_users,
action.has_assign_change_groups,
],
):
overrides.view_users = list(
set(
(overrides.view_users or [])
+ list(action.assign_view_users.values_list("id", flat=True)),
),
)
overrides.view_groups = list(
set(
(overrides.view_groups or [])
+ list(action.assign_view_groups.values_list("id", flat=True)),
),
)
overrides.change_users = list(
set(
(overrides.change_users or [])
+ list(action.assign_change_users.values_list("id", flat=True)),
),
)
overrides.change_groups = list(
set(
(overrides.change_groups or [])
+ list(action.assign_change_groups.values_list("id", flat=True)),
),
)
if action.has_assign_custom_fields:
if overrides.custom_fields is None:
overrides.custom_fields = {}
overrides.custom_fields.update(
{
field.pk: action.assign_custom_fields_values.get(
str(field.pk),
None,
)
for field in action.assign_custom_fields.all()
},
)
def apply_removal_to_document(
action: WorkflowAction,
document: Document,
doc_tag_ids: list[int],
):
"""
Apply removal actions to a Document instance.
action: WorkflowAction, annotated with 'has_remove_*' boolean fields
"""
if action.remove_all_tags:
doc_tag_ids.clear()
else:
tag_ids_to_remove: set[int] = set()
for tag in action.remove_tags.all():
tag_ids_to_remove.add(tag.pk)
tag_ids_to_remove.update(int(pk) for pk in tag.get_descendants_pks())
doc_tag_ids[:] = [t for t in doc_tag_ids if t not in tag_ids_to_remove]
if action.remove_all_correspondents or (
document.correspondent
and action.remove_correspondents.filter(pk=document.correspondent.pk).exists()
):
document.correspondent = None
if action.remove_all_document_types or (
document.document_type
and action.remove_document_types.filter(pk=document.document_type.pk).exists()
):
document.document_type = None
if action.remove_all_storage_paths or (
document.storage_path
and action.remove_storage_paths.filter(pk=document.storage_path.pk).exists()
):
document.storage_path = None
if action.remove_all_owners or (
document.owner and action.remove_owners.filter(pk=document.owner.pk).exists()
):
document.owner = None
if action.remove_all_permissions:
permissions = {
"view": {"users": [], "groups": []},
"change": {"users": [], "groups": []},
}
set_permissions_for_object(
permissions=permissions,
object=document,
merge=False,
)
if any(
[
action.has_remove_view_users,
action.has_remove_view_groups,
action.has_remove_change_users,
action.has_remove_change_groups,
],
):
for user in action.remove_view_users.all():
remove_perm("view_document", user, document)
for user in action.remove_change_users.all():
remove_perm("change_document", user, document)
for group in action.remove_view_groups.all():
remove_perm("view_document", group, document)
for group in action.remove_change_groups.all():
remove_perm("change_document", group, document)
if action.remove_all_custom_fields:
CustomFieldInstance.objects.filter(document=document).hard_delete()
elif action.has_remove_custom_fields:
CustomFieldInstance.objects.filter(
field__in=action.remove_custom_fields.all(),
document=document,
).hard_delete()
def apply_removal_to_overrides(
action: WorkflowAction,
overrides: DocumentMetadataOverrides,
):
"""
Apply removal actions to DocumentMetadataOverrides.
action: WorkflowAction, annotated with 'has_remove_*' boolean fields
"""
if action.remove_all_tags:
overrides.tag_ids = None
elif overrides.tag_ids:
tag_ids_to_remove: set[int] = set()
for tag in action.remove_tags.all():
tag_ids_to_remove.add(tag.pk)
tag_ids_to_remove.update(int(pk) for pk in tag.get_descendants_pks())
overrides.tag_ids = [t for t in overrides.tag_ids if t not in tag_ids_to_remove]
if action.remove_all_correspondents or (
overrides.correspondent_id
and action.remove_correspondents.filter(pk=overrides.correspondent_id).exists()
):
overrides.correspondent_id = None
if action.remove_all_document_types or (
overrides.document_type_id
and action.remove_document_types.filter(pk=overrides.document_type_id).exists()
):
overrides.document_type_id = None
if action.remove_all_storage_paths or (
overrides.storage_path_id
and action.remove_storage_paths.filter(pk=overrides.storage_path_id).exists()
):
overrides.storage_path_id = None
if action.remove_all_owners or (
overrides.owner_id
and action.remove_owners.filter(pk=overrides.owner_id).exists()
):
overrides.owner_id = None
if action.remove_all_permissions:
overrides.view_users = None
overrides.view_groups = None
overrides.change_users = None
overrides.change_groups = None
elif any(
[
action.has_remove_view_users,
action.has_remove_view_groups,
action.has_remove_change_users,
action.has_remove_change_groups,
],
):
if overrides.view_users:
for user in action.remove_view_users.filter(pk__in=overrides.view_users):
overrides.view_users.remove(user.pk)
if overrides.change_users:
for user in action.remove_change_users.filter(
pk__in=overrides.change_users,
):
overrides.change_users.remove(user.pk)
if overrides.view_groups:
for group in action.remove_view_groups.filter(pk__in=overrides.view_groups):
overrides.view_groups.remove(group.pk)
if overrides.change_groups:
for group in action.remove_change_groups.filter(
pk__in=overrides.change_groups,
):
overrides.change_groups.remove(group.pk)
if action.remove_all_custom_fields:
overrides.custom_fields = None
elif action.has_remove_custom_fields and overrides.custom_fields:
for field in action.remove_custom_fields.filter(
pk__in=overrides.custom_fields.keys(),
):
overrides.custom_fields.pop(field.pk, None)
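Tag assignment folds each assigned tag plus all of its ancestors into the target id list, deduplicated by set union; removal does the mirror image with descendants. A plain-Python sketch of the assignment half, with a hypothetical two-level tag tree:

ANCESTORS = {3: [1], 4: [1, 2]}  # hypothetical: tag id -> ancestor ids

def merged_tag_ids(existing: list[int], assigned: list[int]) -> list[int]:
    to_add: set[int] = set()
    for tag in assigned:
        to_add.add(tag)
        to_add.update(ANCESTORS.get(tag, []))
    return sorted(set(existing) | to_add)

assert merged_tag_ids([5], [3]) == [1, 3, 5]       # ancestor 1 pulled in
assert merged_tag_ids([], [3, 4]) == [1, 2, 3, 4]  # union, no duplicates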

View File

@@ -1,116 +0,0 @@
import logging
from django.db.models import Exists
from django.db.models import OuterRef
from django.db.models import Prefetch
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
logger = logging.getLogger("paperless.workflows")
def get_workflows_for_trigger(
trigger_type: WorkflowTrigger.WorkflowTriggerType,
workflow_to_run: Workflow | None = None,
):
"""
Return workflows relevant to a trigger. If a specific workflow is given,
wrap it in a list; otherwise fetch enabled workflows for the trigger with
the prefetches used by the runner.
"""
if workflow_to_run is not None:
return [workflow_to_run]
annotated_actions = (
WorkflowAction.objects.select_related(
"assign_correspondent",
"assign_document_type",
"assign_storage_path",
"assign_owner",
"email",
"webhook",
)
.prefetch_related(
"assign_tags",
"assign_view_users",
"assign_view_groups",
"assign_change_users",
"assign_change_groups",
"assign_custom_fields",
"remove_tags",
"remove_correspondents",
"remove_document_types",
"remove_storage_paths",
"remove_custom_fields",
"remove_owners",
)
.annotate(
has_assign_tags=Exists(
WorkflowAction.assign_tags.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_assign_view_users=Exists(
WorkflowAction.assign_view_users.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_assign_view_groups=Exists(
WorkflowAction.assign_view_groups.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_assign_change_users=Exists(
WorkflowAction.assign_change_users.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_assign_change_groups=Exists(
WorkflowAction.assign_change_groups.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_assign_custom_fields=Exists(
WorkflowAction.assign_custom_fields.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_remove_view_users=Exists(
WorkflowAction.remove_view_users.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_remove_view_groups=Exists(
WorkflowAction.remove_view_groups.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_remove_change_users=Exists(
WorkflowAction.remove_change_users.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_remove_change_groups=Exists(
WorkflowAction.remove_change_groups.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
has_remove_custom_fields=Exists(
WorkflowAction.remove_custom_fields.through.objects.filter(
workflowaction_id=OuterRef("pk"),
),
),
)
)
return (
Workflow.objects.filter(enabled=True, triggers__type=trigger_type)
.prefetch_related(
Prefetch("actions", queryset=annotated_actions),
"triggers",
)
.order_by("order")
.distinct()
)
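The has_* annotations are the point of this helper: each compiles to a correlated EXISTS subquery, so later action handling reads plain booleans instead of firing an M2M query per action. The same Exists/OuterRef pattern on a hypothetical Book.authors many-to-many (assumed model, for illustration only):

from django.db.models import Exists, OuterRef
from myapp.models import Book  # hypothetical model with an `authors` M2M

books = Book.objects.annotate(
    has_authors=Exists(
        # The implicit through table holds (book_id, author_id) rows.
        Book.authors.through.objects.filter(book_id=OuterRef("pk")),
    ),
)
for book in books:
    if book.has_authors:  # attribute access, no extra query
        print(book.pk)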

View File

@@ -1,96 +0,0 @@
import ipaddress
import logging
import socket
from urllib.parse import urlparse
import httpx
from celery import shared_task
from django.conf import settings
logger = logging.getLogger("paperless.workflows.webhooks")
def _is_public_ip(ip: str) -> bool:
try:
obj = ipaddress.ip_address(ip)
return not (
obj.is_private
or obj.is_loopback
or obj.is_link_local
or obj.is_multicast
or obj.is_unspecified
)
except ValueError: # pragma: no cover
return False
def _resolve_first_ip(host: str) -> str | None:
try:
info = socket.getaddrinfo(host, None)
return info[0][4][0] if info else None
except Exception: # pragma: no cover
return None
@shared_task(
retry_backoff=True,
autoretry_for=(httpx.HTTPStatusError,),
max_retries=3,
throws=(httpx.HTTPError,),
)
def send_webhook(
url: str,
data: str | dict,
headers: dict,
files: dict,
*,
as_json: bool = False,
):
p = urlparse(url)
if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
logger.warning("Webhook blocked: invalid scheme/hostname")
raise ValueError("Invalid URL scheme or hostname.")
port = p.port or (443 if p.scheme == "https" else 80)
if (
len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
and port not in settings.WEBHOOKS_ALLOWED_PORTS
):
logger.warning("Webhook blocked: port not permitted")
raise ValueError("Destination port not permitted.")
ip = _resolve_first_ip(p.hostname)
if not ip or (
not _is_public_ip(ip) and not settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS
):
logger.warning("Webhook blocked: destination not allowed")
raise ValueError("Destination host is not allowed.")
try:
post_args = {
"url": url,
"headers": {
k: v for k, v in (headers or {}).items() if k.lower() != "host"
},
"files": files or None,
"timeout": 5.0,
"follow_redirects": False,
}
if as_json:
post_args["json"] = data
elif isinstance(data, dict):
post_args["data"] = data
else:
post_args["content"] = data
httpx.post(
**post_args,
).raise_for_status()
logger.info(
f"Webhook sent to {url}",
)
except Exception as e:
logger.error(
f"Failed attempt sending webhook to {url}: {e}",
)
raise
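The pre-flight checks do the real work here: scheme and port come from the parsed URL, then the hostname's first resolved address is classified before any request leaves the box. _is_public_ip is self-contained enough to exercise directly (restated standalone):

import ipaddress

def _is_public_ip(ip: str) -> bool:
    # Same classification as above: private, loopback, link-local,
    # multicast, and unspecified addresses are all rejected.
    try:
        obj = ipaddress.ip_address(ip)
        return not (
            obj.is_private
            or obj.is_loopback
            or obj.is_link_local
            or obj.is_multicast
            or obj.is_unspecified
        )
    except ValueError:
        return False

assert _is_public_ip("93.184.216.34") is True   # public address
assert _is_public_ip("10.0.0.5") is False       # RFC 1918 private
assert _is_public_ip("127.0.0.1") is False      # loopback
assert _is_public_ip("169.254.10.10") is False  # link-local
assert _is_public_ip("not-an-ip") is False      # unparsable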

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-12-10 16:39+0000\n"
"POT-Creation-Date: 2025-11-14 16:09+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -1224,35 +1224,35 @@ msgstr ""
msgid "Invalid regular expression: %(error)s"
msgstr ""
#: documents/serialisers.py:622
#: documents/serialisers.py:619
msgid "Invalid color."
msgstr ""
#: documents/serialisers.py:1808
#: documents/serialisers.py:1805
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
#: documents/serialisers.py:1852
#: documents/serialisers.py:1849
#, python-format
msgid "Custom field id must be an integer: %(id)s"
msgstr ""
#: documents/serialisers.py:1859
#: documents/serialisers.py:1856
#, python-format
msgid "Custom field with id %(id)s does not exist"
msgstr ""
#: documents/serialisers.py:1876 documents/serialisers.py:1886
#: documents/serialisers.py:1873 documents/serialisers.py:1883
msgid ""
"Custom fields must be a list of integers or an object mapping ids to values."
msgstr ""
#: documents/serialisers.py:1881
#: documents/serialisers.py:1878
msgid "Some custom fields don't exist or were specified twice."
msgstr ""
#: documents/serialisers.py:1996
#: documents/serialisers.py:1993
msgid "Invalid variable detected."
msgstr ""

View File

@@ -137,25 +137,3 @@ class CustomSocialAccountAdapter(DefaultSocialAccountAdapter):
user.save()
handle_social_account_updated(None, request, sociallogin)
return user
def on_authentication_error(
self,
request,
provider,
error=None,
exception=None,
extra_context=None,
):
"""
Just log errors and pass them along.
"""
logger.warning(
f"Social authentication error for provider `{provider!s}`: {error!s} ({exception!s})",
)
return super().on_authentication_error(
request,
provider,
error,
exception,
extra_context,
)

View File

@@ -322,7 +322,6 @@ INSTALLED_APPS = [
"paperless_tesseract.apps.PaperlessTesseractConfig",
"paperless_text.apps.PaperlessTextConfig",
"paperless_mail.apps.PaperlessMailConfig",
"paperless_remote.apps.PaperlessRemoteParserConfig",
"django.contrib.admin",
"rest_framework",
"rest_framework.authtoken",
@@ -1402,10 +1401,3 @@ WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true",
)
###############################################################################
# Remote Parser #
###############################################################################
REMOTE_OCR_ENGINE = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE")
REMOTE_OCR_API_KEY = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY")
REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")

View File

@@ -167,17 +167,3 @@ class TestCustomSocialAccountAdapter(TestCase):
self.assertEqual(user.groups.count(), 1)
self.assertTrue(user.groups.filter(name="group1").exists())
self.assertFalse(user.groups.filter(name="group2").exists())
def test_error_logged_on_authentication_error(self):
adapter = get_social_adapter()
request = HttpRequest()
with self.assertLogs("paperless.auth", level="INFO") as log_cm:
adapter.on_authentication_error(
request,
provider="test-provider",
error="Error",
exception="Test authentication error",
)
self.assertTrue(
any("Test authentication error" in message for message in log_cm.output),
)

View File

@@ -1,4 +0,0 @@
# This is here so that Django finds the checks.
from paperless_remote.checks import check_remote_parser_configured
__all__ = ["check_remote_parser_configured"]

View File

@@ -1,14 +0,0 @@
from django.apps import AppConfig
from paperless_remote.signals import remote_consumer_declaration
class PaperlessRemoteParserConfig(AppConfig):
name = "paperless_remote"
def ready(self):
from documents.signals import document_consumer_declaration
document_consumer_declaration.connect(remote_consumer_declaration)
AppConfig.ready(self)

View File

@@ -1,17 +0,0 @@
from django.conf import settings
from django.core.checks import Error
from django.core.checks import register
@register()
def check_remote_parser_configured(app_configs, **kwargs):
if settings.REMOTE_OCR_ENGINE == "azureai" and not (
settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY
):
return [
Error(
"Azure AI remote parser requires endpoint and API key to be configured.",
),
]
return []
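The guard reduces to a small predicate: only the azureai engine with a missing endpoint or key yields an error, and no engine configured means nothing to check. Restated standalone:

def misconfigured(engine, endpoint, api_key) -> bool:
    # Mirrors the check above: azureai selected but a credential missing.
    return engine == "azureai" and not (endpoint and api_key)

assert misconfigured(None, None, None) is False                # engine unset
assert misconfigured("azureai", "https://ep", "key") is False  # fully configured
assert misconfigured("azureai", None, "key") is True           # endpoint missing
assert misconfigured("azureai", "https://ep", None) is True    # key missing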

View File

@@ -1,118 +0,0 @@
from pathlib import Path
from django.conf import settings
from paperless_tesseract.parsers import RasterisedDocumentParser
class RemoteEngineConfig:
def __init__(
self,
engine: str,
api_key: str | None = None,
endpoint: str | None = None,
):
self.engine = engine
self.api_key = api_key
self.endpoint = endpoint
def engine_is_valid(self):
valid = self.engine in ["azureai"] and self.api_key is not None
if self.engine == "azureai":
valid = valid and self.endpoint is not None
return valid
class RemoteDocumentParser(RasterisedDocumentParser):
"""
This parser uses a remote OCR engine to parse documents. Currently it supports only Azure AI Vision,
as that is the only service that provides a remote OCR API with text-embedded PDF output.
"""
logging_name = "paperless.parsing.remote"
def get_settings(self) -> RemoteEngineConfig:
"""
Returns the configuration for the remote OCR engine, loaded from Django settings.
"""
return RemoteEngineConfig(
engine=settings.REMOTE_OCR_ENGINE,
api_key=settings.REMOTE_OCR_API_KEY,
endpoint=settings.REMOTE_OCR_ENDPOINT,
)
def supported_mime_types(self):
if self.settings.engine_is_valid():
return {
"application/pdf": ".pdf",
"image/png": ".png",
"image/jpeg": ".jpg",
"image/tiff": ".tiff",
"image/bmp": ".bmp",
"image/gif": ".gif",
"image/webp": ".webp",
}
else:
return {}
def azure_ai_vision_parse(
self,
file: Path,
) -> str | None:
"""
Uses Azure AI Vision to parse the document and return the text content.
It requests a searchable PDF output with embedded text.
The PDF is saved to the archive_path attribute.
Returns the text content extracted from the document.
If the parsing fails, it returns None.
"""
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
from azure.ai.documentintelligence.models import AnalyzeOutputOption
from azure.ai.documentintelligence.models import DocumentContentFormat
from azure.core.credentials import AzureKeyCredential
client = DocumentIntelligenceClient(
endpoint=self.settings.endpoint,
credential=AzureKeyCredential(self.settings.api_key),
)
try:
with file.open("rb") as f:
analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
poller = client.begin_analyze_document(
model_id="prebuilt-read",
body=analyze_request,
output_content_format=DocumentContentFormat.TEXT,
output=[AnalyzeOutputOption.PDF], # request searchable PDF output
content_type="application/json",
)
poller.wait()
result_id = poller.details["operation_id"]
result = poller.result()
# Download the PDF with embedded text
self.archive_path = self.tempdir / "archive.pdf"
with self.archive_path.open("wb") as f:
for chunk in client.get_analyze_result_pdf(
model_id="prebuilt-read",
result_id=result_id,
):
f.write(chunk)
return result.content
except Exception as e:
self.log.error(f"Azure AI Vision parsing failed: {e}")
finally:
client.close()
return None
def parse(self, document_path: Path, mime_type, file_name=None):
if not self.settings.engine_is_valid():
self.log.warning(
"No valid remote parser engine is configured, content will be empty.",
)
self.text = ""
elif self.settings.engine == "azureai":
self.text = self.azure_ai_vision_parse(document_path)
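supported_mime_types() doubles as the feature switch: with an invalid config the parser advertises no types and is never selected, so parse() only needs the warning fallback. engine_is_valid() restated as a standalone predicate:

def engine_is_valid(engine, api_key, endpoint) -> bool:
    # azureai is the only supported engine and needs both credentials.
    valid = engine in ["azureai"] and api_key is not None
    if engine == "azureai":
        valid = valid and endpoint is not None
    return valid

assert engine_is_valid("azureai", "key", "https://ep") is True
assert engine_is_valid("azureai", "key", None) is False
assert engine_is_valid("azureai", None, "https://ep") is False
assert engine_is_valid("someother", "key", "https://ep") is False  # unknown engine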

View File

@@ -1,18 +0,0 @@
def get_parser(*args, **kwargs):
from paperless_remote.parsers import RemoteDocumentParser
return RemoteDocumentParser(*args, **kwargs)
def get_supported_mime_types():
from paperless_remote.parsers import RemoteDocumentParser
return RemoteDocumentParser(None).supported_mime_types()
def remote_consumer_declaration(sender, **kwargs):
return {
"parser": get_parser,
"weight": 5,
"mime_types": get_supported_mime_types(),
}
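The weight field decides parser selection: assuming the consumer's usual highest-weight-wins rule among parsers that support the document's mime type, weight 5 outranks the default tesseract parser whenever the remote engine is configured. A sketch of that selection:

declarations = [
    {"name": "tesseract", "weight": 0, "mime_types": {"application/pdf": ".pdf"}},
    {"name": "remote", "weight": 5, "mime_types": {"application/pdf": ".pdf"}},
]
best = max(
    (d for d in declarations if "application/pdf" in d["mime_types"]),
    key=lambda d: d["weight"],
)
assert best["name"] == "remote"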

View File

@@ -1,24 +0,0 @@
from unittest import TestCase
from django.test import override_settings
from paperless_remote import check_remote_parser_configured
class TestChecks(TestCase):
@override_settings(REMOTE_OCR_ENGINE=None)
def test_no_engine(self):
msgs = check_remote_parser_configured(None)
self.assertEqual(len(msgs), 0)
@override_settings(REMOTE_OCR_ENGINE="azureai")
@override_settings(REMOTE_OCR_API_KEY="somekey")
@override_settings(REMOTE_OCR_ENDPOINT=None)
def test_azure_no_endpoint(self):
msgs = check_remote_parser_configured(None)
self.assertEqual(len(msgs), 1)
self.assertTrue(
msgs[0].msg.startswith(
"Azure AI remote parser requires endpoint and API key to be configured.",
),
)

View File

@@ -1,128 +0,0 @@
import uuid
from pathlib import Path
from unittest import mock
from django.test import TestCase
from django.test import override_settings
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless_remote.parsers import RemoteDocumentParser
from paperless_remote.signals import get_parser
class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
SAMPLE_FILES = Path(__file__).resolve().parent / "samples"
def assertContainsStrings(self, content: str, strings: list[str]):
# Asserts that all strings appear in content, in the given order.
indices = []
for s in strings:
if s in content:
indices.append(content.index(s))
else:
self.fail(f"'{s}' is not in '{content}'")
self.assertListEqual(indices, sorted(indices))
@mock.patch("paperless_tesseract.parsers.run_subprocess")
@mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
def test_get_text_with_azure(self, mock_client_cls, mock_subprocess):
# Arrange mock Azure client
mock_client = mock.Mock()
mock_client_cls.return_value = mock_client
# Simulate poller result and its `.details`
mock_poller = mock.Mock()
mock_poller.wait.return_value = None
mock_poller.details = {"operation_id": "fake-op-id"}
mock_client.begin_analyze_document.return_value = mock_poller
mock_poller.result.return_value.content = "This is a test document."
# Return dummy PDF bytes
mock_client.get_analyze_result_pdf.return_value = [
b"%PDF-",
b"1.7 ",
b"FAKEPDF",
]
# Simulate pdftotext by writing dummy text to sidecar file
def fake_run(cmd, *args, **kwargs):
with Path(cmd[-1]).open("w", encoding="utf-8") as f:
f.write("This is a test document.")
mock_subprocess.side_effect = fake_run
with override_settings(
REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="somekey",
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
):
parser = get_parser(uuid.uuid4())
parser.parse(
self.SAMPLE_FILES / "simple-digital.pdf",
"application/pdf",
)
self.assertContainsStrings(
parser.text.strip(),
["This is a test document."],
)
@mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
def test_get_text_with_azure_error_logged_and_returns_none(self, mock_client_cls):
mock_client = mock.Mock()
mock_client.begin_analyze_document.side_effect = RuntimeError("fail")
mock_client_cls.return_value = mock_client
with override_settings(
REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="somekey",
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
):
parser = get_parser(uuid.uuid4())
with mock.patch.object(parser.log, "error") as mock_log_error:
parser.parse(
self.SAMPLE_FILES / "simple-digital.pdf",
"application/pdf",
)
self.assertIsNone(parser.text)
mock_client.begin_analyze_document.assert_called_once()
mock_client.close.assert_called_once()
mock_log_error.assert_called_once()
self.assertIn(
"Azure AI Vision parsing failed",
mock_log_error.call_args[0][0],
)
@override_settings(
REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="key",
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
)
def test_supported_mime_types_valid_config(self):
parser = RemoteDocumentParser(uuid.uuid4())
expected_types = {
"application/pdf": ".pdf",
"image/png": ".png",
"image/jpeg": ".jpg",
"image/tiff": ".tiff",
"image/bmp": ".bmp",
"image/gif": ".gif",
"image/webp": ".webp",
}
self.assertEqual(parser.supported_mime_types(), expected_types)
def test_supported_mime_types_invalid_config(self):
parser = get_parser(uuid.uuid4())
self.assertEqual(parser.supported_mime_types(), {})
@override_settings(
REMOTE_OCR_ENGINE=None,
REMOTE_OCR_API_KEY=None,
REMOTE_OCR_ENDPOINT=None,
)
def test_parse_with_invalid_config(self):
parser = get_parser(uuid.uuid4())
parser.parse(self.SAMPLE_FILES / "simple-digital.pdf", "application/pdf")
self.assertEqual(parser.text, "")

uv.lock (generated)
View File

@@ -95,34 +95,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/02/ff/1175b0b7371e46244032d43a56862d0af455823b5280a50c63d99cc50f18/automat-25.4.16-py3-none-any.whl", hash = "sha256:04e9bce696a8d5671ee698005af6e5a9fa15354140a87f4870744604dcdd3ba1", size = 42842, upload-time = "2025-04-16T20:12:14.447Z" },
]
[[package]]
name = "azure-ai-documentintelligence"
version = "1.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "azure-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "isodate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005 },
]
[[package]]
name = "azure-core"
version = "1.33.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/75/aa/7c9db8edd626f1a7d99d09ef7926f6f4fb34d5f9fa00dc394afdfe8e2a80/azure_core-1.33.0.tar.gz", hash = "sha256:f367aa07b5e3005fec2c1e184b882b0b039910733907d001c20fb08ebb8c0eb9", size = 295633 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/b7/76b7e144aa53bd206bf1ce34fa75350472c3f69bf30e5c8c18bc9881035d/azure_core-1.33.0-py3-none-any.whl", hash = "sha256:9b5b6d0223a1d38c37500e6971118c1e0f13f54951e6893968b38910bc9cda8f", size = 207071 },
]
[[package]]
name = "babel"
version = "2.17.0"
@@ -1479,15 +1451,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/fc/4e5a141c3f7c7bed550ac1f69e599e92b6be449dd4677ec09f325cad0955/inotifyrecursive-0.3.5-py3-none-any.whl", hash = "sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f", size = 8009, upload-time = "2020-11-20T12:38:46.981Z" },
]
[[package]]
name = "isodate"
version = "0.7.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 },
]
[[package]]
name = "jinja2"
version = "3.1.6"
@@ -2155,7 +2118,6 @@ name = "paperless-ngx"
version = "2.20.1"
source = { virtual = "." }
dependencies = [
{ name = "azure-ai-documentintelligence", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "celery", extra = ["redis"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2292,7 +2254,6 @@ typing = [
[package.metadata]
requires-dist = [
{ name = "azure-ai-documentintelligence", specifier = ">=1.0.2" },
{ name = "babel", specifier = ">=2.17" },
{ name = "bleach", specifier = "~=6.3.0" },
{ name = "celery", extras = ["redis"], specifier = "~=5.5.1" },