Compare commits


63 Commits

Author SHA1 Message Date
shamoon 9eb81d5458 Merge branch 'dev' into feature-remote-ocr-2 2025-12-07 20:37:56 -08:00
shamoon 6a5ea49715 Merge branch 'dev' into feature-remote-ocr-2 2025-11-22 13:18:50 -08:00
shamoon 7d2fe630a5 Merge branch 'dev' into feature-remote-ocr-2 2025-11-19 23:49:11 -08:00
shamoon c29dd5485b Merge branch 'dev' into feature-remote-ocr-2 2025-11-18 12:08:38 -08:00
shamoon cef100a955 Wrap in try/catch 2025-11-18 12:07:16 -08:00
shamoon 4f53d1b6ee Merge branch 'dev' into feature-remote-ocr-2 2025-11-17 20:54:37 -08:00
shamoon 23cea77548 Merge branch 'dev' into feature-remote-ocr-2 2025-11-17 18:49:01 -08:00
shamoon 4900af93c6 Merge branch 'dev' into feature-remote-ocr-2 2025-11-15 13:49:39 -08:00
shamoon ef834ae808 Merge branch 'dev' into feature-remote-ocr-2 2025-11-13 15:45:08 -08:00
shamoon 0537e87cb5 Merge branch 'dev' into feature-remote-ocr-2 2025-11-06 11:46:02 -08:00
shamoon b4da5c3cd1 Merge branch 'dev' into feature-remote-ocr-2 2025-11-04 16:24:26 -08:00
shamoon 251b0fb3d6 Merge branch 'dev' into feature-remote-ocr-2 2025-11-04 08:24:02 -08:00
shamoon 32bdf11f7f Merge branch 'dev' into feature-remote-ocr-2 2025-11-02 08:14:04 -08:00
shamoon 0627ca69f5 Merge branch 'dev' into feature-remote-ocr-2 2025-10-29 11:13:53 -07:00
shamoon f5525bbdff Merge branch 'dev' into feature-remote-ocr-2 2025-10-27 21:22:42 -07:00
shamoon a21a2a41a8 Merge branch 'dev' into feature-remote-ocr-2 2025-10-26 07:41:51 -07:00
shamoon cc73ed8b86 Merge branch 'dev' into feature-remote-ocr-2 2025-10-24 16:48:07 -07:00
shamoon 0c706b2316 Merge branch 'dev' into feature-remote-ocr-2 2025-10-23 16:38:35 -07:00
shamoon 85b7b6874d Merge branch 'dev' into feature-remote-ocr-2 2025-10-22 21:53:07 -07:00
shamoon 56b26185fa Merge branch 'dev' into feature-remote-ocr-2 2025-10-21 08:23:20 -07:00
shamoon 6537fade7b Merge branch 'dev' into feature-remote-ocr-2 2025-10-15 16:04:02 -07:00
shamoon 9f8090816f Merge branch 'dev' into feature-remote-ocr-2 2025-10-09 12:54:58 -07:00
shamoon 1de7c52478 Merge branch 'dev' into feature-remote-ocr-2 2025-10-01 19:24:38 -07:00
shamoon 9aaaa6f069 Merge branch 'dev' into feature-remote-ocr-2 2025-09-30 09:14:56 -07:00
shamoon c3a20b7797 Merge branch 'dev' into feature-remote-ocr-2 2025-09-28 15:06:37 -07:00
shamoon 476556379b Merge branch 'dev' into feature-remote-ocr-2 2025-09-24 13:46:49 -07:00
shamoon e5cafff043 Merge branch 'dev' into feature-remote-ocr-2 2025-09-22 13:42:55 -07:00
shamoon 8e0d574e99 Merge branch 'dev' into feature-remote-ocr-2 2025-09-21 16:18:13 -07:00
shamoon 8a5820328e Sonar suggestions 2025-09-17 19:18:47 -07:00
shamoon 809d62a2f4 Merge branch 'dev' into feature-remote-ocr-2 2025-09-17 16:51:23 -07:00
shamoon 0d87f94b9b Merge branch 'dev' into feature-remote-ocr-2 2025-09-14 14:01:35 -07:00
shamoon 315b90f8e5 Add typing to assertContainsStrings test util 2025-09-11 13:56:14 -07:00
shamoon 47b2d2964b Use regular testcase instead of django, config check test 2025-09-11 13:52:10 -07:00
shamoon e05639ae4e tempdir already a path 2025-09-11 13:49:30 -07:00
shamoon f400a8cb2f Close client 2025-09-11 13:49:06 -07:00
shamoon 26abcf5612 Also ensure API key is set 2025-09-11 13:48:06 -07:00
shamoon afde52430d Merge branch 'dev' into feature-remote-ocr-2 2025-09-11 13:25:53 -07:00
shamoon 716f2da652 Merge branch 'dev' into feature-remote-ocr-2 2025-09-08 11:36:49 -07:00
shamoon c54073b7c2 Merge branch 'dev' into feature-remote-ocr-2 2025-09-04 09:16:59 -07:00
shamoon 247e6f39dc Merge branch 'dev' into feature-remote-ocr-2 2025-09-01 20:10:40 -07:00
shamoon 1e6dfc4481 Merge branch 'dev' into feature-remote-ocr-2 2025-08-26 13:30:39 -07:00
shamoon 7cc0750066 Add note on costs and limitations for Azure OCR 2025-08-24 05:47:07 -07:00
shamoon bd6585d3b4 Merge branch 'dev' into feature-remote-ocr-2 2025-08-22 08:54:26 -07:00
shamoon 717e828a1d Merge branch 'dev' into feature-remote-ocr-2 2025-08-17 21:25:14 -07:00
shamoon 07381d48e6 Merge branch 'dev' into feature-remote-ocr-2 2025-08-17 07:49:58 -07:00
shamoon dd0ffaf312 Merge branch 'dev' into feature-remote-ocr-2 2025-08-11 10:48:36 -07:00
shamoon 264504affc Fix consumer declaration file extensions 2025-08-10 05:32:52 -07:00
shamoon 4feedf2add Merge branch 'dev' into feature-remote-ocr-2 2025-08-06 16:04:25 -04:00
shamoon 2f76cf9831 Merge branch 'dev' into feature-remote-ocr-2 2025-08-01 23:55:49 -04:00
shamoon 1002d37f6b Update test_parser.py 2025-07-09 11:05:37 -07:00
shamoon d260a94740 Update parsers.py 2025-07-09 11:02:57 -07:00
shamoon 88c69b83ea Update index.md 2025-07-09 11:00:12 -07:00
shamoon 2557ee2014 Update docs to mention remote OCR with Azure AI 2025-07-09 09:53:30 -07:00
shamoon 3c75deed80 Add paperless_remote tests to testpaths 2025-07-08 14:19:45 -07:00
shamoon d05343c927 Test fixes / coverage 2025-07-08 14:19:45 -07:00
shamoon e7972b7eaf Coverage 2025-07-08 14:19:45 -07:00
shamoon 75a091cc0d Fix test 2025-07-08 14:19:44 -07:00
shamoon dca74803fd Use output_content_format poller.result to get clean content 2025-07-08 14:19:44 -07:00
shamoon 3cf3d868d0 Some docs 2025-07-08 14:19:43 -07:00
shamoon bf4fc6604a Test 2025-07-08 14:19:43 -07:00
shamoon e8c1eb86fa This actually works [ci skip] 2025-07-08 14:19:43 -07:00
shamoon c3dad3cf69 Basic parse 2025-07-08 14:19:42 -07:00
shamoon 811bd66088 Ok, restart implementing this with just azure [ci skip] 2025-07-08 14:19:42 -07:00
30 changed files with 1473 additions and 1170 deletions

View File

@@ -67,7 +67,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Install python
uses: actions/setup-python@v6
with:
@@ -81,7 +81,7 @@ jobs:
- pre-commit
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Set up Python
id: setup-python
uses: actions/setup-python@v6
@@ -131,7 +131,7 @@ jobs:
fail-fast: false
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Start containers
run: |
docker compose --file ${{ github.workspace }}/docker/compose/docker-compose.ci-test.yml pull --quiet
@@ -202,7 +202,7 @@ jobs:
needs:
- pre-commit
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
@@ -235,7 +235,7 @@ jobs:
shard-index: [1, 2, 3, 4]
shard-count: [4]
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
@@ -284,7 +284,7 @@ jobs:
shard-index: [1, 2]
shard-count: [2]
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
@@ -327,7 +327,7 @@ jobs:
- tests-frontend
- tests-frontend-e2e
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
@@ -424,7 +424,7 @@ jobs:
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
# If https://github.com/docker/buildx/issues/1044 is resolved,
# the append input with a native arm64 arch could be used to
# significantly speed up building
@@ -497,7 +497,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Set up Python
id: setup-python
uses: actions/setup-python@v6
@@ -643,7 +643,7 @@ jobs:
if: needs.publish-release.outputs.prerelease == 'false'
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
ref: main
- name: Set up Python

View File

@@ -34,7 +34,7 @@ jobs:
# Learn more about CodeQL language support at https://git.io/codeql-language-support
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v4

View File

@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
token: ${{ secrets.PNGX_BOT_PAT }}
- name: crowdin action

View File

@@ -11,7 +11,7 @@ jobs:
contents: write
steps:
- name: Checkout code
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
token: ${{ secrets.PNGX_BOT_PAT }}
ref: ${{ github.head_ref }}

View File

@@ -1794,3 +1794,23 @@ password. All of these options come from their similarly-named [Django settings]
#### [`PAPERLESS_EMAIL_USE_SSL=<bool>`](#PAPERLESS_EMAIL_USE_SSL) {#PAPERLESS_EMAIL_USE_SSL}
: Defaults to false.
## Remote OCR
#### [`PAPERLESS_REMOTE_OCR_ENGINE=<str>`](#PAPERLESS_REMOTE_OCR_ENGINE) {#PAPERLESS_REMOTE_OCR_ENGINE}
: The remote OCR engine to use. Currently, only Azure AI is supported; set this to "azureai".
Defaults to None, which disables remote OCR.
#### [`PAPERLESS_REMOTE_OCR_API_KEY=<str>`](#PAPERLESS_REMOTE_OCR_API_KEY) {#PAPERLESS_REMOTE_OCR_API_KEY}
: The API key to use for the remote OCR engine.
Defaults to None.
#### [`PAPERLESS_REMOTE_OCR_ENDPOINT=<str>`](#PAPERLESS_REMOTE_OCR_ENDPOINT) {#PAPERLESS_REMOTE_OCR_ENDPOINT}
: The endpoint to use for the remote OCR engine. This is required for Azure AI.
Defaults to None.
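For orientation, a minimal sketch of how these three settings interact, assuming the behavior described above (the variable names are from the docs; the validation logic itself is illustrative, not the actual config check):

```python
import os

# Names per the documented settings; the check itself is an illustrative sketch
engine = os.environ.get("PAPERLESS_REMOTE_OCR_ENGINE")    # "azureai" or unset (disabled)
api_key = os.environ.get("PAPERLESS_REMOTE_OCR_API_KEY")
endpoint = os.environ.get("PAPERLESS_REMOTE_OCR_ENDPOINT")

if engine == "azureai" and not (api_key and endpoint):
    # Azure AI requires both a key and an endpoint, per the docs above
    raise ValueError("Remote OCR with Azure AI requires an API key and an endpoint")
```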

View File

@@ -25,9 +25,10 @@ physical documents into a searchable online archive so you can keep, well, _less
## Features
- **Organize and index** your scanned documents with tags, correspondents, types, and more.
- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way.
- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so.
- Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images.
- Utilizes the open-source Tesseract engine to recognize more than 100 languages.
- Utilizes the open-source Tesseract engine to recognize more than 100 languages.
- _New!_ Supports remote OCR with Azure AI (opt-in).
- Documents are saved as PDF/A format which is designed for long term storage, alongside the unaltered originals.
- Uses machine-learning to automatically add tags, correspondents and document types to your documents.
- Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.

View File

@@ -892,6 +892,21 @@ how regularly you intend to scan documents and use paperless.
performed the task associated with the document, move it to the
inbox.
## Remote OCR
!!! important
This feature is disabled by default and will always remain strictly "opt-in".
Paperless-ngx supports performing OCR on documents using remote services. At the moment, this is limited to
[Microsoft's Azure "Document Intelligence" service](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence).
This is of course a paid service (with a free tier) which requires an Azure account and subscription. Azure AI is not affiliated with
Paperless-ngx in any way. When enabled, Paperless-ngx will automatically send appropriate documents to Azure for OCR processing, bypassing
the local OCR engine. See the [configuration](configuration.md#PAPERLESS_REMOTE_OCR_ENGINE) options for more details.
Additionally, when using a commercial service with this feature, consider both the potential costs and any associated file size
or page limitations (e.g. with a free tier).
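To make the flow concrete, here is a minimal sketch of the kind of call involved, using the `azure-ai-documentintelligence` package this branch adds. The endpoint, key, file name, and the `prebuilt-read` model id are placeholders or assumptions; only the use of `output_content_format` / `poller.result()` and closing the client are confirmed by the commit history above.

```python
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential

# Endpoint and key would come from PAPERLESS_REMOTE_OCR_ENDPOINT / PAPERLESS_REMOTE_OCR_API_KEY
client = DocumentIntelligenceClient(
    endpoint="https://<resource>.cognitiveservices.azure.com/",  # placeholder
    credential=AzureKeyCredential("<api-key>"),  # placeholder
)
try:
    with open("scan.pdf", "rb") as f:  # placeholder document
        poller = client.begin_analyze_document(
            "prebuilt-read",                   # assumed model id, for illustration
            body=f,
            output_content_format="markdown",  # yields clean text via poller.result()
        )
    text = poller.result().content  # OCR'd content, used in place of local OCR output
finally:
    client.close()  # the branch explicitly closes the client when done
```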
## Architecture
Paperless-ngx consists of the following components:

View File

@@ -16,6 +16,7 @@ classifiers = [
# This will allow testing to not install a webserver, mysql, etc
dependencies = [
"azure-ai-documentintelligence>=1.0.2",
"babel>=2.17",
"bleach~=6.3.0",
"celery[redis]~=5.5.1",
@@ -252,6 +253,7 @@ testpaths = [
"src/paperless_tesseract/tests/",
"src/paperless_tika/tests",
"src/paperless_text/tests/",
"src/paperless_remote/tests/",
]
addopts = [
"--pythonwarnings=all",

View File

@@ -5,14 +5,14 @@
<trans-unit id="ngb.alert.close" datatype="html">
<source>Close</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/alert/alert.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/alert/alert.ts</context>
<context context-type="linenumber">50</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.carousel.slide-number" datatype="html">
<source> Slide <x id="INTERPOLATION" equiv-text="ueryList&lt;NgbSli"/> of <x id="INTERPOLATION_1" equiv-text="EventSource = N"/> </source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/carousel/carousel.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/carousel/carousel.ts</context>
<context context-type="linenumber">131,135</context>
</context-group>
<note priority="1" from="description">Currently selected slide number read by screen reader</note>
@@ -20,212 +20,212 @@
<trans-unit id="ngb.carousel.previous" datatype="html">
<source>Previous</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/carousel/carousel.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/carousel/carousel.ts</context>
<context context-type="linenumber">157,159</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.carousel.next" datatype="html">
<source>Next</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/carousel/carousel.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/carousel/carousel.ts</context>
<context context-type="linenumber">198</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.datepicker.previous-month" datatype="html">
<source>Previous month</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="linenumber">83,85</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="linenumber">112</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.datepicker.next-month" datatype="html">
<source>Next month</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="linenumber">112</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/datepicker/datepicker-navigation.ts</context>
<context context-type="linenumber">112</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.HH" datatype="html">
<source>HH</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.toast.close-aria" datatype="html">
<source>Close</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.datepicker.select-month" datatype="html">
<source>Select month</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.first" datatype="html">
<source>««</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.hours" datatype="html">
<source>Hours</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.previous" datatype="html">
<source>«</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.MM" datatype="html">
<source>MM</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.next" datatype="html">
<source>»</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.datepicker.select-year" datatype="html">
<source>Select year</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.minutes" datatype="html">
<source>Minutes</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.last" datatype="html">
<source>»»</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.first-aria" datatype="html">
<source>First</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.increment-hours" datatype="html">
<source>Increment hours</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.previous-aria" datatype="html">
<source>Previous</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.decrement-hours" datatype="html">
<source>Decrement hours</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.next-aria" datatype="html">
<source>Next</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.increment-minutes" datatype="html">
<source>Increment minutes</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.pagination.last-aria" datatype="html">
<source>Last</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.decrement-minutes" datatype="html">
<source>Decrement minutes</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.SS" datatype="html">
<source>SS</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.seconds" datatype="html">
<source>Seconds</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.increment-seconds" datatype="html">
<source>Increment seconds</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.decrement-seconds" datatype="html">
<source>Decrement seconds</source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
<trans-unit id="ngb.timepicker.PM" datatype="html">
<source><x id="INTERPOLATION"/></source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/ngb-config.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/ngb-config.ts</context>
<context context-type="linenumber">13</context>
</context-group>
</trans-unit>
@@ -233,7 +233,7 @@
<source><x id="INTERPOLATION" equiv-text="barConfig);
pu"/></source>
<context-group purpose="location">
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.15_@angular+core@20.3.15_@angula_40533c760dbaadbd90323f0d78d15fb8/node_modules/src/progressbar/progressbar.ts</context>
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@19.0.1_@angular+common@20.3.14_@angular+core@20.3.12_@angula_f6978d5a33be250eb7b5e8e65faf7a7d/node_modules/src/progressbar/progressbar.ts</context>
<context context-type="linenumber">41,42</context>
</context-group>
</trans-unit>

View File

@@ -12,14 +12,14 @@
"private": true,
"dependencies": {
"@angular/cdk": "^20.2.13",
"@angular/common": "~20.3.15",
"@angular/compiler": "~20.3.15",
"@angular/core": "~20.3.15",
"@angular/forms": "~20.3.15",
"@angular/localize": "~20.3.15",
"@angular/platform-browser": "~20.3.15",
"@angular/platform-browser-dynamic": "~20.3.15",
"@angular/router": "~20.3.15",
"@angular/common": "~20.3.14",
"@angular/compiler": "~20.3.12",
"@angular/core": "~20.3.12",
"@angular/forms": "~20.3.12",
"@angular/localize": "~20.3.12",
"@angular/platform-browser": "~20.3.12",
"@angular/platform-browser-dynamic": "~20.3.12",
"@angular/router": "~20.3.12",
"@ng-bootstrap/ng-bootstrap": "^19.0.1",
"@ng-select/ng-select": "^20.7.0",
"@ngneat/dirty-check-forms": "^3.0.3",
@@ -42,16 +42,16 @@
"devDependencies": {
"@angular-builders/custom-webpack": "^20.0.0",
"@angular-builders/jest": "^20.0.0",
"@angular-devkit/core": "^20.3.13",
"@angular-devkit/schematics": "^20.3.13",
"@angular-devkit/core": "^20.3.10",
"@angular-devkit/schematics": "^20.3.10",
"@angular-eslint/builder": "20.6.0",
"@angular-eslint/eslint-plugin": "20.6.0",
"@angular-eslint/eslint-plugin-template": "20.6.0",
"@angular-eslint/schematics": "20.6.0",
"@angular-eslint/template-parser": "20.6.0",
"@angular/build": "^20.3.13",
"@angular/cli": "~20.3.13",
"@angular/compiler-cli": "~20.3.15",
"@angular/build": "^20.3.10",
"@angular/cli": "~20.3.10",
"@angular/compiler-cli": "~20.3.12",
"@codecov/webpack-plugin": "^1.9.1",
"@playwright/test": "^1.57.0",
"@types/jest": "^30.0.0",

src-ui/pnpm-lock.yaml (generated, 579 changed lines): file diff suppressed because it is too large.

View File

@@ -61,22 +61,21 @@ def get_groups_with_only_permission(obj, codename):
return Group.objects.filter(id__in=group_object_perm_group_ids).distinct()
def set_permissions_for_object(permissions: dict, object, *, merge: bool = False):
def set_permissions_for_object(permissions: list[str], object, *, merge: bool = False):
"""
Set permissions for an object. The permissions are given as a mapping of actions
to a dict of user / group id lists, e.g.
{"view": {"users": [1], "groups": [2]}, "change": {"users": [], "groups": []}}.
Set permissions for an object. The permissions are given as a list of strings
in the format "action_modelname", e.g. "view_document".
If merge is True, the permissions are merged with the existing permissions and
no users or groups are removed. If False, the permissions are set to exactly
the given list of users and groups.
"""
for action, entry in permissions.items():
for action in permissions:
permission = f"{action}_{object.__class__.__name__.lower()}"
if "users" in entry:
if "users" in permissions[action]:
# users
users_to_add = User.objects.filter(id__in=entry["users"])
users_to_add = User.objects.filter(id__in=permissions[action]["users"])
users_to_remove = (
get_users_with_perms(
object,
@@ -86,12 +85,12 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False
if not merge
else User.objects.none()
)
if users_to_add.exists() and users_to_remove.exists():
if len(users_to_add) > 0 and len(users_to_remove) > 0:
users_to_remove = users_to_remove.exclude(id__in=users_to_add)
if users_to_remove.exists():
if len(users_to_remove) > 0:
for user in users_to_remove:
remove_perm(permission, user, object)
if users_to_add.exists():
if len(users_to_add) > 0:
for user in users_to_add:
assign_perm(permission, user, object)
if action == "change":
@@ -101,9 +100,9 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False
user,
object,
)
if "groups" in entry:
if "groups" in permissions[action]:
# groups
groups_to_add = Group.objects.filter(id__in=entry["groups"])
groups_to_add = Group.objects.filter(id__in=permissions[action]["groups"])
groups_to_remove = (
get_groups_with_only_permission(
object,
@@ -112,12 +111,12 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False
if not merge
else Group.objects.none()
)
if groups_to_add.exists() and groups_to_remove.exists():
if len(groups_to_add) > 0 and len(groups_to_remove) > 0:
groups_to_remove = groups_to_remove.exclude(id__in=groups_to_add)
if groups_to_remove.exists():
if len(groups_to_remove) > 0:
for group in groups_to_remove:
remove_perm(permission, group, object)
if groups_to_add.exists():
if len(groups_to_add) > 0:
for group in groups_to_add:
assign_perm(permission, group, object)
if action == "change":
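For context, a brief usage sketch of `set_permissions_for_object` with the nested action-to-users/groups mapping that the docstring in this hunk describes (the `document` object and the ids are made-up placeholders):

```python
from documents.permissions import set_permissions_for_object

# Illustrative only: `document` and the ids are placeholders
permissions = {
    "view": {"users": [1], "groups": [2]},
    "change": {"users": [], "groups": []},
}

# merge=True only grants the listed users/groups, leaving existing permissions
# in place; merge=False makes the object's permissions match the lists exactly
set_permissions_for_object(permissions=permissions, object=document, merge=True)
```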

View File

@@ -1,10 +1,14 @@
from __future__ import annotations
import ipaddress
import logging
import shutil
import socket
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.parse import urlparse
import httpx
from celery import shared_task
from celery import states
from celery.signals import before_task_publish
@@ -23,15 +27,20 @@ from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
from filelock import FileLock
from guardian.shortcuts import remove_perm
from documents import matching
from documents.caching import clear_document_caches
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename
from documents.mail import EmailAttachment
from documents.mail import send_email
from documents.models import Correspondent
from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import PaperlessTask
from documents.models import SavedView
@@ -42,14 +51,8 @@ from documents.models import WorkflowAction
from documents.models import WorkflowRun
from documents.models import WorkflowTrigger
from documents.permissions import get_objects_for_user_owner_aware
from documents.workflows.actions import build_workflow_action_context
from documents.workflows.actions import execute_email_action
from documents.workflows.actions import execute_webhook_action
from documents.workflows.mutations import apply_assignment_to_document
from documents.workflows.mutations import apply_assignment_to_overrides
from documents.workflows.mutations import apply_removal_to_document
from documents.workflows.mutations import apply_removal_to_overrides
from documents.workflows.utils import get_workflows_for_trigger
from documents.permissions import set_permissions_for_object
from documents.templating.workflows import parse_w_workflow_placeholders
if TYPE_CHECKING:
from documents.classifier import DocumentClassifier
@@ -670,6 +673,92 @@ def run_workflows_updated(sender, document: Document, logging_group=None, **kwar
)
def _is_public_ip(ip: str) -> bool:
try:
obj = ipaddress.ip_address(ip)
return not (
obj.is_private
or obj.is_loopback
or obj.is_link_local
or obj.is_multicast
or obj.is_unspecified
)
except ValueError: # pragma: no cover
return False
def _resolve_first_ip(host: str) -> str | None:
try:
info = socket.getaddrinfo(host, None)
return info[0][4][0] if info else None
except Exception: # pragma: no cover
return None
@shared_task(
retry_backoff=True,
autoretry_for=(httpx.HTTPStatusError,),
max_retries=3,
throws=(httpx.HTTPError,),
)
def send_webhook(
url: str,
data: str | dict,
headers: dict,
files: dict,
*,
as_json: bool = False,
):
p = urlparse(url)
if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
logger.warning("Webhook blocked: invalid scheme/hostname")
raise ValueError("Invalid URL scheme or hostname.")
port = p.port or (443 if p.scheme == "https" else 80)
if (
len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
and port not in settings.WEBHOOKS_ALLOWED_PORTS
):
logger.warning("Webhook blocked: port not permitted")
raise ValueError("Destination port not permitted.")
ip = _resolve_first_ip(p.hostname)
if not ip or (
not _is_public_ip(ip) and not settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS
):
logger.warning("Webhook blocked: destination not allowed")
raise ValueError("Destination host is not allowed.")
try:
post_args = {
"url": url,
"headers": {
k: v for k, v in (headers or {}).items() if k.lower() != "host"
},
"files": files or None,
"timeout": 5.0,
"follow_redirects": False,
}
if as_json:
post_args["json"] = data
elif isinstance(data, dict):
post_args["data"] = data
else:
post_args["content"] = data
httpx.post(
**post_args,
).raise_for_status()
logger.info(
f"Webhook sent to {url}",
)
except Exception as e:
logger.error(
f"Failed attempt sending webhook to {url}: {e}",
)
raise e
def run_workflows(
trigger_type: WorkflowTrigger.WorkflowTriggerType,
document: Document | ConsumableDocument,
@@ -678,17 +767,573 @@ def run_workflows(
overrides: DocumentMetadataOverrides | None = None,
original_file: Path | None = None,
) -> tuple[DocumentMetadataOverrides, str] | None:
"""
Execute workflows matching a document for the given trigger. When `overrides` is provided
(consumption flow), actions mutate that object and the function returns `(overrides, messages)`.
Otherwise actions mutate the actual document and return nothing.
"""Run workflows which match a Document (or ConsumableDocument) for a specific trigger type or a single workflow if given.
Attachments for email/webhook actions use `original_file` when given, otherwise fall back to
`document.source_path` (Document) or `document.original_file` (ConsumableDocument).
Passing `workflow_to_run` skips the workflow query (currently only used by scheduled runs).
Assignment or removal actions are either applied directly to the document or an overrides object. If an overrides
object is provided, the function returns the object with the applied changes or None if no actions were applied and a string
of messages for each action. If no overrides object is provided, the changes are applied directly to the document and the
function returns None.
"""
def assignment_action():
if action.assign_tags.exists():
tag_ids_to_add: set[int] = set()
for tag in action.assign_tags.all():
tag_ids_to_add.add(tag.pk)
tag_ids_to_add.update(int(pk) for pk in tag.get_ancestors_pks())
if not use_overrides:
doc_tag_ids[:] = list(set(doc_tag_ids) | tag_ids_to_add)
else:
if overrides.tag_ids is None:
overrides.tag_ids = []
overrides.tag_ids = list(set(overrides.tag_ids) | tag_ids_to_add)
if action.assign_correspondent:
if not use_overrides:
document.correspondent = action.assign_correspondent
else:
overrides.correspondent_id = action.assign_correspondent.pk
if action.assign_document_type:
if not use_overrides:
document.document_type = action.assign_document_type
else:
overrides.document_type_id = action.assign_document_type.pk
if action.assign_storage_path:
if not use_overrides:
document.storage_path = action.assign_storage_path
else:
overrides.storage_path_id = action.assign_storage_path.pk
if action.assign_owner:
if not use_overrides:
document.owner = action.assign_owner
else:
overrides.owner_id = action.assign_owner.pk
if action.assign_title:
if not use_overrides:
try:
document.title = parse_w_workflow_placeholders(
action.assign_title,
document.correspondent.name if document.correspondent else "",
document.document_type.name if document.document_type else "",
document.owner.username if document.owner else "",
timezone.localtime(document.added),
document.original_filename or "",
document.filename or "",
document.created,
)
except Exception:
logger.exception(
f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
extra={"group": logging_group},
)
else:
overrides.title = action.assign_title
if any(
[
action.assign_view_users.exists(),
action.assign_view_groups.exists(),
action.assign_change_users.exists(),
action.assign_change_groups.exists(),
],
):
permissions = {
"view": {
"users": action.assign_view_users.values_list("id", flat=True),
"groups": action.assign_view_groups.values_list("id", flat=True),
},
"change": {
"users": action.assign_change_users.values_list("id", flat=True),
"groups": action.assign_change_groups.values_list("id", flat=True),
},
}
if not use_overrides:
set_permissions_for_object(
permissions=permissions,
object=document,
merge=True,
)
else:
overrides.view_users = list(
set(
(overrides.view_users or [])
+ list(permissions["view"]["users"]),
),
)
overrides.view_groups = list(
set(
(overrides.view_groups or [])
+ list(permissions["view"]["groups"]),
),
)
overrides.change_users = list(
set(
(overrides.change_users or [])
+ list(permissions["change"]["users"]),
),
)
overrides.change_groups = list(
set(
(overrides.change_groups or [])
+ list(permissions["change"]["groups"]),
),
)
if action.assign_custom_fields.exists():
if not use_overrides:
for field in action.assign_custom_fields.all():
value_field_name = CustomFieldInstance.get_value_field_name(
data_type=field.data_type,
)
args = {
value_field_name: action.assign_custom_fields_values.get(
str(field.pk),
None,
),
}
# for some reason update_or_create doesn't work here
instance = CustomFieldInstance.objects.filter(
field=field,
document=document,
).first()
if instance and args[value_field_name] is not None:
setattr(instance, value_field_name, args[value_field_name])
instance.save()
elif not instance:
CustomFieldInstance.objects.create(
**args,
field=field,
document=document,
)
else:
if overrides.custom_fields is None:
overrides.custom_fields = {}
overrides.custom_fields.update(
{
field.pk: action.assign_custom_fields_values.get(
str(field.pk),
None,
)
for field in action.assign_custom_fields.all()
},
)
def removal_action():
if action.remove_all_tags:
if not use_overrides:
doc_tag_ids.clear()
else:
overrides.tag_ids = None
else:
tag_ids_to_remove: set[int] = set()
for tag in action.remove_tags.all():
tag_ids_to_remove.add(tag.pk)
tag_ids_to_remove.update(int(pk) for pk in tag.get_descendants_pks())
if not use_overrides:
doc_tag_ids[:] = [t for t in doc_tag_ids if t not in tag_ids_to_remove]
elif overrides.tag_ids:
overrides.tag_ids = [
t for t in overrides.tag_ids if t not in tag_ids_to_remove
]
if not use_overrides and (
action.remove_all_correspondents
or (
document.correspondent
and action.remove_correspondents.filter(
pk=document.correspondent.pk,
).exists()
)
):
document.correspondent = None
elif use_overrides and (
action.remove_all_correspondents
or (
overrides.correspondent_id
and action.remove_correspondents.filter(
pk=overrides.correspondent_id,
).exists()
)
):
overrides.correspondent_id = None
if not use_overrides and (
action.remove_all_document_types
or (
document.document_type
and action.remove_document_types.filter(
pk=document.document_type.pk,
).exists()
)
):
document.document_type = None
elif use_overrides and (
action.remove_all_document_types
or (
overrides.document_type_id
and action.remove_document_types.filter(
pk=overrides.document_type_id,
).exists()
)
):
overrides.document_type_id = None
if not use_overrides and (
action.remove_all_storage_paths
or (
document.storage_path
and action.remove_storage_paths.filter(
pk=document.storage_path.pk,
).exists()
)
):
document.storage_path = None
elif use_overrides and (
action.remove_all_storage_paths
or (
overrides.storage_path_id
and action.remove_storage_paths.filter(
pk=overrides.storage_path_id,
).exists()
)
):
overrides.storage_path_id = None
if not use_overrides and (
action.remove_all_owners
or (
document.owner
and action.remove_owners.filter(pk=document.owner.pk).exists()
)
):
document.owner = None
elif use_overrides and (
action.remove_all_owners
or (
overrides.owner_id
and action.remove_owners.filter(pk=overrides.owner_id).exists()
)
):
overrides.owner_id = None
if action.remove_all_permissions:
if not use_overrides:
permissions = {
"view": {"users": [], "groups": []},
"change": {"users": [], "groups": []},
}
set_permissions_for_object(
permissions=permissions,
object=document,
merge=False,
)
else:
overrides.view_users = None
overrides.view_groups = None
overrides.change_users = None
overrides.change_groups = None
elif any(
[
action.remove_view_users.exists(),
action.remove_view_groups.exists(),
action.remove_change_users.exists(),
action.remove_change_groups.exists(),
],
):
if not use_overrides:
for user in action.remove_view_users.all():
remove_perm("view_document", user, document)
for user in action.remove_change_users.all():
remove_perm("change_document", user, document)
for group in action.remove_view_groups.all():
remove_perm("view_document", group, document)
for group in action.remove_change_groups.all():
remove_perm("change_document", group, document)
else:
if overrides.view_users:
for user in action.remove_view_users.filter(
pk__in=overrides.view_users,
):
overrides.view_users.remove(user.pk)
if overrides.change_users:
for user in action.remove_change_users.filter(
pk__in=overrides.change_users,
):
overrides.change_users.remove(user.pk)
if overrides.view_groups:
for group in action.remove_view_groups.filter(
pk__in=overrides.view_groups,
):
overrides.view_groups.remove(group.pk)
if overrides.change_groups:
for group in action.remove_change_groups.filter(
pk__in=overrides.change_groups,
):
overrides.change_groups.remove(group.pk)
if action.remove_all_custom_fields:
if not use_overrides:
CustomFieldInstance.objects.filter(document=document).hard_delete()
else:
overrides.custom_fields = None
elif action.remove_custom_fields.exists():
if not use_overrides:
CustomFieldInstance.objects.filter(
field__in=action.remove_custom_fields.all(),
document=document,
).hard_delete()
elif overrides.custom_fields:
for field in action.remove_custom_fields.filter(
pk__in=overrides.custom_fields.keys(),
):
overrides.custom_fields.pop(field.pk, None)
def email_action():
if not settings.EMAIL_ENABLED:
logger.error(
"Email backend has not been configured, cannot send email notifications",
extra={"group": logging_group},
)
return
if not use_overrides:
title = document.title
doc_url = (
f"{settings.PAPERLESS_URL}{settings.BASE_URL}documents/{document.pk}/"
)
correspondent = (
document.correspondent.name if document.correspondent else ""
)
document_type = (
document.document_type.name if document.document_type else ""
)
owner_username = document.owner.username if document.owner else ""
filename = document.original_filename or ""
current_filename = document.filename or ""
added = timezone.localtime(document.added)
created = document.created
else:
title = overrides.title if overrides.title else str(document.original_file)
doc_url = ""
correspondent = (
Correspondent.objects.filter(pk=overrides.correspondent_id).first()
if overrides.correspondent_id
else ""
)
document_type = (
DocumentType.objects.filter(pk=overrides.document_type_id).first().name
if overrides.document_type_id
else ""
)
owner_username = (
User.objects.filter(pk=overrides.owner_id).first().username
if overrides.owner_id
else ""
)
filename = document.original_file if document.original_file else ""
current_filename = filename
added = timezone.localtime(timezone.now())
created = overrides.created
subject = (
parse_w_workflow_placeholders(
action.email.subject,
correspondent,
document_type,
owner_username,
added,
filename,
current_filename,
created,
title,
doc_url,
)
if action.email.subject
else ""
)
body = (
parse_w_workflow_placeholders(
action.email.body,
correspondent,
document_type,
owner_username,
added,
filename,
current_filename,
created,
title,
doc_url,
)
if action.email.body
else ""
)
try:
attachments: list[EmailAttachment] = []
if action.email.include_document:
attachment: EmailAttachment | None = None
if trigger_type in [
WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
] and isinstance(document, Document):
friendly_name = (
Path(current_filename).name
if current_filename
else document.source_path.name
)
attachment = EmailAttachment(
path=document.source_path,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
elif original_file:
friendly_name = (
Path(current_filename).name
if current_filename
else original_file.name
)
attachment = EmailAttachment(
path=original_file,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
if attachment:
attachments = [attachment]
n_messages = send_email(
subject=subject,
body=body,
to=action.email.to.split(","),
attachments=attachments,
)
logger.debug(
f"Sent {n_messages} notification email(s) to {action.email.to}",
extra={"group": logging_group},
)
except Exception as e:
logger.exception(
f"Error occurred sending notification email: {e}",
extra={"group": logging_group},
)
def webhook_action():
if not use_overrides:
title = document.title
doc_url = (
f"{settings.PAPERLESS_URL}{settings.BASE_URL}documents/{document.pk}/"
)
correspondent = (
document.correspondent.name if document.correspondent else ""
)
document_type = (
document.document_type.name if document.document_type else ""
)
owner_username = document.owner.username if document.owner else ""
filename = document.original_filename or ""
current_filename = document.filename or ""
added = timezone.localtime(document.added)
created = document.created
else:
title = overrides.title if overrides.title else str(document.original_file)
doc_url = ""
correspondent = (
Correspondent.objects.filter(pk=overrides.correspondent_id).first()
if overrides.correspondent_id
else ""
)
document_type = (
DocumentType.objects.filter(pk=overrides.document_type_id).first().name
if overrides.document_type_id
else ""
)
owner_username = (
User.objects.filter(pk=overrides.owner_id).first().username
if overrides.owner_id
else ""
)
filename = document.original_file if document.original_file else ""
current_filename = filename
added = timezone.localtime(timezone.now())
created = overrides.created
try:
data = {}
if action.webhook.use_params:
if action.webhook.params:
try:
for key, value in action.webhook.params.items():
data[key] = parse_w_workflow_placeholders(
value,
correspondent,
document_type,
owner_username,
added,
filename,
current_filename,
created,
title,
doc_url,
)
except Exception as e:
logger.error(
f"Error occurred parsing webhook params: {e}",
extra={"group": logging_group},
)
elif action.webhook.body:
data = parse_w_workflow_placeholders(
action.webhook.body,
correspondent,
document_type,
owner_username,
added,
filename,
current_filename,
created,
title,
doc_url,
)
headers = {}
if action.webhook.headers:
try:
headers = {
str(k): str(v) for k, v in action.webhook.headers.items()
}
except Exception as e:
logger.error(
f"Error occurred parsing webhook headers: {e}",
extra={"group": logging_group},
)
files = None
if action.webhook.include_document:
with original_file.open("rb") as f:
files = {
"file": (
filename,
f.read(),
document.mime_type,
),
}
send_webhook.delay(
url=action.webhook.url,
data=data,
headers=headers,
files=files,
as_json=action.webhook.as_json,
)
logger.debug(
f"Webhook to {action.webhook.url} queued",
extra={"group": logging_group},
)
except Exception as e:
logger.exception(
f"Error occurred sending webhook: {e}",
extra={"group": logging_group},
)
use_overrides = overrides is not None
if original_file is None:
original_file = (
@@ -696,7 +1341,30 @@ def run_workflows(
)
messages = []
workflows = get_workflows_for_trigger(trigger_type, workflow_to_run)
workflows = (
(
Workflow.objects.filter(enabled=True, triggers__type=trigger_type)
.prefetch_related(
"actions",
"actions__assign_view_users",
"actions__assign_view_groups",
"actions__assign_change_users",
"actions__assign_change_groups",
"actions__assign_custom_fields",
"actions__remove_tags",
"actions__remove_correspondents",
"actions__remove_document_types",
"actions__remove_storage_paths",
"actions__remove_custom_fields",
"actions__remove_owners",
"triggers",
)
.order_by("order")
.distinct()
)
if workflow_to_run is None
else [workflow_to_run]
)
for workflow in workflows:
if not use_overrides:
@@ -716,39 +1384,13 @@ def run_workflows(
messages.append(message)
if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
if use_overrides and overrides:
apply_assignment_to_overrides(action, overrides)
else:
apply_assignment_to_document(
action,
document,
doc_tag_ids,
logging_group,
)
assignment_action()
elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
if use_overrides and overrides:
apply_removal_to_overrides(action, overrides)
else:
apply_removal_to_document(action, document, doc_tag_ids)
removal_action()
elif action.type == WorkflowAction.WorkflowActionType.EMAIL:
context = build_workflow_action_context(document, overrides)
execute_email_action(
action,
document,
context,
logging_group,
original_file,
trigger_type,
)
email_action()
elif action.type == WorkflowAction.WorkflowActionType.WEBHOOK:
context = build_workflow_action_context(document, overrides)
execute_webhook_action(
action,
document,
context,
logging_group,
original_file,
)
webhook_action()
if not use_overrides:
# limit title to 128 characters
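For orientation, the closures above replace the module-level execute_* helpers removed later in this compare; a minimal sketch of triggering them, mirroring the calls in the tests below and assuming a configured Django environment with an existing document:

# Minimal sketch, assuming a configured Django environment; mirrors the
# run_workflows calls made in the tests later in this diff.
from documents.models import Document, WorkflowTrigger
from documents.signals.handlers import run_workflows

doc = Document.objects.first()  # hypothetical existing document
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)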

View File

@@ -1289,7 +1289,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
content_type__app_label="admin",
),
)
set_permissions([4, 5], set_permissions={}, owner=user2, merge=False)
set_permissions([4, 5], set_permissions=[], owner=user2, merge=False)
with index.open_index_writer() as writer:
index.update_document(writer, d1)

View File

@@ -26,7 +26,7 @@ from rest_framework.test import APITestCase
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
from documents.signals.handlers import run_workflows
from documents.workflows.webhooks import send_webhook
from documents.signals.handlers import send_webhook
if TYPE_CHECKING:
from django.db.models import QuerySet
@@ -2858,7 +2858,7 @@ class TestWorkflows(
mock_email_send.return_value = 1
with self.assertNoLogs("paperless.workflows", level="ERROR"):
with self.assertNoLogs("paperless.handlers", level="ERROR"):
run_workflows(
WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
consumable_document,
@@ -3096,7 +3096,7 @@ class TestWorkflows(
original_filename="sample.pdf",
)
with self.assertLogs("paperless.workflows.actions", level="ERROR") as cm:
with self.assertLogs("paperless.handlers", level="ERROR") as cm:
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
expected_str = "Email backend has not been configured"
@@ -3144,7 +3144,7 @@ class TestWorkflows(
original_filename="sample.pdf",
)
with self.assertLogs("paperless.workflows", level="ERROR") as cm:
with self.assertLogs("paperless.handlers", level="ERROR") as cm:
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
expected_str = "Error occurred sending email"
@@ -3215,7 +3215,7 @@ class TestWorkflows(
PAPERLESS_FORCE_SCRIPT_NAME="/paperless",
BASE_URL="/paperless/",
)
@mock.patch("documents.workflows.webhooks.send_webhook.delay")
@mock.patch("documents.signals.handlers.send_webhook.delay")
def test_workflow_webhook_action_body(self, mock_post):
"""
GIVEN:
@@ -3274,7 +3274,7 @@ class TestWorkflows(
@override_settings(
PAPERLESS_URL="http://localhost:8000",
)
@mock.patch("documents.workflows.webhooks.send_webhook.delay")
@mock.patch("documents.signals.handlers.send_webhook.delay")
def test_workflow_webhook_action_w_files(self, mock_post):
"""
GIVEN:
@@ -3377,7 +3377,7 @@ class TestWorkflows(
)
# fails because no file
with self.assertLogs("paperless.workflows", level="ERROR") as cm:
with self.assertLogs("paperless.handlers", level="ERROR") as cm:
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
expected_str = "Error occurred sending webhook"
@@ -3420,7 +3420,7 @@ class TestWorkflows(
original_filename="sample.pdf",
)
with self.assertLogs("paperless.workflows", level="ERROR") as cm:
with self.assertLogs("paperless.handlers", level="ERROR") as cm:
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
expected_str = "Error occurred parsing webhook params"
@@ -3436,7 +3436,7 @@ class TestWorkflows(
raise_for_status=mock.Mock(),
)
with self.assertLogs("paperless.workflows") as cm:
with self.assertLogs("paperless.handlers") as cm:
send_webhook(
url="http://paperless-ngx.com",
data="Test message",
@@ -3482,7 +3482,7 @@ class TestWorkflows(
),
)
with self.assertLogs("paperless.workflows") as cm:
with self.assertLogs("paperless.handlers") as cm:
with self.assertRaises(HTTPStatusError):
send_webhook(
url="http://paperless-ngx.com",
@@ -3498,7 +3498,7 @@ class TestWorkflows(
)
self.assertIn(expected_str, cm.output[0])
@mock.patch("documents.workflows.webhooks.send_webhook.delay")
@mock.patch("documents.signals.handlers.send_webhook.delay")
def test_workflow_webhook_action_consumption(self, mock_post):
"""
GIVEN:

View File

@@ -1,261 +0,0 @@
import logging
from pathlib import Path
from django.conf import settings
from django.contrib.auth.models import User
from django.utils import timezone
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.mail import EmailAttachment
from documents.mail import send_email
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
from documents.templating.workflows import parse_w_workflow_placeholders
from documents.workflows.webhooks import send_webhook
logger = logging.getLogger("paperless.workflows.actions")
def build_workflow_action_context(
document: Document | ConsumableDocument,
overrides: DocumentMetadataOverrides | None,
) -> dict:
"""
Build context dictionary for workflow action placeholder parsing.
"""
use_overrides = overrides is not None
if not use_overrides:
return {
"title": document.title,
"doc_url": f"{settings.PAPERLESS_URL}{settings.BASE_URL}documents/{document.pk}/",
"correspondent": document.correspondent.name
if document.correspondent
else "",
"document_type": document.document_type.name
if document.document_type
else "",
"owner_username": document.owner.username if document.owner else "",
"filename": document.original_filename or "",
"current_filename": document.filename or "",
"added": timezone.localtime(document.added),
"created": document.created,
}
correspondent_obj = (
Correspondent.objects.filter(pk=overrides.correspondent_id).first()
if overrides and overrides.correspondent_id
else None
)
document_type_obj = (
DocumentType.objects.filter(pk=overrides.document_type_id).first()
if overrides and overrides.document_type_id
else None
)
owner_obj = (
User.objects.filter(pk=overrides.owner_id).first()
if overrides and overrides.owner_id
else None
)
filename = document.original_file if document.original_file else ""
return {
"title": overrides.title
if overrides and overrides.title
else str(document.original_file),
"doc_url": "",
"correspondent": correspondent_obj.name if correspondent_obj else "",
"document_type": document_type_obj.name if document_type_obj else "",
"owner_username": owner_obj.username if owner_obj else "",
"filename": filename,
"current_filename": filename,
"added": timezone.localtime(timezone.now()),
"created": overrides.created if overrides else None,
}
def execute_email_action(
action: WorkflowAction,
document: Document | ConsumableDocument,
context: dict,
logging_group,
original_file: Path,
trigger_type: WorkflowTrigger.WorkflowTriggerType,
) -> None:
"""
Execute an email action for a workflow.
"""
if not settings.EMAIL_ENABLED:
logger.error(
"Email backend has not been configured, cannot send email notifications",
extra={"group": logging_group},
)
return
subject = (
parse_w_workflow_placeholders(
action.email.subject,
context["correspondent"],
context["document_type"],
context["owner_username"],
context["added"],
context["filename"],
context["current_filename"],
context["created"],
context["title"],
context["doc_url"],
)
if action.email.subject
else ""
)
body = (
parse_w_workflow_placeholders(
action.email.body,
context["correspondent"],
context["document_type"],
context["owner_username"],
context["added"],
context["filename"],
context["current_filename"],
context["created"],
context["title"],
context["doc_url"],
)
if action.email.body
else ""
)
try:
attachments: list[EmailAttachment] = []
if action.email.include_document:
attachment: EmailAttachment | None = None
if trigger_type in [
WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
] and isinstance(document, Document):
friendly_name = (
Path(context["current_filename"]).name
if context["current_filename"]
else document.source_path.name
)
attachment = EmailAttachment(
path=document.source_path,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
elif original_file:
friendly_name = (
Path(context["current_filename"]).name
if context["current_filename"]
else original_file.name
)
attachment = EmailAttachment(
path=original_file,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
if attachment:
attachments = [attachment]
n_messages = send_email(
subject=subject,
body=body,
to=action.email.to.split(","),
attachments=attachments,
)
logger.debug(
f"Sent {n_messages} notification email(s) to {action.email.to}",
extra={"group": logging_group},
)
except Exception as e:
logger.exception(
f"Error occurred sending notification email: {e}",
extra={"group": logging_group},
)
def execute_webhook_action(
action: WorkflowAction,
document: Document | ConsumableDocument,
context: dict,
logging_group,
original_file: Path,
):
try:
data = {}
if action.webhook.use_params:
if action.webhook.params:
try:
for key, value in action.webhook.params.items():
data[key] = parse_w_workflow_placeholders(
value,
context["correspondent"],
context["document_type"],
context["owner_username"],
context["added"],
context["filename"],
context["current_filename"],
context["created"],
context["title"],
context["doc_url"],
)
except Exception as e:
logger.error(
f"Error occurred parsing webhook params: {e}",
extra={"group": logging_group},
)
elif action.webhook.body:
data = parse_w_workflow_placeholders(
action.webhook.body,
context["correspondent"],
context["document_type"],
context["owner_username"],
context["added"],
context["filename"],
context["current_filename"],
context["created"],
context["title"],
context["doc_url"],
)
headers = {}
if action.webhook.headers:
try:
headers = {str(k): str(v) for k, v in action.webhook.headers.items()}
except Exception as e:
logger.error(
f"Error occurred parsing webhook headers: {e}",
extra={"group": logging_group},
)
files = None
if action.webhook.include_document:
with original_file.open("rb") as f:
files = {
"file": (
str(context["filename"])
if context["filename"]
else original_file.name,
f.read(),
document.mime_type,
),
}
send_webhook.delay(
url=action.webhook.url,
data=data,
headers=headers,
files=files,
as_json=action.webhook.as_json,
)
logger.debug(
f"Webhook to {action.webhook.url} queued",
extra={"group": logging_group},
)
except Exception as e:
logger.exception(
f"Error occurred sending webhook: {e}",
extra={"group": logging_group},
)
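This removed module's context builder fed the placeholder parser; a sketch of its contract as defined above, assuming a configured Django environment (the module exists only before this compare removes it):

# Sketch of the removed helper's contract (see build_workflow_action_context
# above); assumes a configured Django environment.
from documents.models import Document
from documents.workflows.actions import build_workflow_action_context

document = Document.objects.first()  # hypothetical existing document
context = build_workflow_action_context(document, overrides=None)
assert set(context) == {
    "title", "doc_url", "correspondent", "document_type", "owner_username",
    "filename", "current_filename", "added", "created",
}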

View File

@@ -1,348 +0,0 @@
import logging
from django.utils import timezone
from guardian.shortcuts import remove_perm
from documents.data_models import DocumentMetadataOverrides
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import WorkflowAction
from documents.permissions import set_permissions_for_object
from documents.templating.workflows import parse_w_workflow_placeholders
logger = logging.getLogger("paperless.workflows.mutations")
def apply_assignment_to_document(
action: WorkflowAction,
document: Document,
doc_tag_ids: list[int],
logging_group,
):
"""
Apply assignment actions to a Document instance.
"""
if action.assign_tags.exists():
tag_ids_to_add: set[int] = set()
for tag in action.assign_tags.all():
tag_ids_to_add.add(tag.pk)
tag_ids_to_add.update(int(pk) for pk in tag.get_ancestors_pks())
doc_tag_ids[:] = list(set(doc_tag_ids) | tag_ids_to_add)
if action.assign_correspondent:
document.correspondent = action.assign_correspondent
if action.assign_document_type:
document.document_type = action.assign_document_type
if action.assign_storage_path:
document.storage_path = action.assign_storage_path
if action.assign_owner:
document.owner = action.assign_owner
if action.assign_title:
try:
document.title = parse_w_workflow_placeholders(
action.assign_title,
document.correspondent.name if document.correspondent else "",
document.document_type.name if document.document_type else "",
document.owner.username if document.owner else "",
timezone.localtime(document.added),
document.original_filename or "",
document.filename or "",
document.created,
)
except Exception: # pragma: no cover
logger.exception(
f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
extra={"group": logging_group},
)
if any(
[
action.assign_view_users.exists(),
action.assign_view_groups.exists(),
action.assign_change_users.exists(),
action.assign_change_groups.exists(),
],
):
permissions = {
"view": {
"users": action.assign_view_users.values_list("id", flat=True),
"groups": action.assign_view_groups.values_list("id", flat=True),
},
"change": {
"users": action.assign_change_users.values_list("id", flat=True),
"groups": action.assign_change_groups.values_list("id", flat=True),
},
}
set_permissions_for_object(
permissions=permissions,
object=document,
merge=True,
)
if action.assign_custom_fields.exists():
for field in action.assign_custom_fields.all():
value_field_name = CustomFieldInstance.get_value_field_name(
data_type=field.data_type,
)
args = {
value_field_name: action.assign_custom_fields_values.get(
str(field.pk),
None,
),
}
# for some reason update_or_create doesn't work here
instance = CustomFieldInstance.objects.filter(
field=field,
document=document,
).first()
if instance and args[value_field_name] is not None:
setattr(instance, value_field_name, args[value_field_name])
instance.save()
elif not instance:
CustomFieldInstance.objects.create(
**args,
field=field,
document=document,
)
def apply_assignment_to_overrides(
action: WorkflowAction,
overrides: DocumentMetadataOverrides,
):
"""
Apply assignment actions to DocumentMetadataOverrides.
"""
if action.assign_tags.exists():
if overrides.tag_ids is None:
overrides.tag_ids = []
tag_ids_to_add: set[int] = set()
for tag in action.assign_tags.all():
tag_ids_to_add.add(tag.pk)
tag_ids_to_add.update(int(pk) for pk in tag.get_ancestors_pks())
overrides.tag_ids = list(set(overrides.tag_ids) | tag_ids_to_add)
if action.assign_correspondent:
overrides.correspondent_id = action.assign_correspondent.pk
if action.assign_document_type:
overrides.document_type_id = action.assign_document_type.pk
if action.assign_storage_path:
overrides.storage_path_id = action.assign_storage_path.pk
if action.assign_owner:
overrides.owner_id = action.assign_owner.pk
if action.assign_title:
overrides.title = action.assign_title
if any(
[
action.assign_view_users.exists(),
action.assign_view_groups.exists(),
action.assign_change_users.exists(),
action.assign_change_groups.exists(),
],
):
overrides.view_users = list(
set(
(overrides.view_users or [])
+ list(action.assign_view_users.values_list("id", flat=True)),
),
)
overrides.view_groups = list(
set(
(overrides.view_groups or [])
+ list(action.assign_view_groups.values_list("id", flat=True)),
),
)
overrides.change_users = list(
set(
(overrides.change_users or [])
+ list(action.assign_change_users.values_list("id", flat=True)),
),
)
overrides.change_groups = list(
set(
(overrides.change_groups or [])
+ list(action.assign_change_groups.values_list("id", flat=True)),
),
)
if action.assign_custom_fields.exists():
if overrides.custom_fields is None:
overrides.custom_fields = {}
overrides.custom_fields.update(
{
field.pk: action.assign_custom_fields_values.get(
str(field.pk),
None,
)
for field in action.assign_custom_fields.all()
},
)
def apply_removal_to_document(
action: WorkflowAction,
document: Document,
doc_tag_ids: list[int],
):
"""
Apply removal actions to a Document instance.
"""
if action.remove_all_tags:
doc_tag_ids.clear()
else:
tag_ids_to_remove: set[int] = set()
for tag in action.remove_tags.all():
tag_ids_to_remove.add(tag.pk)
tag_ids_to_remove.update(int(pk) for pk in tag.get_descendants_pks())
doc_tag_ids[:] = [t for t in doc_tag_ids if t not in tag_ids_to_remove]
if action.remove_all_correspondents or (
document.correspondent
and action.remove_correspondents.filter(pk=document.correspondent.pk).exists()
):
document.correspondent = None
if action.remove_all_document_types or (
document.document_type
and action.remove_document_types.filter(pk=document.document_type.pk).exists()
):
document.document_type = None
if action.remove_all_storage_paths or (
document.storage_path
and action.remove_storage_paths.filter(pk=document.storage_path.pk).exists()
):
document.storage_path = None
if action.remove_all_owners or (
document.owner and action.remove_owners.filter(pk=document.owner.pk).exists()
):
document.owner = None
if action.remove_all_permissions:
permissions = {
"view": {"users": [], "groups": []},
"change": {"users": [], "groups": []},
}
set_permissions_for_object(
permissions=permissions,
object=document,
merge=False,
)
elif any(
[
action.remove_view_users.exists(),
action.remove_view_groups.exists(),
action.remove_change_users.exists(),
action.remove_change_groups.exists(),
],
):
for user in action.remove_view_users.all():
remove_perm("view_document", user, document)
for user in action.remove_change_users.all():
remove_perm("change_document", user, document)
for group in action.remove_view_groups.all():
remove_perm("view_document", group, document)
for group in action.remove_change_groups.all():
remove_perm("change_document", group, document)
if action.remove_all_custom_fields:
CustomFieldInstance.objects.filter(document=document).hard_delete()
elif action.remove_custom_fields.exists():
CustomFieldInstance.objects.filter(
field__in=action.remove_custom_fields.all(),
document=document,
).hard_delete()
def apply_removal_to_overrides(
action: WorkflowAction,
overrides: DocumentMetadataOverrides,
):
"""
Apply removal actions to DocumentMetadataOverrides.
"""
if action.remove_all_tags:
overrides.tag_ids = None
elif overrides.tag_ids:
tag_ids_to_remove: set[int] = set()
for tag in action.remove_tags.all():
tag_ids_to_remove.add(tag.pk)
tag_ids_to_remove.update(int(pk) for pk in tag.get_descendants_pks())
overrides.tag_ids = [t for t in overrides.tag_ids if t not in tag_ids_to_remove]
if action.remove_all_correspondents or (
overrides.correspondent_id
and action.remove_correspondents.filter(pk=overrides.correspondent_id).exists()
):
overrides.correspondent_id = None
if action.remove_all_document_types or (
overrides.document_type_id
and action.remove_document_types.filter(pk=overrides.document_type_id).exists()
):
overrides.document_type_id = None
if action.remove_all_storage_paths or (
overrides.storage_path_id
and action.remove_storage_paths.filter(pk=overrides.storage_path_id).exists()
):
overrides.storage_path_id = None
if action.remove_all_owners or (
overrides.owner_id
and action.remove_owners.filter(pk=overrides.owner_id).exists()
):
overrides.owner_id = None
if action.remove_all_permissions:
overrides.view_users = None
overrides.view_groups = None
overrides.change_users = None
overrides.change_groups = None
elif any(
[
action.remove_view_users.exists(),
action.remove_view_groups.exists(),
action.remove_change_users.exists(),
action.remove_change_groups.exists(),
],
):
if overrides.view_users:
for user in action.remove_view_users.filter(pk__in=overrides.view_users):
overrides.view_users.remove(user.pk)
if overrides.change_users:
for user in action.remove_change_users.filter(
pk__in=overrides.change_users,
):
overrides.change_users.remove(user.pk)
if overrides.view_groups:
for group in action.remove_view_groups.filter(pk__in=overrides.view_groups):
overrides.view_groups.remove(group.pk)
if overrides.change_groups:
for group in action.remove_change_groups.filter(
pk__in=overrides.change_groups,
):
overrides.change_groups.remove(group.pk)
if action.remove_all_custom_fields:
overrides.custom_fields = None
elif action.remove_custom_fields.exists() and overrides.custom_fields:
for field in action.remove_custom_fields.filter(
pk__in=overrides.custom_fields.keys(),
):
overrides.custom_fields.pop(field.pk, None)
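A sketch of the override-removal semantics defined above (module removed by this compare); the action below is hypothetical and assumes a configured Django environment:

# Hypothetical action with remove_all_tags=True; remove_all_tags clears the
# tag overrides entirely rather than filtering them.
from documents.data_models import DocumentMetadataOverrides
from documents.models import WorkflowAction
from documents.workflows.mutations import apply_removal_to_overrides

action = WorkflowAction.objects.get(pk=1)  # hypothetical
overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 3])
apply_removal_to_overrides(action, overrides)
assert overrides.tag_ids is None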

View File

@@ -1,36 +0,0 @@
from documents.models import Workflow
from documents.models import WorkflowTrigger
def get_workflows_for_trigger(
trigger_type: WorkflowTrigger.WorkflowTriggerType,
workflow_to_run: Workflow | None = None,
):
"""
Return workflows relevant to a trigger. If a specific workflow is given,
wrap it in a list; otherwise fetch enabled workflows for the trigger with
the prefetches used by the runner.
"""
if workflow_to_run is not None:
return [workflow_to_run]
return (
Workflow.objects.filter(enabled=True, triggers__type=trigger_type)
.prefetch_related(
"actions",
"actions__assign_view_users",
"actions__assign_view_groups",
"actions__assign_change_users",
"actions__assign_change_groups",
"actions__assign_custom_fields",
"actions__remove_tags",
"actions__remove_correspondents",
"actions__remove_document_types",
"actions__remove_storage_paths",
"actions__remove_custom_fields",
"actions__remove_owners",
"triggers",
)
.order_by("order")
.distinct()
)
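The short-circuit for an explicit workflow can be seen directly; a sketch reusing get_workflows_for_trigger as defined above (this compare view does not show the module's filename, so no import path is assumed):

# Sketch: an explicit workflow bypasses the queryset entirely.
from documents.models import WorkflowTrigger

explicit = object()  # stands in for a Workflow instance
result = get_workflows_for_trigger(
    WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
    workflow_to_run=explicit,
)
assert result == [explicit]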

View File

@@ -1,96 +0,0 @@
import ipaddress
import logging
import socket
from urllib.parse import urlparse
import httpx
from celery import shared_task
from django.conf import settings
logger = logging.getLogger("paperless.workflows.webhooks")
def _is_public_ip(ip: str) -> bool:
try:
obj = ipaddress.ip_address(ip)
return not (
obj.is_private
or obj.is_loopback
or obj.is_link_local
or obj.is_multicast
or obj.is_unspecified
)
except ValueError: # pragma: no cover
return False
def _resolve_first_ip(host: str) -> str | None:
try:
info = socket.getaddrinfo(host, None)
return info[0][4][0] if info else None
except Exception: # pragma: no cover
return None
@shared_task(
retry_backoff=True,
autoretry_for=(httpx.HTTPStatusError,),
max_retries=3,
throws=(httpx.HTTPError,),
)
def send_webhook(
url: str,
data: str | dict,
headers: dict,
files: dict,
*,
as_json: bool = False,
):
p = urlparse(url)
if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
logger.warning("Webhook blocked: invalid scheme/hostname")
raise ValueError("Invalid URL scheme or hostname.")
port = p.port or (443 if p.scheme == "https" else 80)
if (
len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
and port not in settings.WEBHOOKS_ALLOWED_PORTS
):
logger.warning("Webhook blocked: port not permitted")
raise ValueError("Destination port not permitted.")
ip = _resolve_first_ip(p.hostname)
if not ip or (
not _is_public_ip(ip) and not settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS
):
logger.warning("Webhook blocked: destination not allowed")
raise ValueError("Destination host is not allowed.")
try:
post_args = {
"url": url,
"headers": {
k: v for k, v in (headers or {}).items() if k.lower() != "host"
},
"files": files or None,
"timeout": 5.0,
"follow_redirects": False,
}
if as_json:
post_args["json"] = data
elif isinstance(data, dict):
post_args["data"] = data
else:
post_args["content"] = data
httpx.post(
**post_args,
).raise_for_status()
logger.info(
f"Webhook sent to {url}",
)
except Exception as e:
logger.error(
f"Failed attempt sending webhook to {url}: {e}",
)
raise e
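The address guard above rejects loopback, private, link-local, multicast, and unspecified destinations unless internal requests are explicitly allowed; a self-contained sketch of the same predicate:

# Mirrors _is_public_ip above; pure stdlib, runnable as-is.
import ipaddress

def is_public_ip(ip: str) -> bool:
    obj = ipaddress.ip_address(ip)
    return not (
        obj.is_private
        or obj.is_loopback
        or obj.is_link_local
        or obj.is_multicast
        or obj.is_unspecified
    )

assert is_public_ip("93.184.216.34")    # public address passes
assert not is_public_ip("127.0.0.1")    # loopback blocked
assert not is_public_ip("10.0.0.8")     # RFC 1918 private blocked
assert not is_public_ip("169.254.1.1")  # link-local blocked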

View File

@@ -322,6 +322,7 @@ INSTALLED_APPS = [
"paperless_tesseract.apps.PaperlessTesseractConfig",
"paperless_text.apps.PaperlessTextConfig",
"paperless_mail.apps.PaperlessMailConfig",
"paperless_remote.apps.PaperlessRemoteParserConfig",
"django.contrib.admin",
"rest_framework",
"rest_framework.authtoken",
@@ -1401,3 +1402,10 @@ WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true",
)
###############################################################################
# Remote Parser #
###############################################################################
REMOTE_OCR_ENGINE = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE")
REMOTE_OCR_API_KEY = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY")
REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
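These settings come straight from the environment; a placeholder sketch of the expected variables (values are illustrative, not real credentials):

# Placeholder values; set these in the environment before starting Paperless.
import os

os.environ.setdefault("PAPERLESS_REMOTE_OCR_ENGINE", "azureai")
os.environ.setdefault("PAPERLESS_REMOTE_OCR_API_KEY", "<your-api-key>")
os.environ.setdefault(
    "PAPERLESS_REMOTE_OCR_ENDPOINT",
    "https://<resource>.cognitiveservices.azure.com",
)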

View File

@@ -0,0 +1,4 @@
# This is here so that Django finds the checks.
from paperless_remote.checks import check_remote_parser_configured
__all__ = ["check_remote_parser_configured"]


View File

@@ -0,0 +1,14 @@
from django.apps import AppConfig
from paperless_remote.signals import remote_consumer_declaration
class PaperlessRemoteParserConfig(AppConfig):
name = "paperless_remote"
def ready(self):
from documents.signals import document_consumer_declaration
document_consumer_declaration.connect(remote_consumer_declaration)
AppConfig.ready(self)

View File

@@ -0,0 +1,17 @@
from django.conf import settings
from django.core.checks import Error
from django.core.checks import register
@register()
def check_remote_parser_configured(app_configs, **kwargs):
if settings.REMOTE_OCR_ENGINE == "azureai" and not (
settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY
):
return [
Error(
"Azure AI remote parser requires endpoint and API key to be configured.",
),
]
return []
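Django runs this registered check on `manage.py check`; it can also be invoked directly, as the tests later in this diff do. A sketch, assuming a configured Django settings module:

# Engine set but endpoint and API key missing -> exactly one Error.
from django.test import override_settings
from paperless_remote.checks import check_remote_parser_configured

with override_settings(
    REMOTE_OCR_ENGINE="azureai",
    REMOTE_OCR_API_KEY=None,
    REMOTE_OCR_ENDPOINT=None,
):
    errors = check_remote_parser_configured(None)
assert len(errors) == 1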

View File

@@ -0,0 +1,118 @@
from pathlib import Path
from django.conf import settings
from paperless_tesseract.parsers import RasterisedDocumentParser
class RemoteEngineConfig:
def __init__(
self,
engine: str,
api_key: str | None = None,
endpoint: str | None = None,
):
self.engine = engine
self.api_key = api_key
self.endpoint = endpoint
def engine_is_valid(self):
valid = self.engine in ["azureai"] and self.api_key is not None
if self.engine == "azureai":
valid = valid and self.endpoint is not None
return valid
class RemoteDocumentParser(RasterisedDocumentParser):
"""
This parser uses a remote OCR engine to parse documents. Currently it supports Azure AI Document
Intelligence (the "azureai" engine), as it is the only supported service whose remote OCR API
returns text-embedded (searchable) PDF output.
"""
logging_name = "paperless.parsing.remote"
def get_settings(self) -> RemoteEngineConfig:
"""
Returns the configuration for the remote OCR engine, loaded from Django settings.
"""
return RemoteEngineConfig(
engine=settings.REMOTE_OCR_ENGINE,
api_key=settings.REMOTE_OCR_API_KEY,
endpoint=settings.REMOTE_OCR_ENDPOINT,
)
def supported_mime_types(self):
if self.settings.engine_is_valid():
return {
"application/pdf": ".pdf",
"image/png": ".png",
"image/jpeg": ".jpg",
"image/tiff": ".tiff",
"image/bmp": ".bmp",
"image/gif": ".gif",
"image/webp": ".webp",
}
else:
return {}
def azure_ai_vision_parse(
self,
file: Path,
) -> str | None:
"""
Uses Azure AI Document Intelligence to parse the document and return its text content.
It requests searchable PDF output with embedded text; the PDF is saved to the
archive_path attribute.
Returns the text content extracted from the document, or None if parsing fails.
"""
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
from azure.ai.documentintelligence.models import AnalyzeOutputOption
from azure.ai.documentintelligence.models import DocumentContentFormat
from azure.core.credentials import AzureKeyCredential
client = DocumentIntelligenceClient(
endpoint=self.settings.endpoint,
credential=AzureKeyCredential(self.settings.api_key),
)
try:
with file.open("rb") as f:
analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
poller = client.begin_analyze_document(
model_id="prebuilt-read",
body=analyze_request,
output_content_format=DocumentContentFormat.TEXT,
output=[AnalyzeOutputOption.PDF], # request searchable PDF output
content_type="application/json",
)
poller.wait()
result_id = poller.details["operation_id"]
result = poller.result()
# Download the PDF with embedded text
self.archive_path = self.tempdir / "archive.pdf"
with self.archive_path.open("wb") as f:
for chunk in client.get_analyze_result_pdf(
model_id="prebuilt-read",
result_id=result_id,
):
f.write(chunk)
return result.content
except Exception as e:
self.log.error(f"Azure AI Vision parsing failed: {e}")
finally:
client.close()
return None
def parse(self, document_path: Path, mime_type, file_name=None):
if not self.settings.engine_is_valid():
self.log.warning(
"No valid remote parser engine is configured, content will be empty.",
)
self.text = ""
elif self.settings.engine == "azureai":
self.text = self.azure_ai_vision_parse(document_path)
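End to end, parse() defers to the Azure round-trip above (analyze, poll, download the searchable PDF); a usage sketch with a hypothetical input path, assuming valid REMOTE_OCR_* settings and network access:

# Usage sketch; the input path is hypothetical and real Azure credentials
# are required for the call to succeed.
import uuid
from pathlib import Path

from paperless_remote.parsers import RemoteDocumentParser

parser = RemoteDocumentParser(uuid.uuid4())
parser.parse(Path("/tmp/scan.pdf"), "application/pdf")
print(parser.text)          # text returned by Document Intelligence
print(parser.archive_path)  # searchable PDF written into the parser tempdir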

View File

@@ -0,0 +1,18 @@
def get_parser(*args, **kwargs):
from paperless_remote.parsers import RemoteDocumentParser
return RemoteDocumentParser(*args, **kwargs)
def get_supported_mime_types():
from paperless_remote.parsers import RemoteDocumentParser
return RemoteDocumentParser(None).supported_mime_types()
def remote_consumer_declaration(sender, **kwargs):
return {
"parser": get_parser,
"weight": 5,
"mime_types": get_supported_mime_types(),
}
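The declaration mirrors the other parser apps; the consumer is assumed to select among declarations by weight for a matching MIME type. A quick sketch, assuming a configured Django environment:

# mime_types is empty unless the REMOTE_OCR_* settings form a valid config.
declaration = remote_consumer_declaration(sender=None)
print(declaration["weight"])              # 5
print(sorted(declaration["mime_types"]))  # e.g. ["application/pdf", ...]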

Binary file not shown.

View File

@@ -0,0 +1,24 @@
from unittest import TestCase
from django.test import override_settings
from paperless_remote import check_remote_parser_configured
class TestChecks(TestCase):
@override_settings(REMOTE_OCR_ENGINE=None)
def test_no_engine(self):
msgs = check_remote_parser_configured(None)
self.assertEqual(len(msgs), 0)
@override_settings(REMOTE_OCR_ENGINE="azureai")
@override_settings(REMOTE_OCR_API_KEY="somekey")
@override_settings(REMOTE_OCR_ENDPOINT=None)
def test_azure_no_endpoint(self):
msgs = check_remote_parser_configured(None)
self.assertEqual(len(msgs), 1)
self.assertTrue(
msgs[0].msg.startswith(
"Azure AI remote parser requires endpoint and API key to be configured.",
),
)

View File

@@ -0,0 +1,128 @@
import uuid
from pathlib import Path
from unittest import mock
from django.test import TestCase
from django.test import override_settings
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless_remote.parsers import RemoteDocumentParser
from paperless_remote.signals import get_parser
class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
SAMPLE_FILES = Path(__file__).resolve().parent / "samples"
def assertContainsStrings(self, content: str, strings: list[str]):
# Asserts that all strings appear in content, in the given order.
indices = []
for s in strings:
if s in content:
indices.append(content.index(s))
else:
self.fail(f"'{s}' is not in '{content}'")
self.assertListEqual(indices, sorted(indices))
@mock.patch("paperless_tesseract.parsers.run_subprocess")
@mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
def test_get_text_with_azure(self, mock_client_cls, mock_subprocess):
# Arrange mock Azure client
mock_client = mock.Mock()
mock_client_cls.return_value = mock_client
# Simulate poller result and its `.details`
mock_poller = mock.Mock()
mock_poller.wait.return_value = None
mock_poller.details = {"operation_id": "fake-op-id"}
mock_client.begin_analyze_document.return_value = mock_poller
mock_poller.result.return_value.content = "This is a test document."
# Return dummy PDF bytes
mock_client.get_analyze_result_pdf.return_value = [
b"%PDF-",
b"1.7 ",
b"FAKEPDF",
]
# Simulate pdftotext by writing dummy text to sidecar file
def fake_run(cmd, *args, **kwargs):
with Path(cmd[-1]).open("w", encoding="utf-8") as f:
f.write("This is a test document.")
mock_subprocess.side_effect = fake_run
with override_settings(
REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="somekey",
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
):
parser = get_parser(uuid.uuid4())
parser.parse(
self.SAMPLE_FILES / "simple-digital.pdf",
"application/pdf",
)
self.assertContainsStrings(
parser.text.strip(),
["This is a test document."],
)
@mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
def test_get_text_with_azure_error_logged_and_returns_none(self, mock_client_cls):
mock_client = mock.Mock()
mock_client.begin_analyze_document.side_effect = RuntimeError("fail")
mock_client_cls.return_value = mock_client
with override_settings(
REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="somekey",
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
):
parser = get_parser(uuid.uuid4())
with mock.patch.object(parser.log, "error") as mock_log_error:
parser.parse(
self.SAMPLE_FILES / "simple-digital.pdf",
"application/pdf",
)
self.assertIsNone(parser.text)
mock_client.begin_analyze_document.assert_called_once()
mock_client.close.assert_called_once()
mock_log_error.assert_called_once()
self.assertIn(
"Azure AI Vision parsing failed",
mock_log_error.call_args[0][0],
)
@override_settings(
REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="key",
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
)
def test_supported_mime_types_valid_config(self):
parser = RemoteDocumentParser(uuid.uuid4())
expected_types = {
"application/pdf": ".pdf",
"image/png": ".png",
"image/jpeg": ".jpg",
"image/tiff": ".tiff",
"image/bmp": ".bmp",
"image/gif": ".gif",
"image/webp": ".webp",
}
self.assertEqual(parser.supported_mime_types(), expected_types)
def test_supported_mime_types_invalid_config(self):
parser = get_parser(uuid.uuid4())
self.assertEqual(parser.supported_mime_types(), {})
@override_settings(
REMOTE_OCR_ENGINE=None,
REMOTE_OCR_API_KEY=None,
REMOTE_OCR_ENDPOINT=None,
)
def test_parse_with_invalid_config(self):
parser = get_parser(uuid.uuid4())
parser.parse(self.SAMPLE_FILES / "simple-digital.pdf", "application/pdf")
self.assertEqual(parser.text, "")

uv.lock generated
View File

@@ -95,6 +95,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/02/ff/1175b0b7371e46244032d43a56862d0af455823b5280a50c63d99cc50f18/automat-25.4.16-py3-none-any.whl", hash = "sha256:04e9bce696a8d5671ee698005af6e5a9fa15354140a87f4870744604dcdd3ba1", size = 42842, upload-time = "2025-04-16T20:12:14.447Z" },
]
[[package]]
name = "azure-ai-documentintelligence"
version = "1.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "azure-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "isodate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005 },
]
[[package]]
name = "azure-core"
version = "1.33.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/75/aa/7c9db8edd626f1a7d99d09ef7926f6f4fb34d5f9fa00dc394afdfe8e2a80/azure_core-1.33.0.tar.gz", hash = "sha256:f367aa07b5e3005fec2c1e184b882b0b039910733907d001c20fb08ebb8c0eb9", size = 295633 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/b7/76b7e144aa53bd206bf1ce34fa75350472c3f69bf30e5c8c18bc9881035d/azure_core-1.33.0-py3-none-any.whl", hash = "sha256:9b5b6d0223a1d38c37500e6971118c1e0f13f54951e6893968b38910bc9cda8f", size = 207071 },
]
[[package]]
name = "babel"
version = "2.17.0"
@@ -1451,6 +1479,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/fc/4e5a141c3f7c7bed550ac1f69e599e92b6be449dd4677ec09f325cad0955/inotifyrecursive-0.3.5-py3-none-any.whl", hash = "sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f", size = 8009, upload-time = "2020-11-20T12:38:46.981Z" },
]
[[package]]
name = "isodate"
version = "0.7.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 },
]
[[package]]
name = "jinja2"
version = "3.1.6"
@@ -2118,6 +2155,7 @@ name = "paperless-ngx"
version = "2.20.1"
source = { virtual = "." }
dependencies = [
{ name = "azure-ai-documentintelligence", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "celery", extra = ["redis"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2254,6 +2292,7 @@ typing = [
[package.metadata]
requires-dist = [
{ name = "azure-ai-documentintelligence", specifier = ">=1.0.2" },
{ name = "babel", specifier = ">=2.17" },
{ name = "bleach", specifier = "~=6.3.0" },
{ name = "celery", extras = ["redis"], specifier = "~=5.5.1" },