Compare commits

..

3 Commits

Author SHA1 Message Date
shamoon
f23433bd57 Merge branch 'dev' into fix-11679 2026-01-09 20:45:38 -08:00
shamoon
c968505f64 Fix: run pre-flight ASN check after barcode detection 2025-12-30 10:02:32 -08:00
shamoon
fbb5864757 Add test to reproduce asn in trash error 2025-12-30 10:02:31 -08:00
286 changed files with 14773 additions and 11096 deletions

View File

@@ -1,7 +1,6 @@
# https://docs.codecov.com/docs/codecovyml-reference#codecov
codecov:
require_ci_to_pass: true
# https://docs.codecov.com/docs/components
# https://docs.codecov.com/docs/components
component_management:
individual_components:
- component_id: backend
@@ -10,70 +9,35 @@ component_management:
- component_id: frontend
paths:
- src-ui/**
# https://docs.codecov.com/docs/flags#step-2-flag-management-in-yaml
# https://docs.codecov.com/docs/carryforward-flags
flags:
# Backend Python versions
backend-python-3.10:
backend:
paths:
- src/**
carryforward: true
backend-python-3.11:
paths:
- src/**
carryforward: true
backend-python-3.12:
paths:
- src/**
carryforward: true
# Frontend (shards merge into single flag)
frontend-node-24.x:
frontend:
paths:
- src-ui/**
carryforward: true
# https://docs.codecov.com/docs/pull-request-comments
comment:
layout: "header, diff, components, flags, files"
# https://docs.codecov.com/docs/javascript-bundle-analysis
require_bundle_changes: true
bundle_change_threshold: "50Kb"
coverage:
# https://docs.codecov.com/docs/commit-status
status:
project:
backend:
flags:
- backend-python-3.10
- backend-python-3.11
- backend-python-3.12
paths:
- src/**
default:
# https://docs.codecov.com/docs/commit-status#threshold
threshold: 1%
removed_code_behavior: adjust_base
frontend:
flags:
- frontend-node-24.x
paths:
- src-ui/**
threshold: 1%
removed_code_behavior: adjust_base
patch:
backend:
flags:
- backend-python-3.10
- backend-python-3.11
- backend-python-3.12
paths:
- src/**
target: 100%
threshold: 25%
frontend:
flags:
- frontend-node-24.x
paths:
- src-ui/**
default:
# For the changed lines only, target 100% covered, but
# allow as low as 75%
target: 100%
threshold: 25%
# https://docs.codecov.com/docs/javascript-bundle-analysis
bundle_analysis:
# Fail if the bundle size increases by more than 1MB
warning_threshold: "1MB"
status: true

View File

@@ -44,7 +44,6 @@ include-labels:
- 'notable'
exclude-labels:
- 'skip-changelog'
filter-by-commitish: true
category-template: '### $TITLE'
change-template: '- $TITLE @$AUTHOR ([#$NUMBER]($URL))'
change-title-escapes: '\<*_&#@'

View File

@@ -88,13 +88,13 @@ jobs:
if: always()
uses: codecov/codecov-action@v5
with:
flags: backend-python-${{ matrix.python-version }}
flags: backend,backend-python-${{ matrix.python-version }}
files: junit.xml
report_type: test_results
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
flags: backend-python-${{ matrix.python-version }}
flags: backend,backend-python-${{ matrix.python-version }}
files: coverage.xml
report_type: coverage
- name: Stop containers

View File

@@ -35,7 +35,7 @@ jobs:
contents: read
packages: write
outputs:
should-push: ${{ steps.check-push.outputs.should-push }}
can-push: ${{ steps.check-push.outputs.can-push }}
push-external: ${{ steps.check-push.outputs.push-external }}
repository: ${{ steps.repo.outputs.name }}
ref-name: ${{ steps.ref.outputs.name }}
@@ -59,28 +59,16 @@ jobs:
env:
REF_NAME: ${{ steps.ref.outputs.name }}
run: |
# should-push: Should we push to GHCR?
# True for:
# 1. Pushes (tags/dev/beta) - filtered via the workflow triggers
# 2. Internal PRs where the branch name starts with 'feature-' - filtered here when a PR is synced
should_push="false"
if [[ "${{ github.event_name }}" == "push" ]]; then
should_push="true"
elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
if [[ "${REF_NAME}" == feature-* || "${REF_NAME}" == fix-* ]]; then
should_push="true"
fi
fi
echo "should-push=${should_push}"
echo "should-push=${should_push}" >> $GITHUB_OUTPUT
# can-push: Can we push to GHCR?
# True for: pushes, or PRs from the same repo (not forks)
can_push=${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
echo "can-push=${can_push}"
echo "can-push=${can_push}" >> $GITHUB_OUTPUT
# push-external: Should we also push to Docker Hub and Quay.io?
# Only for main repo on dev/beta branches or version tags
push_external="false"
if [[ "${should_push}" == "true" && "${{ github.repository_owner }}" == "paperless-ngx" ]]; then
if [[ "${can_push}" == "true" && "${{ github.repository_owner }}" == "paperless-ngx" ]]; then
case "${REF_NAME}" in
dev|beta)
push_external="true"
@@ -110,12 +98,6 @@ jobs:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Maximize space
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Docker metadata
id: docker-meta
uses: docker/metadata-action@v5.10.0
@@ -137,20 +119,20 @@ jobs:
labels: ${{ steps.docker-meta.outputs.labels }}
build-args: |
PNGX_TAG_VERSION=${{ steps.docker-meta.outputs.version }}
outputs: type=image,name=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ steps.check-push.outputs.should-push }}
outputs: type=image,name=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ steps.check-push.outputs.can-push }}
cache-from: |
type=registry,ref=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}/cache/app:${{ steps.ref.outputs.cache-ref }}-${{ matrix.arch }}
type=registry,ref=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}/cache/app:dev-${{ matrix.arch }}
cache-to: ${{ steps.check-push.outputs.should-push == 'true' && format('type=registry,mode=max,ref={0}/{1}/cache/app:{2}-{3}', env.REGISTRY, steps.repo.outputs.name, steps.ref.outputs.cache-ref, matrix.arch) || '' }}
cache-to: ${{ steps.check-push.outputs.can-push == 'true' && format('type=registry,mode=max,ref={0}/{1}/cache/app:{2}-{3}', env.REGISTRY, steps.repo.outputs.name, steps.ref.outputs.cache-ref, matrix.arch) || '' }}
- name: Export digest
if: steps.check-push.outputs.should-push == 'true'
if: steps.check-push.outputs.can-push == 'true'
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
echo "digest=${digest}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
if: steps.check-push.outputs.should-push == 'true'
if: steps.check-push.outputs.can-push == 'true'
uses: actions/upload-artifact@v6.0.0
with:
name: digests-${{ matrix.arch }}
@@ -161,7 +143,7 @@ jobs:
name: Merge and Push Manifest
runs-on: ubuntu-24.04
needs: build-arch
if: needs.build-arch.outputs.should-push == 'true'
if: needs.build-arch.outputs.can-push == 'true'
permissions:
contents: read
packages: write

View File

@@ -109,13 +109,13 @@ jobs:
if: always()
uses: codecov/codecov-action@v5
with:
flags: frontend-node-${{ matrix.node-version }}
flags: frontend,frontend-node-${{ matrix.node-version }}
directory: src-ui/
report_type: test_results
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
flags: frontend-node-${{ matrix.node-version }}
flags: frontend,frontend-node-${{ matrix.node-version }}
directory: src-ui/coverage/
e2e-tests:
name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"

View File

@@ -30,7 +30,7 @@ RUN set -eux \
# Purpose: Installs s6-overlay and rootfs
# Comments:
# - Don't leave anything extra in here either
FROM ghcr.io/astral-sh/uv:0.9.26-python3.12-trixie-slim AS s6-overlay-base
FROM ghcr.io/astral-sh/uv:0.9.15-python3.12-trixie-slim AS s6-overlay-base
WORKDIR /usr/src/s6
@@ -196,11 +196,7 @@ RUN set -eux \
&& apt-get install --yes --quiet --no-install-recommends ${BUILD_PACKAGES} \
&& echo "Installing Python requirements" \
&& uv export --quiet --no-dev --all-extras --format requirements-txt --output-file requirements.txt \
&& uv pip install --no-cache --system --no-python-downloads --python-preference system \
--index https://pypi.org/simple \
--index https://download.pytorch.org/whl/cpu \
--index-strategy unsafe-best-match \
--requirements requirements.txt \
&& uv pip install --no-cache --system --no-python-downloads --python-preference system --requirements requirements.txt \
&& echo "Installing NLTK data" \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" stopwords \

View File

@@ -4,13 +4,13 @@
set -eu
for command in document_archiver \
for command in decrypt_documents \
document_archiver \
document_exporter \
document_importer \
mail_fetcher \
document_create_classifier \
document_index \
document_llmindex \
document_renamer \
document_retagger \
document_thumbnails \

View File

@@ -6,9 +6,9 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_llmindex "$@"
s6-setuidgid paperless python3 manage.py decrypt_documents "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_llmindex "$@"
python3 manage.py decrypt_documents "$@"
else
echo "Unknown user."
fi

View File

@@ -580,6 +580,36 @@ document.
documents, such as encrypted PDF documents. The archiver will skip over
these documents each time it sees them.
### Managing encryption {#encryption}
!!! warning
Encryption was removed in [paperless-ng 0.9](changelog.md#paperless-ng-090)
because it did not really provide any additional security, the passphrase
was stored in a configuration file on the same system as the documents.
Furthermore, the entire text content of the documents is stored plain in
the database, even if your documents are encrypted. Filenames are not
encrypted as well. Finally, the web server provides transparent access to
your encrypted documents.
Consider running paperless on an encrypted filesystem instead, which
will then at least provide security against physical hardware theft.
#### Enabling encryption
Enabling encryption is no longer supported.
#### Disabling encryption
Basic usage to disable encryption of your document store:
(Note: If `PAPERLESS_PASSPHRASE` isn't set already, you need to specify
it here)
```
decrypt_documents [--passphrase SECR3TP4SSPHRA$E]
```
### Detecting duplicates {#fuzzy_duplicate}
Paperless already catches and prevents upload of exactly matching documents,

View File

@@ -501,7 +501,7 @@ The `datetime` filter formats a datetime string or datetime object using Python'
See the [strftime format code documentation](https://docs.python.org/3.13/library/datetime.html#strftime-and-strptime-format-codes)
for the possible codes and their meanings.
##### Date Localization {#date-localization}
##### Date Localization
The `localize_date` filter formats a date or datetime object into a localized string using Babel internationalization.
This takes into account the provided locale for translation. Since this must be used on a date or datetime object,
@@ -851,8 +851,8 @@ followed by the even pages.
It's important that the scan files get consumed in the correct order, and one at a time.
You therefore need to make sure that Paperless is running while you upload the files into
the directory; and if you're using polling, make sure that
`CONSUMER_POLLING_INTERVAL` is set to a value lower than it takes for the second scan to appear,
the directory; and if you're using [polling](configuration.md#polling), make sure that
`CONSUMER_POLLING` is set to a value lower than it takes for the second scan to appear,
like 5-10 or even lower.
Another thing that might happen is that you start a double sided scan, but then forget

View File

@@ -1,60 +1,9 @@
# Changelog
## paperless-ngx 2.20.5
### Bug Fixes
- Fix: ensure horizontal scroll for long tag names in list, wrap tags without parent [@shamoon](https://github.com/shamoon) ([#11811](https://github.com/paperless-ngx/paperless-ngx/pull/11811))
- Fix: use explicit order field for workflow actions [@shamoon](https://github.com/shamoon) [@stumpylog](https://github.com/stumpylog) ([#11781](https://github.com/paperless-ngx/paperless-ngx/pull/11781))
### All App Changes
<details>
<summary>2 changes</summary>
- Fix: ensure horizontal scroll for long tag names in list, wrap tags without parent [@shamoon](https://github.com/shamoon) ([#11811](https://github.com/paperless-ngx/paperless-ngx/pull/11811))
- Fix: use explicit order field for workflow actions [@shamoon](https://github.com/shamoon) [@stumpylog](https://github.com/stumpylog) ([#11781](https://github.com/paperless-ngx/paperless-ngx/pull/11781))
</details>
## paperless-ngx 2.20.4
### Security
- Resolve [GHSA-28cf-xvcf-hw6m](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-28cf-xvcf-hw6m)
### Bug Fixes
- Fix: propagate metadata override created value [@shamoon](https://github.com/shamoon) ([#11659](https://github.com/paperless-ngx/paperless-ngx/pull/11659))
- Fix: support ordering by storage path name [@shamoon](https://github.com/shamoon) ([#11661](https://github.com/paperless-ngx/paperless-ngx/pull/11661))
- Fix: validate cf integer values within PostgreSQL range [@shamoon](https://github.com/shamoon) ([#11666](https://github.com/paperless-ngx/paperless-ngx/pull/11666))
- Fixhancement: add error handling and retry when opening index [@shamoon](https://github.com/shamoon) ([#11731](https://github.com/paperless-ngx/paperless-ngx/pull/11731))
- Fix: fix recurring workflow to respect latest run time [@shamoon](https://github.com/shamoon) ([#11735](https://github.com/paperless-ngx/paperless-ngx/pull/11735))
### All App Changes
<details>
<summary>5 changes</summary>
- Fix: propagate metadata override created value [@shamoon](https://github.com/shamoon) ([#11659](https://github.com/paperless-ngx/paperless-ngx/pull/11659))
- Fix: support ordering by storage path name [@shamoon](https://github.com/shamoon) ([#11661](https://github.com/paperless-ngx/paperless-ngx/pull/11661))
- Fix: validate cf integer values within PostgreSQL range [@shamoon](https://github.com/shamoon) ([#11666](https://github.com/paperless-ngx/paperless-ngx/pull/11666))
- Fixhancement: add error handling and retry when opening index [@shamoon](https://github.com/shamoon) ([#11731](https://github.com/paperless-ngx/paperless-ngx/pull/11731))
- Fix: fix recurring workflow to respect latest run time [@shamoon](https://github.com/shamoon) ([#11735](https://github.com/paperless-ngx/paperless-ngx/pull/11735))
</details>
## paperless-ngx 2.20.3
### Security
- Resolve [GHSA-7cq3-mhxq-w946](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-7cq3-mhxq-w946)
## paperless-ngx 2.20.2
### Security
- Resolve [GHSA-6653-vcx4-69mc](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-6653-vcx4-69mc)
- Resolve [GHSA-24x5-wp64-9fcc](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-24x5-wp64-9fcc)
### Features / Enhancements
- Tweakhancement: dim inactive users in users-groups list [@shamoon](https://github.com/shamoon) ([#11537](https://github.com/paperless-ngx/paperless-ngx/pull/11537))

View File

@@ -170,18 +170,11 @@ Available options are `postgresql` and `mariadb`.
!!! note
A pool of 8-10 connections per worker is typically sufficient.
If you encounter error messages such as `couldn't get a connection`
or database connection timeouts, you probably need to increase the pool size.
!!! warning
Make sure your PostgreSQL `max_connections` setting is large enough to handle the connection pools:
`(NB_PAPERLESS_WORKERS + NB_CELERY_WORKERS) × POOL_SIZE + SAFETY_MARGIN`. For example, with
4 Paperless workers and 2 Celery workers, and a pool size of 8:``(4 + 2) × 8 + 10 = 58`,
so `max_connections = 60` (or even more) is appropriate.
This assumes only Paperless-ngx connects to your PostgreSQL instance. If you have other applications,
you should increase `max_connections` accordingly.
A small pool is typically sufficient — for example, a size of 4.
Make sure your PostgreSQL server's max_connections setting is large enough to handle:
```(Paperless workers + Celery workers) × pool size + safety margin```
For example, with 4 Paperless workers and 2 Celery workers, and a pool size of 4:
(4 + 2) × 4 + 10 = 34 connections required.
#### [`PAPERLESS_DB_READ_CACHE_ENABLED=<bool>`](#PAPERLESS_DB_READ_CACHE_ENABLED) {#PAPERLESS_DB_READ_CACHE_ENABLED}
@@ -1175,45 +1168,21 @@ don't exist yet.
#### [`PAPERLESS_CONSUMER_IGNORE_PATTERNS=<json>`](#PAPERLESS_CONSUMER_IGNORE_PATTERNS) {#PAPERLESS_CONSUMER_IGNORE_PATTERNS}
: Additional regex patterns for files to ignore in the consumption directory. Patterns are matched against filenames only (not full paths)
using Python's `re.match()`, which anchors at the start of the filename.
: By default, paperless ignores certain files and folders in the
consumption directory, such as system files created by the Mac OS
or hidden folders some tools use to store data.
See the [watchfiles documentation](https://watchfiles.helpmanual.io/api/filters/#watchfiles.BaseFilter.ignore_entity_patterns)
This can be adjusted by configuring a custom json array with
patterns to exclude.
This setting is for additional patterns beyond the built-in defaults. Common system files and directories are already ignored automatically.
The patterns will be compiled via Python's standard `re` module.
For example, `.DS_STORE/*` will ignore any files found in a folder
named `.DS_STORE`, including `.DS_STORE/bar.pdf` and `foo/.DS_STORE/bar.pdf`
Example custom patterns:
A pattern like `._*` will ignore anything starting with `._`, including:
`._foo.pdf` and `._bar/foo.pdf`
```json
["^temp_", "\\.bak$", "^~"]
```
This would ignore:
- Files starting with `temp_` (e.g., `temp_scan.pdf`)
- Files ending with `.bak` (e.g., `document.pdf.bak`)
- Files starting with `~` (e.g., `~$document.docx`)
Defaults to `[]` (empty list, uses only built-in defaults).
The default ignores are `[.DS_Store, .DS_STORE, ._*, desktop.ini, Thumbs.db]` and cannot be overridden.
#### [`PAPERLESS_CONSUMER_IGNORE_DIRS=<json>`](#PAPERLESS_CONSUMER_IGNORE_DIRS) {#PAPERLESS_CONSUMER_IGNORE_DIRS}
: Additional directory names to ignore in the consumption directory. Directories matching these names (and all their contents) will be skipped.
This setting is for additional directories beyond the built-in defaults. Matching is done by directory name only, not full path.
Example:
```json
["temp", "incoming", ".hidden"]
```
Defaults to `[]` (empty list, uses only built-in defaults).
The default ignores are `[.stfolder, .stversions, .localized, @eaDir, .Spotlight-V100, .Trashes, __MACOSX]` and cannot be overridden.
Defaults to
`[".DS_Store", ".DS_STORE", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini", "@eaDir/*", "Thumbs.db"]`.
#### [`PAPERLESS_CONSUMER_BARCODE_SCANNER=<string>`](#PAPERLESS_CONSUMER_BARCODE_SCANNER) {#PAPERLESS_CONSUMER_BARCODE_SCANNER}
@@ -1312,24 +1281,48 @@ within your documents.
Defaults to false.
#### [`PAPERLESS_CONSUMER_POLLING_INTERVAL=<num>`](#PAPERLESS_CONSUMER_POLLING_INTERVAL) {#PAPERLESS_CONSUMER_POLLING_INTERVAL}
### Polling {#polling}
: Configures how the consumer detects new files in the consumption directory.
#### [`PAPERLESS_CONSUMER_POLLING=<num>`](#PAPERLESS_CONSUMER_POLLING) {#PAPERLESS_CONSUMER_POLLING}
When set to `0` (default), paperless uses native filesystem notifications for efficient, immediate detection of new files.
: If paperless won't find documents added to your consume folder, it
might not be able to automatically detect filesystem changes. In
that case, specify a polling interval in seconds here, which will
then cause paperless to periodically check your consumption
directory for changes. This will also disable listening for file
system changes with `inotify`.
When set to a positive number, paperless polls the consumption directory at that interval in seconds. Use polling for network filesystems (NFS, SMB/CIFS) where native notifications may not work reliably.
Defaults to 0, which disables polling and uses filesystem
notifications.
Defaults to 0.
#### [`PAPERLESS_CONSUMER_POLLING_RETRY_COUNT=<num>`](#PAPERLESS_CONSUMER_POLLING_RETRY_COUNT) {#PAPERLESS_CONSUMER_POLLING_RETRY_COUNT}
#### [`PAPERLESS_CONSUMER_STABILITY_DELAY=<num>`](#PAPERLESS_CONSUMER_STABILITY_DELAY) {#PAPERLESS_CONSUMER_STABILITY_DELAY}
: If consumer polling is enabled, sets the maximum number of times
paperless will check for a file to remain unmodified. If a file's
modification time and size are identical for two consecutive checks, it
will be consumed.
: Sets the time in seconds that a file must remain unchanged (same size and modification time) before paperless will begin consuming it.
Defaults to 5.
Increase this value if you experience issues with files being consumed before they are fully written, particularly on slower network storage or
with certain scanner quirks
#### [`PAPERLESS_CONSUMER_POLLING_DELAY=<num>`](#PAPERLESS_CONSUMER_POLLING_DELAY) {#PAPERLESS_CONSUMER_POLLING_DELAY}
Defaults to 5.0 seconds.
: If consumer polling is enabled, sets the delay in seconds between
each check (above) paperless will do while waiting for a file to
remain unmodified.
Defaults to 5.
### iNotify {#inotify}
#### [`PAPERLESS_CONSUMER_INOTIFY_DELAY=<num>`](#PAPERLESS_CONSUMER_INOTIFY_DELAY) {#PAPERLESS_CONSUMER_INOTIFY_DELAY}
: Sets the time in seconds the consumer will wait for additional
events from inotify before the consumer will consider a file ready
and begin consumption. Certain scanners or network setups may
generate multiple events for a single file, leading to multiple
consumers working on the same file. Configure this to prevent that.
Defaults to 0.5 seconds.
## Workflow webhooks
@@ -1831,67 +1824,3 @@ password. All of these options come from their similarly-named [Django settings]
: The endpoint to use for the remote OCR engine. This is required for Azure AI.
Defaults to None.
## AI {#ai}
#### [`PAPERLESS_AI_ENABLED=<bool>`](#PAPERLESS_AI_ENABLED) {#PAPERLESS_AI_ENABLED}
: Enables the AI features in Paperless. This includes the AI-based
suggestions. This setting is required to be set to true in order to use the AI features.
Defaults to false.
#### [`PAPERLESS_AI_LLM_EMBEDDING_BACKEND=<str>`](#PAPERLESS_AI_LLM_EMBEDDING_BACKEND) {#PAPERLESS_AI_LLM_EMBEDDING_BACKEND}
: The embedding backend to use for RAG. This can be either "openai" or "huggingface".
Defaults to None.
#### [`PAPERLESS_AI_LLM_EMBEDDING_MODEL=<str>`](#PAPERLESS_AI_LLM_EMBEDDING_MODEL) {#PAPERLESS_AI_LLM_EMBEDDING_MODEL}
: The model to use for the embedding backend for RAG. This can be set to any of the embedding models supported by the current embedding backend. If not supplied, defaults to "text-embedding-3-small" for OpenAI and "sentence-transformers/all-MiniLM-L6-v2" for Huggingface.
Defaults to None.
#### [`PAPERLESS_AI_LLM_BACKEND=<str>`](#PAPERLESS_AI_LLM_BACKEND) {#PAPERLESS_AI_LLM_BACKEND}
: The AI backend to use. This can be either "openai" or "ollama". If set to "ollama", the AI
features will be run locally on your machine. If set to "openai", the AI features will be run
using the OpenAI API. This setting is required to be set to use the AI features.
Defaults to None.
!!! note
The OpenAI API is a paid service. You will need to set up an OpenAI account and
will be charged for usage incurred by Paperless-ngx features and your document data
will (of course) be sent to the OpenAI API. Paperless-ngx does not endorse the use of the
OpenAI API in any way.
Refer to the OpenAI terms of service, and use at your own risk.
#### [`PAPERLESS_AI_LLM_MODEL=<str>`](#PAPERLESS_AI_LLM_MODEL) {#PAPERLESS_AI_LLM_MODEL}
: The model to use for the AI backend, i.e. "gpt-3.5-turbo", "gpt-4" or any of the models supported by the
current backend. If not supplied, defaults to "gpt-3.5-turbo" for OpenAI and "llama3.1" for Ollama.
Defaults to None.
#### [`PAPERLESS_AI_LLM_API_KEY=<str>`](#PAPERLESS_AI_LLM_API_KEY) {#PAPERLESS_AI_LLM_API_KEY}
: The API key to use for the AI backend. This is required for the OpenAI backend (optional for others).
Defaults to None.
#### [`PAPERLESS_AI_LLM_ENDPOINT=<str>`](#PAPERLESS_AI_LLM_ENDPOINT) {#PAPERLESS_AI_LLM_ENDPOINT}
: The endpoint / url to use for the AI backend. This is required for the Ollama backend (optional for others).
Defaults to None.
#### [`PAPERLESS_AI_LLM_INDEX_TASK_CRON=<cron expression>`](#PAPERLESS_AI_LLM_INDEX_TASK_CRON) {#PAPERLESS_AI_LLM_INDEX_TASK_CRON}
: Configures the schedule to update the AI embeddings of text content and metadata for all documents. Only performed if
AI is enabled and the LLM embedding backend is set.
Defaults to `10 2 * * *`, once per day.

View File

@@ -31,7 +31,6 @@ physical documents into a searchable online archive so you can keep, well, _less
- _New!_ Supports remote OCR with Azure AI (opt-in).
- Documents are saved as PDF/A format which is designed for long term storage, alongside the unaltered originals.
- Uses machine-learning to automatically add tags, correspondents and document types to your documents.
- **New**: Paperless-ngx can now leverage AI (Large Language Models or LLMs) for document suggestions. This is an optional feature that can be enabled (and is disabled by default).
- Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.
- Paperless stores your documents plain on disk. Filenames and folders are managed by paperless and their format can be configured freely with different configurations assigned to different documents.
- **Beautiful, modern web application** that features:

View File

@@ -1,25 +0,0 @@
# v3 Migration Guide
## Consumer Settings Changes
The v3 consumer command uses a [different library](https://watchfiles.helpmanual.io/) to unify
the watching for new files in the consume directory. For the user, this removes several configuration options related to delays and retries
and replaces with a single unified setting. It also adjusts how the consumer ignore filtering happens, replaced `fnmatch` with `regex` and
separating the directory ignore from the file ignore.
### Summary
| Old Setting | New Setting | Notes |
| ------------------------------ | ----------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------ |
| `CONSUMER_POLLING` | [`CONSUMER_POLLING_INTERVAL`](configuration.md#PAPERLESS_CONSUMER_POLLING_INTERVAL) | Renamed for clarity |
| `CONSUMER_INOTIFY_DELAY` | [`CONSUMER_STABILITY_DELAY`](configuration.md#PAPERLESS_CONSUMER_STABILITY_DELAY) | Unified for all modes |
| `CONSUMER_POLLING_DELAY` | _Removed_ | Use `CONSUMER_STABILITY_DELAY` |
| `CONSUMER_POLLING_RETRY_COUNT` | _Removed_ | Automatic with stability tracking |
| `CONSUMER_IGNORE_PATTERNS` | [`CONSUMER_IGNORE_PATTERNS`](configuration.md#PAPERLESS_CONSUMER_IGNORE_PATTERNS) | **Now regex, not fnmatch**; user patterns are added to (not replacing) default ones |
| _New_ | [`CONSUMER_IGNORE_DIRS`](configuration.md#PAPERLESS_CONSUMER_IGNORE_DIRS) | Additional directories to ignore; user entries are added to (not replacing) defaults |
## Encryption Support
Document and thumbnail encryption is no longer supported. This was previously deprecated in [paperless-ng 0.9.3](https://github.com/paperless-ngx/paperless-ngx/blob/dev/docs/changelog.md#paperless-ng-093)
Users must decrypt their document using the `decrypt_documents` command before upgrading.

View File

@@ -124,7 +124,8 @@ account. The script essentially automatically performs the steps described in [D
system notifications with `inotify`. When storing the consumption
directory on such a file system, paperless will not pick up new
files with the default configuration. You will need to use
[`PAPERLESS_CONSUMER_POLLING_INTERVAL`](configuration.md#PAPERLESS_CONSUMER_POLLING_INTERVAL), which will disable inotify.
[`PAPERLESS_CONSUMER_POLLING`](configuration.md#PAPERLESS_CONSUMER_POLLING), which will disable inotify. See
[here](configuration.md#polling).
5. Run `docker compose pull`. This will pull the image from the GitHub container registry
by default but you can change the image to pull from Docker Hub by changing the `image`

View File

@@ -46,9 +46,9 @@ run:
If you notice that the consumer will only pickup files in the
consumption directory at startup, but won't find any other files added
later, you will need to enable filesystem polling with the configuration
option [`PAPERLESS_CONSUMER_POLLING_INTERVAL`](configuration.md#PAPERLESS_CONSUMER_POLLING_INTERVAL).
option [`PAPERLESS_CONSUMER_POLLING`](configuration.md#PAPERLESS_CONSUMER_POLLING).
This will disable automatic listening for filesystem changes and
This will disable listening to filesystem changes with inotify and
paperless will manually check the consumption directory for changes
instead.
@@ -234,9 +234,47 @@ FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zb
This probably indicates paperless tried to consume the same file twice.
This can happen for a number of reasons, depending on how documents are
placed into the consume folder, such as how a scanner may modify a file multiple times as it scans.
Try adjusting the
[file stability delay](configuration.md#PAPERLESS_CONSUMER_STABILITY_DELAY) to a larger value.
placed into the consume folder. If paperless is using inotify (the
default) to check for documents, try adjusting the
[inotify configuration](configuration.md#inotify). If polling is enabled, try adjusting the
[polling configuration](configuration.md#polling).
## Consumer fails waiting for file to remain unmodified.
You might find messages like these in your log files:
```
[ERROR] [paperless.management.consumer] Timeout while waiting on file /usr/src/paperless/src/../consume/SCN_0001.pdf to remain unmodified.
```
This indicates paperless timed out while waiting for the file to be
completely written to the consume folder. Adjusting
[polling configuration](configuration.md#polling) values should resolve the issue.
!!! note
The user will need to manually move the file out of the consume folder
and back in, for the initial failing file to be consumed.
## Consumer fails reporting "OS reports file as busy still".
You might find messages like these in your log files:
```
[WARNING] [paperless.management.consumer] Not consuming file /usr/src/paperless/src/../consume/SCN_0001.pdf: OS reports file as busy still
```
This indicates paperless was unable to open the file, as the OS reported
the file as still being in use. To prevent a crash, paperless did not
try to consume the file. If paperless is using inotify (the default) to
check for documents, try adjusting the
[inotify configuration](configuration.md#inotify). If polling is enabled, try adjusting the
[polling configuration](configuration.md#polling).
!!! note
The user will need to manually move the file out of the consume folder
and back in, for the initial failing file to be consumed.
## Log reports "Creating PaperlessTask failed".

View File

@@ -278,28 +278,6 @@ Once setup, navigating to the email settings page in Paperless-ngx will allow yo
You can also submit a document using the REST API, see [POSTing documents](api.md#file-uploads)
for details.
## Document Suggestions
Paperless-ngx can suggest tags, correspondents, document types and storage paths for documents based on the content of the document. This is done using a (non-LLM) machine learning model that is trained on the documents in your database. The suggestions are shown in the document detail page and can be accepted or rejected by the user.
## AI Features
Paperless-ngx includes several features that use AI to enhance the document management experience. These features are optional and can be enabled or disabled in the settings. If you are using the AI features, you may want to also enable the "LLM index" feature, which supports Retrieval-Augmented Generation (RAG) designed to improve the quality of AI responses. The LLM index feature is not enabled by default and requires additional configuration.
!!! warning
Remember that Paperless-ngx will send document content to the AI provider you have configured, so consider the privacy implications of using these features, especially if using a remote model (e.g. OpenAI), instead of the default local model.
The AI features work by creating an embedding of the text content and metadata of documents, which is then used for various tasks such as similarity search and question answering. This uses the FAISS vector store.
### AI-Enhanced Suggestions
If enabled, Paperless-ngx can use an AI LLM model to suggest document titles, dates, tags, correspondents and document types for documents. This feature will always be "opt-in" and does not disable the existing classifier-based suggestion system. Currently, both remote (via the OpenAI API) and local (via Ollama) models are supported, see [configuration](configuration.md#ai) for details.
### Document Chat
Paperless-ngx can use an AI LLM model to answer questions about a document or across multiple documents. Again, this feature works best when RAG is enabled. The chat feature is available in the upper app toolbar and will switch between chatting across multiple documents or a single document based on the current view.
## Sharing documents from Paperless-ngx
Paperless-ngx supports sharing documents with other users by assigning them [permissions](#object-permissions)
@@ -565,7 +543,7 @@ This allows for complex logic to be used to generate the title, including [logic
and [filters](https://jinja.palletsprojects.com/en/3.1.x/templates/#id11).
The template is provided as a string.
Using Jinja2 Templates is also useful for [Date localization](advanced_usage.md#date-localization) in the title.
Using Jinja2 Templates is also useful for [Date localization](advanced_usage.md#Date-Localization) in the title.
The available inputs differ depending on the type of workflow trigger.
This is because at the time of consumption (when the text is to be set), no automatic tags etc. have been
@@ -597,7 +575,6 @@ The following placeholders are only available for "added" or "updated" triggers
- `{{created_day}}`: created day
- `{{created_time}}`: created time in HH:MM format
- `{{doc_url}}`: URL to the document in the web UI. Requires the `PAPERLESS_URL` setting to be set.
- `{{doc_id}}`: Document ID
##### Examples

View File

@@ -69,9 +69,8 @@ nav:
- development.md
- 'FAQs': faq.md
- troubleshooting.md
- 'Migration to v3': migration.md
- changelog.md
copyright: Copyright &copy; 2016 - 2026 Daniel Quinn, Jonas Winkler, and the Paperless-ngx team
copyright: Copyright &copy; 2016 - 2023 Daniel Quinn, Jonas Winkler, and the Paperless-ngx team
extra:
social:
- icon: fontawesome/brands/github

View File

@@ -55,10 +55,10 @@
#PAPERLESS_TASK_WORKERS=1
#PAPERLESS_THREADS_PER_WORKER=1
#PAPERLESS_TIME_ZONE=UTC
#PAPERLESS_CONSUMER_POLLING_INTERVAL=10
#PAPERLESS_CONSUMER_POLLING=10
#PAPERLESS_CONSUMER_DELETE_DUPLICATES=false
#PAPERLESS_CONSUMER_RECURSIVE=false
#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[] # Defaults are built in; add filename regexes, e.g. ["^\\.DS_Store$", "^desktop\\.ini$"]
#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
#PAPERLESS_CONSUMER_ENABLE_BARCODES=false
#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT

View File

@@ -1,6 +1,6 @@
[project]
name = "paperless-ngx"
version = "2.20.5"
version = "2.20.3"
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
readme = "README.md"
requires-python = ">=3.10"
@@ -27,8 +27,8 @@ dependencies = [
# WARNING: django does not use semver.
# Only patch versions are guaranteed to not introduce breaking changes.
"django~=5.2.5",
"django-allauth[mfa,socialaccount]~=65.13.1",
"django-auditlog~=3.4.1",
"django-allauth[mfa,socialaccount]~=65.12.1",
"django-auditlog~=3.3.0",
"django-cachalot~=2.8.0",
"django-celery-results~=2.6.0",
"django-compression-middleware~=0.5.0",
@@ -44,23 +44,16 @@ dependencies = [
"drf-spectacular~=0.28",
"drf-spectacular-sidecar~=2025.10.1",
"drf-writable-nested~=0.7.1",
"faiss-cpu>=1.10",
"filelock~=3.20.0",
"flower~=2.0.1",
"gotenberg-client~=0.13.1",
"gotenberg-client~=0.12.0",
"httpx-oauth~=0.16",
"imap-tools~=1.11.0",
"inotifyrecursive~=0.3",
"jinja2~=3.1.5",
"langdetect~=1.0.9",
"llama-index-core>=0.14.12",
"llama-index-embeddings-huggingface>=0.6.1",
"llama-index-embeddings-openai>=0.5.1",
"llama-index-llms-ollama>=0.9.1",
"llama-index-llms-openai>=0.6.13",
"llama-index-vector-stores-faiss>=0.5.2",
"nltk~=3.9.1",
"ocrmypdf~=16.13.0",
"openai>=1.76",
"ocrmypdf~=16.12.0",
"pathvalidate~=3.3.1",
"pdf2image~=1.17.0",
"python-dateutil~=2.9.0",
@@ -73,12 +66,10 @@ dependencies = [
"redis[hiredis]~=5.2.1",
"regex>=2025.9.18",
"scikit-learn~=1.7.0",
"sentence-transformers>=4.1",
"setproctitle~=1.3.4",
"tika-client~=0.10.0",
"torch~=2.9.1",
"tqdm~=4.67.1",
"watchfiles>=1.1.1",
"watchdog~=6.0",
"whitenoise~=6.9",
"whoosh-reloaded>=2.7.5",
"zxing-cpp~=2.3.0",
@@ -91,7 +82,7 @@ optional-dependencies.postgres = [
"psycopg[c,pool]==3.2.12",
# Direct dependency for proper resolution of the pre-built wheels
"psycopg-c==3.2.12",
"psycopg-pool==3.3",
"psycopg-pool==3.2.7",
]
optional-dependencies.webserver = [
"granian[uvloop]~=2.5.1",
@@ -126,7 +117,7 @@ testing = [
]
lint = [
"pre-commit~=4.5.1",
"pre-commit~=4.4.0",
"pre-commit-uv~=4.2.0",
"ruff~=0.14.0",
]
@@ -169,15 +160,6 @@ zxing-cpp = [
{ url = "https://github.com/paperless-ngx/builder/releases/download/zxing-2.3.0/zxing_cpp-2.3.0-cp312-cp312-linux_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64' and python_version == '3.12'" },
]
torch = [
{ index = "pytorch-cpu" },
]
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
[tool.ruff]
target-version = "py310"
line-length = 88
@@ -273,7 +255,6 @@ testpaths = [
"src/paperless_tika/tests",
"src/paperless_text/tests/",
"src/paperless_remote/tests/",
"src/paperless_ai/tests",
]
addopts = [
"--pythonwarnings=all",

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"name": "paperless-ngx-ui",
"version": "2.20.5",
"version": "2.20.3",
"scripts": {
"preinstall": "npx only-allow pnpm",
"ng": "ng",

View File

@@ -35,12 +35,8 @@
@case (ConfigOptionType.String) { <pngx-input-text [formControlName]="option.key" [error]="errors[option.key]"></pngx-input-text> }
@case (ConfigOptionType.JSON) { <pngx-input-text [formControlName]="option.key" [error]="errors[option.key]"></pngx-input-text> }
@case (ConfigOptionType.File) { <pngx-input-file [formControlName]="option.key" (upload)="uploadFile($event, option.key)" [error]="errors[option.key]"></pngx-input-file> }
@case (ConfigOptionType.Password) { <pngx-input-password [formControlName]="option.key" [error]="errors[option.key]"></pngx-input-password> }
}
</div>
@if (option.note) {
<div class="form-text fst-italic">{{option.note}}</div>
}
</div>
</div>
</div>

View File

@@ -29,7 +29,6 @@ import { SettingsService } from 'src/app/services/settings.service'
import { ToastService } from 'src/app/services/toast.service'
import { FileComponent } from '../../common/input/file/file.component'
import { NumberComponent } from '../../common/input/number/number.component'
import { PasswordComponent } from '../../common/input/password/password.component'
import { SelectComponent } from '../../common/input/select/select.component'
import { SwitchComponent } from '../../common/input/switch/switch.component'
import { TextComponent } from '../../common/input/text/text.component'
@@ -47,7 +46,6 @@ import { LoadingComponentWithPermissions } from '../../loading-component/loading
TextComponent,
NumberComponent,
FileComponent,
PasswordComponent,
AsyncPipe,
NgbNavModule,
FormsModule,

View File

@@ -91,9 +91,6 @@ const status: SystemStatus = {
sanity_check_status: SystemStatusItemStatus.ERROR,
sanity_check_last_run: new Date().toISOString(),
sanity_check_error: 'Error running sanity check.',
llmindex_status: SystemStatusItemStatus.DISABLED,
llmindex_last_modified: new Date().toISOString(),
llmindex_error: null,
},
}

View File

@@ -30,9 +30,6 @@
</div>
</div>
<ul ngbNav class="order-sm-3">
@if (aiEnabled) {
<pngx-chat></pngx-chat>
}
<pngx-toasts-dropdown></pngx-toasts-dropdown>
<li ngbDropdown class="nav-item dropdown">
<button class="btn ps-1 border-0" id="userDropdown" ngbDropdownToggle>

View File

@@ -44,7 +44,6 @@ import { SettingsService } from 'src/app/services/settings.service'
import { TasksService } from 'src/app/services/tasks.service'
import { ToastService } from 'src/app/services/toast.service'
import { environment } from 'src/environments/environment'
import { ChatComponent } from '../chat/chat/chat.component'
import { ProfileEditDialogComponent } from '../common/profile-edit-dialog/profile-edit-dialog.component'
import { DocumentDetailComponent } from '../document-detail/document-detail.component'
import { ComponentWithPermissions } from '../with-permissions/with-permissions.component'
@@ -60,7 +59,6 @@ import { ToastsDropdownComponent } from './toasts-dropdown/toasts-dropdown.compo
DocumentTitlePipe,
IfPermissionsDirective,
ToastsDropdownComponent,
ChatComponent,
RouterModule,
NgClass,
NgbDropdownModule,
@@ -186,10 +184,6 @@ export class AppFrameComponent
})
}
get aiEnabled(): boolean {
return this.settingsService.get(SETTINGS_KEYS.AI_ENABLED)
}
closeMenu() {
this.isMenuCollapsed = true
}

View File

@@ -1,5 +1,5 @@
<li ngbDropdown class="nav-item mx-1" (openChange)="onOpenChange($event)">
<li ngbDropdown class="nav-item" (openChange)="onOpenChange($event)">
@if (toasts.length) {
<span class="badge rounded-pill z-3 pe-none bg-secondary me-2 position-absolute top-0 left-0">{{ toasts.length }}</span>
}

View File

@@ -1,35 +0,0 @@
<li ngbDropdown class="nav-item me-n2" (openChange)="onOpenChange($event)">
<button class="btn border-0" id="chatDropdown" ngbDropdownToggle>
<i-bs width="1.3em" height="1.3em" name="chatSquareDots"></i-bs>
</button>
<div ngbDropdownMenu class="dropdown-menu-end shadow p-3" aria-labelledby="chatDropdown">
<div class="chat-container bg-light p-2">
<div class="chat-messages font-monospace small">
@for (message of messages; track message) {
<div class="message d-flex flex-row small" [class.justify-content-end]="message.role === 'user'">
<span class="p-2 m-2" [class.bg-dark]="message.role === 'user'">
{{ message.content }}
@if (message.isStreaming) { <span class="blinking-cursor">|</span> }
</span>
</div>
}
<div #scrollAnchor></div>
</div>
<form class="chat-input">
<div class="input-group">
<input
#chatInput
class="form-control form-control-sm" name="chatInput" type="text"
[placeholder]="placeholder"
[disabled]="loading"
[(ngModel)]="input"
(keydown)="searchInputKeyDown($event)"
/>
<button class="btn btn-sm btn-secondary" type="button" (click)="sendMessage()" [disabled]="loading">Send</button>
</div>
</form>
</div>
</div>
</li>

View File

@@ -1,37 +0,0 @@
.dropdown-menu {
width: var(--pngx-toast-max-width);
}
.chat-messages {
max-height: 350px;
overflow-y: auto;
}
.dropdown-toggle::after {
display: none;
}
.dropdown-item {
white-space: initial;
}
@media screen and (max-width: 400px) {
:host ::ng-deep .dropdown-menu-end {
right: -3rem;
}
}
.blinking-cursor {
font-weight: bold;
font-size: 1.2em;
animation: blink 1s step-end infinite;
}
@keyframes blink {
from, to {
opacity: 0;
}
50% {
opacity: 1;
}
}

View File

@@ -1,132 +0,0 @@
import { provideHttpClient, withInterceptorsFromDi } from '@angular/common/http'
import { provideHttpClientTesting } from '@angular/common/http/testing'
import { ElementRef } from '@angular/core'
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { NavigationEnd, Router } from '@angular/router'
import { allIcons, NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { Subject } from 'rxjs'
import { ChatService } from 'src/app/services/chat.service'
import { ChatComponent } from './chat.component'
describe('ChatComponent', () => {
let component: ChatComponent
let fixture: ComponentFixture<ChatComponent>
let chatService: ChatService
let router: Router
let routerEvents$: Subject<NavigationEnd>
let mockStream$: Subject<string>
beforeEach(async () => {
TestBed.configureTestingModule({
imports: [NgxBootstrapIconsModule.pick(allIcons), ChatComponent],
providers: [
provideHttpClient(withInterceptorsFromDi()),
provideHttpClientTesting(),
],
}).compileComponents()
fixture = TestBed.createComponent(ChatComponent)
router = TestBed.inject(Router)
routerEvents$ = new Subject<any>()
jest
.spyOn(router, 'events', 'get')
.mockReturnValue(routerEvents$.asObservable())
chatService = TestBed.inject(ChatService)
mockStream$ = new Subject<string>()
jest
.spyOn(chatService, 'streamChat')
.mockReturnValue(mockStream$.asObservable())
component = fixture.componentInstance
jest.useFakeTimers()
fixture.detectChanges()
component.scrollAnchor.nativeElement.scrollIntoView = jest.fn()
})
it('should update documentId on initialization', () => {
jest.spyOn(router, 'url', 'get').mockReturnValue('/documents/123')
component.ngOnInit()
expect(component.documentId).toBe(123)
})
it('should update documentId on navigation', () => {
component.ngOnInit()
routerEvents$.next(new NavigationEnd(1, '/documents/456', '/documents/456'))
expect(component.documentId).toBe(456)
})
it('should return correct placeholder based on documentId', () => {
component.documentId = 123
expect(component.placeholder).toBe('Ask a question about this document...')
component.documentId = undefined
expect(component.placeholder).toBe('Ask a question about a document...')
})
it('should send a message and handle streaming response', () => {
component.input = 'Hello'
component.sendMessage()
expect(component.messages.length).toBe(2)
expect(component.messages[0].content).toBe('Hello')
expect(component.loading).toBe(true)
mockStream$.next('Hi')
expect(component.messages[1].content).toBe('H')
mockStream$.next('Hi there')
// advance time to process the typewriter effect
jest.advanceTimersByTime(1000)
expect(component.messages[1].content).toBe('Hi there')
mockStream$.complete()
expect(component.loading).toBe(false)
expect(component.messages[1].isStreaming).toBe(false)
})
it('should handle errors during streaming', () => {
component.input = 'Hello'
component.sendMessage()
mockStream$.error('Error')
expect(component.messages[1].content).toContain(
'⚠️ Error receiving response.'
)
expect(component.loading).toBe(false)
})
it('should enqueue typewriter chunks correctly', () => {
const message = { content: '', role: 'assistant', isStreaming: true }
component.enqueueTypewriter(null, message as any) // coverage for null
component.enqueueTypewriter('Hello', message as any)
expect(component['typewriterBuffer'].length).toBe(4)
})
it('should scroll to bottom after sending a message', () => {
const scrollSpy = jest.spyOn(
ChatComponent.prototype as any,
'scrollToBottom'
)
component.input = 'Test'
component.sendMessage()
expect(scrollSpy).toHaveBeenCalled()
})
it('should focus chat input when dropdown is opened', () => {
const focus = jest.fn()
component.chatInput = {
nativeElement: { focus: focus },
} as unknown as ElementRef<HTMLInputElement>
component.onOpenChange(true)
jest.advanceTimersByTime(15)
expect(focus).toHaveBeenCalled()
})
it('should send message on Enter key press', () => {
jest.spyOn(component, 'sendMessage')
const event = new KeyboardEvent('keydown', { key: 'Enter' })
component.searchInputKeyDown(event)
expect(component.sendMessage).toHaveBeenCalled()
})
})

View File

@@ -1,140 +0,0 @@
import { Component, ElementRef, inject, OnInit, ViewChild } from '@angular/core'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import { NavigationEnd, Router } from '@angular/router'
import { NgbDropdownModule } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { filter, map } from 'rxjs'
import { ChatMessage, ChatService } from 'src/app/services/chat.service'
@Component({
selector: 'pngx-chat',
imports: [
FormsModule,
ReactiveFormsModule,
NgxBootstrapIconsModule,
NgbDropdownModule,
],
templateUrl: './chat.component.html',
styleUrl: './chat.component.scss',
})
export class ChatComponent implements OnInit {
public messages: ChatMessage[] = []
public loading = false
public input: string = ''
public documentId!: number
private chatService: ChatService = inject(ChatService)
private router: Router = inject(Router)
@ViewChild('scrollAnchor') scrollAnchor!: ElementRef<HTMLDivElement>
@ViewChild('chatInput') chatInput!: ElementRef<HTMLInputElement>
private typewriterBuffer: string[] = []
private typewriterActive = false
public get placeholder(): string {
return this.documentId
? $localize`Ask a question about this document...`
: $localize`Ask a question about a document...`
}
ngOnInit(): void {
this.updateDocumentId(this.router.url)
this.router.events
.pipe(
filter((event) => event instanceof NavigationEnd),
map((event) => (event as NavigationEnd).url)
)
.subscribe((url) => {
this.updateDocumentId(url)
})
}
private updateDocumentId(url: string): void {
const docIdRe = url.match(/^\/documents\/(\d+)/)
this.documentId = docIdRe ? +docIdRe[1] : undefined
}
sendMessage(): void {
if (!this.input.trim()) return
const userMessage: ChatMessage = { role: 'user', content: this.input }
this.messages.push(userMessage)
this.scrollToBottom()
const assistantMessage: ChatMessage = {
role: 'assistant',
content: '',
isStreaming: true,
}
this.messages.push(assistantMessage)
this.loading = true
let lastPartialLength = 0
this.chatService.streamChat(this.documentId, this.input).subscribe({
next: (chunk) => {
const delta = chunk.substring(lastPartialLength)
lastPartialLength = chunk.length
this.enqueueTypewriter(delta, assistantMessage)
},
error: () => {
assistantMessage.content += '\n\n⚠ Error receiving response.'
assistantMessage.isStreaming = false
this.loading = false
},
complete: () => {
assistantMessage.isStreaming = false
this.loading = false
this.scrollToBottom()
},
})
this.input = ''
}
enqueueTypewriter(chunk: string, message: ChatMessage): void {
if (!chunk) return
this.typewriterBuffer.push(...chunk.split(''))
if (!this.typewriterActive) {
this.typewriterActive = true
this.playTypewriter(message)
}
}
playTypewriter(message: ChatMessage): void {
if (this.typewriterBuffer.length === 0) {
this.typewriterActive = false
return
}
const nextChar = this.typewriterBuffer.shift()
message.content += nextChar
this.scrollToBottom()
setTimeout(() => this.playTypewriter(message), 10) // 10ms per character
}
private scrollToBottom(): void {
setTimeout(() => {
this.scrollAnchor?.nativeElement?.scrollIntoView({ behavior: 'smooth' })
}, 50)
}
public onOpenChange(open: boolean): void {
if (open) {
setTimeout(() => {
this.chatInput.nativeElement.focus()
}, 10)
}
}
public searchInputKeyDown(event: KeyboardEvent) {
if (event.key === 'Enter') {
event.preventDefault()
this.sendMessage()
}
}
}

View File

@@ -1,7 +1,7 @@
<div ngbDropdown #fieldDropdown="ngbDropdown" (openChange)="onOpenClose($event)" [popperOptions]="popperOptions">
<button type="button" class="btn btn-sm btn-outline-primary" id="customFieldsDropdown" [disabled]="disabled" ngbDropdownToggle>
<div ngbDropdown #fieldDropdown="ngbDropdown" (openChange)="onOpenClose($event)" [popperOptions]="popperOptions" placement="bottom-end">
<button class="btn btn-sm btn-outline-primary" id="customFieldsDropdown" [disabled]="disabled" ngbDropdownToggle>
<i-bs name="ui-radios"></i-bs>
<div class="d-none d-lg-inline">&nbsp;<ng-container i18n>Custom Fields</ng-container></div>
<div class="d-none d-sm-inline">&nbsp;<ng-container i18n>Custom Fields</ng-container></div>
</button>
<div ngbDropdownMenu aria-labelledby="customFieldsDropdown" class="shadow custom-fields-dropdown">
<div class="list-group list-group-flush" (keydown)="listKeyDown($event)">

View File

@@ -252,7 +252,7 @@ describe('WorkflowEditDialogComponent', () => {
expect(component.object.actions.length).toEqual(2)
})
it('should update order on drag n drop', () => {
it('should update order and remove ids from actions on drag n drop', () => {
const action1 = workflow.actions[0]
const action2 = workflow.actions[1]
component.object = workflow
@@ -261,6 +261,8 @@ describe('WorkflowEditDialogComponent', () => {
WorkflowAction[]
>)
expect(component.object.actions).toEqual([action2, action1])
expect(action1.id).toBeNull()
expect(action2.id).toBeNull()
})
it('should not include auto matching in algorithms', () => {
@@ -412,9 +414,6 @@ describe('WorkflowEditDialogComponent', () => {
return newFilter
}
const correspondentAny = addFilterOfType(TriggerFilterType.CorrespondentAny)
correspondentAny.get('values').setValue([11])
const correspondentIs = addFilterOfType(TriggerFilterType.CorrespondentIs)
correspondentIs.get('values').setValue(1)
@@ -424,18 +423,12 @@ describe('WorkflowEditDialogComponent', () => {
const documentTypeIs = addFilterOfType(TriggerFilterType.DocumentTypeIs)
documentTypeIs.get('values').setValue(1)
const documentTypeAny = addFilterOfType(TriggerFilterType.DocumentTypeAny)
documentTypeAny.get('values').setValue([12])
const documentTypeNot = addFilterOfType(TriggerFilterType.DocumentTypeNot)
documentTypeNot.get('values').setValue([1])
const storagePathIs = addFilterOfType(TriggerFilterType.StoragePathIs)
storagePathIs.get('values').setValue(1)
const storagePathAny = addFilterOfType(TriggerFilterType.StoragePathAny)
storagePathAny.get('values').setValue([13])
const storagePathNot = addFilterOfType(TriggerFilterType.StoragePathNot)
storagePathNot.get('values').setValue([1])
@@ -450,13 +443,10 @@ describe('WorkflowEditDialogComponent', () => {
expect(formValues.triggers[0].filter_has_tags).toEqual([1])
expect(formValues.triggers[0].filter_has_all_tags).toEqual([2, 3])
expect(formValues.triggers[0].filter_has_not_tags).toEqual([4])
expect(formValues.triggers[0].filter_has_any_correspondents).toEqual([11])
expect(formValues.triggers[0].filter_has_correspondent).toEqual(1)
expect(formValues.triggers[0].filter_has_not_correspondents).toEqual([1])
expect(formValues.triggers[0].filter_has_any_document_types).toEqual([12])
expect(formValues.triggers[0].filter_has_document_type).toEqual(1)
expect(formValues.triggers[0].filter_has_not_document_types).toEqual([1])
expect(formValues.triggers[0].filter_has_any_storage_paths).toEqual([13])
expect(formValues.triggers[0].filter_has_storage_path).toEqual(1)
expect(formValues.triggers[0].filter_has_not_storage_paths).toEqual([1])
expect(formValues.triggers[0].filter_custom_field_query).toEqual(
@@ -519,22 +509,16 @@ describe('WorkflowEditDialogComponent', () => {
setFilter(TriggerFilterType.TagsAll, 11)
setFilter(TriggerFilterType.TagsNone, 12)
setFilter(TriggerFilterType.CorrespondentAny, 16)
setFilter(TriggerFilterType.CorrespondentNot, 13)
setFilter(TriggerFilterType.DocumentTypeAny, 17)
setFilter(TriggerFilterType.DocumentTypeNot, 14)
setFilter(TriggerFilterType.StoragePathAny, 18)
setFilter(TriggerFilterType.StoragePathNot, 15)
const formValues = component['getFormValues']()
expect(formValues.triggers[0].filter_has_all_tags).toEqual([11])
expect(formValues.triggers[0].filter_has_not_tags).toEqual([12])
expect(formValues.triggers[0].filter_has_any_correspondents).toEqual([16])
expect(formValues.triggers[0].filter_has_not_correspondents).toEqual([13])
expect(formValues.triggers[0].filter_has_any_document_types).toEqual([17])
expect(formValues.triggers[0].filter_has_not_document_types).toEqual([14])
expect(formValues.triggers[0].filter_has_any_storage_paths).toEqual([18])
expect(formValues.triggers[0].filter_has_not_storage_paths).toEqual([15])
})
@@ -658,11 +642,8 @@ describe('WorkflowEditDialogComponent', () => {
filter_has_tags: [],
filter_has_all_tags: [],
filter_has_not_tags: [],
filter_has_any_correspondents: [],
filter_has_not_correspondents: [],
filter_has_any_document_types: [],
filter_has_not_document_types: [],
filter_has_any_storage_paths: [],
filter_has_not_storage_paths: [],
filter_has_correspondent: null,
filter_has_document_type: null,
@@ -720,14 +701,11 @@ describe('WorkflowEditDialogComponent', () => {
trigger.filter_has_tags = [1]
trigger.filter_has_all_tags = [2, 3]
trigger.filter_has_not_tags = [4]
trigger.filter_has_any_correspondents = [10] as any
trigger.filter_has_correspondent = 5 as any
trigger.filter_has_not_correspondents = [6] as any
trigger.filter_has_document_type = 7 as any
trigger.filter_has_any_document_types = [11] as any
trigger.filter_has_not_document_types = [8] as any
trigger.filter_has_storage_path = 9 as any
trigger.filter_has_any_storage_paths = [12] as any
trigger.filter_has_not_storage_paths = [10] as any
trigger.filter_custom_field_query = JSON.stringify([
'AND',
@@ -738,8 +716,8 @@ describe('WorkflowEditDialogComponent', () => {
component.ngOnInit()
const triggerGroup = component.triggerFields.at(0) as FormGroup
const filters = component.getFiltersFormArray(triggerGroup)
expect(filters.length).toBe(13)
const customFieldFilter = filters.at(12) as FormGroup
expect(filters.length).toBe(10)
const customFieldFilter = filters.at(9) as FormGroup
expect(customFieldFilter.get('type').value).toBe(
TriggerFilterType.CustomFieldQuery
)
@@ -748,27 +726,12 @@ describe('WorkflowEditDialogComponent', () => {
})
it('should expose select metadata helpers', () => {
expect(component.isSelectMultiple(TriggerFilterType.CorrespondentAny)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.CorrespondentNot)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.CorrespondentIs)).toBe(
false
)
expect(component.isSelectMultiple(TriggerFilterType.DocumentTypeAny)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.DocumentTypeIs)).toBe(
false
)
expect(component.isSelectMultiple(TriggerFilterType.StoragePathAny)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.StoragePathIs)).toBe(
false
)
component.correspondents = [{ id: 1, name: 'C1' } as any]
component.documentTypes = [{ id: 2, name: 'DT' } as any]
@@ -780,15 +743,9 @@ describe('WorkflowEditDialogComponent', () => {
expect(
component.getFilterSelectItems(TriggerFilterType.DocumentTypeIs)
).toEqual(component.documentTypes)
expect(
component.getFilterSelectItems(TriggerFilterType.DocumentTypeAny)
).toEqual(component.documentTypes)
expect(
component.getFilterSelectItems(TriggerFilterType.StoragePathIs)
).toEqual(component.storagePaths)
expect(
component.getFilterSelectItems(TriggerFilterType.StoragePathAny)
).toEqual(component.storagePaths)
expect(component.getFilterSelectItems(TriggerFilterType.TagsAll)).toEqual(
[]
)

View File

@@ -145,13 +145,10 @@ export enum TriggerFilterType {
TagsAny = 'tags_any',
TagsAll = 'tags_all',
TagsNone = 'tags_none',
CorrespondentAny = 'correspondent_any',
CorrespondentIs = 'correspondent_is',
CorrespondentNot = 'correspondent_not',
DocumentTypeAny = 'document_type_any',
DocumentTypeIs = 'document_type_is',
DocumentTypeNot = 'document_type_not',
StoragePathAny = 'storage_path_any',
StoragePathIs = 'storage_path_is',
StoragePathNot = 'storage_path_not',
CustomFieldQuery = 'custom_field_query',
@@ -175,11 +172,8 @@ type TriggerFilterAggregate = {
filter_has_tags: number[]
filter_has_all_tags: number[]
filter_has_not_tags: number[]
filter_has_any_correspondents: number[]
filter_has_not_correspondents: number[]
filter_has_any_document_types: number[]
filter_has_not_document_types: number[]
filter_has_any_storage_paths: number[]
filter_has_not_storage_paths: number[]
filter_has_correspondent: number | null
filter_has_document_type: number | null
@@ -225,14 +219,6 @@ const TRIGGER_FILTER_DEFINITIONS: TriggerFilterDefinition[] = [
allowMultipleEntries: false,
allowMultipleValues: true,
},
{
id: TriggerFilterType.CorrespondentAny,
name: $localize`Has any of these correspondents`,
inputType: 'select',
allowMultipleEntries: false,
allowMultipleValues: true,
selectItems: 'correspondents',
},
{
id: TriggerFilterType.CorrespondentIs,
name: $localize`Has correspondent`,
@@ -257,14 +243,6 @@ const TRIGGER_FILTER_DEFINITIONS: TriggerFilterDefinition[] = [
allowMultipleValues: false,
selectItems: 'documentTypes',
},
{
id: TriggerFilterType.DocumentTypeAny,
name: $localize`Has any of these document types`,
inputType: 'select',
allowMultipleEntries: false,
allowMultipleValues: true,
selectItems: 'documentTypes',
},
{
id: TriggerFilterType.DocumentTypeNot,
name: $localize`Does not have document types`,
@@ -281,14 +259,6 @@ const TRIGGER_FILTER_DEFINITIONS: TriggerFilterDefinition[] = [
allowMultipleValues: false,
selectItems: 'storagePaths',
},
{
id: TriggerFilterType.StoragePathAny,
name: $localize`Has any of these storage paths`,
inputType: 'select',
allowMultipleEntries: false,
allowMultipleValues: true,
selectItems: 'storagePaths',
},
{
id: TriggerFilterType.StoragePathNot,
name: $localize`Does not have storage paths`,
@@ -336,15 +306,6 @@ const FILTER_HANDLERS: Record<TriggerFilterType, FilterHandler> = {
extract: (trigger) => trigger.filter_has_not_tags,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.CorrespondentAny]: {
apply: (aggregate, values) => {
aggregate.filter_has_any_correspondents = Array.isArray(values)
? [...values]
: [values]
},
extract: (trigger) => trigger.filter_has_any_correspondents,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.CorrespondentIs]: {
apply: (aggregate, values) => {
aggregate.filter_has_correspondent = Array.isArray(values)
@@ -372,15 +333,6 @@ const FILTER_HANDLERS: Record<TriggerFilterType, FilterHandler> = {
extract: (trigger) => trigger.filter_has_document_type,
hasValue: (value) => value !== null && value !== undefined,
},
[TriggerFilterType.DocumentTypeAny]: {
apply: (aggregate, values) => {
aggregate.filter_has_any_document_types = Array.isArray(values)
? [...values]
: [values]
},
extract: (trigger) => trigger.filter_has_any_document_types,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.DocumentTypeNot]: {
apply: (aggregate, values) => {
aggregate.filter_has_not_document_types = Array.isArray(values)
@@ -399,15 +351,6 @@ const FILTER_HANDLERS: Record<TriggerFilterType, FilterHandler> = {
extract: (trigger) => trigger.filter_has_storage_path,
hasValue: (value) => value !== null && value !== undefined,
},
[TriggerFilterType.StoragePathAny]: {
apply: (aggregate, values) => {
aggregate.filter_has_any_storage_paths = Array.isArray(values)
? [...values]
: [values]
},
extract: (trigger) => trigger.filter_has_any_storage_paths,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.StoragePathNot]: {
apply: (aggregate, values) => {
aggregate.filter_has_not_storage_paths = Array.isArray(values)
@@ -699,11 +642,8 @@ export class WorkflowEditDialogComponent
filter_has_tags: [],
filter_has_all_tags: [],
filter_has_not_tags: [],
filter_has_any_correspondents: [],
filter_has_not_correspondents: [],
filter_has_any_document_types: [],
filter_has_not_document_types: [],
filter_has_any_storage_paths: [],
filter_has_not_storage_paths: [],
filter_has_correspondent: null,
filter_has_document_type: null,
@@ -730,16 +670,10 @@ export class WorkflowEditDialogComponent
trigger.filter_has_tags = aggregate.filter_has_tags
trigger.filter_has_all_tags = aggregate.filter_has_all_tags
trigger.filter_has_not_tags = aggregate.filter_has_not_tags
trigger.filter_has_any_correspondents =
aggregate.filter_has_any_correspondents
trigger.filter_has_not_correspondents =
aggregate.filter_has_not_correspondents
trigger.filter_has_any_document_types =
aggregate.filter_has_any_document_types
trigger.filter_has_not_document_types =
aggregate.filter_has_not_document_types
trigger.filter_has_any_storage_paths =
aggregate.filter_has_any_storage_paths
trigger.filter_has_not_storage_paths =
aggregate.filter_has_not_storage_paths
trigger.filter_has_correspondent =
@@ -922,11 +856,8 @@ export class WorkflowEditDialogComponent
case TriggerFilterType.TagsAny:
case TriggerFilterType.TagsAll:
case TriggerFilterType.TagsNone:
case TriggerFilterType.CorrespondentAny:
case TriggerFilterType.CorrespondentNot:
case TriggerFilterType.DocumentTypeAny:
case TriggerFilterType.DocumentTypeNot:
case TriggerFilterType.StoragePathAny:
case TriggerFilterType.StoragePathNot:
return true
default:
@@ -1248,11 +1179,8 @@ export class WorkflowEditDialogComponent
filter_has_tags: [],
filter_has_all_tags: [],
filter_has_not_tags: [],
filter_has_any_correspondents: [],
filter_has_not_correspondents: [],
filter_has_any_document_types: [],
filter_has_not_document_types: [],
filter_has_any_storage_paths: [],
filter_has_not_storage_paths: [],
filter_custom_field_query: null,
filter_has_correspondent: null,
@@ -1355,6 +1283,11 @@ export class WorkflowEditDialogComponent
const actionField = this.actionFields.at(event.previousIndex)
this.actionFields.removeAt(event.previousIndex)
this.actionFields.insert(event.currentIndex, actionField)
// removing id will effectively re-create the actions in this order
this.object.actions.forEach((a) => (a.id = null))
this.actionFields.controls.forEach((c) =>
c.get('id').setValue(null, { emitEvent: false })
)
}
save(): void {

View File

@@ -1,11 +1,5 @@
<div class="mb-3" [class.pb-3]="error">
<div class="row">
<div class="d-flex align-items-center position-relative hidden-button-container" [class.col-md-3]="horizontal">
@if (title) {
<label class="form-label" [class.mb-md-0]="horizontal" [for]="inputId">{{title}}</label>
}
</div>
<div class="position-relative" [class.col-md-9]="horizontal">
<div class="mb-3">
<label class="form-label" [for]="inputId">{{title}}</label>
<div class="input-group" [class.is-invalid]="error">
<input #inputField [type]="showReveal && textVisible ? 'text' : 'password'" class="form-control" [class.is-invalid]="error" [id]="inputId" [(ngModel)]="value" (focus)="onFocus()" (focusout)="onFocusOut()" (change)="onChange(value)" [disabled]="disabled" [autocomplete]="autocomplete">
@if (showReveal) {
@@ -20,5 +14,4 @@
@if (hint) {
<small class="form-text text-muted" [innerHTML]="hint"></small>
}
</div>
</div>

View File

@@ -28,7 +28,7 @@
</button>
</ng-template>
<ng-template ng-option-tmp let-item="item" let-index="index" let-search="searchTerm">
<div class="tag-option-row d-flex align-items-center" [class.w-auto]="!getTag(item.id)?.parent">
<div class="tag-option-row d-flex align-items-center">
@if (item.id && tags) {
@if (getTag(item.id)?.parent) {
<i-bs name="list-nested" class="me-1"></i-bs>

View File

@@ -22,8 +22,8 @@
}
// Dropdown hierarchy reveal for ng-select options
:host ::ng-deep .ng-dropdown-panel .ng-option {
overflow-x: auto !important;
::ng-deep .ng-dropdown-panel .ng-option {
overflow-x: scroll;
.tag-option-row {
font-size: 1rem;
@@ -41,12 +41,12 @@
}
}
:host ::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-reveal,
:host ::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-reveal {
::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-reveal,
::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-reveal {
max-width: 1000px;
}
::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-indicator,
:host ::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-indicator {
::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-indicator {
background: transparent;
}

View File

@@ -15,12 +15,6 @@
@if (hint) {
<small class="form-text text-muted" [innerHTML]="hint"></small>
}
@if (getSuggestion()?.length > 0) {
<small>
<span i18n>Suggestion:</span>&nbsp;
<a (click)="applySuggestion(s)" [routerLink]="[]">{{getSuggestion()}}</a>&nbsp;
</small>
}
<div class="invalid-feedback position-absolute top-100">
{{error}}
</div>

View File

@@ -26,20 +26,10 @@ describe('TextComponent', () => {
it('should support use of input field', () => {
expect(component.value).toBeUndefined()
input.value = 'foo'
input.dispatchEvent(new Event('input'))
fixture.detectChanges()
expect(component.value).toBe('foo')
})
it('should support suggestion', () => {
component.value = 'foo'
component.suggestion = 'foo'
expect(component.getSuggestion()).toBe('')
component.value = 'bar'
expect(component.getSuggestion()).toBe('foo')
component.applySuggestion()
fixture.detectChanges()
expect(component.value).toBe('foo')
// TODO: why doesn't this work?
// input.value = 'foo'
// input.dispatchEvent(new Event('change'))
// fixture.detectChanges()
// expect(component.value).toEqual('foo')
})
})

View File

@@ -4,7 +4,6 @@ import {
NG_VALUE_ACCESSOR,
ReactiveFormsModule,
} from '@angular/forms'
import { RouterLink } from '@angular/router'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { AbstractInputComponent } from '../abstract-input'
@@ -19,12 +18,7 @@ import { AbstractInputComponent } from '../abstract-input'
selector: 'pngx-input-text',
templateUrl: './text.component.html',
styleUrls: ['./text.component.scss'],
imports: [
FormsModule,
ReactiveFormsModule,
NgxBootstrapIconsModule,
RouterLink,
],
imports: [FormsModule, ReactiveFormsModule, NgxBootstrapIconsModule],
})
export class TextComponent extends AbstractInputComponent<string> {
@Input()
@@ -33,19 +27,7 @@ export class TextComponent extends AbstractInputComponent<string> {
@Input()
placeholder: string = ''
@Input()
suggestion: string = ''
constructor() {
super()
}
getSuggestion() {
return this.value !== this.suggestion ? this.suggestion : ''
}
applySuggestion() {
this.value = this.suggestion
this.onChange(this.value)
}
}

View File

@@ -1,49 +0,0 @@
<div class="btn-group">
<button type="button" class="btn btn-sm btn-outline-primary" (click)="clickSuggest()" [disabled]="loading || (suggestions && !aiEnabled)">
@if (loading) {
<div class="spinner-border spinner-border-sm" role="status"></div>
} @else {
<i-bs width="1.2em" height="1.2em" name="stars"></i-bs>
}
<span class="d-none d-lg-inline ps-1" i18n>Suggest</span>
@if (totalSuggestions > 0) {
<span class="badge bg-primary ms-2">{{ totalSuggestions }}</span>
}
</button>
@if (aiEnabled) {
<div class="btn-group" ngbDropdown #dropdown="ngbDropdown" [popperOptions]="popperOptions">
<button type="button" class="btn btn-sm btn-outline-primary" ngbDropdownToggle [disabled]="loading || !suggestions" aria-expanded="false" aria-controls="suggestionsDropdown" aria-label="Suggestions dropdown">
<span class="visually-hidden" i18n>Show suggestions</span>
</button>
<div ngbDropdownMenu aria-labelledby="suggestionsDropdown" class="shadow suggestions-dropdown">
<div class="list-group list-group-flush small pb-0">
@if (!suggestions?.suggested_tags && !suggestions?.suggested_document_types && !suggestions?.suggested_correspondents) {
<div class="list-group-item text-muted fst-italic">
<small class="text-muted small fst-italic" i18n>No novel suggestions</small>
</div>
}
@if (suggestions?.suggested_tags.length > 0) {
<small class="list-group-item text-uppercase text-muted small">Tags</small>
@for (tag of suggestions.suggested_tags; track tag) {
<button type="button" class="list-group-item list-group-item-action bg-light" (click)="addTag.emit(tag)" i18n>{{ tag }}</button>
}
}
@if (suggestions?.suggested_document_types.length > 0) {
<div class="list-group-item text-uppercase text-muted small">Document Types</div>
@for (type of suggestions.suggested_document_types; track type) {
<button type="button" class="list-group-item list-group-item-action bg-light" (click)="addDocumentType.emit(type)" i18n>{{ type }}</button>
}
}
@if (suggestions?.suggested_correspondents.length > 0) {
<div class="list-group-item text-uppercase text-muted small">Correspondents</div>
@for (correspondent of suggestions.suggested_correspondents; track correspondent) {
<button type="button" class="list-group-item list-group-item-action bg-light" (click)="addCorrespondent.emit(correspondent)" i18n>{{ correspondent }}</button>
}
}
</div>
</div>
</div>
}
</div>

View File

@@ -1,3 +0,0 @@
.suggestions-dropdown {
min-width: 250px;
}

View File

@@ -1,51 +0,0 @@
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { NgbDropdownModule } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { SuggestionsDropdownComponent } from './suggestions-dropdown.component'
describe('SuggestionsDropdownComponent', () => {
let component: SuggestionsDropdownComponent
let fixture: ComponentFixture<SuggestionsDropdownComponent>
beforeEach(() => {
TestBed.configureTestingModule({
imports: [
NgbDropdownModule,
NgxBootstrapIconsModule.pick(allIcons),
SuggestionsDropdownComponent,
],
providers: [],
})
fixture = TestBed.createComponent(SuggestionsDropdownComponent)
component = fixture.componentInstance
fixture.detectChanges()
})
it('should calculate totalSuggestions', () => {
component.suggestions = {
suggested_correspondents: ['John Doe'],
suggested_tags: ['Tag1', 'Tag2'],
suggested_document_types: ['Type1'],
}
expect(component.totalSuggestions).toBe(4)
})
it('should emit getSuggestions when clickSuggest is called and suggestions are null', () => {
jest.spyOn(component.getSuggestions, 'emit')
component.suggestions = null
component.clickSuggest()
expect(component.getSuggestions.emit).toHaveBeenCalled()
})
it('should toggle dropdown when clickSuggest is called and suggestions are not null', () => {
component.aiEnabled = true
fixture.detectChanges()
component.suggestions = {
suggested_correspondents: [],
suggested_tags: [],
suggested_document_types: [],
}
component.clickSuggest()
expect(component.dropdown.open).toBeTruthy()
})
})

View File

@@ -1,64 +0,0 @@
import {
Component,
EventEmitter,
Input,
Output,
ViewChild,
} from '@angular/core'
import { NgbDropdown, NgbDropdownModule } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { DocumentSuggestions } from 'src/app/data/document-suggestions'
import { pngxPopperOptions } from 'src/app/utils/popper-options'
@Component({
selector: 'pngx-suggestions-dropdown',
imports: [NgbDropdownModule, NgxBootstrapIconsModule],
templateUrl: './suggestions-dropdown.component.html',
styleUrl: './suggestions-dropdown.component.scss',
})
export class SuggestionsDropdownComponent {
public popperOptions = pngxPopperOptions
@ViewChild('dropdown') dropdown: NgbDropdown
@Input()
suggestions: DocumentSuggestions = null
@Input()
aiEnabled: boolean = false
@Input()
loading: boolean = false
@Input()
disabled: boolean = false
@Output()
getSuggestions: EventEmitter<SuggestionsDropdownComponent> =
new EventEmitter()
@Output()
addTag: EventEmitter<string> = new EventEmitter()
@Output()
addDocumentType: EventEmitter<string> = new EventEmitter()
@Output()
addCorrespondent: EventEmitter<string> = new EventEmitter()
public clickSuggest(): void {
if (!this.suggestions) {
this.getSuggestions.emit(this)
} else {
this.dropdown?.toggle()
}
}
get totalSuggestions(): number {
return (
this.suggestions?.suggested_correspondents?.length +
this.suggestions?.suggested_tags?.length +
this.suggestions?.suggested_document_types?.length || 0
)
}
}

View File

@@ -266,43 +266,6 @@
}
</span>
</dd>
@if (aiEnabled) {
<dt i18n>AI Index</dt>
<dd class="d-flex align-items-center">
<button class="btn btn-sm d-flex align-items-center btn-dark text-uppercase small" [ngbPopover]="llmIndexStatus" triggers="click mouseenter:mouseleave">
{{status.tasks.llmindex_status}}
@if (status.tasks.llmindex_status === 'OK') {
@if (isStale(status.tasks.llmindex_last_modified)) {
<i-bs name="exclamation-triangle-fill" class="text-warning ms-2 lh-1"></i-bs>
} @else {
<i-bs name="check-circle-fill" class="text-primary ms-2 lh-1"></i-bs>
}
} @else {
<i-bs name="exclamation-triangle-fill" class="ms-2 lh-1"
[class.text-danger]="status.tasks.llmindex_status === SystemStatusItemStatus.ERROR"
[class.text-warning]="status.tasks.llmindex_status === SystemStatusItemStatus.WARNING"
[class.text-muted]="status.tasks.llmindex_status === SystemStatusItemStatus.DISABLED"></i-bs>
}
</button>
@if (currentUserIsSuperUser) {
@if (isRunning(PaperlessTaskName.LLMIndexUpdate)) {
<div class="spinner-border spinner-border-sm ms-2" role="status"></div>
} @else {
<button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskName.LLMIndexUpdate)">
<i-bs name="play-fill"></i-bs>&nbsp;
<ng-container i18n>Run Task</ng-container>
</button>
}
}
</dd>
<ng-template #llmIndexStatus>
@if (status.tasks.llmindex_status === 'OK') {
<h6><ng-container i18n>Last Run</ng-container>:</h6> <span class="font-monospace small">{{status.tasks.llmindex_last_modified | customDate:'medium'}}</span>
} @else {
<h6><ng-container i18n>Error</ng-container>:</h6> <span class="font-monospace small">{{status.tasks.llmindex_error}}</span>
}
</ng-template>
}
</dl>
</div>
</div>

View File

@@ -68,9 +68,6 @@ const status: SystemStatus = {
sanity_check_status: SystemStatusItemStatus.OK,
sanity_check_last_run: new Date().toISOString(),
sanity_check_error: null,
llmindex_status: SystemStatusItemStatus.OK,
llmindex_last_modified: new Date().toISOString(),
llmindex_error: null,
},
}

View File

@@ -13,11 +13,9 @@ import {
SystemStatus,
SystemStatusItemStatus,
} from 'src/app/data/system-status'
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
import { FileSizePipe } from 'src/app/pipes/file-size.pipe'
import { PermissionsService } from 'src/app/services/permissions.service'
import { SettingsService } from 'src/app/services/settings.service'
import { SystemStatusService } from 'src/app/services/system-status.service'
import { TasksService } from 'src/app/services/tasks.service'
import { ToastService } from 'src/app/services/toast.service'
@@ -46,7 +44,6 @@ export class SystemStatusDialogComponent implements OnInit, OnDestroy {
private toastService = inject(ToastService)
private permissionsService = inject(PermissionsService)
private websocketStatusService = inject(WebsocketStatusService)
private settingsService = inject(SettingsService)
public SystemStatusItemStatus = SystemStatusItemStatus
public PaperlessTaskName = PaperlessTaskName
@@ -63,10 +60,6 @@ export class SystemStatusDialogComponent implements OnInit, OnDestroy {
return this.permissionsService.isSuperUser()
}
get aiEnabled(): boolean {
return this.settingsService.get(SETTINGS_KEYS.AI_ENABLED)
}
public ngOnInit() {
this.versionMismatch =
environment.production &&

View File

@@ -74,6 +74,16 @@
</div>
</div>
<pngx-custom-fields-dropdown
*pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.CustomField }"
[documentId]="documentId"
[disabled]="!userCanEdit"
[existingFields]="document?.custom_fields"
(created)="refreshCustomFields()"
(added)="addField($event)">
</pngx-custom-fields-dropdown>
<div class="ms-auto" ngbDropdown>
<button class="btn btn-sm btn-outline-primary" id="sendDropdown" ngbDropdownToggle>
<i-bs name="send"></i-bs>
@@ -94,7 +104,7 @@
</pngx-page-header>
<div class="row">
<div class="col-md-6 col-xl-5 mb-4">
<div class="col-md-6 col-xl-4 mb-4">
<form [formGroup]='documentForm' (ngSubmit)="save()">
@@ -111,32 +121,6 @@
</button>
</div>
<ng-container *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.Document }">
<div class="btn-group pb-3 ms-auto">
<pngx-suggestions-dropdown *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.Document }"
[disabled]="!userCanEdit || suggestionsLoading"
[loading]="suggestionsLoading"
[suggestions]="suggestions"
[aiEnabled]="aiEnabled"
(getSuggestions)="getSuggestions()"
(addTag)="createTag($event)"
(addDocumentType)="createDocumentType($event)"
(addCorrespondent)="createCorrespondent($event)">
</pngx-suggestions-dropdown>
</div>
<div class="btn-group pb-3 ms-2">
<pngx-custom-fields-dropdown
*pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.CustomField }"
[documentId]="documentId"
[disabled]="!userCanEdit"
[existingFields]="document?.custom_fields"
(created)="refreshCustomFields()"
(added)="addField($event)">
</pngx-custom-fields-dropdown>
</div>
</ng-container>
<ng-container *ngTemplateOutlet="saveButtons"></ng-container>
</div>
@@ -145,7 +129,7 @@
<a ngbNavLink i18n>Details</a>
<ng-template ngbNavContent>
<div>
<pngx-input-text #inputTitle i18n-title title="Title" formControlName="title" [horizontal]="true" [suggestion]="suggestions?.title" (keyup)="titleKeyUp($event)" [error]="error?.title"></pngx-input-text>
<pngx-input-text #inputTitle i18n-title title="Title" formControlName="title" [horizontal]="true" (keyup)="titleKeyUp($event)" [error]="error?.title"></pngx-input-text>
<pngx-input-number i18n-title title="Archive serial number" [error]="error?.archive_serial_number" [horizontal]="true" formControlName='archive_serial_number'></pngx-input-number>
<pngx-input-date i18n-title title="Date created" formControlName="created" [suggestions]="suggestions?.dates" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event)"
[error]="error?.created"></pngx-input-date>
@@ -155,7 +139,7 @@
(createNew)="createDocumentType($event)" [hideAddButton]="createDisabled(DataType.DocumentType)" [suggestions]="suggestions?.document_types" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.DocumentType }"></pngx-input-select>
<pngx-input-select [items]="storagePaths" i18n-title title="Storage path" formControlName="storage_path" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.StoragePath)"
(createNew)="createStoragePath($event)" [hideAddButton]="createDisabled(DataType.StoragePath)" [suggestions]="suggestions?.storage_paths" i18n-placeholder placeholder="Default" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.StoragePath }"></pngx-input-select>
<pngx-input-tags #tagsInput formControlName="tags" [suggestions]="suggestions?.tags" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Tag)" [hideAddButton]="createDisabled(DataType.Tag)" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Tag }"></pngx-input-tags>
<pngx-input-tags formControlName="tags" [suggestions]="suggestions?.tags" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Tag)" [hideAddButton]="createDisabled(DataType.Tag)" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Tag }"></pngx-input-tags>
@for (fieldInstance of document?.custom_fields; track fieldInstance.field; let i = $index) {
<div [formGroup]="customFieldFormFields.controls[i]">
@switch (getCustomFieldFromInstance(fieldInstance)?.data_type) {
@@ -377,14 +361,14 @@
</form>
</div>
<div class="col-md-6 col-xl-7 mb-3 d-none d-md-block position-relative" #pdfPreview>
<div class="col-md-6 col-xl-8 mb-3 d-none d-md-block position-relative" #pdfPreview>
<ng-container *ngTemplateOutlet="previewContent"></ng-container>
</div>
</div>
<ng-template #saveButtons>
<div class="btn-group pb-3 ms-4">
<div class="btn-group pb-3 ms-auto">
<ng-container *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.Document }">
<button type="submit" class="order-3 btn btn-sm btn-primary" i18n [disabled]="!userCanEdit || networkActive || (isDirty$ | async) !== true">Save</button>
@if (hasNext()) {

View File

@@ -157,16 +157,6 @@ describe('DocumentDetailComponent', () => {
{
provide: TagService,
useValue: {
getCachedMany: (ids: number[]) =>
of(
ids.map((id) => ({
id,
name: `Tag${id}`,
is_inbox_tag: true,
color: '#ff0000',
text_color: '#000000',
}))
),
listAll: () =>
of({
count: 3,
@@ -393,32 +383,8 @@ describe('DocumentDetailComponent', () => {
currentUserCan = true
})
it('should support creating tag, remove from suggestions', () => {
it('should support creating document type', () => {
initNormally()
component.suggestions = {
suggested_tags: ['Tag1', 'NewTag12'],
}
let openModal: NgbModalRef
modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
const modalSpy = jest.spyOn(modalService, 'open')
component.createTag('NewTag12')
expect(modalSpy).toHaveBeenCalled()
openModal.componentInstance.succeeded.next({
id: 12,
name: 'NewTag12',
is_inbox_tag: true,
color: '#ff0000',
text_color: '#000000',
})
expect(component.tagsInput.value).toContain(12)
expect(component.suggestions.suggested_tags).not.toContain('NewTag12')
})
it('should support creating document type, remove from suggestions', () => {
initNormally()
component.suggestions = {
suggested_document_types: ['DocumentType1', 'NewDocType2'],
}
let openModal: NgbModalRef
modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
const modalSpy = jest.spyOn(modalService, 'open')
@@ -426,16 +392,10 @@ describe('DocumentDetailComponent', () => {
expect(modalSpy).toHaveBeenCalled()
openModal.componentInstance.succeeded.next({ id: 12, name: 'NewDocType12' })
expect(component.documentForm.get('document_type').value).toEqual(12)
expect(component.suggestions.suggested_document_types).not.toContain(
'NewDocType2'
)
})
it('should support creating correspondent, remove from suggestions', () => {
it('should support creating correspondent', () => {
initNormally()
component.suggestions = {
suggested_correspondents: ['Correspondent1', 'NewCorrrespondent12'],
}
let openModal: NgbModalRef
modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
const modalSpy = jest.spyOn(modalService, 'open')
@@ -446,9 +406,6 @@ describe('DocumentDetailComponent', () => {
name: 'NewCorrrespondent12',
})
expect(component.documentForm.get('correspondent').value).toEqual(12)
expect(component.suggestions.suggested_correspondents).not.toContain(
'NewCorrrespondent12'
)
})
it('should support creating storage path', () => {
@@ -1039,7 +996,7 @@ describe('DocumentDetailComponent', () => {
expect(component.document.custom_fields).toHaveLength(initialLength - 1)
expect(component.customFieldFormFields).toHaveLength(initialLength - 1)
expect(
fixture.debugElement.query(By.css('form ul')).nativeElement.textContent
fixture.debugElement.query(By.css('form')).nativeElement.textContent
).not.toContain('Field 1')
const patchSpy = jest.spyOn(documentService, 'patch')
component.save(true)
@@ -1130,22 +1087,10 @@ describe('DocumentDetailComponent', () => {
it('should get suggestions', () => {
const suggestionsSpy = jest.spyOn(documentService, 'getSuggestions')
suggestionsSpy.mockReturnValue(
of({
tags: [42, 43],
suggested_tags: [],
suggested_document_types: [],
suggested_correspondents: [],
})
)
suggestionsSpy.mockReturnValue(of({ tags: [42, 43] }))
initNormally()
expect(suggestionsSpy).toHaveBeenCalled()
expect(component.suggestions).toEqual({
tags: [42, 43],
suggested_tags: [],
suggested_document_types: [],
suggested_correspondents: [],
})
expect(component.suggestions).toEqual({ tags: [42, 43] })
})
it('should show error if needed for get suggestions', () => {

View File

@@ -31,7 +31,6 @@ import {
map,
switchMap,
takeUntil,
tap,
} from 'rxjs/operators'
import { Correspondent } from 'src/app/data/correspondent'
import { CustomField, CustomFieldDataType } from 'src/app/data/custom-field'
@@ -77,7 +76,6 @@ import { DocumentTypeService } from 'src/app/services/rest/document-type.service
import { DocumentService } from 'src/app/services/rest/document.service'
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
import { TagService } from 'src/app/services/rest/tag.service'
import { UserService } from 'src/app/services/rest/user.service'
import { SettingsService } from 'src/app/services/settings.service'
import { ToastService } from 'src/app/services/toast.service'
@@ -91,7 +89,6 @@ import { CorrespondentEditDialogComponent } from '../common/edit-dialog/correspo
import { DocumentTypeEditDialogComponent } from '../common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component'
import { EditDialogMode } from '../common/edit-dialog/edit-dialog.component'
import { StoragePathEditDialogComponent } from '../common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component'
import { TagEditDialogComponent } from '../common/edit-dialog/tag-edit-dialog/tag-edit-dialog.component'
import { EmailDocumentDialogComponent } from '../common/email-document-dialog/email-document-dialog.component'
import { CheckComponent } from '../common/input/check/check.component'
import { DateComponent } from '../common/input/date/date.component'
@@ -110,7 +107,6 @@ import {
PdfEditorEditMode,
} from '../common/pdf-editor/pdf-editor.component'
import { ShareLinksDialogComponent } from '../common/share-links-dialog/share-links-dialog.component'
import { SuggestionsDropdownComponent } from '../common/suggestions-dropdown/suggestions-dropdown.component'
import { DocumentHistoryComponent } from '../document-history/document-history.component'
import { DocumentNotesComponent } from '../document-notes/document-notes.component'
import { ComponentWithPermissions } from '../with-permissions/with-permissions.component'
@@ -167,7 +163,6 @@ export enum ZoomSetting {
NumberComponent,
MonetaryComponent,
UrlComponent,
SuggestionsDropdownComponent,
CustomDatePipe,
FileSizePipe,
IfPermissionsDirective,
@@ -189,7 +184,6 @@ export class DocumentDetailComponent
{
private documentsService = inject(DocumentService)
private route = inject(ActivatedRoute)
private tagService = inject(TagService)
private correspondentService = inject(CorrespondentService)
private documentTypeService = inject(DocumentTypeService)
private router = inject(Router)
@@ -212,8 +206,6 @@ export class DocumentDetailComponent
@ViewChild('inputTitle')
titleInput: TextComponent
@ViewChild('tagsInput') tagsInput: TagsComponent
expandOriginalMetadata = false
expandArchivedMetadata = false
@@ -225,7 +217,6 @@ export class DocumentDetailComponent
document: Document
metadata: DocumentMetadata
suggestions: DocumentSuggestions
suggestionsLoading: boolean = false
users: User[]
title: string
@@ -285,10 +276,10 @@ export class DocumentDetailComponent
if (
element &&
element.nativeElement.offsetParent !== null &&
this.nav?.activeId == DocumentDetailNavIDs.Preview
this.nav?.activeId == 4
) {
// its visible
setTimeout(() => this.nav?.select(DocumentDetailNavIDs.Details))
setTimeout(() => this.nav?.select(1))
}
}
@@ -307,10 +298,6 @@ export class DocumentDetailComponent
return this.deviceDetectorService.isMobile()
}
get aiEnabled(): boolean {
return this.settings.get(SETTINGS_KEYS.AI_ENABLED)
}
get archiveContentRenderType(): ContentRenderType {
return this.document?.archived_file_name
? this.getRenderType('application/pdf')
@@ -695,25 +682,8 @@ export class DocumentDetailComponent
PermissionType.Document
)
) {
this.tagService.getCachedMany(doc.tags).subscribe((tags) => {
// only show suggestions if document has inbox tags
if (tags.some((tag) => tag.is_inbox_tag)) {
this.getSuggestions()
}
})
}
this.title = this.documentTitlePipe.transform(doc.title)
this.prepareForm(doc)
}
get customFieldFormFields(): FormArray {
return this.documentForm.get('custom_fields') as FormArray
}
getSuggestions() {
this.suggestionsLoading = true
this.documentsService
.getSuggestions(this.documentId)
.getSuggestions(doc.id)
.pipe(
first(),
takeUntil(this.unsubscribeNotifier),
@@ -722,11 +692,9 @@ export class DocumentDetailComponent
.subscribe({
next: (result) => {
this.suggestions = result
this.suggestionsLoading = false
},
error: (error) => {
this.suggestions = null
this.suggestionsLoading = false
this.toastService.showError(
$localize`Error retrieving suggestions.`,
error
@@ -734,37 +702,12 @@ export class DocumentDetailComponent
},
})
}
this.title = this.documentTitlePipe.transform(doc.title)
this.prepareForm(doc)
}
createTag(newName: string) {
var modal = this.modalService.open(TagEditDialogComponent, {
backdrop: 'static',
})
modal.componentInstance.dialogMode = EditDialogMode.CREATE
if (newName) modal.componentInstance.object = { name: newName }
modal.componentInstance.succeeded
.pipe(
tap((newTag: Tag) => {
// remove from suggestions if present
if (this.suggestions) {
this.suggestions = {
...this.suggestions,
suggested_tags: this.suggestions.suggested_tags.filter(
(tag) => tag !== newTag.name
),
}
}
}),
switchMap((newTag: Tag) => {
return this.tagService
.listAll()
.pipe(map((tags) => ({ newTag, tags })))
}),
takeUntil(this.unsubscribeNotifier)
)
.subscribe(({ newTag, tags }) => {
this.tagsInput.tags = tags.results
this.tagsInput.addTag(newTag.id)
})
get customFieldFormFields(): FormArray {
return this.documentForm.get('custom_fields') as FormArray
}
createDocumentType(newName: string) {
@@ -786,12 +729,6 @@ export class DocumentDetailComponent
this.documentTypes = documentTypes.results
this.documentForm.get('document_type').setValue(newDocumentType.id)
this.documentForm.get('document_type').markAsDirty()
if (this.suggestions) {
this.suggestions.suggested_document_types =
this.suggestions.suggested_document_types.filter(
(dt) => dt !== newName
)
}
})
}
@@ -816,12 +753,6 @@ export class DocumentDetailComponent
this.correspondents = correspondents.results
this.documentForm.get('correspondent').setValue(newCorrespondent.id)
this.documentForm.get('correspondent').markAsDirty()
if (this.suggestions) {
this.suggestions.suggested_correspondents =
this.suggestions.suggested_correspondents.filter(
(c) => c !== newName
)
}
})
}

View File

@@ -1,17 +1,11 @@
export interface DocumentSuggestions {
title?: string
tags?: number[]
suggested_tags?: string[]
correspondents?: number[]
suggested_correspondents?: string[]
document_types?: number[]
suggested_document_types?: string[]
storage_paths?: number[]
suggested_storage_paths?: string[]
dates?: string[] // ISO-formatted date string e.g. 2022-11-03
}

View File

@@ -44,24 +44,12 @@ export enum ConfigOptionType {
Boolean = 'boolean',
JSON = 'json',
File = 'file',
Password = 'password',
}
export const ConfigCategory = {
General: $localize`General Settings`,
OCR: $localize`OCR Settings`,
Barcode: $localize`Barcode Settings`,
AI: $localize`AI Settings`,
}
export const LLMEmbeddingBackendConfig = {
OPENAI: 'openai',
HUGGINGFACE: 'huggingface',
}
export const LLMBackendConfig = {
OPENAI: 'openai',
OLLAMA: 'ollama',
}
export interface ConfigOption {
@@ -71,7 +59,6 @@ export interface ConfigOption {
choices?: Array<{ id: string; name: string }>
config_key?: string
category: string
note?: string
}
function mapToItems(enumObj: Object): Array<{ id: string; name: string }> {
@@ -271,58 +258,6 @@ export const PaperlessConfigOptions: ConfigOption[] = [
config_key: 'PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING',
category: ConfigCategory.Barcode,
},
{
key: 'ai_enabled',
title: $localize`AI Enabled`,
type: ConfigOptionType.Boolean,
config_key: 'PAPERLESS_AI_ENABLED',
category: ConfigCategory.AI,
note: $localize`Consider privacy implications when enabling AI features, especially if using a remote model.`,
},
{
key: 'llm_embedding_backend',
title: $localize`LLM Embedding Backend`,
type: ConfigOptionType.Select,
choices: mapToItems(LLMEmbeddingBackendConfig),
config_key: 'PAPERLESS_AI_LLM_EMBEDDING_BACKEND',
category: ConfigCategory.AI,
},
{
key: 'llm_embedding_model',
title: $localize`LLM Embedding Model`,
type: ConfigOptionType.String,
config_key: 'PAPERLESS_AI_LLM_EMBEDDING_MODEL',
category: ConfigCategory.AI,
},
{
key: 'llm_backend',
title: $localize`LLM Backend`,
type: ConfigOptionType.Select,
choices: mapToItems(LLMBackendConfig),
config_key: 'PAPERLESS_AI_LLM_BACKEND',
category: ConfigCategory.AI,
},
{
key: 'llm_model',
title: $localize`LLM Model`,
type: ConfigOptionType.String,
config_key: 'PAPERLESS_AI_LLM_MODEL',
category: ConfigCategory.AI,
},
{
key: 'llm_api_key',
title: $localize`LLM API Key`,
type: ConfigOptionType.Password,
config_key: 'PAPERLESS_AI_LLM_API_KEY',
category: ConfigCategory.AI,
},
{
key: 'llm_endpoint',
title: $localize`LLM Endpoint`,
type: ConfigOptionType.String,
config_key: 'PAPERLESS_AI_LLM_ENDPOINT',
category: ConfigCategory.AI,
},
]
export interface PaperlessConfig extends ObjectWithId {
@@ -352,11 +287,4 @@ export interface PaperlessConfig extends ObjectWithId {
barcode_max_pages: number
barcode_enable_tag: boolean
barcode_tag_mapping: object
ai_enabled: boolean
llm_embedding_backend: string
llm_embedding_model: string
llm_backend: string
llm_model: string
llm_api_key: string
llm_endpoint: string
}

View File

@@ -11,7 +11,6 @@ export enum PaperlessTaskName {
TrainClassifier = 'train_classifier',
SanityCheck = 'check_sanity',
IndexOptimize = 'index_optimize',
LLMIndexUpdate = 'llmindex_update',
}
export enum PaperlessTaskStatus {

View File

@@ -7,7 +7,6 @@ export enum SystemStatusItemStatus {
OK = 'OK',
ERROR = 'ERROR',
WARNING = 'WARNING',
DISABLED = 'DISABLED',
}
export interface SystemStatus {
@@ -44,9 +43,6 @@ export interface SystemStatus {
sanity_check_status: SystemStatusItemStatus
sanity_check_last_run: string // ISO date string
sanity_check_error: string
llmindex_status: SystemStatusItemStatus
llmindex_last_modified: string // ISO date string
llmindex_error: string
}
websocket_connected?: SystemStatusItemStatus // added client-side
}

View File

@@ -76,7 +76,6 @@ export const SETTINGS_KEYS = {
GMAIL_OAUTH_URL: 'gmail_oauth_url',
OUTLOOK_OAUTH_URL: 'outlook_oauth_url',
EMAIL_ENABLED: 'email_enabled',
AI_ENABLED: 'ai_enabled',
}
export const SETTINGS: UiSetting[] = [
@@ -290,9 +289,4 @@ export const SETTINGS: UiSetting[] = [
type: 'string',
default: 'page-width', // ZoomSetting from 'document-detail.component'
},
{
key: SETTINGS_KEYS.AI_ENABLED,
type: 'boolean',
default: false,
},
]

View File

@@ -44,16 +44,10 @@ export interface WorkflowTrigger extends ObjectWithId {
filter_has_not_tags?: number[] // Tag.id[]
filter_has_any_correspondents?: number[] // Correspondent.id[]
filter_has_not_correspondents?: number[] // Correspondent.id[]
filter_has_any_document_types?: number[] // DocumentType.id[]
filter_has_not_document_types?: number[] // DocumentType.id[]
filter_has_any_storage_paths?: number[] // StoragePath.id[]
filter_has_not_storage_paths?: number[] // StoragePath.id[]
filter_custom_field_query?: string

View File

@@ -4,15 +4,15 @@ import {
HttpInterceptor,
HttpRequest,
} from '@angular/common/http'
import { inject, Injectable } from '@angular/core'
import { Injectable, inject } from '@angular/core'
import { Meta } from '@angular/platform-browser'
import { CookieService } from 'ngx-cookie-service'
import { Observable } from 'rxjs'
@Injectable()
export class CsrfInterceptor implements HttpInterceptor {
private cookieService: CookieService = inject(CookieService)
private meta: Meta = inject(Meta)
private cookieService = inject(CookieService)
private meta = inject(Meta)
intercept(
request: HttpRequest<unknown>,

View File

@@ -1,58 +0,0 @@
import {
HttpEventType,
provideHttpClient,
withInterceptorsFromDi,
} from '@angular/common/http'
import {
HttpTestingController,
provideHttpClientTesting,
} from '@angular/common/http/testing'
import { TestBed } from '@angular/core/testing'
import { environment } from 'src/environments/environment'
import { ChatService } from './chat.service'
describe('ChatService', () => {
let service: ChatService
let httpMock: HttpTestingController
beforeEach(() => {
TestBed.configureTestingModule({
imports: [],
providers: [
ChatService,
provideHttpClient(withInterceptorsFromDi()),
provideHttpClientTesting(),
],
})
service = TestBed.inject(ChatService)
httpMock = TestBed.inject(HttpTestingController)
})
afterEach(() => {
httpMock.verify()
})
it('should stream chat messages', (done) => {
const documentId = 1
const prompt = 'Hello, world!'
const mockResponse = 'Partial response text'
const apiUrl = `${environment.apiBaseUrl}documents/chat/`
service.streamChat(documentId, prompt).subscribe((chunk) => {
expect(chunk).toBe(mockResponse)
done()
})
const req = httpMock.expectOne(apiUrl)
expect(req.request.method).toBe('POST')
expect(req.request.body).toEqual({
document_id: documentId,
q: prompt,
})
req.event({
type: HttpEventType.DownloadProgress,
partialText: mockResponse,
} as any)
})
})

View File

@@ -1,46 +0,0 @@
import {
HttpClient,
HttpDownloadProgressEvent,
HttpEventType,
} from '@angular/common/http'
import { inject, Injectable } from '@angular/core'
import { filter, map, Observable } from 'rxjs'
import { environment } from 'src/environments/environment'
export interface ChatMessage {
role: 'user' | 'assistant'
content: string
isStreaming?: boolean
}
@Injectable({
providedIn: 'root',
})
export class ChatService {
private http: HttpClient = inject(HttpClient)
streamChat(documentId: number, prompt: string): Observable<string> {
return this.http
.post(
`${environment.apiBaseUrl}documents/chat/`,
{
document_id: documentId,
q: prompt,
},
{
observe: 'events',
reportProgress: true,
responseType: 'text',
withCredentials: true,
}
)
.pipe(
map((event) => {
if (event.type === HttpEventType.DownloadProgress) {
return (event as HttpDownloadProgressEvent).partialText!
}
}),
filter((chunk) => !!chunk)
)
}
}

View File

@@ -6,7 +6,7 @@ export const environment = {
apiVersion: '9', // match src/paperless/settings.py
appTitle: 'Paperless-ngx',
tag: 'prod',
version: '2.20.5',
version: '2.20.3',
webSocketHost: window.location.host,
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
webSocketBaseUrl: base_url.pathname + 'ws/',

View File

@@ -10,7 +10,6 @@ import { DatePipe, registerLocaleData } from '@angular/common'
import {
HTTP_INTERCEPTORS,
provideHttpClient,
withFetch,
withInterceptorsFromDi,
} from '@angular/common/http'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
@@ -50,7 +49,6 @@ import {
caretDown,
caretUp,
chatLeftText,
chatSquareDots,
check,
check2All,
checkAll,
@@ -126,7 +124,6 @@ import {
sliders2Vertical,
sortAlphaDown,
sortAlphaUpAlt,
stars,
tag,
tagFill,
tags,
@@ -269,7 +266,6 @@ const icons = {
caretDown,
caretUp,
chatLeftText,
chatSquareDots,
check,
check2All,
checkAll,
@@ -345,7 +341,6 @@ const icons = {
sliders2Vertical,
sortAlphaDown,
sortAlphaUpAlt,
stars,
tagFill,
tag,
tags,
@@ -412,6 +407,6 @@ bootstrapApplication(AppComponent, {
CorrespondentNamePipe,
DocumentTypeNamePipe,
StoragePathNamePipe,
provideHttpClient(withInterceptorsFromDi(), withFetch()),
provideHttpClient(withInterceptorsFromDi()),
],
}).catch((err) => console.error(err))

View File

@@ -73,7 +73,6 @@ $form-check-radio-checked-bg-image-dark: url("data:image/svg+xml,<svg xmlns='htt
}
@mixin dark-mode {
color-scheme: dark;
--pngx-body-color-accent: #{$text-color-dark-bg-accent};
--pngx-bg-alt: #242529;
--pngx-bg-alt2: #232323;

View File

@@ -1,4 +1,5 @@
# this is here so that django finds the checks.
from documents.checks import changed_password_check
from documents.checks import parser_check
__all__ = ["parser_check"]
__all__ = ["changed_password_check", "parser_check"]

View File

@@ -60,6 +60,7 @@ class DocumentAdmin(GuardedModelAdmin):
"added",
"modified",
"mime_type",
"storage_type",
"filename",
"checksum",
"archive_filename",

View File

@@ -11,7 +11,6 @@ class DocumentsConfig(AppConfig):
from documents.signals import document_consumption_finished
from documents.signals import document_updated
from documents.signals.handlers import add_inbox_tags
from documents.signals.handlers import add_or_update_document_in_llm_index
from documents.signals.handlers import add_to_index
from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated
@@ -27,7 +26,6 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_storage_path)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_workflows_added)
document_consumption_finished.connect(add_or_update_document_in_llm_index)
document_updated.connect(run_workflows_updated)
import documents.schema # noqa: F401

View File

@@ -13,6 +13,7 @@ from pikepdf import Page
from pikepdf import PasswordError
from pikepdf import Pdf
from documents.consumer import ConsumerPreflightPlugin
from documents.converters import convert_from_tiff_to_pdf
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
@@ -193,6 +194,15 @@ class BarcodePlugin(ConsumeTaskPlugin):
):
logger.info(f"Found ASN in barcode: {located_asn}")
self.metadata.asn = located_asn
# (Re-)run the preflight ASN check
preflight_plugin = ConsumerPreflightPlugin(
input_doc=self.input_doc,
metadata=self.metadata,
status_mgr=self.status_mgr,
base_tmp_dir=self.base_tmp_dir,
task_id=self.task_id,
)
preflight_plugin.pre_check_asn_value()
def cleanup(self) -> None:
self.temp_dir.cleanup()

View File

@@ -41,7 +41,6 @@ class SuggestionCacheData:
CLASSIFIER_VERSION_KEY: Final[str] = "classifier_version"
CLASSIFIER_HASH_KEY: Final[str] = "classifier_hash"
CLASSIFIER_MODIFIED_KEY: Final[str] = "classifier_modified"
LLM_CACHE_CLASSIFIER_VERSION: Final[int] = 1000 # Marker distinguishing LLM suggestions
CACHE_1_MINUTE: Final[int] = 60
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
@@ -197,54 +196,6 @@ def refresh_suggestions_cache(
cache.touch(doc_key, timeout)
def get_llm_suggestion_cache(
document_id: int,
backend: str,
) -> SuggestionCacheData | None:
doc_key = get_suggestion_cache_key(document_id)
data: SuggestionCacheData = cache.get(doc_key)
if data and data.classifier_hash == backend:
return data
return None
def set_llm_suggestions_cache(
document_id: int,
suggestions: dict,
*,
backend: str,
timeout: int = CACHE_50_MINUTES,
) -> None:
"""
Cache LLM-generated suggestions using a backend-specific identifier (e.g. 'openai:gpt-4').
"""
doc_key = get_suggestion_cache_key(document_id)
cache.set(
doc_key,
SuggestionCacheData(
classifier_version=LLM_CACHE_CLASSIFIER_VERSION,
classifier_hash=backend,
suggestions=suggestions,
),
timeout,
)
def invalidate_llm_suggestions_cache(
document_id: int,
) -> None:
"""
Invalidate the LLM suggestions cache for a specific document and backend.
"""
doc_key = get_suggestion_cache_key(document_id)
data: SuggestionCacheData = cache.get(doc_key)
if data:
cache.delete(doc_key)
def get_metadata_cache_key(document_id: int) -> str:
"""
Returns the basic key for a document's metadata

View File

@@ -1,12 +1,60 @@
import textwrap
from django.conf import settings
from django.core.checks import Error
from django.core.checks import Warning
from django.core.checks import register
from django.core.exceptions import FieldError
from django.db.utils import OperationalError
from django.db.utils import ProgrammingError
from documents.signals import document_consumer_declaration
from documents.templating.utils import convert_format_str_to_template_format
@register()
def changed_password_check(app_configs, **kwargs):
from documents.models import Document
from paperless.db import GnuPG
try:
encrypted_doc = (
Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG,
)
.only("pk", "storage_type")
.first()
)
except (OperationalError, ProgrammingError, FieldError):
return [] # No documents table yet
if encrypted_doc:
if not settings.PASSPHRASE:
return [
Error(
"The database contains encrypted documents but no password is set.",
),
]
if not GnuPG.decrypted(encrypted_doc.source_file):
return [
Error(
textwrap.dedent(
"""
The current password doesn't match the password of the
existing documents.
If you intend to change your password, you must first export
all of the old documents, start fresh with the new password
and then re-import them."
""",
),
),
]
return []
@register()
def parser_check(app_configs, **kwargs):
parsers = []

View File

@@ -128,7 +128,7 @@ def thumbnail_last_modified(request, pk: int) -> datetime | None:
Cache should be (slightly?) faster than filesystem
"""
try:
doc = Document.objects.only("pk").get(pk=pk)
doc = Document.objects.only("storage_type").get(pk=pk)
if not doc.thumbnail_path.exists():
return None
doc_key = get_thumbnail_modified_key(pk)

View File

@@ -497,6 +497,7 @@ class ConsumerPlugin(
create_source_path_directory(document.source_path)
self._write(
document.storage_type,
self.unmodified_original
if self.unmodified_original is not None
else self.working_copy,
@@ -504,6 +505,7 @@ class ConsumerPlugin(
)
self._write(
document.storage_type,
thumbnail,
document.thumbnail_path,
)
@@ -515,6 +517,7 @@ class ConsumerPlugin(
)
create_source_path_directory(document.archive_path)
self._write(
document.storage_type,
archive_path,
document.archive_path,
)
@@ -634,6 +637,8 @@ class ConsumerPlugin(
)
self.log.debug(f"Creation date from st_mtime: {create_date}")
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
if self.metadata.filename:
title = Path(self.metadata.filename).stem
else:
@@ -660,6 +665,7 @@ class ConsumerPlugin(
checksum=hashlib.md5(file_for_checksum.read_bytes()).hexdigest(),
created=create_date,
modified=create_date,
storage_type=storage_type,
page_count=page_count,
original_filename=self.filename,
)
@@ -730,7 +736,7 @@ class ConsumerPlugin(
}
CustomFieldInstance.objects.create(**args) # adds to document
def _write(self, source, target):
def _write(self, storage_type, source, target):
with (
Path(source).open("rb") as read_file,
Path(target).open("wb") as write_file,

View File

@@ -126,6 +126,7 @@ def generate_filename(
doc: Document,
*,
counter=0,
append_gpg=True,
archive_filename=False,
) -> Path:
base_path: Path | None = None
@@ -169,4 +170,8 @@ def generate_filename(
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
full_path = Path(final_filename)
# Add GPG extension if needed
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
return full_path

View File

@@ -0,0 +1,93 @@
from pathlib import Path
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from documents.models import Document
from paperless.db import GnuPG
class Command(BaseCommand):
help = (
"This is how you migrate your stored documents from an encrypted "
"state to an unencrypted one (or vice-versa)"
)
def add_arguments(self, parser) -> None:
parser.add_argument(
"--passphrase",
help=(
"If PAPERLESS_PASSPHRASE isn't set already, you need to specify it here"
),
)
def handle(self, *args, **options) -> None:
try:
self.stdout.write(
self.style.WARNING(
"\n\n"
"WARNING: This script is going to work directly on your "
"document originals, so\n"
"WARNING: you probably shouldn't run "
"this unless you've got a recent backup\n"
"WARNING: handy. It "
"*should* work without a hitch, but be safe and backup your\n"
"WARNING: stuff first.\n\n"
"Hit Ctrl+C to exit now, or Enter to "
"continue.\n\n",
),
)
_ = input()
except KeyboardInterrupt:
return
passphrase = options["passphrase"] or settings.PASSPHRASE
if not passphrase:
raise CommandError(
"Passphrase not defined. Please set it with --passphrase or "
"by declaring it in your environment or your config.",
)
self.__gpg_to_unencrypted(passphrase)
def __gpg_to_unencrypted(self, passphrase: str) -> None:
encrypted_files = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG,
)
for document in encrypted_files:
self.stdout.write(f"Decrypting {document}")
old_paths = [document.source_path, document.thumbnail_path]
with document.source_file as file_handle:
raw_document = GnuPG.decrypted(file_handle, passphrase)
with document.thumbnail_file as file_handle:
raw_thumb = GnuPG.decrypted(file_handle, passphrase)
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
ext: str = Path(document.filename).suffix
if not ext == ".gpg":
raise CommandError(
f"Abort: encrypted file {document.source_path} does not "
f"end with .gpg",
)
document.filename = Path(document.filename).stem
with document.source_path.open("wb") as f:
f.write(raw_document)
with document.thumbnail_path.open("wb") as f:
f.write(raw_thumb)
Document.objects.filter(id=document.id).update(
storage_type=document.storage_type,
filename=document.filename,
)
for path in old_paths:
path.unlink()

View File

@@ -1,343 +1,135 @@
"""
Document consumer management command.
Watches a consumption directory for new documents and queues them for processing.
Uses watchfiles for efficient file system monitoring with support for both
native OS notifications and polling fallback.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
import os
from concurrent.futures import ThreadPoolExecutor
from fnmatch import filter
from pathlib import Path
from pathlib import PurePath
from threading import Event
from time import monotonic
from typing import TYPE_CHECKING
from time import sleep
from typing import Final
from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from watchfiles import Change
from watchfiles import DefaultFilter
from watchfiles import watch
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Tag
from documents.parsers import get_supported_file_extensions
from documents.parsers import is_file_ext_supported
from documents.tasks import consume_file
if TYPE_CHECKING:
from collections.abc import Iterator
try:
from inotifyrecursive import INotify
from inotifyrecursive import flags
except ImportError: # pragma: no cover
INotify = flags = None
logger = logging.getLogger("paperless.management.consumer")
@dataclass
class TrackedFile:
"""Represents a file being tracked for stability."""
path: Path
last_event_time: float
last_mtime: float | None = None
last_size: int | None = None
def update_stats(self) -> bool:
def _tags_from_path(filepath: Path) -> list[int]:
"""
Update file stats. Returns True if file exists and stats were updated.
"""
try:
stat = self.path.stat()
self.last_mtime = stat.st_mtime
self.last_size = stat.st_size
return True
except OSError:
return False
def is_unchanged(self) -> bool:
"""
Check if file stats match the previously recorded values.
Returns False if file doesn't exist or stats changed.
"""
try:
stat = self.path.stat()
return stat.st_mtime == self.last_mtime and stat.st_size == self.last_size
except OSError:
return False
class FileStabilityTracker:
"""
Tracks file events and determines when files are stable for consumption.
A file is considered stable when:
1. No new events have been received for it within the stability delay
2. Its size and modification time haven't changed
3. It still exists as a regular file
This handles various edge cases:
- Network copies that write in chunks
- Scanners that open/close files multiple times
- Temporary files that get renamed
- Files that are deleted before becoming stable
"""
def __init__(self, stability_delay: float = 1.0) -> None:
"""
Initialize the tracker.
Args:
stability_delay: Time in seconds a file must remain unchanged
before being considered stable.
"""
self.stability_delay = stability_delay
self._tracked: dict[Path, TrackedFile] = {}
def track(self, path: Path, change: Change) -> None:
"""
Register a file event.
Args:
path: The file path that changed.
change: The type of change (added, modified, deleted).
"""
path = path.resolve()
match change:
case Change.deleted:
self._tracked.pop(path, None)
logger.debug(f"Stopped tracking deleted file: {path}")
case Change.added | Change.modified:
current_time = monotonic()
if path in self._tracked:
tracked = self._tracked[path]
tracked.last_event_time = current_time
tracked.update_stats()
logger.debug(f"Updated tracking for: {path}")
else:
tracked = TrackedFile(path=path, last_event_time=current_time)
if tracked.update_stats():
self._tracked[path] = tracked
logger.debug(f"Started tracking: {path}")
else:
logger.debug(f"Could not stat file, not tracking: {path}")
def get_stable_files(self) -> Iterator[Path]:
"""
Yield files that have been stable for the configured delay.
Files are removed from tracking once yielded or determined to be invalid.
"""
current_time = monotonic()
to_remove: list[Path] = []
to_yield: list[Path] = []
for path, tracked in self._tracked.items():
time_since_event = current_time - tracked.last_event_time
if time_since_event < self.stability_delay:
continue
# File has waited long enough, verify it's unchanged
if not tracked.is_unchanged():
# Stats changed or file gone - update and wait again
if tracked.update_stats():
tracked.last_event_time = current_time
logger.debug(f"File changed during stability check: {path}")
else:
# File no longer exists, remove from tracking
to_remove.append(path)
logger.debug(f"File disappeared during stability check: {path}")
continue
# File is stable, we can return it
to_yield.append(path)
logger.info(f"File is stable: {path}")
# Remove files that are no longer valid
for path in to_remove:
self._tracked.pop(path, None)
# Remove and yield stable files
for path in to_yield:
self._tracked.pop(path, None)
yield path
def has_pending_files(self) -> bool:
"""Check if there are files waiting for stability check."""
return len(self._tracked) > 0
@property
def pending_count(self) -> int:
"""Number of files being tracked."""
return len(self._tracked)
class ConsumerFilter(DefaultFilter):
"""
Filter for watchfiles that accepts only supported document types
and ignores system files/directories.
Extends DefaultFilter leveraging its built-in filtering:
- `ignore_dirs`: Directory names to ignore (and all their contents)
- `ignore_entity_patterns`: Regex patterns matched against filename/dirname only
We add custom logic for file extension filtering (only accept supported
document types), which the library doesn't provide.
"""
# Regex patterns for files to always ignore (matched against filename only)
# These are passed to DefaultFilter.ignore_entity_patterns
DEFAULT_IGNORE_PATTERNS: Final[tuple[str, ...]] = (
r"^\.DS_Store$",
r"^\.DS_STORE$",
r"^\._.*",
r"^desktop\.ini$",
r"^Thumbs\.db$",
)
# Directories to always ignore (passed to DefaultFilter.ignore_dirs)
# These are matched by directory name, not full path
DEFAULT_IGNORE_DIRS: Final[tuple[str, ...]] = (
".stfolder", # Syncthing
".stversions", # Syncthing
".localized", # macOS
"@eaDir", # Synology NAS
".Spotlight-V100", # macOS
".Trashes", # macOS
"__MACOSX", # macOS archive artifacts
)
def __init__(
self,
*,
supported_extensions: frozenset[str] | None = None,
ignore_patterns: list[str] | None = None,
ignore_dirs: list[str] | None = None,
) -> None:
"""
Initialize the consumer filter.
Args:
supported_extensions: Set of file extensions to accept (e.g., {".pdf", ".png"}).
If None, uses get_supported_file_extensions().
ignore_patterns: Additional regex patterns to ignore (matched against filename).
ignore_dirs: Additional directory names to ignore (merged with defaults).
"""
# Get supported extensions
if supported_extensions is None:
supported_extensions = frozenset(get_supported_file_extensions())
self._supported_extensions = supported_extensions
# Combine default and user patterns
all_patterns: list[str] = list(self.DEFAULT_IGNORE_PATTERNS)
if ignore_patterns:
all_patterns.extend(ignore_patterns)
# Combine default and user ignore_dirs
all_ignore_dirs: list[str] = list(self.DEFAULT_IGNORE_DIRS)
if ignore_dirs:
all_ignore_dirs.extend(ignore_dirs)
# Let DefaultFilter handle all the pattern and directory filtering
super().__init__(
ignore_dirs=tuple(all_ignore_dirs),
ignore_entity_patterns=tuple(all_patterns),
ignore_paths=(),
)
def __call__(self, change: Change, path: str) -> bool:
"""
Filter function for watchfiles.
Returns True if the path should be watched, False to ignore.
The parent DefaultFilter handles:
- Hidden files/directories (starting with .)
- Directories in ignore_dirs
- Files/directories matching ignore_entity_patterns
We additionally filter files by extension.
"""
# Let parent filter handle directory ignoring and pattern matching
if not super().__call__(change, path):
return False
path_obj = Path(path)
# For directories, parent filter already handled everything
if path_obj.is_dir():
return True
# For files, check extension
return self._has_supported_extension(path_obj)
def _has_supported_extension(self, path: Path) -> bool:
"""Check if the file has a supported extension."""
suffix = path.suffix.lower()
return suffix in self._supported_extensions
def _tags_from_path(filepath: Path, consumption_dir: Path) -> list[int]:
"""
Walk up the directory tree from filepath to consumption_dir
Walk up the directory tree from filepath to CONSUMPTION_DIR
and get or create Tag IDs for every directory.
Returns list of Tag primary keys.
Returns set of Tag models
"""
db.close_old_connections()
tag_ids: set[int] = set()
path_parts = filepath.relative_to(consumption_dir).parent.parts
tag_ids = set()
path_parts = filepath.relative_to(settings.CONSUMPTION_DIR).parent.parts
for part in path_parts:
tag, _ = Tag.objects.get_or_create(
name__iexact=part,
defaults={"name": part},
tag_ids.add(
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
)
tag_ids.add(tag.pk)
return list(tag_ids)
def _consume_file(
filepath: Path,
consumption_dir: Path,
*,
subdirs_as_tags: bool,
) -> None:
def _is_ignored(filepath: Path) -> bool:
"""
Queue a file for consumption.
Checks if the given file should be ignored, based on configured
patterns.
Args:
filepath: Path to the file to consume.
consumption_dir: Base consumption directory.
subdirs_as_tags: Whether to create tags from subdirectory names.
Returns True if the file is ignored, False otherwise
"""
# Verify file still exists and is accessible
# Trim out the consume directory, leaving only filename and it's
# path relative to the consume directory
filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR)
# March through the components of the path, including directories and the filename
# looking for anything matching
# foo/bar/baz/file.pdf -> (foo, bar, baz, file.pdf)
parts = []
for part in filepath_relative.parts:
# If the part is not the name (ie, it's a dir)
# Need to append the trailing slash or fnmatch doesn't match
# fnmatch("dir", "dir/*") == False
# fnmatch("dir/", "dir/*") == True
if part != filepath_relative.name:
part = part + "/"
parts.append(part)
for pattern in settings.CONSUMER_IGNORE_PATTERNS:
if len(filter(parts, pattern)):
return True
return False
def _consume(filepath: Path) -> None:
# Check permissions early
try:
filepath.stat()
except (PermissionError, OSError):
logger.warning(f"Not consuming file {filepath}: Permission denied.")
return
if filepath.is_dir() or _is_ignored(filepath):
return
if not filepath.is_file():
logger.debug(f"Not consuming {filepath}: not a file or doesn't exist")
logger.debug(f"Not consuming file {filepath}: File has moved.")
return
if not is_file_ext_supported(filepath.suffix):
logger.warning(f"Not consuming file {filepath}: Unknown file extension.")
return
# Total wait time: up to 500ms
os_error_retry_count: Final[int] = 50
os_error_retry_wait: Final[float] = 0.01
read_try_count = 0
file_open_ok = False
os_error_str = None
while (read_try_count < os_error_retry_count) and not file_open_ok:
try:
with filepath.open("rb"):
file_open_ok = True
except OSError as e:
logger.warning(f"Not consuming {filepath}: {e}")
read_try_count += 1
os_error_str = str(e)
sleep(os_error_retry_wait)
if read_try_count >= os_error_retry_count:
logger.warning(f"Not consuming file {filepath}: OS reports {os_error_str}")
return
# Get tags from path if configured
tag_ids: list[int] | None = None
if subdirs_as_tags:
tag_ids = None
try:
tag_ids = _tags_from_path(filepath, consumption_dir)
if settings.CONSUMER_SUBDIRS_AS_TAGS:
tag_ids = _tags_from_path(filepath)
except Exception:
logger.exception(f"Error creating tags from path for {filepath}")
logger.exception("Error creating tags from path")
# Queue for consumption
try:
logger.info(f"Adding {filepath} to the task queue")
logger.info(f"Adding {filepath} to the task queue.")
consume_file.delay(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
@@ -346,209 +138,228 @@ def _consume_file(
DocumentMetadataOverrides(tag_ids=tag_ids),
)
except Exception:
logger.exception(f"Error while queuing document {filepath}")
# Catch all so that the consumer won't crash.
# This is also what the test case is listening for to check for
# errors.
logger.exception("Error while consuming document")
def _consume_wait_unmodified(file: Path) -> None:
"""
Waits for the given file to appear unmodified based on file size
and modification time. Will wait a configured number of seconds
and retry a configured number of times before either consuming or
giving up
"""
if _is_ignored(file):
return
logger.debug(f"Waiting for file {file} to remain unmodified")
mtime = -1
size = -1
current_try = 0
while current_try < settings.CONSUMER_POLLING_RETRY_COUNT:
try:
stat_data = file.stat()
new_mtime = stat_data.st_mtime
new_size = stat_data.st_size
except FileNotFoundError:
logger.debug(
f"File {file} moved while waiting for it to remain unmodified.",
)
return
if new_mtime == mtime and new_size == size:
_consume(file)
return
mtime = new_mtime
size = new_size
sleep(settings.CONSUMER_POLLING_DELAY)
current_try += 1
logger.error(f"Timeout while waiting on file {file} to remain unmodified.")
class Handler(FileSystemEventHandler):
def __init__(self, pool: ThreadPoolExecutor) -> None:
super().__init__()
self._pool = pool
def on_created(self, event):
self._pool.submit(_consume_wait_unmodified, Path(event.src_path))
def on_moved(self, event):
self._pool.submit(_consume_wait_unmodified, Path(event.dest_path))
class Command(BaseCommand):
"""
Watch a consumption directory and queue new documents for processing.
Uses watchfiles for efficient file system monitoring. Supports both
native OS notifications (inotify on Linux, FSEvents on macOS) and
polling for network filesystems.
On every iteration of an infinite loop, consume what we can from the
consumption directory.
"""
help = "Watch the consumption directory for new documents"
# For testing - allows tests to stop the consumer
stop_flag: Event = Event()
# Testing timeout in seconds
# This is here primarily for the tests and is irrelevant in production.
stop_flag = Event()
# Also only for testing, configures in one place the timeout used before checking
# the stop flag
testing_timeout_s: Final[float] = 0.5
testing_timeout_ms: Final[float] = testing_timeout_s * 1000.0
def add_arguments(self, parser) -> None:
def add_arguments(self, parser):
parser.add_argument(
"directory",
default=None,
default=settings.CONSUMPTION_DIR,
nargs="?",
help="The consumption directory (defaults to CONSUMPTION_DIR setting)",
)
parser.add_argument(
"--oneshot",
action="store_true",
help="Process existing files and exit without watching",
help="The consumption directory.",
)
parser.add_argument("--oneshot", action="store_true", help="Run only once.")
# Only use during unit testing, will configure a timeout
# Leaving it unset or false and the consumer will exit when it
# receives SIGINT
parser.add_argument(
"--testing",
action="store_true",
help="Enable testing mode with shorter timeouts",
help="Flag used only for unit testing",
default=False,
)
def handle(self, *args, **options) -> None:
# Resolve consumption directory
directory = options.get("directory")
def handle(self, *args, **options):
directory = options["directory"]
recursive = settings.CONSUMER_RECURSIVE
if not directory:
directory = getattr(settings, "CONSUMPTION_DIR", None)
if not directory:
raise CommandError("CONSUMPTION_DIR is not configured")
raise CommandError("CONSUMPTION_DIR does not appear to be set.")
directory = Path(directory).resolve()
if not directory.exists():
raise CommandError(f"Consumption directory does not exist: {directory}")
if not directory.is_dir():
raise CommandError(f"Consumption path is not a directory: {directory}")
raise CommandError(f"Consumption directory {directory} does not exist")
# Ensure scratch directory exists
# Consumer will need this
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
# Get settings
recursive: bool = settings.CONSUMER_RECURSIVE
subdirs_as_tags: bool = settings.CONSUMER_SUBDIRS_AS_TAGS
polling_interval: float = settings.CONSUMER_POLLING_INTERVAL
stability_delay: float = settings.CONSUMER_STABILITY_DELAY
ignore_patterns: list[str] = settings.CONSUMER_IGNORE_PATTERNS
ignore_dirs: list[str] = settings.CONSUMER_IGNORE_DIRS
is_testing: bool = options.get("testing", False)
is_oneshot: bool = options.get("oneshot", False)
if recursive:
for dirpath, _, filenames in os.walk(directory):
for filename in filenames:
filepath = Path(dirpath) / filename
_consume(filepath)
else:
for filepath in directory.iterdir():
_consume(filepath)
# Create filter
consumer_filter = ConsumerFilter(
ignore_patterns=ignore_patterns,
ignore_dirs=ignore_dirs,
)
# Process existing files
self._process_existing_files(
directory=directory,
recursive=recursive,
subdirs_as_tags=subdirs_as_tags,
consumer_filter=consumer_filter,
)
if is_oneshot:
logger.info("Oneshot mode: processed existing files, exiting")
if options["oneshot"]:
return
# Start watching
self._watch_directory(
directory=directory,
recursive=recursive,
subdirs_as_tags=subdirs_as_tags,
consumer_filter=consumer_filter,
polling_interval=polling_interval,
stability_delay=stability_delay,
is_testing=is_testing,
)
logger.debug("Consumer exiting")
def _process_existing_files(
self,
*,
directory: Path,
recursive: bool,
subdirs_as_tags: bool,
consumer_filter: ConsumerFilter,
) -> None:
"""Process any existing files in the consumption directory."""
logger.info(f"Processing existing files in {directory}")
glob_pattern = "**/*" if recursive else "*"
for filepath in directory.glob(glob_pattern):
# Use filter to check if file should be processed
if not filepath.is_file():
continue
if not consumer_filter(Change.added, str(filepath)):
continue
_consume_file(
filepath=filepath,
consumption_dir=directory,
subdirs_as_tags=subdirs_as_tags,
)
def _watch_directory(
self,
*,
directory: Path,
recursive: bool,
subdirs_as_tags: bool,
consumer_filter: ConsumerFilter,
polling_interval: float,
stability_delay: float,
is_testing: bool,
) -> None:
"""Watch directory for changes and process stable files."""
use_polling = polling_interval > 0
poll_delay_ms = int(polling_interval * 1000) if use_polling else 0
if use_polling:
logger.info(
f"Watching {directory} using polling (interval: {polling_interval}s)",
)
if settings.CONSUMER_POLLING == 0 and INotify:
self.handle_inotify(directory, recursive, is_testing=options["testing"])
else:
logger.info(f"Watching {directory} using native file system events")
if INotify is None and settings.CONSUMER_POLLING == 0: # pragma: no cover
logger.warning("Using polling as INotify import failed")
self.handle_polling(directory, recursive, is_testing=options["testing"])
# Create stability tracker
tracker = FileStabilityTracker(stability_delay=stability_delay)
logger.debug("Consumer exiting.")
# Calculate timeouts
stability_timeout_ms = int(stability_delay * 1000)
testing_timeout_ms = int(self.testing_timeout_s * 1000)
def handle_polling(self, directory, recursive, *, is_testing: bool):
logger.info(f"Polling directory for changes: {directory}")
# Start with no timeout (wait indefinitely for first event)
# unless in testing mode
timeout_ms = testing_timeout_ms if is_testing else 0
timeout = None
if is_testing:
timeout = self.testing_timeout_s
logger.debug(f"Configuring timeout to {timeout}s")
self.stop_flag.clear()
polling_interval = settings.CONSUMER_POLLING
if polling_interval == 0: # pragma: no cover
# Only happens if INotify failed to import
logger.warning("Using polling of 10s, consider setting this")
polling_interval = 10
while not self.stop_flag.is_set():
with ThreadPoolExecutor(max_workers=4) as pool:
observer = PollingObserver(timeout=polling_interval)
observer.schedule(Handler(pool), directory, recursive=recursive)
observer.start()
try:
for changes in watch(
directory,
watch_filter=consumer_filter,
rust_timeout=timeout_ms,
yield_on_timeout=True,
force_polling=use_polling,
poll_delay_ms=poll_delay_ms,
recursive=recursive,
stop_event=self.stop_flag,
):
# Process each change
for change_type, path in changes:
path = Path(path).resolve()
if not path.is_file():
while observer.is_alive():
observer.join(timeout)
if self.stop_flag.is_set():
observer.stop()
except KeyboardInterrupt:
observer.stop()
observer.join()
def handle_inotify(self, directory, recursive, *, is_testing: bool):
logger.info(f"Using inotify to watch directory for changes: {directory}")
timeout_ms = None
if is_testing:
timeout_ms = self.testing_timeout_ms
logger.debug(f"Configuring timeout to {timeout_ms}ms")
inotify = INotify()
inotify_flags = flags.CLOSE_WRITE | flags.MOVED_TO | flags.MODIFY
if recursive:
inotify.add_watch_recursive(directory, inotify_flags)
else:
inotify.add_watch(directory, inotify_flags)
inotify_debounce_secs: Final[float] = settings.CONSUMER_INOTIFY_DELAY
inotify_debounce_ms: Final[int] = inotify_debounce_secs * 1000
finished = False
notified_files = {}
try:
while not finished:
try:
for event in inotify.read(timeout=timeout_ms):
path = inotify.get_path(event.wd) if recursive else directory
filepath = Path(path) / event.name
if flags.MODIFY in flags.from_mask(event.mask):
notified_files.pop(filepath, None)
else:
notified_files[filepath] = monotonic()
# Check the files against the timeout
still_waiting = {}
# last_event_time is time of the last inotify event for this file
for filepath, last_event_time in notified_files.items():
# Current time - last time over the configured timeout
waited_long_enough = (
monotonic() - last_event_time
) > inotify_debounce_secs
# Also make sure the file exists still, some scanners might write a
# temporary file first
try:
file_still_exists = filepath.exists() and filepath.is_file()
except (PermissionError, OSError): # pragma: no cover
# If we can't check, let it fail in the _consume function
file_still_exists = True
continue
logger.debug(f"Event: {change_type.name} for {path}")
tracker.track(path, change_type)
# Check for stable files
for stable_path in tracker.get_stable_files():
_consume_file(
filepath=stable_path,
consumption_dir=directory,
subdirs_as_tags=subdirs_as_tags,
)
if waited_long_enough and file_still_exists:
_consume(filepath)
elif file_still_exists:
still_waiting[filepath] = last_event_time
# Exit watch loop to reconfigure timeout
break
# These files are still waiting to hit the timeout
notified_files = still_waiting
# Determine next timeout
if tracker.has_pending_files():
# Check pending files at stability interval
timeout_ms = stability_timeout_ms
# If files are waiting, need to exit read() to check them
# Otherwise, go back to infinite sleep time, but only if not testing
if len(notified_files) > 0:
timeout_ms = inotify_debounce_ms
elif is_testing:
# In testing, use short timeout to check stop flag
timeout_ms = testing_timeout_ms
else: # pragma: nocover
# No pending files, wait indefinitely
timeout_ms = 0
timeout_ms = self.testing_timeout_ms
else:
timeout_ms = None
except KeyboardInterrupt: # pragma: nocover
logger.info("Received interrupt, stopping consumer")
self.stop_flag.set()
if self.stop_flag.is_set():
logger.debug("Finishing because event is set")
finished = True
except KeyboardInterrupt:
logger.info("Received SIGINT, stopping inotify")
finished = True
finally:
inotify.close()

View File

@@ -3,6 +3,7 @@ import json
import os
import shutil
import tempfile
import time
from pathlib import Path
from typing import TYPE_CHECKING
@@ -55,6 +56,7 @@ from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import copy_file_with_basic_stats
from paperless import version
from paperless.db import GnuPG
from paperless.models import ApplicationConfiguration
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
@@ -314,17 +316,20 @@ class Command(CryptMixin, BaseCommand):
total=len(document_manifest),
disable=self.no_progress_bar,
):
# 3.1. store files unencrypted
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED
document = document_map[document_dict["pk"]]
# 3.1. generate a unique filename
# 3.2. generate a unique filename
base_name = self.generate_base_name(document)
# 3.2. write filenames into manifest
# 3.3. write filenames into manifest
original_target, thumbnail_target, archive_target = (
self.generate_document_targets(document, base_name, document_dict)
)
# 3.3. write files to target folder
# 3.4. write files to target folder
if not self.data_only:
self.copy_document_files(
document,
@@ -418,6 +423,7 @@ class Command(CryptMixin, BaseCommand):
base_name = generate_filename(
document,
counter=filename_counter,
append_gpg=False,
)
else:
base_name = document.get_public_filename(counter=filename_counter)
@@ -476,6 +482,28 @@ class Command(CryptMixin, BaseCommand):
If the document is encrypted, the files are decrypted before copying them to the target location.
"""
if document.storage_type == Document.STORAGE_TYPE_GPG:
t = int(time.mktime(document.created.timetuple()))
original_target.parent.mkdir(parents=True, exist_ok=True)
with document.source_file as out_file:
original_target.write_bytes(GnuPG.decrypted(out_file))
os.utime(original_target, times=(t, t))
if thumbnail_target:
thumbnail_target.parent.mkdir(parents=True, exist_ok=True)
with document.thumbnail_file as out_file:
thumbnail_target.write_bytes(GnuPG.decrypted(out_file))
os.utime(thumbnail_target, times=(t, t))
if archive_target:
archive_target.parent.mkdir(parents=True, exist_ok=True)
if TYPE_CHECKING:
assert isinstance(document.archive_path, Path)
with document.archive_path as out_file:
archive_target.write_bytes(GnuPG.decrypted(out_file))
os.utime(archive_target, times=(t, t))
else:
self.check_and_copy(
document.source_path,
document.checksum,

View File

@@ -383,6 +383,8 @@ class Command(CryptMixin, BaseCommand):
else:
archive_path = None
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
with FileLock(settings.MEDIA_LOCK):
if Path(document.source_path).is_file():
raise FileExistsError(document.source_path)

View File

@@ -1,22 +0,0 @@
from django.core.management import BaseCommand
from django.db import transaction
from documents.management.commands.mixins import ProgressBarMixin
from documents.tasks import llmindex_index
class Command(ProgressBarMixin, BaseCommand):
help = "Manages the LLM-based vector index for Paperless."
def add_arguments(self, parser):
parser.add_argument("command", choices=["rebuild", "update"])
self.add_argument_progress_bar_mixin(parser)
def handle(self, *args, **options):
self.handle_progress_bar_mixin(**options)
with transaction.atomic():
llmindex_index(
progress_bar_disable=self.no_progress_bar,
rebuild=options["command"] == "rebuild",
scheduled=False,
)

View File

@@ -403,18 +403,6 @@ def existing_document_matches_workflow(
f"Document tags {list(document.tags.all())} include excluded tags {list(trigger_has_not_tags_qs)}",
)
allowed_correspondent_ids = set(
trigger.filter_has_any_correspondents.values_list("id", flat=True),
)
if (
allowed_correspondent_ids
and document.correspondent_id not in allowed_correspondent_ids
):
return (
False,
f"Document correspondent {document.correspondent} is not one of {list(trigger.filter_has_any_correspondents.all())}",
)
# Document correspondent vs trigger has_correspondent
if (
trigger.filter_has_correspondent_id is not None
@@ -436,17 +424,6 @@ def existing_document_matches_workflow(
f"Document correspondent {document.correspondent} is excluded by {list(trigger.filter_has_not_correspondents.all())}",
)
allowed_document_type_ids = set(
trigger.filter_has_any_document_types.values_list("id", flat=True),
)
if allowed_document_type_ids and (
document.document_type_id not in allowed_document_type_ids
):
return (
False,
f"Document doc type {document.document_type} is not one of {list(trigger.filter_has_any_document_types.all())}",
)
# Document document_type vs trigger has_document_type
if (
trigger.filter_has_document_type_id is not None
@@ -468,17 +445,6 @@ def existing_document_matches_workflow(
f"Document doc type {document.document_type} is excluded by {list(trigger.filter_has_not_document_types.all())}",
)
allowed_storage_path_ids = set(
trigger.filter_has_any_storage_paths.values_list("id", flat=True),
)
if allowed_storage_path_ids and (
document.storage_path_id not in allowed_storage_path_ids
):
return (
False,
f"Document storage path {document.storage_path} is not one of {list(trigger.filter_has_any_storage_paths.all())}",
)
# Document storage_path vs trigger has_storage_path
if (
trigger.filter_has_storage_path_id is not None
@@ -566,10 +532,6 @@ def prefilter_documents_by_workflowtrigger(
# Correspondent, DocumentType, etc. filtering
if trigger.filter_has_any_correspondents.exists():
documents = documents.filter(
correspondent__in=trigger.filter_has_any_correspondents.all(),
)
if trigger.filter_has_correspondent is not None:
documents = documents.filter(
correspondent=trigger.filter_has_correspondent,
@@ -579,10 +541,6 @@ def prefilter_documents_by_workflowtrigger(
correspondent__in=trigger.filter_has_not_correspondents.all(),
)
if trigger.filter_has_any_document_types.exists():
documents = documents.filter(
document_type__in=trigger.filter_has_any_document_types.all(),
)
if trigger.filter_has_document_type is not None:
documents = documents.filter(
document_type=trigger.filter_has_document_type,
@@ -592,10 +550,6 @@ def prefilter_documents_by_workflowtrigger(
document_type__in=trigger.filter_has_not_document_types.all(),
)
if trigger.filter_has_any_storage_paths.exists():
documents = documents.filter(
storage_path__in=trigger.filter_has_any_storage_paths.all(),
)
if trigger.filter_has_storage_path is not None:
documents = documents.filter(
storage_path=trigger.filter_has_storage_path,
@@ -650,11 +604,8 @@ def document_matches_workflow(
"filter_has_tags",
"filter_has_all_tags",
"filter_has_not_tags",
"filter_has_any_document_types",
"filter_has_not_document_types",
"filter_has_any_correspondents",
"filter_has_not_correspondents",
"filter_has_any_storage_paths",
"filter_has_not_storage_paths",
)
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,26 @@
# Generated by Django 1.9 on 2015-12-26 13:16
import django.utils.timezone
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0001_initial"),
]
operations = [
migrations.AlterModelOptions(
name="document",
options={"ordering": ("sender", "title")},
),
migrations.AlterField(
model_name="document",
name="created",
field=models.DateTimeField(
default=django.utils.timezone.now,
editable=False,
),
),
]

View File

@@ -0,0 +1,70 @@
# Generated by Django 1.9 on 2016-01-11 12:21
import django.db.models.deletion
from django.db import migrations
from django.db import models
from django.template.defaultfilters import slugify
DOCUMENT_SENDER_MAP = {}
def move_sender_strings_to_sender_model(apps, schema_editor):
sender_model = apps.get_model("documents", "Sender")
document_model = apps.get_model("documents", "Document")
# Create the sender and log the relationship with the document
for document in document_model.objects.all():
if document.sender:
(
DOCUMENT_SENDER_MAP[document.pk],
_,
) = sender_model.objects.get_or_create(
name=document.sender,
defaults={"slug": slugify(document.sender)},
)
def realign_senders(apps, schema_editor):
document_model = apps.get_model("documents", "Document")
for pk, sender in DOCUMENT_SENDER_MAP.items():
document_model.objects.filter(pk=pk).update(sender=sender)
class Migration(migrations.Migration):
dependencies = [
("documents", "0002_auto_20151226_1316"),
]
operations = [
migrations.CreateModel(
name="Sender",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128, unique=True)),
("slug", models.SlugField()),
],
),
migrations.RunPython(move_sender_strings_to_sender_model),
migrations.RemoveField(
model_name="document",
name="sender",
),
migrations.AddField(
model_name="document",
name="sender",
field=models.ForeignKey(
blank=True,
on_delete=django.db.models.deletion.CASCADE,
to="documents.Sender",
),
),
migrations.RunPython(realign_senders),
]

View File

@@ -0,0 +1,25 @@
# Generated by Django 1.9 on 2016-01-14 18:44
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0003_sender"),
]
operations = [
migrations.AlterField(
model_name="document",
name="sender",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="documents",
to="documents.Sender",
),
),
]

View File

@@ -0,0 +1,178 @@
# Generated by Django 4.2.13 on 2024-06-28 17:52
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
replaces = [
("documents", "0004_auto_20160114_1844"),
("documents", "0005_auto_20160123_0313"),
("documents", "0006_auto_20160123_0430"),
("documents", "0007_auto_20160126_2114"),
("documents", "0008_document_file_type"),
("documents", "0009_auto_20160214_0040"),
("documents", "0010_log"),
("documents", "0011_auto_20160303_1929"),
]
dependencies = [
("documents", "0003_sender"),
]
operations = [
migrations.AlterField(
model_name="document",
name="sender",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="documents",
to="documents.sender",
),
),
migrations.AlterModelOptions(
name="sender",
options={"ordering": ("name",)},
),
migrations.CreateModel(
name="Tag",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128, unique=True)),
("slug", models.SlugField(blank=True)),
(
"colour",
models.PositiveIntegerField(
choices=[
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#b15928"),
(12, "#000000"),
(13, "#cccccc"),
],
default=1,
),
),
("match", models.CharField(blank=True, max_length=256)),
(
"matching_algorithm",
models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
),
),
],
options={
"abstract": False,
},
),
migrations.AlterField(
model_name="sender",
name="slug",
field=models.SlugField(blank=True),
),
migrations.AddField(
model_name="document",
name="file_type",
field=models.CharField(
choices=[
("pdf", "PDF"),
("png", "PNG"),
("jpg", "JPG"),
("gif", "GIF"),
("tiff", "TIFF"),
],
default="pdf",
editable=False,
max_length=4,
),
preserve_default=False,
),
migrations.AddField(
model_name="document",
name="tags",
field=models.ManyToManyField(
blank=True,
related_name="documents",
to="documents.tag",
),
),
migrations.CreateModel(
name="Log",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("group", models.UUIDField(blank=True)),
("message", models.TextField()),
(
"level",
models.PositiveIntegerField(
choices=[
(10, "Debugging"),
(20, "Informational"),
(30, "Warning"),
(40, "Error"),
(50, "Critical"),
],
default=20,
),
),
(
"component",
models.PositiveIntegerField(
choices=[(1, "Consumer"), (2, "Mail Fetcher")],
),
),
("created", models.DateTimeField(auto_now_add=True)),
("modified", models.DateTimeField(auto_now=True)),
],
options={
"ordering": ("-modified",),
},
),
migrations.RenameModel(
old_name="Sender",
new_name="Correspondent",
),
migrations.AlterModelOptions(
name="document",
options={"ordering": ("correspondent", "title")},
),
migrations.RenameField(
model_name="document",
old_name="sender",
new_name="correspondent",
),
]

View File

@@ -0,0 +1,16 @@
# Generated by Django 1.9 on 2016-01-23 03:13
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "0004_auto_20160114_1844"),
]
operations = [
migrations.AlterModelOptions(
name="sender",
options={"ordering": ("name",)},
),
]

View File

@@ -1,43 +0,0 @@
# Generated by Django 5.2.7 on 2025-12-17 22:25
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0004_remove_document_storage_type"),
]
operations = [
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_any_correspondents",
field=models.ManyToManyField(
blank=True,
related_name="workflowtriggers_has_any_correspondent",
to="documents.correspondent",
verbose_name="has one of these correspondents",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_any_document_types",
field=models.ManyToManyField(
blank=True,
related_name="workflowtriggers_has_any_document_type",
to="documents.documenttype",
verbose_name="has one of these document types",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_any_storage_paths",
field=models.ManyToManyField(
blank=True,
related_name="workflowtriggers_has_any_storage_path",
to="documents.storagepath",
verbose_name="has one of these storage paths",
),
),
]

View File

@@ -0,0 +1,64 @@
# Generated by Django 1.9 on 2016-01-23 04:30
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0005_auto_20160123_0313"),
]
operations = [
migrations.CreateModel(
name="Tag",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128, unique=True)),
("slug", models.SlugField(blank=True)),
(
"colour",
models.PositiveIntegerField(
choices=[
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#ffff99"),
(12, "#b15928"),
(13, "#000000"),
(14, "#cccccc"),
],
default=1,
),
),
],
options={
"abstract": False,
},
),
migrations.AlterField(
model_name="sender",
name="slug",
field=models.SlugField(blank=True),
),
migrations.AddField(
model_name="document",
name="tags",
field=models.ManyToManyField(related_name="documents", to="documents.Tag"),
),
]

View File

@@ -0,0 +1,55 @@
# Generated by Django 1.9 on 2016-01-26 21:14
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0006_auto_20160123_0430"),
]
operations = [
migrations.AddField(
model_name="tag",
name="match",
field=models.CharField(blank=True, max_length=256),
),
migrations.AddField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
blank=True,
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
],
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
null=True,
),
),
migrations.AlterField(
model_name="tag",
name="colour",
field=models.PositiveIntegerField(
choices=[
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#b15928"),
(12, "#000000"),
(13, "#cccccc"),
],
default=1,
),
),
]

View File

@@ -0,0 +1,39 @@
# Generated by Django 1.9 on 2016-01-29 22:58
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0007_auto_20160126_2114"),
]
operations = [
migrations.AddField(
model_name="document",
name="file_type",
field=models.CharField(
choices=[
("pdf", "PDF"),
("png", "PNG"),
("jpg", "JPG"),
("gif", "GIF"),
("tiff", "TIFF"),
],
default="pdf",
editable=False,
max_length=4,
),
preserve_default=False,
),
migrations.AlterField(
model_name="document",
name="tags",
field=models.ManyToManyField(
blank=True,
related_name="documents",
to="documents.Tag",
),
),
]

View File

@@ -0,0 +1,27 @@
# Generated by Django 1.9 on 2016-02-14 00:40
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0008_document_file_type"),
]
operations = [
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
),
),
]

View File

@@ -0,0 +1,53 @@
# Generated by Django 1.9 on 2016-02-27 17:54
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0009_auto_20160214_0040"),
]
operations = [
migrations.CreateModel(
name="Log",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("group", models.UUIDField(blank=True)),
("message", models.TextField()),
(
"level",
models.PositiveIntegerField(
choices=[
(10, "Debugging"),
(20, "Informational"),
(30, "Warning"),
(40, "Error"),
(50, "Critical"),
],
default=20,
),
),
(
"component",
models.PositiveIntegerField(
choices=[(1, "Consumer"), (2, "Mail Fetcher")],
),
),
("created", models.DateTimeField(auto_now_add=True)),
("modified", models.DateTimeField(auto_now=True)),
],
options={
"ordering": ("-modified",),
},
),
]

View File

@@ -0,0 +1,26 @@
# Generated by Django 1.9.2 on 2016-03-03 19:29
from django.db import migrations
class Migration(migrations.Migration):
atomic = False
dependencies = [
("documents", "0010_log"),
]
operations = [
migrations.RenameModel(
old_name="Sender",
new_name="Correspondent",
),
migrations.AlterModelOptions(
name="document",
options={"ordering": ("correspondent", "title")},
),
migrations.RenameField(
model_name="document",
old_name="sender",
new_name="correspondent",
),
]

View File

@@ -0,0 +1,128 @@
# Generated by Django 1.9.2 on 2016-03-05 00:40
import os
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
import gnupg
from django.conf import settings
from django.db import migrations
from django.utils.termcolors import colorize as colourise # Spelling hurts me
class GnuPG:
"""
A handy singleton to use when handling encrypted files.
"""
gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)
@classmethod
def decrypted(cls, file_handle):
return cls.gpg.decrypt_file(file_handle, passphrase=settings.PASSPHRASE).data
@classmethod
def encrypted(cls, file_handle):
return cls.gpg.encrypt_file(
file_handle,
recipients=None,
passphrase=settings.PASSPHRASE,
symmetric=True,
).data
def move_documents_and_create_thumbnails(apps, schema_editor):
(Path(settings.MEDIA_ROOT) / "documents" / "originals").mkdir(
parents=True,
exist_ok=True,
)
(Path(settings.MEDIA_ROOT) / "documents" / "thumbnails").mkdir(
parents=True,
exist_ok=True,
)
documents: list[str] = os.listdir(Path(settings.MEDIA_ROOT) / "documents") # noqa: PTH208
if set(documents) == {"originals", "thumbnails"}:
return
print(
colourise(
"\n\n"
" This is a one-time only migration to generate thumbnails for all of your\n"
" documents so that future UIs will have something to work with. If you have\n"
" a lot of documents though, this may take a while, so a coffee break may be\n"
" in order."
"\n",
opts=("bold",),
),
)
Path(settings.SCRATCH_DIR).mkdir(parents=True, exist_ok=True)
for f in sorted(documents):
if not f.endswith("gpg"):
continue
print(
" {} {} {}".format(
colourise("*", fg="green"),
colourise("Generating a thumbnail for", fg="white"),
colourise(f, fg="cyan"),
),
)
thumb_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
orig_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
orig_source: Path = Path(settings.MEDIA_ROOT) / "documents" / f
orig_target: Path = Path(orig_temp) / f.replace(".gpg", "")
with orig_source.open("rb") as encrypted, orig_target.open("wb") as unencrypted:
unencrypted.write(GnuPG.decrypted(encrypted))
subprocess.Popen(
(
settings.CONVERT_BINARY,
"-scale",
"500x5000",
"-alpha",
"remove",
orig_target,
Path(thumb_temp) / "convert-%04d.png",
),
).wait()
thumb_source: Path = Path(thumb_temp) / "convert-0000.png"
thumb_target: Path = (
Path(settings.MEDIA_ROOT)
/ "documents"
/ "thumbnails"
/ re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
)
with (
thumb_source.open("rb") as unencrypted,
thumb_target.open("wb") as encrypted,
):
encrypted.write(GnuPG.encrypted(unencrypted))
shutil.rmtree(thumb_temp)
shutil.rmtree(orig_temp)
shutil.move(
Path(settings.MEDIA_ROOT) / "documents" / f,
Path(settings.MEDIA_ROOT) / "documents" / "originals" / f,
)
class Migration(migrations.Migration):
dependencies = [
("documents", "0011_auto_20160303_1929"),
]
operations = [
migrations.RunPython(move_documents_and_create_thumbnails),
]

View File

@@ -0,0 +1,42 @@
# Generated by Django 1.9.4 on 2016-03-25 21:11
import django.utils.timezone
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0012_auto_20160305_0040"),
]
operations = [
migrations.AddField(
model_name="correspondent",
name="match",
field=models.CharField(blank=True, max_length=256),
),
migrations.AddField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
),
),
migrations.AlterField(
model_name="document",
name="created",
field=models.DateTimeField(default=django.utils.timezone.now),
),
migrations.RemoveField(
model_name="log",
name="component",
),
]

View File

@@ -0,0 +1,182 @@
# Generated by Django 1.9.4 on 2016-03-28 19:09
import hashlib
from pathlib import Path
import django.utils.timezone
import gnupg
from django.conf import settings
from django.db import migrations
from django.db import models
from django.template.defaultfilters import slugify
from django.utils.termcolors import colorize as colourise # Spelling hurts me
class GnuPG:
"""
A handy singleton to use when handling encrypted files.
"""
gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)
@classmethod
def decrypted(cls, file_handle):
return cls.gpg.decrypt_file(file_handle, passphrase=settings.PASSPHRASE).data
@classmethod
def encrypted(cls, file_handle):
return cls.gpg.encrypt_file(
file_handle,
recipients=None,
passphrase=settings.PASSPHRASE,
symmetric=True,
).data
class Document:
"""
Django's migrations restrict access to model methods, so this is a snapshot
of the methods that existed at the time this migration was written, since
we need to make use of a lot of these shortcuts here.
"""
def __init__(self, doc):
self.pk = doc.pk
self.correspondent = doc.correspondent
self.title = doc.title
self.file_type = doc.file_type
self.tags = doc.tags
self.created = doc.created
def __str__(self):
created = self.created.strftime("%Y%m%d%H%M%S")
if self.correspondent and self.title:
return f"{created}: {self.correspondent} - {self.title}"
if self.correspondent or self.title:
return f"{created}: {self.correspondent or self.title}"
return str(created)
@property
def source_path(self):
return (
Path(settings.MEDIA_ROOT)
/ "documents"
/ "originals"
/ f"{self.pk:07}.{self.file_type}.gpg"
)
@property
def source_file(self):
return self.source_path.open("rb")
@property
def file_name(self):
return slugify(str(self)) + "." + self.file_type
def set_checksums(apps, schema_editor):
document_model = apps.get_model("documents", "Document")
if not document_model.objects.all().exists():
return
print(
colourise(
"\n\n"
" This is a one-time only migration to generate checksums for all\n"
" of your existing documents. If you have a lot of documents\n"
" though, this may take a while, so a coffee break may be in\n"
" order."
"\n",
opts=("bold",),
),
)
sums = {}
for d in document_model.objects.all():
document = Document(d)
print(
" {} {} {}".format(
colourise("*", fg="green"),
colourise("Generating a checksum for", fg="white"),
colourise(document.file_name, fg="cyan"),
),
)
with document.source_file as encrypted:
checksum = hashlib.md5(GnuPG.decrypted(encrypted)).hexdigest()
if checksum in sums:
error = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}".format(
p1=colourise(
"It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97). The documents in question\nare:",
fg="yellow",
),
p2=colourise(
"To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:",
fg="yellow",
),
p3=colourise(
"When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.",
fg="yellow",
),
doc1=colourise(
f" * {sums[checksum][1]} (id: {sums[checksum][0]})",
fg="red",
),
doc2=colourise(
f" * {document.file_name} (id: {document.pk})",
fg="red",
),
code=colourise(
f" $ echo 'DELETE FROM documents_document WHERE id = {document.pk};' | ./manage.py dbshell",
fg="green",
),
line=colourise("\n{}\n".format("=" * 80), fg="white", opts=("bold",)),
)
raise RuntimeError(error)
sums[checksum] = (document.pk, document.file_name)
document_model.objects.filter(pk=document.pk).update(checksum=checksum)
def do_nothing(apps, schema_editor):
pass
class Migration(migrations.Migration):
dependencies = [
("documents", "0013_auto_20160325_2111"),
]
operations = [
migrations.AddField(
model_name="document",
name="checksum",
field=models.CharField(
default="-",
db_index=True,
editable=False,
max_length=32,
help_text="The checksum of the original document (before it "
"was encrypted). We use this to prevent duplicate "
"document imports.",
),
preserve_default=False,
),
migrations.RunPython(set_checksums, do_nothing),
migrations.AlterField(
model_name="document",
name="created",
field=models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
),
),
migrations.AlterField(
model_name="document",
name="modified",
field=models.DateTimeField(auto_now=True, db_index=True),
),
]

View File

@@ -0,0 +1,33 @@
# Generated by Django 1.10.2 on 2016-10-05 21:38
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0014_document_checksum"),
]
operations = [
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
help_text="The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.",
max_length=32,
unique=True,
),
),
migrations.AddField(
model_name="correspondent",
name="is_insensitive",
field=models.BooleanField(default=True),
),
migrations.AddField(
model_name="tag",
name="is_insensitive",
field=models.BooleanField(default=True),
),
]

View File

@@ -0,0 +1,92 @@
# Generated by Django 4.2.13 on 2024-06-28 17:57
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
replaces = [
("documents", "0015_add_insensitive_to_match"),
("documents", "0016_auto_20170325_1558"),
("documents", "0017_auto_20170512_0507"),
("documents", "0018_auto_20170715_1712"),
]
dependencies = [
("documents", "0014_document_checksum"),
]
operations = [
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
help_text="The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.",
max_length=32,
unique=True,
),
),
migrations.AddField(
model_name="correspondent",
name="is_insensitive",
field=models.BooleanField(default=True),
),
migrations.AddField(
model_name="tag",
name="is_insensitive",
field=models.BooleanField(default=True),
),
migrations.AlterField(
model_name="document",
name="content",
field=models.TextField(
blank=True,
db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]),
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
),
),
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
model_name="document",
name="correspondent",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.correspondent",
),
),
]

View File

@@ -0,0 +1,23 @@
# Generated by Django 1.10.5 on 2017-03-25 15:58
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0015_add_insensitive_to_match"),
]
operations = [
migrations.AlterField(
model_name="document",
name="content",
field=models.TextField(
blank=True,
db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]),
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
),
),
]

View File

@@ -0,0 +1,43 @@
# Generated by Django 1.10.5 on 2017-05-12 05:07
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0016_auto_20170325_1558"),
]
operations = [
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
]

View File

@@ -0,0 +1,25 @@
# Generated by Django 1.10.5 on 2017-07-15 17:12
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0017_auto_20170512_0507"),
]
operations = [
migrations.AlterField(
model_name="document",
name="correspondent",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.Correspondent",
),
),
]

View File

@@ -0,0 +1,22 @@
# Generated by Django 1.10.5 on 2017-07-15 17:12
from django.contrib.auth.models import User
from django.db import migrations
def forwards_func(apps, schema_editor):
User.objects.create(username="consumer")
def reverse_func(apps, schema_editor):
User.objects.get(username="consumer").delete()
class Migration(migrations.Migration):
dependencies = [
("documents", "0018_auto_20170715_1712"),
]
operations = [
migrations.RunPython(forwards_func, reverse_func),
]

Some files were not shown because too many files have changed in this diff Show More