mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge branch 'dev' into feature-permissions
This commit is contained in:
commit
d2a6f79612
19
.codecov.yml
Normal file
19
.codecov.yml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# https://docs.codecov.com/docs/pull-request-comments
|
||||||
|
# codecov will only comment if coverage changes
|
||||||
|
comment:
|
||||||
|
require_changes: true
|
||||||
|
coverage:
|
||||||
|
status:
|
||||||
|
project:
|
||||||
|
default:
|
||||||
|
# https://docs.codecov.com/docs/commit-status#threshold
|
||||||
|
threshold: 1%
|
||||||
|
# https://docs.codecov.com/docs/commit-status#only_pulls
|
||||||
|
only_pulls: true
|
||||||
|
patch:
|
||||||
|
default:
|
||||||
|
# For the changed lines only, target 75% covered, but
|
||||||
|
# allow as low as 50%
|
||||||
|
target: 75%
|
||||||
|
threshold: 25%
|
||||||
|
only_pulls: true
|
86
.github/workflows/ci.yml
vendored
86
.github/workflows/ci.yml
vendored
@ -113,16 +113,12 @@ jobs:
|
|||||||
PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }}
|
PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }}
|
||||||
PAPERLESS_MAIL_TEST_USER: ${{ secrets.TEST_MAIL_USER }}
|
PAPERLESS_MAIL_TEST_USER: ${{ secrets.TEST_MAIL_USER }}
|
||||||
PAPERLESS_MAIL_TEST_PASSWD: ${{ secrets.TEST_MAIL_PASSWD }}
|
PAPERLESS_MAIL_TEST_PASSWD: ${{ secrets.TEST_MAIL_PASSWD }}
|
||||||
# Skip Tests which require convert
|
|
||||||
PAPERLESS_TEST_SKIP_CONVERT: 1
|
|
||||||
# Enable Gotenberg end to end testing
|
# Enable Gotenberg end to end testing
|
||||||
GOTENBERG_LIVE: 1
|
GOTENBERG_LIVE: 1
|
||||||
steps:
|
steps:
|
||||||
-
|
-
|
||||||
name: Checkout
|
name: Checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
with:
|
|
||||||
fetch-depth: 0
|
|
||||||
-
|
-
|
||||||
name: Start containers
|
name: Start containers
|
||||||
run: |
|
run: |
|
||||||
@ -145,6 +141,10 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update -qq
|
sudo apt-get update -qq
|
||||||
sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript libzbar0 poppler-utils
|
sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript libzbar0 poppler-utils
|
||||||
|
-
|
||||||
|
name: Configure ImageMagick
|
||||||
|
run: |
|
||||||
|
sudo cp docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
|
||||||
-
|
-
|
||||||
name: Install Python dependencies
|
name: Install Python dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -160,27 +160,14 @@ jobs:
|
|||||||
cd src/
|
cd src/
|
||||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra
|
pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra
|
||||||
-
|
-
|
||||||
name: Get changed files
|
name: Upload coverage to Codecov
|
||||||
id: changed-files-specific
|
if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||||
uses: tj-actions/changed-files@v35
|
uses: codecov/codecov-action@v3
|
||||||
with:
|
with:
|
||||||
files: |
|
# not required for public repos, but intermittently fails otherwise
|
||||||
src/**
|
token: ${{ secrets.CODECOV_TOKEN }}
|
||||||
-
|
# future expansion
|
||||||
name: List all changed files
|
flags: backend
|
||||||
run: |
|
|
||||||
for file in ${{ steps.changed-files-specific.outputs.all_changed_files }}; do
|
|
||||||
echo "${file} was changed"
|
|
||||||
done
|
|
||||||
-
|
|
||||||
name: Publish coverage results
|
|
||||||
if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }} && steps.changed-files-specific.outputs.any_changed == 'true'
|
|
||||||
env:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
# https://github.com/coveralls-clients/coveralls-python/issues/251
|
|
||||||
run: |
|
|
||||||
cd src/
|
|
||||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run coveralls --service=github
|
|
||||||
-
|
-
|
||||||
name: Stop containers
|
name: Stop containers
|
||||||
if: always()
|
if: always()
|
||||||
@ -347,7 +334,7 @@ jobs:
|
|||||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||||
-
|
-
|
||||||
name: Build and push
|
name: Build and push
|
||||||
uses: docker/build-push-action@v3
|
uses: docker/build-push-action@v4
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
@ -442,21 +429,48 @@ jobs:
|
|||||||
-
|
-
|
||||||
name: Move files
|
name: Move files
|
||||||
run: |
|
run: |
|
||||||
mkdir dist
|
echo "Making dist folders"
|
||||||
mkdir dist/paperless-ngx
|
for directory in dist \
|
||||||
mkdir dist/paperless-ngx/scripts
|
dist/paperless-ngx \
|
||||||
cp .dockerignore .env Dockerfile Pipfile Pipfile.lock requirements.txt LICENSE README.md dist/paperless-ngx/
|
dist/paperless-ngx/scripts;
|
||||||
cp paperless.conf.example dist/paperless-ngx/paperless.conf
|
do
|
||||||
cp gunicorn.conf.py dist/paperless-ngx/gunicorn.conf.py
|
mkdir --verbose --parents ${directory}
|
||||||
cp -r docker/ dist/paperless-ngx/docker
|
done
|
||||||
cp scripts/*.service scripts/*.sh scripts/*.socket dist/paperless-ngx/scripts/
|
|
||||||
cp -r src/ dist/paperless-ngx/src
|
echo "Copying basic files"
|
||||||
cp -r docs/_build/html/ dist/paperless-ngx/docs
|
for file_name in .dockerignore \
|
||||||
mv static dist/paperless-ngx
|
.env \
|
||||||
|
Dockerfile \
|
||||||
|
Pipfile \
|
||||||
|
Pipfile.lock \
|
||||||
|
requirements.txt \
|
||||||
|
LICENSE \
|
||||||
|
README.md \
|
||||||
|
paperless.conf.example \
|
||||||
|
gunicorn.conf.py
|
||||||
|
do
|
||||||
|
cp --verbose ${file_name} dist/paperless-ngx/
|
||||||
|
done
|
||||||
|
mv --verbose dist/paperless-ngx/paperless.conf.example paperless.conf
|
||||||
|
|
||||||
|
echo "Copying Docker related files"
|
||||||
|
cp --recursive docker/ dist/paperless-ngx/docker
|
||||||
|
|
||||||
|
echo "Copying startup scripts"
|
||||||
|
cp --verbose scripts/*.service scripts/*.sh scripts/*.socket dist/paperless-ngx/scripts/
|
||||||
|
|
||||||
|
echo "Copying source files"
|
||||||
|
cp --recursive src/ dist/paperless-ngx/src
|
||||||
|
echo "Copying documentation"
|
||||||
|
cp --recursive docs/_build/html/ dist/paperless-ngx/docs
|
||||||
|
|
||||||
|
mv --verbose static dist/paperless-ngx
|
||||||
-
|
-
|
||||||
name: Make release package
|
name: Make release package
|
||||||
run: |
|
run: |
|
||||||
|
echo "Creating release archive"
|
||||||
cd dist
|
cd dist
|
||||||
|
sudo chown -R 1000:1000 paperless-ngx/
|
||||||
tar -cJf paperless-ngx.tar.xz paperless-ngx/
|
tar -cJf paperless-ngx.tar.xz paperless-ngx/
|
||||||
-
|
-
|
||||||
name: Upload release artifact
|
name: Upload release artifact
|
||||||
|
@ -45,7 +45,7 @@ jobs:
|
|||||||
uses: docker/setup-qemu-action@v2
|
uses: docker/setup-qemu-action@v2
|
||||||
-
|
-
|
||||||
name: Build ${{ fromJSON(inputs.build-json).name }}
|
name: Build ${{ fromJSON(inputs.build-json).name }}
|
||||||
uses: docker/build-push-action@v3
|
uses: docker/build-push-action@v4
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ${{ inputs.dockerfile }}
|
file: ${{ inputs.dockerfile }}
|
||||||
|
17
Dockerfile
17
Dockerfile
@ -1,4 +1,5 @@
|
|||||||
# syntax=docker/dockerfile:1.4
|
# syntax=docker/dockerfile:1.4
|
||||||
|
# https://github.com/moby/buildkit/blob/master/frontend/dockerfile/docs/reference.md
|
||||||
|
|
||||||
FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend
|
FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend
|
||||||
|
|
||||||
@ -61,10 +62,6 @@ ARG PSYCOPG2_VERSION
|
|||||||
|
|
||||||
# Packages need for running
|
# Packages need for running
|
||||||
ARG RUNTIME_PACKAGES="\
|
ARG RUNTIME_PACKAGES="\
|
||||||
# Python
|
|
||||||
python3 \
|
|
||||||
python3-pip \
|
|
||||||
python3-setuptools \
|
|
||||||
# General utils
|
# General utils
|
||||||
curl \
|
curl \
|
||||||
# Docker specific
|
# Docker specific
|
||||||
@ -128,7 +125,7 @@ RUN set -eux \
|
|||||||
&& apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \
|
&& apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
&& echo "Installing supervisor" \
|
&& echo "Installing supervisor" \
|
||||||
&& python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor==4.2.4
|
&& python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor==4.2.5
|
||||||
|
|
||||||
# Copy gunicorn config
|
# Copy gunicorn config
|
||||||
# Changes very infrequently
|
# Changes very infrequently
|
||||||
@ -137,7 +134,6 @@ WORKDIR /usr/src/paperless/
|
|||||||
COPY gunicorn.conf.py .
|
COPY gunicorn.conf.py .
|
||||||
|
|
||||||
# setup docker-specific things
|
# setup docker-specific things
|
||||||
# Use mounts to avoid copying installer files into the image
|
|
||||||
# These change sometimes, but rarely
|
# These change sometimes, but rarely
|
||||||
WORKDIR /usr/src/paperless/src/docker/
|
WORKDIR /usr/src/paperless/src/docker/
|
||||||
|
|
||||||
@ -179,7 +175,6 @@ RUN set -eux \
|
|||||||
&& ./install_management_commands.sh
|
&& ./install_management_commands.sh
|
||||||
|
|
||||||
# Install the built packages from the installer library images
|
# Install the built packages from the installer library images
|
||||||
# Use mounts to avoid copying installer files into the image
|
|
||||||
# These change sometimes
|
# These change sometimes
|
||||||
RUN set -eux \
|
RUN set -eux \
|
||||||
&& echo "Getting binaries" \
|
&& echo "Getting binaries" \
|
||||||
@ -203,7 +198,8 @@ RUN set -eux \
|
|||||||
&& python3 -m pip list \
|
&& python3 -m pip list \
|
||||||
&& echo "Cleaning up image layer" \
|
&& echo "Cleaning up image layer" \
|
||||||
&& cd ../ \
|
&& cd ../ \
|
||||||
&& rm -rf paperless-ngx
|
&& rm -rf paperless-ngx \
|
||||||
|
&& rm paperless-ngx.tar.gz
|
||||||
|
|
||||||
WORKDIR /usr/src/paperless/src/
|
WORKDIR /usr/src/paperless/src/
|
||||||
|
|
||||||
@ -247,11 +243,12 @@ COPY ./src ./
|
|||||||
COPY --from=compile-frontend /src/src/documents/static/frontend/ ./documents/static/frontend/
|
COPY --from=compile-frontend /src/src/documents/static/frontend/ ./documents/static/frontend/
|
||||||
|
|
||||||
# add users, setup scripts
|
# add users, setup scripts
|
||||||
|
# Mount the compiled frontend to expected location
|
||||||
RUN set -eux \
|
RUN set -eux \
|
||||||
&& addgroup --gid 1000 paperless \
|
&& addgroup --gid 1000 paperless \
|
||||||
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
|
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
|
||||||
&& chown -R paperless:paperless ../ \
|
&& chown -R paperless:paperless /usr/src/paperless \
|
||||||
&& gosu paperless python3 manage.py collectstatic --clear --no-input \
|
&& gosu paperless python3 manage.py collectstatic --clear --no-input --link \
|
||||||
&& gosu paperless python3 manage.py compilemessages
|
&& gosu paperless python3 manage.py compilemessages
|
||||||
|
|
||||||
VOLUME ["/usr/src/paperless/data", \
|
VOLUME ["/usr/src/paperless/data", \
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
[](https://github.com/paperless-ngx/paperless-ngx/actions)
|
[](https://github.com/paperless-ngx/paperless-ngx/actions)
|
||||||
[](https://crowdin.com/project/paperless-ngx)
|
[](https://crowdin.com/project/paperless-ngx)
|
||||||
[](https://docs.paperless-ngx.com)
|
[](https://docs.paperless-ngx.com)
|
||||||
[](https://coveralls.io/github/paperless-ngx/paperless-ngx?branch=master)
|
[](https://codecov.io/gh/paperless-ngx/paperless-ngx)
|
||||||
[](https://matrix.to/#/%23paperlessngx%3Amatrix.org)
|
[](https://matrix.to/#/%23paperlessngx%3Amatrix.org)
|
||||||
[](https://demo.paperless-ngx.com)
|
[](https://demo.paperless-ngx.com)
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ django_checks() {
|
|||||||
|
|
||||||
search_index() {
|
search_index() {
|
||||||
|
|
||||||
local -r index_version=2
|
local -r index_version=3
|
||||||
local -r index_version_file=${DATA_DIR}/.index_version
|
local -r index_version_file=${DATA_DIR}/.index_version
|
||||||
|
|
||||||
if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then
|
if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then
|
||||||
|
@ -121,7 +121,17 @@ Executed after the consumer sees a new document in the consumption
|
|||||||
folder, but before any processing of the document is performed. This
|
folder, but before any processing of the document is performed. This
|
||||||
script can access the following relevant environment variables set:
|
script can access the following relevant environment variables set:
|
||||||
|
|
||||||
- `DOCUMENT_SOURCE_PATH`
|
| Environment Variable | Description |
|
||||||
|
| ----------------------- | ------------------------------------------------------------ |
|
||||||
|
| `DOCUMENT_SOURCE_PATH` | Original path of the consumed document |
|
||||||
|
| `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on |
|
||||||
|
|
||||||
|
!!! note
|
||||||
|
|
||||||
|
Pre-consume scripts which modify the document should only change
|
||||||
|
the `DOCUMENT_WORKING_PATH` file or a second consume task may
|
||||||
|
be triggered, leading to failures as two tasks work on the
|
||||||
|
same document path
|
||||||
|
|
||||||
A simple but common example for this would be creating a simple script
|
A simple but common example for this would be creating a simple script
|
||||||
like this:
|
like this:
|
||||||
@ -130,7 +140,7 @@ like this:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH}
|
pdf2pdfocr.py -i ${DOCUMENT_WORKING_PATH}
|
||||||
```
|
```
|
||||||
|
|
||||||
`/etc/paperless.conf`
|
`/etc/paperless.conf`
|
||||||
@ -157,27 +167,37 @@ Executed after the consumer has successfully processed a document and
|
|||||||
has moved it into paperless. It receives the following environment
|
has moved it into paperless. It receives the following environment
|
||||||
variables:
|
variables:
|
||||||
|
|
||||||
- `DOCUMENT_ID`
|
| Environment Variable | Description |
|
||||||
- `DOCUMENT_FILE_NAME`
|
| ---------------------------- | --------------------------------------------- |
|
||||||
- `DOCUMENT_CREATED`
|
| `DOCUMENT_ID` | Database primary key of the document |
|
||||||
- `DOCUMENT_MODIFIED`
|
| `DOCUMENT_FILE_NAME` | Formatted filename, not including paths |
|
||||||
- `DOCUMENT_ADDED`
|
| `DOCUMENT_CREATED` | Date & time when document created |
|
||||||
- `DOCUMENT_SOURCE_PATH`
|
| `DOCUMENT_MODIFIED` | Date & time when document was last modified |
|
||||||
- `DOCUMENT_ARCHIVE_PATH`
|
| `DOCUMENT_ADDED` | Date & time when document was added |
|
||||||
- `DOCUMENT_THUMBNAIL_PATH`
|
| `DOCUMENT_SOURCE_PATH` | Path to the original document file |
|
||||||
- `DOCUMENT_DOWNLOAD_URL`
|
| `DOCUMENT_ARCHIVE_PATH` | Path to the generate archive file (if any) |
|
||||||
- `DOCUMENT_THUMBNAIL_URL`
|
| `DOCUMENT_THUMBNAIL_PATH` | Path to the generated thumbnail |
|
||||||
- `DOCUMENT_CORRESPONDENT`
|
| `DOCUMENT_DOWNLOAD_URL` | URL for document download |
|
||||||
- `DOCUMENT_TAGS`
|
| `DOCUMENT_THUMBNAIL_URL` | URL for the document thumbnail |
|
||||||
- `DOCUMENT_ORIGINAL_FILENAME`
|
| `DOCUMENT_CORRESPONDENT` | Assigned correspondent (if any) |
|
||||||
|
| `DOCUMENT_TAGS` | Comma separated list of tags applied (if any) |
|
||||||
|
| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document |
|
||||||
|
|
||||||
The script can be in any language, but for a simple shell script
|
The script can be in any language, A simple shell script example:
|
||||||
example, you can take a look at
|
|
||||||
[post-consumption-example.sh](https://github.com/paperless-ngx/paperless-ngx/blob/main/scripts/post-consumption-example.sh)
|
```bash title="post-consumption-example"
|
||||||
in this project.
|
--8<-- "./scripts/post-consumption-example.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
!!! note
|
||||||
|
|
||||||
The post consumption script cannot cancel the consumption process.
|
The post consumption script cannot cancel the consumption process.
|
||||||
|
|
||||||
|
!!! warning
|
||||||
|
|
||||||
|
The post consumption script should not modify the document files
|
||||||
|
directly
|
||||||
|
|
||||||
The script's stdout and stderr will be logged line by line to the
|
The script's stdout and stderr will be logged line by line to the
|
||||||
webserver log, along with the exit code of the script.
|
webserver log, along with the exit code of the script.
|
||||||
|
|
||||||
|
@ -141,7 +141,8 @@ directory.
|
|||||||
files created using "collectstatic" manager command are stored.
|
files created using "collectstatic" manager command are stored.
|
||||||
|
|
||||||
Unless you're doing something fancy, there is no need to override
|
Unless you're doing something fancy, there is no need to override
|
||||||
this.
|
this. If this is changed, you may need to run
|
||||||
|
`collectstatic` again.
|
||||||
|
|
||||||
Defaults to "../static/", relative to the "src" directory.
|
Defaults to "../static/", relative to the "src" directory.
|
||||||
|
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
# Development
|
# Development
|
||||||
|
|
||||||
This section describes the steps you need to take to start development
|
This section describes the steps you need to take to start development
|
||||||
on paperless-ngx.
|
on Paperless-ngx.
|
||||||
|
|
||||||
Check out the source from github. The repository is organized in the
|
Check out the source from GitHub. The repository is organized in the
|
||||||
following way:
|
following way:
|
||||||
|
|
||||||
- `main` always represents the latest release and will only see
|
- `main` always represents the latest release and will only see
|
||||||
@ -12,7 +12,7 @@ following way:
|
|||||||
- `feature-X` contain bigger changes that will be in some release, but
|
- `feature-X` contain bigger changes that will be in some release, but
|
||||||
not necessarily the next one.
|
not necessarily the next one.
|
||||||
|
|
||||||
When making functional changes to paperless, _always_ make your changes
|
When making functional changes to Paperless-ngx, _always_ make your changes
|
||||||
on the `dev` branch.
|
on the `dev` branch.
|
||||||
|
|
||||||
Apart from that, the folder structure is as follows:
|
Apart from that, the folder structure is as follows:
|
||||||
@ -24,9 +24,9 @@ Apart from that, the folder structure is as follows:
|
|||||||
development.
|
development.
|
||||||
- `docker/` - Files required to build the docker image.
|
- `docker/` - Files required to build the docker image.
|
||||||
|
|
||||||
## Contributing to Paperless
|
## Contributing to Paperless-ngx
|
||||||
|
|
||||||
Maybe you've been using Paperless for a while and want to add a feature
|
Maybe you've been using Paperless-ngx for a while and want to add a feature
|
||||||
or two, or maybe you've come across a bug that you have some ideas how
|
or two, or maybe you've come across a bug that you have some ideas how
|
||||||
to solve. The beauty of open source software is that you can see what's
|
to solve. The beauty of open source software is that you can see what's
|
||||||
wrong and help to get it fixed for everyone!
|
wrong and help to get it fixed for everyone!
|
||||||
@ -36,13 +36,13 @@ conduct](https://github.com/paperless-ngx/paperless-ngx/blob/main/CODE_OF_CONDUC
|
|||||||
and other important information in the [contributing
|
and other important information in the [contributing
|
||||||
guidelines](https://github.com/paperless-ngx/paperless-ngx/blob/main/CONTRIBUTING.md).
|
guidelines](https://github.com/paperless-ngx/paperless-ngx/blob/main/CONTRIBUTING.md).
|
||||||
|
|
||||||
## Code formatting with pre-commit Hooks
|
## Code formatting with pre-commit hooks
|
||||||
|
|
||||||
To ensure a consistent style and formatting across the project source,
|
To ensure a consistent style and formatting across the project source,
|
||||||
the project utilizes a Git [`pre-commit`](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks)
|
the project utilizes Git [`pre-commit`](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks)
|
||||||
hook to perform some formatting and linting before a commit is allowed.
|
hooks to perform some formatting and linting before a commit is allowed.
|
||||||
That way, everyone uses the same style and some common issues can be caught
|
That way, everyone uses the same style and some common issues can be caught
|
||||||
early on. See below for installation instructions.
|
early on.
|
||||||
|
|
||||||
Once installed, hooks will run when you commit. If the formatting isn't
|
Once installed, hooks will run when you commit. If the formatting isn't
|
||||||
quite right or a linter catches something, the commit will be rejected.
|
quite right or a linter catches something, the commit will be rejected.
|
||||||
@ -51,129 +51,110 @@ as the Python formatting tool `black`, will format failing
|
|||||||
files, so all you need to do is `git add` those files again
|
files, so all you need to do is `git add` those files again
|
||||||
and retry your commit.
|
and retry your commit.
|
||||||
|
|
||||||
## Initial setup and first start
|
## General setup
|
||||||
|
|
||||||
After you forked and cloned the code from github you need to perform a
|
After you forked and cloned the code from GitHub you need to perform a
|
||||||
first-time setup. To do the setup you need to perform the steps from the
|
first-time setup.
|
||||||
following chapters in a certain order:
|
|
||||||
|
!!! note
|
||||||
|
|
||||||
|
Every command is executed directly from the root folder of the project unless specified otherwise.
|
||||||
|
|
||||||
1. Install prerequisites + pipenv as mentioned in
|
1. Install prerequisites + pipenv as mentioned in
|
||||||
[Bare metal route](/setup#bare_metal)
|
[Bare metal route](/setup#bare_metal).
|
||||||
|
|
||||||
2. Copy `paperless.conf.example` to `paperless.conf` and enable debug
|
2. Copy `paperless.conf.example` to `paperless.conf` and enable debug
|
||||||
mode.
|
mode within the file via `PAPERLESS_DEBUG=true`.
|
||||||
|
|
||||||
3. Install the Angular CLI interface:
|
3. Create `consume` and `media` directories:
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
$ npm install -g @angular/cli
|
$ mkdir -p consume media
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Install pre-commit hooks
|
4. Install the Python dependencies:
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
pre-commit install
|
$ pipenv install --dev
|
||||||
```
|
```
|
||||||
|
|
||||||
5. Create `consume` and `media` folders in the cloned root folder.
|
!!! note
|
||||||
|
|
||||||
```shell-session
|
Using a virtual environment is highly recommended. You can spawn one via `pipenv shell`.
|
||||||
mkdir -p consume media
|
Make sure you're using Python 3.10.x or lower. Otherwise you might
|
||||||
|
get issues with building dependencies. You can use
|
||||||
|
[pyenv](https://github.com/pyenv/pyenv) to install a specific
|
||||||
|
Python version.
|
||||||
|
|
||||||
|
5. Install pre-commit hooks:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ pre-commit install
|
||||||
```
|
```
|
||||||
|
|
||||||
6. You can now either ...
|
6. Apply migrations and create a superuser for your development instance:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# src/
|
||||||
|
|
||||||
|
$ python3 manage.py migrate
|
||||||
|
$ python3 manage.py createsuperuser
|
||||||
|
```
|
||||||
|
|
||||||
|
7. You can now either ...
|
||||||
|
|
||||||
- install redis or
|
- install redis or
|
||||||
|
|
||||||
- use the included scripts/start-services.sh to use docker to fire
|
- use the included `scripts/start_services.sh` to use docker to fire
|
||||||
up a redis instance (and some other services such as tika,
|
up a redis instance (and some other services such as tika,
|
||||||
gotenberg and a database server) or
|
gotenberg and a database server) or
|
||||||
|
|
||||||
- spin up a bare redis container
|
- spin up a bare redis container
|
||||||
|
|
||||||
```shell-session
|
```
|
||||||
docker run -d -p 6379:6379 --restart unless-stopped redis:latest
|
$ docker run -d -p 6379:6379 --restart unless-stopped redis:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
7. Install the python dependencies by performing in the src/ directory.
|
8. Continue with either back-end or front-end development – or both :-).
|
||||||
|
|
||||||
```shell-session
|
|
||||||
pipenv install --dev
|
|
||||||
```
|
|
||||||
|
|
||||||
!!! note
|
|
||||||
|
|
||||||
Make sure you're using python 3.10.x or lower. Otherwise you might
|
|
||||||
get issues with building dependencies. You can use
|
|
||||||
[pyenv](https://github.com/pyenv/pyenv) to install a specific
|
|
||||||
python version.
|
|
||||||
|
|
||||||
8. Generate the static UI so you can perform a login to get session
|
|
||||||
that is required for frontend development (this needs to be done one
|
|
||||||
time only). From src-ui directory:
|
|
||||||
|
|
||||||
```shell-session
|
|
||||||
npm install .
|
|
||||||
./node_modules/.bin/ng build --configuration production
|
|
||||||
```
|
|
||||||
|
|
||||||
9. Apply migrations and create a superuser for your dev instance:
|
|
||||||
|
|
||||||
```shell-session
|
|
||||||
python3 manage.py migrate
|
|
||||||
python3 manage.py createsuperuser
|
|
||||||
```
|
|
||||||
|
|
||||||
10. Now spin up the dev backend. Depending on which part of paperless
|
|
||||||
you're developing for, you need to have some or all of them
|
|
||||||
running.
|
|
||||||
|
|
||||||
```shell-session
|
|
||||||
python3 manage.py runserver & python3 manage.py document_consumer & celery --app paperless worker
|
|
||||||
```
|
|
||||||
|
|
||||||
11. Login with the superuser credentials provided in step 8 at
|
|
||||||
`http://localhost:8000` to create a session that enables you to use
|
|
||||||
the backend.
|
|
||||||
|
|
||||||
Backend development environment is now ready, to start Frontend
|
|
||||||
development go to `/src-ui` and run `ng serve`. From there you can use
|
|
||||||
`http://localhost:4200` for a preview.
|
|
||||||
|
|
||||||
## Back end development
|
## Back end development
|
||||||
|
|
||||||
The backend is a [Django](https://www.djangoproject.com/) application. PyCharm works well for development,
|
The back end is a [Django](https://www.djangoproject.com/) application. [PyCharm](https://www.jetbrains.com/de-de/pycharm/) as well as [Visual Studio Code](https://code.visualstudio.com) work well for development, but you can use whatever you want.
|
||||||
but you can use whatever you want.
|
|
||||||
|
|
||||||
Configure the IDE to use the src/ folder as the base source folder.
|
Configure the IDE to use the `src/`-folder as the base source folder.
|
||||||
Configure the following launch configurations in your IDE:
|
Configure the following launch configurations in your IDE:
|
||||||
|
|
||||||
- `python3 manage.py runserver`
|
- `python3 manage.py runserver`
|
||||||
- `celery --app paperless worker`
|
|
||||||
- `python3 manage.py document_consumer`
|
- `python3 manage.py document_consumer`
|
||||||
|
- `celery --app paperless worker -l DEBUG` (or any other log level)
|
||||||
|
|
||||||
To start them all:
|
To start them all:
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
python3 manage.py runserver & python3 manage.py document_consumer & celery --app paperless worker
|
# src/
|
||||||
|
|
||||||
|
$ python3 manage.py runserver & \
|
||||||
|
python3 manage.py document_consumer & \
|
||||||
|
celery --app paperless worker -l DEBUG
|
||||||
```
|
```
|
||||||
|
|
||||||
Testing and code style:
|
You might need the front end to test your back end code. This assumes that you have AngularJS installed on your system. Go to the [Front end development](#front-end-development) section for further details. To build the front end once use this commmand:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# src-ui/
|
||||||
|
|
||||||
|
$ npm install
|
||||||
|
$ ng build --configuration production
|
||||||
|
```
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
- Run `pytest` in the `src/` directory to execute all tests. This also
|
- Run `pytest` in the `src/` directory to execute all tests. This also
|
||||||
generates a HTML coverage report. When runnings test, paperless.conf
|
generates a HTML coverage report. When runnings test, `paperless.conf`
|
||||||
is loaded as well. However: the tests rely on the default
|
is loaded as well. However, the tests rely on the default
|
||||||
configuration. This is not ideal. But for now, make sure no settings
|
configuration. This is not ideal. But for now, make sure no settings
|
||||||
except for DEBUG are overridden when testing.
|
except for DEBUG are overridden when testing.
|
||||||
|
|
||||||
- Coding style is enforced by the Git pre-commit hooks. These will
|
|
||||||
ensure your code is formatted and do some linting when you do a `git commit`.
|
|
||||||
|
|
||||||
- You can also run `black` manually to format your code
|
|
||||||
|
|
||||||
- The `pre-commit` hooks will modify files and interact with each other.
|
|
||||||
It may take a couple of `git add`, `git commit` cycle to satisfy them.
|
|
||||||
|
|
||||||
!!! note
|
!!! note
|
||||||
|
|
||||||
The line length rule E501 is generally useful for getting multiple
|
The line length rule E501 is generally useful for getting multiple
|
||||||
@ -184,23 +165,31 @@ Testing and code style:
|
|||||||
|
|
||||||
## Front end development
|
## Front end development
|
||||||
|
|
||||||
The front end is built using Angular. In order to get started, you need
|
The front end is built using AngularJS. In order to get started, you need Node.js (version 14.15+) and
|
||||||
`npm`. Install the Angular CLI interface with
|
`npm`.
|
||||||
|
|
||||||
```shell-session
|
!!! note
|
||||||
|
|
||||||
|
The following commands are all performed in the `src-ui`-directory. You will need a running back end (including an active session) to connect to the back end API. To spin it up refer to the commands under the section [above](#back-end-development).
|
||||||
|
|
||||||
|
1. Install the Angular CLI. You might need sudo privileges
|
||||||
|
to perform this command:
|
||||||
|
|
||||||
|
```bash
|
||||||
$ npm install -g @angular/cli
|
$ npm install -g @angular/cli
|
||||||
```
|
```
|
||||||
|
|
||||||
and make sure that it's on your path. Next, in the src-ui/ directory,
|
2. Make sure that it's on your path.
|
||||||
install the required dependencies of the project.
|
|
||||||
|
|
||||||
```shell-session
|
3. Install all neccessary modules:
|
||||||
|
|
||||||
|
```bash
|
||||||
$ npm install
|
$ npm install
|
||||||
```
|
```
|
||||||
|
|
||||||
You can launch a development server by running
|
4. You can launch a development server by running:
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
$ ng serve
|
$ ng serve
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -208,24 +197,17 @@ This will automatically update whenever you save. However, in-place
|
|||||||
compilation might fail on syntax errors, in which case you need to
|
compilation might fail on syntax errors, in which case you need to
|
||||||
restart it.
|
restart it.
|
||||||
|
|
||||||
By default, the development server is available on
|
By default, the development server is available on `http://localhost:4200/` and is configured to access the API at
|
||||||
`http://localhost:4200/` and is configured to access the API at
|
`http://localhost:8000/api/`, which is the default of the backend. If you enabled `DEBUG` on the back end, several security overrides for allowed hosts, CORS and X-Frame-Options are in place so that the front end behaves exactly as in production.
|
||||||
`http://localhost:8000/api/`, which is the default of the backend. If
|
|
||||||
you enabled DEBUG on the back end, several security overrides for
|
|
||||||
allowed hosts, CORS and X-Frame-Options are in place so that the front
|
|
||||||
end behaves exactly as in production. This also relies on you being
|
|
||||||
logged into the back end. Without a valid session, The front end will
|
|
||||||
simply not work.
|
|
||||||
|
|
||||||
Testing and code style:
|
### Testing and code style
|
||||||
|
|
||||||
- The front end code (.ts, .html, .scss) use `prettier` for code
|
- The front end code (.ts, .html, .scss) use `prettier` for code
|
||||||
formatting via the Git `pre-commit` hooks which run automatically on
|
formatting via the Git `pre-commit` hooks which run automatically on
|
||||||
commit. See
|
commit. See [above](#code-formatting-with-pre-commit-hooks) for installation instructions. You can also run this via the CLI with a
|
||||||
[above](#code-formatting-with-pre-commit-hooks) for installation. You can also run this via cli with a
|
|
||||||
command such as
|
command such as
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
$ git ls-files -- '*.ts' | xargs pre-commit run prettier --files
|
$ git ls-files -- '*.ts' | xargs pre-commit run prettier --files
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -233,21 +215,20 @@ Testing and code style:
|
|||||||
for significantly more front end tests. Unit tests and e2e tests,
|
for significantly more front end tests. Unit tests and e2e tests,
|
||||||
respectively, can be run non-interactively with:
|
respectively, can be run non-interactively with:
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
$ ng test
|
$ ng test
|
||||||
$ npm run e2e:ci
|
$ npm run e2e:ci
|
||||||
```
|
```
|
||||||
|
|
||||||
Cypress also includes a UI which can be run from within the `src-ui`
|
- Cypress also includes a UI which can be run with:
|
||||||
directory with
|
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
$ ./node_modules/.bin/cypress open
|
$ ./node_modules/.bin/cypress open
|
||||||
```
|
```
|
||||||
|
|
||||||
In order to build the front end and serve it as part of django, execute
|
- In order to build the front end and serve it as part of Django, execute:
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
$ ng build --configuration production
|
$ ng build --configuration production
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -257,25 +238,25 @@ that authentication is working.
|
|||||||
|
|
||||||
## Localization
|
## Localization
|
||||||
|
|
||||||
Paperless is available in many different languages. Since paperless
|
Paperless-ngx is available in many different languages. Since Paperless-ngx
|
||||||
consists both of a django application and an Angular front end, both
|
consists both of a Django application and an Angular front end, both
|
||||||
these parts have to be translated separately.
|
these parts have to be translated separately.
|
||||||
|
|
||||||
### Front end localization
|
### Front end localization
|
||||||
|
|
||||||
- The Angular front end does localization according to the [Angular
|
- The Angular front end does localization according to the [Angular
|
||||||
documentation](https://angular.io/guide/i18n).
|
documentation](https://angular.io/guide/i18n).
|
||||||
- The source language of the project is "en_US".
|
- The source language of the project is "en_US".
|
||||||
- The source strings end up in the file "src-ui/messages.xlf".
|
- The source strings end up in the file `src-ui/messages.xlf`.
|
||||||
- The translated strings need to be placed in the
|
- The translated strings need to be placed in the
|
||||||
"src-ui/src/locale/" folder.
|
`src-ui/src/locale/` folder.
|
||||||
- In order to extract added or changed strings from the source files,
|
- In order to extract added or changed strings from the source files,
|
||||||
call `ng xi18n --ivy`.
|
call `ng xi18n --ivy`.
|
||||||
|
|
||||||
Adding new languages requires adding the translated files in the
|
Adding new languages requires adding the translated files in the
|
||||||
"src-ui/src/locale/" folder and adjusting a couple files.
|
`src-ui/src/locale/` folder and adjusting a couple files.
|
||||||
|
|
||||||
1. Adjust "src-ui/angular.json":
|
1. Adjust `src-ui/angular.json`:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"i18n": {
|
"i18n": {
|
||||||
@ -292,7 +273,7 @@ Adding new languages requires adding the translated files in the
|
|||||||
```
|
```
|
||||||
|
|
||||||
2. Add the language to the available options in
|
2. Add the language to the available options in
|
||||||
"src-ui/src/app/services/settings.service.ts":
|
`src-ui/src/app/services/settings.service.ts`:
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
getLanguageOptions(): LanguageOption[] {
|
getLanguageOptions(): LanguageOption[] {
|
||||||
@ -313,7 +294,7 @@ Adding new languages requires adding the translated files in the
|
|||||||
and "yyyy".
|
and "yyyy".
|
||||||
|
|
||||||
3. Import and register the Angular data for this locale in
|
3. Import and register the Angular data for this locale in
|
||||||
"src-ui/src/app/app.module.ts":
|
`src-ui/src/app/app.module.ts`:
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
import localeDe from '@angular/common/locales/de'
|
import localeDe from '@angular/common/locales/de'
|
||||||
@ -326,10 +307,10 @@ A majority of the strings that appear in the back end appear only when
|
|||||||
the admin is used. However, some of these are still shown on the front
|
the admin is used. However, some of these are still shown on the front
|
||||||
end (such as error messages).
|
end (such as error messages).
|
||||||
|
|
||||||
- The django application does localization according to the [django
|
- The django application does localization according to the [Django
|
||||||
documentation](https://docs.djangoproject.com/en/3.1/topics/i18n/translation/).
|
documentation](https://docs.djangoproject.com/en/3.1/topics/i18n/translation/).
|
||||||
- The source language of the project is "en_US".
|
- The source language of the project is "en_US".
|
||||||
- Localization files end up in the folder "src/locale/".
|
- Localization files end up in the folder `src/locale/`.
|
||||||
- In order to extract strings from the application, call
|
- In order to extract strings from the application, call
|
||||||
`python3 manage.py makemessages -l en_US`. This is important after
|
`python3 manage.py makemessages -l en_US`. This is important after
|
||||||
making changes to translatable strings.
|
making changes to translatable strings.
|
||||||
@ -340,8 +321,8 @@ end (such as error messages).
|
|||||||
command.
|
command.
|
||||||
|
|
||||||
Adding new languages requires adding the translated files in the
|
Adding new languages requires adding the translated files in the
|
||||||
"src/locale/" folder and adjusting the file
|
`src/locale/`-folder and adjusting the file
|
||||||
"src/paperless/settings.py" to include the new language:
|
`src/paperless/settings.py` to include the new language:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
LANGUAGES = [
|
LANGUAGES = [
|
||||||
@ -360,18 +341,27 @@ LANGUAGES = [
|
|||||||
The documentation is built using material-mkdocs, see their [documentation](https://squidfunk.github.io/mkdocs-material/reference/).
|
The documentation is built using material-mkdocs, see their [documentation](https://squidfunk.github.io/mkdocs-material/reference/).
|
||||||
If you want to build the documentation locally, this is how you do it:
|
If you want to build the documentation locally, this is how you do it:
|
||||||
|
|
||||||
1. Install python dependencies.
|
1. Have an active pipenv shell (`pipenv shell`) and install Python dependencies:
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
$ cd /path/to/paperless
|
|
||||||
$ pipenv install --dev
|
$ pipenv install --dev
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Build the documentation
|
2. Build the documentation
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
$ cd /path/to/paperless
|
$ mkdocs build --config-file mkdocs.yml
|
||||||
$ pipenv mkdocs build --config-file mkdocs.yml
|
```
|
||||||
|
|
||||||
|
_alternatively..._
|
||||||
|
|
||||||
|
3. Serve the documentation. This will spin up a
|
||||||
|
copy of the documentation at http://127.0.0.1:8000
|
||||||
|
that will automatically refresh every time you change
|
||||||
|
something.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ mkdocs serve
|
||||||
```
|
```
|
||||||
|
|
||||||
## Building the Docker image
|
## Building the Docker image
|
||||||
@ -384,35 +374,35 @@ helper script `build-docker-image.sh`.
|
|||||||
|
|
||||||
Building the docker image from source:
|
Building the docker image from source:
|
||||||
|
|
||||||
```shell-session
|
```bash
|
||||||
./build-docker-image.sh Dockerfile -t <your-tag>
|
./build-docker-image.sh Dockerfile -t <your-tag>
|
||||||
```
|
```
|
||||||
|
|
||||||
## Extending Paperless
|
## Extending Paperless-ngx
|
||||||
|
|
||||||
Paperless does not have any fancy plugin systems and will probably never
|
Paperless-ngx does not have any fancy plugin systems and will probably never
|
||||||
have. However, some parts of the application have been designed to allow
|
have. However, some parts of the application have been designed to allow
|
||||||
easy integration of additional features without any modification to the
|
easy integration of additional features without any modification to the
|
||||||
base code.
|
base code.
|
||||||
|
|
||||||
### Making custom parsers
|
### Making custom parsers
|
||||||
|
|
||||||
Paperless uses parsers to add documents to paperless. A parser is
|
Paperless-ngx uses parsers to add documents. A parser is
|
||||||
responsible for:
|
responsible for:
|
||||||
|
|
||||||
- Retrieve the content from the original
|
- Retrieving the content from the original
|
||||||
- Create a thumbnail
|
- Creating a thumbnail
|
||||||
- Optional: Retrieve a created date from the original
|
- _optional:_ Retrieving a created date from the original
|
||||||
- Optional: Create an archived document from the original
|
- _optional:_ Creating an archived document from the original
|
||||||
|
|
||||||
Custom parsers can be added to paperless to support more file types. In
|
Custom parsers can be added to Paperless-ngx to support more file types. In
|
||||||
order to do that, you need to write the parser itself and announce its
|
order to do that, you need to write the parser itself and announce its
|
||||||
existence to paperless.
|
existence to Paperless-ngx.
|
||||||
|
|
||||||
The parser itself must extend `documents.parsers.DocumentParser` and
|
The parser itself must extend `documents.parsers.DocumentParser` and
|
||||||
must implement the methods `parse` and `get_thumbnail`. You can provide
|
must implement the methods `parse` and `get_thumbnail`. You can provide
|
||||||
your own implementation to `get_date` if you don't want to rely on
|
your own implementation to `get_date` if you don't want to rely on
|
||||||
paperless' default date guessing mechanisms.
|
Paperless-ngx' default date guessing mechanisms.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
class MyCustomParser(DocumentParser):
|
class MyCustomParser(DocumentParser):
|
||||||
@ -444,7 +434,7 @@ to be empty and removed after consumption finished. You can use that
|
|||||||
directory to store any intermediate files and also use it to store the
|
directory to store any intermediate files and also use it to store the
|
||||||
thumbnail / archived document.
|
thumbnail / archived document.
|
||||||
|
|
||||||
After that, you need to announce your parser to paperless. You need to
|
After that, you need to announce your parser to Paperless-ngx. You need to
|
||||||
connect a handler to the `document_consumer_declaration` signal. Have a
|
connect a handler to the `document_consumer_declaration` signal. Have a
|
||||||
look in the file `src/paperless_tesseract/apps.py` on how that's done.
|
look in the file `src/paperless_tesseract/apps.py` on how that's done.
|
||||||
The handler is a method that returns information about your parser:
|
The handler is a method that returns information about your parser:
|
||||||
@ -464,11 +454,11 @@ def myparser_consumer_declaration(sender, **kwargs):
|
|||||||
- `parser` is a reference to a class that extends `DocumentParser`.
|
- `parser` is a reference to a class that extends `DocumentParser`.
|
||||||
- `weight` is used whenever two or more parsers are able to parse a
|
- `weight` is used whenever two or more parsers are able to parse a
|
||||||
file: The parser with the higher weight wins. This can be used to
|
file: The parser with the higher weight wins. This can be used to
|
||||||
override the parsers provided by paperless.
|
override the parsers provided by Paperless-ngx.
|
||||||
- `mime_types` is a dictionary. The keys are the mime types your
|
- `mime_types` is a dictionary. The keys are the mime types your
|
||||||
parser supports and the value is the default file extension that
|
parser supports and the value is the default file extension that
|
||||||
paperless should use when storing files and serving them for
|
Paperless-ngx should use when storing files and serving them for
|
||||||
download. We could guess that from the file extensions, but some
|
download. We could guess that from the file extensions, but some
|
||||||
mime types have many extensions associated with them and the python
|
mime types have many extensions associated with them and the Python
|
||||||
methods responsible for guessing the extension do not always return
|
methods responsible for guessing the extension do not always return
|
||||||
the same value.
|
the same value.
|
||||||
|
@ -388,12 +388,7 @@ supported.
|
|||||||
```
|
```
|
||||||
|
|
||||||
8. Install python requirements from the `requirements.txt` file. It is
|
8. Install python requirements from the `requirements.txt` file. It is
|
||||||
up to you if you wish to use a virtual environment or not. First you
|
up to you if you wish to use a virtual environment or not. First you should update your pip, so it gets the actual packages.
|
||||||
should update your pip, so it gets the actual packages.
|
|
||||||
|
|
||||||
```shell-session
|
|
||||||
sudo -Hu paperless pip3 install --upgrade pip
|
|
||||||
```
|
|
||||||
|
|
||||||
```shell-session
|
```shell-session
|
||||||
sudo -Hu paperless pip3 install -r requirements.txt
|
sudo -Hu paperless pip3 install -r requirements.txt
|
||||||
|
@ -41,6 +41,7 @@ markdown_extensions:
|
|||||||
anchor_linenums: true
|
anchor_linenums: true
|
||||||
- pymdownx.superfences
|
- pymdownx.superfences
|
||||||
- pymdownx.inlinehilite
|
- pymdownx.inlinehilite
|
||||||
|
- pymdownx.snippets
|
||||||
strict: true
|
strict: true
|
||||||
nav:
|
nav:
|
||||||
- index.md
|
- index.md
|
||||||
@ -54,7 +55,7 @@ nav:
|
|||||||
- 'FAQs': faq.md
|
- 'FAQs': faq.md
|
||||||
- troubleshooting.md
|
- troubleshooting.md
|
||||||
- changelog.md
|
- changelog.md
|
||||||
copyright: Copyright © 2016 - 2022 Daniel Quinn, Jonas Winkler, and the Paperless-ngx team
|
copyright: Copyright © 2016 - 2023 Daniel Quinn, Jonas Winkler, and the Paperless-ngx team
|
||||||
extra:
|
extra:
|
||||||
social:
|
social:
|
||||||
- icon: fontawesome/brands/github
|
- icon: fontawesome/brands/github
|
||||||
|
@ -192,7 +192,8 @@
|
|||||||
"cli": {
|
"cli": {
|
||||||
"schematicCollections": [
|
"schematicCollections": [
|
||||||
"@angular-eslint/schematics"
|
"@angular-eslint/schematics"
|
||||||
]
|
],
|
||||||
|
"analytics": false
|
||||||
},
|
},
|
||||||
"schematics": {
|
"schematics": {
|
||||||
"@angular-eslint/schematics:application": {
|
"@angular-eslint/schematics:application": {
|
||||||
|
14779
src-ui/package-lock.json
generated
14779
src-ui/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -13,14 +13,14 @@
|
|||||||
},
|
},
|
||||||
"private": true,
|
"private": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@angular/common": "~15.1.0",
|
"@angular/common": "~15.1.2",
|
||||||
"@angular/compiler": "~15.1.0",
|
"@angular/compiler": "~15.1.2",
|
||||||
"@angular/core": "~15.1.0",
|
"@angular/core": "~15.1.2",
|
||||||
"@angular/forms": "~15.1.0",
|
"@angular/forms": "~15.1.2",
|
||||||
"@angular/localize": "~15.1.0",
|
"@angular/localize": "~15.1.2",
|
||||||
"@angular/platform-browser": "~15.1.0",
|
"@angular/platform-browser": "~15.1.2",
|
||||||
"@angular/platform-browser-dynamic": "~15.1.0",
|
"@angular/platform-browser-dynamic": "~15.1.2",
|
||||||
"@angular/router": "~15.1.0",
|
"@angular/router": "~15.1.2",
|
||||||
"@ng-bootstrap/ng-bootstrap": "^14.0.1",
|
"@ng-bootstrap/ng-bootstrap": "^14.0.1",
|
||||||
"@ng-select/ng-select": "^10.0.1",
|
"@ng-select/ng-select": "^10.0.1",
|
||||||
"@ngneat/dirty-check-forms": "^3.0.3",
|
"@ngneat/dirty-check-forms": "^3.0.3",
|
||||||
@ -39,18 +39,18 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@angular-builders/jest": "15.0.0",
|
"@angular-builders/jest": "15.0.0",
|
||||||
"@angular-devkit/build-angular": "~15.1.0",
|
"@angular-devkit/build-angular": "~15.1.4",
|
||||||
"@angular-eslint/builder": "15.1.0",
|
"@angular-eslint/builder": "15.2.0",
|
||||||
"@angular-eslint/eslint-plugin": "15.1.0",
|
"@angular-eslint/eslint-plugin": "15.2.0",
|
||||||
"@angular-eslint/eslint-plugin-template": "15.1.0",
|
"@angular-eslint/eslint-plugin-template": "15.2.0",
|
||||||
"@angular-eslint/schematics": "15.1.0",
|
"@angular-eslint/schematics": "15.2.0",
|
||||||
"@angular-eslint/template-parser": "15.1.0",
|
"@angular-eslint/template-parser": "15.2.0",
|
||||||
"@angular/cli": "~15.1.0",
|
"@angular/cli": "~15.1.4",
|
||||||
"@angular/compiler-cli": "~15.1.0",
|
"@angular/compiler-cli": "~15.1.2",
|
||||||
"@types/jest": "28.1.6",
|
"@types/jest": "28.1.6",
|
||||||
"@types/node": "^18.7.23",
|
"@types/node": "^18.7.23",
|
||||||
"@typescript-eslint/eslint-plugin": "^5.43.0",
|
"@typescript-eslint/eslint-plugin": "^5.43.0",
|
||||||
"@typescript-eslint/parser": "^5.43.0",
|
"@typescript-eslint/parser": "^5.50.0",
|
||||||
"concurrently": "7.4.0",
|
"concurrently": "7.4.0",
|
||||||
"eslint": "^8.31.0",
|
"eslint": "^8.31.0",
|
||||||
"jest": "28.1.3",
|
"jest": "28.1.3",
|
||||||
|
@ -229,6 +229,10 @@ export class DocumentDetailComponent
|
|||||||
)
|
)
|
||||||
.subscribe({
|
.subscribe({
|
||||||
next: (titleValue) => {
|
next: (titleValue) => {
|
||||||
|
// In the rare case when the field changed just after debounced event was fired.
|
||||||
|
// We dont want to overwrite whats actually in the text field, so just return
|
||||||
|
if (titleValue !== this.titleInput.value) return
|
||||||
|
|
||||||
this.title = titleValue
|
this.title = titleValue
|
||||||
this.documentForm.patchValue({ title: titleValue })
|
this.documentForm.patchValue({ title: titleValue })
|
||||||
},
|
},
|
||||||
|
@ -5,7 +5,7 @@ export const environment = {
|
|||||||
apiBaseUrl: document.baseURI + 'api/',
|
apiBaseUrl: document.baseURI + 'api/',
|
||||||
apiVersion: '2',
|
apiVersion: '2',
|
||||||
appTitle: 'Paperless-ngx',
|
appTitle: 'Paperless-ngx',
|
||||||
version: '1.12.1-dev',
|
version: '1.12.2-dev',
|
||||||
webSocketHost: window.location.host,
|
webSocketHost: window.location.host,
|
||||||
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
|
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
|
||||||
webSocketBaseUrl: base_url.pathname + 'ws/',
|
webSocketBaseUrl: base_url.pathname + 'ws/',
|
||||||
|
@ -4,18 +4,17 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from math import ceil
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Dict
|
||||||
from typing import List
|
from typing import List
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from pdf2image import convert_from_path
|
from pdf2image import convert_from_path
|
||||||
|
from pdf2image.exceptions import PDFPageCountError
|
||||||
from pikepdf import Page
|
from pikepdf import Page
|
||||||
from pikepdf import PasswordError
|
|
||||||
from pikepdf import Pdf
|
from pikepdf import Pdf
|
||||||
from pikepdf import PdfImage
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from PIL import ImageSequence
|
from PIL import ImageSequence
|
||||||
from pyzbar import pyzbar
|
from pyzbar import pyzbar
|
||||||
@ -154,52 +153,15 @@ def scan_file_for_barcodes(
|
|||||||
(page_number, barcode_text) tuples
|
(page_number, barcode_text) tuples
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]:
|
|
||||||
detected_barcodes = []
|
|
||||||
with Pdf.open(pdf_filepath) as pdf:
|
|
||||||
for page_num, page in enumerate(pdf.pages):
|
|
||||||
for image_key in page.images:
|
|
||||||
pdfimage = PdfImage(page.images[image_key])
|
|
||||||
|
|
||||||
# This type is known to have issues:
|
|
||||||
# https://github.com/pikepdf/pikepdf/issues/401
|
|
||||||
if "/CCITTFaxDecode" in pdfimage.filters:
|
|
||||||
raise BarcodeImageFormatError(
|
|
||||||
"Unable to decode CCITTFaxDecode images",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Not all images can be transcoded to a PIL image, which
|
|
||||||
# is what pyzbar expects to receive, so this may
|
|
||||||
# raise an exception, triggering fallback
|
|
||||||
pillow_img = pdfimage.as_pil_image()
|
|
||||||
|
|
||||||
# Scale the image down
|
|
||||||
# See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
|
|
||||||
# TLDR: zbar has issues with larger images
|
|
||||||
width, height = pillow_img.size
|
|
||||||
if width > 1024:
|
|
||||||
scaler = ceil(width / 1024)
|
|
||||||
new_width = int(width / scaler)
|
|
||||||
new_height = int(height / scaler)
|
|
||||||
pillow_img = pillow_img.resize((new_width, new_height))
|
|
||||||
|
|
||||||
width, height = pillow_img.size
|
|
||||||
if height > 2048:
|
|
||||||
scaler = ceil(height / 2048)
|
|
||||||
new_width = int(width / scaler)
|
|
||||||
new_height = int(height / scaler)
|
|
||||||
pillow_img = pillow_img.resize((new_width, new_height))
|
|
||||||
|
|
||||||
for barcode_value in barcode_reader(pillow_img):
|
|
||||||
detected_barcodes.append(Barcode(page_num, barcode_value))
|
|
||||||
|
|
||||||
return detected_barcodes
|
|
||||||
|
|
||||||
def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
|
def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
|
||||||
detected_barcodes = []
|
detected_barcodes = []
|
||||||
# use a temporary directory in case the file is too big to handle in memory
|
# use a temporary directory in case the file is too big to handle in memory
|
||||||
with tempfile.TemporaryDirectory() as path:
|
with tempfile.TemporaryDirectory() as path:
|
||||||
pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
|
pages_from_path = convert_from_path(
|
||||||
|
pdf_filepath,
|
||||||
|
dpi=300,
|
||||||
|
output_folder=path,
|
||||||
|
)
|
||||||
for current_page_number, page in enumerate(pages_from_path):
|
for current_page_number, page in enumerate(pages_from_path):
|
||||||
for barcode_value in barcode_reader(page):
|
for barcode_value in barcode_reader(page):
|
||||||
detected_barcodes.append(
|
detected_barcodes.append(
|
||||||
@ -219,27 +181,19 @@ def scan_file_for_barcodes(
|
|||||||
# Always try pikepdf first, it's usually fine, faster and
|
# Always try pikepdf first, it's usually fine, faster and
|
||||||
# uses less memory
|
# uses less memory
|
||||||
try:
|
try:
|
||||||
barcodes = _pikepdf_barcode_scan(pdf_filepath)
|
barcodes = _pdf2image_barcode_scan(pdf_filepath)
|
||||||
# Password protected files can't be checked
|
# Password protected files can't be checked
|
||||||
except PasswordError as e:
|
# This is the exception raised for those
|
||||||
|
except PDFPageCountError as e:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"File is likely password protected, not checking for barcodes: {e}",
|
f"File is likely password protected, not checking for barcodes: {e}",
|
||||||
)
|
)
|
||||||
# Handle pikepdf related image decoding issues with a fallback to page
|
|
||||||
# by page conversion to images in a temporary directory
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(
|
|
||||||
f"Falling back to pdf2image because: {e}",
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
barcodes = _pdf2image_barcode_scan(pdf_filepath)
|
|
||||||
# This file is really borked, allow the consumption to continue
|
# This file is really borked, allow the consumption to continue
|
||||||
# but it may fail further on
|
# but it may fail further on
|
||||||
except Exception as e: # pragma: no cover
|
except Exception as e: # pragma: no cover
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Exception during barcode scanning: {e}",
|
f"Exception during barcode scanning: {e}",
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Unsupported file format for barcode reader: {str(mime_type)}",
|
f"Unsupported file format for barcode reader: {str(mime_type)}",
|
||||||
@ -248,16 +202,25 @@ def scan_file_for_barcodes(
|
|||||||
return DocumentBarcodeInfo(pdf_filepath, barcodes)
|
return DocumentBarcodeInfo(pdf_filepath, barcodes)
|
||||||
|
|
||||||
|
|
||||||
def get_separating_barcodes(barcodes: List[Barcode]) -> List[int]:
|
def get_separating_barcodes(barcodes: List[Barcode]) -> Dict[int, bool]:
|
||||||
"""
|
"""
|
||||||
Search the parsed barcodes for separators
|
Search the parsed barcodes for separators
|
||||||
and returns a list of page numbers, which
|
and returns a dict of page numbers, which
|
||||||
separate the file into new files.
|
separate the file into new files, together
|
||||||
|
with the information whether to keep the page.
|
||||||
"""
|
"""
|
||||||
# filter all barcodes for the separator string
|
# filter all barcodes for the separator string
|
||||||
# get the page numbers of the separating barcodes
|
# get the page numbers of the separating barcodes
|
||||||
|
separator_pages = {bc.page: False for bc in barcodes if bc.is_separator}
|
||||||
|
if not settings.CONSUMER_ENABLE_ASN_BARCODE:
|
||||||
|
return separator_pages
|
||||||
|
|
||||||
return list({bc.page for bc in barcodes if bc.is_separator})
|
# add the page numbers of the ASN barcodes
|
||||||
|
# (except for first page, that might lead to infinite loops).
|
||||||
|
return {
|
||||||
|
**separator_pages,
|
||||||
|
**{bc.page: True for bc in barcodes if bc.is_asn and bc.page != 0},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
|
def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
|
||||||
@ -289,10 +252,11 @@ def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
|
|||||||
return asn
|
return asn
|
||||||
|
|
||||||
|
|
||||||
def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
|
def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Separate the provided pdf file on the pages_to_split_on.
|
Separate the provided pdf file on the pages_to_split_on.
|
||||||
The pages which are defined by page_numbers will be removed.
|
The pages which are defined by the keys in page_numbers
|
||||||
|
will be removed if the corresponding value is false.
|
||||||
Returns a list of (temporary) filepaths to consume.
|
Returns a list of (temporary) filepaths to consume.
|
||||||
These will need to be deleted later.
|
These will need to be deleted later.
|
||||||
"""
|
"""
|
||||||
@ -308,26 +272,28 @@ def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
|
|||||||
fname = os.path.splitext(os.path.basename(filepath))[0]
|
fname = os.path.splitext(os.path.basename(filepath))[0]
|
||||||
pdf = Pdf.open(filepath)
|
pdf = Pdf.open(filepath)
|
||||||
|
|
||||||
|
# Start with an empty document
|
||||||
|
current_document: List[Page] = []
|
||||||
# A list of documents, ie a list of lists of pages
|
# A list of documents, ie a list of lists of pages
|
||||||
documents: List[List[Page]] = []
|
documents: List[List[Page]] = [current_document]
|
||||||
# A single document, ie a list of pages
|
|
||||||
document: List[Page] = []
|
|
||||||
|
|
||||||
for idx, page in enumerate(pdf.pages):
|
for idx, page in enumerate(pdf.pages):
|
||||||
# Keep building the new PDF as long as it is not a
|
# Keep building the new PDF as long as it is not a
|
||||||
# separator index
|
# separator index
|
||||||
if idx not in pages_to_split_on:
|
if idx not in pages_to_split_on:
|
||||||
document.append(page)
|
current_document.append(page)
|
||||||
# Make sure to append the very last document to the documents
|
continue
|
||||||
if idx == (len(pdf.pages) - 1):
|
|
||||||
documents.append(document)
|
# This is a split index
|
||||||
document = []
|
# Start a new destination page listing
|
||||||
else:
|
|
||||||
# This is a split index, save the current PDF pages, and restart
|
|
||||||
# a new destination page listing
|
|
||||||
logger.debug(f"Starting new document at idx {idx}")
|
logger.debug(f"Starting new document at idx {idx}")
|
||||||
documents.append(document)
|
current_document = []
|
||||||
document = []
|
documents.append(current_document)
|
||||||
|
keep_page = pages_to_split_on[idx]
|
||||||
|
if keep_page:
|
||||||
|
# Keep the page
|
||||||
|
# (new document is started by asn barcode)
|
||||||
|
current_document.append(page)
|
||||||
|
|
||||||
documents = [x for x in documents if len(x)]
|
documents = [x for x in documents if len(x)]
|
||||||
|
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
from subprocess import CompletedProcess
|
from subprocess import CompletedProcess
|
||||||
from subprocess import run
|
from subprocess import run
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@ -95,7 +98,8 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.path = None
|
self.path: Optional[Path] = None
|
||||||
|
self.original_path: Optional[Path] = None
|
||||||
self.filename = None
|
self.filename = None
|
||||||
self.override_title = None
|
self.override_title = None
|
||||||
self.override_correspondent_id = None
|
self.override_correspondent_id = None
|
||||||
@ -144,11 +148,16 @@ class Consumer(LoggingMixin):
|
|||||||
return
|
return
|
||||||
# Validate the range is above zero and less than uint32_t max
|
# Validate the range is above zero and less than uint32_t max
|
||||||
# otherwise, Whoosh can't handle it in the index
|
# otherwise, Whoosh can't handle it in the index
|
||||||
if self.override_asn < 0 or self.override_asn > 0xFF_FF_FF_FF:
|
if (
|
||||||
|
self.override_asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
|
||||||
|
or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
|
||||||
|
):
|
||||||
self._fail(
|
self._fail(
|
||||||
MESSAGE_ASN_RANGE,
|
MESSAGE_ASN_RANGE,
|
||||||
f"Not consuming {self.filename}: "
|
f"Not consuming {self.filename}: "
|
||||||
f"Given ASN {self.override_asn} is out of range [0, 4,294,967,295]",
|
f"Given ASN {self.override_asn} is out of range "
|
||||||
|
f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
|
||||||
|
f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
|
||||||
)
|
)
|
||||||
if Document.objects.filter(archive_serial_number=self.override_asn).exists():
|
if Document.objects.filter(archive_serial_number=self.override_asn).exists():
|
||||||
self._fail(
|
self._fail(
|
||||||
@ -169,16 +178,18 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
||||||
|
|
||||||
filepath_arg = os.path.normpath(self.path)
|
working_file_path = str(self.path)
|
||||||
|
original_file_path = str(self.original_path)
|
||||||
|
|
||||||
script_env = os.environ.copy()
|
script_env = os.environ.copy()
|
||||||
script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg
|
script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
|
||||||
|
script_env["DOCUMENT_WORKING_PATH"] = working_file_path
|
||||||
|
|
||||||
try:
|
try:
|
||||||
completed_proc = run(
|
completed_proc = run(
|
||||||
args=[
|
args=[
|
||||||
settings.PRE_CONSUME_SCRIPT,
|
settings.PRE_CONSUME_SCRIPT,
|
||||||
filepath_arg,
|
original_file_path,
|
||||||
],
|
],
|
||||||
env=script_env,
|
env=script_env,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
@ -197,7 +208,7 @@ class Consumer(LoggingMixin):
|
|||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
|
|
||||||
def run_post_consume_script(self, document):
|
def run_post_consume_script(self, document: Document):
|
||||||
if not settings.POST_CONSUME_SCRIPT:
|
if not settings.POST_CONSUME_SCRIPT:
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -288,8 +299,8 @@ class Consumer(LoggingMixin):
|
|||||||
Return the document object if it was successfully created.
|
Return the document object if it was successfully created.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.path = path
|
self.path = Path(path).resolve()
|
||||||
self.filename = override_filename or os.path.basename(path)
|
self.filename = override_filename or self.path.name
|
||||||
self.override_title = override_title
|
self.override_title = override_title
|
||||||
self.override_correspondent_id = override_correspondent_id
|
self.override_correspondent_id = override_correspondent_id
|
||||||
self.override_document_type_id = override_document_type_id
|
self.override_document_type_id = override_document_type_id
|
||||||
@ -315,6 +326,15 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
self.log("info", f"Consuming {self.filename}")
|
self.log("info", f"Consuming {self.filename}")
|
||||||
|
|
||||||
|
# For the actual work, copy the file into a tempdir
|
||||||
|
self.original_path = self.path
|
||||||
|
tempdir = tempfile.TemporaryDirectory(
|
||||||
|
prefix="paperless-ngx",
|
||||||
|
dir=settings.SCRATCH_DIR,
|
||||||
|
)
|
||||||
|
self.path = Path(tempdir.name) / Path(self.filename)
|
||||||
|
shutil.copy(self.original_path, self.path)
|
||||||
|
|
||||||
# Determine the parser class.
|
# Determine the parser class.
|
||||||
|
|
||||||
mime_type = magic.from_file(self.path, mime=True)
|
mime_type = magic.from_file(self.path, mime=True)
|
||||||
@ -457,11 +477,12 @@ class Consumer(LoggingMixin):
|
|||||||
# Delete the file only if it was successfully consumed
|
# Delete the file only if it was successfully consumed
|
||||||
self.log("debug", f"Deleting file {self.path}")
|
self.log("debug", f"Deleting file {self.path}")
|
||||||
os.unlink(self.path)
|
os.unlink(self.path)
|
||||||
|
self.original_path.unlink()
|
||||||
|
|
||||||
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
||||||
shadow_file = os.path.join(
|
shadow_file = os.path.join(
|
||||||
os.path.dirname(self.path),
|
os.path.dirname(self.original_path),
|
||||||
"._" + os.path.basename(self.path),
|
"._" + os.path.basename(self.original_path),
|
||||||
)
|
)
|
||||||
|
|
||||||
if os.path.isfile(shadow_file):
|
if os.path.isfile(shadow_file):
|
||||||
@ -478,6 +499,7 @@ class Consumer(LoggingMixin):
|
|||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
document_parser.cleanup()
|
document_parser.cleanup()
|
||||||
|
tempdir.cleanup()
|
||||||
|
|
||||||
self.run_post_consume_script(document)
|
self.run_post_consume_script(document)
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ from contextlib import contextmanager
|
|||||||
|
|
||||||
from dateutil.parser import isoparse
|
from dateutil.parser import isoparse
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
from documents.models import Comment
|
from documents.models import Comment
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from guardian.shortcuts import get_users_with_perms
|
from guardian.shortcuts import get_users_with_perms
|
||||||
@ -94,10 +95,22 @@ def open_index_searcher():
|
|||||||
searcher.close()
|
searcher.close()
|
||||||
|
|
||||||
|
|
||||||
def update_document(writer, doc):
|
def update_document(writer: AsyncWriter, doc: Document):
|
||||||
tags = ",".join([t.name for t in doc.tags.all()])
|
tags = ",".join([t.name for t in doc.tags.all()])
|
||||||
tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
|
tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
|
||||||
comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
|
comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
|
||||||
|
asn = doc.archive_serial_number
|
||||||
|
if asn is not None and (
|
||||||
|
asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
|
||||||
|
or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
|
||||||
|
):
|
||||||
|
logger.error(
|
||||||
|
f"Not indexing Archive Serial Number {asn} of document {doc.pk}. "
|
||||||
|
f"ASN is out of range "
|
||||||
|
f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
|
||||||
|
f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}.",
|
||||||
|
)
|
||||||
|
asn = 0
|
||||||
users_with_perms = get_users_with_perms(
|
users_with_perms = get_users_with_perms(
|
||||||
doc,
|
doc,
|
||||||
only_with_perms_in=["view_document"],
|
only_with_perms_in=["view_document"],
|
||||||
@ -118,7 +131,7 @@ def update_document(writer, doc):
|
|||||||
has_type=doc.document_type is not None,
|
has_type=doc.document_type is not None,
|
||||||
created=doc.created,
|
created=doc.created,
|
||||||
added=doc.added,
|
added=doc.added,
|
||||||
asn=doc.archive_serial_number,
|
asn=asn,
|
||||||
modified=doc.modified,
|
modified=doc.modified,
|
||||||
path=doc.storage_path.name if doc.storage_path else None,
|
path=doc.storage_path.name if doc.storage_path else None,
|
||||||
path_id=doc.storage_path.id if doc.storage_path else None,
|
path_id=doc.storage_path.id if doc.storage_path else None,
|
||||||
@ -283,7 +296,7 @@ class DelayedFullTextQuery(DelayedQuery):
|
|||||||
["content", "title", "correspondent", "tag", "type", "comments"],
|
["content", "title", "correspondent", "tag", "type", "comments"],
|
||||||
self.searcher.ixreader.schema,
|
self.searcher.ixreader.schema,
|
||||||
)
|
)
|
||||||
qp.add_plugin(DateParserPlugin())
|
qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
|
||||||
q = qp.parse(q_str)
|
q = qp.parse(q_str)
|
||||||
|
|
||||||
corrected = self.searcher.correct_query(q, q_str)
|
corrected = self.searcher.correct_query(q, q_str)
|
||||||
|
@ -311,8 +311,8 @@ class Command(BaseCommand):
|
|||||||
archive_target = None
|
archive_target = None
|
||||||
|
|
||||||
# 3.4. write files to target folder
|
# 3.4. write files to target folder
|
||||||
t = int(time.mktime(document.created.timetuple()))
|
|
||||||
if document.storage_type == Document.STORAGE_TYPE_GPG:
|
if document.storage_type == Document.STORAGE_TYPE_GPG:
|
||||||
|
t = int(time.mktime(document.created.timetuple()))
|
||||||
|
|
||||||
original_target.parent.mkdir(parents=True, exist_ok=True)
|
original_target.parent.mkdir(parents=True, exist_ok=True)
|
||||||
with document.source_file as out_file:
|
with document.source_file as out_file:
|
||||||
|
@ -0,0 +1,23 @@
|
|||||||
|
# Generated by Django 4.1.5 on 2023-02-03 21:53
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("documents", "1029_alter_document_archive_serial_number"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="paperlesstask",
|
||||||
|
name="task_file_name",
|
||||||
|
field=models.CharField(
|
||||||
|
help_text="Name of the file which the Task was run for",
|
||||||
|
max_length=255,
|
||||||
|
null=True,
|
||||||
|
verbose_name="Task Filename",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
@ -3,6 +3,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
from typing import Final
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
@ -242,6 +243,9 @@ class Document(ModelWithOwner):
|
|||||||
help_text=_("The original name of the file when it was uploaded"),
|
help_text=_("The original name of the file when it was uploaded"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ARCHIVE_SERIAL_NUMBER_MIN: Final[int] = 0
|
||||||
|
ARCHIVE_SERIAL_NUMBER_MAX: Final[int] = 0xFF_FF_FF_FF
|
||||||
|
|
||||||
archive_serial_number = models.PositiveIntegerField(
|
archive_serial_number = models.PositiveIntegerField(
|
||||||
_("archive serial number"),
|
_("archive serial number"),
|
||||||
blank=True,
|
blank=True,
|
||||||
@ -249,8 +253,8 @@ class Document(ModelWithOwner):
|
|||||||
unique=True,
|
unique=True,
|
||||||
db_index=True,
|
db_index=True,
|
||||||
validators=[
|
validators=[
|
||||||
MaxValueValidator(0xFF_FF_FF_FF),
|
MaxValueValidator(ARCHIVE_SERIAL_NUMBER_MAX),
|
||||||
MinValueValidator(0),
|
MinValueValidator(ARCHIVE_SERIAL_NUMBER_MIN),
|
||||||
],
|
],
|
||||||
help_text=_(
|
help_text=_(
|
||||||
"The position of this document in your physical document " "archive.",
|
"The position of this document in your physical document " "archive.",
|
||||||
@ -567,7 +571,7 @@ class PaperlessTask(models.Model):
|
|||||||
task_file_name = models.CharField(
|
task_file_name = models.CharField(
|
||||||
null=True,
|
null=True,
|
||||||
max_length=255,
|
max_length=255,
|
||||||
verbose_name=_("Task Name"),
|
verbose_name=_("Task Filename"),
|
||||||
help_text=_("Name of the file which the Task was run for"),
|
help_text=_("Name of the file which the Task was run for"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -166,7 +166,7 @@ def consume_file(
|
|||||||
# notify the sender, otherwise the progress bar
|
# notify the sender, otherwise the progress bar
|
||||||
# in the UI stays stuck
|
# in the UI stays stuck
|
||||||
payload = {
|
payload = {
|
||||||
"filename": override_filename,
|
"filename": override_filename or path.name,
|
||||||
"task_id": task_id,
|
"task_id": task_id,
|
||||||
"current_progress": 100,
|
"current_progress": 100,
|
||||||
"max_progress": 100,
|
"max_progress": 100,
|
||||||
|
Before Width: | Height: | Size: 33 KiB After Width: | Height: | Size: 33 KiB |
Before Width: | Height: | Size: 39 KiB After Width: | Height: | Size: 39 KiB |
BIN
src/documents/tests/samples/barcodes/split-by-asn-1.pdf
Normal file
BIN
src/documents/tests/samples/barcodes/split-by-asn-1.pdf
Normal file
Binary file not shown.
BIN
src/documents/tests/samples/barcodes/split-by-asn-2.pdf
Normal file
BIN
src/documents/tests/samples/barcodes/split-by-asn-2.pdf
Normal file
Binary file not shown.
@ -7,6 +7,7 @@ import tempfile
|
|||||||
import urllib.request
|
import urllib.request
|
||||||
import uuid
|
import uuid
|
||||||
import zipfile
|
import zipfile
|
||||||
|
from datetime import timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
@ -25,6 +26,7 @@ from django.contrib.auth.models import Permission
|
|||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
from dateutil.relativedelta import relativedelta
|
||||||
from documents import bulk_edit
|
from documents import bulk_edit
|
||||||
from documents import index
|
from documents import index
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
@ -509,6 +511,270 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
|||||||
response = self.client.get("/api/documents/?query=content&page=3&page_size=10")
|
response = self.client.get("/api/documents/?query=content&page=3&page_size=10")
|
||||||
self.assertEqual(response.status_code, 404)
|
self.assertEqual(response.status_code, 404)
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
TIME_ZONE="UTC",
|
||||||
|
)
|
||||||
|
def test_search_added_in_last_week(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Three documents added right now
|
||||||
|
- The timezone is UTC time
|
||||||
|
WHEN:
|
||||||
|
- Query for documents added in the last 7 days
|
||||||
|
THEN:
|
||||||
|
- All three recent documents are returned
|
||||||
|
"""
|
||||||
|
d1 = Document.objects.create(
|
||||||
|
title="invoice",
|
||||||
|
content="the thing i bought at a shop and paid with bank account",
|
||||||
|
checksum="A",
|
||||||
|
pk=1,
|
||||||
|
)
|
||||||
|
d2 = Document.objects.create(
|
||||||
|
title="bank statement 1",
|
||||||
|
content="things i paid for in august",
|
||||||
|
pk=2,
|
||||||
|
checksum="B",
|
||||||
|
)
|
||||||
|
d3 = Document.objects.create(
|
||||||
|
title="bank statement 3",
|
||||||
|
content="things i paid for in september",
|
||||||
|
pk=3,
|
||||||
|
checksum="C",
|
||||||
|
)
|
||||||
|
with index.open_index_writer() as writer:
|
||||||
|
index.update_document(writer, d1)
|
||||||
|
index.update_document(writer, d2)
|
||||||
|
index.update_document(writer, d3)
|
||||||
|
|
||||||
|
response = self.client.get("/api/documents/?query=added:[-1 week to now]")
|
||||||
|
results = response.data["results"]
|
||||||
|
# Expect 3 documents returned
|
||||||
|
self.assertEqual(len(results), 3)
|
||||||
|
|
||||||
|
for idx, subset in enumerate(
|
||||||
|
[
|
||||||
|
{"id": 1, "title": "invoice"},
|
||||||
|
{"id": 2, "title": "bank statement 1"},
|
||||||
|
{"id": 3, "title": "bank statement 3"},
|
||||||
|
],
|
||||||
|
):
|
||||||
|
result = results[idx]
|
||||||
|
# Assert subset in results
|
||||||
|
self.assertDictEqual(result, {**result, **subset})
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
TIME_ZONE="America/Chicago",
|
||||||
|
)
|
||||||
|
def test_search_added_in_last_week_with_timezone_behind(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Two documents added right now
|
||||||
|
- One document added over a week ago
|
||||||
|
- The timezone is behind UTC time (-6)
|
||||||
|
WHEN:
|
||||||
|
- Query for documents added in the last 7 days
|
||||||
|
THEN:
|
||||||
|
- The two recent documents are returned
|
||||||
|
"""
|
||||||
|
d1 = Document.objects.create(
|
||||||
|
title="invoice",
|
||||||
|
content="the thing i bought at a shop and paid with bank account",
|
||||||
|
checksum="A",
|
||||||
|
pk=1,
|
||||||
|
)
|
||||||
|
d2 = Document.objects.create(
|
||||||
|
title="bank statement 1",
|
||||||
|
content="things i paid for in august",
|
||||||
|
pk=2,
|
||||||
|
checksum="B",
|
||||||
|
)
|
||||||
|
d3 = Document.objects.create(
|
||||||
|
title="bank statement 3",
|
||||||
|
content="things i paid for in september",
|
||||||
|
pk=3,
|
||||||
|
checksum="C",
|
||||||
|
# 7 days, 1 hour and 1 minute ago
|
||||||
|
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||||
|
)
|
||||||
|
with index.open_index_writer() as writer:
|
||||||
|
index.update_document(writer, d1)
|
||||||
|
index.update_document(writer, d2)
|
||||||
|
index.update_document(writer, d3)
|
||||||
|
|
||||||
|
response = self.client.get("/api/documents/?query=added:[-1 week to now]")
|
||||||
|
results = response.data["results"]
|
||||||
|
|
||||||
|
# Expect 2 documents returned
|
||||||
|
self.assertEqual(len(results), 2)
|
||||||
|
|
||||||
|
for idx, subset in enumerate(
|
||||||
|
[{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
|
||||||
|
):
|
||||||
|
result = results[idx]
|
||||||
|
# Assert subset in results
|
||||||
|
self.assertDictEqual(result, {**result, **subset})
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
TIME_ZONE="Europe/Sofia",
|
||||||
|
)
|
||||||
|
def test_search_added_in_last_week_with_timezone_ahead(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Two documents added right now
|
||||||
|
- One document added over a week ago
|
||||||
|
- The timezone is behind UTC time (+2)
|
||||||
|
WHEN:
|
||||||
|
- Query for documents added in the last 7 days
|
||||||
|
THEN:
|
||||||
|
- The two recent documents are returned
|
||||||
|
"""
|
||||||
|
d1 = Document.objects.create(
|
||||||
|
title="invoice",
|
||||||
|
content="the thing i bought at a shop and paid with bank account",
|
||||||
|
checksum="A",
|
||||||
|
pk=1,
|
||||||
|
)
|
||||||
|
d2 = Document.objects.create(
|
||||||
|
title="bank statement 1",
|
||||||
|
content="things i paid for in august",
|
||||||
|
pk=2,
|
||||||
|
checksum="B",
|
||||||
|
)
|
||||||
|
d3 = Document.objects.create(
|
||||||
|
title="bank statement 3",
|
||||||
|
content="things i paid for in september",
|
||||||
|
pk=3,
|
||||||
|
checksum="C",
|
||||||
|
# 7 days, 1 hour and 1 minute ago
|
||||||
|
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||||
|
)
|
||||||
|
with index.open_index_writer() as writer:
|
||||||
|
index.update_document(writer, d1)
|
||||||
|
index.update_document(writer, d2)
|
||||||
|
index.update_document(writer, d3)
|
||||||
|
|
||||||
|
response = self.client.get("/api/documents/?query=added:[-1 week to now]")
|
||||||
|
results = response.data["results"]
|
||||||
|
|
||||||
|
# Expect 2 documents returned
|
||||||
|
self.assertEqual(len(results), 2)
|
||||||
|
|
||||||
|
for idx, subset in enumerate(
|
||||||
|
[{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
|
||||||
|
):
|
||||||
|
result = results[idx]
|
||||||
|
# Assert subset in results
|
||||||
|
self.assertDictEqual(result, {**result, **subset})
|
||||||
|
|
||||||
|
def test_search_added_in_last_month(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- One document added right now
|
||||||
|
- One documents added about a week ago
|
||||||
|
- One document added over 1 month
|
||||||
|
WHEN:
|
||||||
|
- Query for documents added in the last month
|
||||||
|
THEN:
|
||||||
|
- The two recent documents are returned
|
||||||
|
"""
|
||||||
|
d1 = Document.objects.create(
|
||||||
|
title="invoice",
|
||||||
|
content="the thing i bought at a shop and paid with bank account",
|
||||||
|
checksum="A",
|
||||||
|
pk=1,
|
||||||
|
)
|
||||||
|
d2 = Document.objects.create(
|
||||||
|
title="bank statement 1",
|
||||||
|
content="things i paid for in august",
|
||||||
|
pk=2,
|
||||||
|
checksum="B",
|
||||||
|
# 1 month, 1 day ago
|
||||||
|
added=timezone.now() - relativedelta(months=1, days=1),
|
||||||
|
)
|
||||||
|
d3 = Document.objects.create(
|
||||||
|
title="bank statement 3",
|
||||||
|
content="things i paid for in september",
|
||||||
|
pk=3,
|
||||||
|
checksum="C",
|
||||||
|
# 7 days, 1 hour and 1 minute ago
|
||||||
|
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||||
|
)
|
||||||
|
|
||||||
|
with index.open_index_writer() as writer:
|
||||||
|
index.update_document(writer, d1)
|
||||||
|
index.update_document(writer, d2)
|
||||||
|
index.update_document(writer, d3)
|
||||||
|
|
||||||
|
response = self.client.get("/api/documents/?query=added:[-1 month to now]")
|
||||||
|
results = response.data["results"]
|
||||||
|
|
||||||
|
# Expect 2 documents returned
|
||||||
|
self.assertEqual(len(results), 2)
|
||||||
|
|
||||||
|
for idx, subset in enumerate(
|
||||||
|
[{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
|
||||||
|
):
|
||||||
|
result = results[idx]
|
||||||
|
# Assert subset in results
|
||||||
|
self.assertDictEqual(result, {**result, **subset})
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
TIME_ZONE="America/Denver",
|
||||||
|
)
|
||||||
|
def test_search_added_in_last_month_timezone_behind(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- One document added right now
|
||||||
|
- One documents added about a week ago
|
||||||
|
- One document added over 1 month
|
||||||
|
- The timezone is behind UTC time (-6 or -7)
|
||||||
|
WHEN:
|
||||||
|
- Query for documents added in the last month
|
||||||
|
THEN:
|
||||||
|
- The two recent documents are returned
|
||||||
|
"""
|
||||||
|
d1 = Document.objects.create(
|
||||||
|
title="invoice",
|
||||||
|
content="the thing i bought at a shop and paid with bank account",
|
||||||
|
checksum="A",
|
||||||
|
pk=1,
|
||||||
|
)
|
||||||
|
d2 = Document.objects.create(
|
||||||
|
title="bank statement 1",
|
||||||
|
content="things i paid for in august",
|
||||||
|
pk=2,
|
||||||
|
checksum="B",
|
||||||
|
# 1 month, 1 day ago
|
||||||
|
added=timezone.now() - relativedelta(months=1, days=1),
|
||||||
|
)
|
||||||
|
d3 = Document.objects.create(
|
||||||
|
title="bank statement 3",
|
||||||
|
content="things i paid for in september",
|
||||||
|
pk=3,
|
||||||
|
checksum="C",
|
||||||
|
# 7 days, 1 hour and 1 minute ago
|
||||||
|
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||||
|
)
|
||||||
|
|
||||||
|
with index.open_index_writer() as writer:
|
||||||
|
index.update_document(writer, d1)
|
||||||
|
index.update_document(writer, d2)
|
||||||
|
index.update_document(writer, d3)
|
||||||
|
|
||||||
|
response = self.client.get("/api/documents/?query=added:[-1 month to now]")
|
||||||
|
results = response.data["results"]
|
||||||
|
|
||||||
|
# Expect 2 documents returned
|
||||||
|
self.assertEqual(len(results), 2)
|
||||||
|
|
||||||
|
for idx, subset in enumerate(
|
||||||
|
[{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
|
||||||
|
):
|
||||||
|
result = results[idx]
|
||||||
|
# Assert subset in results
|
||||||
|
self.assertDictEqual(result, {**result, **subset})
|
||||||
|
|
||||||
@mock.patch("documents.index.autocomplete")
|
@mock.patch("documents.index.autocomplete")
|
||||||
def test_search_autocomplete(self, m):
|
def test_search_autocomplete(self, m):
|
||||||
m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]
|
m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -833,7 +833,8 @@ class PreConsumeTestCase(TestCase):
|
|||||||
with tempfile.NamedTemporaryFile() as script:
|
with tempfile.NamedTemporaryFile() as script:
|
||||||
with override_settings(PRE_CONSUME_SCRIPT=script.name):
|
with override_settings(PRE_CONSUME_SCRIPT=script.name):
|
||||||
c = Consumer()
|
c = Consumer()
|
||||||
c.path = "path-to-file"
|
c.original_path = "path-to-file"
|
||||||
|
c.path = "/tmp/somewhere/path-to-file"
|
||||||
c.run_pre_consume_script()
|
c.run_pre_consume_script()
|
||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
@ -841,10 +842,19 @@ class PreConsumeTestCase(TestCase):
|
|||||||
args, kwargs = m.call_args
|
args, kwargs = m.call_args
|
||||||
|
|
||||||
command = kwargs["args"]
|
command = kwargs["args"]
|
||||||
|
environment = kwargs["env"]
|
||||||
|
|
||||||
self.assertEqual(command[0], script.name)
|
self.assertEqual(command[0], script.name)
|
||||||
self.assertEqual(command[1], "path-to-file")
|
self.assertEqual(command[1], "path-to-file")
|
||||||
|
|
||||||
|
self.assertDictContainsSubset(
|
||||||
|
{
|
||||||
|
"DOCUMENT_SOURCE_PATH": c.original_path,
|
||||||
|
"DOCUMENT_WORKING_PATH": c.path,
|
||||||
|
},
|
||||||
|
environment,
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch("documents.consumer.Consumer.log")
|
@mock.patch("documents.consumer.Consumer.log")
|
||||||
def test_script_with_output(self, mocked_log):
|
def test_script_with_output(self, mocked_log):
|
||||||
"""
|
"""
|
||||||
@ -961,9 +971,10 @@ class PostConsumeTestCase(TestCase):
|
|||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
args, kwargs = m.call_args
|
_, kwargs = m.call_args
|
||||||
|
|
||||||
command = kwargs["args"]
|
command = kwargs["args"]
|
||||||
|
environment = kwargs["env"]
|
||||||
|
|
||||||
self.assertEqual(command[0], script.name)
|
self.assertEqual(command[0], script.name)
|
||||||
self.assertEqual(command[1], str(doc.pk))
|
self.assertEqual(command[1], str(doc.pk))
|
||||||
@ -972,6 +983,17 @@ class PostConsumeTestCase(TestCase):
|
|||||||
self.assertEqual(command[7], "my_bank")
|
self.assertEqual(command[7], "my_bank")
|
||||||
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
||||||
|
|
||||||
|
self.assertDictContainsSubset(
|
||||||
|
{
|
||||||
|
"DOCUMENT_ID": str(doc.pk),
|
||||||
|
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
|
||||||
|
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
|
||||||
|
"DOCUMENT_CORRESPONDENT": "my_bank",
|
||||||
|
"DOCUMENT_TAGS": "a,b",
|
||||||
|
},
|
||||||
|
environment,
|
||||||
|
)
|
||||||
|
|
||||||
def test_script_exit_non_zero(self):
|
def test_script_exit_non_zero(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from unittest import mock
|
||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from documents import index
|
from documents import index
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
@ -31,3 +33,60 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
|
|||||||
)
|
)
|
||||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
|
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
|
||||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
|
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
|
||||||
|
|
||||||
|
def test_archive_serial_number_ranging(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Document with an archive serial number above schema allowed size
|
||||||
|
WHEN:
|
||||||
|
- Document is provided to the index
|
||||||
|
THEN:
|
||||||
|
- Error is logged
|
||||||
|
- Document ASN is reset to 0 for the index
|
||||||
|
"""
|
||||||
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1",
|
||||||
|
checksum="A",
|
||||||
|
content="test test2 test3",
|
||||||
|
# yes, this is allowed, unless full_clean is run
|
||||||
|
# DRF does call the validators, this test won't
|
||||||
|
archive_serial_number=Document.ARCHIVE_SERIAL_NUMBER_MAX + 1,
|
||||||
|
)
|
||||||
|
with self.assertLogs("paperless.index", level="ERROR") as cm:
|
||||||
|
with mock.patch(
|
||||||
|
"documents.index.AsyncWriter.update_document",
|
||||||
|
) as mocked_update_doc:
|
||||||
|
index.add_or_update_document(doc1)
|
||||||
|
|
||||||
|
mocked_update_doc.assert_called_once()
|
||||||
|
_, kwargs = mocked_update_doc.call_args
|
||||||
|
|
||||||
|
self.assertEqual(kwargs["asn"], 0)
|
||||||
|
|
||||||
|
error_str = cm.output[0]
|
||||||
|
expected_str = "ERROR:paperless.index:Not indexing Archive Serial Number 4294967296 of document 1"
|
||||||
|
self.assertIn(expected_str, error_str)
|
||||||
|
|
||||||
|
def test_archive_serial_number_is_none(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Document with no archive serial number
|
||||||
|
WHEN:
|
||||||
|
- Document is provided to the index
|
||||||
|
THEN:
|
||||||
|
- ASN isn't touched
|
||||||
|
"""
|
||||||
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1",
|
||||||
|
checksum="A",
|
||||||
|
content="test test2 test3",
|
||||||
|
)
|
||||||
|
with mock.patch(
|
||||||
|
"documents.index.AsyncWriter.update_document",
|
||||||
|
) as mocked_update_doc:
|
||||||
|
index.add_or_update_document(doc1)
|
||||||
|
|
||||||
|
mocked_update_doc.assert_called_once()
|
||||||
|
_, kwargs = mocked_update_doc.call_args
|
||||||
|
|
||||||
|
self.assertIsNone(kwargs["asn"])
|
||||||
|
@ -3,6 +3,7 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
from django.apps import apps
|
from django.apps import apps
|
||||||
from django.db import connection
|
from django.db import connection
|
||||||
@ -86,6 +87,30 @@ class DirectoriesMixin:
|
|||||||
remove_dirs(self.dirs)
|
remove_dirs(self.dirs)
|
||||||
|
|
||||||
|
|
||||||
|
class ConsumerProgressMixin:
|
||||||
|
def setUp(self) -> None:
|
||||||
|
self.send_progress_patcher = mock.patch(
|
||||||
|
"documents.consumer.Consumer._send_progress",
|
||||||
|
)
|
||||||
|
self.send_progress_mock = self.send_progress_patcher.start()
|
||||||
|
super().setUp()
|
||||||
|
|
||||||
|
def tearDown(self) -> None:
|
||||||
|
super().tearDown()
|
||||||
|
self.send_progress_patcher.stop()
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentConsumeDelayMixin:
|
||||||
|
def setUp(self) -> None:
|
||||||
|
self.consume_file_patcher = mock.patch("documents.tasks.consume_file.delay")
|
||||||
|
self.consume_file_mock = self.consume_file_patcher.start()
|
||||||
|
super().setUp()
|
||||||
|
|
||||||
|
def tearDown(self) -> None:
|
||||||
|
super().tearDown()
|
||||||
|
self.consume_file_patcher.stop()
|
||||||
|
|
||||||
|
|
||||||
class TestMigrations(TransactionTestCase):
|
class TestMigrations(TransactionTestCase):
|
||||||
@property
|
@property
|
||||||
def app(self):
|
def app(self):
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from typing import Final
|
from typing import Final
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
__version__: Final[Tuple[int, int, int]] = (1, 12, 1)
|
__version__: Final[Tuple[int, int, int]] = (1, 12, 2)
|
||||||
# Version string like X.Y.Z
|
# Version string like X.Y.Z
|
||||||
__full_version_str__: Final[str] = ".".join(map(str, __version__))
|
__full_version_str__: Final[str] = ".".join(map(str, __version__))
|
||||||
# Version string like X.Y
|
# Version string like X.Y
|
||||||
|
@ -67,11 +67,6 @@ class TestParserLive(TestCase):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Only run if convert is available
|
|
||||||
@pytest.mark.skipif(
|
|
||||||
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
|
||||||
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
|
||||||
)
|
|
||||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
||||||
def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
|
def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
|
||||||
"""
|
"""
|
||||||
@ -204,11 +199,6 @@ class TestParserLive(TestCase):
|
|||||||
"GOTENBERG_LIVE" not in os.environ,
|
"GOTENBERG_LIVE" not in os.environ,
|
||||||
reason="No gotenberg server",
|
reason="No gotenberg server",
|
||||||
)
|
)
|
||||||
# Only run if convert is available
|
|
||||||
@pytest.mark.skipif(
|
|
||||||
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
|
||||||
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
|
||||||
)
|
|
||||||
def test_generate_pdf_from_mail(self):
|
def test_generate_pdf_from_mail(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@ -301,11 +291,6 @@ class TestParserLive(TestCase):
|
|||||||
"GOTENBERG_LIVE" not in os.environ,
|
"GOTENBERG_LIVE" not in os.environ,
|
||||||
reason="No gotenberg server",
|
reason="No gotenberg server",
|
||||||
)
|
)
|
||||||
# Only run if convert is available
|
|
||||||
@pytest.mark.skipif(
|
|
||||||
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
|
||||||
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
|
||||||
)
|
|
||||||
def test_generate_pdf_from_html(self):
|
def test_generate_pdf_from_html(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
|
@ -90,7 +90,7 @@ class TikaDocumentParser(DocumentParser):
|
|||||||
with open(document_path, "rb") as document_handle:
|
with open(document_path, "rb") as document_handle:
|
||||||
files = {
|
files = {
|
||||||
"files": (
|
"files": (
|
||||||
file_name or os.path.basename(document_path),
|
"convert" + os.path.splitext(document_path)[-1],
|
||||||
document_handle,
|
document_handle,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@ max-line-length = 88
|
|||||||
|
|
||||||
[tool:pytest]
|
[tool:pytest]
|
||||||
DJANGO_SETTINGS_MODULE=paperless.settings
|
DJANGO_SETTINGS_MODULE=paperless.settings
|
||||||
addopts = --pythonwarnings=all --cov --cov-report=html --numprocesses auto --quiet
|
addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --quiet
|
||||||
env =
|
env =
|
||||||
PAPERLESS_DISABLE_DBHANDLER=true
|
PAPERLESS_DISABLE_DBHANDLER=true
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user