Merge branch 'dev' into feature-permissions

This commit is contained in:
shamoon 2023-02-03 14:23:50 -08:00
commit d2a6f79612
36 changed files with 2538 additions and 14911 deletions

19
.codecov.yml Normal file
View File

@ -0,0 +1,19 @@
# https://docs.codecov.com/docs/pull-request-comments
# codecov will only comment if coverage changes
comment:
  require_changes: true
coverage:
  status:
    project:
      default:
        # https://docs.codecov.com/docs/commit-status#threshold
        threshold: 1%
        # https://docs.codecov.com/docs/commit-status#only_pulls
        only_pulls: true
    patch:
      default:
        # For the changed lines only, target 75% covered, but
        # allow as low as 50%
        target: 75%
        threshold: 25%
        only_pulls: true

View File

@ -113,16 +113,12 @@ jobs:
PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }} PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }}
PAPERLESS_MAIL_TEST_USER: ${{ secrets.TEST_MAIL_USER }} PAPERLESS_MAIL_TEST_USER: ${{ secrets.TEST_MAIL_USER }}
PAPERLESS_MAIL_TEST_PASSWD: ${{ secrets.TEST_MAIL_PASSWD }} PAPERLESS_MAIL_TEST_PASSWD: ${{ secrets.TEST_MAIL_PASSWD }}
# Skip Tests which require convert
PAPERLESS_TEST_SKIP_CONVERT: 1
# Enable Gotenberg end to end testing # Enable Gotenberg end to end testing
GOTENBERG_LIVE: 1 GOTENBERG_LIVE: 1
steps: steps:
- -
name: Checkout name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v3
with:
fetch-depth: 0
- -
name: Start containers name: Start containers
run: | run: |
@ -145,6 +141,10 @@ jobs:
run: | run: |
sudo apt-get update -qq sudo apt-get update -qq
sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript libzbar0 poppler-utils sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript libzbar0 poppler-utils
-
name: Configure ImageMagick
run: |
sudo cp docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
- -
name: Install Python dependencies name: Install Python dependencies
run: | run: |
@ -160,27 +160,14 @@ jobs:
cd src/ cd src/
pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra
- -
name: Get changed files name: Upload coverage to Codecov
id: changed-files-specific if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }}
uses: tj-actions/changed-files@v35 uses: codecov/codecov-action@v3
with: with:
files: | # not required for public repos, but intermittently fails otherwise
src/** token: ${{ secrets.CODECOV_TOKEN }}
- # future expansion
name: List all changed files flags: backend
run: |
for file in ${{ steps.changed-files-specific.outputs.all_changed_files }}; do
echo "${file} was changed"
done
-
name: Publish coverage results
if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }} && steps.changed-files-specific.outputs.any_changed == 'true'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# https://github.com/coveralls-clients/coveralls-python/issues/251
run: |
cd src/
pipenv --python ${{ steps.setup-python.outputs.python-version }} run coveralls --service=github
- -
name: Stop containers name: Stop containers
if: always() if: always()
@ -347,7 +334,7 @@ jobs:
password: ${{ secrets.DOCKERHUB_TOKEN }} password: ${{ secrets.DOCKERHUB_TOKEN }}
- -
name: Build and push name: Build and push
uses: docker/build-push-action@v3 uses: docker/build-push-action@v4
with: with:
context: . context: .
file: ./Dockerfile file: ./Dockerfile
@ -442,21 +429,48 @@ jobs:
- -
name: Move files name: Move files
run: | run: |
mkdir dist echo "Making dist folders"
mkdir dist/paperless-ngx for directory in dist \
mkdir dist/paperless-ngx/scripts dist/paperless-ngx \
cp .dockerignore .env Dockerfile Pipfile Pipfile.lock requirements.txt LICENSE README.md dist/paperless-ngx/ dist/paperless-ngx/scripts;
cp paperless.conf.example dist/paperless-ngx/paperless.conf do
cp gunicorn.conf.py dist/paperless-ngx/gunicorn.conf.py mkdir --verbose --parents ${directory}
cp -r docker/ dist/paperless-ngx/docker done
cp scripts/*.service scripts/*.sh scripts/*.socket dist/paperless-ngx/scripts/
cp -r src/ dist/paperless-ngx/src echo "Copying basic files"
cp -r docs/_build/html/ dist/paperless-ngx/docs for file_name in .dockerignore \
mv static dist/paperless-ngx .env \
Dockerfile \
Pipfile \
Pipfile.lock \
requirements.txt \
LICENSE \
README.md \
paperless.conf.example \
gunicorn.conf.py
do
cp --verbose ${file_name} dist/paperless-ngx/
done
# Rename the example config in place so the release archive ships it as
# dist/paperless-ngx/paperless.conf (not in the repository working directory)
mv --verbose dist/paperless-ngx/paperless.conf.example dist/paperless-ngx/paperless.conf
echo "Copying Docker related files"
cp --recursive docker/ dist/paperless-ngx/docker
echo "Copying startup scripts"
cp --verbose scripts/*.service scripts/*.sh scripts/*.socket dist/paperless-ngx/scripts/
echo "Copying source files"
cp --recursive src/ dist/paperless-ngx/src
echo "Copying documentation"
cp --recursive docs/_build/html/ dist/paperless-ngx/docs
mv --verbose static dist/paperless-ngx
- -
name: Make release package name: Make release package
run: | run: |
echo "Creating release archive"
cd dist cd dist
sudo chown -R 1000:1000 paperless-ngx/
tar -cJf paperless-ngx.tar.xz paperless-ngx/ tar -cJf paperless-ngx.tar.xz paperless-ngx/
- -
name: Upload release artifact name: Upload release artifact

View File

@ -45,7 +45,7 @@ jobs:
uses: docker/setup-qemu-action@v2 uses: docker/setup-qemu-action@v2
- -
name: Build ${{ fromJSON(inputs.build-json).name }} name: Build ${{ fromJSON(inputs.build-json).name }}
uses: docker/build-push-action@v3 uses: docker/build-push-action@v4
with: with:
context: . context: .
file: ${{ inputs.dockerfile }} file: ${{ inputs.dockerfile }}

View File

@ -1,4 +1,5 @@
# syntax=docker/dockerfile:1.4 # syntax=docker/dockerfile:1.4
# https://github.com/moby/buildkit/blob/master/frontend/dockerfile/docs/reference.md
FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend
@ -61,10 +62,6 @@ ARG PSYCOPG2_VERSION
# Packages need for running # Packages need for running
ARG RUNTIME_PACKAGES="\ ARG RUNTIME_PACKAGES="\
# Python
python3 \
python3-pip \
python3-setuptools \
# General utils # General utils
curl \ curl \
# Docker specific # Docker specific
@ -128,7 +125,7 @@ RUN set -eux \
&& apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \ && apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
&& echo "Installing supervisor" \ && echo "Installing supervisor" \
&& python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor==4.2.4 && python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor==4.2.5
# Copy gunicorn config # Copy gunicorn config
# Changes very infrequently # Changes very infrequently
@ -137,7 +134,6 @@ WORKDIR /usr/src/paperless/
COPY gunicorn.conf.py . COPY gunicorn.conf.py .
# setup docker-specific things # setup docker-specific things
# Use mounts to avoid copying installer files into the image
# These change sometimes, but rarely # These change sometimes, but rarely
WORKDIR /usr/src/paperless/src/docker/ WORKDIR /usr/src/paperless/src/docker/
@ -179,7 +175,6 @@ RUN set -eux \
&& ./install_management_commands.sh && ./install_management_commands.sh
# Install the built packages from the installer library images # Install the built packages from the installer library images
# Use mounts to avoid copying installer files into the image
# These change sometimes # These change sometimes
RUN set -eux \ RUN set -eux \
&& echo "Getting binaries" \ && echo "Getting binaries" \
@ -203,7 +198,8 @@ RUN set -eux \
&& python3 -m pip list \ && python3 -m pip list \
&& echo "Cleaning up image layer" \ && echo "Cleaning up image layer" \
&& cd ../ \ && cd ../ \
&& rm -rf paperless-ngx && rm -rf paperless-ngx \
&& rm paperless-ngx.tar.gz
WORKDIR /usr/src/paperless/src/ WORKDIR /usr/src/paperless/src/
@ -247,11 +243,12 @@ COPY ./src ./
COPY --from=compile-frontend /src/src/documents/static/frontend/ ./documents/static/frontend/ COPY --from=compile-frontend /src/src/documents/static/frontend/ ./documents/static/frontend/
# add users, setup scripts # add users, setup scripts
# Mount the compiled frontend to expected location
RUN set -eux \ RUN set -eux \
&& addgroup --gid 1000 paperless \ && addgroup --gid 1000 paperless \
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \ && useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
&& chown -R paperless:paperless ../ \ && chown -R paperless:paperless /usr/src/paperless \
&& gosu paperless python3 manage.py collectstatic --clear --no-input \ && gosu paperless python3 manage.py collectstatic --clear --no-input --link \
&& gosu paperless python3 manage.py compilemessages && gosu paperless python3 manage.py compilemessages
VOLUME ["/usr/src/paperless/data", \ VOLUME ["/usr/src/paperless/data", \

View File

@ -1,7 +1,7 @@
[![ci](https://github.com/paperless-ngx/paperless-ngx/workflows/ci/badge.svg)](https://github.com/paperless-ngx/paperless-ngx/actions) [![ci](https://github.com/paperless-ngx/paperless-ngx/workflows/ci/badge.svg)](https://github.com/paperless-ngx/paperless-ngx/actions)
[![Crowdin](https://badges.crowdin.net/paperless-ngx/localized.svg)](https://crowdin.com/project/paperless-ngx) [![Crowdin](https://badges.crowdin.net/paperless-ngx/localized.svg)](https://crowdin.com/project/paperless-ngx)
[![Documentation Status](https://img.shields.io/github/deployments/paperless-ngx/paperless-ngx/github-pages?label=docs)](https://docs.paperless-ngx.com) [![Documentation Status](https://img.shields.io/github/deployments/paperless-ngx/paperless-ngx/github-pages?label=docs)](https://docs.paperless-ngx.com)
[![Coverage Status](https://coveralls.io/repos/github/paperless-ngx/paperless-ngx/badge.svg?branch=master)](https://coveralls.io/github/paperless-ngx/paperless-ngx?branch=master) [![codecov](https://codecov.io/gh/paperless-ngx/paperless-ngx/branch/main/graph/badge.svg?token=VK6OUPJ3TY)](https://codecov.io/gh/paperless-ngx/paperless-ngx)
[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/%23paperlessngx%3Amatrix.org) [![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/%23paperlessngx%3Amatrix.org)
[![demo](https://cronitor.io/badges/ve7ItY/production/W5E_B9jkelG9ZbDiNHUPQEVH3MY.svg)](https://demo.paperless-ngx.com) [![demo](https://cronitor.io/badges/ve7ItY/production/W5E_B9jkelG9ZbDiNHUPQEVH3MY.svg)](https://demo.paperless-ngx.com)

View File

@ -80,7 +80,7 @@ django_checks() {
search_index() { search_index() {
local -r index_version=2 local -r index_version=3
local -r index_version_file=${DATA_DIR}/.index_version local -r index_version_file=${DATA_DIR}/.index_version
if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then

View File

@ -121,7 +121,17 @@ Executed after the consumer sees a new document in the consumption
folder, but before any processing of the document is performed. This folder, but before any processing of the document is performed. This
script can access the following relevant environment variables set: script can access the following relevant environment variables set:
- `DOCUMENT_SOURCE_PATH` | Environment Variable | Description |
| ----------------------- | ------------------------------------------------------------ |
| `DOCUMENT_SOURCE_PATH` | Original path of the consumed document |
| `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on |
!!! note
Pre-consume scripts which modify the document should only change
the `DOCUMENT_WORKING_PATH` file or a second consume task may
be triggered, leading to failures as two tasks work on the
same document path
A simple but common example for this would be creating a simple script A simple but common example for this would be creating a simple script
like this: like this:
@ -130,7 +140,7 @@ like this:
```bash ```bash
#!/usr/bin/env bash #!/usr/bin/env bash
pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH} pdf2pdfocr.py -i ${DOCUMENT_WORKING_PATH}
``` ```
`/etc/paperless.conf` `/etc/paperless.conf`
@ -157,27 +167,37 @@ Executed after the consumer has successfully processed a document and
has moved it into paperless. It receives the following environment has moved it into paperless. It receives the following environment
variables: variables:
- `DOCUMENT_ID` | Environment Variable | Description |
- `DOCUMENT_FILE_NAME` | ---------------------------- | --------------------------------------------- |
- `DOCUMENT_CREATED` | `DOCUMENT_ID` | Database primary key of the document |
- `DOCUMENT_MODIFIED` | `DOCUMENT_FILE_NAME` | Formatted filename, not including paths |
- `DOCUMENT_ADDED` | `DOCUMENT_CREATED` | Date & time when document created |
- `DOCUMENT_SOURCE_PATH` | `DOCUMENT_MODIFIED` | Date & time when document was last modified |
- `DOCUMENT_ARCHIVE_PATH` | `DOCUMENT_ADDED` | Date & time when document was added |
- `DOCUMENT_THUMBNAIL_PATH` | `DOCUMENT_SOURCE_PATH` | Path to the original document file |
- `DOCUMENT_DOWNLOAD_URL` | `DOCUMENT_ARCHIVE_PATH` | Path to the generated archive file (if any) |
- `DOCUMENT_THUMBNAIL_URL` | `DOCUMENT_THUMBNAIL_PATH` | Path to the generated thumbnail |
- `DOCUMENT_CORRESPONDENT` | `DOCUMENT_DOWNLOAD_URL` | URL for document download |
- `DOCUMENT_TAGS` | `DOCUMENT_THUMBNAIL_URL` | URL for the document thumbnail |
- `DOCUMENT_ORIGINAL_FILENAME` | `DOCUMENT_CORRESPONDENT` | Assigned correspondent (if any) |
| `DOCUMENT_TAGS` | Comma separated list of tags applied (if any) |
| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document |
The script can be in any language, but for a simple shell script The script can be in any language. A simple shell script example:
example, you can take a look at
[post-consumption-example.sh](https://github.com/paperless-ngx/paperless-ngx/blob/main/scripts/post-consumption-example.sh) ```bash title="post-consumption-example"
in this project. --8<-- "./scripts/post-consumption-example.sh"
```
!!! note
The post consumption script cannot cancel the consumption process. The post consumption script cannot cancel the consumption process.
!!! warning
The post consumption script should not modify the document files
directly
The script's stdout and stderr will be logged line by line to the The script's stdout and stderr will be logged line by line to the
webserver log, along with the exit code of the script. webserver log, along with the exit code of the script.

View File

@ -141,7 +141,8 @@ directory.
files created using "collectstatic" manager command are stored. files created using "collectstatic" manager command are stored.
Unless you're doing something fancy, there is no need to override Unless you're doing something fancy, there is no need to override
this. this. If this is changed, you may need to run
`collectstatic` again.
Defaults to "../static/", relative to the "src" directory. Defaults to "../static/", relative to the "src" directory.

View File

@ -1,9 +1,9 @@
# Development # Development
This section describes the steps you need to take to start development This section describes the steps you need to take to start development
on paperless-ngx. on Paperless-ngx.
Check out the source from github. The repository is organized in the Check out the source from GitHub. The repository is organized in the
following way: following way:
- `main` always represents the latest release and will only see - `main` always represents the latest release and will only see
@ -12,7 +12,7 @@ following way:
- `feature-X` contain bigger changes that will be in some release, but - `feature-X` contain bigger changes that will be in some release, but
not necessarily the next one. not necessarily the next one.
When making functional changes to paperless, _always_ make your changes When making functional changes to Paperless-ngx, _always_ make your changes
on the `dev` branch. on the `dev` branch.
Apart from that, the folder structure is as follows: Apart from that, the folder structure is as follows:
@ -24,9 +24,9 @@ Apart from that, the folder structure is as follows:
development. development.
- `docker/` - Files required to build the docker image. - `docker/` - Files required to build the docker image.
## Contributing to Paperless ## Contributing to Paperless-ngx
Maybe you've been using Paperless for a while and want to add a feature Maybe you've been using Paperless-ngx for a while and want to add a feature
or two, or maybe you've come across a bug that you have some ideas how or two, or maybe you've come across a bug that you have some ideas how
to solve. The beauty of open source software is that you can see what's to solve. The beauty of open source software is that you can see what's
wrong and help to get it fixed for everyone! wrong and help to get it fixed for everyone!
@ -36,13 +36,13 @@ conduct](https://github.com/paperless-ngx/paperless-ngx/blob/main/CODE_OF_CONDUC
and other important information in the [contributing and other important information in the [contributing
guidelines](https://github.com/paperless-ngx/paperless-ngx/blob/main/CONTRIBUTING.md). guidelines](https://github.com/paperless-ngx/paperless-ngx/blob/main/CONTRIBUTING.md).
## Code formatting with pre-commit Hooks ## Code formatting with pre-commit hooks
To ensure a consistent style and formatting across the project source, To ensure a consistent style and formatting across the project source,
the project utilizes a Git [`pre-commit`](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks) the project utilizes Git [`pre-commit`](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks)
hook to perform some formatting and linting before a commit is allowed. hooks to perform some formatting and linting before a commit is allowed.
That way, everyone uses the same style and some common issues can be caught That way, everyone uses the same style and some common issues can be caught
early on. See below for installation instructions. early on.
Once installed, hooks will run when you commit. If the formatting isn't Once installed, hooks will run when you commit. If the formatting isn't
quite right or a linter catches something, the commit will be rejected. quite right or a linter catches something, the commit will be rejected.
@ -51,129 +51,110 @@ as the Python formatting tool `black`, will format failing
files, so all you need to do is `git add` those files again files, so all you need to do is `git add` those files again
and retry your commit. and retry your commit.
## Initial setup and first start ## General setup
After you forked and cloned the code from github you need to perform a After you forked and cloned the code from GitHub you need to perform a
first-time setup. To do the setup you need to perform the steps from the first-time setup.
following chapters in a certain order:
!!! note
Every command is executed directly from the root folder of the project unless specified otherwise.
1. Install prerequisites + pipenv as mentioned in 1. Install prerequisites + pipenv as mentioned in
[Bare metal route](/setup#bare_metal) [Bare metal route](/setup#bare_metal).
2. Copy `paperless.conf.example` to `paperless.conf` and enable debug 2. Copy `paperless.conf.example` to `paperless.conf` and enable debug
mode. mode within the file via `PAPERLESS_DEBUG=true`.
3. Install the Angular CLI interface: 3. Create `consume` and `media` directories:
```shell-session ```bash
$ npm install -g @angular/cli $ mkdir -p consume media
``` ```
4. Install pre-commit hooks 4. Install the Python dependencies:
```shell-session ```bash
pre-commit install $ pipenv install --dev
``` ```
5. Create `consume` and `media` folders in the cloned root folder. !!! note
```shell-session Using a virtual environment is highly recommended. You can spawn one via `pipenv shell`.
mkdir -p consume media Make sure you're using Python 3.10.x or lower. Otherwise you might
get issues with building dependencies. You can use
[pyenv](https://github.com/pyenv/pyenv) to install a specific
Python version.
5. Install pre-commit hooks:
```bash
$ pre-commit install
``` ```
6. You can now either ... 6. Apply migrations and create a superuser for your development instance:
```bash
# src/
$ python3 manage.py migrate
$ python3 manage.py createsuperuser
```
7. You can now either ...
- install redis or - install redis or
- use the included scripts/start-services.sh to use docker to fire - use the included `scripts/start_services.sh` to use docker to fire
up a redis instance (and some other services such as tika, up a redis instance (and some other services such as tika,
gotenberg and a database server) or gotenberg and a database server) or
- spin up a bare redis container - spin up a bare redis container
```shell-session ```
docker run -d -p 6379:6379 --restart unless-stopped redis:latest $ docker run -d -p 6379:6379 --restart unless-stopped redis:latest
``` ```
7. Install the python dependencies by performing in the src/ directory. 8. Continue with either back-end or front-end development or both :-).
```shell-session
pipenv install --dev
```
!!! note
Make sure you're using python 3.10.x or lower. Otherwise you might
get issues with building dependencies. You can use
[pyenv](https://github.com/pyenv/pyenv) to install a specific
python version.
8. Generate the static UI so you can perform a login to get session
that is required for frontend development (this needs to be done one
time only). From src-ui directory:
```shell-session
npm install .
./node_modules/.bin/ng build --configuration production
```
9. Apply migrations and create a superuser for your dev instance:
```shell-session
python3 manage.py migrate
python3 manage.py createsuperuser
```
10. Now spin up the dev backend. Depending on which part of paperless
you're developing for, you need to have some or all of them
running.
```shell-session
python3 manage.py runserver & python3 manage.py document_consumer & celery --app paperless worker
```
11. Login with the superuser credentials provided in step 8 at
`http://localhost:8000` to create a session that enables you to use
the backend.
Backend development environment is now ready, to start Frontend
development go to `/src-ui` and run `ng serve`. From there you can use
`http://localhost:4200` for a preview.
## Back end development ## Back end development
The backend is a [Django](https://www.djangoproject.com/) application. PyCharm works well for development, The back end is a [Django](https://www.djangoproject.com/) application. [PyCharm](https://www.jetbrains.com/de-de/pycharm/) as well as [Visual Studio Code](https://code.visualstudio.com) work well for development, but you can use whatever you want.
but you can use whatever you want.
Configure the IDE to use the src/ folder as the base source folder. Configure the IDE to use the `src/`-folder as the base source folder.
Configure the following launch configurations in your IDE: Configure the following launch configurations in your IDE:
- `python3 manage.py runserver` - `python3 manage.py runserver`
- `celery --app paperless worker`
- `python3 manage.py document_consumer` - `python3 manage.py document_consumer`
- `celery --app paperless worker -l DEBUG` (or any other log level)
To start them all: To start them all:
```shell-session ```bash
python3 manage.py runserver & python3 manage.py document_consumer & celery --app paperless worker # src/
$ python3 manage.py runserver & \
python3 manage.py document_consumer & \
celery --app paperless worker -l DEBUG
``` ```
Testing and code style: You might need the front end to test your back end code. This assumes that you have AngularJS installed on your system. Go to the [Front end development](#front-end-development) section for further details. To build the front end once use this command:
```bash
# src-ui/
$ npm install
$ ng build --configuration production
```
### Testing
- Run `pytest` in the `src/` directory to execute all tests. This also - Run `pytest` in the `src/` directory to execute all tests. This also
generates a HTML coverage report. When running tests, paperless.conf generates a HTML coverage report. When running tests, `paperless.conf`
is loaded as well. However: the tests rely on the default is loaded as well. However, the tests rely on the default
configuration. This is not ideal. But for now, make sure no settings configuration. This is not ideal. But for now, make sure no settings
except for DEBUG are overridden when testing. except for DEBUG are overridden when testing.
- Coding style is enforced by the Git pre-commit hooks. These will
ensure your code is formatted and do some linting when you do a `git commit`.
- You can also run `black` manually to format your code
- The `pre-commit` hooks will modify files and interact with each other.
It may take a couple of `git add`, `git commit` cycle to satisfy them.
!!! note !!! note
The line length rule E501 is generally useful for getting multiple The line length rule E501 is generally useful for getting multiple
@ -184,23 +165,31 @@ Testing and code style:
## Front end development ## Front end development
The front end is built using Angular. In order to get started, you need The front end is built using AngularJS. In order to get started, you need Node.js (version 14.15+) and
`npm`. Install the Angular CLI interface with `npm`.
```shell-session !!! note
The following commands are all performed in the `src-ui`-directory. You will need a running back end (including an active session) to connect to the back end API. To spin it up refer to the commands under the section [above](#back-end-development).
1. Install the Angular CLI. You might need sudo privileges
to perform this command:
```bash
$ npm install -g @angular/cli $ npm install -g @angular/cli
``` ```
and make sure that it's on your path. Next, in the src-ui/ directory, 2. Make sure that it's on your path.
install the required dependencies of the project.
```shell-session 3. Install all neccessary modules:
```bash
$ npm install $ npm install
``` ```
You can launch a development server by running 4. You can launch a development server by running:
```shell-session ```bash
$ ng serve $ ng serve
``` ```
@ -208,24 +197,17 @@ This will automatically update whenever you save. However, in-place
compilation might fail on syntax errors, in which case you need to compilation might fail on syntax errors, in which case you need to
restart it. restart it.
By default, the development server is available on By default, the development server is available on `http://localhost:4200/` and is configured to access the API at
`http://localhost:4200/` and is configured to access the API at `http://localhost:8000/api/`, which is the default of the backend. If you enabled `DEBUG` on the back end, several security overrides for allowed hosts, CORS and X-Frame-Options are in place so that the front end behaves exactly as in production.
`http://localhost:8000/api/`, which is the default of the backend. If
you enabled DEBUG on the back end, several security overrides for
allowed hosts, CORS and X-Frame-Options are in place so that the front
end behaves exactly as in production. This also relies on you being
logged into the back end. Without a valid session, The front end will
simply not work.
Testing and code style: ### Testing and code style
- The front end code (.ts, .html, .scss) use `prettier` for code - The front end code (.ts, .html, .scss) use `prettier` for code
formatting via the Git `pre-commit` hooks which run automatically on formatting via the Git `pre-commit` hooks which run automatically on
commit. See commit. See [above](#code-formatting-with-pre-commit-hooks) for installation instructions. You can also run this via the CLI with a
[above](#code-formatting-with-pre-commit-hooks) for installation. You can also run this via cli with a
command such as command such as
```shell-session ```bash
$ git ls-files -- '*.ts' | xargs pre-commit run prettier --files $ git ls-files -- '*.ts' | xargs pre-commit run prettier --files
``` ```
@ -233,21 +215,20 @@ Testing and code style:
for significantly more front end tests. Unit tests and e2e tests, for significantly more front end tests. Unit tests and e2e tests,
respectively, can be run non-interactively with: respectively, can be run non-interactively with:
```shell-session ```bash
$ ng test $ ng test
$ npm run e2e:ci $ npm run e2e:ci
``` ```
Cypress also includes a UI which can be run from within the `src-ui` - Cypress also includes a UI which can be run with:
directory with
```shell-session ```bash
$ ./node_modules/.bin/cypress open $ ./node_modules/.bin/cypress open
``` ```
In order to build the front end and serve it as part of django, execute - In order to build the front end and serve it as part of Django, execute:
```shell-session ```bash
$ ng build --configuration production $ ng build --configuration production
``` ```
@ -257,25 +238,25 @@ that authentication is working.
## Localization ## Localization
Paperless is available in many different languages. Since paperless Paperless-ngx is available in many different languages. Since Paperless-ngx
consists both of a django application and an Angular front end, both consists both of a Django application and an AngularJS front end, both
these parts have to be translated separately. these parts have to be translated separately.
### Front end localization ### Front end localization
- The Angular front end does localization according to the [Angular - The AngularJS front end does localization according to the [Angular
documentation](https://angular.io/guide/i18n). documentation](https://angular.io/guide/i18n).
- The source language of the project is "en_US". - The source language of the project is "en_US".
- The source strings end up in the file "src-ui/messages.xlf". - The source strings end up in the file `src-ui/messages.xlf`.
- The translated strings need to be placed in the - The translated strings need to be placed in the
"src-ui/src/locale/" folder. `src-ui/src/locale/` folder.
- In order to extract added or changed strings from the source files, - In order to extract added or changed strings from the source files,
call `ng xi18n --ivy`. call `ng xi18n --ivy`.
Adding new languages requires adding the translated files in the Adding new languages requires adding the translated files in the
"src-ui/src/locale/" folder and adjusting a couple files. `src-ui/src/locale/` folder and adjusting a couple files.
1. Adjust "src-ui/angular.json": 1. Adjust `src-ui/angular.json`:
```json ```json
"i18n": { "i18n": {
@ -292,7 +273,7 @@ Adding new languages requires adding the translated files in the
``` ```
2. Add the language to the available options in 2. Add the language to the available options in
"src-ui/src/app/services/settings.service.ts": `src-ui/src/app/services/settings.service.ts`:
```typescript ```typescript
getLanguageOptions(): LanguageOption[] { getLanguageOptions(): LanguageOption[] {
@ -313,7 +294,7 @@ Adding new languages requires adding the translated files in the
and "yyyy". and "yyyy".
3. Import and register the Angular data for this locale in 3. Import and register the Angular data for this locale in
"src-ui/src/app/app.module.ts": `src-ui/src/app/app.module.ts`:
```typescript ```typescript
import localeDe from '@angular/common/locales/de' import localeDe from '@angular/common/locales/de'
@ -326,10 +307,10 @@ A majority of the strings that appear in the back end appear only when
the admin is used. However, some of these are still shown on the front the admin is used. However, some of these are still shown on the front
end (such as error messages). end (such as error messages).
- The django application does localization according to the [django - The django application does localization according to the [Django
documentation](https://docs.djangoproject.com/en/3.1/topics/i18n/translation/). documentation](https://docs.djangoproject.com/en/3.1/topics/i18n/translation/).
- The source language of the project is "en_US". - The source language of the project is "en_US".
- Localization files end up in the folder "src/locale/". - Localization files end up in the folder `src/locale/`.
- In order to extract strings from the application, call - In order to extract strings from the application, call
`python3 manage.py makemessages -l en_US`. This is important after `python3 manage.py makemessages -l en_US`. This is important after
making changes to translatable strings. making changes to translatable strings.
@ -340,8 +321,8 @@ end (such as error messages).
command. command.
Adding new languages requires adding the translated files in the Adding new languages requires adding the translated files in the
"src/locale/" folder and adjusting the file `src/locale/` folder and adjusting the file
"src/paperless/settings.py" to include the new language: `src/paperless/settings.py` to include the new language:
```python ```python
LANGUAGES = [ LANGUAGES = [
@ -360,18 +341,27 @@ LANGUAGES = [
The documentation is built using material-mkdocs, see their [documentation](https://squidfunk.github.io/mkdocs-material/reference/). The documentation is built using material-mkdocs, see their [documentation](https://squidfunk.github.io/mkdocs-material/reference/).
If you want to build the documentation locally, this is how you do it: If you want to build the documentation locally, this is how you do it:
1. Install python dependencies. 1. Have an active pipenv shell (`pipenv shell`) and install Python dependencies:
```shell-session ```bash
$ cd /path/to/paperless
$ pipenv install --dev $ pipenv install --dev
``` ```
2. Build the documentation 2. Build the documentation
```shell-session ```bash
$ cd /path/to/paperless $ mkdocs build --config-file mkdocs.yml
$ pipenv mkdocs build --config-file mkdocs.yml ```
_alternatively..._
3. Serve the documentation. This will spin up a
copy of the documentation at http://127.0.0.1:8000
that will automatically refresh every time you change
something.
```bash
$ mkdocs serve
``` ```
## Building the Docker image ## Building the Docker image
@ -384,35 +374,35 @@ helper script `build-docker-image.sh`.
Building the docker image from source: Building the docker image from source:
```shell-session ```bash
./build-docker-image.sh Dockerfile -t <your-tag> ./build-docker-image.sh Dockerfile -t <your-tag>
``` ```
## Extending Paperless ## Extending Paperless-ngx
Paperless does not have any fancy plugin systems and will probably never Paperless-ngx does not have any fancy plugin systems and will probably never
have. However, some parts of the application have been designed to allow have. However, some parts of the application have been designed to allow
easy integration of additional features without any modification to the easy integration of additional features without any modification to the
base code. base code.
### Making custom parsers ### Making custom parsers
Paperless uses parsers to add documents to paperless. A parser is Paperless-ngx uses parsers to add documents. A parser is
responsible for: responsible for:
- Retrieve the content from the original - Retrieving the content from the original
- Create a thumbnail - Creating a thumbnail
- Optional: Retrieve a created date from the original - _optional:_ Retrieving a created date from the original
- Optional: Create an archived document from the original - _optional:_ Creating an archived document from the original
Custom parsers can be added to paperless to support more file types. In Custom parsers can be added to Paperless-ngx to support more file types. In
order to do that, you need to write the parser itself and announce its order to do that, you need to write the parser itself and announce its
existence to paperless. existence to Paperless-ngx.
The parser itself must extend `documents.parsers.DocumentParser` and The parser itself must extend `documents.parsers.DocumentParser` and
must implement the methods `parse` and `get_thumbnail`. You can provide must implement the methods `parse` and `get_thumbnail`. You can provide
your own implementation to `get_date` if you don't want to rely on your own implementation to `get_date` if you don't want to rely on
paperless' default date guessing mechanisms. Paperless-ngx' default date guessing mechanisms.
```python ```python
class MyCustomParser(DocumentParser): class MyCustomParser(DocumentParser):
@ -444,7 +434,7 @@ to be empty and removed after consumption finished. You can use that
directory to store any intermediate files and also use it to store the directory to store any intermediate files and also use it to store the
thumbnail / archived document. thumbnail / archived document.
After that, you need to announce your parser to paperless. You need to After that, you need to announce your parser to Paperless-ngx. You need to
connect a handler to the `document_consumer_declaration` signal. Have a connect a handler to the `document_consumer_declaration` signal. Have a
look in the file `src/paperless_tesseract/apps.py` on how that's done. look in the file `src/paperless_tesseract/apps.py` on how that's done.
The handler is a method that returns information about your parser: The handler is a method that returns information about your parser:
@ -464,11 +454,11 @@ def myparser_consumer_declaration(sender, **kwargs):
- `parser` is a reference to a class that extends `DocumentParser`. - `parser` is a reference to a class that extends `DocumentParser`.
- `weight` is used whenever two or more parsers are able to parse a - `weight` is used whenever two or more parsers are able to parse a
file: The parser with the higher weight wins. This can be used to file: The parser with the higher weight wins. This can be used to
override the parsers provided by paperless. override the parsers provided by Paperless-ngx.
- `mime_types` is a dictionary. The keys are the mime types your - `mime_types` is a dictionary. The keys are the mime types your
parser supports and the value is the default file extension that parser supports and the value is the default file extension that
paperless should use when storing files and serving them for Paperless-ngx should use when storing files and serving them for
download. We could guess that from the file extensions, but some download. We could guess that from the file extensions, but some
mime types have many extensions associated with them and the python mime types have many extensions associated with them and the Python
methods responsible for guessing the extension do not always return methods responsible for guessing the extension do not always return
the same value. the same value.

View File

@ -388,12 +388,7 @@ supported.
``` ```
8. Install python requirements from the `requirements.txt` file. It is 8. Install python requirements from the `requirements.txt` file. It is
up to you if you wish to use a virtual environment or not. First you up to you if you wish to use a virtual environment or not. First you should update your pip, so it gets the actual packages.
should update your pip, so it gets the actual packages.
```shell-session
sudo -Hu paperless pip3 install --upgrade pip
```
```shell-session ```shell-session
sudo -Hu paperless pip3 install -r requirements.txt sudo -Hu paperless pip3 install -r requirements.txt

View File

@ -41,6 +41,7 @@ markdown_extensions:
anchor_linenums: true anchor_linenums: true
- pymdownx.superfences - pymdownx.superfences
- pymdownx.inlinehilite - pymdownx.inlinehilite
- pymdownx.snippets
strict: true strict: true
nav: nav:
- index.md - index.md
@ -54,7 +55,7 @@ nav:
- 'FAQs': faq.md - 'FAQs': faq.md
- troubleshooting.md - troubleshooting.md
- changelog.md - changelog.md
copyright: Copyright &copy; 2016 - 2022 Daniel Quinn, Jonas Winkler, and the Paperless-ngx team copyright: Copyright &copy; 2016 - 2023 Daniel Quinn, Jonas Winkler, and the Paperless-ngx team
extra: extra:
social: social:
- icon: fontawesome/brands/github - icon: fontawesome/brands/github

View File

@ -192,7 +192,8 @@
"cli": { "cli": {
"schematicCollections": [ "schematicCollections": [
"@angular-eslint/schematics" "@angular-eslint/schematics"
] ],
"analytics": false
}, },
"schematics": { "schematics": {
"@angular-eslint/schematics:application": { "@angular-eslint/schematics:application": {

14779
src-ui/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -13,14 +13,14 @@
}, },
"private": true, "private": true,
"dependencies": { "dependencies": {
"@angular/common": "~15.1.0", "@angular/common": "~15.1.2",
"@angular/compiler": "~15.1.0", "@angular/compiler": "~15.1.2",
"@angular/core": "~15.1.0", "@angular/core": "~15.1.2",
"@angular/forms": "~15.1.0", "@angular/forms": "~15.1.2",
"@angular/localize": "~15.1.0", "@angular/localize": "~15.1.2",
"@angular/platform-browser": "~15.1.0", "@angular/platform-browser": "~15.1.2",
"@angular/platform-browser-dynamic": "~15.1.0", "@angular/platform-browser-dynamic": "~15.1.2",
"@angular/router": "~15.1.0", "@angular/router": "~15.1.2",
"@ng-bootstrap/ng-bootstrap": "^14.0.1", "@ng-bootstrap/ng-bootstrap": "^14.0.1",
"@ng-select/ng-select": "^10.0.1", "@ng-select/ng-select": "^10.0.1",
"@ngneat/dirty-check-forms": "^3.0.3", "@ngneat/dirty-check-forms": "^3.0.3",
@ -39,18 +39,18 @@
}, },
"devDependencies": { "devDependencies": {
"@angular-builders/jest": "15.0.0", "@angular-builders/jest": "15.0.0",
"@angular-devkit/build-angular": "~15.1.0", "@angular-devkit/build-angular": "~15.1.4",
"@angular-eslint/builder": "15.1.0", "@angular-eslint/builder": "15.2.0",
"@angular-eslint/eslint-plugin": "15.1.0", "@angular-eslint/eslint-plugin": "15.2.0",
"@angular-eslint/eslint-plugin-template": "15.1.0", "@angular-eslint/eslint-plugin-template": "15.2.0",
"@angular-eslint/schematics": "15.1.0", "@angular-eslint/schematics": "15.2.0",
"@angular-eslint/template-parser": "15.1.0", "@angular-eslint/template-parser": "15.2.0",
"@angular/cli": "~15.1.0", "@angular/cli": "~15.1.4",
"@angular/compiler-cli": "~15.1.0", "@angular/compiler-cli": "~15.1.2",
"@types/jest": "28.1.6", "@types/jest": "28.1.6",
"@types/node": "^18.7.23", "@types/node": "^18.7.23",
"@typescript-eslint/eslint-plugin": "^5.43.0", "@typescript-eslint/eslint-plugin": "^5.43.0",
"@typescript-eslint/parser": "^5.43.0", "@typescript-eslint/parser": "^5.50.0",
"concurrently": "7.4.0", "concurrently": "7.4.0",
"eslint": "^8.31.0", "eslint": "^8.31.0",
"jest": "28.1.3", "jest": "28.1.3",

View File

@ -229,6 +229,10 @@ export class DocumentDetailComponent
) )
.subscribe({ .subscribe({
next: (titleValue) => { next: (titleValue) => {
// In the rare case when the field changed just after debounced event was fired.
// We don't want to overwrite what's actually in the text field, so just return
if (titleValue !== this.titleInput.value) return
this.title = titleValue this.title = titleValue
this.documentForm.patchValue({ title: titleValue }) this.documentForm.patchValue({ title: titleValue })
}, },

View File

@ -5,7 +5,7 @@ export const environment = {
apiBaseUrl: document.baseURI + 'api/', apiBaseUrl: document.baseURI + 'api/',
apiVersion: '2', apiVersion: '2',
appTitle: 'Paperless-ngx', appTitle: 'Paperless-ngx',
version: '1.12.1-dev', version: '1.12.2-dev',
webSocketHost: window.location.host, webSocketHost: window.location.host,
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:', webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
webSocketBaseUrl: base_url.pathname + 'ws/', webSocketBaseUrl: base_url.pathname + 'ws/',

View File

@ -4,18 +4,17 @@ import shutil
import tempfile import tempfile
from dataclasses import dataclass from dataclasses import dataclass
from functools import lru_cache from functools import lru_cache
from math import ceil
from pathlib import Path from pathlib import Path
from typing import Dict
from typing import List from typing import List
from typing import Optional from typing import Optional
import magic import magic
from django.conf import settings from django.conf import settings
from pdf2image import convert_from_path from pdf2image import convert_from_path
from pdf2image.exceptions import PDFPageCountError
from pikepdf import Page from pikepdf import Page
from pikepdf import PasswordError
from pikepdf import Pdf from pikepdf import Pdf
from pikepdf import PdfImage
from PIL import Image from PIL import Image
from PIL import ImageSequence from PIL import ImageSequence
from pyzbar import pyzbar from pyzbar import pyzbar
@ -154,52 +153,15 @@ def scan_file_for_barcodes(
(page_number, barcode_text) tuples (page_number, barcode_text) tuples
""" """
def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]:
detected_barcodes = []
with Pdf.open(pdf_filepath) as pdf:
for page_num, page in enumerate(pdf.pages):
for image_key in page.images:
pdfimage = PdfImage(page.images[image_key])
# This type is known to have issues:
# https://github.com/pikepdf/pikepdf/issues/401
if "/CCITTFaxDecode" in pdfimage.filters:
raise BarcodeImageFormatError(
"Unable to decode CCITTFaxDecode images",
)
# Not all images can be transcoded to a PIL image, which
# is what pyzbar expects to receive, so this may
# raise an exception, triggering fallback
pillow_img = pdfimage.as_pil_image()
# Scale the image down
# See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
# TLDR: zbar has issues with larger images
width, height = pillow_img.size
if width > 1024:
scaler = ceil(width / 1024)
new_width = int(width / scaler)
new_height = int(height / scaler)
pillow_img = pillow_img.resize((new_width, new_height))
width, height = pillow_img.size
if height > 2048:
scaler = ceil(height / 2048)
new_width = int(width / scaler)
new_height = int(height / scaler)
pillow_img = pillow_img.resize((new_width, new_height))
for barcode_value in barcode_reader(pillow_img):
detected_barcodes.append(Barcode(page_num, barcode_value))
return detected_barcodes
def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]: def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
detected_barcodes = [] detected_barcodes = []
# use a temporary directory in case the file is too big to handle in memory # use a temporary directory in case the file is too big to handle in memory
with tempfile.TemporaryDirectory() as path: with tempfile.TemporaryDirectory() as path:
pages_from_path = convert_from_path(pdf_filepath, output_folder=path) pages_from_path = convert_from_path(
pdf_filepath,
dpi=300,
output_folder=path,
)
for current_page_number, page in enumerate(pages_from_path): for current_page_number, page in enumerate(pages_from_path):
for barcode_value in barcode_reader(page): for barcode_value in barcode_reader(page):
detected_barcodes.append( detected_barcodes.append(
@ -219,27 +181,19 @@ def scan_file_for_barcodes(
# Always try pikepdf first, it's usually fine, faster and # Always try pikepdf first, it's usually fine, faster and
# uses less memory # uses less memory
try: try:
barcodes = _pikepdf_barcode_scan(pdf_filepath) barcodes = _pdf2image_barcode_scan(pdf_filepath)
# Password protected files can't be checked # Password protected files can't be checked
except PasswordError as e: # This is the exception raised for those
except PDFPageCountError as e:
logger.warning( logger.warning(
f"File is likely password protected, not checking for barcodes: {e}", f"File is likely password protected, not checking for barcodes: {e}",
) )
# Handle pikepdf related image decoding issues with a fallback to page
# by page conversion to images in a temporary directory
except Exception as e:
logger.warning(
f"Falling back to pdf2image because: {e}",
)
try:
barcodes = _pdf2image_barcode_scan(pdf_filepath)
# This file is really borked, allow the consumption to continue # This file is really borked, allow the consumption to continue
# but it may fail further on # but it may fail further on
except Exception as e: # pragma: no cover except Exception as e: # pragma: no cover
logger.warning( logger.warning(
f"Exception during barcode scanning: {e}", f"Exception during barcode scanning: {e}",
) )
else: else:
logger.warning( logger.warning(
f"Unsupported file format for barcode reader: {str(mime_type)}", f"Unsupported file format for barcode reader: {str(mime_type)}",
@ -248,16 +202,25 @@ def scan_file_for_barcodes(
return DocumentBarcodeInfo(pdf_filepath, barcodes) return DocumentBarcodeInfo(pdf_filepath, barcodes)
def get_separating_barcodes(barcodes: List[Barcode]) -> List[int]: def get_separating_barcodes(barcodes: List[Barcode]) -> Dict[int, bool]:
""" """
Search the parsed barcodes for separators Search the parsed barcodes for separators
and returns a list of page numbers, which and returns a dict of page numbers, which
separate the file into new files. separate the file into new files, together
with the information whether to keep the page.
""" """
# filter all barcodes for the separator string # filter all barcodes for the separator string
# get the page numbers of the separating barcodes # get the page numbers of the separating barcodes
separator_pages = {bc.page: False for bc in barcodes if bc.is_separator}
if not settings.CONSUMER_ENABLE_ASN_BARCODE:
return separator_pages
return list({bc.page for bc in barcodes if bc.is_separator}) # add the page numbers of the ASN barcodes
# (except for first page, that might lead to infinite loops).
return {
**separator_pages,
**{bc.page: True for bc in barcodes if bc.is_asn and bc.page != 0},
}
def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]: def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
@ -289,10 +252,11 @@ def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
return asn return asn
def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]: def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[str]:
""" """
Separate the provided pdf file on the pages_to_split_on. Separate the provided pdf file on the pages_to_split_on.
The pages which are defined by page_numbers will be removed. The pages which are defined by the keys in page_numbers
will be removed if the corresponding value is false.
Returns a list of (temporary) filepaths to consume. Returns a list of (temporary) filepaths to consume.
These will need to be deleted later. These will need to be deleted later.
""" """
@ -308,26 +272,28 @@ def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
fname = os.path.splitext(os.path.basename(filepath))[0] fname = os.path.splitext(os.path.basename(filepath))[0]
pdf = Pdf.open(filepath) pdf = Pdf.open(filepath)
# Start with an empty document
current_document: List[Page] = []
# A list of documents, ie a list of lists of pages # A list of documents, ie a list of lists of pages
documents: List[List[Page]] = [] documents: List[List[Page]] = [current_document]
# A single document, ie a list of pages
document: List[Page] = []
for idx, page in enumerate(pdf.pages): for idx, page in enumerate(pdf.pages):
# Keep building the new PDF as long as it is not a # Keep building the new PDF as long as it is not a
# separator index # separator index
if idx not in pages_to_split_on: if idx not in pages_to_split_on:
document.append(page) current_document.append(page)
# Make sure to append the very last document to the documents continue
if idx == (len(pdf.pages) - 1):
documents.append(document) # This is a split index
document = [] # Start a new destination page listing
else:
# This is a split index, save the current PDF pages, and restart
# a new destination page listing
logger.debug(f"Starting new document at idx {idx}") logger.debug(f"Starting new document at idx {idx}")
documents.append(document) current_document = []
document = [] documents.append(current_document)
keep_page = pages_to_split_on[idx]
if keep_page:
# Keep the page
# (new document is started by asn barcode)
current_document.append(page)
documents = [x for x in documents if len(x)] documents = [x for x in documents if len(x)]

View File

@ -1,7 +1,10 @@
import datetime import datetime
import hashlib import hashlib
import os import os
import shutil
import tempfile
import uuid import uuid
from pathlib import Path
from subprocess import CompletedProcess from subprocess import CompletedProcess
from subprocess import run from subprocess import run
from typing import Optional from typing import Optional
@ -95,7 +98,8 @@ class Consumer(LoggingMixin):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.path = None self.path: Optional[Path] = None
self.original_path: Optional[Path] = None
self.filename = None self.filename = None
self.override_title = None self.override_title = None
self.override_correspondent_id = None self.override_correspondent_id = None
@ -144,11 +148,16 @@ class Consumer(LoggingMixin):
return return
# Validate the range is above zero and less than uint32_t max # Validate the range is above zero and less than uint32_t max
# otherwise, Whoosh can't handle it in the index # otherwise, Whoosh can't handle it in the index
if self.override_asn < 0 or self.override_asn > 0xFF_FF_FF_FF: if (
self.override_asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
):
self._fail( self._fail(
MESSAGE_ASN_RANGE, MESSAGE_ASN_RANGE,
f"Not consuming {self.filename}: " f"Not consuming {self.filename}: "
f"Given ASN {self.override_asn} is out of range [0, 4,294,967,295]", f"Given ASN {self.override_asn} is out of range "
f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
) )
if Document.objects.filter(archive_serial_number=self.override_asn).exists(): if Document.objects.filter(archive_serial_number=self.override_asn).exists():
self._fail( self._fail(
@ -169,16 +178,18 @@ class Consumer(LoggingMixin):
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}") self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
filepath_arg = os.path.normpath(self.path) working_file_path = str(self.path)
original_file_path = str(self.original_path)
script_env = os.environ.copy() script_env = os.environ.copy()
script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
script_env["DOCUMENT_WORKING_PATH"] = working_file_path
try: try:
completed_proc = run( completed_proc = run(
args=[ args=[
settings.PRE_CONSUME_SCRIPT, settings.PRE_CONSUME_SCRIPT,
filepath_arg, original_file_path,
], ],
env=script_env, env=script_env,
capture_output=True, capture_output=True,
@ -197,7 +208,7 @@ class Consumer(LoggingMixin):
exception=e, exception=e,
) )
def run_post_consume_script(self, document): def run_post_consume_script(self, document: Document):
if not settings.POST_CONSUME_SCRIPT: if not settings.POST_CONSUME_SCRIPT:
return return
@ -288,8 +299,8 @@ class Consumer(LoggingMixin):
Return the document object if it was successfully created. Return the document object if it was successfully created.
""" """
self.path = path self.path = Path(path).resolve()
self.filename = override_filename or os.path.basename(path) self.filename = override_filename or self.path.name
self.override_title = override_title self.override_title = override_title
self.override_correspondent_id = override_correspondent_id self.override_correspondent_id = override_correspondent_id
self.override_document_type_id = override_document_type_id self.override_document_type_id = override_document_type_id
@ -315,6 +326,15 @@ class Consumer(LoggingMixin):
self.log("info", f"Consuming {self.filename}") self.log("info", f"Consuming {self.filename}")
# For the actual work, copy the file into a tempdir
self.original_path = self.path
tempdir = tempfile.TemporaryDirectory(
prefix="paperless-ngx",
dir=settings.SCRATCH_DIR,
)
self.path = Path(tempdir.name) / Path(self.filename)
shutil.copy(self.original_path, self.path)
# Determine the parser class. # Determine the parser class.
mime_type = magic.from_file(self.path, mime=True) mime_type = magic.from_file(self.path, mime=True)
@ -457,11 +477,12 @@ class Consumer(LoggingMixin):
# Delete the file only if it was successfully consumed # Delete the file only if it was successfully consumed
self.log("debug", f"Deleting file {self.path}") self.log("debug", f"Deleting file {self.path}")
os.unlink(self.path) os.unlink(self.path)
self.original_path.unlink()
# https://github.com/jonaswinkler/paperless-ng/discussions/1037 # https://github.com/jonaswinkler/paperless-ng/discussions/1037
shadow_file = os.path.join( shadow_file = os.path.join(
os.path.dirname(self.path), os.path.dirname(self.original_path),
"._" + os.path.basename(self.path), "._" + os.path.basename(self.original_path),
) )
if os.path.isfile(shadow_file): if os.path.isfile(shadow_file):
@ -478,6 +499,7 @@ class Consumer(LoggingMixin):
) )
finally: finally:
document_parser.cleanup() document_parser.cleanup()
tempdir.cleanup()
self.run_post_consume_script(document) self.run_post_consume_script(document)

View File

@ -5,6 +5,7 @@ from contextlib import contextmanager
from dateutil.parser import isoparse from dateutil.parser import isoparse
from django.conf import settings from django.conf import settings
from django.utils import timezone
from documents.models import Comment from documents.models import Comment
from documents.models import Document from documents.models import Document
from guardian.shortcuts import get_users_with_perms from guardian.shortcuts import get_users_with_perms
@ -94,10 +95,22 @@ def open_index_searcher():
searcher.close() searcher.close()
def update_document(writer, doc): def update_document(writer: AsyncWriter, doc: Document):
tags = ",".join([t.name for t in doc.tags.all()]) tags = ",".join([t.name for t in doc.tags.all()])
tags_ids = ",".join([str(t.id) for t in doc.tags.all()]) tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)]) comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
asn = doc.archive_serial_number
if asn is not None and (
asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
):
logger.error(
f"Not indexing Archive Serial Number {asn} of document {doc.pk}. "
f"ASN is out of range "
f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}.",
)
asn = 0
users_with_perms = get_users_with_perms( users_with_perms = get_users_with_perms(
doc, doc,
only_with_perms_in=["view_document"], only_with_perms_in=["view_document"],
@ -118,7 +131,7 @@ def update_document(writer, doc):
has_type=doc.document_type is not None, has_type=doc.document_type is not None,
created=doc.created, created=doc.created,
added=doc.added, added=doc.added,
asn=doc.archive_serial_number, asn=asn,
modified=doc.modified, modified=doc.modified,
path=doc.storage_path.name if doc.storage_path else None, path=doc.storage_path.name if doc.storage_path else None,
path_id=doc.storage_path.id if doc.storage_path else None, path_id=doc.storage_path.id if doc.storage_path else None,
@ -283,7 +296,7 @@ class DelayedFullTextQuery(DelayedQuery):
["content", "title", "correspondent", "tag", "type", "comments"], ["content", "title", "correspondent", "tag", "type", "comments"],
self.searcher.ixreader.schema, self.searcher.ixreader.schema,
) )
qp.add_plugin(DateParserPlugin()) qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
q = qp.parse(q_str) q = qp.parse(q_str)
corrected = self.searcher.correct_query(q, q_str) corrected = self.searcher.correct_query(q, q_str)

View File

@ -311,8 +311,8 @@ class Command(BaseCommand):
archive_target = None archive_target = None
# 3.4. write files to target folder # 3.4. write files to target folder
t = int(time.mktime(document.created.timetuple()))
if document.storage_type == Document.STORAGE_TYPE_GPG: if document.storage_type == Document.STORAGE_TYPE_GPG:
t = int(time.mktime(document.created.timetuple()))
original_target.parent.mkdir(parents=True, exist_ok=True) original_target.parent.mkdir(parents=True, exist_ok=True)
with document.source_file as out_file: with document.source_file as out_file:

View File

@ -0,0 +1,23 @@
# Generated by Django 4.1.5 on 2023-02-03 21:53
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("documents", "1029_alter_document_archive_serial_number"),
]
operations = [
migrations.AlterField(
model_name="paperlesstask",
name="task_file_name",
field=models.CharField(
help_text="Name of the file which the Task was run for",
max_length=255,
null=True,
verbose_name="Task Filename",
),
),
]

View File

@ -3,6 +3,7 @@ import logging
import os import os
import re import re
from collections import OrderedDict from collections import OrderedDict
from typing import Final
from typing import Optional from typing import Optional
import dateutil.parser import dateutil.parser
@ -242,6 +243,9 @@ class Document(ModelWithOwner):
help_text=_("The original name of the file when it was uploaded"), help_text=_("The original name of the file when it was uploaded"),
) )
ARCHIVE_SERIAL_NUMBER_MIN: Final[int] = 0
ARCHIVE_SERIAL_NUMBER_MAX: Final[int] = 0xFF_FF_FF_FF
archive_serial_number = models.PositiveIntegerField( archive_serial_number = models.PositiveIntegerField(
_("archive serial number"), _("archive serial number"),
blank=True, blank=True,
@ -249,8 +253,8 @@ class Document(ModelWithOwner):
unique=True, unique=True,
db_index=True, db_index=True,
validators=[ validators=[
MaxValueValidator(0xFF_FF_FF_FF), MaxValueValidator(ARCHIVE_SERIAL_NUMBER_MAX),
MinValueValidator(0), MinValueValidator(ARCHIVE_SERIAL_NUMBER_MIN),
], ],
help_text=_( help_text=_(
"The position of this document in your physical document " "archive.", "The position of this document in your physical document " "archive.",
@ -567,7 +571,7 @@ class PaperlessTask(models.Model):
task_file_name = models.CharField( task_file_name = models.CharField(
null=True, null=True,
max_length=255, max_length=255,
verbose_name=_("Task Name"), verbose_name=_("Task Filename"),
help_text=_("Name of the file which the Task was run for"), help_text=_("Name of the file which the Task was run for"),
) )

View File

@ -166,7 +166,7 @@ def consume_file(
# notify the sender, otherwise the progress bar # notify the sender, otherwise the progress bar
# in the UI stays stuck # in the UI stays stuck
payload = { payload = {
"filename": override_filename, "filename": override_filename or path.name,
"task_id": task_id, "task_id": task_id,
"current_progress": 100, "current_progress": 100,
"max_progress": 100, "max_progress": 100,

View File

@ -7,6 +7,7 @@ import tempfile
import urllib.request import urllib.request
import uuid import uuid
import zipfile import zipfile
from datetime import timedelta
from pathlib import Path from pathlib import Path
from unittest import mock from unittest import mock
from unittest.mock import MagicMock from unittest.mock import MagicMock
@ -25,6 +26,7 @@ from django.contrib.auth.models import Permission
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.test import override_settings from django.test import override_settings
from django.utils import timezone from django.utils import timezone
from dateutil.relativedelta import relativedelta
from documents import bulk_edit from documents import bulk_edit
from documents import index from documents import index
from documents.models import Correspondent from documents.models import Correspondent
@ -509,6 +511,270 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
response = self.client.get("/api/documents/?query=content&page=3&page_size=10") response = self.client.get("/api/documents/?query=content&page=3&page_size=10")
self.assertEqual(response.status_code, 404) self.assertEqual(response.status_code, 404)
@override_settings(TIME_ZONE="UTC")
def test_search_added_in_last_week(self):
    """
    GIVEN:
        - Three documents added right now
        - The timezone is UTC
    WHEN:
        - Query for documents added in the last 7 days
    THEN:
        - All three recent documents are returned
    """
    invoice = Document.objects.create(
        pk=1,
        title="invoice",
        content="the thing i bought at a shop and paid with bank account",
        checksum="A",
    )
    first_statement = Document.objects.create(
        pk=2,
        title="bank statement 1",
        content="things i paid for in august",
        checksum="B",
    )
    third_statement = Document.objects.create(
        pk=3,
        title="bank statement 3",
        content="things i paid for in september",
        checksum="C",
    )

    # Push every document into the search index before querying it
    with index.open_index_writer() as writer:
        for doc in (invoice, first_statement, third_statement):
            index.update_document(writer, doc)

    response = self.client.get("/api/documents/?query=added:[-1 week to now]")
    results = response.data["results"]

    expected_subsets = [
        {"id": 1, "title": "invoice"},
        {"id": 2, "title": "bank statement 1"},
        {"id": 3, "title": "bank statement 3"},
    ]

    # Expect 3 documents returned
    self.assertEqual(len(results), 3)

    for result, subset in zip(results, expected_subsets):
        # Merging the subset into the result must not change it,
        # i.e. the subset is already contained in the result
        self.assertDictEqual(result, {**result, **subset})
@override_settings(TIME_ZONE="America/Chicago")
def test_search_added_in_last_week_with_timezone_behind(self):
    """
    GIVEN:
        - Two documents added right now
        - One document added over a week ago
        - The timezone is behind UTC time (-6)
    WHEN:
        - Query for documents added in the last 7 days
    THEN:
        - The two recent documents are returned
    """
    recent_invoice = Document.objects.create(
        pk=1,
        title="invoice",
        content="the thing i bought at a shop and paid with bank account",
        checksum="A",
    )
    recent_statement = Document.objects.create(
        pk=2,
        title="bank statement 1",
        content="things i paid for in august",
        checksum="B",
    )
    # 7 days, 1 hour and 1 minute ago, so just outside the queried range
    over_a_week_ago = timezone.now() - timedelta(days=7, hours=1, minutes=1)
    stale_statement = Document.objects.create(
        pk=3,
        title="bank statement 3",
        content="things i paid for in september",
        checksum="C",
        added=over_a_week_ago,
    )

    # Push every document into the search index before querying it
    with index.open_index_writer() as writer:
        for doc in (recent_invoice, recent_statement, stale_statement):
            index.update_document(writer, doc)

    response = self.client.get("/api/documents/?query=added:[-1 week to now]")
    results = response.data["results"]

    expected_subsets = [
        {"id": 1, "title": "invoice"},
        {"id": 2, "title": "bank statement 1"},
    ]

    # Expect 2 documents returned
    self.assertEqual(len(results), 2)

    for result, subset in zip(results, expected_subsets):
        # Merging the subset into the result must not change it,
        # i.e. the subset is already contained in the result
        self.assertDictEqual(result, {**result, **subset})
@override_settings(TIME_ZONE="Europe/Sofia")
def test_search_added_in_last_week_with_timezone_ahead(self):
    """
    GIVEN:
        - Two documents added right now
        - One document added over a week ago
        - The timezone is ahead of UTC time (+2)
    WHEN:
        - Query for documents added in the last 7 days
    THEN:
        - The two recent documents are returned
    """
    recent_invoice = Document.objects.create(
        pk=1,
        title="invoice",
        content="the thing i bought at a shop and paid with bank account",
        checksum="A",
    )
    recent_statement = Document.objects.create(
        pk=2,
        title="bank statement 1",
        content="things i paid for in august",
        checksum="B",
    )
    # 7 days, 1 hour and 1 minute ago, so just outside the queried range
    over_a_week_ago = timezone.now() - timedelta(days=7, hours=1, minutes=1)
    stale_statement = Document.objects.create(
        pk=3,
        title="bank statement 3",
        content="things i paid for in september",
        checksum="C",
        added=over_a_week_ago,
    )

    # Push every document into the search index before querying it
    with index.open_index_writer() as writer:
        for doc in (recent_invoice, recent_statement, stale_statement):
            index.update_document(writer, doc)

    response = self.client.get("/api/documents/?query=added:[-1 week to now]")
    results = response.data["results"]

    expected_subsets = [
        {"id": 1, "title": "invoice"},
        {"id": 2, "title": "bank statement 1"},
    ]

    # Expect 2 documents returned
    self.assertEqual(len(results), 2)

    for result, subset in zip(results, expected_subsets):
        # Merging the subset into the result must not change it,
        # i.e. the subset is already contained in the result
        self.assertDictEqual(result, {**result, **subset})
def test_search_added_in_last_month(self):
    """
    GIVEN:
        - One document added right now
        - One document added about a week ago
        - One document added over a month ago
    WHEN:
        - Query for documents added in the last month
    THEN:
        - The two recent documents are returned
    """
    current_invoice = Document.objects.create(
        pk=1,
        title="invoice",
        content="the thing i bought at a shop and paid with bank account",
        checksum="A",
    )
    # 1 month, 1 day ago, so just outside the queried range
    over_a_month_ago = timezone.now() - relativedelta(months=1, days=1)
    stale_statement = Document.objects.create(
        pk=2,
        title="bank statement 1",
        content="things i paid for in august",
        checksum="B",
        added=over_a_month_ago,
    )
    # 7 days, 1 hour and 1 minute ago, so still inside the queried range
    about_a_week_ago = timezone.now() - timedelta(days=7, hours=1, minutes=1)
    week_old_statement = Document.objects.create(
        pk=3,
        title="bank statement 3",
        content="things i paid for in september",
        checksum="C",
        added=about_a_week_ago,
    )

    # Push every document into the search index before querying it
    with index.open_index_writer() as writer:
        for doc in (current_invoice, stale_statement, week_old_statement):
            index.update_document(writer, doc)

    response = self.client.get("/api/documents/?query=added:[-1 month to now]")
    results = response.data["results"]

    expected_subsets = [
        {"id": 1, "title": "invoice"},
        {"id": 3, "title": "bank statement 3"},
    ]

    # Expect 2 documents returned
    self.assertEqual(len(results), 2)

    for result, subset in zip(results, expected_subsets):
        # Merging the subset into the result must not change it,
        # i.e. the subset is already contained in the result
        self.assertDictEqual(result, {**result, **subset})
@override_settings(TIME_ZONE="America/Denver")
def test_search_added_in_last_month_timezone_behind(self):
    """
    GIVEN:
        - One document added right now
        - One document added about a week ago
        - One document added over a month ago
        - The timezone is behind UTC time (-6 or -7)
    WHEN:
        - Query for documents added in the last month
    THEN:
        - The two recent documents are returned
    """
    current_invoice = Document.objects.create(
        pk=1,
        title="invoice",
        content="the thing i bought at a shop and paid with bank account",
        checksum="A",
    )
    # 1 month, 1 day ago, so just outside the queried range
    over_a_month_ago = timezone.now() - relativedelta(months=1, days=1)
    stale_statement = Document.objects.create(
        pk=2,
        title="bank statement 1",
        content="things i paid for in august",
        checksum="B",
        added=over_a_month_ago,
    )
    # 7 days, 1 hour and 1 minute ago, so still inside the queried range
    about_a_week_ago = timezone.now() - timedelta(days=7, hours=1, minutes=1)
    week_old_statement = Document.objects.create(
        pk=3,
        title="bank statement 3",
        content="things i paid for in september",
        checksum="C",
        added=about_a_week_ago,
    )

    # Push every document into the search index before querying it
    with index.open_index_writer() as writer:
        for doc in (current_invoice, stale_statement, week_old_statement):
            index.update_document(writer, doc)

    response = self.client.get("/api/documents/?query=added:[-1 month to now]")
    results = response.data["results"]

    expected_subsets = [
        {"id": 1, "title": "invoice"},
        {"id": 3, "title": "bank statement 3"},
    ]

    # Expect 2 documents returned
    self.assertEqual(len(results), 2)

    for result, subset in zip(results, expected_subsets):
        # Merging the subset into the result must not change it,
        # i.e. the subset is already contained in the result
        self.assertDictEqual(result, {**result, **subset})
@mock.patch("documents.index.autocomplete") @mock.patch("documents.index.autocomplete")
def test_search_autocomplete(self, m): def test_search_autocomplete(self, m):
m.side_effect = lambda ix, term, limit: [term for _ in range(limit)] m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]

File diff suppressed because it is too large Load Diff

View File

@ -833,7 +833,8 @@ class PreConsumeTestCase(TestCase):
with tempfile.NamedTemporaryFile() as script: with tempfile.NamedTemporaryFile() as script:
with override_settings(PRE_CONSUME_SCRIPT=script.name): with override_settings(PRE_CONSUME_SCRIPT=script.name):
c = Consumer() c = Consumer()
c.path = "path-to-file" c.original_path = "path-to-file"
c.path = "/tmp/somewhere/path-to-file"
c.run_pre_consume_script() c.run_pre_consume_script()
m.assert_called_once() m.assert_called_once()
@ -841,10 +842,19 @@ class PreConsumeTestCase(TestCase):
args, kwargs = m.call_args args, kwargs = m.call_args
command = kwargs["args"] command = kwargs["args"]
environment = kwargs["env"]
self.assertEqual(command[0], script.name) self.assertEqual(command[0], script.name)
self.assertEqual(command[1], "path-to-file") self.assertEqual(command[1], "path-to-file")
self.assertDictContainsSubset(
{
"DOCUMENT_SOURCE_PATH": c.original_path,
"DOCUMENT_WORKING_PATH": c.path,
},
environment,
)
@mock.patch("documents.consumer.Consumer.log") @mock.patch("documents.consumer.Consumer.log")
def test_script_with_output(self, mocked_log): def test_script_with_output(self, mocked_log):
""" """
@ -961,9 +971,10 @@ class PostConsumeTestCase(TestCase):
m.assert_called_once() m.assert_called_once()
args, kwargs = m.call_args _, kwargs = m.call_args
command = kwargs["args"] command = kwargs["args"]
environment = kwargs["env"]
self.assertEqual(command[0], script.name) self.assertEqual(command[0], script.name)
self.assertEqual(command[1], str(doc.pk)) self.assertEqual(command[1], str(doc.pk))
@ -972,6 +983,17 @@ class PostConsumeTestCase(TestCase):
self.assertEqual(command[7], "my_bank") self.assertEqual(command[7], "my_bank")
self.assertCountEqual(command[8].split(","), ["a", "b"]) self.assertCountEqual(command[8].split(","), ["a", "b"])
self.assertDictContainsSubset(
{
"DOCUMENT_ID": str(doc.pk),
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
"DOCUMENT_CORRESPONDENT": "my_bank",
"DOCUMENT_TAGS": "a,b",
},
environment,
)
def test_script_exit_non_zero(self): def test_script_exit_non_zero(self):
""" """
GIVEN: GIVEN:

View File

@ -1,3 +1,5 @@
from unittest import mock
from django.test import TestCase from django.test import TestCase
from documents import index from documents import index
from documents.models import Document from documents.models import Document
@ -31,3 +33,60 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
) )
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"]) self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), []) self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
def test_archive_serial_number_ranging(self):
    """
    GIVEN:
        - Document with an archive serial number above schema allowed size
    WHEN:
        - Document is provided to the index
    THEN:
        - Error is logged
        - Document ASN is reset to 0 for the index
    """
    doc1 = Document.objects.create(
        title="doc1",
        checksum="A",
        content="test test2 test3",
        # yes, this is allowed, unless full_clean is run
        # DRF does call the validators, this test won't
        archive_serial_number=Document.ARCHIVE_SERIAL_NUMBER_MAX + 1,
    )

    with self.assertLogs("paperless.index", level="ERROR") as cm:
        with mock.patch(
            "documents.index.AsyncWriter.update_document",
        ) as mocked_update_doc:
            index.add_or_update_document(doc1)

    # The mock keeps its recorded calls after the patch has been undone
    mocked_update_doc.assert_called_once()
    _, kwargs = mocked_update_doc.call_args

    self.assertEqual(kwargs["asn"], 0)

    # Build the expected message from the document itself rather than
    # hard-coding the primary key and ASN: the document is created without
    # an explicit pk, and the database is not guaranteed to hand out pk 1
    # (sequences may not be reset between tests).
    error_str = cm.output[0]
    expected_str = (
        "ERROR:paperless.index:Not indexing Archive Serial Number "
        f"{doc1.archive_serial_number} of document {doc1.pk}"
    )
    self.assertIn(expected_str, error_str)
def test_archive_serial_number_is_none(self):
    """
    GIVEN:
        - Document with no archive serial number
    WHEN:
        - Document is provided to the index
    THEN:
        - ASN isn't touched
    """
    document = Document.objects.create(
        title="doc1",
        checksum="A",
        content="test test2 test3",
    )

    update_target = "documents.index.AsyncWriter.update_document"
    with mock.patch(update_target) as mocked_update_doc:
        index.add_or_update_document(document)

        mocked_update_doc.assert_called_once()

        # call_args is an (args, kwargs) pair; only the keywords matter here
        call_kwargs = mocked_update_doc.call_args[1]
        self.assertIsNone(call_kwargs["asn"])

View File

@ -3,6 +3,7 @@ import shutil
import tempfile import tempfile
from collections import namedtuple from collections import namedtuple
from contextlib import contextmanager from contextlib import contextmanager
from unittest import mock
from django.apps import apps from django.apps import apps
from django.db import connection from django.db import connection
@ -86,6 +87,30 @@ class DirectoriesMixin:
remove_dirs(self.dirs) remove_dirs(self.dirs)
class ConsumerProgressMixin:
    """
    Test mixin which replaces Consumer._send_progress with a mock for the
    duration of each test.

    The patcher is kept as ``send_progress_patcher`` and the resulting mock
    as ``send_progress_mock``.
    """

    def setUp(self) -> None:
        # Start patching before the rest of the setUp chain runs
        patcher = mock.patch("documents.consumer.Consumer._send_progress")
        self.send_progress_patcher = patcher
        self.send_progress_mock = patcher.start()
        super().setUp()

    def tearDown(self) -> None:
        # Undo the patch only after the rest of the tearDown chain is done
        super().tearDown()
        self.send_progress_patcher.stop()
class DocumentConsumeDelayMixin:
    """
    Test mixin which replaces documents.tasks.consume_file.delay with a mock
    for the duration of each test.

    The patcher is kept as ``consume_file_patcher`` and the resulting mock
    as ``consume_file_mock``.
    """

    def setUp(self) -> None:
        # Start patching before the rest of the setUp chain runs
        patcher = mock.patch("documents.tasks.consume_file.delay")
        self.consume_file_patcher = patcher
        self.consume_file_mock = patcher.start()
        super().setUp()

    def tearDown(self) -> None:
        # Undo the patch only after the rest of the tearDown chain is done
        super().tearDown()
        self.consume_file_patcher.stop()
class TestMigrations(TransactionTestCase): class TestMigrations(TransactionTestCase):
@property @property
def app(self): def app(self):

View File

@ -1,7 +1,7 @@
from typing import Final from typing import Final
from typing import Tuple from typing import Tuple
__version__: Final[Tuple[int, int, int]] = (1, 12, 1) __version__: Final[Tuple[int, int, int]] = (1, 12, 2)
# Version string like X.Y.Z # Version string like X.Y.Z
__full_version_str__: Final[str] = ".".join(map(str, __version__)) __full_version_str__: Final[str] = ".".join(map(str, __version__))
# Version string like X.Y # Version string like X.Y

View File

@ -67,11 +67,6 @@ class TestParserLive(TestCase):
return result return result
# Only run if convert is available
@pytest.mark.skipif(
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
)
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock): def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
""" """
@ -204,11 +199,6 @@ class TestParserLive(TestCase):
"GOTENBERG_LIVE" not in os.environ, "GOTENBERG_LIVE" not in os.environ,
reason="No gotenberg server", reason="No gotenberg server",
) )
# Only run if convert is available
@pytest.mark.skipif(
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
)
def test_generate_pdf_from_mail(self): def test_generate_pdf_from_mail(self):
""" """
GIVEN: GIVEN:
@ -301,11 +291,6 @@ class TestParserLive(TestCase):
"GOTENBERG_LIVE" not in os.environ, "GOTENBERG_LIVE" not in os.environ,
reason="No gotenberg server", reason="No gotenberg server",
) )
# Only run if convert is available
@pytest.mark.skipif(
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
)
def test_generate_pdf_from_html(self): def test_generate_pdf_from_html(self):
""" """
GIVEN: GIVEN:

View File

@ -90,7 +90,7 @@ class TikaDocumentParser(DocumentParser):
with open(document_path, "rb") as document_handle: with open(document_path, "rb") as document_handle:
files = { files = {
"files": ( "files": (
file_name or os.path.basename(document_path), "convert" + os.path.splitext(document_path)[-1],
document_handle, document_handle,
), ),
} }

View File

@ -7,7 +7,7 @@ max-line-length = 88
[tool:pytest] [tool:pytest]
DJANGO_SETTINGS_MODULE=paperless.settings DJANGO_SETTINGS_MODULE=paperless.settings
addopts = --pythonwarnings=all --cov --cov-report=html --numprocesses auto --quiet addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --quiet
env = env =
PAPERLESS_DISABLE_DBHANDLER=true PAPERLESS_DISABLE_DBHANDLER=true