mirror of https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00

Merge branch 'dev' into feature-permissions

This commit is contained in:
commit d2a6f79612
19 .codecov.yml Normal file
@@ -0,0 +1,19 @@
# https://docs.codecov.com/docs/pull-request-comments
# codecov will only comment if coverage changes
comment:
  require_changes: true
coverage:
  status:
    project:
      default:
        # https://docs.codecov.com/docs/commit-status#threshold
        threshold: 1%
        # https://docs.codecov.com/docs/commit-status#only_pulls
        only_pulls: true
    patch:
      default:
        # For the changed lines only, target 75% covered, but
        # allow as low as 50%
        target: 75%
        threshold: 25%
        only_pulls: true
86 .github/workflows/ci.yml vendored
@@ -113,16 +113,12 @@ jobs:
      PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }}
      PAPERLESS_MAIL_TEST_USER: ${{ secrets.TEST_MAIL_USER }}
      PAPERLESS_MAIL_TEST_PASSWD: ${{ secrets.TEST_MAIL_PASSWD }}
      # Skip Tests which require convert
      PAPERLESS_TEST_SKIP_CONVERT: 1
      # Enable Gotenberg end to end testing
      GOTENBERG_LIVE: 1
    steps:
      -
        name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
      -
        name: Start containers
        run: |
@@ -145,6 +141,10 @@ jobs:
        run: |
          sudo apt-get update -qq
          sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript libzbar0 poppler-utils
      -
        name: Configure ImageMagick
        run: |
          sudo cp docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
      -
        name: Install Python dependencies
        run: |
@@ -160,27 +160,14 @@ jobs:
          cd src/
          pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra
      -
        name: Get changed files
        id: changed-files-specific
        uses: tj-actions/changed-files@v35
        name: Upload coverage to Codecov
        if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }}
        uses: codecov/codecov-action@v3
        with:
          files: |
            src/**
      -
        name: List all changed files
        run: |
          for file in ${{ steps.changed-files-specific.outputs.all_changed_files }}; do
            echo "${file} was changed"
          done
      -
        name: Publish coverage results
        if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }} && steps.changed-files-specific.outputs.any_changed == 'true'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # https://github.com/coveralls-clients/coveralls-python/issues/251
        run: |
          cd src/
          pipenv --python ${{ steps.setup-python.outputs.python-version }} run coveralls --service=github
          # not required for public repos, but intermittently fails otherwise
          token: ${{ secrets.CODECOV_TOKEN }}
          # future expansion
          flags: backend
      -
        name: Stop containers
        if: always()
@@ -347,7 +334,7 @@ jobs:
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      -
        name: Build and push
        uses: docker/build-push-action@v3
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile
@@ -442,21 +429,48 @@ jobs:
      -
        name: Move files
        run: |
          mkdir dist
          mkdir dist/paperless-ngx
          mkdir dist/paperless-ngx/scripts
          cp .dockerignore .env Dockerfile Pipfile Pipfile.lock requirements.txt LICENSE README.md dist/paperless-ngx/
          cp paperless.conf.example dist/paperless-ngx/paperless.conf
          cp gunicorn.conf.py dist/paperless-ngx/gunicorn.conf.py
          cp -r docker/ dist/paperless-ngx/docker
          cp scripts/*.service scripts/*.sh scripts/*.socket dist/paperless-ngx/scripts/
          cp -r src/ dist/paperless-ngx/src
          cp -r docs/_build/html/ dist/paperless-ngx/docs
          mv static dist/paperless-ngx
          echo "Making dist folders"
          for directory in dist \
            dist/paperless-ngx \
            dist/paperless-ngx/scripts;
          do
            mkdir --verbose --parents ${directory}
          done

          echo "Copying basic files"
          for file_name in .dockerignore \
            .env \
            Dockerfile \
            Pipfile \
            Pipfile.lock \
            requirements.txt \
            LICENSE \
            README.md \
            paperless.conf.example \
            gunicorn.conf.py
          do
            cp --verbose ${file_name} dist/paperless-ngx/
          done
          mv --verbose dist/paperless-ngx/paperless.conf.example paperless.conf

          echo "Copying Docker related files"
          cp --recursive docker/ dist/paperless-ngx/docker

          echo "Copying startup scripts"
          cp --verbose scripts/*.service scripts/*.sh scripts/*.socket dist/paperless-ngx/scripts/

          echo "Copying source files"
          cp --recursive src/ dist/paperless-ngx/src
          echo "Copying documentation"
          cp --recursive docs/_build/html/ dist/paperless-ngx/docs

          mv --verbose static dist/paperless-ngx
      -
        name: Make release package
        run: |
          echo "Creating release archive"
          cd dist
          sudo chown -R 1000:1000 paperless-ngx/
          tar -cJf paperless-ngx.tar.xz paperless-ngx/
      -
        name: Upload release artifact
@@ -45,7 +45,7 @@ jobs:
        uses: docker/setup-qemu-action@v2
      -
        name: Build ${{ fromJSON(inputs.build-json).name }}
        uses: docker/build-push-action@v3
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ${{ inputs.dockerfile }}
17 Dockerfile
@@ -1,4 +1,5 @@
# syntax=docker/dockerfile:1.4
# https://github.com/moby/buildkit/blob/master/frontend/dockerfile/docs/reference.md

FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend

@@ -61,10 +62,6 @@ ARG PSYCOPG2_VERSION

# Packages needed for running
ARG RUNTIME_PACKAGES="\
  # Python
  python3 \
  python3-pip \
  python3-setuptools \
  # General utils
  curl \
  # Docker specific
@@ -128,7 +125,7 @@ RUN set -eux \
  && apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \
  && rm -rf /var/lib/apt/lists/* \
  && echo "Installing supervisor" \
  && python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor==4.2.4
  && python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor==4.2.5

# Copy gunicorn config
# Changes very infrequently
@@ -137,7 +134,6 @@ WORKDIR /usr/src/paperless/
COPY gunicorn.conf.py .

# setup docker-specific things
# Use mounts to avoid copying installer files into the image
# These change sometimes, but rarely
WORKDIR /usr/src/paperless/src/docker/

@@ -179,7 +175,6 @@ RUN set -eux \
  && ./install_management_commands.sh

# Install the built packages from the installer library images
# Use mounts to avoid copying installer files into the image
# These change sometimes
RUN set -eux \
  && echo "Getting binaries" \
@@ -203,7 +198,8 @@ RUN set -eux \
  && python3 -m pip list \
  && echo "Cleaning up image layer" \
  && cd ../ \
  && rm -rf paperless-ngx
  && rm -rf paperless-ngx \
  && rm paperless-ngx.tar.gz

WORKDIR /usr/src/paperless/src/

@@ -247,11 +243,12 @@ COPY ./src ./
COPY --from=compile-frontend /src/src/documents/static/frontend/ ./documents/static/frontend/

# add users, setup scripts
# Mount the compiled frontend to expected location
RUN set -eux \
  && addgroup --gid 1000 paperless \
  && useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
  && chown -R paperless:paperless ../ \
  && gosu paperless python3 manage.py collectstatic --clear --no-input \
  && chown -R paperless:paperless /usr/src/paperless \
  && gosu paperless python3 manage.py collectstatic --clear --no-input --link \
  && gosu paperless python3 manage.py compilemessages

VOLUME ["/usr/src/paperless/data", \
@@ -1,7 +1,7 @@
[](https://github.com/paperless-ngx/paperless-ngx/actions)
[](https://crowdin.com/project/paperless-ngx)
[](https://docs.paperless-ngx.com)
[](https://coveralls.io/github/paperless-ngx/paperless-ngx?branch=master)
[](https://codecov.io/gh/paperless-ngx/paperless-ngx)
[](https://matrix.to/#/%23paperlessngx%3Amatrix.org)
[](https://demo.paperless-ngx.com)
@@ -80,7 +80,7 @@ django_checks() {

search_index() {

	local -r index_version=2
	local -r index_version=3
	local -r index_version_file=${DATA_DIR}/.index_version

	if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then
@@ -121,7 +121,17 @@ Executed after the consumer sees a new document in the consumption
folder, but before any processing of the document is performed. This
script can access the following relevant environment variables set:

- `DOCUMENT_SOURCE_PATH`

| Environment Variable    | Description                                                   |
| ----------------------- | ------------------------------------------------------------- |
| `DOCUMENT_SOURCE_PATH`  | Original path of the consumed document                         |
| `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on  |

!!! note

    Pre-consume scripts which modify the document should only change
    the `DOCUMENT_WORKING_PATH` file or a second consume task may
    be triggered, leading to failures as two tasks work on the
    same document path.

A simple but common example for this would be creating a simple script
like this:

@@ -130,7 +140,7 @@ like this:

```bash
#!/usr/bin/env bash
pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH}
pdf2pdfocr.py -i ${DOCUMENT_WORKING_PATH}
```

`/etc/paperless.conf`

@@ -157,26 +167,36 @@ Executed after the consumer has successfully processed a document and
has moved it into paperless. It receives the following environment
variables:

- `DOCUMENT_ID`
- `DOCUMENT_FILE_NAME`
- `DOCUMENT_CREATED`
- `DOCUMENT_MODIFIED`
- `DOCUMENT_ADDED`
- `DOCUMENT_SOURCE_PATH`
- `DOCUMENT_ARCHIVE_PATH`
- `DOCUMENT_THUMBNAIL_PATH`
- `DOCUMENT_DOWNLOAD_URL`
- `DOCUMENT_THUMBNAIL_URL`
- `DOCUMENT_CORRESPONDENT`
- `DOCUMENT_TAGS`
- `DOCUMENT_ORIGINAL_FILENAME`

| Environment Variable         | Description                                   |
| ---------------------------- | --------------------------------------------- |
| `DOCUMENT_ID`                | Database primary key of the document          |
| `DOCUMENT_FILE_NAME`         | Formatted filename, not including paths       |
| `DOCUMENT_CREATED`           | Date & time when document created             |
| `DOCUMENT_MODIFIED`          | Date & time when document was last modified   |
| `DOCUMENT_ADDED`             | Date & time when document was added           |
| `DOCUMENT_SOURCE_PATH`       | Path to the original document file            |
| `DOCUMENT_ARCHIVE_PATH`      | Path to the generated archive file (if any)   |
| `DOCUMENT_THUMBNAIL_PATH`    | Path to the generated thumbnail               |
| `DOCUMENT_DOWNLOAD_URL`      | URL for document download                     |
| `DOCUMENT_THUMBNAIL_URL`     | URL for the document thumbnail                |
| `DOCUMENT_CORRESPONDENT`     | Assigned correspondent (if any)               |
| `DOCUMENT_TAGS`              | Comma separated list of tags applied (if any) |
| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document                 |

The script can be in any language, but for a simple shell script
example, you can take a look at
[post-consumption-example.sh](https://github.com/paperless-ngx/paperless-ngx/blob/main/scripts/post-consumption-example.sh)
in this project.
The script can be in any language. A simple shell script example:

The post consumption script cannot cancel the consumption process.
```bash title="post-consumption-example"
--8<-- "./scripts/post-consumption-example.sh"
```

!!! note

    The post consumption script cannot cancel the consumption process.

!!! warning

    The post consumption script should not modify the document files
    directly.

The script's stdout and stderr will be logged line by line to the
webserver log, along with the exit code of the script.
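For illustration, here is the same idea as a minimal Python post-consume script. This is a hypothetical sketch, not a script shipped with the project; it only uses the environment variables documented in the table above:

```python
#!/usr/bin/env python3
# Hypothetical post-consume sketch: read the documented environment
# variables and print a one-line summary (stdout is captured line by
# line in the webserver log).
import os


def main() -> None:
    doc_id = os.environ["DOCUMENT_ID"]  # database primary key
    file_name = os.environ["DOCUMENT_FILE_NAME"]  # formatted filename
    # Optional values may be missing or empty depending on the document
    tags = os.environ.get("DOCUMENT_TAGS", "")
    tag_list = [t.strip() for t in tags.split(",") if t.strip()]
    print(f"Consumed document {doc_id} ({file_name}); tags: {tag_list}")


if __name__ == "__main__":
    main()
```

Per the warning above, such a script should only observe and react; it must not modify the document files themselves.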
@@ -141,7 +141,8 @@ directory.

files created using "collectstatic" manager command are stored.

Unless you're doing something fancy, there is no need to override
this.
this. If this is changed, you may need to run
`collectstatic` again.

Defaults to "../static/", relative to the "src" directory.
@@ -1,9 +1,9 @@
# Development

This section describes the steps you need to take to start development
on paperless-ngx.
on Paperless-ngx.

Check out the source from github. The repository is organized in the
Check out the source from GitHub. The repository is organized in the
following way:

- `main` always represents the latest release and will only see
@@ -12,7 +12,7 @@ following way:
- `feature-X` contain bigger changes that will be in some release, but
  not necessarily the next one.

When making functional changes to paperless, _always_ make your changes
When making functional changes to Paperless-ngx, _always_ make your changes
on the `dev` branch.

Apart from that, the folder structure is as follows:
@@ -24,9 +24,9 @@ Apart from that, the folder structure is as follows:
  development.
- `docker/` - Files required to build the docker image.

## Contributing to Paperless
## Contributing to Paperless-ngx

Maybe you've been using Paperless for a while and want to add a feature
Maybe you've been using Paperless-ngx for a while and want to add a feature
or two, or maybe you've come across a bug that you have some ideas how
to solve. The beauty of open source software is that you can see what's
wrong and help to get it fixed for everyone!
@@ -36,13 +36,13 @@ conduct](https://github.com/paperless-ngx/paperless-ngx/blob/main/CODE_OF_CONDUCT.md)
and other important information in the [contributing
guidelines](https://github.com/paperless-ngx/paperless-ngx/blob/main/CONTRIBUTING.md).

## Code formatting with pre-commit Hooks
## Code formatting with pre-commit hooks

To ensure a consistent style and formatting across the project source,
the project utilizes a Git [`pre-commit`](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks)
hook to perform some formatting and linting before a commit is allowed.
the project utilizes Git [`pre-commit`](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks)
hooks to perform some formatting and linting before a commit is allowed.
That way, everyone uses the same style and some common issues can be caught
early on. See below for installation instructions.
early on.

Once installed, hooks will run when you commit. If the formatting isn't
quite right or a linter catches something, the commit will be rejected.
@@ -51,129 +51,110 @@ as the Python formatting tool `black`, will format failing
files, so all you need to do is `git add` those files again
and retry your commit.

## Initial setup and first start
## General setup

After you forked and cloned the code from github you need to perform a
first-time setup. To do the setup you need to perform the steps from the
following chapters in a certain order:
After you forked and cloned the code from GitHub you need to perform a
first-time setup.

!!! note

    Every command is executed directly from the root folder of the project unless specified otherwise.

1. Install prerequisites + pipenv as mentioned in
   [Bare metal route](/setup#bare_metal)
   [Bare metal route](/setup#bare_metal).

2. Copy `paperless.conf.example` to `paperless.conf` and enable debug
   mode.
   mode within the file via `PAPERLESS_DEBUG=true`.

3. Install the Angular CLI interface:
3. Create `consume` and `media` directories:

   ```shell-session
   $ npm install -g @angular/cli
   ```bash
   $ mkdir -p consume media
   ```

4. Install pre-commit hooks
4. Install the Python dependencies:

   ```shell-session
   pre-commit install
   ```bash
   $ pipenv install --dev
   ```

5. Create `consume` and `media` folders in the cloned root folder.
   !!! note

   ```shell-session
   mkdir -p consume media
       Using a virtual environment is highly recommended. You can spawn one via `pipenv shell`.
       Make sure you're using Python 3.10.x or lower. Otherwise you might
       get issues with building dependencies. You can use
       [pyenv](https://github.com/pyenv/pyenv) to install a specific
       Python version.

5. Install pre-commit hooks:

   ```bash
   $ pre-commit install
   ```

6. You can now either ...
6. Apply migrations and create a superuser for your development instance:

   ```bash
   # src/

   $ python3 manage.py migrate
   $ python3 manage.py createsuperuser
   ```

7. You can now either ...

   - install redis or

   - use the included scripts/start-services.sh to use docker to fire
   - use the included `scripts/start_services.sh` to use docker to fire
     up a redis instance (and some other services such as tika,
     gotenberg and a database server) or

   - spin up a bare redis container

     ```shell-session
     docker run -d -p 6379:6379 --restart unless-stopped redis:latest
     ```
     $ docker run -d -p 6379:6379 --restart unless-stopped redis:latest
     ```

7. Install the python dependencies by performing in the src/ directory.

   ```shell-session
   pipenv install --dev
   ```

   !!! note

       Make sure you're using python 3.10.x or lower. Otherwise you might
       get issues with building dependencies. You can use
       [pyenv](https://github.com/pyenv/pyenv) to install a specific
       python version.

8. Generate the static UI so you can perform a login to get session
   that is required for frontend development (this needs to be done one
   time only). From src-ui directory:

   ```shell-session
   npm install .
   ./node_modules/.bin/ng build --configuration production
   ```

9. Apply migrations and create a superuser for your dev instance:

   ```shell-session
   python3 manage.py migrate
   python3 manage.py createsuperuser
   ```

10. Now spin up the dev backend. Depending on which part of paperless
    you're developing for, you need to have some or all of them
    running.

    ```shell-session
    python3 manage.py runserver & python3 manage.py document_consumer & celery --app paperless worker
    ```

11. Login with the superuser credentials provided in step 8 at
    `http://localhost:8000` to create a session that enables you to use
    the backend.

Backend development environment is now ready, to start Frontend
development go to `/src-ui` and run `ng serve`. From there you can use
`http://localhost:4200` for a preview.
8. Continue with either back-end or front-end development – or both :-).

## Back end development

The backend is a [Django](https://www.djangoproject.com/) application. PyCharm works well for development,
but you can use whatever you want.
The back end is a [Django](https://www.djangoproject.com/) application. [PyCharm](https://www.jetbrains.com/de-de/pycharm/) as well as [Visual Studio Code](https://code.visualstudio.com) work well for development, but you can use whatever you want.

Configure the IDE to use the src/ folder as the base source folder.
Configure the IDE to use the `src/`-folder as the base source folder.
Configure the following launch configurations in your IDE:

- `python3 manage.py runserver`
- `celery --app paperless worker`
- `python3 manage.py document_consumer`
- `celery --app paperless worker -l DEBUG` (or any other log level)

To start them all:

```shell-session
python3 manage.py runserver & python3 manage.py document_consumer & celery --app paperless worker
```bash
# src/

$ python3 manage.py runserver & \
  python3 manage.py document_consumer & \
  celery --app paperless worker -l DEBUG
```

Testing and code style:
You might need the front end to test your back end code. This assumes that you have AngularJS installed on your system. Go to the [Front end development](#front-end-development) section for further details. To build the front end once use this command:

```bash
# src-ui/

$ npm install
$ ng build --configuration production
```

### Testing

- Run `pytest` in the `src/` directory to execute all tests. This also
  generates a HTML coverage report. When running tests, paperless.conf
  is loaded as well. However: the tests rely on the default
  generates a HTML coverage report. When running tests, `paperless.conf`
  is loaded as well. However, the tests rely on the default
  configuration. This is not ideal. But for now, make sure no settings
  except for DEBUG are overridden when testing.

- Coding style is enforced by the Git pre-commit hooks. These will
  ensure your code is formatted and do some linting when you do a `git commit`.

- You can also run `black` manually to format your code

- The `pre-commit` hooks will modify files and interact with each other.
  It may take a couple of `git add`, `git commit` cycles to satisfy them.

!!! note

    The line length rule E501 is generally useful for getting multiple
@@ -184,98 +165,98 @@ Testing and code style:

## Front end development

The front end is built using Angular. In order to get started, you need
`npm`. Install the Angular CLI interface with
The front end is built using AngularJS. In order to get started, you need Node.js (version 14.15+) and
`npm`.

```shell-session
$ npm install -g @angular/cli
```

!!! note

and make sure that it's on your path. Next, in the src-ui/ directory,
install the required dependencies of the project.
    The following commands are all performed in the `src-ui`-directory. You will need a running back end (including an active session) to connect to the back end API. To spin it up refer to the commands under the section [above](#back-end-development).

```shell-session
$ npm install
```

1. Install the Angular CLI. You might need sudo privileges
   to perform this command:

You can launch a development server by running

   ```bash
   $ npm install -g @angular/cli
   ```

```shell-session
$ ng serve
```

2. Make sure that it's on your path.

This will automatically update whenever you save. However, in-place
compilation might fail on syntax errors, in which case you need to
restart it.

3. Install all necessary modules:

By default, the development server is available on
`http://localhost:4200/` and is configured to access the API at
`http://localhost:8000/api/`, which is the default of the backend. If
you enabled DEBUG on the back end, several security overrides for
allowed hosts, CORS and X-Frame-Options are in place so that the front
end behaves exactly as in production. This also relies on you being
logged into the back end. Without a valid session, the front end will
simply not work.

   ```bash
   $ npm install
   ```

Testing and code style:
4. You can launch a development server by running:

- The frontend code (.ts, .html, .scss) use `prettier` for code

   ```bash
   $ ng serve
   ```

   This will automatically update whenever you save. However, in-place
   compilation might fail on syntax errors, in which case you need to
   restart it.

   By default, the development server is available on `http://localhost:4200/` and is configured to access the API at
   `http://localhost:8000/api/`, which is the default of the backend. If you enabled `DEBUG` on the back end, several security overrides for allowed hosts, CORS and X-Frame-Options are in place so that the front end behaves exactly as in production.

### Testing and code style

- The front end code (.ts, .html, .scss) use `prettier` for code
  formatting via the Git `pre-commit` hooks which run automatically on
  commit. See
  [above](#code-formatting-with-pre-commit-hooks) for installation. You can also run this via cli with a
  commit. See [above](#code-formatting-with-pre-commit-hooks) for installation instructions. You can also run this via the CLI with a
  command such as

  ```shell-session
  ```bash
  $ git ls-files -- '*.ts' | xargs pre-commit run prettier --files
  ```

- Frontend testing uses jest and cypress. There is currently a need
  for significantly more frontend tests. Unit tests and e2e tests,
- Front end testing uses jest and cypress. There is currently a need
  for significantly more front end tests. Unit tests and e2e tests,
  respectively, can be run non-interactively with:

  ```shell-session
  ```bash
  $ ng test
  $ npm run e2e:ci
  ```

  Cypress also includes a UI which can be run from within the `src-ui`
  directory with
- Cypress also includes a UI which can be run with:

  ```shell-session
  $ ./node_modules/.bin/cypress open
  ```bash
  $ ./node_modules/.bin/cypress open
  ```

- In order to build the front end and serve it as part of Django, execute:

  ```bash
  $ ng build --configuration production
  ```

In order to build the front end and serve it as part of django, execute

```shell-session
$ ng build --configuration production
```

This will build the front end and put it in a location from which the
Django server will serve it as static content. This way, you can verify
that authentication is working.
  This will build the front end and put it in a location from which the
  Django server will serve it as static content. This way, you can verify
  that authentication is working.

## Localization

Paperless is available in many different languages. Since paperless
consists both of a django application and an Angular front end, both
Paperless-ngx is available in many different languages. Since Paperless-ngx
consists both of a Django application and an AngularJS front end, both
these parts have to be translated separately.

### Front end localization

- The Angular front end does localization according to the [Angular
- The AngularJS front end does localization according to the [Angular
  documentation](https://angular.io/guide/i18n).
- The source language of the project is "en_US".
- The source strings end up in the file "src-ui/messages.xlf".
- The source strings end up in the file `src-ui/messages.xlf`.
- The translated strings need to be placed in the
  "src-ui/src/locale/" folder.
  `src-ui/src/locale/` folder.
- In order to extract added or changed strings from the source files,
  call `ng xi18n --ivy`.

Adding new languages requires adding the translated files in the
"src-ui/src/locale/" folder and adjusting a couple files.
`src-ui/src/locale/` folder and adjusting a couple files.

1. Adjust "src-ui/angular.json":
1. Adjust `src-ui/angular.json`:

   ```json
   "i18n": {
@@ -292,7 +273,7 @@ Adding new languages requires adding the translated files in the
   ```

2. Add the language to the available options in
   "src-ui/src/app/services/settings.service.ts":
   `src-ui/src/app/services/settings.service.ts`:

   ```typescript
   getLanguageOptions(): LanguageOption[] {
@@ -313,7 +294,7 @@ Adding new languages requires adding the translated files in the
   and "yyyy".

3. Import and register the Angular data for this locale in
   "src-ui/src/app/app.module.ts":
   `src-ui/src/app/app.module.ts`:

   ```typescript
   import localeDe from '@angular/common/locales/de'
@@ -326,10 +307,10 @@ A majority of the strings that appear in the back end appear only when
the admin is used. However, some of these are still shown on the front
end (such as error messages).

- The django application does localization according to the [django
- The django application does localization according to the [Django
  documentation](https://docs.djangoproject.com/en/3.1/topics/i18n/translation/).
- The source language of the project is "en_US".
- Localization files end up in the folder "src/locale/".
- Localization files end up in the folder `src/locale/`.
- In order to extract strings from the application, call
  `python3 manage.py makemessages -l en_US`. This is important after
  making changes to translatable strings.
@@ -340,8 +321,8 @@ end (such as error messages).
  command.

Adding new languages requires adding the translated files in the
"src/locale/" folder and adjusting the file
"src/paperless/settings.py" to include the new language:
`src/locale/`-folder and adjusting the file
`src/paperless/settings.py` to include the new language:

```python
LANGUAGES = [
@@ -360,18 +341,27 @@ LANGUAGES = [
The documentation is built using material-mkdocs, see their [documentation](https://squidfunk.github.io/mkdocs-material/reference/).
If you want to build the documentation locally, this is how you do it:

1. Install python dependencies.
1. Have an active pipenv shell (`pipenv shell`) and install Python dependencies:

   ```shell-session
   $ cd /path/to/paperless
   ```bash
   $ pipenv install --dev
   ```

2. Build the documentation

   ```shell-session
   $ cd /path/to/paperless
   $ pipenv mkdocs build --config-file mkdocs.yml
   ```bash
   $ mkdocs build --config-file mkdocs.yml
   ```

   _alternatively..._

3. Serve the documentation. This will spin up a
   copy of the documentation at http://127.0.0.1:8000
   that will automatically refresh every time you change
   something.

   ```bash
   $ mkdocs serve
   ```

## Building the Docker image
@@ -384,35 +374,35 @@ helper script `build-docker-image.sh`.

Building the docker image from source:

```shell-session
```bash
./build-docker-image.sh Dockerfile -t <your-tag>
```

## Extending Paperless
## Extending Paperless-ngx

Paperless does not have any fancy plugin systems and will probably never
Paperless-ngx does not have any fancy plugin systems and will probably never
have. However, some parts of the application have been designed to allow
easy integration of additional features without any modification to the
base code.

### Making custom parsers

Paperless uses parsers to add documents to paperless. A parser is
Paperless-ngx uses parsers to add documents. A parser is
responsible for:

- Retrieve the content from the original
- Create a thumbnail
- Optional: Retrieve a created date from the original
- Optional: Create an archived document from the original
- Retrieving the content from the original
- Creating a thumbnail
- _optional:_ Retrieving a created date from the original
- _optional:_ Creating an archived document from the original

Custom parsers can be added to paperless to support more file types. In
Custom parsers can be added to Paperless-ngx to support more file types. In
order to do that, you need to write the parser itself and announce its
existence to paperless.
existence to Paperless-ngx.

The parser itself must extend `documents.parsers.DocumentParser` and
must implement the methods `parse` and `get_thumbnail`. You can provide
your own implementation to `get_date` if you don't want to rely on
paperless' default date guessing mechanisms.
Paperless-ngx' default date guessing mechanisms.

```python
class MyCustomParser(DocumentParser):
@@ -444,7 +434,7 @@ to be empty and removed after consumption finished. You can use that
directory to store any intermediate files and also use it to store the
thumbnail / archived document.

After that, you need to announce your parser to paperless. You need to
After that, you need to announce your parser to Paperless-ngx. You need to
connect a handler to the `document_consumer_declaration` signal. Have a
look in the file `src/paperless_tesseract/apps.py` on how that's done.
The handler is a method that returns information about your parser:
@@ -464,11 +454,11 @@ def myparser_consumer_declaration(sender, **kwargs):
- `parser` is a reference to a class that extends `DocumentParser`.
- `weight` is used whenever two or more parsers are able to parse a
  file: The parser with the higher weight wins. This can be used to
  override the parsers provided by paperless.
  override the parsers provided by Paperless-ngx.
- `mime_types` is a dictionary. The keys are the mime types your
  parser supports and the value is the default file extension that
  paperless should use when storing files and serving them for
  Paperless-ngx should use when storing files and serving them for
  download. We could guess that from the file extensions, but some
  mime types have many extensions associated with them and the python
  mime types have many extensions associated with them and the Python
  methods responsible for guessing the extension do not always return
  the same value.
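Putting those pieces together, a custom parser plus its declaration handler could look roughly like the following. This is a hedged sketch: the class body, weight, and mime type are illustrative placeholders; only `DocumentParser`, the `parse`/`get_thumbnail` methods, and the dictionary keys come from the documentation above.

```python
# Hypothetical custom parser and its document_consumer_declaration handler.
from documents.parsers import DocumentParser


class MyCustomParser(DocumentParser):
    def parse(self, document_path, mime_type, file_name=None):
        # Extract the text content of the original document here
        self.text = "..."

    def get_thumbnail(self, document_path, mime_type, file_name=None):
        # Build a thumbnail image and return its path
        return "/path/to/thumbnail"


def myparser_consumer_declaration(sender, **kwargs):
    return {
        "parser": MyCustomParser,
        "weight": 0,  # the parser with the higher weight wins
        "mime_types": {
            # mime type -> default file extension for storage and download
            "application/pdf": ".pdf",
        },
    }
```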
@@ -388,12 +388,7 @@ supported.
   ```

8. Install python requirements from the `requirements.txt` file. It is
   up to you if you wish to use a virtual environment or not. First you
   should update your pip, so it gets the actual packages.

   ```shell-session
   sudo -Hu paperless pip3 install --upgrade pip
   ```
   up to you if you wish to use a virtual environment or not. First you should update your pip, so it gets the actual packages.

   ```shell-session
   sudo -Hu paperless pip3 install -r requirements.txt
@@ -41,6 +41,7 @@ markdown_extensions:
      anchor_linenums: true
  - pymdownx.superfences
  - pymdownx.inlinehilite
  - pymdownx.snippets
strict: true
nav:
  - index.md
@@ -54,7 +55,7 @@ nav:
  - 'FAQs': faq.md
  - troubleshooting.md
  - changelog.md
copyright: Copyright © 2016 - 2022 Daniel Quinn, Jonas Winkler, and the Paperless-ngx team
copyright: Copyright © 2016 - 2023 Daniel Quinn, Jonas Winkler, and the Paperless-ngx team
extra:
  social:
    - icon: fontawesome/brands/github
@@ -192,7 +192,8 @@
    "cli": {
      "schematicCollections": [
        "@angular-eslint/schematics"
      ]
      ],
      "analytics": false
    },
    "schematics": {
      "@angular-eslint/schematics:application": {
14779 src-ui/package-lock.json generated
File diff suppressed because it is too large.
@@ -13,14 +13,14 @@
  },
  "private": true,
  "dependencies": {
    "@angular/common": "~15.1.0",
    "@angular/compiler": "~15.1.0",
    "@angular/core": "~15.1.0",
    "@angular/forms": "~15.1.0",
    "@angular/localize": "~15.1.0",
    "@angular/platform-browser": "~15.1.0",
    "@angular/platform-browser-dynamic": "~15.1.0",
    "@angular/router": "~15.1.0",
    "@angular/common": "~15.1.2",
    "@angular/compiler": "~15.1.2",
    "@angular/core": "~15.1.2",
    "@angular/forms": "~15.1.2",
    "@angular/localize": "~15.1.2",
    "@angular/platform-browser": "~15.1.2",
    "@angular/platform-browser-dynamic": "~15.1.2",
    "@angular/router": "~15.1.2",
    "@ng-bootstrap/ng-bootstrap": "^14.0.1",
    "@ng-select/ng-select": "^10.0.1",
    "@ngneat/dirty-check-forms": "^3.0.3",
@@ -39,18 +39,18 @@
  },
  "devDependencies": {
    "@angular-builders/jest": "15.0.0",
    "@angular-devkit/build-angular": "~15.1.0",
    "@angular-eslint/builder": "15.1.0",
    "@angular-eslint/eslint-plugin": "15.1.0",
    "@angular-eslint/eslint-plugin-template": "15.1.0",
    "@angular-eslint/schematics": "15.1.0",
    "@angular-eslint/template-parser": "15.1.0",
    "@angular/cli": "~15.1.0",
    "@angular/compiler-cli": "~15.1.0",
    "@angular-devkit/build-angular": "~15.1.4",
    "@angular-eslint/builder": "15.2.0",
    "@angular-eslint/eslint-plugin": "15.2.0",
    "@angular-eslint/eslint-plugin-template": "15.2.0",
    "@angular-eslint/schematics": "15.2.0",
    "@angular-eslint/template-parser": "15.2.0",
    "@angular/cli": "~15.1.4",
    "@angular/compiler-cli": "~15.1.2",
    "@types/jest": "28.1.6",
    "@types/node": "^18.7.23",
    "@typescript-eslint/eslint-plugin": "^5.43.0",
    "@typescript-eslint/parser": "^5.43.0",
    "@typescript-eslint/parser": "^5.50.0",
    "concurrently": "7.4.0",
    "eslint": "^8.31.0",
    "jest": "28.1.3",
@@ -229,6 +229,10 @@ export class DocumentDetailComponent
      )
      .subscribe({
        next: (titleValue) => {
          // In the rare case when the field changed just after the debounced event was fired,
          // we don't want to overwrite what's actually in the text field, so just return
          if (titleValue !== this.titleInput.value) return

          this.title = titleValue
          this.documentForm.patchValue({ title: titleValue })
        },
@@ -5,7 +5,7 @@ export const environment = {
  apiBaseUrl: document.baseURI + 'api/',
  apiVersion: '2',
  appTitle: 'Paperless-ngx',
  version: '1.12.1-dev',
  version: '1.12.2-dev',
  webSocketHost: window.location.host,
  webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
  webSocketBaseUrl: base_url.pathname + 'ws/',
@@ -4,18 +4,17 @@ import shutil
import tempfile
from dataclasses import dataclass
from functools import lru_cache
from math import ceil
from pathlib import Path
from typing import Dict
from typing import List
from typing import Optional

import magic
from django.conf import settings
from pdf2image import convert_from_path
from pdf2image.exceptions import PDFPageCountError
from pikepdf import Page
from pikepdf import PasswordError
from pikepdf import Pdf
from pikepdf import PdfImage
from PIL import Image
from PIL import ImageSequence
from pyzbar import pyzbar
@@ -154,52 +153,15 @@ def scan_file_for_barcodes(
    (page_number, barcode_text) tuples
    """

    def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]:
        detected_barcodes = []
        with Pdf.open(pdf_filepath) as pdf:
            for page_num, page in enumerate(pdf.pages):
                for image_key in page.images:
                    pdfimage = PdfImage(page.images[image_key])

                    # This type is known to have issues:
                    # https://github.com/pikepdf/pikepdf/issues/401
                    if "/CCITTFaxDecode" in pdfimage.filters:
                        raise BarcodeImageFormatError(
                            "Unable to decode CCITTFaxDecode images",
                        )

                    # Not all images can be transcoded to a PIL image, which
                    # is what pyzbar expects to receive, so this may
                    # raise an exception, triggering fallback
                    pillow_img = pdfimage.as_pil_image()

                    # Scale the image down
                    # See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
                    # TLDR: zbar has issues with larger images
                    width, height = pillow_img.size
                    if width > 1024:
                        scaler = ceil(width / 1024)
                        new_width = int(width / scaler)
                        new_height = int(height / scaler)
                        pillow_img = pillow_img.resize((new_width, new_height))

                    width, height = pillow_img.size
                    if height > 2048:
                        scaler = ceil(height / 2048)
                        new_width = int(width / scaler)
                        new_height = int(height / scaler)
                        pillow_img = pillow_img.resize((new_width, new_height))

                    for barcode_value in barcode_reader(pillow_img):
                        detected_barcodes.append(Barcode(page_num, barcode_value))

        return detected_barcodes

    def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
        detected_barcodes = []
        # use a temporary directory in case the file is too big to handle in memory
        with tempfile.TemporaryDirectory() as path:
            pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
            pages_from_path = convert_from_path(
                pdf_filepath,
                dpi=300,
                output_folder=path,
            )
            for current_page_number, page in enumerate(pages_from_path):
                for barcode_value in barcode_reader(page):
                    detected_barcodes.append(
@@ -219,27 +181,19 @@ def scan_file_for_barcodes(
        # Always try pikepdf first, it's usually fine, faster and
        # uses less memory
        try:
            barcodes = _pikepdf_barcode_scan(pdf_filepath)
            barcodes = _pdf2image_barcode_scan(pdf_filepath)
        # Password protected files can't be checked
        except PasswordError as e:
        # This is the exception raised for those
        except PDFPageCountError as e:
            logger.warning(
                f"File is likely password protected, not checking for barcodes: {e}",
            )
        # Handle pikepdf related image decoding issues with a fallback to page
        # by page conversion to images in a temporary directory
        except Exception as e:
        # This file is really borked, allow the consumption to continue
        # but it may fail further on
        except Exception as e:  # pragma: no cover
            logger.warning(
                f"Falling back to pdf2image because: {e}",
                f"Exception during barcode scanning: {e}",
            )
            try:
                barcodes = _pdf2image_barcode_scan(pdf_filepath)
            # This file is really borked, allow the consumption to continue
            # but it may fail further on
            except Exception as e:  # pragma: no cover
                logger.warning(
                    f"Exception during barcode scanning: {e}",
                )

    else:
        logger.warning(
            f"Unsupported file format for barcode reader: {str(mime_type)}",
@@ -248,16 +202,25 @@ def scan_file_for_barcodes(
    return DocumentBarcodeInfo(pdf_filepath, barcodes)


def get_separating_barcodes(barcodes: List[Barcode]) -> List[int]:
def get_separating_barcodes(barcodes: List[Barcode]) -> Dict[int, bool]:
    """
    Search the parsed barcodes for separators
    and returns a list of page numbers, which
    separate the file into new files.
    and returns a dict of page numbers, which
    separate the file into new files, together
    with the information whether to keep the page.
    """
    # filter all barcodes for the separator string
    # get the page numbers of the separating barcodes
    separator_pages = {bc.page: False for bc in barcodes if bc.is_separator}
    if not settings.CONSUMER_ENABLE_ASN_BARCODE:
        return separator_pages

    return list({bc.page for bc in barcodes if bc.is_separator})
    # add the page numbers of the ASN barcodes
    # (except for first page, that might lead to infinite loops).
    return {
        **separator_pages,
        **{bc.page: True for bc in barcodes if bc.is_asn and bc.page != 0},
    }


def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
@@ -289,10 +252,11 @@ def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
    return asn


def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[str]:
    """
    Separate the provided pdf file on the pages_to_split_on.
    The pages which are defined by page_numbers will be removed.
    The pages which are defined by the keys in page_numbers
    will be removed if the corresponding value is false.
    Returns a list of (temporary) filepaths to consume.
    These will need to be deleted later.
    """
@@ -308,26 +272,28 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[str]:
    fname = os.path.splitext(os.path.basename(filepath))[0]
    pdf = Pdf.open(filepath)

    # Start with an empty document
    current_document: List[Page] = []
    # A list of documents, ie a list of lists of pages
    documents: List[List[Page]] = []
    # A single document, ie a list of pages
    document: List[Page] = []
    documents: List[List[Page]] = [current_document]

    for idx, page in enumerate(pdf.pages):
        # Keep building the new PDF as long as it is not a
        # separator index
        if idx not in pages_to_split_on:
            document.append(page)
            # Make sure to append the very last document to the documents
            if idx == (len(pdf.pages) - 1):
                documents.append(document)
                document = []
        else:
            # This is a split index, save the current PDF pages, and restart
            # a new destination page listing
            logger.debug(f"Starting new document at idx {idx}")
            documents.append(document)
            document = []
            current_document.append(page)
            continue

        # This is a split index
        # Start a new destination page listing
        logger.debug(f"Starting new document at idx {idx}")
        current_document = []
        documents.append(current_document)
        keep_page = pages_to_split_on[idx]
        if keep_page:
            # Keep the page
            # (new document is started by asn barcode)
            current_document.append(page)

    documents = [x for x in documents if len(x)]
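To read the new contract in this file: `get_separating_barcodes` now returns a mapping of page index to a keep-page flag, and `separate_pages` consumes it. A small illustrative sketch with made-up values, assuming a separator sheet on page 2 and an ASN barcode on page 5 with `CONSUMER_ENABLE_ASN_BARCODE` enabled:

```python
# Illustrative values only, showing the Dict[int, bool] contract shared
# by get_separating_barcodes() and separate_pages().
pages_to_split_on = {
    2: False,  # plain separator sheet: split here and drop this page
    5: True,   # ASN barcode page: split here but keep this page
}

# separate_pages() starts a new output document at pages 2 and 5:
# page 2 is discarded, while page 5 becomes the first page of its
# new document.
```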
@@ -1,7 +1,10 @@
import datetime
import hashlib
import os
import shutil
import tempfile
import uuid
from pathlib import Path
from subprocess import CompletedProcess
from subprocess import run
from typing import Optional
@@ -95,7 +98,8 @@ class Consumer(LoggingMixin):

    def __init__(self):
        super().__init__()
        self.path = None
        self.path: Optional[Path] = None
        self.original_path: Optional[Path] = None
        self.filename = None
        self.override_title = None
        self.override_correspondent_id = None
@@ -144,11 +148,16 @@ class Consumer(LoggingMixin):
            return
        # Validate the range is above zero and less than uint32_t max
        # otherwise, Whoosh can't handle it in the index
        if self.override_asn < 0 or self.override_asn > 0xFF_FF_FF_FF:
        if (
            self.override_asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
            or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
        ):
            self._fail(
                MESSAGE_ASN_RANGE,
                f"Not consuming {self.filename}: "
                f"Given ASN {self.override_asn} is out of range [0, 4,294,967,295]",
                f"Given ASN {self.override_asn} is out of range "
                f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
                f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
            )
        if Document.objects.filter(archive_serial_number=self.override_asn).exists():
            self._fail(
@@ -169,16 +178,18 @@ class Consumer(LoggingMixin):

        self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")

        filepath_arg = os.path.normpath(self.path)
        working_file_path = str(self.path)
        original_file_path = str(self.original_path)

        script_env = os.environ.copy()
        script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg
        script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
        script_env["DOCUMENT_WORKING_PATH"] = working_file_path

        try:
            completed_proc = run(
                args=[
                    settings.PRE_CONSUME_SCRIPT,
                    filepath_arg,
                    original_file_path,
                ],
                env=script_env,
                capture_output=True,
@@ -197,7 +208,7 @@ class Consumer(LoggingMixin):
                exception=e,
            )

    def run_post_consume_script(self, document):
    def run_post_consume_script(self, document: Document):
        if not settings.POST_CONSUME_SCRIPT:
            return

@@ -288,8 +299,8 @@ class Consumer(LoggingMixin):
        Return the document object if it was successfully created.
        """

        self.path = path
        self.filename = override_filename or os.path.basename(path)
        self.path = Path(path).resolve()
        self.filename = override_filename or self.path.name
        self.override_title = override_title
        self.override_correspondent_id = override_correspondent_id
        self.override_document_type_id = override_document_type_id
@@ -315,6 +326,15 @@ class Consumer(LoggingMixin):

        self.log("info", f"Consuming {self.filename}")

        # For the actual work, copy the file into a tempdir
        self.original_path = self.path
        tempdir = tempfile.TemporaryDirectory(
            prefix="paperless-ngx",
            dir=settings.SCRATCH_DIR,
        )
        self.path = Path(tempdir.name) / Path(self.filename)
        shutil.copy(self.original_path, self.path)

        # Determine the parser class.

        mime_type = magic.from_file(self.path, mime=True)
@@ -457,11 +477,12 @@ class Consumer(LoggingMixin):
            # Delete the file only if it was successfully consumed
            self.log("debug", f"Deleting file {self.path}")
            os.unlink(self.path)
            self.original_path.unlink()

            # https://github.com/jonaswinkler/paperless-ng/discussions/1037
            shadow_file = os.path.join(
                os.path.dirname(self.path),
                "._" + os.path.basename(self.path),
                os.path.dirname(self.original_path),
                "._" + os.path.basename(self.original_path),
            )

            if os.path.isfile(shadow_file):
@@ -478,6 +499,7 @@ class Consumer(LoggingMixin):
            )
        finally:
            document_parser.cleanup()
            tempdir.cleanup()

        self.run_post_consume_script(document)
@@ -5,6 +5,7 @@ from contextlib import contextmanager

from dateutil.parser import isoparse
from django.conf import settings
from django.utils import timezone
from documents.models import Comment
from documents.models import Document
from guardian.shortcuts import get_users_with_perms
@@ -94,10 +95,22 @@ def open_index_searcher():
        searcher.close()


def update_document(writer, doc):
def update_document(writer: AsyncWriter, doc: Document):
    tags = ",".join([t.name for t in doc.tags.all()])
    tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
    comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
    asn = doc.archive_serial_number
    if asn is not None and (
        asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
        or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
    ):
        logger.error(
            f"Not indexing Archive Serial Number {asn} of document {doc.pk}. "
            f"ASN is out of range "
            f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
            f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}.",
        )
        asn = 0
    users_with_perms = get_users_with_perms(
        doc,
        only_with_perms_in=["view_document"],
@@ -118,7 +131,7 @@ def update_document(writer, doc):
        has_type=doc.document_type is not None,
        created=doc.created,
        added=doc.added,
        asn=doc.archive_serial_number,
        asn=asn,
        modified=doc.modified,
        path=doc.storage_path.name if doc.storage_path else None,
        path_id=doc.storage_path.id if doc.storage_path else None,
@@ -283,7 +296,7 @@ class DelayedFullTextQuery(DelayedQuery):
            ["content", "title", "correspondent", "tag", "type", "comments"],
            self.searcher.ixreader.schema,
        )
        qp.add_plugin(DateParserPlugin())
        qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
        q = qp.parse(q_str)

        corrected = self.searcher.correct_query(q, q_str)
@@ -311,8 +311,8 @@ class Command(BaseCommand):
                archive_target = None

            # 3.4. write files to target folder
            t = int(time.mktime(document.created.timetuple()))
            if document.storage_type == Document.STORAGE_TYPE_GPG:
                t = int(time.mktime(document.created.timetuple()))

                original_target.parent.mkdir(parents=True, exist_ok=True)
                with document.source_file as out_file:
@@ -0,0 +1,23 @@
# Generated by Django 4.1.5 on 2023-02-03 21:53

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("documents", "1029_alter_document_archive_serial_number"),
    ]

    operations = [
        migrations.AlterField(
            model_name="paperlesstask",
            name="task_file_name",
            field=models.CharField(
                help_text="Name of the file which the Task was run for",
                max_length=255,
                null=True,
                verbose_name="Task Filename",
            ),
        ),
    ]
@ -3,6 +3,7 @@ import logging
import os
import re
from collections import OrderedDict
from typing import Final
from typing import Optional

import dateutil.parser
@ -242,6 +243,9 @@ class Document(ModelWithOwner):
        help_text=_("The original name of the file when it was uploaded"),
    )

    ARCHIVE_SERIAL_NUMBER_MIN: Final[int] = 0
    ARCHIVE_SERIAL_NUMBER_MAX: Final[int] = 0xFF_FF_FF_FF

    archive_serial_number = models.PositiveIntegerField(
        _("archive serial number"),
        blank=True,
@ -249,8 +253,8 @@
        unique=True,
        db_index=True,
        validators=[
            MaxValueValidator(0xFF_FF_FF_FF),
            MinValueValidator(0),
            MaxValueValidator(ARCHIVE_SERIAL_NUMBER_MAX),
            MinValueValidator(ARCHIVE_SERIAL_NUMBER_MIN),
        ],
        help_text=_(
            "The position of this document in your physical document " "archive.",
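
Worth noting: Django only runs these validators in full_clean() (which DRF serializers trigger); a bare Model.save() bypasses them entirely, which is why the index-side clamp earlier in this diff exists at all. A minimal sketch with a hypothetical stand-in model, assuming an already configured Django project:

    from django.core.exceptions import ValidationError
    from django.core.validators import MaxValueValidator, MinValueValidator
    from django.db import models

    ASN_MIN, ASN_MAX = 0, 0xFF_FF_FF_FF


    class Doc(models.Model):
        # Stand-in for Document.archive_serial_number, not the real model.
        asn = models.PositiveIntegerField(
            null=True,
            validators=[MaxValueValidator(ASN_MAX), MinValueValidator(ASN_MIN)],
        )

        class Meta:
            app_label = "demo"


    doc = Doc(asn=ASN_MAX + 1)
    try:
        doc.full_clean()  # validators run here and raise
    except ValidationError as e:
        print(e.message_dict)
    # doc.save() alone would not run the validators at all.
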
@ -567,7 +571,7 @@ class PaperlessTask(models.Model):
    task_file_name = models.CharField(
        null=True,
        max_length=255,
        verbose_name=_("Task Name"),
        verbose_name=_("Task Filename"),
        help_text=_("Name of the file which the Task was run for"),
    )

@ -166,7 +166,7 @@ def consume_file(
        # notify the sender, otherwise the progress bar
        # in the UI stays stuck
        payload = {
            "filename": override_filename,
            "filename": override_filename or path.name,
            "task_id": task_id,
            "current_progress": 100,
            "max_progress": 100,
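
The one-line fix above is a plain or-fallback: when no override filename accompanied the upload, the progress payload falls back to the file's on-disk name instead of a null label. In isolation (progress_payload is a hypothetical helper, not the real task code):

    from pathlib import Path


    def progress_payload(task_id, path, override_filename=None):
        # Prefer the caller-supplied name, else the actual file name, so the
        # UI never renders an empty filename.
        return {
            "filename": override_filename or Path(path).name,
            "task_id": task_id,
            "current_progress": 100,
            "max_progress": 100,
        }


    print(progress_payload("abc-123", "/tmp/consume/scan_0001.pdf"))
    # -> {'filename': 'scan_0001.pdf', 'task_id': 'abc-123', ...}
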
BIN
src/documents/tests/samples/barcodes/split-by-asn-1.pdf
Normal file
Binary file not shown.
BIN
src/documents/tests/samples/barcodes/split-by-asn-2.pdf
Normal file
Binary file not shown.
@ -7,6 +7,7 @@ import tempfile
import urllib.request
import uuid
import zipfile
from datetime import timedelta
from pathlib import Path
from unittest import mock
from unittest.mock import MagicMock
@ -25,6 +26,7 @@ from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.test import override_settings
from django.utils import timezone
from dateutil.relativedelta import relativedelta
from documents import bulk_edit
from documents import index
from documents.models import Correspondent
@ -509,6 +511,270 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        response = self.client.get("/api/documents/?query=content&page=3&page_size=10")
        self.assertEqual(response.status_code, 404)

    @override_settings(
        TIME_ZONE="UTC",
    )
    def test_search_added_in_last_week(self):
        """
        GIVEN:
            - Three documents added right now
            - The timezone is UTC time
        WHEN:
            - Query for documents added in the last 7 days
        THEN:
            - All three recent documents are returned
        """
        d1 = Document.objects.create(
            title="invoice",
            content="the thing i bought at a shop and paid with bank account",
            checksum="A",
            pk=1,
        )
        d2 = Document.objects.create(
            title="bank statement 1",
            content="things i paid for in august",
            pk=2,
            checksum="B",
        )
        d3 = Document.objects.create(
            title="bank statement 3",
            content="things i paid for in september",
            pk=3,
            checksum="C",
        )
        with index.open_index_writer() as writer:
            index.update_document(writer, d1)
            index.update_document(writer, d2)
            index.update_document(writer, d3)

        response = self.client.get("/api/documents/?query=added:[-1 week to now]")
        results = response.data["results"]
        # Expect 3 documents returned
        self.assertEqual(len(results), 3)

        for idx, subset in enumerate(
            [
                {"id": 1, "title": "invoice"},
                {"id": 2, "title": "bank statement 1"},
                {"id": 3, "title": "bank statement 3"},
            ],
        ):
            result = results[idx]
            # Assert subset in results
            self.assertDictEqual(result, {**result, **subset})

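
The assertDictEqual(result, {**result, **subset}) line used throughout these tests is a compact subset assertion: merging the expected subset into the actual dict is a no-op exactly when every expected key/value pair is already present. Reduced to plain Python:

    result = {"id": 1, "title": "invoice", "score": 0.83}
    subset = {"id": 1, "title": "invoice"}

    # No-op merge: equal iff `subset` is contained in `result`.
    assert result == {**result, **subset}

    # A mismatching subset breaks the equality.
    assert result != {**result, **{"title": "receipt"}}
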
    @override_settings(
        TIME_ZONE="America/Chicago",
    )
    def test_search_added_in_last_week_with_timezone_behind(self):
        """
        GIVEN:
            - Two documents added right now
            - One document added over a week ago
            - The timezone is behind UTC time (-6)
        WHEN:
            - Query for documents added in the last 7 days
        THEN:
            - The two recent documents are returned
        """
        d1 = Document.objects.create(
            title="invoice",
            content="the thing i bought at a shop and paid with bank account",
            checksum="A",
            pk=1,
        )
        d2 = Document.objects.create(
            title="bank statement 1",
            content="things i paid for in august",
            pk=2,
            checksum="B",
        )
        d3 = Document.objects.create(
            title="bank statement 3",
            content="things i paid for in september",
            pk=3,
            checksum="C",
            # 7 days, 1 hour and 1 minute ago
            added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
        )
        with index.open_index_writer() as writer:
            index.update_document(writer, d1)
            index.update_document(writer, d2)
            index.update_document(writer, d3)

        response = self.client.get("/api/documents/?query=added:[-1 week to now]")
        results = response.data["results"]

        # Expect 2 documents returned
        self.assertEqual(len(results), 2)

        for idx, subset in enumerate(
            [{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
        ):
            result = results[idx]
            # Assert subset in results
            self.assertDictEqual(result, {**result, **subset})

    @override_settings(
        TIME_ZONE="Europe/Sofia",
    )
    def test_search_added_in_last_week_with_timezone_ahead(self):
        """
        GIVEN:
            - Two documents added right now
            - One document added over a week ago
            - The timezone is ahead of UTC time (+2)
        WHEN:
            - Query for documents added in the last 7 days
        THEN:
            - The two recent documents are returned
        """
        d1 = Document.objects.create(
            title="invoice",
            content="the thing i bought at a shop and paid with bank account",
            checksum="A",
            pk=1,
        )
        d2 = Document.objects.create(
            title="bank statement 1",
            content="things i paid for in august",
            pk=2,
            checksum="B",
        )
        d3 = Document.objects.create(
            title="bank statement 3",
            content="things i paid for in september",
            pk=3,
            checksum="C",
            # 7 days, 1 hour and 1 minute ago
            added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
        )
        with index.open_index_writer() as writer:
            index.update_document(writer, d1)
            index.update_document(writer, d2)
            index.update_document(writer, d3)

        response = self.client.get("/api/documents/?query=added:[-1 week to now]")
        results = response.data["results"]

        # Expect 2 documents returned
        self.assertEqual(len(results), 2)

        for idx, subset in enumerate(
            [{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
        ):
            result = results[idx]
            # Assert subset in results
            self.assertDictEqual(result, {**result, **subset})

    def test_search_added_in_last_month(self):
        """
        GIVEN:
            - One document added right now
            - One document added about a week ago
            - One document added over 1 month ago
        WHEN:
            - Query for documents added in the last month
        THEN:
            - The two recent documents are returned
        """
        d1 = Document.objects.create(
            title="invoice",
            content="the thing i bought at a shop and paid with bank account",
            checksum="A",
            pk=1,
        )
        d2 = Document.objects.create(
            title="bank statement 1",
            content="things i paid for in august",
            pk=2,
            checksum="B",
            # 1 month, 1 day ago
            added=timezone.now() - relativedelta(months=1, days=1),
        )
        d3 = Document.objects.create(
            title="bank statement 3",
            content="things i paid for in september",
            pk=3,
            checksum="C",
            # 7 days, 1 hour and 1 minute ago
            added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
        )

        with index.open_index_writer() as writer:
            index.update_document(writer, d1)
            index.update_document(writer, d2)
            index.update_document(writer, d3)

        response = self.client.get("/api/documents/?query=added:[-1 month to now]")
        results = response.data["results"]

        # Expect 2 documents returned
        self.assertEqual(len(results), 2)

        for idx, subset in enumerate(
            [{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
        ):
            result = results[idx]
            # Assert subset in results
            self.assertDictEqual(result, {**result, **subset})

    @override_settings(
        TIME_ZONE="America/Denver",
    )
    def test_search_added_in_last_month_timezone_behind(self):
        """
        GIVEN:
            - One document added right now
            - One document added about a week ago
            - One document added over 1 month ago
            - The timezone is behind UTC time (-6 or -7)
        WHEN:
            - Query for documents added in the last month
        THEN:
            - The two recent documents are returned
        """
        d1 = Document.objects.create(
            title="invoice",
            content="the thing i bought at a shop and paid with bank account",
            checksum="A",
            pk=1,
        )
        d2 = Document.objects.create(
            title="bank statement 1",
            content="things i paid for in august",
            pk=2,
            checksum="B",
            # 1 month, 1 day ago
            added=timezone.now() - relativedelta(months=1, days=1),
        )
        d3 = Document.objects.create(
            title="bank statement 3",
            content="things i paid for in september",
            pk=3,
            checksum="C",
            # 7 days, 1 hour and 1 minute ago
            added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
        )

        with index.open_index_writer() as writer:
            index.update_document(writer, d1)
            index.update_document(writer, d2)
            index.update_document(writer, d3)

        response = self.client.get("/api/documents/?query=added:[-1 month to now]")
        results = response.data["results"]

        # Expect 2 documents returned
        self.assertEqual(len(results), 2)

        for idx, subset in enumerate(
            [{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
        ):
            result = results[idx]
            # Assert subset in results
            self.assertDictEqual(result, {**result, **subset})

    @mock.patch("documents.index.autocomplete")
    def test_search_autocomplete(self, m):
        m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]
File diff suppressed because it is too large
@ -833,7 +833,8 @@ class PreConsumeTestCase(TestCase):
        with tempfile.NamedTemporaryFile() as script:
            with override_settings(PRE_CONSUME_SCRIPT=script.name):
                c = Consumer()
                c.path = "path-to-file"
                c.original_path = "path-to-file"
                c.path = "/tmp/somewhere/path-to-file"
                c.run_pre_consume_script()

                m.assert_called_once()
@ -841,10 +842,19 @@
                args, kwargs = m.call_args

                command = kwargs["args"]
                environment = kwargs["env"]

                self.assertEqual(command[0], script.name)
                self.assertEqual(command[1], "path-to-file")

                self.assertDictContainsSubset(
                    {
                        "DOCUMENT_SOURCE_PATH": c.original_path,
                        "DOCUMENT_WORKING_PATH": c.path,
                    },
                    environment,
                )

    @mock.patch("documents.consumer.Consumer.log")
    def test_script_with_output(self, mocked_log):
        """
@ -961,9 +971,10 @@ class PostConsumeTestCase(TestCase):

        m.assert_called_once()

        args, kwargs = m.call_args
        _, kwargs = m.call_args

        command = kwargs["args"]
        environment = kwargs["env"]

        self.assertEqual(command[0], script.name)
        self.assertEqual(command[1], str(doc.pk))
@ -972,6 +983,17 @@
        self.assertEqual(command[7], "my_bank")
        self.assertCountEqual(command[8].split(","), ["a", "b"])

        self.assertDictContainsSubset(
            {
                "DOCUMENT_ID": str(doc.pk),
                "DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
                "DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
                "DOCUMENT_CORRESPONDENT": "my_bank",
                "DOCUMENT_TAGS": "a,b",
            },
            environment,
        )

    def test_script_exit_non_zero(self):
        """
        GIVEN:
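
Taken together, the assertions above pin down the hook contract: the script path plus one positional argument in kwargs["args"], and the document metadata in kwargs["env"]. A hedged sketch of the call shape being asserted; run_pre_consume_script here is illustrative, not the real Consumer implementation:

    import subprocess


    def run_pre_consume_script(script, original_path, working_path):
        # The tests assert exactly this shape: the script and the original
        # path as positional args, and both paths exposed via the environment.
        subprocess.run(
            args=[script, original_path],
            env={
                "DOCUMENT_SOURCE_PATH": original_path,
                "DOCUMENT_WORKING_PATH": working_path,
            },
            check=True,
        )
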
@ -1,3 +1,5 @@
from unittest import mock

from django.test import TestCase
from documents import index
from documents.models import Document
@ -31,3 +33,60 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
        )
        self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
        self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])

    def test_archive_serial_number_ranging(self):
        """
        GIVEN:
            - Document with an archive serial number above schema allowed size
        WHEN:
            - Document is provided to the index
        THEN:
            - Error is logged
            - Document ASN is reset to 0 for the index
        """
        doc1 = Document.objects.create(
            title="doc1",
            checksum="A",
            content="test test2 test3",
            # yes, this is allowed, unless full_clean is run
            # DRF does call the validators, this test won't
            archive_serial_number=Document.ARCHIVE_SERIAL_NUMBER_MAX + 1,
        )
        with self.assertLogs("paperless.index", level="ERROR") as cm:
            with mock.patch(
                "documents.index.AsyncWriter.update_document",
            ) as mocked_update_doc:
                index.add_or_update_document(doc1)

                mocked_update_doc.assert_called_once()
                _, kwargs = mocked_update_doc.call_args

                self.assertEqual(kwargs["asn"], 0)

                error_str = cm.output[0]
                expected_str = "ERROR:paperless.index:Not indexing Archive Serial Number 4294967296 of document 1"
                self.assertIn(expected_str, error_str)

    def test_archive_serial_number_is_none(self):
        """
        GIVEN:
            - Document with no archive serial number
        WHEN:
            - Document is provided to the index
        THEN:
            - ASN isn't touched
        """
        doc1 = Document.objects.create(
            title="doc1",
            checksum="A",
            content="test test2 test3",
        )
        with mock.patch(
            "documents.index.AsyncWriter.update_document",
        ) as mocked_update_doc:
            index.add_or_update_document(doc1)

            mocked_update_doc.assert_called_once()
            _, kwargs = mocked_update_doc.call_args

            self.assertIsNone(kwargs["asn"])

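
The `_, kwargs = mocked_update_doc.call_args` pattern above relies on mock recording each call as an (args, kwargs) pair, so keyword-only assertions read the second element. In isolation:

    from unittest import mock

    writer = mock.MagicMock()
    writer.update_document(asn=0, id="1", title="doc1")

    writer.update_document.assert_called_once()
    _, kwargs = writer.update_document.call_args
    assert kwargs["asn"] == 0
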
@ -3,6 +3,7 @@ import shutil
import tempfile
from collections import namedtuple
from contextlib import contextmanager
from unittest import mock

from django.apps import apps
from django.db import connection
@ -86,6 +87,30 @@ class DirectoriesMixin:
        remove_dirs(self.dirs)


class ConsumerProgressMixin:
    def setUp(self) -> None:
        self.send_progress_patcher = mock.patch(
            "documents.consumer.Consumer._send_progress",
        )
        self.send_progress_mock = self.send_progress_patcher.start()
        super().setUp()

    def tearDown(self) -> None:
        super().tearDown()
        self.send_progress_patcher.stop()


class DocumentConsumeDelayMixin:
    def setUp(self) -> None:
        self.consume_file_patcher = mock.patch("documents.tasks.consume_file.delay")
        self.consume_file_mock = self.consume_file_patcher.start()
        super().setUp()

    def tearDown(self) -> None:
        super().tearDown()
        self.consume_file_patcher.stop()


class TestMigrations(TransactionTestCase):
    @property
    def app(self):
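
These mixins follow the start-in-setUp / stop-in-tearDown patcher pattern: every test in a class that mixes them in sees the target already mocked, and the patch is reliably undone afterwards. A self-contained sketch of the same pattern against a stand-in target (os.getcwd, chosen purely for illustration):

    from unittest import TestCase, mock


    class GetcwdMockMixin:
        def setUp(self) -> None:
            self.patcher = mock.patch("os.getcwd", return_value="/mocked")
            self.mocked = self.patcher.start()
            super().setUp()

        def tearDown(self) -> None:
            super().tearDown()
            self.patcher.stop()


    class ExampleTest(GetcwdMockMixin, TestCase):
        def test_target_is_mocked(self):
            import os

            self.assertEqual(os.getcwd(), "/mocked")
            self.mocked.assert_called_once()
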
@ -1,7 +1,7 @@
from typing import Final
from typing import Tuple

__version__: Final[Tuple[int, int, int]] = (1, 12, 1)
__version__: Final[Tuple[int, int, int]] = (1, 12, 2)
# Version string like X.Y.Z
__full_version_str__: Final[str] = ".".join(map(str, __version__))
# Version string like X.Y
@ -67,11 +67,6 @@ class TestParserLive(TestCase):

        return result

    # Only run if convert is available
    @pytest.mark.skipif(
        "PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
        reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
    )
    @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
    def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
        """
@ -204,11 +199,6 @@ class TestParserLive(TestCase):
        "GOTENBERG_LIVE" not in os.environ,
        reason="No gotenberg server",
    )
    # Only run if convert is available
    @pytest.mark.skipif(
        "PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
        reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
    )
    def test_generate_pdf_from_mail(self):
        """
        GIVEN:
@ -301,11 +291,6 @@ class TestParserLive(TestCase):
        "GOTENBERG_LIVE" not in os.environ,
        reason="No gotenberg server",
    )
    # Only run if convert is available
    @pytest.mark.skipif(
        "PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
        reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
    )
    def test_generate_pdf_from_html(self):
        """
        GIVEN:
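
For contrast, this is the environment-gated skip pattern being deleted above; it became dead weight once the CI change in this commit stopped setting PAPERLESS_TEST_SKIP_CONVERT and configured ImageMagick instead, so convert is always available to the test run. A minimal standalone example of the same marker:

    import os

    import pytest


    @pytest.mark.skipif(
        "PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
        reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
    )
    def test_requires_convert():
        # Runs only when the environment variable is absent.
        assert True
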
@ -90,7 +90,7 @@ class TikaDocumentParser(DocumentParser):
        with open(document_path, "rb") as document_handle:
            files = {
                "files": (
                    file_name or os.path.basename(document_path),
                    "convert" + os.path.splitext(document_path)[-1],
                    document_handle,
                ),
            }
@ -7,7 +7,7 @@ max-line-length = 88

[tool:pytest]
DJANGO_SETTINGS_MODULE=paperless.settings
addopts = --pythonwarnings=all --cov --cov-report=html --numprocesses auto --quiet
addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --quiet
env =
PAPERLESS_DISABLE_DBHANDLER=true