diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..0f0ac44e2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,46 @@ +--- +name: Bug report +about: Something is not working +title: "[BUG] Concise description of the issue" +labels: '' +assignees: '' + +--- + + + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Webserver logs** +``` +If available, post any logs from the web server related to your issue. +``` + +**Relevant information** + - Host OS of the machine running paperless: [e.g. Archlinux / Ubuntu 20.04] + - Browser [e.g. chrome, safari] + - Version [e.g. 1.0.0] + - Installation method: [docker / bare metal] + - Any configuration changes you made in `docker-compose.yml`, `docker-compose.env` or `paperless.conf`. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000..47c36c23d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,21 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: "[Feature Request] Concise and clear description of your feature request" +labels: '' +assignees: '' + +--- + + + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. 
diff --git a/.github/ISSUE_TEMPLATE/other.md b/.github/ISSUE_TEMPLATE/other.md new file mode 100644 index 000000000..ef394f7f9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/other.md @@ -0,0 +1,18 @@ +--- +name: Other +about: Anything that is not a feature request or bug. +title: "[Other] Title of your issue" +labels: '' +assignees: '' + +--- + + diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible.yml index 646c7ff81..fd965e760 100644 --- a/.github/workflows/ansible.yml +++ b/.github/workflows/ansible.yml @@ -21,20 +21,20 @@ jobs: - name: Install dependencies run: | python3 -m pip install --upgrade pip - python3 -m pip install molecule[ansible,docker] + python3 -m pip install molecule[ansible,docker] jmespath ansible --version docker --version molecule --version python --version - - name: Test fresh installation with molecule + - name: Test installation/build/upgrade with molecule run: | cd ansible - molecule test -s fresh - working-directory: "${{ github.repository }}" - - name: Test release update with molecule - run: | - cd ansible - molecule test -s update + molecule create + molecule verify + molecule converge + molecule idempotence + molecule verify + molecule destroy working-directory: "${{ github.repository }}" # # https://galaxy.ansible.com/docs/contributing/importing.html # release: diff --git a/README.md b/README.md index 89f55b2d9..0fc3185a1 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![ci](https://github.com/jonaswinkler/paperless-ng/workflows/ci/badge.svg) +[![ci](https://github.com/jonaswinkler/paperless-ng/workflows/ci/badge.svg)](https://github.com/jonaswinkler/paperless-ng/actions) [![Documentation Status](https://readthedocs.org/projects/paperless-ng/badge/?version=latest)](https://paperless-ng.readthedocs.io/en/latest/?badge=latest) [![Gitter](https://badges.gitter.im/paperless-ng/community.svg)](https://gitter.im/paperless-ng/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 
[![Docker Hub Pulls](https://img.shields.io/docker/pulls/jonaswinkler/paperless-ng.svg)](https://hub.docker.com/r/jonaswinkler/paperless-ng) @@ -8,7 +8,11 @@ [Paperless](https://github.com/the-paperless-project/paperless) is an application by Daniel Quinn and contributors that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents. -Paperless-ng is a fork of the original project, adding a new interface and many other changes under the hood. For a detailed list of changes, have a look at the changelog in the documentation. +Paperless-ng is a fork of the original project, adding a new interface and many other changes under the hood. For a detailed list of changes, have a look at the [changelog](https://paperless-ng.readthedocs.io/en/latest/changelog.html) in the documentation. + +# Survey + +If you have already used Paperless-ng for a bit and would like to give some anonymous feedback to help me decide what to focus on next, I've created a survey: [see here](https://github.com/jonaswinkler/paperless-ng/issues/402). Thank you! # How it Works @@ -29,6 +33,8 @@ Here's what you get: # Features * Performs OCR on your documents, adds selectable text to image only documents and adds tags, correspondents and document types to your documents. +* Supports PDF documents, images, plain text files, and Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents). + * Office document support is optional and provided by Apache Tika (see [configuration](https://paperless-ng.readthedocs.io/en/latest/configuration.html#tika-settings)) * Paperless stores your documents plain on disk. Filenames and folders are managed by paperless and can be configured freely. * Single page application front end. Should be pretty snappy. Will be mobile friendly in the future. * Includes a dashboard that shows basic statistics and has document upload. 
@@ -50,25 +56,6 @@ If you want to see some screenshots of paperless-ng in action, [some are availab For a complete list of changes from paperless, check out the [changelog](https://paperless-ng.readthedocs.io/en/latest/changelog.html) -# Roadmap for 1.0 - -- Make the front end nice (except mobile). -- Fix whatever bugs I and you find. -- Make the documentation nice. - -## On the chopping block. - -- **GnuPG encrypion.** [Here's a note about encryption in paperless](https://paperless-ng.readthedocs.io/en/latest/administration.html#managing-encryption). The gist of it is that I don't see which attacks this implementation protects against. It gives a false sense of security to users who don't care about how it works. - -## Wont-do list. - -These features will probably never make it into paperless, since paperless is meant to be an easy to use set-and-forget solution. - -- **Document versions.** I might consider adding the ability to update a document with a newer version, but that's about it. The kind of documents that get added to paperless usually don't change at all. -- **Workflows.** I don't see a use case for these, yet. -- **Folders.** Tags are superior in just about every way. -- **Apps / extension support.** Again, paperless is meant to be simple. - # Getting started The recommended way to deploy paperless is docker-compose. The files in the /docker/hub directory are configured to pull the image from Docker Hub. 
diff --git a/ansible/defaults/main.yml b/ansible/defaults/main.yml index 83047307d..aaeffa507 100644 --- a/ansible/defaults/main.yml +++ b/ansible/defaults/main.yml @@ -1,5 +1,5 @@ --- -paperlessng_version: 0.9.14 +paperlessng_version: latest # 'latest', release number, or github branch/tag/commit/ref # Required services paperlessng_redis_host: localhost diff --git a/ansible/molecule/update/converge.yml b/ansible/molecule/default/converge.yml similarity index 56% rename from ansible/molecule/update/converge.yml rename to ansible/molecule/default/converge.yml index b19a5981a..f5f9b17c2 100644 --- a/ansible/molecule/update/converge.yml +++ b/ansible/molecule/default/converge.yml @@ -2,10 +2,9 @@ - name: update previous release to newest release hosts: all tasks: - - name: set current version as installation target + - name: set github ref as version when available set_fact: - paperlessng_version: 0.9.14 - + paperlessng_version: "{{ lookup('env', 'GITHUB_REF') | default('latest', True) }}" - name: update to newest paperless-ng release include_role: name: ansible diff --git a/ansible/molecule/fresh/molecule.yml b/ansible/molecule/default/molecule.yml similarity index 100% rename from ansible/molecule/fresh/molecule.yml rename to ansible/molecule/default/molecule.yml diff --git a/ansible/molecule/update/prepare.yml b/ansible/molecule/default/prepare.yml similarity index 85% rename from ansible/molecule/update/prepare.yml rename to ansible/molecule/default/prepare.yml index 6f3734329..e175eff5b 100644 --- a/ansible/molecule/update/prepare.yml +++ b/ansible/molecule/default/prepare.yml @@ -3,7 +3,7 @@ tasks: - name: set previous version as installation target set_fact: - paperlessng_version: 0.9.13 + paperlessng_version: latest - name: install previous paperless-ng release include_role: diff --git a/ansible/molecule/default/verify.yml b/ansible/molecule/default/verify.yml new file mode 100644 index 000000000..185840783 --- /dev/null +++ 
b/ansible/molecule/default/verify.yml @@ -0,0 +1,94 @@ +--- +- name: Verify + hosts: all + gather_facts: false + + vars_files: + - ../../defaults/main.yml + + tasks: + - name: check if webserver is up + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}" + status_code: [200, 302] + return_content: yes + register: landingpage + failed_when: "'Sign in' not in landingpage.content" + + - name: generate random name and content + set_fact: + content: "{{ lookup('password', '/dev/null length=65536 chars=ascii_letters') }}" + filename: "{{ lookup('password', '/dev/null length=8 chars=ascii_letters') }}" + + - name: check if document posting works + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/documents/post_document/" + method: POST + body_format: form-multipart + body: + document: + content: "{{ content }}" + filename: "{{ filename }}.txt" + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + Content-Type: text/plain + return_content: yes + register: post_document + failed_when: "'OK' not in post_document.content" + + - name: verify uploaded document has been accepted + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/" + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + return_content: yes + register: logs + failed_when: "('Consuming ' + filename + '.txt') not in logs.content" + + - name: sleep till consumption finished + pause: + seconds: 10 + + - name: verify uploaded document has been consumed + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/" + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + return_content: yes + register: logs + failed_when: "filename + ' consumption finished' not in 
logs.content" + + - name: get documents + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/documents/" + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + return_content: yes + register: documents + + - name: set document index + set_fact: + index: "{{ documents.json['results'][0]['id'] }}" + + - name: verify uploaded document is avaiable + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/documents/{{ index }}/" + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + return_content: yes + register: document + failed_when: "'Not found.' in document.content or content not in document.json['content']" + + - name: check if deleting uploaded document works + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/documents/bulk_edit/" + method: POST + body_format: json + body: + documents: ["{{ index }}"] + method: delete + parameters: {} + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + register: delete_document + failed_when: "'OK' not in delete_document.json['result']" diff --git a/ansible/molecule/fresh/converge.yml b/ansible/molecule/fresh/converge.yml deleted file mode 100644 index 99e25677b..000000000 --- a/ansible/molecule/fresh/converge.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -- name: fresh installation - hosts: all - tasks: - - name: install paperless-ng with default parameters - include_role: - name: ansible diff --git a/ansible/molecule/fresh/verify.yml b/ansible/molecule/fresh/verify.yml deleted file mode 100644 index c353783ab..000000000 --- a/ansible/molecule/fresh/verify.yml +++ /dev/null @@ -1,60 +0,0 @@ ---- -- name: Verify - hosts: all - gather_facts: false - - vars_files: - - ../../defaults/main.yml - - tasks: - - name: check if 
webserver is up - uri: - url: http://localhost:8000 - status_code: [200, 302] - return_content: yes - register: landingpage - failed_when: "'Sign in' not in landingpage.content" - - - name: check if document posting works - uri: - url: http://localhost:8000/api/documents/post_document/ - method: POST - body_format: form-multipart - body: - document: - content: FOO - filename: document.txt - mime_type: text/plain - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: post_document - failed_when: "'OK' not in post_document.content" - - - name: verify uploaded document has been accepted - uri: - url: http://localhost:8000/api/logs/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: logs - failed_when: "'Consuming document.txt' not in logs.content" - - # assumes txt consumption finished by now, might have to sleep a bit - - name: verify uploaded document has been consumed - uri: - url: http://localhost:8000/api/logs/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: logs - failed_when: "'document consumption finished' not in logs.content" - - - name: verify uploaded document is avaiable - uri: - url: http://localhost:8000/api/documents/1/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: document - failed_when: "'Not found.' 
in document.content or 'FOO' not in document.content" diff --git a/ansible/molecule/update/molecule.yml b/ansible/molecule/update/molecule.yml deleted file mode 100644 index 27f37ba63..000000000 --- a/ansible/molecule/update/molecule.yml +++ /dev/null @@ -1,35 +0,0 @@ ---- -dependency: - name: galaxy -driver: - name: docker -platforms: - - name: ubuntu_focal - image: jrei/systemd-ubuntu:20.04 - privileged: true - volumes: - - /sys/fs/cgroup:/sys/fs/cgroup:ro - tmpfs: - - /tmp - - /run - - /run/lock - override_command: False - # ubuntu 18.04 bionic works except that - # the default redis configuration expects IPv6 which is not enabled in docker by default - # the default Python environment is configured for ASCII instead of UTF-8 - # ubuntu 16.04 xenial only has Python 3.5 which is EOL and breaks multiple dependencies - - name: debian_buster - image: jrei/systemd-debian:10 - privileged: true - volumes: - - /sys/fs/cgroup:/sys/fs/cgroup:ro - tmpfs: - - /tmp - - /run - - /run/lock - override_command: False - # debian 9 stretch only has Python 3.5 which is EOL and breaks multiple dependencies -provisioner: - name: ansible -verifier: - name: ansible diff --git a/ansible/molecule/update/verify.yml b/ansible/molecule/update/verify.yml deleted file mode 100644 index c353783ab..000000000 --- a/ansible/molecule/update/verify.yml +++ /dev/null @@ -1,60 +0,0 @@ ---- -- name: Verify - hosts: all - gather_facts: false - - vars_files: - - ../../defaults/main.yml - - tasks: - - name: check if webserver is up - uri: - url: http://localhost:8000 - status_code: [200, 302] - return_content: yes - register: landingpage - failed_when: "'Sign in' not in landingpage.content" - - - name: check if document posting works - uri: - url: http://localhost:8000/api/documents/post_document/ - method: POST - body_format: form-multipart - body: - document: - content: FOO - filename: document.txt - mime_type: text/plain - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + 
paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: post_document - failed_when: "'OK' not in post_document.content" - - - name: verify uploaded document has been accepted - uri: - url: http://localhost:8000/api/logs/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: logs - failed_when: "'Consuming document.txt' not in logs.content" - - # assumes txt consumption finished by now, might have to sleep a bit - - name: verify uploaded document has been consumed - uri: - url: http://localhost:8000/api/logs/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: logs - failed_when: "'document consumption finished' not in logs.content" - - - name: verify uploaded document is avaiable - uri: - url: http://localhost:8000/api/documents/1/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: document - failed_when: "'Not found.' 
in document.content or 'FOO' not in document.content" diff --git a/ansible/tasks/install-release.yml b/ansible/tasks/install-release.yml new file mode 100644 index 000000000..c2dfb0b9f --- /dev/null +++ b/ansible/tasks/install-release.yml @@ -0,0 +1,6 @@ +--- +- name: extract paperless-ng + unarchive: + src: "https://github.com/jonaswinkler/paperless-ng/releases/download/ng-{{ paperlessng_version }}/paperless-ng-{{ paperlessng_version }}.tar.xz" + remote_src: yes + dest: "{{ tempdir.path }}" diff --git a/ansible/tasks/install-source.yml b/ansible/tasks/install-source.yml new file mode 100644 index 000000000..ab8fbfef7 --- /dev/null +++ b/ansible/tasks/install-source.yml @@ -0,0 +1,111 @@ +--- +- name: install dev dependencies + apt: + pkg: + - git + - npm + - gettext + +- name: create output directories + file: + path: "{{ item }}" + state: directory + owner: "{{ paperlessng_system_user }}" + group: "{{ paperlessng_system_group }}" + mode: "750" + with_items: + - "{{ tempdir.path }}/paperless-ng" + - "{{ tempdir.path }}/paperless-ng/scripts" + +- block: + - name: create temporary git directory + tempfile: + state: directory + path: "{{ paperlessng_directory }}" + register: gitdir + + - name: pull paperless-ng + git: + repo: https://github.com/jonaswinkler/paperless-ng.git + dest: "{{ gitdir.path }}" + version: "{{ paperlessng_version }}" + refspec: "+refs/pull/*:refs/pull/*" + + - name: compile frontend + command: + cmd: "{{ item }}" + args: + chdir: "{{ gitdir.path }}/src-ui" + failed_when: false + with_items: + - npm install -g @angular/cli + - npm install + - ./node_modules/.bin/ng build --prod + + - name: copy application into place + copy: + src: "{{ gitdir.path }}/{{ item.src }}" + remote_src: yes + dest: "{{ tempdir.path }}/paperless-ng/{{ item.dest | default('') }}" + with_items: + - src: CONTRIBUTING.md + - src: LICENSE + - src: Pipfile + - src: Pipfile.lock + - src: README.md + - src: requirements.txt + - src: paperless.conf.example + dest: 
"paperless.conf" + + - name: glob all scripts + find: + paths: ["{{ gitdir.path }}/scripts/"] + patterns: + - "*.service" + - "*.sh" + register: glob + + - name: copy scripts + copy: + src: "{{ item.path }}" + remote_src: yes + dest: "{{ tempdir.path }}/paperless-ng/scripts/" + with_items: + - "{{ glob.files }}" + + - name: copy sources + command: + cmd: "cp -r src/ {{ tempdir.path }}/paperless-ng/src" + args: + chdir: "{{ gitdir.path }}" + + - name: create paperlessng venv + command: + cmd: "python3 -m virtualenv {{ gitdir.path }}/.venv/ -p /usr/bin/python3" + + - name: install paperlessng requirements + command: + cmd: "{{ gitdir.path }}/.venv/bin/python3 -m pip install -r {{ gitdir.path }}/requirements.txt" + + - name: compile messages + command: "{{ gitdir.path }}/.venv/bin/python3 manage.py compilemessages" + args: + chdir: "{{ tempdir.path }}/paperless-ng/src/" + + - name: collect static files + command: "{{ gitdir.path }}/.venv/bin/python3 manage.py collectstatic --no-input" + args: + chdir: "{{ tempdir.path }}/paperless-ng/src/" + + - name: remove pycache directories + shell: find . 
-name __pycache__ | xargs rm -r + args: + chdir: "{{ tempdir.path }}" + + - name: remove temporary git directory + file: + path: "{{ gitdir.path }}" + state: absent + + become: yes + become_user: "{{ paperlessng_system_user }}" diff --git a/ansible/tasks/main.yml b/ansible/tasks/main.yml index 6412fa30f..7dc6dbc05 100644 --- a/ansible/tasks/main.yml +++ b/ansible/tasks/main.yml @@ -34,7 +34,13 @@ - build-essential - python3-setuptools - python3-wheel - - python3-virtualenv + +# upstream virtualenv in Ubuntu 20.04 is broken +# https://github.com/pypa/virtualenv/issues/1873 +- name: install python virtualenv + pip: + name: virtualenv + extra_args: --upgrade - name: install ocr languages apt: @@ -97,71 +103,141 @@ # GNUPG_HOME required due to paperless db.py create_home: yes +- block: + - name: get latest release version + uri: + url: https://api.github.com/repos/jonaswinkler/paperless-ng/releases/latest + method: GET + register: latest_release + - name: parse latest release version + set_fact: + paperlessng_version: "{{ latest_release.json['tag_name'] }}" + when: paperlessng_version == "latest" + +- block: + - name: sanitize version string + set_fact: + paperlessng_version: "{{ paperlessng_version | regex_replace('^ng-(\\d+\\.\\d+\\.\\d+)$', '\\1') }}" + - name: get tag data + uri: + url: https://api.github.com/repos/jonaswinkler/paperless-ng/tags + method: GET + register: tags + - name: get commit for target tag + set_fact: + paperlessng_commit: "{{ tags.json | json_query('[?name==`ng-' + paperlessng_version +'`] | [0].commit.sha') }}" + when: paperlessng_version | regex_search("^(ng-)?(\d+\.\d+\.\d+)$") + +- block: + - name: check if version is branch + uri: + url: "https://api.github.com/repos/jonaswinkler/paperless-ng/branches/{{ paperlessng_version }}" + method: GET + status_code: [200, 404] + register: branch + - name: get commit for target branch + set_fact: + paperlessng_commit: "{{ branch.json | json_query('commit.sha') }}" + when: branch.status == 200 + - 
block: + - name: check if version is commit-or-ref + uri: + url: "https://api.github.com/repos/jonaswinkler/paperless-ng/commits/{{ paperlessng_version }}" + method: GET + status_code: [200, 404, 422] + register: commit + - name: get commit for target commit-or-ref + set_fact: + paperlessng_commit: "{{ commit.json | json_query('sha') }}" + when: commit.status == 200 + - name: fail + fail: + msg: "Can not determine commit from `paperlessng_version=={{ paperlessng_version }}`!" + when: commit.status != 200 + when: branch.status == 404 + when: not(paperlessng_version | regex_search("^(ng-)?(\d+\.\d+\.\d+)$")) + - name: check for paperless-ng installation command: - cmd: 'grep -Po "(?<=Paperless-ng )\d+\.\d+\.\d+" {{ paperlessng_directory }}/docs/changelog.html' - changed_when: '"No such file or directory" in paperlessng_current_version.stderr or paperlessng_current_version.stdout != paperlessng_version | string' + cmd: "cat {{ paperlessng_directory }}/.installed_version" + changed_when: '"No such file or directory" in paperlessng_current_commit.stderr or paperlessng_current_commit.stdout != paperlessng_commit | string' failed_when: false ignore_errors: yes - register: paperlessng_current_version + register: paperlessng_current_commit - name: register current state set_fact: - fresh_installation: '{{ "No such file or directory" in paperlessng_current_version.stderr }}' - update_installation: '{{ "No such file or directory" not in paperlessng_current_version.stderr and paperlessng_current_version.stdout != paperlessng_version | string }}' - reconfigure_only: '{{ paperlessng_current_version.stdout == paperlessng_version | string }}' + fresh_installation: '{{ "No such file or directory" in paperlessng_current_commit.stderr }}' + update_installation: '{{ "No such file or directory" not in paperlessng_current_commit.stderr and paperlessng_current_commit.stdout != paperlessng_commit | string }}' + reconfigure_only: "{{ paperlessng_current_commit.stdout == paperlessng_commit 
| string }}" -- name: backup current paperless-ng installation - copy: - src: "{{ paperlessng_directory }}" - remote_src: yes - dest: "{{ paperlessng_directory }}-{{ ansible_date_time.iso8601 }}/" +- block: + - name: backup current paperless-ng installation + copy: + src: "{{ paperlessng_directory }}" + remote_src: yes + dest: "{{ paperlessng_directory }}-{{ ansible_date_time.iso8601 }}/" + - name: remove current paperless sources + file: + path: "{{ paperlessng_directory }}/{{ item }}" + state: absent + with_items: + - docker + - docs + - scripts + - src + - static when: update_installation -- name: remove current paperless sources - file: - path: "{{ paperlessng_directory }}/{{ item }}" - state: absent - with_items: - - docker - - docs - - scripts - - src - - static - when: update_installation - -- name: create temporary directory - tempfile: - state: directory - register: tempdir - when: not reconfigure_only - -- name: extract paperless-ng - unarchive: - src: "https://github.com/jonaswinkler/paperless-ng/releases/download/ng-{{ paperlessng_version }}/paperless-ng-{{ paperlessng_version }}.tar.xz" - remote_src: yes - dest: "{{ tempdir.path }}" - when: not reconfigure_only - -- name: change owner and permissions of paperless-ng - command: - cmd: "{{ item }}" - warn: false - with_items: - - "chown -R {{ paperlessng_system_user }}:{{ paperlessng_system_group }} {{ tempdir.path }}" - - "find {{ tempdir.path }} -type d -exec chmod 0750 {} ;" - - "find {{ tempdir.path }} -type f -exec chmod 0640 {} ;" - when: not reconfigure_only - -- name: move paperless-ng - command: - cmd: "cp -a {{ tempdir.path }}/paperless-ng/. 
{{ paperlessng_directory }}" - when: not reconfigure_only - -- name: remove temporary directory - file: - path: "{{ tempdir.path }}" - state: absent +- block: + - name: create paperless-ng directory and set permissions + file: + path: "{{ paperlessng_directory }}" + state: directory + owner: "{{ paperlessng_system_user }}" + group: "{{ paperlessng_system_group }}" + mode: "750" + - name: create temporary directory + become: yes + become_user: "{{ paperlessng_system_user }}" + tempfile: + state: directory + path: "{{ paperlessng_directory }}" + register: tempdir + - name: check if version is available as release archive + uri: + url: "https://github.com/jonaswinkler/paperless-ng/releases/download/ng-{{ paperlessng_version }}/paperless-ng-{{ paperlessng_version }}.tar.xz" + method: GET + status_code: [200, 404] + register: release_archive + - name: install paperless-ng from source + include_tasks: install-source.yml + when: release_archive.status == 404 + - name: install paperless-ng from release archive + include_tasks: install-release.yml + when: release_archive.status == 200 + - name: change owner and permissions of paperless-ng + command: + cmd: "{{ item }}" + warn: false + with_items: + - "chown -R {{ paperlessng_system_user }}:{{ paperlessng_system_group }} {{ tempdir.path }}" + - "find {{ tempdir.path }} -type d -exec chmod 0750 {} ;" + - "find {{ tempdir.path }} -type f -exec chmod 0640 {} ;" + - name: move paperless-ng + command: + cmd: "cp -a {{ tempdir.path }}/paperless-ng/. 
{{ paperlessng_directory }}" + - name: store commit hash of installed version + copy: + content: "{{ paperlessng_commit }}" + dest: "{{ paperlessng_directory }}/.installed_version" + owner: "{{ paperlessng_system_user }}" + group: "{{ paperlessng_system_group }}" + mode: "0440" + - name: remove temporary directory + file: + path: "{{ tempdir.path }}" + state: absent when: not reconfigure_only - name: create paperless-ng directories and set permissions @@ -172,7 +248,6 @@ group: "{{ paperlessng_system_group }}" mode: "750" with_items: - - "{{ paperlessng_directory }}" - "{{ paperlessng_consumption_dir }}" - "{{ paperlessng_data_dir }}" - "{{ paperlessng_media_root }}" @@ -180,7 +255,7 @@ - name: rename initial config command: - cmd: "mv {{ paperlessng_directory }}/paperless.conf {{ paperlessng_directory }}/paperless.conf.template" + cmd: "mv -f {{ paperlessng_directory }}/paperless.conf {{ paperlessng_directory }}/paperless.conf.template" removes: "{{ paperlessng_directory }}/paperless.conf" - name: configure paperless-ng @@ -310,21 +385,20 @@ creates: "{{ paperlessng_virtualenv }}" register: venv -- name: install paperlessng requirements - become: yes - become_user: "{{ paperlessng_system_user }}" - pip: - requirements: "{{ paperlessng_directory }}/requirements.txt" - executable: "{{ paperlessng_virtualenv }}/bin/pip3" - extra_args: --upgrade - when: not reconfigure_only - -- name: migrate database schema - become: yes - become_user: "{{ paperlessng_system_user }}" - command: "{{ paperlessng_virtualenv }}/bin/python3 {{ paperlessng_directory }}/src/manage.py migrate" - register: database_schema - changed_when: '"No migrations to apply." 
not in database_schema.stdout' +- block: + - name: install paperlessng requirements + become: yes + become_user: "{{ paperlessng_system_user }}" + pip: + requirements: "{{ paperlessng_directory }}/requirements.txt" + executable: "{{ paperlessng_virtualenv }}/bin/pip3" + extra_args: --upgrade + - name: migrate database schema + become: yes + become_user: "{{ paperlessng_system_user }}" + command: "{{ paperlessng_virtualenv }}/bin/python3 {{ paperlessng_directory }}/src/manage.py migrate" + register: database_schema + changed_when: '"No migrations to apply." not in database_schema.stdout' when: not reconfigure_only - name: configure paperless superuser @@ -392,7 +466,7 @@ # https://www.freedesktop.org/software/systemd/man/systemd.exec.html { option: "User", value: "{{ paperlessng_system_user }}" }, { option: "Group", value: "{{ paperlessng_system_group }}" }, - { option: "WorkingDirectory", value: "{{ paperlessng_directory }}/src", }, + { option: "WorkingDirectory", value: "{{ paperlessng_directory }}/src" }, { option: "ProtectSystem", value: "full" }, { option: "NoNewPrivileges", value: "true" }, { option: "PrivateUsers", value: "true" }, diff --git a/docs/administration.rst b/docs/administration.rst index 14b986e82..c54323e6e 100644 --- a/docs/administration.rst +++ b/docs/administration.rst @@ -121,27 +121,19 @@ After grabbing the new release and unpacking the contents, do the following: dependencies. The dependencies required are listed in the section about :ref:`bare metal installations `. -2. Update python requirements. If you use Pipenv, this is done with the following steps. +2. Update python requirements. Keep in mind to activate your virtual environment + before that, if you use one. .. code:: shell-session - $ pip install --upgrade pipenv - $ cd /path/to/paperless - $ pipenv clean - $ pipenv install - - This creates a new virtual environment (or uses your existing environment) - and installs all dependencies into it. 
- - You can also use the included ``requirements.txt`` file instead and create the virtual - environment yourself. This file includes exactly the same dependencies. + $ pip install -r requirements.txt 3. Migrate the database. .. code:: shell-session $ cd src - $ pipenv run python3 manage.py migrate + $ python3 manage.py migrate This might not actually do anything. Not every new paperless version comes with new database migrations. @@ -195,7 +187,7 @@ or .. code:: shell-session $ cd /path/to/paperless/src - $ pipenv run python manage.py + $ python3 manage.py depending on whether you use docker or not. @@ -462,6 +454,3 @@ Basic usage to disable encryption of your document store: .. code:: decrypt_documents [--passphrase SECR3TP4SSPHRA$E] - - -.. _Pipenv: https://pipenv.pypa.io/en/latest/ diff --git a/docs/configuration.rst b/docs/configuration.rst index 5edc003f6..36b124350 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -376,25 +376,24 @@ PAPERLESS_THREADS_PER_WORKER= use a higher thread per worker count. 
The default is a balance between the two, according to your CPU core count, - with a slight favor towards threads per worker, and leaving at least one core - free for other tasks: + with a slight favor towards threads per worker: +----------------+---------+---------+ | CPU core count | Workers | Threads | +----------------+---------+---------+ | 1 | 1 | 1 | +----------------+---------+---------+ - | 2 | 1 | 1 | + | 2 | 2 | 1 | +----------------+---------+---------+ - | 4 | 1 | 3 | + | 4 | 2 | 2 | +----------------+---------+---------+ - | 6 | 2 | 2 | + | 6 | 2 | 3 | +----------------+---------+---------+ - | 8 | 2 | 3 | + | 8 | 2 | 4 | +----------------+---------+---------+ - | 12 | 3 | 3 | + | 12 | 3 | 4 | +----------------+---------+---------+ - | 16 | 3 | 5 | + | 16 | 4 | 4 | +----------------+---------+---------+ If you only specify PAPERLESS_TASK_WORKERS, paperless will adjust diff --git a/docs/setup.rst b/docs/setup.rst index 60a8b79d4..248bc558e 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -20,45 +20,45 @@ Paperless consists of the following components: .. code:: shell-session $ cd /path/to/paperless/src/ - $ pipenv run gunicorn -c /usr/src/paperless/gunicorn.conf.py -b 0.0.0.0:8000 paperless.wsgi + $ gunicorn -c ../gunicorn.conf.py -b 0.0.0.0:8000 paperless.wsgi or by any other means such as Apache ``mod_wsgi``. * **The consumer:** This is what watches your consumption folder for documents. - However, the consumer itself does not consume really consume your documents anymore. - It rather notifies a task processor that a new file is ready for consumption. + However, the consumer itself does not really consume your documents. + Now it notifies a task processor that a new file is ready for consumption. I suppose it should be named differently. - This also used to check your emails, but that's now gone elsewhere as well. + This was also used to check your emails, but that's now done elsewhere as well. 
Start the consumer with the management command ``document_consumer``: .. code:: shell-session $ cd /path/to/paperless/src/ - $ pipenv run python3 manage.py document_consumer + $ python3 manage.py document_consumer .. _setup-task_processor: * **The task processor:** Paperless relies on `Django Q `_ - for doing much of the heavy lifting. This is a task queue that accepts tasks from - multiple sources and processes tasks in parallel. It also comes with a scheduler that executes + for doing most of the heavy lifting. This is a task queue that accepts tasks from + multiple sources and processes these in parallel. It also comes with a scheduler that executes certain commands periodically. This task processor is responsible for: * Consuming documents. When the consumer finds new documents, it notifies the task processor to start a consumption task. - * Consuming emails. It periodically checks your configured accounts for new mails and - produces consumption tasks for any documents it finds. * The task processor also performs the consumption of any documents you upload through the web interface. - * Maintain the search index and the automatic matching algorithm. These are things that paperless + * Consuming emails. It periodically checks your configured accounts for new emails and + notifies the task processor to consume the attachment of an email. + * Maintaining the search index and the automatic matching algorithm. These are things that paperless needs to do from time to time in order to operate properly. This allows paperless to process multiple documents from your consumption folder in parallel! On - a modern multi core system, consumption with full ocr is blazing fast. + a modern multi core system, this makes the consumption process with full OCR blazingly fast. 
- The task processor comes with a built-in admin interface that you can use to see whenever any of the + The task processor comes with a built-in admin interface that you can use to check whether any of the tasks fail and inspect the errors (i.e., wrong email credentials, errors during consuming a specific file, etc). @@ -67,11 +67,11 @@ Paperless consists of the following components: .. code:: shell-session $ cd /path/to/paperless/src/ - $ pipenv run python3 manage.py qcluster + $ python3 manage.py qcluster * A `redis `_ message broker: This is a really lightweight service that is responsible - for getting the tasks from the webserver and consumer to the task scheduler. These run in different - processes (maybe even on different machines!), and therefore, this is necessary. + for getting the tasks from the webserver and the consumer to the task scheduler. These run in different + processes (maybe even on different machines!), and therefore, this is necessary. * Optional: A database server. Paperless supports both PostgreSQL and SQLite for storing its data. @@ -79,7 +79,7 @@ Paperless consists of the following components: Installation ############ -You can go multiple routes with setting up and running Paperless: +You can go multiple routes to set up and run Paperless: * :ref:`Pull the image from Docker Hub ` * :ref:`Build the Docker image yourself ` @@ -87,26 +87,31 @@ You can go multiple routes with setting up and running Paperless: * :ref:`Use ansible to install Paperless on your system automatically (bare metal) ` The Docker routes are quick & easy. These are the recommended routes. This configures all the stuff -from above automatically so that it just works and uses sensible defaults for all configuration options. +from the above automatically so that it just works and uses sensible defaults for all configuration options.
+Here you find a cheat-sheet for docker beginners: `CLI Basics `_ -The bare metal route is more complicated to setup but makes it easier +The bare metal route is complicated to setup but makes it easier should you want to contribute some code back. You need to configure and run the above mentioned components yourself. -The ansible route cobines benefits from both options: -the setup process is fully automated, reproducible and idempotent, -it includes the same sensible defaults, -and it simultaneously provides the flexibility of a bare metal installation. +The ansible route combines benefits of both options: +the setup process is fully automated, reproducible and `idempotent `_, +it includes the same sensible defaults, and it simultaneously provides the flexibility of a bare metal installation. + +.. _CLI Basics: https://sehn.tech/post/devops-with-docker/ +.. _idempotent: https://docs.ansible.com/ansible/latest/reference_appendices/glossary.html#Idempotency .. _setup-docker_hub: Install Paperless from Docker Hub ================================= -1. Go to the `/docker/compose directory on the project page `_ - and download one of the ``docker-compose.*.yml`` files, depending on which database backend you +1. Login with your user and create a folder in your home-directory `mkdir -v ~/paperless-ng` to have a place for your configuration files and consumption directory. + +2. Go to the `/docker/compose directory on the project page `_ + and download one of the `docker-compose.*.yml` files, depending on which database backend you want to use. Rename this file to `docker-compose.yml`. - If you want to enable optional support for Office documents, download a file with ``-tika`` in its name. + If you want to enable optional support for Office documents, download a file with `-tika` in the file name. Download the ``docker-compose.env`` file and the ``.env`` file as well and store them in the same directory. 
@@ -115,25 +120,26 @@ Install Paperless from Docker Hub For new installations, it is recommended to use PostgreSQL as the database backend. -2. Install `Docker`_ and `docker-compose`_. +3. Install `Docker`_ and `docker-compose`_. .. caution:: If you want to use the included ``docker-compose.*.yml`` file, you need to have at least Docker version **17.09.0** and docker-compose version **1.17.0**. + To check do: `docker-compose -v` or `docker -v` See the `Docker installation guide`_ on how to install the current version of Docker for your operating system or Linux distribution of - choice. To get an up-to-date version of docker-compose, follow the + choice. To get the latest version of docker-compose, follow the `docker-compose installation guide`_ if your package repository doesn't include it. .. _Docker installation guide: https://docs.docker.com/engine/installation/ .. _docker-compose installation guide: https://docs.docker.com/compose/install/ -3. Modify ``docker-compose.yml`` to your preferences. You may want to change the path - to the consumption directory in this file. Find the line that specifies where +4. Modify ``docker-compose.yml`` to your preferences. You may want to change the path + to the consumption directory. Find the line that specifies where to mount the consumption directory: .. code:: @@ -149,31 +155,35 @@ Install Paperless from Docker Hub Don't change the part after the colon or paperless wont find your documents. -4. Modify ``docker-compose.env``, following the comments in the file. The +5. Modify ``docker-compose.env``, following the comments in the file. The most important change is to set ``USERMAP_UID`` and ``USERMAP_GID`` to the uid and gid of your user on the host system. This ensures that both the docker container and you on the host machine have write access to the consumption directory. 
If your UID and GID on the host system is 1000 (the default for the first normal user on most systems), it will - work out of the box without any modifications. + work out of the box without any modifications. Run ``id "username"`` to check. .. note:: - You can use any settings from the file ``paperless.conf.example`` in this file. - Have a look at :ref:`configuration` to see whats available. + You can copy any setting from the file ``paperless.conf.example`` and paste it here. + Have a look at :ref:`configuration` to see what's available. .. caution:: - Certain file systems such as NFS network shares don't support file system + Some file systems such as NFS network shares don't support file system notifications with ``inotify``. When storing the consumption directory - on such a file system, paperless will be unable to pick up new files + on such a file system, paperless will not pick up new files with the default configuration. You will need to use ``PAPERLESS_CONSUMER_POLLING``, which will disable inotify. See :ref:`here `. -5. Run ``docker-compose up -d``. This will create and start the necessary - containers. +6. Now head over to: https://hub.docker.com/r/jonaswinkler/paperless-ng and choose your preferred + image and copy the link. To download this image do a ``docker pull`` followed by the link. Do this within the directory with the .yml files. + Depending on your network connection and CPU this will take a while. You have time to get a beverage. -6. To be able to login, you will need a super user. To create it, execute the +7. Run ``docker-compose up -d``. This will create and start the necessary + containers, but you are not done yet! + +8. To be able to login, you will need a super user. To create it, execute the following command: .. code-block:: shell-session @@ -181,12 +191,12 @@ Install Paperless from Docker Hub $ docker-compose run --rm webserver createsuperuser This will prompt you to set a username, an optional e-mail address and - finally a password.
+ finally a password (at least 8 characters). -7. The default ``docker-compose.yml`` exports the webserver on your local port +9. The default ``docker-compose.yml`` exports the webserver on your local port 8000. If you haven't adapted this, you should now be able to visit your - Paperless instance at ``http://127.0.0.1:8000``. You can login with the - user and password you just created. + Paperless instance at ``http://127.0.0.1:8000`` or on port 8000 of your server's IP address. + Use the login credentials you created in the previous step. .. _Docker: https://www.docker.com/ .. _docker-compose: https://docs.docker.com/compose/install/ @@ -214,7 +224,7 @@ Build the docker image yourself webserver: image: jonaswinkler/paperless-ng:latest - + and replace it with a line that instructs docker-compose to build the image from the current working directory instead: .. code:: yaml @@ -245,7 +255,7 @@ writing. Windows is not and will never be supported. 1. Install dependencies. Paperless requires the following packages. * ``python3`` 3.6, 3.7, 3.8, 3.9 - * ``python3-pip``, optionally ``pipenv`` for package installation + * ``python3-pip`` * ``python3-dev`` * ``fonts-liberation`` for generating thumbnails for plain text files @@ -314,8 +324,13 @@ writing. Windows is not and will never be supported. Adjust as necessary if you configured different folders. -7. Install python requirements. Paperless comes with both Pipfiles for ``pipenv`` as well as with a ``requirements.txt``. - Both will install exactly the same requirements. It is up to you if you wish to use a virtual environment or not. +7. Install python requirements from the ``requirements.txt`` file. + It is up to you if you wish to use a virtual environment or not. + + .. code:: shell-session + + pip3 install -r requirements.txt + 8. Go to ``/opt/paperless/src``, and execute the following commands: @@ -339,7 +354,8 @@ writing. Windows is not and will never be supported. ..
warning:: This is a development server which should not be used in - production. + production. It is not audited for security and performance + is inferior to production ready web servers. .. hint:: @@ -354,6 +370,11 @@ writing. Windows is not and will never be supported. ``consumer`` script to watch the input folder, and the ``scheduler`` script to run tasks such as email checking and document consumption. + You may need to adjust the path to the ``gunicorn`` executable. This + will be installed as part of the python dependencies, and is either located + in the ``bin`` folder of your virtual environment, or in ``~/.local/bin/`` if + no virtual environment is used. + These services rely on redis and optionally the database server, but don't need to be started in any particular order. The example files depend on redis being started. If you use a database server, you should @@ -406,7 +427,7 @@ Install Paperless using ansible This role currently only supports Debian 10 Buster and Ubuntu 20.04 Focal or later as target hosts. -1. Install ansible 2.7+ on the management node. +1. Install ansible 2.7+ on the management node. This may be the target host paperless-ng is being installed on or any remote host which can access the target host. For further details, check the ansible `inventory `_ documentation. @@ -518,7 +539,10 @@ Migration to paperless-ng At its core, paperless-ng is still paperless and fully compatible. However, some things have changed under the hood, so you need to adapt your setup depending on -how you installed paperless. The important things to keep in mind are as follows. +how you installed paperless. + +This setup describes how to update an existing paperless Docker installation. +The important things to keep in mind are as follows: * Read the :ref:`changelog ` and take note of breaking changes. 
* You should decide if you want to stick with SQLite or want to migrate your database @@ -553,11 +577,18 @@ Migration to paperless-ng is then performed in a few simple steps: .. caution:: - Paperless includes a ``.env`` file. This will set the - project name for docker compose to ``paperless`` so that paperless-ng will - automatically reuse your existing paperless volumes. When you start it, it - will migrate your existing data. After that, your old paperless installation - will be incompatible with the migrated volumes. + Paperless-ng includes a ``.env`` file. This will set the + project name for docker compose to ``paperless``, which will also define the name + of the volumes by paperless-ng. However, if you experience that paperless-ng + is not using your old paperless volumes, verify the names of your volumes with + + .. code:: shell-session + + $ docker volume ls | grep _data + + and adjust the project name in the ``.env`` file so that it matches the name + of the volumes before the ``_data`` part. + 4. Download the ``docker-compose.sqlite.yml`` file to ``docker-compose.yml``. If you want to switch to PostgreSQL, do that after you migrated your existing @@ -638,14 +669,12 @@ management commands as below. This will launch the container and initialize the PostgreSQL database. - b) Without docker, open a shell in your virtual environment, switch to + b) Without docker, remember to activate any virtual environment, switch to the ``src`` directory and create the database schema: .. code:: shell-session - $ cd /path/to/paperless - $ pipenv shell - $ cd src + $ cd /path/to/paperless/src $ python3 manage.py migrate This will not copy any data yet. @@ -662,7 +691,7 @@ management commands as below. $ python3 manage.py loaddata data.json -6. Exit the shell. +6. If operating inside Docker, you may exit the shell now. .. 
code:: shell-session diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index b8343710f..f55d57af5 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -30,13 +30,22 @@ Consumer fails to pickup any new files ###################################### If you notice that the consumer will only pickup files in the consumption -directory at startup, but won't find any other files added later, check out -the configuration file and enable filesystem polling with the setting -``PAPERLESS_CONSUMER_POLLING``. +directory at startup, but won't find any other files added later, you will need to +enable filesystem polling with the configuration option +``PAPERLESS_CONSUMER_POLLING``, see :ref:`here `. This will disable listening to filesystem changes with inotify and paperless will manually check the consumption directory for changes instead. + +Paperless always redirects to /admin +#################################### + +You probably had the old paperless installed at some point. Paperless installed +a permanent redirect to /admin in your browser, and you need to clear your +browsing data / cache to fix that. + + Operation not permitted ####################### @@ -64,6 +73,24 @@ This may have two reasons: with Inbox tags. Verify that there are documents in your archive without inbox tags. The algorithm will only learn from documents not in your inbox. +UserWarning in sklearn on every single document +############################################### + +You may encounter warnings like this: + +.. code:: + + /usr/local/lib/python3.7/site-packages/sklearn/base.py:315: + UserWarning: Trying to unpickle estimator CountVectorizer from version 0.23.2 when using version 0.24.0. + This might lead to breaking code or invalid results. Use at your own risk. + +This happens when certain dependencies of paperless that are responsible for the auto matching algorithm are +updated. After updating these, your current training data *might* not be compatible anymore. 
This can be ignored +in most cases. This warning will disappear automatically when paperless updates the training data. + +If you want to get rid of the warning or actually experience issues with automatic matching, delete +the file ``classification_model.pickle`` in the data directory and let paperless recreate it. + Permission denied errors in the consumption directory ##################################################### @@ -78,3 +105,47 @@ Ensure that ``USERMAP_UID`` and ``USERMAP_GID`` are set to the user id and group different from ``1000``. See :ref:`setup-docker_hub`. Also ensure that you are able to read and write to the consumption directory on the host. + +Web-UI stuck at "Loading..." +############################ + +This might have multiple reasons. + + +1. If you built the docker image yourself or deployed using the bare metal route, + make sure that there are files in ``/static/frontend//``. + If there are no files, make sure that you executed ``collectstatic`` successfully, either + manually or as part of the docker image build. + + If the front end is still missing, make sure that the front end is compiled (files present in + ``src/documents/static/frontend``). If it is not, you need to compile the front end yourself + or download the release archive instead of cloning the repository. + +2. Check the output of the web server. You might see errors like this: + + + .. code:: + + [2021-01-25 10:08:04 +0000] [40] [ERROR] Socket error processing request. 
+ Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 134, in handle + self.handle_request(listener, req, client, addr) + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 190, in handle_request + util.reraise(*sys.exc_info()) + File "/usr/local/lib/python3.7/site-packages/gunicorn/util.py", line 625, in reraise + raise value + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 178, in handle_request + resp.write_file(respiter) + File "/usr/local/lib/python3.7/site-packages/gunicorn/http/wsgi.py", line 396, in write_file + if not self.sendfile(respiter): + File "/usr/local/lib/python3.7/site-packages/gunicorn/http/wsgi.py", line 386, in sendfile + sent += os.sendfile(sockno, fileno, offset + sent, count) + OSError: [Errno 22] Invalid argument + + To fix this issue, add + + .. code:: + + SENDFILE=0 + + to your `docker-compose.env` file. \ No newline at end of file diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index eab8ed80e..ff928863d 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -188,35 +188,35 @@ Confirm delete src/app/components/document-detail/document-detail.component.ts - 192 + 199 Do you really want to delete document ""? src/app/components/document-detail/document-detail.component.ts - 193 + 200 The files for this document will be deleted permanently. This operation cannot be undone. 
src/app/components/document-detail/document-detail.component.ts - 194 + 201 Delete document src/app/components/document-detail/document-detail.component.ts - 196 + 203 Error deleting document: src/app/components/document-detail/document-detail.component.ts - 203 + 210 @@ -1215,6 +1215,13 @@ 73 + + "" + + src/app/components/document-list/bulk-editor/bulk-editor.component.ts + 112 + + "" and "" @@ -1223,13 +1230,6 @@ This is for messages like 'modify "tag1" and "tag2"' - - "" - - src/app/components/document-list/bulk-editor/bulk-editor.component.ts - 116 - - , @@ -1379,6 +1379,13 @@ 27 + + Suggestions: + + src/app/components/common/input/select/select.component.html + 26 + + Save current view @@ -1726,49 +1733,49 @@ ASN src/app/services/rest/document.service.ts - 16 + 17 Correspondent src/app/services/rest/document.service.ts - 17 + 18 Title src/app/services/rest/document.service.ts - 18 + 19 Document type src/app/services/rest/document.service.ts - 19 + 20 Created src/app/services/rest/document.service.ts - 20 + 21 Added src/app/services/rest/document.service.ts - 21 + 22 Modified src/app/services/rest/document.service.ts - 22 + 23 diff --git a/src-ui/src/app/components/common/input/select/select.component.html b/src-ui/src/app/components/common/input/select/select.component.html index aa500d0d1..540429e89 100644 --- a/src-ui/src/app/components/common/input/select/select.component.html +++ b/src-ui/src/app/components/common/input/select/select.component.html @@ -22,4 +22,12 @@ {{hint}} + + Suggestions:  + + {{s.name}}  + + + + diff --git a/src-ui/src/app/components/common/input/select/select.component.ts b/src-ui/src/app/components/common/input/select/select.component.ts index 18f30cf6e..e02aaab72 100644 --- a/src-ui/src/app/components/common/input/select/select.component.ts +++ b/src-ui/src/app/components/common/input/select/select.component.ts @@ -30,11 +30,22 @@ export class SelectComponent extends AbstractInputComponent { @Input() allowNull: boolean = false + 
@Input() + suggestions: number[] + @Output() createNew = new EventEmitter() - + showPlusButton(): boolean { return this.createNew.observers.length > 0 } + getSuggestions() { + if (this.suggestions && this.items) { + return this.suggestions.filter(id => id != this.value).map(id => this.items.find(item => item.id == id)) + } else { + return [] + } + } + } diff --git a/src-ui/src/app/components/common/input/tags/tags.component.html b/src-ui/src/app/components/common/input/tags/tags.component.html index c9a0c96d6..677b9f4d1 100644 --- a/src-ui/src/app/components/common/input/tags/tags.component.html +++ b/src-ui/src/app/components/common/input/tags/tags.component.html @@ -2,30 +2,25 @@
- + (change)="onChange(value)" + (blur)="onTouched()"> - +
-
- - - -
- +
@@ -39,5 +34,13 @@
{{hint}} + + Suggestions:  + + {{tag.name}}  + + + + diff --git a/src-ui/src/app/components/common/input/tags/tags.component.ts b/src-ui/src/app/components/common/input/tags/tags.component.ts index 5501ac5a6..f77d0570d 100644 --- a/src-ui/src/app/components/common/input/tags/tags.component.ts +++ b/src-ui/src/app/components/common/input/tags/tags.component.ts @@ -26,9 +26,6 @@ export class TagsComponent implements OnInit, ControlValueAccessor { writeValue(newValue: number[]): void { this.value = newValue - if (this.tags) { - this.displayValue = newValue - } } registerOnChange(fn: any): void { this.onChange = fn; @@ -43,7 +40,6 @@ export class TagsComponent implements OnInit, ControlValueAccessor { ngOnInit(): void { this.tagService.listAll().subscribe(result => { this.tags = result.results - this.displayValue = this.value }) } @@ -53,23 +49,28 @@ export class TagsComponent implements OnInit, ControlValueAccessor { @Input() hint - value: number[] + @Input() + suggestions: number[] - displayValue: number[] = [] + value: number[] tags: PaperlessTag[] getTag(id) { - return this.tags.find(tag => tag.id == id) + if (this.tags) { + return this.tags.find(tag => tag.id == id) + } else { + return null + } } removeTag(id) { - let index = this.displayValue.indexOf(id) + let index = this.value.indexOf(id) if (index > -1) { - let oldValue = this.displayValue + let oldValue = this.value oldValue.splice(index, 1) - this.displayValue = [...oldValue] - this.onChange(this.displayValue) + this.value = [...oldValue] + this.onChange(this.value) } } @@ -79,15 +80,23 @@ export class TagsComponent implements OnInit, ControlValueAccessor { modal.componentInstance.success.subscribe(newTag => { this.tagService.listAll().subscribe(tags => { this.tags = tags.results - this.displayValue = [...this.displayValue, newTag.id] - this.onChange(this.displayValue) + this.value = [...this.value, newTag.id] + this.onChange(this.value) }) }) } - ngSelectChange() { - this.value = this.displayValue - 
this.onChange(this.displayValue) + getSuggestions() { + if (this.suggestions && this.tags) { + return this.suggestions.filter(id => !this.value.includes(id)).map(id => this.tags.find(tag => tag.id == id)) + } else { + return [] + } + } + + addTag(id) { + this.value = [...this.value, id] + this.onChange(this.value) } } diff --git a/src-ui/src/app/components/document-detail/document-detail.component.html b/src-ui/src/app/components/document-detail/document-detail.component.html index 639b9e260..2814a1242 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.html +++ b/src-ui/src/app/components/document-detail/document-detail.component.html @@ -60,10 +60,10 @@ + (createNew)="createCorrespondent()" [suggestions]="suggestions?.correspondents"> - + (createNew)="createDocumentType()" [suggestions]="suggestions?.document_types"> + @@ -145,6 +145,6 @@ - + diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index aa2308eac..a7cce715e 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -19,6 +19,7 @@ import { PDFDocumentProxy } from 'ng2-pdf-viewer'; import { ToastService } from 'src/app/services/toast.service'; import { TextComponent } from '../common/input/text/text.component'; import { SettingsService, SETTINGS_KEYS } from 'src/app/services/settings.service'; +import { PaperlessDocumentSuggestions } from 'src/app/data/paperless-document-suggestions'; @Component({ selector: 'app-document-detail', @@ -40,6 +41,8 @@ export class DocumentDetailComponent implements OnInit { documentId: number document: PaperlessDocument metadata: PaperlessDocumentMetadata + suggestions: PaperlessDocumentSuggestions + title: string previewUrl: string downloadUrl: string @@ -95,6 +98,7 @@ export class DocumentDetailComponent implements OnInit { 
this.previewUrl = this.documentsService.getPreviewUrl(this.documentId) this.downloadUrl = this.documentsService.getDownloadUrl(this.documentId) this.downloadOriginalUrl = this.documentsService.getDownloadUrl(this.documentId, true) + this.suggestions = null if (this.openDocumentService.getOpenDocument(this.documentId)) { this.updateComponent(this.openDocumentService.getOpenDocument(this.documentId)) } else { @@ -112,6 +116,9 @@ export class DocumentDetailComponent implements OnInit { this.documentsService.getMetadata(doc.id).subscribe(result => { this.metadata = result }) + this.documentsService.getSuggestions(doc.id).subscribe(result => { + this.suggestions = result + }) this.title = this.documentTitlePipe.transform(doc.title) this.documentForm.patchValue(doc) } diff --git a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts index 6b2598fe8..04fc2a978 100644 --- a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts +++ b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts @@ -109,7 +109,7 @@ export class BulkEditorComponent { if (items.length == 0) { return "" } else if (items.length == 1) { - return items[0].name + return $localize`"${items[0].name}"` } else if (items.length == 2) { return $localize`:This is for messages like 'modify "tag1" and "tag2"':"${items[0].name}" and "${items[1].name}"` } else { diff --git a/src-ui/src/app/data/paperless-document-suggestions.ts b/src-ui/src/app/data/paperless-document-suggestions.ts new file mode 100644 index 000000000..71459eff2 --- /dev/null +++ b/src-ui/src/app/data/paperless-document-suggestions.ts @@ -0,0 +1,9 @@ +export interface PaperlessDocumentSuggestions { + + tags?: number[] + + correspondents?: number[] + + document_types?: number[] + +} \ No newline at end of file diff --git a/src-ui/src/app/services/rest/document.service.ts 
b/src-ui/src/app/services/rest/document.service.ts index dd2c32fa8..19b18cfeb 100644 --- a/src-ui/src/app/services/rest/document.service.ts +++ b/src-ui/src/app/services/rest/document.service.ts @@ -11,6 +11,7 @@ import { CorrespondentService } from './correspondent.service'; import { DocumentTypeService } from './document-type.service'; import { TagService } from './tag.service'; import { FILTER_RULE_TYPES } from 'src/app/data/filter-rule-type'; +import { PaperlessDocumentSuggestions } from 'src/app/data/paperless-document-suggestions'; export const DOCUMENT_SORT_FIELDS = [ { field: 'archive_serial_number', name: $localize`ASN` }, @@ -129,4 +130,8 @@ export class DocumentService extends AbstractPaperlessService return this.http.post(this.getResourceUrl(null, 'selection_data'), {"documents": ids}) } + getSuggestions(id: number): Observable { + return this.http.get(this.getResourceUrl(id, 'suggestions')) + } + } diff --git a/src-ui/src/locale/messages.de.xlf b/src-ui/src/locale/messages.de.xlf index a898e4630..b6e2708ba 100644 --- a/src-ui/src/locale/messages.de.xlf +++ b/src-ui/src/locale/messages.de.xlf @@ -147,7 +147,7 @@
Created - Erstellt + Ausgestellt src/app/components/document-list/document-list.component.html 129 @@ -166,7 +166,7 @@ Löschen bestätigen src/app/components/document-detail/document-detail.component.ts - 192 + 199 @@ -174,7 +174,7 @@ Möchten Sie das Dokument "" wirklich löschen? src/app/components/document-detail/document-detail.component.ts - 193 + 200 @@ -182,7 +182,7 @@ Die Dateien dieses Dokuments werden permanent gelöscht. Diese Aktion kann nicht rückgängig gemacht werden. src/app/components/document-detail/document-detail.component.ts - 194 + 201 @@ -190,7 +190,7 @@ Dokument löschen src/app/components/document-detail/document-detail.component.ts - 196 + 203 @@ -198,7 +198,7 @@ Fehler beim Löschen des Dokuments: src/app/components/document-detail/document-detail.component.ts - 203 + 210 @@ -307,7 +307,7 @@ Date created - Erstellt am + Ausgestellt am src/app/components/document-detail/document-detail.component.html 61 @@ -1283,6 +1283,14 @@ 73 + + "" + "" + + src/app/components/document-list/bulk-editor/bulk-editor.component.ts + 112 + + "" and "" "" und "" @@ -1292,14 +1300,6 @@ This is for messages like 'modify "tag1" and "tag2"' - - "" - "" - - src/app/components/document-list/bulk-editor/bulk-editor.component.ts - 116 - - , , @@ -1470,6 +1470,14 @@ 27 + + Suggestions: + Vorschläge: + + src/app/components/common/input/select/select.component.html + 26 + + Save current view Aktuelle Ansicht speichern @@ -1723,7 +1731,7 @@ ASN src/app/services/rest/document.service.ts - 16 + 17 @@ -1731,7 +1739,7 @@ Korrespondent src/app/services/rest/document.service.ts - 17 + 18 @@ -1739,7 +1747,7 @@ Titel src/app/services/rest/document.service.ts - 18 + 19 @@ -1747,15 +1755,15 @@ Dokumenttyp src/app/services/rest/document.service.ts - 19 + 20 Created - Erstellt am + Ausgestellt am src/app/services/rest/document.service.ts - 20 + 21 @@ -1763,7 +1771,7 @@ Hinzugefügt am src/app/services/rest/document.service.ts - 21 + 22 @@ -1771,7 +1779,7 @@ Geändert am 
src/app/services/rest/document.service.ts - 22 + 23 diff --git a/src-ui/src/locale/messages.fr.xlf b/src-ui/src/locale/messages.fr.xlf index 4f09eab72..62dbad342 100644 --- a/src-ui/src/locale/messages.fr.xlf +++ b/src-ui/src/locale/messages.fr.xlf @@ -166,7 +166,7 @@ Confirmer la suppression src/app/components/document-detail/document-detail.component.ts - 192 + 199 @@ -174,7 +174,7 @@ Voulez-vous vraiment supprimer le document "" ? src/app/components/document-detail/document-detail.component.ts - 193 + 200 @@ -182,7 +182,7 @@ Les fichiers liés à ce document seront supprimés définitivement. Cette action est irréversible. src/app/components/document-detail/document-detail.component.ts - 194 + 201 @@ -190,7 +190,7 @@ Supprimer le document src/app/components/document-detail/document-detail.component.ts - 196 + 203 @@ -198,7 +198,7 @@ Une erreur s'est produite lors de la suppression du document : src/app/components/document-detail/document-detail.component.ts - 203 + 210 @@ -1283,6 +1283,14 @@ 73 + + "" + "" + + src/app/components/document-list/bulk-editor/bulk-editor.component.ts + 112 + + "" and "" "" et "" @@ -1292,14 +1300,6 @@ This is for messages like 'modify "tag1" and "tag2"' - - "" - "" - - src/app/components/document-list/bulk-editor/bulk-editor.component.ts - 116 - - , , @@ -1470,6 +1470,14 @@ 27 + + Suggestions: + Suggestions : + + src/app/components/common/input/select/select.component.html + 26 + + Save current view Enregistrer la vue actuelle @@ -1723,7 +1731,7 @@ NSA src/app/services/rest/document.service.ts - 16 + 17 @@ -1731,7 +1739,7 @@ Correspondant src/app/services/rest/document.service.ts - 17 + 18 @@ -1739,7 +1747,7 @@ Titre src/app/services/rest/document.service.ts - 18 + 19 @@ -1747,7 +1755,7 @@ Type de document src/app/services/rest/document.service.ts - 19 + 20 @@ -1755,7 +1763,7 @@ Date de création src/app/services/rest/document.service.ts - 20 + 21 @@ -1763,7 +1771,7 @@ Date d'ajout src/app/services/rest/document.service.ts - 21 + 22 @@ 
-1771,7 +1779,7 @@ Date de modification src/app/services/rest/document.service.ts - 22 + 23 diff --git a/src/documents/classifier.py b/src/documents/classifier.py index 60c9abeec..b427264c8 100755 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -26,6 +26,34 @@ def preprocess_content(content): return content +def load_classifier(): + if not os.path.isfile(settings.MODEL_FILE): + logger.debug( + f"Document classification model does not exist (yet), not " + f"performing automatic matching." + ) + return None + + try: + classifier = DocumentClassifier() + classifier.reload() + except (EOFError, IncompatibleClassifierVersionError) as e: + # there's something wrong with the model file. + logger.error( + f"Unrecoverable error while loading document " + f"classification model: {str(e)}, deleting model file." + ) + os.unlink(settings.MODEL_FILE) + classifier = None + except OSError as e: + logger.error( + f"Error while loading document classification model: {str(e)}" + ) + classifier = None + + return classifier + + class DocumentClassifier(object): FORMAT_VERSION = 6 diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 146b11014..f8f7576ef 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -14,7 +14,7 @@ from django.utils import timezone from filelock import FileLock from rest_framework.reverse import reverse -from .classifier import DocumentClassifier, IncompatibleClassifierVersionError +from .classifier import load_classifier from .file_handling import create_source_path_directory, \ generate_unique_filename from .loggers import LoggingMixin @@ -262,14 +262,8 @@ class Consumer(LoggingMixin): # reloading the classifier multiple times, since there are multiple # post-consume hooks that all require the classifier. 
- try: - classifier = DocumentClassifier() - classifier.reload() - except (OSError, EOFError, IncompatibleClassifierVersionError) as e: - self.log( - "warning", - f"Cannot classify documents: {e}.") - classifier = None + classifier = load_classifier() + self._send_progress(95, 100, 'WORKING', MESSAGE_SAVE_DOCUMENT) # now that everything is done, we can start to store the document # in the system. This will be a transaction and reasonably fast. diff --git a/src/documents/management/commands/document_retagger.py b/src/documents/management/commands/document_retagger.py index 0fb9782c1..b2f5d8918 100755 --- a/src/documents/management/commands/document_retagger.py +++ b/src/documents/management/commands/document_retagger.py @@ -2,8 +2,7 @@ import logging from django.core.management.base import BaseCommand -from documents.classifier import DocumentClassifier, \ - IncompatibleClassifierVersionError +from documents.classifier import load_classifier from documents.models import Document from ...mixins import Renderable from ...signals.handlers import set_correspondent, set_document_type, set_tags @@ -70,13 +69,7 @@ class Command(Renderable, BaseCommand): queryset = Document.objects.all() documents = queryset.distinct() - classifier = DocumentClassifier() - try: - classifier.reload() - except (OSError, EOFError, IncompatibleClassifierVersionError) as e: - logging.getLogger(__name__).warning( - f"Cannot classify documents: {e}.") - classifier = None + classifier = load_classifier() for document in documents: logging.getLogger(__name__).info( diff --git a/src/documents/tasks.py b/src/documents/tasks.py index c67b2b3fa..e0d726d3e 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -6,10 +6,9 @@ from django.db.models.signals import post_save from whoosh.writing import AsyncWriter from documents import index, sanity_checker -from documents.classifier import DocumentClassifier, \ - IncompatibleClassifierVersionError +from documents.classifier import 
DocumentClassifier, load_classifier from documents.consumer import Consumer, ConsumerError -from documents.models import Document +from documents.models import Document, Tag, DocumentType, Correspondent from documents.sanity_checker import SanityFailedError @@ -30,13 +29,18 @@ def index_reindex(): def train_classifier(): - classifier = DocumentClassifier() + if (not Tag.objects.filter( + matching_algorithm=Tag.MATCH_AUTO).exists() and + not DocumentType.objects.filter( + matching_algorithm=Tag.MATCH_AUTO).exists() and + not Correspondent.objects.filter( + matching_algorithm=Tag.MATCH_AUTO).exists()): - try: - # load the classifier, since we might not have to train it again. - classifier.reload() - except (OSError, EOFError, IncompatibleClassifierVersionError): - # This is what we're going to fix here. + return + + classifier = load_classifier() + + if not classifier: classifier = DocumentClassifier() try: @@ -52,7 +56,7 @@ def train_classifier(): ) except Exception as e: - logging.getLogger(__name__).error( + logging.getLogger(__name__).warning( "Classifier error: " + str(e) ) diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index 2b332a873..9e4b77189 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -590,6 +590,10 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(len(meta['original_metadata']), 0) self.assertGreater(len(meta['archive_metadata']), 0) + def test_get_metadata_invalid_doc(self): + response = self.client.get(f"/api/documents/34576/metadata/") + self.assertEqual(response.status_code, 404) + def test_get_metadata_no_archive(self): doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf") @@ -605,6 +609,30 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertGreater(len(meta['original_metadata']), 0) self.assertIsNone(meta['archive_metadata']) + def test_get_empty_suggestions(self): + doc = 
Document.objects.create(title="test", mime_type="application/pdf") + + response = self.client.get(f"/api/documents/{doc.pk}/suggestions/") + + self.assertEqual(response.status_code, 200) + self.assertEqual(response.data, {'correspondents': [], 'tags': [], 'document_types': []}) + + def test_get_suggestions_invalid_doc(self): + response = self.client.get(f"/api/documents/34676/suggestions/") + self.assertEqual(response.status_code, 404) + + @mock.patch("documents.views.match_correspondents") + @mock.patch("documents.views.match_tags") + @mock.patch("documents.views.match_document_types") + def test_get_suggestions(self, match_document_types, match_tags, match_correspondents): + doc = Document.objects.create(title="test", mime_type="application/pdf", content="this is an invoice!") + match_tags.return_value = [Tag(id=56), Tag(id=123)] + match_document_types.return_value = [DocumentType(id=23)] + match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)] + + response = self.client.get(f"/api/documents/{doc.pk}/suggestions/") + self.assertEqual(response.data, {'correspondents': [88,2], 'tags': [56,123], 'document_types': [23]}) + def test_saved_views(self): u1 = User.objects.create_user("user1") u2 = User.objects.create_user("user2") diff --git a/src/documents/tests/test_classifier.py b/src/documents/tests/test_classifier.py index 9e999794d..43c38b691 100644 --- a/src/documents/tests/test_classifier.py +++ b/src/documents/tests/test_classifier.py @@ -1,10 +1,13 @@ +import os import tempfile +from pathlib import Path from time import sleep from unittest import mock +from django.conf import settings from django.test import TestCase, override_settings -from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError +from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError, load_classifier from documents.models import Correspondent, Document, Tag, DocumentType from documents.tests.utils import 
DirectoriesMixin @@ -235,3 +238,30 @@ class TestClassifier(DirectoriesMixin, TestCase): self.classifier.train() self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk]) self.assertListEqual(self.classifier.predict_tags(doc2.content), []) + + def test_load_classifier_not_exists(self): + self.assertFalse(os.path.exists(settings.MODEL_FILE)) + self.assertIsNone(load_classifier()) + + @mock.patch("documents.classifier.DocumentClassifier.reload") + def test_load_classifier(self, reload): + Path(settings.MODEL_FILE).touch() + self.assertIsNotNone(load_classifier()) + + @mock.patch("documents.classifier.DocumentClassifier.reload") + def test_load_classifier_incompatible_version(self, reload): + Path(settings.MODEL_FILE).touch() + self.assertTrue(os.path.exists(settings.MODEL_FILE)) + + reload.side_effect = IncompatibleClassifierVersionError() + self.assertIsNone(load_classifier()) + self.assertFalse(os.path.exists(settings.MODEL_FILE)) + + @mock.patch("documents.classifier.DocumentClassifier.reload") + def test_load_classifier_os_error(self, reload): + Path(settings.MODEL_FILE).touch() + self.assertTrue(os.path.exists(settings.MODEL_FILE)) + + reload.side_effect = OSError() + self.assertIsNone(load_classifier()) + self.assertTrue(os.path.exists(settings.MODEL_FILE)) diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 22e6afb61..a6f0cc55a 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -460,7 +460,7 @@ class TestConsumer(DirectoriesMixin, TestCase): self._assert_first_last_send_progress() - @mock.patch("documents.consumer.DocumentClassifier") + @mock.patch("documents.consumer.load_classifier") def testClassifyDocument(self, m): correspondent = Correspondent.objects.create(name="test") dtype = DocumentType.objects.create(name="test") diff --git a/src/documents/tests/test_settings.py b/src/documents/tests/test_settings.py index 21f29b4d9..0036daee7 100644 --- 
a/src/documents/tests/test_settings.py +++ b/src/documents/tests/test_settings.py @@ -20,7 +20,7 @@ class TestSettings(TestCase): self.assertEqual(default_threads, 1) def test_workers_threads(self): - for i in range(2, 64): + for i in range(1, 64): with mock.patch("paperless.settings.multiprocessing.cpu_count") as cpu_count: cpu_count.return_value = i @@ -31,4 +31,4 @@ class TestSettings(TestCase): self.assertTrue(default_workers >= 1) self.assertTrue(default_threads >= 1) - self.assertTrue(default_workers * default_threads < i, f"{i}") + self.assertTrue(default_workers * default_threads <= i, f"{i}") diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index 653590707..d008f995a 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -1,11 +1,12 @@ -from datetime import datetime +import os from unittest import mock +from django.conf import settings from django.test import TestCase from django.utils import timezone from documents import tasks -from documents.models import Document +from documents.models import Document, Tag, Correspondent, DocumentType from documents.sanity_checker import SanityError, SanityFailedError from documents.tests.utils import DirectoriesMixin @@ -22,8 +23,55 @@ class TestTasks(DirectoriesMixin, TestCase): tasks.index_optimize() - def test_train_classifier(self): + @mock.patch("documents.tasks.load_classifier") + def test_train_classifier_no_auto_matching(self, load_classifier): tasks.train_classifier() + load_classifier.assert_not_called() + + @mock.patch("documents.tasks.load_classifier") + def test_train_classifier_with_auto_tag(self, load_classifier): + load_classifier.return_value = None + Tag.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") + tasks.train_classifier() + load_classifier.assert_called_once() + self.assertFalse(os.path.isfile(settings.MODEL_FILE)) + + @mock.patch("documents.tasks.load_classifier") + def 
test_train_classifier_with_auto_type(self, load_classifier): + load_classifier.return_value = None + DocumentType.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") + tasks.train_classifier() + load_classifier.assert_called_once() + self.assertFalse(os.path.isfile(settings.MODEL_FILE)) + + @mock.patch("documents.tasks.load_classifier") + def test_train_classifier_with_auto_correspondent(self, load_classifier): + load_classifier.return_value = None + Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") + tasks.train_classifier() + load_classifier.assert_called_once() + self.assertFalse(os.path.isfile(settings.MODEL_FILE)) + + def test_train_classifier(self): + c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") + doc = Document.objects.create(correspondent=c, content="test", title="test") + self.assertFalse(os.path.isfile(settings.MODEL_FILE)) + + tasks.train_classifier() + self.assertTrue(os.path.isfile(settings.MODEL_FILE)) + mtime = os.stat(settings.MODEL_FILE).st_mtime + + tasks.train_classifier() + self.assertTrue(os.path.isfile(settings.MODEL_FILE)) + mtime2 = os.stat(settings.MODEL_FILE).st_mtime + self.assertEqual(mtime, mtime2) + + doc.content = "test2" + doc.save() + tasks.train_classifier() + self.assertTrue(os.path.isfile(settings.MODEL_FILE)) + mtime3 = os.stat(settings.MODEL_FILE).st_mtime + self.assertNotEqual(mtime2, mtime3) @mock.patch("documents.tasks.sanity_checker.check_sanity") def test_sanity_check(self, m): @@ -35,7 +83,7 @@ class TestTasks(DirectoriesMixin, TestCase): self.assertRaises(SanityFailedError, tasks.sanity_check) m.assert_called_once() - def test_culk_update_documents(self): + def test_bulk_update_documents(self): doc1 = Document.objects.create(title="test", content="my document", checksum="wow", added=timezone.now(), created=timezone.now(), modified=timezone.now()) diff --git a/src/documents/views.py b/src/documents/views.py index 2012e879a..890e76a80 100755 --- 
a/src/documents/views.py +++ b/src/documents/views.py @@ -35,6 +35,7 @@ from rest_framework.viewsets import ( import documents.index as index from paperless.db import GnuPG from paperless.views import StandardPagination +from .classifier import load_classifier from .filters import ( CorrespondentFilterSet, DocumentFilterSet, @@ -42,6 +43,7 @@ from .filters import ( DocumentTypeFilterSet, LogFilterSet ) +from .matching import match_correspondents, match_tags, match_document_types from .models import Correspondent, Document, Log, Tag, DocumentType, SavedView from .parsers import get_parser_class_for_mime_type from .serialisers import ( @@ -133,10 +135,6 @@ class DocumentTypeViewSet(ModelViewSet): ordering_fields = ("name", "matching_algorithm", "match", "document_count") -class BulkEditForm(object): - pass - - class DocumentViewSet(RetrieveModelMixin, UpdateModelMixin, DestroyModelMixin, @@ -230,31 +228,50 @@ class DocumentViewSet(RetrieveModelMixin, def metadata(self, request, pk=None): try: doc = Document.objects.get(pk=pk) - - meta = { - "original_checksum": doc.checksum, - "original_size": os.stat(doc.source_path).st_size, - "original_mime_type": doc.mime_type, - "media_filename": doc.filename, - "has_archive_version": os.path.isfile(doc.archive_path), - "original_metadata": self.get_metadata( - doc.source_path, doc.mime_type) - } - - if doc.archive_checksum and os.path.isfile(doc.archive_path): - meta['archive_checksum'] = doc.archive_checksum - meta['archive_size'] = os.stat(doc.archive_path).st_size, - meta['archive_metadata'] = self.get_metadata( - doc.archive_path, "application/pdf") - else: - meta['archive_checksum'] = None - meta['archive_size'] = None - meta['archive_metadata'] = None - - return Response(meta) except Document.DoesNotExist: raise Http404() + meta = { + "original_checksum": doc.checksum, + "original_size": os.stat(doc.source_path).st_size, + "original_mime_type": doc.mime_type, + "media_filename": doc.filename, + "has_archive_version": 
os.path.isfile(doc.archive_path), + "original_metadata": self.get_metadata( + doc.source_path, doc.mime_type) + } + + if doc.archive_checksum and os.path.isfile(doc.archive_path): + meta['archive_checksum'] = doc.archive_checksum + meta['archive_size'] = os.stat(doc.archive_path).st_size, + meta['archive_metadata'] = self.get_metadata( + doc.archive_path, "application/pdf") + else: + meta['archive_checksum'] = None + meta['archive_size'] = None + meta['archive_metadata'] = None + + return Response(meta) + + @action(methods=['get'], detail=True) + def suggestions(self, request, pk=None): + try: + doc = Document.objects.get(pk=pk) + except Document.DoesNotExist: + raise Http404() + + classifier = load_classifier() + + return Response({ + "correspondents": [ + c.id for c in match_correspondents(doc, classifier) + ], + "tags": [t.id for t in match_tags(doc, classifier)], + "document_types": [ + dt.id for dt in match_document_types(doc, classifier) + ] + }) + @action(methods=['get'], detail=True) def preview(self, request, pk=None): try: @@ -382,6 +399,7 @@ class PostDocumentView(APIView): with tempfile.NamedTemporaryFile(prefix="paperless-upload-", dir=settings.SCRATCH_DIR, + buffering=0, delete=False) as f: f.write(doc_data) os.utime(f.name, times=(t, t)) diff --git a/src/paperless/checks.py b/src/paperless/checks.py index 1329ad679..df4d45e38 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -22,7 +22,7 @@ def path_check(var, directory): exists_hint.format(directory) )) elif not os.access(directory, os.W_OK | os.X_OK): - messages.append(Error( + messages.append(Warning( writeable_message.format(var), writeable_hint.format(directory) )) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 678ce5a21..b6d01ba53 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -366,8 +366,10 @@ LOGGING = { def default_task_workers(): # always leave one core open - available_cores = max(multiprocessing.cpu_count() - 1, 1) + 
available_cores = max(multiprocessing.cpu_count(), 1) try: + if available_cores < 4: + return available_cores return max( math.floor(math.sqrt(available_cores)), 1 @@ -388,7 +390,7 @@ Q_CLUSTER = { def default_threads_per_worker(task_workers): # always leave one core open - available_cores = max(multiprocessing.cpu_count() - 1, 1) + available_cores = max(multiprocessing.cpu_count(), 1) try: return max( math.floor(available_cores / task_workers),