From 10b53bdfd08c08ee3d9e896f4933a10d741b05db Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Thu, 21 Jan 2021 13:49:12 +0100 Subject: [PATCH 01/42] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 89f55b2d9..d06f78246 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,10 @@ Paperless-ng is a fork of the original project, adding a new interface and many other changes under the hood. For a detailed list of changes, have a look at the changelog in the documentation. +# Survey + +If you already used Paperless-ng for a bit, would like to give some anonymous feedback, and help me decide on what to focus on next: I've created a survey, [see here](https://github.com/jonaswinkler/paperless-ng/issues/402). Thank you! + # How it Works Paperless does not control your scanner, it only helps you deal with what your scanner produces. From eca1289ce2f34f09254341d60801eb3d94ff9bcb Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Sat, 9 Jan 2021 10:49:44 +0100 Subject: [PATCH 02/42] Build release archive when version is not avail Default ansible installation version to "latest" (pulls the latest published release archive). 
--- ansible/defaults/main.yml | 2 +- ansible/tasks/install-release.yml | 6 ++ ansible/tasks/install-source.yml | 133 +++++++++++++++++++++++++++ ansible/tasks/main.yml | 148 +++++++++++++++++------------- 4 files changed, 222 insertions(+), 67 deletions(-) create mode 100644 ansible/tasks/install-release.yml create mode 100644 ansible/tasks/install-source.yml diff --git a/ansible/defaults/main.yml b/ansible/defaults/main.yml index 83047307d..aaeffa507 100644 --- a/ansible/defaults/main.yml +++ b/ansible/defaults/main.yml @@ -1,5 +1,5 @@ --- -paperlessng_version: 0.9.14 +paperlessng_version: latest # 'latest', release number, or github branch/tag/commit/ref # Required services paperlessng_redis_host: localhost diff --git a/ansible/tasks/install-release.yml b/ansible/tasks/install-release.yml new file mode 100644 index 000000000..c2dfb0b9f --- /dev/null +++ b/ansible/tasks/install-release.yml @@ -0,0 +1,6 @@ +--- +- name: extract paperless-ng + unarchive: + src: "https://github.com/jonaswinkler/paperless-ng/releases/download/ng-{{ paperlessng_version }}/paperless-ng-{{ paperlessng_version }}.tar.xz" + remote_src: yes + dest: "{{ tempdir.path }}" diff --git a/ansible/tasks/install-source.yml b/ansible/tasks/install-source.yml new file mode 100644 index 000000000..823445dd1 --- /dev/null +++ b/ansible/tasks/install-source.yml @@ -0,0 +1,133 @@ +--- +# https://github.com/jonaswinkler/paperless-ng/blob/dev/.github/workflows/ci.yml +- name: install dev dependencies + apt: + pkg: + - git + - npm + - libqpdf-dev + +- name: create temporary git directory + tempfile: + state: directory + register: gitdir + +- name: pull paperless-ng + git: + repo: https://github.com/jonaswinkler/paperless-ng.git + dest: "{{ gitdir.path }}" + version: "{{ paperlessng_version }}" + refspec: "+refs/pull/*:refs/pull/*" + when: '"No such file or directory" in paperlessng_current_version.stderr or paperlessng_current_version.stdout != paperlessng_version | string' + +- name: compile frontend + 
command: + cmd: "{{ item }}" + args: + chdir: "{{ gitdir.path }}/src-ui" + failed_when: false + with_items: + - npm install -g @angular/cli + - npm install + - ./node_modules/.bin/ng build --prod + +- name: install pipenv + pip: + name: + - pipenv + - pybind11 # building pikepdf for <0.9.14 + extra_args: --upgrade + +- name: allow building with any Python 3 release + lineinfile: + path: "{{ gitdir.path }}/Pipfile" + regexp: '^python_version = ".+"$' + line: python_version = "3" + +# TODO run dev in separate virtualenv +- name: install Pipfile dependencies + command: + cmd: pipenv install --dev + args: + chdir: "{{ gitdir.path }}" + +- name: clean output directory + file: + path: "{{ gitdir.path }}/dist" + state: absent + +- name: create output directories + file: + path: "{{ item }}" + state: directory + with_items: + - "{{ gitdir.path }}/dist" + - "{{ gitdir.path }}/dist/paperless-ng" + - "{{ gitdir.path }}/dist/paperless-ng/scripts" + +- name: copy application into place + copy: + src: "{{ gitdir.path }}/{{ item.src }}" + remote_src: yes + dest: "{{ gitdir.path }}/dist/paperless-ng/{{ item.dest | default('') }}" + with_items: + - src: CONTRIBUTING.md + - src: LICENSE + - src: Pipfile + - src: Pipfile.lock + - src: README.md + - src: paperless.conf.example + dest: "paperless.conf" + +# TODO can be copied for >=0.9.14 +- name: generate requirements.txt + command: + cmd: pipenv lock --keep-outdated -r + args: + chdir: "{{ gitdir.path }}" + register: requirements + +- name: write requirements.txt + copy: + content: "{{ requirements.stdout }}" + dest: "{{ gitdir.path }}/dist/paperless-ng/requirements.txt" + +- name: glob all scripts + find: + paths: "{{ gitdir.path }}/scripts/" + patterns: + - "*.service" + - "*.sh" + register: glob + +- name: copy scripts + copy: + src: "{{ item.path }}" + remote_src: yes + dest: "{{ gitdir.path }}/dist/paperless-ng/scripts/" + with_items: + - "{{ glob.files }}" + +- name: copy sources + command: + cmd: "cp -r src/ 
dist/paperless-ng/src" + args: + chdir: "{{ gitdir.path }}" + +- name: package app + archive: + path: "{{ gitdir.path }}/dist/" + dest: "{{ gitdir.path }}/paperless-ng-{{ paperlessng_version }}.tar.xz" + format: xz + +- name: extract paperless-ng + unarchive: + src: "{{ gitdir.path }}/paperless-ng-{{ paperlessng_version }}.tar.xz" + remote_src: yes + dest: "{{ tempdir.path }}" + when: '"No such file or directory" in paperlessng_current_version.stderr or paperlessng_current_version.stdout != paperlessng_version | string' + +- name: remove temporary git directory + file: + path: "{{ gitdir.path }}" + state: absent diff --git a/ansible/tasks/main.yml b/ansible/tasks/main.yml index a353a18ec..15f7f9ba1 100644 --- a/ansible/tasks/main.yml +++ b/ansible/tasks/main.yml @@ -34,7 +34,13 @@ - build-essential - python3-setuptools - python3-wheel - - python3-virtualenv + +# upstream virtualenv in Ubuntu 20.04 is broken +# https://github.com/pypa/virtualenv/issues/1873 +- name: install python virtualenv + pip: + name: virtualenv + extra_args: --upgrade - name: install ocr languages apt: @@ -97,6 +103,18 @@ # GNUPG_HOME required due to paperless db.py create_home: yes +- block: + - name: get latest release version + uri: + url: https://api.github.com/repos/jonaswinkler/paperless-ng/releases/latest + method: GET + register: latest_release + - name: parse latest release version + set_fact: + paperlessng_version: "{{ latest_release.json['tag_name'] | regex_replace('^ng-(.+)$', '\\1') }}" + when: paperlessng_version == "latest" + +# TODO store commit hash of installed version, use instead of version number - name: check for paperless-ng installation command: cmd: 'grep -Po "(?<=Paperless-ng )\d+\.\d+\.\d+" {{ paperlessng_directory }}/docs/changelog.html' @@ -109,59 +127,58 @@ set_fact: fresh_installation: '{{ "No such file or directory" in paperlessng_current_version.stderr }}' update_installation: '{{ "No such file or directory" not in paperlessng_current_version.stderr and 
paperlessng_current_version.stdout != paperlessng_version | string }}' - reconfigure_only: '{{ paperlessng_current_version.stdout == paperlessng_version | string }}' + reconfigure_only: "{{ paperlessng_current_version.stdout == paperlessng_version | string }}" -- name: backup current paperless-ng installation - copy: - src: "{{ paperlessng_directory }}" - remote_src: yes - dest: "{{ paperlessng_directory }}-{{ ansible_date_time.iso8601 }}/" +- block: + - name: backup current paperless-ng installation + copy: + src: "{{ paperlessng_directory }}" + remote_src: yes + dest: "{{ paperlessng_directory }}-{{ ansible_date_time.iso8601 }}/" + - name: remove current paperless sources + file: + path: "{{ paperlessng_directory }}/{{ item }}" + state: absent + with_items: + - docker + - docs + - scripts + - src + - static when: update_installation -- name: remove current paperless sources - file: - path: "{{ paperlessng_directory }}/{{ item }}" - state: absent - with_items: - - docker - - docs - - scripts - - src - - static - when: update_installation - -- name: create temporary directory - tempfile: - state: directory - register: tempdir - when: not reconfigure_only - -- name: extract paperless-ng - unarchive: - src: "https://github.com/jonaswinkler/paperless-ng/releases/download/ng-{{ paperlessng_version }}/paperless-ng-{{ paperlessng_version }}.tar.xz" - remote_src: yes - dest: "{{ tempdir.path }}" - when: not reconfigure_only - -- name: change owner and permissions of paperless-ng - command: - cmd: "{{ item }}" - warn: false - with_items: - - "chown -R {{ paperlessng_system_user }}:{{ paperlessng_system_group }} {{ tempdir.path }}" - - "find {{ tempdir.path }} -type d -exec chmod 0750 {} ;" - - "find {{ tempdir.path }} -type f -exec chmod 0640 {} ;" - when: not reconfigure_only - -- name: move paperless-ng - command: - cmd: "cp -a {{ tempdir.path }}/paperless-ng/. 
{{ paperlessng_directory }}" - when: not reconfigure_only - -- name: remove temporary directory - file: - path: "{{ tempdir.path }}" - state: absent +- block: + - name: create temporary directory + tempfile: + state: directory + register: tempdir + - name: check if version is available as release archive + uri: + url: "https://github.com/jonaswinkler/paperless-ng/releases/download/ng-{{ paperlessng_version }}/paperless-ng-{{ paperlessng_version }}.tar.xz" + method: GET + status_code: [200, 302, 404] + register: release_archive + - name: install paperless-ng from source + include_tasks: install-source.yml + when: release_archive.status == 404 + - name: install paperless-ng from release archive + include_tasks: install-release.yml + when: release_archive.status != 404 + - name: change owner and permissions of paperless-ng + command: + cmd: "{{ item }}" + warn: false + with_items: + - "chown -R {{ paperlessng_system_user }}:{{ paperlessng_system_group }} {{ tempdir.path }}" + - "find {{ tempdir.path }} -type d -exec chmod 0750 {} ;" + - "find {{ tempdir.path }} -type f -exec chmod 0640 {} ;" + - name: move paperless-ng + command: + cmd: "cp -a {{ tempdir.path }}/paperless-ng/. 
{{ paperlessng_directory }}" + - name: remove temporary directory + file: + path: "{{ tempdir.path }}" + state: absent when: not reconfigure_only - name: create paperless-ng directories and set permissions @@ -310,21 +327,20 @@ creates: "{{ paperlessng_virtualenv }}" register: venv -- name: install paperlessng requirements - become: yes - become_user: "{{ paperlessng_system_user }}" - pip: - requirements: "{{ paperlessng_directory }}/requirements.txt" - executable: "{{ paperlessng_virtualenv }}/bin/pip3" - extra_args: --upgrade - when: not reconfigure_only - -- name: migrate database schema - become: yes - become_user: "{{ paperlessng_system_user }}" - command: "{{ paperlessng_virtualenv }}/bin/python3 {{ paperlessng_directory }}/src/manage.py migrate" - register: database_schema - changed_when: '"No migrations to apply." not in database_schema.stdout' +- block: + - name: install paperlessng requirements + become: yes + become_user: "{{ paperlessng_system_user }}" + pip: + requirements: "{{ paperlessng_directory }}/requirements.txt" + executable: "{{ paperlessng_virtualenv }}/bin/pip3" + extra_args: --upgrade + - name: migrate database schema + become: yes + become_user: "{{ paperlessng_system_user }}" + command: "{{ paperlessng_virtualenv }}/bin/python3 {{ paperlessng_directory }}/src/manage.py migrate" + register: database_schema + changed_when: '"No migrations to apply." 
not in database_schema.stdout' when: not reconfigure_only - name: configure paperless superuser From 8d624937744abf6ed3ce4d521178e1d62f1dfc64 Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Fri, 22 Jan 2021 11:10:56 +0100 Subject: [PATCH 03/42] Adapt github action to build PR version --- .github/workflows/ansible.yml | 23 ++++++++++++++++++++++- ansible/molecule/fresh/converge.yml | 6 ++++++ ansible/molecule/update/converge.yml | 5 ++--- ansible/molecule/update/prepare.yml | 2 +- ansible/tasks/install-source.yml | 4 +--- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible.yml index 646c7ff81..60589f87d 100644 --- a/.github/workflows/ansible.yml +++ b/.github/workflows/ansible.yml @@ -5,7 +5,7 @@ on: [push, pull_request] jobs: # https://molecule.readthedocs.io/en/latest/ci.html#github-actions - test: + test-fresh: runs-on: ubuntu-latest # https://docs.github.com/en/free-pro-team@latest/actions/reference/context-and-expression-syntax-for-github-actions#github-context if: github.event_name == 'pull_request' || (github.event_name == 'push' && contains(github.ref, 'refs/heads/')) @@ -31,6 +31,27 @@ jobs: cd ansible molecule test -s fresh working-directory: "${{ github.repository }}" + test-update: + runs-on: ubuntu-latest + # https://docs.github.com/en/free-pro-team@latest/actions/reference/context-and-expression-syntax-for-github-actions#github-context + if: github.event_name == 'pull_request' || (github.event_name == 'push' && contains(github.ref, 'refs/heads/')) + steps: + - name: Check out the codebase + uses: actions/checkout@v2 + with: + path: "${{ github.repository }}" + - name: Set up Python + uses: actions/setup-python@v2 + - name: Set up Docker + uses: docker-practice/actions-setup-docker@master + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install molecule[ansible,docker] + ansible --version + docker --version + molecule --version + python 
--version - name: Test release update with molecule run: | cd ansible diff --git a/ansible/molecule/fresh/converge.yml b/ansible/molecule/fresh/converge.yml index 99e25677b..39d20050d 100644 --- a/ansible/molecule/fresh/converge.yml +++ b/ansible/molecule/fresh/converge.yml @@ -2,6 +2,12 @@ - name: fresh installation hosts: all tasks: + - name: set github ref as version when available + set_fact: + paperlessng_version: "{{ lookup('env', 'GITHUB_REF') | default('latest', True) }}" + - name: debug + debug: + var: paperlessng_version - name: install paperless-ng with default parameters include_role: name: ansible diff --git a/ansible/molecule/update/converge.yml b/ansible/molecule/update/converge.yml index b19a5981a..f5f9b17c2 100644 --- a/ansible/molecule/update/converge.yml +++ b/ansible/molecule/update/converge.yml @@ -2,10 +2,9 @@ - name: update previous release to newest release hosts: all tasks: - - name: set current version as installation target + - name: set github ref as version when available set_fact: - paperlessng_version: 0.9.14 - + paperlessng_version: "{{ lookup('env', 'GITHUB_REF') | default('latest', True) }}" - name: update to newest paperless-ng release include_role: name: ansible diff --git a/ansible/molecule/update/prepare.yml b/ansible/molecule/update/prepare.yml index 6f3734329..138ebdfce 100644 --- a/ansible/molecule/update/prepare.yml +++ b/ansible/molecule/update/prepare.yml @@ -3,7 +3,7 @@ tasks: - name: set previous version as installation target set_fact: - paperlessng_version: 0.9.13 + paperlessng_version: 1.0.0 - name: install previous paperless-ng release include_role: diff --git a/ansible/tasks/install-source.yml b/ansible/tasks/install-source.yml index 823445dd1..8f6dc0e03 100644 --- a/ansible/tasks/install-source.yml +++ b/ansible/tasks/install-source.yml @@ -18,7 +18,6 @@ dest: "{{ gitdir.path }}" version: "{{ paperlessng_version }}" refspec: "+refs/pull/*:refs/pull/*" - when: '"No such file or directory" in 
paperlessng_current_version.stderr or paperlessng_current_version.stdout != paperlessng_version | string' - name: compile frontend command: @@ -31,6 +30,7 @@ - npm install - ./node_modules/.bin/ng build --prod +# TODO run dev in separate virtualenv - name: install pipenv pip: name: @@ -44,7 +44,6 @@ regexp: '^python_version = ".+"$' line: python_version = "3" -# TODO run dev in separate virtualenv - name: install Pipfile dependencies command: cmd: pipenv install --dev @@ -125,7 +124,6 @@ src: "{{ gitdir.path }}/paperless-ng-{{ paperlessng_version }}.tar.xz" remote_src: yes dest: "{{ tempdir.path }}" - when: '"No such file or directory" in paperlessng_current_version.stderr or paperlessng_current_version.stdout != paperlessng_version | string' - name: remove temporary git directory file: From 25a0845efddb559c0cfb1eb585d9719a7b7047e9 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Fri, 22 Jan 2021 15:07:52 +0100 Subject: [PATCH 04/42] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d06f78246..6a570d79d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![ci](https://github.com/jonaswinkler/paperless-ng/workflows/ci/badge.svg) +[![ci](https://github.com/jonaswinkler/paperless-ng/workflows/ci/badge.svg)](https://github.com/jonaswinkler/paperless-ng/actions) [![Documentation Status](https://readthedocs.org/projects/paperless-ng/badge/?version=latest)](https://paperless-ng.readthedocs.io/en/latest/?badge=latest) [![Gitter](https://badges.gitter.im/paperless-ng/community.svg)](https://gitter.im/paperless-ng/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Docker Hub Pulls](https://img.shields.io/docker/pulls/jonaswinkler/paperless-ng.svg)](https://hub.docker.com/r/jonaswinkler/paperless-ng) From 6c3b1db4dd3d9086057bdd7da82bb3748daeba37 Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Fri, 22 Jan 2021 
14:21:35 +0100 Subject: [PATCH 05/42] Determine installed version by git commit hash --- .github/workflows/ansible.yml | 4 +- ansible/molecule/fresh/converge.yml | 3 -- ansible/tasks/install-source.yml | 4 +- ansible/tasks/main.yml | 73 ++++++++++++++++++++++++----- 4 files changed, 65 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible.yml index 60589f87d..c11472361 100644 --- a/.github/workflows/ansible.yml +++ b/.github/workflows/ansible.yml @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | python3 -m pip install --upgrade pip - python3 -m pip install molecule[ansible,docker] + python3 -m pip install molecule[ansible,docker] jmespath ansible --version docker --version molecule --version @@ -47,7 +47,7 @@ jobs: - name: Install dependencies run: | python3 -m pip install --upgrade pip - python3 -m pip install molecule[ansible,docker] + python3 -m pip install molecule[ansible,docker] jmespath ansible --version docker --version molecule --version diff --git a/ansible/molecule/fresh/converge.yml b/ansible/molecule/fresh/converge.yml index 39d20050d..eec6e6444 100644 --- a/ansible/molecule/fresh/converge.yml +++ b/ansible/molecule/fresh/converge.yml @@ -5,9 +5,6 @@ - name: set github ref as version when available set_fact: paperlessng_version: "{{ lookup('env', 'GITHUB_REF') | default('latest', True) }}" - - name: debug - debug: - var: paperlessng_version - name: install paperless-ng with default parameters include_role: name: ansible diff --git a/ansible/tasks/install-source.yml b/ansible/tasks/install-source.yml index 8f6dc0e03..64b7dbeb2 100644 --- a/ansible/tasks/install-source.yml +++ b/ansible/tasks/install-source.yml @@ -116,12 +116,12 @@ - name: package app archive: path: "{{ gitdir.path }}/dist/" - dest: "{{ gitdir.path }}/paperless-ng-{{ paperlessng_version }}.tar.xz" + dest: "{{ gitdir.path }}/paperless-ng-{{ paperlessng_commit }}.tar.xz" format: xz - name: extract paperless-ng unarchive: - src: "{{ 
gitdir.path }}/paperless-ng-{{ paperlessng_version }}.tar.xz" + src: "{{ gitdir.path }}/paperless-ng-{{ paperlessng_commit }}.tar.xz" remote_src: yes dest: "{{ tempdir.path }}" diff --git a/ansible/tasks/main.yml b/ansible/tasks/main.yml index 15f7f9ba1..5ad30dfcf 100644 --- a/ansible/tasks/main.yml +++ b/ansible/tasks/main.yml @@ -111,23 +111,65 @@ register: latest_release - name: parse latest release version set_fact: - paperlessng_version: "{{ latest_release.json['tag_name'] | regex_replace('^ng-(.+)$', '\\1') }}" + paperlessng_version: "{{ latest_release.json['tag_name'] }}" when: paperlessng_version == "latest" -# TODO store commit hash of installed version, use instead of version number +- block: + - name: sanitize version string + set_fact: + paperlessng_version: "{{ paperlessng_version | regex_replace('^ng-(\\d+\\.\\d+\\.\\d+)$', '\\1') }}" + - name: get tag data + uri: + url: https://api.github.com/repos/jonaswinkler/paperless-ng/tags + method: GET + register: tags + - name: get commit for target tag + set_fact: + paperlessng_commit: "{{ tags.json | json_query('[?name==`ng-' + paperlessng_version +'`] | [0].commit.sha') }}" + when: paperlessng_version | regex_search("^(ng-)?(\d+\.\d+\.\d+)$") + +- block: + - name: check if version is branch + uri: + url: "https://api.github.com/repos/jonaswinkler/paperless-ng/branches/{{ paperlessng_version }}" + method: GET + status_code: [200, 404] + register: branch + - name: get commit for target branch + set_fact: + paperlessng_commit: "{{ branch.json | json_query('commit.sha') }}" + when: branch.status == 200 + - block: + - name: check if version is commit-or-ref + uri: + url: "https://api.github.com/repos/jonaswinkler/paperless-ng/commits/{{ paperlessng_version }}" + method: GET + status_code: [200, 404, 422] + register: commit + - name: get commit for target commit-or-ref + set_fact: + paperlessng_commit: "{{ commit.json | json_query('sha') }}" + when: commit.status == 200 + - name: fail + fail: + msg: "Can not 
determine commit from `paperlessng_version=={{ paperlessng_version }}`!" + when: commit.status != 200 + when: branch.status == 404 + when: not(paperlessng_version | regex_search("^(ng-)?(\d+\.\d+\.\d+)$")) + - name: check for paperless-ng installation command: - cmd: 'grep -Po "(?<=Paperless-ng )\d+\.\d+\.\d+" {{ paperlessng_directory }}/docs/changelog.html' - changed_when: '"No such file or directory" in paperlessng_current_version.stderr or paperlessng_current_version.stdout != paperlessng_version | string' + cmd: "cat {{ paperlessng_directory }}/.installed_version" + changed_when: '"No such file or directory" in paperlessng_current_commit.stderr or paperlessng_current_commit.stdout != paperlessng_commit | string' failed_when: false ignore_errors: yes - register: paperlessng_current_version + register: paperlessng_current_commit - name: register current state set_fact: - fresh_installation: '{{ "No such file or directory" in paperlessng_current_version.stderr }}' - update_installation: '{{ "No such file or directory" not in paperlessng_current_version.stderr and paperlessng_current_version.stdout != paperlessng_version | string }}' - reconfigure_only: "{{ paperlessng_current_version.stdout == paperlessng_version | string }}" + fresh_installation: '{{ "No such file or directory" in paperlessng_current_commit.stderr }}' + update_installation: '{{ "No such file or directory" not in paperlessng_current_commit.stderr and paperlessng_current_commit.stdout != paperlessng_commit | string }}' + reconfigure_only: "{{ paperlessng_current_commit.stdout == paperlessng_commit | string }}" - block: - name: backup current paperless-ng installation @@ -156,14 +198,14 @@ uri: url: "https://github.com/jonaswinkler/paperless-ng/releases/download/ng-{{ paperlessng_version }}/paperless-ng-{{ paperlessng_version }}.tar.xz" method: GET - status_code: [200, 302, 404] + status_code: [200, 404] register: release_archive - name: install paperless-ng from source include_tasks: 
install-source.yml when: release_archive.status == 404 - name: install paperless-ng from release archive include_tasks: install-release.yml - when: release_archive.status != 404 + when: release_archive.status == 200 - name: change owner and permissions of paperless-ng command: cmd: "{{ item }}" @@ -175,6 +217,13 @@ - name: move paperless-ng command: cmd: "cp -a {{ tempdir.path }}/paperless-ng/. {{ paperlessng_directory }}" + - name: store commit hash of installed version + copy: + content: "{{ paperlessng_commit }}" + dest: "{{ paperlessng_directory }}/.installed_version" + owner: "{{ paperlessng_system_user }}" + group: "{{ paperlessng_system_group }}" + mode: "0440" - name: remove temporary directory file: path: "{{ tempdir.path }}" @@ -197,7 +246,7 @@ - name: rename initial config command: - cmd: "mv {{ paperlessng_directory }}/paperless.conf {{ paperlessng_directory }}/paperless.conf.template" + cmd: "mv -f {{ paperlessng_directory }}/paperless.conf {{ paperlessng_directory }}/paperless.conf.template" removes: "{{ paperlessng_directory }}/paperless.conf" - name: configure paperless-ng @@ -408,7 +457,7 @@ # https://www.freedesktop.org/software/systemd/man/systemd.exec.html { option: "User", value: "{{ paperlessng_system_user }}" }, { option: "Group", value: "{{ paperlessng_system_group }}" }, - { option: "WorkingDirectory", value: "{{ paperlessng_directory }}/src", }, + { option: "WorkingDirectory", value: "{{ paperlessng_directory }}/src" }, { option: "ProtectSystem", value: "full" }, { option: "NoNewPrivileges", value: "true" }, { option: "PrivateUsers", value: "true" }, From ced6a61869cf30b31124afc114b9d9e4087e6996 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Fri, 22 Jan 2021 17:22:02 +0100 Subject: [PATCH 06/42] Update README.md --- README.md | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/README.md b/README.md index 6a570d79d..e6b9feace 100644 --- a/README.md +++ b/README.md @@ 
-54,25 +54,6 @@ If you want to see some screenshots of paperless-ng in action, [some are availab For a complete list of changes from paperless, check out the [changelog](https://paperless-ng.readthedocs.io/en/latest/changelog.html) -# Roadmap for 1.0 - -- Make the front end nice (except mobile). -- Fix whatever bugs I and you find. -- Make the documentation nice. - -## On the chopping block. - -- **GnuPG encrypion.** [Here's a note about encryption in paperless](https://paperless-ng.readthedocs.io/en/latest/administration.html#managing-encryption). The gist of it is that I don't see which attacks this implementation protects against. It gives a false sense of security to users who don't care about how it works. - -## Wont-do list. - -These features will probably never make it into paperless, since paperless is meant to be an easy to use set-and-forget solution. - -- **Document versions.** I might consider adding the ability to update a document with a newer version, but that's about it. The kind of documents that get added to paperless usually don't change at all. -- **Workflows.** I don't see a use case for these, yet. -- **Folders.** Tags are superior in just about every way. -- **Apps / extension support.** Again, paperless is meant to be simple. - # Getting started The recommended way to deploy paperless is docker-compose. The files in the /docker/hub directory are configured to pull the image from Docker Hub. 
From c0882e74e22083b1e34ec1f6ce3d73d0f6127a23 Mon Sep 17 00:00:00 2001 From: Reto Date: Sat, 23 Jan 2021 12:55:50 +0100 Subject: [PATCH 07/42] tried to fill the gaps and change wording where it felt necessary for better understanding --- docs/setup.rst | 76 ++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/docs/setup.rst b/docs/setup.rst index a3a0bd1e8..e73e6aeaa 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -25,10 +25,10 @@ Paperless consists of the following components: or by any other means such as Apache ``mod_wsgi``. * **The consumer:** This is what watches your consumption folder for documents. - However, the consumer itself does not consume really consume your documents anymore. - It rather notifies a task processor that a new file is ready for consumption. + However, the consumer itself does not really consume your documents. + Now it notifies a task processor that a new file is ready for consumption. I suppose it should be named differently. - This also used to check your emails, but that's now gone elsewhere as well. + This was also used to check your emails, but that's now done elsewhere as well. Start the consumer with the management command ``document_consumer``: @@ -40,25 +40,25 @@ Paperless consists of the following components: .. _setup-task_processor: * **The task processor:** Paperless relies on `Django Q `_ - for doing much of the heavy lifting. This is a task queue that accepts tasks from - multiple sources and processes tasks in parallel. It also comes with a scheduler that executes + for doing most of the heavy lifting. This is a task queue that accepts tasks from + multiple sources and processes these in parallel. It also comes with a scheduler that executes certain commands periodically. This task processor is responsible for: * Consuming documents. When the consumer finds new documents, it notifies the task processor to start a consumption task. - * Consuming emails. 
It periodically checks your configured accounts for new mails and - produces consumption tasks for any documents it finds. * The task processor also performs the consumption of any documents you upload through the web interface. - * Maintain the search index and the automatic matching algorithm. These are things that paperless + * Consuming emails. It periodically checks your configured accounts for new emails and + notifies the task processor to consume the attachment of an email. + * Maintaining the search index and the automatic matching algorithm. These are things that paperless needs to do from time to time in order to operate properly. This allows paperless to process multiple documents from your consumption folder in parallel! On - a modern multi core system, consumption with full ocr is blazing fast. + a modern multi core system, this makes the consumption process with full OCR blazingly fast. - The task processor comes with a built-in admin interface that you can use to see whenever any of the + The task processor comes with a built-in admin interface that you can use to check whenever any of the tasks fail and inspect the errors (i.e., wrong email credentials, errors during consuming a specific file, etc). @@ -70,8 +70,8 @@ Paperless consists of the following components: $ pipenv run python3 manage.py qcluster * A `redis `_ message broker: This is a really lightweight service that is responsible - for getting the tasks from the webserver and consumer to the task scheduler. These run in different - processes (maybe even on different machines!), and therefore, this is necessary. + for getting the tasks from the webserver and the consumer to the task scheduler. These run in a different + process (maybe even on different machines!), and therefore, this is necessary. * Optional: A database server. Paperless supports both PostgreSQL and SQLite for storing its data. 
@@ -79,7 +79,7 @@ Paperless consists of the following components: Installation ############ -You can go multiple routes with setting up and running Paperless: +You can go multiple routes to setup and run Paperless: * :ref:`Pull the image from Docker Hub ` * :ref:`Build the Docker image yourself ` @@ -87,14 +87,15 @@ You can go multiple routes with setting up and running Paperless: * :ref:`Use ansible to install Paperless on your system automatically (bare metal) ` The Docker routes are quick & easy. These are the recommended routes. This configures all the stuff -from above automatically so that it just works and uses sensible defaults for all configuration options. +from the above automatically so that it just works and uses sensible defaults for all configuration options. +Here you find a cheat-sheet for docker beginners: `CLI Basics ` -The bare metal route is more complicated to setup but makes it easier +The bare metal route is complicated to setup but makes it easier should you want to contribute some code back. You need to configure and run the above mentioned components yourself. -The ansible route cobines benefits from both options: -the setup process is fully automated, reproducible and idempotent, +The ansible route combines benefits of both options: +the setup process is fully automated, reproducible and independent, it includes the same sensible defaults, and it simultaneously provides the flexibility of a bare metal installation. @@ -103,10 +104,12 @@ and it simultaneously provides the flexibility of a bare metal installation. Install Paperless from Docker Hub ================================= +1. Login with your user and create a folder in your home-directory `mkdir -v ~/paperless-ng` to have a place for your configuration files and consumption directory. + 1. 
Go to the `/docker/compose directory on the project page `_ - and download one of the ``docker-compose.*.yml`` files, depending on which database backend you + and download one of the `docker-compose.*.yml` files, depending on which database backend you want to use. Rename this file to `docker-compose.yml`. - If you want to enable optional support for Office documents, download a file with ``-tika`` in its name. + If you want to enable optional support for Office documents, download a file with `-tika` in the file name. Download the ``docker-compose.env`` file and the ``.env`` file as well and store them in the same directory. @@ -121,30 +124,31 @@ Install Paperless from Docker Hub If you want to use the included ``docker-compose.*.yml`` file, you need to have at least Docker version **17.09.0** and docker-compose - version **1.17.0**. + version **1.17.0**. + To check do: `docker-compose -v` or `docker -v` See the `Docker installation guide`_ on how to install the current version of Docker for your operating system or Linux distribution of - choice. To get an up-to-date version of docker-compose, follow the - `docker-compose installation guide`_ if your package repository doesn't + choice. To get the latest version of docker-compose, follow the + `docker-compose installation guide`_if your package repository doesn't include it. .. _Docker installation guide: https://docs.docker.com/engine/installation/ .. _docker-compose installation guide: https://docs.docker.com/compose/install/ 3. Modify ``docker-compose.yml`` to your preferences. You may want to change the path - to the consumption directory in this file. Find the line that specifies where + to the consumption directory. Find the line that specifies where to mount the consumption directory: .. code:: - - ./consume:/usr/src/paperless/consume + - ./**consume**:/usr/src/paperless/consume Replace the part BEFORE the colon with a local directory of your choice: .. 
code:: - - /home/jonaswinkler/paperless-inbox:/usr/src/paperless/consume + - /**home/jonaswinkler/paperless-inbox**:/usr/src/paperless/consume Don't change the part after the colon or paperless wont find your documents. @@ -155,23 +159,27 @@ Install Paperless from Docker Hub both the docker container and you on the host machine have write access to the consumption directory. If your UID and GID on the host system is 1000 (the default for the first normal user on most systems), it will - work out of the box without any modifications. + work out of the box without any modifications. `id "username"` to check. .. note:: - You can use any settings from the file ``paperless.conf.example`` in this file. - Have a look at :ref:`configuration` to see whats available. + You can copy any setting from the file ``paperless.conf.example`` and paste it here. + Have a look at :ref:`configuration` to see what's available. .. caution:: - Certain file systems such as NFS network shares don't support file system + Some file systems such as NFS network shares don't support file system notifications with ``inotify``. When storing the consumption directory - on such a file system, paperless will be unable to pick up new files + on such a file system, paperless will not pick up new files with the default configuration. You will need to use ``PAPERLESS_CONSUMER_POLLING``, which will disable inotify. See :ref:`here `. + +5. Now head over to: https://hub.docker.com/r/jonaswinkler/paperless-ng and choose your preferred + image and copy the link. To download this image do a `docker pull` followed by the link. Do this within the directory with the .yml files. + Depending on your network connection and CPU this will take a while. You have time to get a beverage. 5. Run ``docker-compose up -d``. This will create and start the necessary - containers. + containers, but your are not done yet! 6. To be able to login, you will need a super user. 
To create it, execute the following command: @@ -181,12 +189,12 @@ Install Paperless from Docker Hub $ docker-compose run --rm webserver createsuperuser This will prompt you to set a username, an optional e-mail address and - finally a password. + finally a password (at least 8 characters). 7. The default ``docker-compose.yml`` exports the webserver on your local port 8000. If you haven't adapted this, you should now be able to visit your - Paperless instance at ``http://127.0.0.1:8000``. You can login with the - user and password you just created. + Paperless instance at ``http://127.0.0.1:8000`` or your servers IP-Address:8000. + Use the login credentials you have created with the previous step. .. _Docker: https://www.docker.com/ .. _docker-compose: https://docs.docker.com/compose/install/ From 2104e654620fa993c3fa557e61833c07a88228b9 Mon Sep 17 00:00:00 2001 From: Reto Date: Sat, 23 Jan 2021 13:14:31 +0100 Subject: [PATCH 08/42] minor changes, like numbering --- docs/setup.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/setup.rst b/docs/setup.rst index e73e6aeaa..d2bd7ed0a 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -106,7 +106,7 @@ Install Paperless from Docker Hub 1. Login with your user and create a folder in your home-directory `mkdir -v ~/paperless-ng` to have a place for your configuration files and consumption directory. -1. Go to the `/docker/compose directory on the project page `_ +2. Go to the `/docker/compose directory on the project page `_ and download one of the `docker-compose.*.yml` files, depending on which database backend you want to use. Rename this file to `docker-compose.yml`. If you want to enable optional support for Office documents, download a file with `-tika` in the file name. @@ -118,7 +118,7 @@ Install Paperless from Docker Hub For new installations, it is recommended to use PostgreSQL as the database backend. -2. Install `Docker`_ and `docker-compose`_. +3. 
Install `Docker`_ and `docker-compose`_. .. caution:: @@ -136,24 +136,24 @@ Install Paperless from Docker Hub .. _Docker installation guide: https://docs.docker.com/engine/installation/ .. _docker-compose installation guide: https://docs.docker.com/compose/install/ -3. Modify ``docker-compose.yml`` to your preferences. You may want to change the path +4. Modify ``docker-compose.yml`` to your preferences. You may want to change the path to the consumption directory. Find the line that specifies where to mount the consumption directory: .. code:: - - ./**consume**:/usr/src/paperless/consume + - ./consume:/usr/src/paperless/consume Replace the part BEFORE the colon with a local directory of your choice: .. code:: - - /**home/jonaswinkler/paperless-inbox**:/usr/src/paperless/consume + - /home/jonaswinkler/paperless-inbox:/usr/src/paperless/consume Don't change the part after the colon or paperless wont find your documents. -4. Modify ``docker-compose.env``, following the comments in the file. The +5. Modify ``docker-compose.env``, following the comments in the file. The most important change is to set ``USERMAP_UID`` and ``USERMAP_GID`` to the uid and gid of your user on the host system. This ensures that both the docker container and you on the host machine have write access @@ -174,14 +174,14 @@ Install Paperless from Docker Hub with the default configuration. You will need to use ``PAPERLESS_CONSUMER_POLLING``, which will disable inotify. See :ref:`here `. -5. Now head over to: https://hub.docker.com/r/jonaswinkler/paperless-ng and choose your preferred +6. Now head over to: https://hub.docker.com/r/jonaswinkler/paperless-ng and choose your preferred image and copy the link. To download this image do a `docker pull` followed by the link. Do this within the directory with the .yml files. Depending on your network connection and CPU this will take a while. You have time to get a beverage. -5. Run ``docker-compose up -d``. This will create and start the necessary +7. 
Run ``docker-compose up -d``. This will create and start the necessary containers, but your are not done yet! -6. To be able to login, you will need a super user. To create it, execute the +8. To be able to login, you will need a super user. To create it, execute the following command: .. code-block:: shell-session @@ -191,7 +191,7 @@ Install Paperless from Docker Hub This will prompt you to set a username, an optional e-mail address and finally a password (at least 8 characters). -7. The default ``docker-compose.yml`` exports the webserver on your local port +9. The default ``docker-compose.yml`` exports the webserver on your local port 8000. If you haven't adapted this, you should now be able to visit your Paperless instance at ``http://127.0.0.1:8000`` or your servers IP-Address:8000. Use the login credentials you have created with the previous step. From 0d19957d4b96c8414c46d45e21517065fc0f875e Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Sat, 23 Jan 2021 22:08:36 +0100 Subject: [PATCH 09/42] Fully prepare release package --- ansible/tasks/install-source.yml | 63 +++++++++++++------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/ansible/tasks/install-source.yml b/ansible/tasks/install-source.yml index 64b7dbeb2..04a2991b9 100644 --- a/ansible/tasks/install-source.yml +++ b/ansible/tasks/install-source.yml @@ -5,7 +5,7 @@ pkg: - git - npm - - libqpdf-dev + - gettext - name: create temporary git directory tempfile: @@ -30,26 +30,6 @@ - npm install - ./node_modules/.bin/ng build --prod -# TODO run dev in separate virtualenv -- name: install pipenv - pip: - name: - - pipenv - - pybind11 # building pikepdf for <0.9.14 - extra_args: --upgrade - -- name: allow building with any Python 3 release - lineinfile: - path: "{{ gitdir.path }}/Pipfile" - regexp: '^python_version = ".+"$' - line: python_version = "3" - -- name: install Pipfile dependencies - command: - cmd: pipenv install --dev - args: - chdir: "{{ gitdir.path }}" - - name: 
clean output directory file: path: "{{ gitdir.path }}/dist" @@ -75,28 +55,16 @@ - src: Pipfile - src: Pipfile.lock - src: README.md + - src: requirements.txt - src: paperless.conf.example dest: "paperless.conf" -# TODO can be copied for >=0.9.14 -- name: generate requirements.txt - command: - cmd: pipenv lock --keep-outdated -r - args: - chdir: "{{ gitdir.path }}" - register: requirements - -- name: write requirements.txt - copy: - content: "{{ requirements.stdout }}" - dest: "{{ gitdir.path }}/dist/paperless-ng/requirements.txt" - - name: glob all scripts find: - paths: "{{ gitdir.path }}/scripts/" + paths: ["{{ gitdir.path }}/scripts/"] patterns: - - "*.service" - - "*.sh" + - "*.service" + - "*.sh" register: glob - name: copy scripts @@ -113,6 +81,27 @@ args: chdir: "{{ gitdir.path }}" +- name: install paperlessng requirements + pip: + requirements: "{{ gitdir.path }}/requirements.txt" + virtualenv: "{{ gitdir.path }}/.venv/" + extra_args: --upgrade + +- name: compile messages + command: "{{ gitdir.path }}/.venv/bin/python3 manage.py compilemessages" + args: + chdir: "{{ gitdir.path }}/dist/paperless-ng/src/" + +- name: collect static files + command: "{{ gitdir.path }}/.venv/bin/python3 manage.py collectstatic --no-input" + args: + chdir: "{{ gitdir.path }}/dist/paperless-ng/src/" + +- name: remove pycache directories + shell: find . 
-name __pycache__ | xargs rm -r + args: + chdir: "{{ gitdir.path }}/dist/" + - name: package app archive: path: "{{ gitdir.path }}/dist/" From bfbdfe857f28d47dd99fda1e8e12689de142faf7 Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Sat, 23 Jan 2021 22:09:56 +0100 Subject: [PATCH 10/42] Simplify molecule tests "Upgrade" path includes multiple paths anyway: - installing the latest official release package - building the current PR from source - upgrading between the two versions --- .github/workflows/ansible.yml | 37 ++------ .../molecule/{update => default}/converge.yml | 0 .../molecule/{fresh => default}/molecule.yml | 0 .../molecule/{update => default}/prepare.yml | 2 +- ansible/molecule/default/verify.yml | 91 +++++++++++++++++++ ansible/molecule/fresh/converge.yml | 10 -- ansible/molecule/fresh/verify.yml | 60 ------------ ansible/molecule/update/molecule.yml | 35 ------- ansible/molecule/update/verify.yml | 60 ------------ 9 files changed, 100 insertions(+), 195 deletions(-) rename ansible/molecule/{update => default}/converge.yml (100%) rename ansible/molecule/{fresh => default}/molecule.yml (100%) rename ansible/molecule/{update => default}/prepare.yml (85%) create mode 100644 ansible/molecule/default/verify.yml delete mode 100644 ansible/molecule/fresh/converge.yml delete mode 100644 ansible/molecule/fresh/verify.yml delete mode 100644 ansible/molecule/update/molecule.yml delete mode 100644 ansible/molecule/update/verify.yml diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible.yml index c11472361..fd965e760 100644 --- a/.github/workflows/ansible.yml +++ b/.github/workflows/ansible.yml @@ -5,7 +5,7 @@ on: [push, pull_request] jobs: # https://molecule.readthedocs.io/en/latest/ci.html#github-actions - test-fresh: + test: runs-on: ubuntu-latest # https://docs.github.com/en/free-pro-team@latest/actions/reference/context-and-expression-syntax-for-github-actions#github-context if: github.event_name == 'pull_request' || (github.event_name == 
'push' && contains(github.ref, 'refs/heads/')) @@ -26,36 +26,15 @@ jobs: docker --version molecule --version python --version - - name: Test fresh installation with molecule + - name: Test installation/build/upgrade with molecule run: | cd ansible - molecule test -s fresh - working-directory: "${{ github.repository }}" - test-update: - runs-on: ubuntu-latest - # https://docs.github.com/en/free-pro-team@latest/actions/reference/context-and-expression-syntax-for-github-actions#github-context - if: github.event_name == 'pull_request' || (github.event_name == 'push' && contains(github.ref, 'refs/heads/')) - steps: - - name: Check out the codebase - uses: actions/checkout@v2 - with: - path: "${{ github.repository }}" - - name: Set up Python - uses: actions/setup-python@v2 - - name: Set up Docker - uses: docker-practice/actions-setup-docker@master - - name: Install dependencies - run: | - python3 -m pip install --upgrade pip - python3 -m pip install molecule[ansible,docker] jmespath - ansible --version - docker --version - molecule --version - python --version - - name: Test release update with molecule - run: | - cd ansible - molecule test -s update + molecule create + molecule verify + molecule converge + molecule idempotence + molecule verify + molecule destroy working-directory: "${{ github.repository }}" # # https://galaxy.ansible.com/docs/contributing/importing.html # release: diff --git a/ansible/molecule/update/converge.yml b/ansible/molecule/default/converge.yml similarity index 100% rename from ansible/molecule/update/converge.yml rename to ansible/molecule/default/converge.yml diff --git a/ansible/molecule/fresh/molecule.yml b/ansible/molecule/default/molecule.yml similarity index 100% rename from ansible/molecule/fresh/molecule.yml rename to ansible/molecule/default/molecule.yml diff --git a/ansible/molecule/update/prepare.yml b/ansible/molecule/default/prepare.yml similarity index 85% rename from ansible/molecule/update/prepare.yml rename to 
ansible/molecule/default/prepare.yml index 138ebdfce..e175eff5b 100644 --- a/ansible/molecule/update/prepare.yml +++ b/ansible/molecule/default/prepare.yml @@ -3,7 +3,7 @@ tasks: - name: set previous version as installation target set_fact: - paperlessng_version: 1.0.0 + paperlessng_version: latest - name: install previous paperless-ng release include_role: diff --git a/ansible/molecule/default/verify.yml b/ansible/molecule/default/verify.yml new file mode 100644 index 000000000..01dc43192 --- /dev/null +++ b/ansible/molecule/default/verify.yml @@ -0,0 +1,91 @@ +--- +- name: Verify + hosts: all + gather_facts: false + + vars_files: + - ../../defaults/main.yml + + tasks: + - name: check if webserver is up + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}" + status_code: [200, 302] + return_content: yes + register: landingpage + failed_when: "'Sign in' not in landingpage.content" + + - name: generate random name and content + set_fact: + content: "{{ lookup('password', '/dev/null length=64 chars=ascii_letters') }}" + filename: "{{ lookup('password', '/dev/null length=8 chars=ascii_letters') }}" + + - name: check if document posting works + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/documents/post_document/" + method: POST + body_format: form-multipart + body: + document: + content: "{{ content }}" + filename: "{{ filename }}.txt" + mime_type: text/plain + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + return_content: yes + register: post_document + failed_when: "'OK' not in post_document.content" + + - name: verify uploaded document has been accepted + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/" + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + return_content: yes + register: logs + 
failed_when: "('Consuming ' + filename + '.txt') not in logs.content" + + # assumes txt consumption finished by now, might have to sleep a bit + - name: verify uploaded document has been consumed + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/" + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + return_content: yes + register: logs + failed_when: "filename + ' consumption finished' not in logs.content" + + - name: get documents + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/documents/" + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + return_content: yes + register: documents + + - name: set document index + set_fact: + index: "{{ documents.json['results'][0]['id'] }}" + + - name: verify uploaded document is avaiable + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/documents/{{ index }}/" + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + return_content: yes + register: document + failed_when: "'Not found.' 
in document.content or content not in document.json['content']" + + - name: check if deleting uploaded document works + uri: + url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/documents/bulk_edit/" + method: POST + body_format: json + body: + documents: ["{{ index }}"] + method: delete + parameters: {} + headers: + Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + register: delete_document + failed_when: "'OK' not in delete_document.json['result']" diff --git a/ansible/molecule/fresh/converge.yml b/ansible/molecule/fresh/converge.yml deleted file mode 100644 index eec6e6444..000000000 --- a/ansible/molecule/fresh/converge.yml +++ /dev/null @@ -1,10 +0,0 @@ ---- -- name: fresh installation - hosts: all - tasks: - - name: set github ref as version when available - set_fact: - paperlessng_version: "{{ lookup('env', 'GITHUB_REF') | default('latest', True) }}" - - name: install paperless-ng with default parameters - include_role: - name: ansible diff --git a/ansible/molecule/fresh/verify.yml b/ansible/molecule/fresh/verify.yml deleted file mode 100644 index c353783ab..000000000 --- a/ansible/molecule/fresh/verify.yml +++ /dev/null @@ -1,60 +0,0 @@ ---- -- name: Verify - hosts: all - gather_facts: false - - vars_files: - - ../../defaults/main.yml - - tasks: - - name: check if webserver is up - uri: - url: http://localhost:8000 - status_code: [200, 302] - return_content: yes - register: landingpage - failed_when: "'Sign in' not in landingpage.content" - - - name: check if document posting works - uri: - url: http://localhost:8000/api/documents/post_document/ - method: POST - body_format: form-multipart - body: - document: - content: FOO - filename: document.txt - mime_type: text/plain - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: post_document - failed_when: "'OK' not in 
post_document.content" - - - name: verify uploaded document has been accepted - uri: - url: http://localhost:8000/api/logs/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: logs - failed_when: "'Consuming document.txt' not in logs.content" - - # assumes txt consumption finished by now, might have to sleep a bit - - name: verify uploaded document has been consumed - uri: - url: http://localhost:8000/api/logs/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: logs - failed_when: "'document consumption finished' not in logs.content" - - - name: verify uploaded document is avaiable - uri: - url: http://localhost:8000/api/documents/1/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: document - failed_when: "'Not found.' 
in document.content or 'FOO' not in document.content" diff --git a/ansible/molecule/update/molecule.yml b/ansible/molecule/update/molecule.yml deleted file mode 100644 index 27f37ba63..000000000 --- a/ansible/molecule/update/molecule.yml +++ /dev/null @@ -1,35 +0,0 @@ ---- -dependency: - name: galaxy -driver: - name: docker -platforms: - - name: ubuntu_focal - image: jrei/systemd-ubuntu:20.04 - privileged: true - volumes: - - /sys/fs/cgroup:/sys/fs/cgroup:ro - tmpfs: - - /tmp - - /run - - /run/lock - override_command: False - # ubuntu 18.04 bionic works except that - # the default redis configuration expects IPv6 which is not enabled in docker by default - # the default Python environment is configured for ASCII instead of UTF-8 - # ubuntu 16.04 xenial only has Python 3.5 which is EOL and breaks multiple dependencies - - name: debian_buster - image: jrei/systemd-debian:10 - privileged: true - volumes: - - /sys/fs/cgroup:/sys/fs/cgroup:ro - tmpfs: - - /tmp - - /run - - /run/lock - override_command: False - # debian 9 stretch only has Python 3.5 which is EOL and breaks multiple dependencies -provisioner: - name: ansible -verifier: - name: ansible diff --git a/ansible/molecule/update/verify.yml b/ansible/molecule/update/verify.yml deleted file mode 100644 index c353783ab..000000000 --- a/ansible/molecule/update/verify.yml +++ /dev/null @@ -1,60 +0,0 @@ ---- -- name: Verify - hosts: all - gather_facts: false - - vars_files: - - ../../defaults/main.yml - - tasks: - - name: check if webserver is up - uri: - url: http://localhost:8000 - status_code: [200, 302] - return_content: yes - register: landingpage - failed_when: "'Sign in' not in landingpage.content" - - - name: check if document posting works - uri: - url: http://localhost:8000/api/documents/post_document/ - method: POST - body_format: form-multipart - body: - document: - content: FOO - filename: document.txt - mime_type: text/plain - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + 
paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: post_document - failed_when: "'OK' not in post_document.content" - - - name: verify uploaded document has been accepted - uri: - url: http://localhost:8000/api/logs/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: logs - failed_when: "'Consuming document.txt' not in logs.content" - - # assumes txt consumption finished by now, might have to sleep a bit - - name: verify uploaded document has been consumed - uri: - url: http://localhost:8000/api/logs/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: logs - failed_when: "'document consumption finished' not in logs.content" - - - name: verify uploaded document is avaiable - uri: - url: http://localhost:8000/api/documents/1/ - headers: - Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' - return_content: yes - register: document - failed_when: "'Not found.' in document.content or 'FOO' not in document.content" From 6e688a5b820a226c8e7560111d93d47fc0abea78 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sun, 24 Jan 2021 00:00:46 +0100 Subject: [PATCH 11/42] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e6b9feace..a19de79b2 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,8 @@ Here's what you get: # Features * Performs OCR on your documents, adds selectable text to image only documents and adds tags, correspondents and document types to your documents. +* Supports PDF documents, images, plain text files, and Office documents (Word, Excel, Powerpoint, and LibreOffice equivalents). 
+ * Office document support is optional and provided by Apache Tika (see [configuration](https://paperless-ng.readthedocs.io/en/latest/configuration.html#tika-settings)) * Paperless stores your documents plain on disk. Filenames and folders are managed by paperless and can be configured freely. * Single page application front end. Should be pretty snappy. Will be mobile friendly in the future. * Includes a dashboard that shows basic statistics and has document upload. From be2013975c170c9b8c2f36a5a4407dcfcaea1917 Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Sun, 24 Jan 2021 09:59:54 +0100 Subject: [PATCH 12/42] Simplify building from source Do not package app, instead copy directly into expected directory --- ansible/tasks/install-source.yml | 34 ++++++++------------------------ 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/ansible/tasks/install-source.yml b/ansible/tasks/install-source.yml index 04a2991b9..64fbc525b 100644 --- a/ansible/tasks/install-source.yml +++ b/ansible/tasks/install-source.yml @@ -30,25 +30,19 @@ - npm install - ./node_modules/.bin/ng build --prod -- name: clean output directory - file: - path: "{{ gitdir.path }}/dist" - state: absent - - name: create output directories file: path: "{{ item }}" state: directory with_items: - - "{{ gitdir.path }}/dist" - - "{{ gitdir.path }}/dist/paperless-ng" - - "{{ gitdir.path }}/dist/paperless-ng/scripts" + - "{{ tempdir.path }}/paperless-ng" + - "{{ tempdir.path }}/paperless-ng/scripts" - name: copy application into place copy: src: "{{ gitdir.path }}/{{ item.src }}" remote_src: yes - dest: "{{ gitdir.path }}/dist/paperless-ng/{{ item.dest | default('') }}" + dest: "{{ tempdir.path }}/paperless-ng/{{ item.dest | default('') }}" with_items: - src: CONTRIBUTING.md - src: LICENSE @@ -71,13 +65,13 @@ copy: src: "{{ item.path }}" remote_src: yes - dest: "{{ gitdir.path }}/dist/paperless-ng/scripts/" + dest: "{{ tempdir.path }}/paperless-ng/scripts/" with_items: - "{{ glob.files }}" - 
name: copy sources command: - cmd: "cp -r src/ dist/paperless-ng/src" + cmd: "cp -r src/ {{ tempdir.path }}/paperless-ng/src" args: chdir: "{{ gitdir.path }}" @@ -90,29 +84,17 @@ - name: compile messages command: "{{ gitdir.path }}/.venv/bin/python3 manage.py compilemessages" args: - chdir: "{{ gitdir.path }}/dist/paperless-ng/src/" + chdir: "{{ tempdir.path }}/paperless-ng/src/" - name: collect static files command: "{{ gitdir.path }}/.venv/bin/python3 manage.py collectstatic --no-input" args: - chdir: "{{ gitdir.path }}/dist/paperless-ng/src/" + chdir: "{{ tempdir.path }}/paperless-ng/src/" - name: remove pycache directories shell: find . -name __pycache__ | xargs rm -r args: - chdir: "{{ gitdir.path }}/dist/" - -- name: package app - archive: - path: "{{ gitdir.path }}/dist/" - dest: "{{ gitdir.path }}/paperless-ng-{{ paperlessng_commit }}.tar.xz" - format: xz - -- name: extract paperless-ng - unarchive: - src: "{{ gitdir.path }}/paperless-ng-{{ paperlessng_commit }}.tar.xz" - remote_src: yes - dest: "{{ tempdir.path }}" + chdir: "{{ tempdir.path }}" - name: remove temporary git directory file: From 28a2479f24ca7f1b3f1a5887ad9101ead9453d9d Mon Sep 17 00:00:00 2001 From: Reto Date: Sun, 24 Jan 2021 19:37:46 +0100 Subject: [PATCH 13/42] fix my ignorance of idempotent --- docs/setup.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/setup.rst b/docs/setup.rst index d2bd7ed0a..24d0e5604 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -95,9 +95,8 @@ should you want to contribute some code back. You need to configure and run the above mentioned components yourself. The ansible route combines benefits of both options: -the setup process is fully automated, reproducible and independent, -it includes the same sensible defaults, -and it simultaneously provides the flexibility of a bare metal installation. 
+the setup process is fully automated, reproducible and `idempotent `, +it includes the same sensible defaults, and it simultaneously provides the flexibility of a bare metal installation. .. _setup-docker_hub: From 37ade0c6b23acd0550eb5131c030a383d5e79093 Mon Sep 17 00:00:00 2001 From: Reto Date: Sun, 24 Jan 2021 20:00:09 +0100 Subject: [PATCH 14/42] fix my ignorance of idempotent --- docs/setup.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/setup.rst b/docs/setup.rst index d2bd7ed0a..73af557e7 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -88,18 +88,19 @@ You can go multiple routes to setup and run Paperless: The Docker routes are quick & easy. These are the recommended routes. This configures all the stuff from the above automatically so that it just works and uses sensible defaults for all configuration options. -Here you find a cheat-sheet for docker beginners: `CLI Basics ` +Here you find a cheat-sheet for docker beginners: `CLI Basics `_ The bare metal route is complicated to setup but makes it easier should you want to contribute some code back. You need to configure and run the above mentioned components yourself. The ansible route combines benefits of both options: -the setup process is fully automated, reproducible and independent, -it includes the same sensible defaults, -and it simultaneously provides the flexibility of a bare metal installation. +the setup process is fully automated, reproducible and `idempotent `_, +it includes the same sensible defaults, and it simultaneously provides the flexibility of a bare metal installation. .. _setup-docker_hub: +.. _CLI Basics: https://sehn.tech/post/devops-with-docker/ +.. 
_idempotent: https://docs.ansible.com/ansible/latest/reference_appendices/glossary.html#Idempotency Install Paperless from Docker Hub ================================= From 29ce2515eea976294e838537215316c2f6d71925 Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Sun, 24 Jan 2021 10:05:42 +0100 Subject: [PATCH 15/42] Build source package in paperlessng_directory Avoids permission problems in /tmp --- ansible/molecule/default/verify.yml | 5 +- ansible/tasks/install-source.yml | 165 +++++++++++++++------------- ansible/tasks/main.yml | 11 +- 3 files changed, 101 insertions(+), 80 deletions(-) diff --git a/ansible/molecule/default/verify.yml b/ansible/molecule/default/verify.yml index 01dc43192..1b3a436ca 100644 --- a/ansible/molecule/default/verify.yml +++ b/ansible/molecule/default/verify.yml @@ -45,7 +45,10 @@ register: logs failed_when: "('Consuming ' + filename + '.txt') not in logs.content" - # assumes txt consumption finished by now, might have to sleep a bit + - name: sleep 5 seconds + pause: + seconds: 5 + - name: verify uploaded document has been consumed uri: url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/" diff --git a/ansible/tasks/install-source.yml b/ansible/tasks/install-source.yml index 64fbc525b..ab8fbfef7 100644 --- a/ansible/tasks/install-source.yml +++ b/ansible/tasks/install-source.yml @@ -1,5 +1,4 @@ --- -# https://github.com/jonaswinkler/paperless-ng/blob/dev/.github/workflows/ci.yml - name: install dev dependencies apt: pkg: @@ -7,96 +6,106 @@ - npm - gettext -- name: create temporary git directory - tempfile: - state: directory - register: gitdir - -- name: pull paperless-ng - git: - repo: https://github.com/jonaswinkler/paperless-ng.git - dest: "{{ gitdir.path }}" - version: "{{ paperlessng_version }}" - refspec: "+refs/pull/*:refs/pull/*" - -- name: compile frontend - command: - cmd: "{{ item }}" - args: - chdir: "{{ gitdir.path }}/src-ui" - failed_when: false - with_items: - - npm install -g 
@angular/cli - - npm install - - ./node_modules/.bin/ng build --prod - - name: create output directories file: path: "{{ item }}" state: directory + owner: "{{ paperlessng_system_user }}" + group: "{{ paperlessng_system_group }}" + mode: "750" with_items: - "{{ tempdir.path }}/paperless-ng" - "{{ tempdir.path }}/paperless-ng/scripts" -- name: copy application into place - copy: - src: "{{ gitdir.path }}/{{ item.src }}" - remote_src: yes - dest: "{{ tempdir.path }}/paperless-ng/{{ item.dest | default('') }}" - with_items: - - src: CONTRIBUTING.md - - src: LICENSE - - src: Pipfile - - src: Pipfile.lock - - src: README.md - - src: requirements.txt - - src: paperless.conf.example - dest: "paperless.conf" +- block: + - name: create temporary git directory + tempfile: + state: directory + path: "{{ paperlessng_directory }}" + register: gitdir -- name: glob all scripts - find: - paths: ["{{ gitdir.path }}/scripts/"] - patterns: - - "*.service" - - "*.sh" - register: glob + - name: pull paperless-ng + git: + repo: https://github.com/jonaswinkler/paperless-ng.git + dest: "{{ gitdir.path }}" + version: "{{ paperlessng_version }}" + refspec: "+refs/pull/*:refs/pull/*" -- name: copy scripts - copy: - src: "{{ item.path }}" - remote_src: yes - dest: "{{ tempdir.path }}/paperless-ng/scripts/" - with_items: - - "{{ glob.files }}" + - name: compile frontend + command: + cmd: "{{ item }}" + args: + chdir: "{{ gitdir.path }}/src-ui" + failed_when: false + with_items: + - npm install -g @angular/cli + - npm install + - ./node_modules/.bin/ng build --prod -- name: copy sources - command: - cmd: "cp -r src/ {{ tempdir.path }}/paperless-ng/src" - args: - chdir: "{{ gitdir.path }}" + - name: copy application into place + copy: + src: "{{ gitdir.path }}/{{ item.src }}" + remote_src: yes + dest: "{{ tempdir.path }}/paperless-ng/{{ item.dest | default('') }}" + with_items: + - src: CONTRIBUTING.md + - src: LICENSE + - src: Pipfile + - src: Pipfile.lock + - src: README.md + - src: 
requirements.txt + - src: paperless.conf.example + dest: "paperless.conf" -- name: install paperlessng requirements - pip: - requirements: "{{ gitdir.path }}/requirements.txt" - virtualenv: "{{ gitdir.path }}/.venv/" - extra_args: --upgrade + - name: glob all scripts + find: + paths: ["{{ gitdir.path }}/scripts/"] + patterns: + - "*.service" + - "*.sh" + register: glob -- name: compile messages - command: "{{ gitdir.path }}/.venv/bin/python3 manage.py compilemessages" - args: - chdir: "{{ tempdir.path }}/paperless-ng/src/" + - name: copy scripts + copy: + src: "{{ item.path }}" + remote_src: yes + dest: "{{ tempdir.path }}/paperless-ng/scripts/" + with_items: + - "{{ glob.files }}" -- name: collect static files - command: "{{ gitdir.path }}/.venv/bin/python3 manage.py collectstatic --no-input" - args: - chdir: "{{ tempdir.path }}/paperless-ng/src/" + - name: copy sources + command: + cmd: "cp -r src/ {{ tempdir.path }}/paperless-ng/src" + args: + chdir: "{{ gitdir.path }}" -- name: remove pycache directories - shell: find . -name __pycache__ | xargs rm -r - args: - chdir: "{{ tempdir.path }}" + - name: create paperlessng venv + command: + cmd: "python3 -m virtualenv {{ gitdir.path }}/.venv/ -p /usr/bin/python3" -- name: remove temporary git directory - file: - path: "{{ gitdir.path }}" - state: absent + - name: install paperlessng requirements + command: + cmd: "{{ gitdir.path }}/.venv/bin/python3 -m pip install -r {{ gitdir.path }}/requirements.txt" + + - name: compile messages + command: "{{ gitdir.path }}/.venv/bin/python3 manage.py compilemessages" + args: + chdir: "{{ tempdir.path }}/paperless-ng/src/" + + - name: collect static files + command: "{{ gitdir.path }}/.venv/bin/python3 manage.py collectstatic --no-input" + args: + chdir: "{{ tempdir.path }}/paperless-ng/src/" + + - name: remove pycache directories + shell: find . 
-name __pycache__ | xargs rm -r + args: + chdir: "{{ tempdir.path }}" + + - name: remove temporary git directory + file: + path: "{{ gitdir.path }}" + state: absent + + become: yes + become_user: "{{ paperlessng_system_user }}" diff --git a/ansible/tasks/main.yml b/ansible/tasks/main.yml index 5ad30dfcf..f45747cb6 100644 --- a/ansible/tasks/main.yml +++ b/ansible/tasks/main.yml @@ -190,9 +190,19 @@ when: update_installation - block: + - name: create paperless-ng directory and set permissions + file: + path: "{{ paperlessng_directory }}" + state: directory + owner: "{{ paperlessng_system_user }}" + group: "{{ paperlessng_system_group }}" + mode: "750" - name: create temporary directory + become: yes + become_user: "{{ paperlessng_system_user }}" tempfile: state: directory + path: "{{ paperlessng_directory }}" register: tempdir - name: check if version is available as release archive uri: @@ -238,7 +248,6 @@ group: "{{ paperlessng_system_group }}" mode: "750" with_items: - - "{{ paperlessng_directory }}" - "{{ paperlessng_consumption_dir }}" - "{{ paperlessng_data_dir }}" - "{{ paperlessng_media_root }}" From 944aaf54383bfa648f3cbeb6d9ffb4eb9528715c Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Tue, 26 Jan 2021 19:32:09 +0100 Subject: [PATCH 16/42] documentation #444 --- docs/setup.rst | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/setup.rst b/docs/setup.rst index 73af557e7..574631a58 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -98,10 +98,11 @@ The ansible route combines benefits of both options: the setup process is fully automated, reproducible and `idempotent `_, it includes the same sensible defaults, and it simultaneously provides the flexibility of a bare metal installation. -.. _setup-docker_hub: .. _CLI Basics: https://sehn.tech/post/devops-with-docker/ .. _idempotent: https://docs.ansible.com/ansible/latest/reference_appendices/glossary.html#Idempotency +.. 
_setup-docker_hub: + Install Paperless from Docker Hub ================================= @@ -131,7 +132,7 @@ Install Paperless from Docker Hub See the `Docker installation guide`_ on how to install the current version of Docker for your operating system or Linux distribution of choice. To get the latest version of docker-compose, follow the - `docker-compose installation guide`_if your package repository doesn't + `docker-compose installation guide`_ if your package repository doesn't include it. .. _Docker installation guide: https://docs.docker.com/engine/installation/ @@ -348,7 +349,8 @@ writing. Windows is not and will never be supported. .. warning:: This is a development server which should not be used in - production. + production. It is not audited for security and performance + is inferior to production ready web servers. .. hint:: @@ -363,6 +365,11 @@ writing. Windows is not and will never be supported. ``consumer`` script to watch the input folder, and the ``scheduler`` script to run tasks such as email checking and document consumption. + You may need to adjust the path to the ``gunicorn`` executable. This + will be installed as part of the python dependencies, and is either located + in the ``bin`` folder of your virtual environment, or in ``~/.local/bin/`` if + no virtual environment is used. + These services rely on redis and optionally the database server, but don't need to be started in any particular order. The example files depend on redis being started. 
If you use a database server, you should From d126ecfa4ddf8c82038c3765509c16b090644211 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Tue, 26 Jan 2021 19:42:06 +0100 Subject: [PATCH 17/42] troubleshooting for #346 --- docs/troubleshooting.rst | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index b8343710f..85a9e336a 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -30,9 +30,9 @@ Consumer fails to pickup any new files ###################################### If you notice that the consumer will only pickup files in the consumption -directory at startup, but won't find any other files added later, check out -the configuration file and enable filesystem polling with the setting -``PAPERLESS_CONSUMER_POLLING``. +directory at startup, but won't find any other files added later, you will need to +enable filesystem polling with the configuration option +``PAPERLESS_CONSUMER_POLLING``, see :ref:`here `. This will disable listening to filesystem changes with inotify and paperless will manually check the consumption directory for changes instead. @@ -64,6 +64,22 @@ This may have two reasons: with Inbox tags. Verify that there are documents in your archive without inbox tags. The algorithm will only learn from documents not in your inbox. +UserWarning in sklearn on every single document +############################################### + +You may encounter warnings like this: + +.. code:: + + /usr/local/lib/python3.7/site-packages/sklearn/base.py:315: + UserWarning: Trying to unpickle estimator CountVectorizer from version 0.23.2 when using version 0.24.0. + This might lead to breaking code or invalid results. Use at your own risk. + +This happens when certain dependencies of paperless that are responsible for the auto matching algorithm are +updated. After updating these, your current training data *might* not be compatible anymore. 
This can be ignored +in most cases. If you want to get rid of the warning or actually experience issues with automatic matching, delete +the file ``classification_model.pickle`` in the data directory and let paperless recreate it. + Permission denied errors in the consumption directory ##################################################### From 20732794736674b7a4fe2ba4c782cab5125a3509 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Tue, 26 Jan 2021 19:53:56 +0100 Subject: [PATCH 18/42] documentation for #441 --- docs/troubleshooting.rst | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 85a9e336a..a5d020d21 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -94,3 +94,47 @@ Ensure that ``USERMAP_UID`` and ``USERMAP_GID`` are set to the user id and group different from ``1000``. See :ref:`setup-docker_hub`. Also ensure that you are able to read and write to the consumption directory on the host. + +Web-UI stuck at "Loading..." +############################ + +This might have multiple reasons. + + +1. If you built the docker image yourself or deployed using the bare metal route, + make sure that there are files in ``/static/frontend//``. + If there are no files, make sure that you executed ``collectstatic`` successfully, either + manually or as part of the docker image build. + + If the front end is still missing, make sure that the front end is compiled (files present in + ``src/documents/static/frontend``). If it is not, you need to compile the front end yourself + or download the release archive instead of cloning the repository. + +2. Check the output of the web server. You might see errors like this: + + + .. code:: + + [2021-01-25 10:08:04 +0000] [40] [ERROR] Socket error processing request. 
+ Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 134, in handle + self.handle_request(listener, req, client, addr) + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 190, in handle_request + util.reraise(*sys.exc_info()) + File "/usr/local/lib/python3.7/site-packages/gunicorn/util.py", line 625, in reraise + raise value + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 178, in handle_request + resp.write_file(respiter) + File "/usr/local/lib/python3.7/site-packages/gunicorn/http/wsgi.py", line 396, in write_file + if not self.sendfile(respiter): + File "/usr/local/lib/python3.7/site-packages/gunicorn/http/wsgi.py", line 386, in sendfile + sent += os.sendfile(sockno, fileno, offset + sent, count) + OSError: [Errno 22] Invalid argument + + To fix this issue, add + + .. code:: + + SENDFILE=0 + + to your ``docker-compose.env`` file. \ No newline at end of file From 600c13204ee3a1ae7bdb66b325be741ab6e58f53 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Tue, 26 Jan 2021 20:13:29 +0100 Subject: [PATCH 19/42] added FAQ for #449 --- docs/troubleshooting.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index a5d020d21..7f819a772 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -37,6 +37,15 @@ enable filesystem polling with the configuration option This will disable listening to filesystem changes with inotify and paperless will manually check the consumption directory for changes instead. + +Paperless always redirects to /admin +#################################### + +You probably had the old paperless installed at some point. Paperless installed +a permanent redirect to /admin in your browser, and you need to clear your +browsing data / cache to fix that. 
+ + Operation not permitted ####################### From 1483f450ad99a664ea09e10662101fc4c01ad38e Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Tue, 26 Jan 2021 20:32:35 +0100 Subject: [PATCH 20/42] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 42 +++++++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 18 ++++++++++ .github/ISSUE_TEMPLATE/other.md | 14 ++++++++ 3 files changed, 74 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/other.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..b72d397c4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,42 @@ +--- +name: Bug report +about: Something is not working +title: "[BUG] Concise description of the issue" +labels: '' +assignees: '' + +--- + + + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Webserver logs** +``` +If available, post any logs from the web server related to your issue. +``` + +**Relevant information** + - Host OS of the machine running paperless: [e.g. Archlinux / Ubuntu 20.04] + - Browser [e.g. chrome, safari] + - Version [e.g. 1.0.0] + - Installation method: [docker / bare metal] + - Any configuration changes you made in `docker-compose.yml`, `docker-compose.env` or `paperless.conf`. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000..21d474b36 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,18 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: "[Feature Request] Concise and clear description of your feature request" +labels: '' +assignees: '' + +--- + + + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. diff --git a/.github/ISSUE_TEMPLATE/other.md b/.github/ISSUE_TEMPLATE/other.md new file mode 100644 index 000000000..aae462edf --- /dev/null +++ b/.github/ISSUE_TEMPLATE/other.md @@ -0,0 +1,14 @@ +--- +name: Other +about: Anything that is not a feature request or bug. +title: '' +labels: '' +assignees: '' + +--- + + From 0b07c3bd8f77487abd18c5c049f2421729aeba3e Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Tue, 26 Jan 2021 20:37:36 +0100 Subject: [PATCH 21/42] more documentation --- docs/troubleshooting.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 7f819a772..ec430b477 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -86,7 +86,9 @@ You may encounter warnings like this: This happens when certain dependencies of paperless that are responsible for the auto matching algorithm are updated. After updating these, your current training data *might* not be compatible anymore. This can be ignored -in most cases. If you want to get rid of the warning or actually experience issues with automatic matching, delete +in most cases. This warning will disappear automatically when paperless updates the training data. 
+ + f you want to get rid of the warning or actually experience issues with automatic matching, delete the file ``classification_model.pickle`` in the data directory and let paperless recreate it. Permission denied errors in the consumption directory From edfebe18a25c8f10b62e22cc1310a09a7d15f28a Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Tue, 26 Jan 2021 20:48:32 +0100 Subject: [PATCH 22/42] removed all occurences of pipenv from the documentation --- docs/administration.rst | 21 +++++---------------- docs/setup.rst | 25 ++++++++++++++----------- docs/troubleshooting.rst | 2 +- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/docs/administration.rst b/docs/administration.rst index 14b986e82..c54323e6e 100644 --- a/docs/administration.rst +++ b/docs/administration.rst @@ -121,27 +121,19 @@ After grabbing the new release and unpacking the contents, do the following: dependencies. The dependencies required are listed in the section about :ref:`bare metal installations `. -2. Update python requirements. If you use Pipenv, this is done with the following steps. +2. Update python requirements. Keep in mind to activate your virtual environment + before that, if you use one. .. code:: shell-session - $ pip install --upgrade pipenv - $ cd /path/to/paperless - $ pipenv clean - $ pipenv install - - This creates a new virtual environment (or uses your existing environment) - and installs all dependencies into it. - - You can also use the included ``requirements.txt`` file instead and create the virtual - environment yourself. This file includes exactly the same dependencies. + $ pip install -r requirements.txt 3. Migrate the database. .. code:: shell-session $ cd src - $ pipenv run python3 manage.py migrate + $ python3 manage.py migrate This might not actually do anything. Not every new paperless version comes with new database migrations. @@ -195,7 +187,7 @@ or .. 
code:: shell-session $ cd /path/to/paperless/src - $ pipenv run python manage.py + $ python3 manage.py depending on whether you use docker or not. @@ -462,6 +454,3 @@ Basic usage to disable encryption of your document store: .. code:: decrypt_documents [--passphrase SECR3TP4SSPHRA$E] - - -.. _Pipenv: https://pipenv.pypa.io/en/latest/ diff --git a/docs/setup.rst b/docs/setup.rst index 574631a58..40f6bb1b6 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -20,7 +20,7 @@ Paperless consists of the following components: .. code:: shell-session $ cd /path/to/paperless/src/ - $ pipenv run gunicorn -c /usr/src/paperless/gunicorn.conf.py -b 0.0.0.0:8000 paperless.wsgi + $ gunicorn -c ../gunicorn.conf.py -b 0.0.0.0:8000 paperless.wsgi or by any other means such as Apache ``mod_wsgi``. @@ -35,7 +35,7 @@ Paperless consists of the following components: .. code:: shell-session $ cd /path/to/paperless/src/ - $ pipenv run python3 manage.py document_consumer + $ python3 manage.py document_consumer .. _setup-task_processor: @@ -67,7 +67,7 @@ Paperless consists of the following components: .. code:: shell-session $ cd /path/to/paperless/src/ - $ pipenv run python3 manage.py qcluster + $ python3 manage.py qcluster * A `redis `_ message broker: This is a really lightweight service that is responsible for getting the tasks from the webserver and the consumer to the task scheduler. These run in a different @@ -255,7 +255,7 @@ writing. Windows is not and will never be supported. 1. Install dependencies. Paperless requires the following packages. * ``python3`` 3.6, 3.7, 3.8 (3.9 is untested). - * ``python3-pip``, optionally ``pipenv`` for package installation + * ``python3-pip`` * ``python3-dev`` * ``fonts-liberation`` for generating thumbnails for plain text files @@ -324,8 +324,13 @@ writing. Windows is not and will never be supported. Adjust as necessary if you configured different folders. -7. Install python requirements. 
Paperless comes with both Pipfiles for ``pipenv`` as well as with a ``requirements.txt``. - Both will install exactly the same requirements. It is up to you if you wish to use a virtual environment or not. +7. Install python requirements from the ``requirements.txt`` file. + It is up to you if you wish to use a virtual environment or not. + + .. code:: shell-session + + pip3 install -r requirements.txt + 8. Go to ``/opt/paperless/src``, and execute the following commands: @@ -654,14 +659,12 @@ management commands as below. This will launch the container and initialize the PostgreSQL database. - b) Without docker, open a shell in your virtual environment, switch to + b) Without docker, remember to activate any virtual environment, switch to the ``src`` directory and create the database schema: .. code:: shell-session - $ cd /path/to/paperless - $ pipenv shell - $ cd src + $ cd /path/to/paperless/src $ python3 manage.py migrate This will not copy any data yet. @@ -678,7 +681,7 @@ management commands as below. $ python3 manage.py loaddata data.json -6. Exit the shell. +6. If operating inside Docker, you may exit the shell now. .. code:: shell-session diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index ec430b477..f55d57af5 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -88,7 +88,7 @@ This happens when certain dependencies of paperless that are responsible for the updated. After updating these, your current training data *might* not be compatible anymore. This can be ignored in most cases. This warning will disappear automatically when paperless updates the training data. - f you want to get rid of the warning or actually experience issues with automatic matching, delete +If you want to get rid of the warning or actually experience issues with automatic matching, delete the file ``classification_model.pickle`` in the data directory and let paperless recreate it. 
Permission denied errors in the consumption directory From ab04817bea12ffe030f5d9772eb9f74c615abaa6 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Tue, 26 Jan 2021 22:10:43 +0100 Subject: [PATCH 23/42] alter defaults for workers and threads to allow more parallel tasks #446 --- docs/configuration.rst | 15 +++++++-------- src/documents/tests/test_settings.py | 4 ++-- src/paperless/settings.py | 6 ++++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 5edc003f6..36b124350 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -376,25 +376,24 @@ PAPERLESS_THREADS_PER_WORKER= use a higher thread per worker count. The default is a balance between the two, according to your CPU core count, - with a slight favor towards threads per worker, and leaving at least one core - free for other tasks: + with a slight favor towards threads per worker: +----------------+---------+---------+ | CPU core count | Workers | Threads | +----------------+---------+---------+ | 1 | 1 | 1 | +----------------+---------+---------+ - | 2 | 1 | 1 | + | 2 | 2 | 1 | +----------------+---------+---------+ - | 4 | 1 | 3 | + | 4 | 2 | 2 | +----------------+---------+---------+ - | 6 | 2 | 2 | + | 6 | 2 | 3 | +----------------+---------+---------+ - | 8 | 2 | 3 | + | 8 | 2 | 4 | +----------------+---------+---------+ - | 12 | 3 | 3 | + | 12 | 3 | 4 | +----------------+---------+---------+ - | 16 | 3 | 5 | + | 16 | 4 | 4 | +----------------+---------+---------+ If you only specify PAPERLESS_TASK_WORKERS, paperless will adjust diff --git a/src/documents/tests/test_settings.py b/src/documents/tests/test_settings.py index 21f29b4d9..0036daee7 100644 --- a/src/documents/tests/test_settings.py +++ b/src/documents/tests/test_settings.py @@ -20,7 +20,7 @@ class TestSettings(TestCase): self.assertEqual(default_threads, 1) def test_workers_threads(self): - for i in range(2, 64): + for i in range(1, 64): with 
mock.patch("paperless.settings.multiprocessing.cpu_count") as cpu_count: cpu_count.return_value = i @@ -31,4 +31,4 @@ class TestSettings(TestCase): self.assertTrue(default_workers >= 1) self.assertTrue(default_threads >= 1) - self.assertTrue(default_workers * default_threads < i, f"{i}") + self.assertTrue(default_workers * default_threads <= i, f"{i}") diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 894ecb60b..bc70cb331 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -354,8 +354,10 @@ LOGGING = { def default_task_workers(): # always leave one core open - available_cores = max(multiprocessing.cpu_count() - 1, 1) + available_cores = max(multiprocessing.cpu_count(), 1) try: + if available_cores < 4: + return available_cores return max( math.floor(math.sqrt(available_cores)), 1 @@ -376,7 +378,7 @@ Q_CLUSTER = { def default_threads_per_worker(task_workers): # always leave one core open - available_cores = max(multiprocessing.cpu_count() - 1, 1) + available_cores = max(multiprocessing.cpu_count(), 1) try: return max( math.floor(available_cores / task_workers), From 763b780aa4c16ddbb0540ba8acbd4e019c1109bd Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Wed, 27 Jan 2021 11:32:30 +0100 Subject: [PATCH 24/42] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 4 ++++ .github/ISSUE_TEMPLATE/feature_request.md | 3 +++ .github/ISSUE_TEMPLATE/other.md | 6 +++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index b72d397c4..0f0ac44e2 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -11,6 +11,10 @@ assignees: '' => Before opening an issue, please check the documentation and see if it helps you resolve your issue: https://paperless-ng.readthedocs.io/en/latest/troubleshooting.html => Please also make sure that you followed the 
installation instructions. => Please search the issues and look for similar issues before opening a bug report. + +=> If you encounter issues while installing or configuring Paperless-ng, please post that in the "Support" section of the discussions. Remember that Paperless successfully runs on a variety of different systems. If paperless does not start, it's probably an issue with your system, and not an issue of paperless. + +=> Don't remove the [BUG] prefix from the title. --> **Describe the bug** diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 21d474b36..47c36c23d 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -9,6 +9,9 @@ assignees: '' **Is your feature request related to a problem? Please describe.** diff --git a/.github/ISSUE_TEMPLATE/other.md b/.github/ISSUE_TEMPLATE/other.md index aae462edf..ef394f7f9 100644 --- a/.github/ISSUE_TEMPLATE/other.md +++ b/.github/ISSUE_TEMPLATE/other.md @@ -1,7 +1,7 @@ --- name: Other about: Anything that is not a feature request or bug. -title: '' +title: "[Other] Title of your issue" labels: '' assignees: '' @@ -11,4 +11,8 @@ assignees: '' => Discussions, Feedback and other suggestions belong in the "Disussion" section and not on the issue tracker. +=> If you encounter issues while installing or configuring Paperless-ng, please post that in the "Support" section of the discussions. Remember that Paperless successfully runs on a variety of different systems. If paperless does not start, it's probably an issue with your system, and not an issue of paperless. + +=> Don't remove the [Other] prefix from the title. 
+ --> From 682cf33c78678424d7b640d79f26f1cad62e89df Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Wed, 27 Jan 2021 12:05:30 +0100 Subject: [PATCH 25/42] clarification for some steps in the migration guide --- docs/setup.rst | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/setup.rst b/docs/setup.rst index 40f6bb1b6..afb3784d6 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -539,7 +539,10 @@ Migration to paperless-ng At its core, paperless-ng is still paperless and fully compatible. However, some things have changed under the hood, so you need to adapt your setup depending on -how you installed paperless. The important things to keep in mind are as follows. +how you installed paperless. + +This section describes how to update an existing paperless Docker installation. +The important things to keep in mind are as follows: * Read the :ref:`changelog ` and take note of breaking changes. * You should decide if you want to stick with SQLite or want to migrate your database @@ -574,11 +577,18 @@ Migration to paperless-ng is then performed in a few simple steps: .. caution:: - Paperless includes a ``.env`` file. This will set the - project name for docker compose to ``paperless`` so that paperless-ng will - automatically reuse your existing paperless volumes. When you start it, it - will migrate your existing data. After that, your old paperless installation - will be incompatible with the migrated volumes. + Paperless-ng includes a ``.env`` file. This will set the + project name for docker compose to ``paperless``, which will also define the names + of the volumes used by paperless-ng. However, if you find that paperless-ng + is not using your old paperless volumes, verify the names of your volumes with + + .. code:: shell-session + + $ docker volume ls | grep _data + + and adjust the project name in the ``.env`` file so that it matches the name + of the volumes before the ``_data`` part. + + 4. 
Download the ``docker-compose.sqlite.yml`` file to ``docker-compose.yml``. If you want to switch to PostgreSQL, do that after you migrated your existing From f872221c49c092c323ed3ebaf02b6f3c467f256b Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Thu, 28 Jan 2021 22:22:25 +0100 Subject: [PATCH 26/42] filesystem permission checks now issue warnings instead of errors, improves NFS compatibility --- src/paperless/checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paperless/checks.py b/src/paperless/checks.py index 1329ad679..df4d45e38 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -22,7 +22,7 @@ def path_check(var, directory): exists_hint.format(directory) )) elif not os.access(directory, os.W_OK | os.X_OK): - messages.append(Error( + messages.append(Warning( writeable_message.format(var), writeable_hint.format(directory) )) From 107cc2ca208806b493c15eee545afad340abe2b2 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Thu, 28 Jan 2021 22:38:12 +0100 Subject: [PATCH 27/42] add missing quotes --- src-ui/messages.xlf | 14 +++++++------- .../bulk-editor/bulk-editor.component.ts | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index 6175cf700..8bc640c33 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -1124,6 +1124,13 @@ 73 + + "" + + src/app/components/document-list/bulk-editor/bulk-editor.component.ts + 112 + + "" and "" @@ -1132,13 +1139,6 @@ This is for messages like 'modify "tag1" and "tag2"' - - "" - - src/app/components/document-list/bulk-editor/bulk-editor.component.ts - 116 - - , diff --git a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts index 6b2598fe8..04fc2a978 100644 --- a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts +++ 
b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts @@ -109,7 +109,7 @@ export class BulkEditorComponent { if (items.length == 0) { return "" } else if (items.length == 1) { - return items[0].name + return $localize`"${items[0].name}"` } else if (items.length == 2) { return $localize`:This is for messages like 'modify "tag1" and "tag2"':"${items[0].name}" and "${items[1].name}"` } else { From 05866da04bdcde28dd6eb007117acf31be3c226e Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Fri, 29 Jan 2021 12:54:54 +0100 Subject: [PATCH 28/42] remove dead code --- src/documents/views.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/documents/views.py b/src/documents/views.py index b99bf11c7..d6a894db6 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -132,10 +132,6 @@ class DocumentTypeViewSet(ModelViewSet): ordering_fields = ("name", "matching_algorithm", "match", "document_count") -class BulkEditForm(object): - pass - - class DocumentViewSet(RetrieveModelMixin, UpdateModelMixin, DestroyModelMixin, From 439f06ccda6a34df34042fc2ff43e88a7b42a0f0 Mon Sep 17 00:00:00 2001 From: "transifex-integration[bot]" <43880903+transifex-integration[bot]@users.noreply.github.com> Date: Fri, 29 Jan 2021 12:42:54 +0000 Subject: [PATCH 29/42] Translate /src-ui/messages.xlf in fr translation completed for the source file '/src-ui/messages.xlf' on the 'fr' language. 
--- src-ui/src/locale/messages.fr.xlf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src-ui/src/locale/messages.fr.xlf b/src-ui/src/locale/messages.fr.xlf index 4f09eab72..b5f602c7f 100644 --- a/src-ui/src/locale/messages.fr.xlf +++ b/src-ui/src/locale/messages.fr.xlf @@ -1283,6 +1283,14 @@ 73 + + "" + "" + + src/app/components/document-list/bulk-editor/bulk-editor.component.ts + 112 + + "" and "" "" et "" @@ -1292,14 +1300,6 @@ This is for messages like 'modify "tag1" and "tag2"' - - "" - "" - - src/app/components/document-list/bulk-editor/bulk-editor.component.ts - 116 - - , , From ddcc0883eb4f2e04285569e5ffea575eec3f12ba Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Fri, 29 Jan 2021 16:45:23 +0100 Subject: [PATCH 30/42] add support for suggestions --- src/documents/tests/test_api.py | 28 +++++++++++++ src/documents/views.py | 73 +++++++++++++++++++++++---------- 2 files changed, 79 insertions(+), 22 deletions(-) diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index 2b332a873..9e4b77189 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -590,6 +590,10 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(len(meta['original_metadata']), 0) self.assertGreater(len(meta['archive_metadata']), 0) + def test_get_metadata_invalid_doc(self): + response = self.client.get(f"/api/documents/34576/metadata/") + self.assertEqual(response.status_code, 404) + def test_get_metadata_no_archive(self): doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf") @@ -605,6 +609,30 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertGreater(len(meta['original_metadata']), 0) self.assertIsNone(meta['archive_metadata']) + def test_get_empty_suggestions(self): + doc = Document.objects.create(title="test", mime_type="application/pdf") + + response = self.client.get(f"/api/documents/{doc.pk}/suggestions/") + + 
self.assertEqual(response.status_code, 200) + self.assertEqual(response.data, {'correspondents': [], 'tags': [], 'document_types': []}) + + def test_get_suggestions_invalid_doc(self): + response = self.client.get(f"/api/documents/34676/suggestions/") + self.assertEqual(response.status_code, 404) + + @mock.patch("documents.views.match_correspondents") + @mock.patch("documents.views.match_tags") + @mock.patch("documents.views.match_document_types") + def test_get_suggestions(self, match_document_types, match_tags, match_correspondents): + doc = Document.objects.create(title="test", mime_type="application/pdf", content="this is an invoice!") + match_tags.return_value = [Tag(id=56), Tag(id=123)] + match_document_types.return_value = [DocumentType(id=23)] + match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)] + + response = self.client.get(f"/api/documents/{doc.pk}/suggestions/") + self.assertEqual(response.data, {'correspondents': [88,2], 'tags': [56,123], 'document_types': [23]}) + def test_saved_views(self): u1 = User.objects.create_user("user1") u2 = User.objects.create_user("user2") diff --git a/src/documents/views.py b/src/documents/views.py index d6a894db6..43ae2b103 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -34,6 +34,7 @@ from rest_framework.viewsets import ( import documents.index as index from paperless.db import GnuPG from paperless.views import StandardPagination +from .classifier import DocumentClassifier, IncompatibleClassifierVersionError from .filters import ( CorrespondentFilterSet, DocumentFilterSet, @@ -41,6 +42,7 @@ from .filters import ( DocumentTypeFilterSet, LogFilterSet ) +from .matching import match_correspondents, match_tags, match_document_types from .models import Correspondent, Document, Log, Tag, DocumentType, SavedView from .parsers import get_parser_class_for_mime_type from .serialisers import ( @@ -225,31 +227,58 @@ class DocumentViewSet(RetrieveModelMixin, def metadata(self, request, 
pk=None): try: doc = Document.objects.get(pk=pk) - - meta = { - "original_checksum": doc.checksum, - "original_size": os.stat(doc.source_path).st_size, - "original_mime_type": doc.mime_type, - "media_filename": doc.filename, - "has_archive_version": os.path.isfile(doc.archive_path), - "original_metadata": self.get_metadata( - doc.source_path, doc.mime_type) - } - - if doc.archive_checksum and os.path.isfile(doc.archive_path): - meta['archive_checksum'] = doc.archive_checksum - meta['archive_size'] = os.stat(doc.archive_path).st_size, - meta['archive_metadata'] = self.get_metadata( - doc.archive_path, "application/pdf") - else: - meta['archive_checksum'] = None - meta['archive_size'] = None - meta['archive_metadata'] = None - - return Response(meta) except Document.DoesNotExist: raise Http404() + meta = { + "original_checksum": doc.checksum, + "original_size": os.stat(doc.source_path).st_size, + "original_mime_type": doc.mime_type, + "media_filename": doc.filename, + "has_archive_version": os.path.isfile(doc.archive_path), + "original_metadata": self.get_metadata( + doc.source_path, doc.mime_type) + } + + if doc.archive_checksum and os.path.isfile(doc.archive_path): + meta['archive_checksum'] = doc.archive_checksum + meta['archive_size'] = os.stat(doc.archive_path).st_size, + meta['archive_metadata'] = self.get_metadata( + doc.archive_path, "application/pdf") + else: + meta['archive_checksum'] = None + meta['archive_size'] = None + meta['archive_metadata'] = None + + return Response(meta) + + @action(methods=['get'], detail=True) + def suggestions(self, request, pk=None): + try: + doc = Document.objects.get(pk=pk) + except Document.DoesNotExist: + raise Http404() + + try: + classifier = DocumentClassifier() + classifier.reload() + except (OSError, EOFError, IncompatibleClassifierVersionError) as e: + logging.getLogger(__name__).warning( + "Cannot load classifier: Not providing auto matching " + "suggestions" + ) + classifier = None + + return Response({ + 
"correspondents": [ + c.id for c in match_correspondents(doc, classifier) + ], + "tags": [t.id for t in match_tags(doc, classifier)], + "document_types": [ + dt.id for dt in match_document_types(doc, classifier) + ] + }) + @action(methods=['get'], detail=True) def preview(self, request, pk=None): try: From d22d9a023c63f8b7c2274318456255c92821741e Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Fri, 29 Jan 2021 16:48:51 +0100 Subject: [PATCH 31/42] frontend support for suggestions #264 --- src-ui/messages.xlf | 31 ++++++++------ .../common/input/select/select.component.html | 8 ++++ .../common/input/select/select.component.ts | 13 +++++- .../common/input/tags/tags.component.html | 23 ++++++----- .../common/input/tags/tags.component.ts | 41 +++++++++++-------- .../document-detail.component.html | 8 ++-- .../document-detail.component.ts | 7 ++++ .../data/paperless-document-suggestions.ts | 9 ++++ .../src/app/services/rest/document.service.ts | 5 +++ 9 files changed, 102 insertions(+), 43 deletions(-) create mode 100644 src-ui/src/app/data/paperless-document-suggestions.ts diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index 8bc640c33..d105ae31d 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -146,35 +146,35 @@ Confirm delete src/app/components/document-detail/document-detail.component.ts - 192 + 199 Do you really want to delete document ""? src/app/components/document-detail/document-detail.component.ts - 193 + 200 The files for this document will be deleted permanently. This operation cannot be undone. 
src/app/components/document-detail/document-detail.component.ts - 194 + 201 Delete document src/app/components/document-detail/document-detail.component.ts - 196 + 203 Error deleting document: src/app/components/document-detail/document-detail.component.ts - 203 + 210 @@ -1288,6 +1288,13 @@ 27 + + Suggested: + + src/app/components/common/input/select/select.component.html + 26 + + Save current view @@ -1509,49 +1516,49 @@ ASN src/app/services/rest/document.service.ts - 16 + 17 Correspondent src/app/services/rest/document.service.ts - 17 + 18 Title src/app/services/rest/document.service.ts - 18 + 19 Document type src/app/services/rest/document.service.ts - 19 + 20 Created src/app/services/rest/document.service.ts - 20 + 21 Added src/app/services/rest/document.service.ts - 21 + 22 Modified src/app/services/rest/document.service.ts - 22 + 23 diff --git a/src-ui/src/app/components/common/input/select/select.component.html b/src-ui/src/app/components/common/input/select/select.component.html index aa500d0d1..973b09a55 100644 --- a/src-ui/src/app/components/common/input/select/select.component.html +++ b/src-ui/src/app/components/common/input/select/select.component.html @@ -22,4 +22,12 @@ {{hint}} + + Suggested:  + + {{s.name}}  + + + + diff --git a/src-ui/src/app/components/common/input/select/select.component.ts b/src-ui/src/app/components/common/input/select/select.component.ts index 18f30cf6e..e02aaab72 100644 --- a/src-ui/src/app/components/common/input/select/select.component.ts +++ b/src-ui/src/app/components/common/input/select/select.component.ts @@ -30,11 +30,22 @@ export class SelectComponent extends AbstractInputComponent { @Input() allowNull: boolean = false + @Input() + suggestions: number[] + @Output() createNew = new EventEmitter() - + showPlusButton(): boolean { return this.createNew.observers.length > 0 } + getSuggestions() { + if (this.suggestions && this.items) { + return this.suggestions.filter(id => id != this.value).map(id => this.items.find(item 
=> item.id == id)) + } else { + return [] + } + } + } diff --git a/src-ui/src/app/components/common/input/tags/tags.component.html b/src-ui/src/app/components/common/input/tags/tags.component.html index c9a0c96d6..22a7e640a 100644 --- a/src-ui/src/app/components/common/input/tags/tags.component.html +++ b/src-ui/src/app/components/common/input/tags/tags.component.html @@ -2,30 +2,25 @@
- + (change)="onChange(value)" + (blur)="onTouched()"> - +
-
- - - -
- +
@@ -39,5 +34,13 @@
{{hint}} + + Suggested:  + + {{tag.name}}  + + + + diff --git a/src-ui/src/app/components/common/input/tags/tags.component.ts b/src-ui/src/app/components/common/input/tags/tags.component.ts index 5501ac5a6..f77d0570d 100644 --- a/src-ui/src/app/components/common/input/tags/tags.component.ts +++ b/src-ui/src/app/components/common/input/tags/tags.component.ts @@ -26,9 +26,6 @@ export class TagsComponent implements OnInit, ControlValueAccessor { writeValue(newValue: number[]): void { this.value = newValue - if (this.tags) { - this.displayValue = newValue - } } registerOnChange(fn: any): void { this.onChange = fn; @@ -43,7 +40,6 @@ export class TagsComponent implements OnInit, ControlValueAccessor { ngOnInit(): void { this.tagService.listAll().subscribe(result => { this.tags = result.results - this.displayValue = this.value }) } @@ -53,23 +49,28 @@ export class TagsComponent implements OnInit, ControlValueAccessor { @Input() hint - value: number[] + @Input() + suggestions: number[] - displayValue: number[] = [] + value: number[] tags: PaperlessTag[] getTag(id) { - return this.tags.find(tag => tag.id == id) + if (this.tags) { + return this.tags.find(tag => tag.id == id) + } else { + return null + } } removeTag(id) { - let index = this.displayValue.indexOf(id) + let index = this.value.indexOf(id) if (index > -1) { - let oldValue = this.displayValue + let oldValue = this.value oldValue.splice(index, 1) - this.displayValue = [...oldValue] - this.onChange(this.displayValue) + this.value = [...oldValue] + this.onChange(this.value) } } @@ -79,15 +80,23 @@ export class TagsComponent implements OnInit, ControlValueAccessor { modal.componentInstance.success.subscribe(newTag => { this.tagService.listAll().subscribe(tags => { this.tags = tags.results - this.displayValue = [...this.displayValue, newTag.id] - this.onChange(this.displayValue) + this.value = [...this.value, newTag.id] + this.onChange(this.value) }) }) } - ngSelectChange() { - this.value = this.displayValue - 
this.onChange(this.displayValue) + getSuggestions() { + if (this.suggestions && this.tags) { + return this.suggestions.filter(id => !this.value.includes(id)).map(id => this.tags.find(tag => tag.id == id)) + } else { + return [] + } + } + + addTag(id) { + this.value = [...this.value, id] + this.onChange(this.value) } } diff --git a/src-ui/src/app/components/document-detail/document-detail.component.html b/src-ui/src/app/components/document-detail/document-detail.component.html index 639b9e260..2814a1242 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.html +++ b/src-ui/src/app/components/document-detail/document-detail.component.html @@ -60,10 +60,10 @@ + (createNew)="createCorrespondent()" [suggestions]="suggestions?.correspondents"> - + (createNew)="createDocumentType()" [suggestions]="suggestions?.document_types"> + @@ -145,6 +145,6 @@ - + diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index aa2308eac..a7cce715e 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -19,6 +19,7 @@ import { PDFDocumentProxy } from 'ng2-pdf-viewer'; import { ToastService } from 'src/app/services/toast.service'; import { TextComponent } from '../common/input/text/text.component'; import { SettingsService, SETTINGS_KEYS } from 'src/app/services/settings.service'; +import { PaperlessDocumentSuggestions } from 'src/app/data/paperless-document-suggestions'; @Component({ selector: 'app-document-detail', @@ -40,6 +41,8 @@ export class DocumentDetailComponent implements OnInit { documentId: number document: PaperlessDocument metadata: PaperlessDocumentMetadata + suggestions: PaperlessDocumentSuggestions + title: string previewUrl: string downloadUrl: string @@ -95,6 +98,7 @@ export class DocumentDetailComponent implements OnInit { 
this.previewUrl = this.documentsService.getPreviewUrl(this.documentId) this.downloadUrl = this.documentsService.getDownloadUrl(this.documentId) this.downloadOriginalUrl = this.documentsService.getDownloadUrl(this.documentId, true) + this.suggestions = null if (this.openDocumentService.getOpenDocument(this.documentId)) { this.updateComponent(this.openDocumentService.getOpenDocument(this.documentId)) } else { @@ -112,6 +116,9 @@ export class DocumentDetailComponent implements OnInit { this.documentsService.getMetadata(doc.id).subscribe(result => { this.metadata = result }) + this.documentsService.getSuggestions(doc.id).subscribe(result => { + this.suggestions = result + }) this.title = this.documentTitlePipe.transform(doc.title) this.documentForm.patchValue(doc) } diff --git a/src-ui/src/app/data/paperless-document-suggestions.ts b/src-ui/src/app/data/paperless-document-suggestions.ts new file mode 100644 index 000000000..71459eff2 --- /dev/null +++ b/src-ui/src/app/data/paperless-document-suggestions.ts @@ -0,0 +1,9 @@ +export interface PaperlessDocumentSuggestions { + + tags?: number[] + + correspondents?: number[] + + document_types?: number[] + +} \ No newline at end of file diff --git a/src-ui/src/app/services/rest/document.service.ts b/src-ui/src/app/services/rest/document.service.ts index dd2c32fa8..19b18cfeb 100644 --- a/src-ui/src/app/services/rest/document.service.ts +++ b/src-ui/src/app/services/rest/document.service.ts @@ -11,6 +11,7 @@ import { CorrespondentService } from './correspondent.service'; import { DocumentTypeService } from './document-type.service'; import { TagService } from './tag.service'; import { FILTER_RULE_TYPES } from 'src/app/data/filter-rule-type'; +import { PaperlessDocumentSuggestions } from 'src/app/data/paperless-document-suggestions'; export const DOCUMENT_SORT_FIELDS = [ { field: 'archive_serial_number', name: $localize`ASN` }, @@ -129,4 +130,8 @@ export class DocumentService extends AbstractPaperlessService return 
this.http.post(this.getResourceUrl(null, 'selection_data'), {"documents": ids}) } + getSuggestions(id: number): Observable { + return this.http.get(this.getResourceUrl(id, 'suggestions')) + } + } From a40e4fe3bc1f0fd9868f7ee40d6c319e303a99dd Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Fri, 29 Jan 2021 16:57:00 +0100 Subject: [PATCH 32/42] update messages --- src-ui/messages.xlf | 4 ++-- .../app/components/common/input/select/select.component.html | 2 +- .../src/app/components/common/input/tags/tags.component.html | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index d105ae31d..0ad552100 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -1288,8 +1288,8 @@ 27
- - Suggested: + + Suggestions: src/app/components/common/input/select/select.component.html 26 diff --git a/src-ui/src/app/components/common/input/select/select.component.html b/src-ui/src/app/components/common/input/select/select.component.html index 973b09a55..540429e89 100644 --- a/src-ui/src/app/components/common/input/select/select.component.html +++ b/src-ui/src/app/components/common/input/select/select.component.html @@ -23,7 +23,7 @@ {{hint}} - Suggested:  + Suggestions:  {{s.name}}  diff --git a/src-ui/src/app/components/common/input/tags/tags.component.html b/src-ui/src/app/components/common/input/tags/tags.component.html index 22a7e640a..677b9f4d1 100644 --- a/src-ui/src/app/components/common/input/tags/tags.component.html +++ b/src-ui/src/app/components/common/input/tags/tags.component.html @@ -35,7 +35,7 @@ {{hint}} - Suggested:  + Suggestions:  {{tag.name}}  From a54784da11b0b86503e6a8ebd6a6c9c85a0a8c9e Mon Sep 17 00:00:00 2001 From: "transifex-integration[bot]" <43880903+transifex-integration[bot]@users.noreply.github.com> Date: Fri, 29 Jan 2021 18:59:00 +0000 Subject: [PATCH 33/42] Translate /src-ui/messages.xlf in de translation completed for the source file '/src-ui/messages.xlf' on the 'de' language. --- src-ui/src/locale/messages.de.xlf | 54 ++++++++++++++++++------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/src-ui/src/locale/messages.de.xlf b/src-ui/src/locale/messages.de.xlf index a898e4630..b6e2708ba 100644 --- a/src-ui/src/locale/messages.de.xlf +++ b/src-ui/src/locale/messages.de.xlf @@ -147,7 +147,7 @@ Created - Erstellt + Ausgestellt src/app/components/document-list/document-list.component.html 129 @@ -166,7 +166,7 @@ Löschen bestätigen src/app/components/document-detail/document-detail.component.ts - 192 + 199 @@ -174,7 +174,7 @@ Möchten Sie das Dokument "" wirklich löschen? 
src/app/components/document-detail/document-detail.component.ts - 193 + 200 @@ -182,7 +182,7 @@ Die Dateien dieses Dokuments werden permanent gelöscht. Diese Aktion kann nicht rückgängig gemacht werden. src/app/components/document-detail/document-detail.component.ts - 194 + 201 @@ -190,7 +190,7 @@ Dokument löschen src/app/components/document-detail/document-detail.component.ts - 196 + 203 @@ -198,7 +198,7 @@ Fehler beim Löschen des Dokuments: src/app/components/document-detail/document-detail.component.ts - 203 + 210 @@ -307,7 +307,7 @@ Date created - Erstellt am + Ausgestellt am src/app/components/document-detail/document-detail.component.html 61 @@ -1283,6 +1283,14 @@ 73 + + "" + "" + + src/app/components/document-list/bulk-editor/bulk-editor.component.ts + 112 + + "" and "" "" und "" @@ -1292,14 +1300,6 @@ This is for messages like 'modify "tag1" and "tag2"' - - "" - "" - - src/app/components/document-list/bulk-editor/bulk-editor.component.ts - 116 - - , , @@ -1470,6 +1470,14 @@ 27 + + Suggestions: + Vorschläge: + + src/app/components/common/input/select/select.component.html + 26 + + Save current view Aktuelle Ansicht speichern @@ -1723,7 +1731,7 @@ ASN src/app/services/rest/document.service.ts - 16 + 17 @@ -1731,7 +1739,7 @@ Korrespondent src/app/services/rest/document.service.ts - 17 + 18 @@ -1739,7 +1747,7 @@ Titel src/app/services/rest/document.service.ts - 18 + 19 @@ -1747,15 +1755,15 @@ Dokumenttyp src/app/services/rest/document.service.ts - 19 + 20 Created - Erstellt am + Ausgestellt am src/app/services/rest/document.service.ts - 20 + 21 @@ -1763,7 +1771,7 @@ Hinzugefügt am src/app/services/rest/document.service.ts - 21 + 22 @@ -1771,7 +1779,7 @@ Geändert am src/app/services/rest/document.service.ts - 22 + 23 From e4aa31a62d23df8533903764a55dbf291a57d67c Mon Sep 17 00:00:00 2001 From: "transifex-integration[bot]" <43880903+transifex-integration[bot]@users.noreply.github.com> Date: Fri, 29 Jan 2021 19:57:40 +0000 Subject: [PATCH 34/42] Translate 
/src-ui/messages.xlf in fr translation completed for the source file '/src-ui/messages.xlf' on the 'fr' language. --- src-ui/src/locale/messages.fr.xlf | 32 +++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src-ui/src/locale/messages.fr.xlf b/src-ui/src/locale/messages.fr.xlf index b5f602c7f..62dbad342 100644 --- a/src-ui/src/locale/messages.fr.xlf +++ b/src-ui/src/locale/messages.fr.xlf @@ -166,7 +166,7 @@ Confirmer la suppression src/app/components/document-detail/document-detail.component.ts - 192 + 199 @@ -174,7 +174,7 @@ Voulez-vous vraiment supprimer le document "" ? src/app/components/document-detail/document-detail.component.ts - 193 + 200 @@ -182,7 +182,7 @@ Les fichiers liés à ce document seront supprimés définitivement. Cette action est irréversible. src/app/components/document-detail/document-detail.component.ts - 194 + 201 @@ -190,7 +190,7 @@ Supprimer le document src/app/components/document-detail/document-detail.component.ts - 196 + 203 @@ -198,7 +198,7 @@ Une erreur s'est produite lors de la suppression du document : src/app/components/document-detail/document-detail.component.ts - 203 + 210 @@ -1470,6 +1470,14 @@ 27 + + Suggestions: + Suggestions : + + src/app/components/common/input/select/select.component.html + 26 + + Save current view Enregistrer la vue actuelle @@ -1723,7 +1731,7 @@ NSA src/app/services/rest/document.service.ts - 16 + 17 @@ -1731,7 +1739,7 @@ Correspondant src/app/services/rest/document.service.ts - 17 + 18 @@ -1739,7 +1747,7 @@ Titre src/app/services/rest/document.service.ts - 18 + 19 @@ -1747,7 +1755,7 @@ Type de document src/app/services/rest/document.service.ts - 19 + 20 @@ -1755,7 +1763,7 @@ Date de création src/app/services/rest/document.service.ts - 20 + 21 @@ -1763,7 +1771,7 @@ Date d'ajout src/app/services/rest/document.service.ts - 21 + 22 @@ -1771,7 +1779,7 @@ Date de modification src/app/services/rest/document.service.ts - 22 + 23 From 
21c501de28f09c582b78dbce45b825c57feef84e Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Wed, 27 Jan 2021 07:17:46 +0100 Subject: [PATCH 35/42] force flush on temp file during consumption file.write() does not guarantee that a file handle contains anything without calling an accompanying file.flush() For typical files that are larger than the OS-file buffer, this is no problem For small files (e.g. 64 characters in a .TXT), this race condition leads to `inode/x-empty` because the file inode is created, but no content is written to it --- ansible/molecule/default/verify.yml | 6 +----- src/documents/views.py | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/ansible/molecule/default/verify.yml b/ansible/molecule/default/verify.yml index 1b3a436ca..4d7e30f4d 100644 --- a/ansible/molecule/default/verify.yml +++ b/ansible/molecule/default/verify.yml @@ -29,9 +29,9 @@ document: content: "{{ content }}" filename: "{{ filename }}.txt" - mime_type: text/plain headers: Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}' + Content-Type: text/plain return_content: yes register: post_document failed_when: "'OK' not in post_document.content" @@ -45,10 +45,6 @@ register: logs failed_when: "('Consuming ' + filename + '.txt') not in logs.content" - - name: sleep 5 seconds - pause: - seconds: 5 - - name: verify uploaded document has been consumed uri: url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/" diff --git a/src/documents/views.py b/src/documents/views.py index b99bf11c7..7aadba36d 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -383,6 +383,7 @@ class PostDocumentView(APIView): dir=settings.SCRATCH_DIR, delete=False) as f: f.write(doc_data) + f.flush() os.utime(f.name, times=(t, t)) async_task("documents.tasks.consume_file", From 063bfc245cca390fd6745b95fac2ae6875c6e836 Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Sat, 30 Jan 2021 12:20:55 
+0100 Subject: [PATCH 36/42] DEBUG - force newline in .txt --- ansible/molecule/default/verify.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/molecule/default/verify.yml b/ansible/molecule/default/verify.yml index 4d7e30f4d..fde2e8905 100644 --- a/ansible/molecule/default/verify.yml +++ b/ansible/molecule/default/verify.yml @@ -17,7 +17,7 @@ - name: generate random name and content set_fact: - content: "{{ lookup('password', '/dev/null length=64 chars=ascii_letters') }}" + content: "{{ lookup('password', '/dev/null length=64 chars=ascii_letters') }}\n" filename: "{{ lookup('password', '/dev/null length=8 chars=ascii_letters') }}" - name: check if document posting works From d9e06958dcd1dc1888f16efc614dce7feb57f667 Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Sat, 30 Jan 2021 12:50:05 +0100 Subject: [PATCH 37/42] DEBUG - force flush 2nd try --- ansible/molecule/default/verify.yml | 2 +- src/documents/views.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/molecule/default/verify.yml b/ansible/molecule/default/verify.yml index fde2e8905..4d7e30f4d 100644 --- a/ansible/molecule/default/verify.yml +++ b/ansible/molecule/default/verify.yml @@ -17,7 +17,7 @@ - name: generate random name and content set_fact: - content: "{{ lookup('password', '/dev/null length=64 chars=ascii_letters') }}\n" + content: "{{ lookup('password', '/dev/null length=64 chars=ascii_letters') }}" filename: "{{ lookup('password', '/dev/null length=8 chars=ascii_letters') }}" - name: check if document posting works diff --git a/src/documents/views.py b/src/documents/views.py index 7aadba36d..808ecb925 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -381,9 +381,9 @@ class PostDocumentView(APIView): with tempfile.NamedTemporaryFile(prefix="paperless-upload-", dir=settings.SCRATCH_DIR, + buffering=0, delete=False) as f: f.write(doc_data) - f.flush() os.utime(f.name, times=(t, t)) 
async_task("documents.tasks.consume_file", From 269673ce05c15ceb05cdda6907c207878ec14d4c Mon Sep 17 00:00:00 2001 From: Fabian Koller Date: Sat, 30 Jan 2021 13:58:54 +0100 Subject: [PATCH 38/42] DEBUG - use very long txt file --- ansible/molecule/default/verify.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ansible/molecule/default/verify.yml b/ansible/molecule/default/verify.yml index 4d7e30f4d..185840783 100644 --- a/ansible/molecule/default/verify.yml +++ b/ansible/molecule/default/verify.yml @@ -17,7 +17,7 @@ - name: generate random name and content set_fact: - content: "{{ lookup('password', '/dev/null length=64 chars=ascii_letters') }}" + content: "{{ lookup('password', '/dev/null length=65536 chars=ascii_letters') }}" filename: "{{ lookup('password', '/dev/null length=8 chars=ascii_letters') }}" - name: check if document posting works @@ -45,6 +45,10 @@ register: logs failed_when: "('Consuming ' + filename + '.txt') not in logs.content" + - name: sleep till consumption finished + pause: + seconds: 10 + - name: verify uploaded document has been consumed uri: url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/" From 87a18eae2d37b1c05052aafb7c4142a13ad10892 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Sat, 30 Jan 2021 14:22:23 +0100 Subject: [PATCH 39/42] centralized classifier loading, better error handling, no error messages when auto matching is not used --- src/documents/classifier.py | 28 ++++++++++ src/documents/consumer.py | 11 +--- .../management/commands/document_retagger.py | 11 +--- src/documents/tasks.py | 24 +++++---- src/documents/tests/test_classifier.py | 32 ++++++++++- src/documents/tests/test_consumer.py | 2 +- src/documents/tests/test_tasks.py | 54 +++++++++++++++++-- src/documents/views.py | 12 +---- 8 files changed, 131 insertions(+), 43 deletions(-) diff --git a/src/documents/classifier.py b/src/documents/classifier.py index 60c9abeec..41cd05412 100755 --- 
a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -26,6 +26,34 @@ def preprocess_content(content): return content +def load_classifier(): + if not os.path.isfile(settings.MODEL_FILE): + logger.debug( + f"Document classification model does not exist (yet), not " + f"performing automatic matching." + ) + return None + + try: + classifier = DocumentClassifier() + classifier.reload() + except (EOFError, IncompatibleClassifierVersionError) as e: + # there's something wrong with the model file. + logger.error( + f"Unrecoverable error while loading document classification model: " + f"{str(e)}, deleting model file." + ) + os.unlink(settings.MODEL_FILE) + classifier = None + except OSError as e: + logger.error( + f"Error while loading document classification model: {str(e)}" + ) + classifier = None + + return classifier + + class DocumentClassifier(object): FORMAT_VERSION = 6 diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 5418e3b59..5e76ad03a 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -11,7 +11,7 @@ from django.utils import timezone from filelock import FileLock from rest_framework.reverse import reverse -from .classifier import DocumentClassifier, IncompatibleClassifierVersionError +from .classifier import load_classifier from .file_handling import create_source_path_directory, \ generate_unique_filename from .loggers import LoggingMixin @@ -201,14 +201,7 @@ class Consumer(LoggingMixin): # reloading the classifier multiple times, since there are multiple # post-consume hooks that all require the classifier. - try: - classifier = DocumentClassifier() - classifier.reload() - except (OSError, EOFError, IncompatibleClassifierVersionError) as e: - self.log( - "warning", - f"Cannot classify documents: {e}.") - classifier = None + classifier = load_classifier() # now that everything is done, we can start to store the document # in the system. This will be a transaction and reasonably fast. 
diff --git a/src/documents/management/commands/document_retagger.py b/src/documents/management/commands/document_retagger.py index 0fb9782c1..b2f5d8918 100755 --- a/src/documents/management/commands/document_retagger.py +++ b/src/documents/management/commands/document_retagger.py @@ -2,8 +2,7 @@ import logging from django.core.management.base import BaseCommand -from documents.classifier import DocumentClassifier, \ - IncompatibleClassifierVersionError +from documents.classifier import load_classifier from documents.models import Document from ...mixins import Renderable from ...signals.handlers import set_correspondent, set_document_type, set_tags @@ -70,13 +69,7 @@ class Command(Renderable, BaseCommand): queryset = Document.objects.all() documents = queryset.distinct() - classifier = DocumentClassifier() - try: - classifier.reload() - except (OSError, EOFError, IncompatibleClassifierVersionError) as e: - logging.getLogger(__name__).warning( - f"Cannot classify documents: {e}.") - classifier = None + classifier = load_classifier() for document in documents: logging.getLogger(__name__).info( diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 38ff532b5..4e74d7350 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -6,10 +6,9 @@ from django.db.models.signals import post_save from whoosh.writing import AsyncWriter from documents import index, sanity_checker -from documents.classifier import DocumentClassifier, \ - IncompatibleClassifierVersionError +from documents.classifier import DocumentClassifier, load_classifier from documents.consumer import Consumer, ConsumerError -from documents.models import Document +from documents.models import Document, Tag, DocumentType, Correspondent from documents.sanity_checker import SanityFailedError @@ -30,13 +29,18 @@ def index_reindex(): def train_classifier(): - classifier = DocumentClassifier() + if (not Tag.objects.filter( + matching_algorithm=Tag.MATCH_AUTO).exists() and + not 
DocumentType.objects.filter( + matching_algorithm=Tag.MATCH_AUTO).exists() and + not Correspondent.objects.filter( + matching_algorithm=Tag.MATCH_AUTO).exists()): - try: - # load the classifier, since we might not have to train it again. - classifier.reload() - except (OSError, EOFError, IncompatibleClassifierVersionError): - # This is what we're going to fix here. + return + + classifier = load_classifier() + + if not classifier: classifier = DocumentClassifier() try: @@ -52,7 +56,7 @@ def train_classifier(): ) except Exception as e: - logging.getLogger(__name__).error( + logging.getLogger(__name__).warning( "Classifier error: " + str(e) ) diff --git a/src/documents/tests/test_classifier.py b/src/documents/tests/test_classifier.py index 9e999794d..43c38b691 100644 --- a/src/documents/tests/test_classifier.py +++ b/src/documents/tests/test_classifier.py @@ -1,10 +1,13 @@ +import os import tempfile +from pathlib import Path from time import sleep from unittest import mock +from django.conf import settings from django.test import TestCase, override_settings -from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError +from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError, load_classifier from documents.models import Correspondent, Document, Tag, DocumentType from documents.tests.utils import DirectoriesMixin @@ -235,3 +238,30 @@ class TestClassifier(DirectoriesMixin, TestCase): self.classifier.train() self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk]) self.assertListEqual(self.classifier.predict_tags(doc2.content), []) + + def test_load_classifier_not_exists(self): + self.assertFalse(os.path.exists(settings.MODEL_FILE)) + self.assertIsNone(load_classifier()) + + @mock.patch("documents.classifier.DocumentClassifier.reload") + def test_load_classifier(self, reload): + Path(settings.MODEL_FILE).touch() + self.assertIsNotNone(load_classifier()) + + 
@mock.patch("documents.classifier.DocumentClassifier.reload") + def test_load_classifier_incompatible_version(self, reload): + Path(settings.MODEL_FILE).touch() + self.assertTrue(os.path.exists(settings.MODEL_FILE)) + + reload.side_effect = IncompatibleClassifierVersionError() + self.assertIsNone(load_classifier()) + self.assertFalse(os.path.exists(settings.MODEL_FILE)) + + @mock.patch("documents.classifier.DocumentClassifier.reload") + def test_load_classifier_os_error(self, reload): + Path(settings.MODEL_FILE).touch() + self.assertTrue(os.path.exists(settings.MODEL_FILE)) + + reload.side_effect = OSError() + self.assertIsNone(load_classifier()) + self.assertTrue(os.path.exists(settings.MODEL_FILE)) diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index a6861a541..02d1d0004 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -420,7 +420,7 @@ class TestConsumer(DirectoriesMixin, TestCase): self.assertIsNotNone(os.path.isfile(document.title)) self.assertTrue(os.path.isfile(document.source_path)) - @mock.patch("documents.consumer.DocumentClassifier") + @mock.patch("documents.consumer.load_classifier") def testClassifyDocument(self, m): correspondent = Correspondent.objects.create(name="test") dtype = DocumentType.objects.create(name="test") diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index 653590707..eb310d357 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -1,11 +1,12 @@ -from datetime import datetime +import os from unittest import mock +from django.conf import settings from django.test import TestCase from django.utils import timezone from documents import tasks -from documents.models import Document +from documents.models import Document, Tag, Correspondent, DocumentType from documents.sanity_checker import SanityError, SanityFailedError from documents.tests.utils import DirectoriesMixin @@ -22,8 
+23,55 @@ class TestTasks(DirectoriesMixin, TestCase): tasks.index_optimize() - def test_train_classifier(self): + @mock.patch("documents.tasks.load_classifier") + def test_train_classifier_no_auto_matching(self, load_classifier): tasks.train_classifier() + load_classifier.assert_not_called() + + @mock.patch("documents.tasks.load_classifier") + def test_train_classifier_with_auto_tag(self, load_classifier): + load_classifier.return_value = None + Tag.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") + tasks.train_classifier() + load_classifier.assert_called_once() + self.assertFalse(os.path.isfile(settings.MODEL_FILE)) + + @mock.patch("documents.tasks.load_classifier") + def test_train_classifier_with_auto_type(self, load_classifier): + load_classifier.return_value = None + DocumentType.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") + tasks.train_classifier() + load_classifier.assert_called_once() + self.assertFalse(os.path.isfile(settings.MODEL_FILE)) + + @mock.patch("documents.tasks.load_classifier") + def test_train_classifier_with_auto_correspondent(self, load_classifier): + load_classifier.return_value = None + Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") + tasks.train_classifier() + load_classifier.assert_called_once() + self.assertFalse(os.path.isfile(settings.MODEL_FILE)) + + def test_train_classifier(self): + c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") + doc = Document.objects.create(correspondent=c, content="test", title="test") + self.assertFalse(os.path.isfile(settings.MODEL_FILE)) + + tasks.train_classifier() + self.assertTrue(os.path.isfile(settings.MODEL_FILE)) + mtime = os.stat(settings.MODEL_FILE).st_mtime + + tasks.train_classifier() + self.assertTrue(os.path.isfile(settings.MODEL_FILE)) + mtime2 = os.stat(settings.MODEL_FILE).st_mtime + self.assertEqual(mtime, mtime2) + + doc.content = "test2" + doc.save() + tasks.train_classifier() + 
self.assertTrue(os.path.isfile(settings.MODEL_FILE)) + mtime3 = os.stat(settings.MODEL_FILE).st_mtime + self.assertNotEqual(mtime2, mtime3) @mock.patch("documents.tasks.sanity_checker.check_sanity") def test_sanity_check(self, m): diff --git a/src/documents/views.py b/src/documents/views.py index 43ae2b103..6fbb42976 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -34,7 +34,7 @@ from rest_framework.viewsets import ( import documents.index as index from paperless.db import GnuPG from paperless.views import StandardPagination -from .classifier import DocumentClassifier, IncompatibleClassifierVersionError +from .classifier import load_classifier from .filters import ( CorrespondentFilterSet, DocumentFilterSet, @@ -259,15 +259,7 @@ class DocumentViewSet(RetrieveModelMixin, except Document.DoesNotExist: raise Http404() - try: - classifier = DocumentClassifier() - classifier.reload() - except (OSError, EOFError, IncompatibleClassifierVersionError) as e: - logging.getLogger(__name__).warning( - "Cannot load classifier: Not providing auto matching " - "suggestions" - ) - classifier = None + classifier = load_classifier() return Response({ "correspondents": [ From 4c6a02aee7e3869b9e7daab3ce9c06b8304518d8 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Sat, 30 Jan 2021 15:22:51 +0100 Subject: [PATCH 40/42] pycodestyle --- src/documents/classifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/documents/classifier.py b/src/documents/classifier.py index 41cd05412..b427264c8 100755 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -40,8 +40,8 @@ def load_classifier(): except (EOFError, IncompatibleClassifierVersionError) as e: # there's something wrong with the model file. logger.error( - f"Unrecoverable error while loading document classification model: " - f"{str(e)}, deleting model file." + f"Unrecoverable error while loading document " + f"classification model: {str(e)}, deleting model file." 
) os.unlink(settings.MODEL_FILE) classifier = None From ffa44f51cdc0ce26621fced9d06fa724d483e9d1 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sat, 30 Jan 2021 15:30:39 +0100 Subject: [PATCH 41/42] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a19de79b2..0fc3185a1 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [Paperless](https://github.com/the-paperless-project/paperless) is an application by Daniel Quinn and contributors that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents. -Paperless-ng is a fork of the original project, adding a new interface and many other changes under the hood. For a detailed list of changes, have a look at the changelog in the documentation. +Paperless-ng is a fork of the original project, adding a new interface and many other changes under the hood. For a detailed list of changes, have a look at the [change log](https://paperless-ng.readthedocs.io/en/latest/changelog.html) in the documentation. # Survey From 35a4779cc06d6e7bfd03ed7073804d15d789c7e8 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Sat, 30 Jan 2021 15:44:23 +0100 Subject: [PATCH 42/42] fix typo --- src/documents/tests/test_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index eb310d357..d008f995a 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -83,7 +83,7 @@ class TestTasks(DirectoriesMixin, TestCase): self.assertRaises(SanityFailedError, tasks.sanity_check) m.assert_called_once() - def test_culk_update_documents(self): + def test_bulk_update_documents(self): doc1 = Document.objects.create(title="test", content="my document", checksum="wow", added=timezone.now(), created=timezone.now(), modified=timezone.now())