From bb569b4e7875298049efe3e4c9b4cf29018267f9 Mon Sep 17 00:00:00 2001
From: Fabian Koller
Date: Tue, 29 Dec 2020 22:43:52 +0100
Subject: [PATCH] Integrate OCRmyPDF args into ansible config

---
Review notes (ignored by `git am`, not part of the commit message):

- The OCR argument dict is now rendered with `| to_json`. Without it the
  template emits a Python repr ({'deskew': True, ...}) rather than the JSON
  dictionary that paperless-ng expects for this setting.
- The setting is written as PAPERLESS_OCR_USER_ARGS (plural), as documented
  by paperless-ng; the singular spelling is not read by the application.
- The regexp uses `^#?` like the neighbouring entries so that re-running the
  role replaces the already-uncommented line instead of appending another.
- `paperlessng_big2enc_lossy` looks like a typo for `..._jbig2enc_lossy`; it
  is left untouched here because it is the user-facing variable name.
  TODO confirm the intended spelling before merging.
- This patch was reconstructed from a whitespace-collapsed copy. Hunk line
  counts match the headers, but the leading indentation of the
  ansible/tasks/main.yml lines is assumed (4/6/2 spaces) - verify against
  the tree. The post-image blob id on its `index` line predates the edits
  above.

 ansible/defaults/main.yml | 11 ++++++-----
 ansible/tasks/main.yml    |  6 ++----
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/ansible/defaults/main.yml b/ansible/defaults/main.yml
index fbeef9871..2504f18dc 100644
--- a/ansible/defaults/main.yml
+++ b/ansible/defaults/main.yml
@@ -11,12 +11,13 @@ paperlessng_virtualenv: "{{ paperlessng_directory }}/.venv"
 paperlessng_ocr_languages:
   - eng
 paperlessng_time_zone: Europe/Berlin
-paperlessng_ocrmypdf_args: --optimize 1
-# TODO Does optimze==1 really work with jbig2enc?
-# https://ocrmypdf.readthedocs.io/en/latest/jbig2.html#lossy-mode-jbig2
-# Documentation states -O1 only applies lossless transformations
-# https://ocrmypdf.readthedocs.io/en/latest/optimizer.html#lossless-optimizations
+# see https://ocrmypdf.readthedocs.io/en/latest/api.html#ocrmypdf.ocr
+paperlessng_ocrmypdf_args:
+  - "deskew": true
+  - "clean": true
+  - "optimize": 1
 paperlessng_use_jbig2enc: true
+paperlessng_big2enc_lossy: false
 
 paperlessng_superuser_name: paperlessng
 paperlessng_superuser_email: paperlessng@example.com
diff --git a/ansible/tasks/main.yml b/ansible/tasks/main.yml
index 2ee2e4db2..4a243f322 100644
--- a/ansible/tasks/main.yml
+++ b/ansible/tasks/main.yml
@@ -183,10 +183,8 @@
       line: "PAPERLESS_FILENAME_FORMAT={{ paperlessng_filename_format }}"
     - regexp: "^#?PAPERLESS_OCR_LANGUAGE="
       line: "PAPERLESS_OCR_LANGUAGE={{ paperlessng_ocr_languages | join('+') }}"
-    # - regexp: "^#PAPERLESS_OCR_USER_ARG="
-    #   # TODO JSON dict required in conf
-    #   # https://paperless-ng.readthedocs.io/en/latest/configuration.html#ocr-settings
-    #   line: "PAPERLESS_OCR_USER_ARG=\"{{ paperlessng_ocrmypdf_args }}{{ ' --jbig2-lossy' if paperlessng_use_jbig2enc else '' }}\""
+    - regexp: "^#?PAPERLESS_OCR_USER_ARGS="
+      line: "PAPERLESS_OCR_USER_ARGS={{ paperlessng_ocrmypdf_args | combine({'jbig2_lossy': true} if paperlessng_big2enc_lossy else {}) | to_json }}"
     - regexp: "^#?PAPERLESS_TIME_ZONE="
       line: "PAPERLESS_TIME_ZONE={{ paperlessng_time_zone }}"
   no_log: yes