From 2dcacaee147abfdccdca4e20262bae749c60be97 Mon Sep 17 00:00:00 2001 From: Uli Fahrer Date: Fri, 27 Aug 2021 08:32:16 +0200 Subject: [PATCH] fix(tika): adapt to Gotenberg 7 API This commit adapts to the latest breaking changes from Gotenberg 7. It also freezes the usage of the Gotenberg server to v7.x. Doing this prevents further breaking changes from leaking into our code base. * refs #1250 --- docker/compose/docker-compose.postgres-tika.yml | 4 ++-- docker/compose/docker-compose.sqlite-tika.yml | 4 ++-- docs/configuration.rst | 6 +++--- docs/troubleshooting.rst | 12 ++++++------ scripts/start_services.sh | 2 +- src/paperless_tika/parsers.py | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docker/compose/docker-compose.postgres-tika.yml b/docker/compose/docker-compose.postgres-tika.yml index 93fff4afc..f301b0d3c 100644 --- a/docker/compose/docker-compose.postgres-tika.yml +++ b/docker/compose/docker-compose.postgres-tika.yml @@ -75,10 +75,10 @@ services: PAPERLESS_TIKA_ENDPOINT: http://tika:9998 gotenberg: - image: thecodingmachine/gotenberg + image: gotenberg/gotenberg:7 restart: unless-stopped environment: - DISABLE_GOOGLE_CHROME: 1 + CHROMIUM_DISABLE_ROUTES: 1 tika: image: apache/tika diff --git a/docker/compose/docker-compose.sqlite-tika.yml b/docker/compose/docker-compose.sqlite-tika.yml index 5dfff0830..f50e51aab 100644 --- a/docker/compose/docker-compose.sqlite-tika.yml +++ b/docker/compose/docker-compose.sqlite-tika.yml @@ -64,10 +64,10 @@ services: PAPERLESS_TIKA_ENDPOINT: http://tika:9998 gotenberg: - image: thecodingmachine/gotenberg + image: gotenberg/gotenberg:7 restart: unless-stopped environment: - DISABLE_GOOGLE_CHROME: 1 + CHROMIUM_DISABLE_ROUTES: 1 tika: image: apache/tika diff --git a/docs/configuration.rst b/docs/configuration.rst index c5bb811f6..7d079a96c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -402,7 +402,7 @@ Tika settings ############# Paperless can make use of `Tika <https://tika.apache.org/>`_ and -`Gotenberg <https://thecodingmachine.github.io/gotenberg/>`_ for 
parsing and +`Gotenberg <https://gotenberg.dev/>`_ for parsing and converting "Office" documents (such as ".doc", ".xlsx" and ".odt"). If you wish to use this, you must provide a Tika server and a Gotenberg server, configure their endpoints, and enable the feature. @@ -444,10 +444,10 @@ requires are as follows: # ... gotenberg: - image: thecodingmachine/gotenberg + image: gotenberg/gotenberg:7 restart: unless-stopped environment: - DISABLE_GOOGLE_CHROME: 1 + CHROMIUM_DISABLE_ROUTES: 1 tika: image: apache/tika diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 25a1b7f6f..f3ea612a0 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -101,22 +101,22 @@ You may experience these errors when using the optional TIKA integration: .. code:: - requests.exceptions.HTTPError: 504 Server Error: Gateway Timeout for url: http://gotenberg:3000/convert/office + requests.exceptions.HTTPError: 504 Server Error: Gateway Timeout for url: http://gotenberg:3000/forms/libreoffice/convert -Gotenberg is a server that converts Office documents into PDF documents and has a default timeout of 10 seconds. +Gotenberg is a server that converts Office documents into PDF documents and has a default timeout of 30 seconds. When conversion takes longer, Gotenberg raises this error. -You can increase the timeout by configuring an environment variable for gotenberg (see also `here <https://thecodingmachine.github.io/gotenberg/#environment_variables.default_wait_timeout>`__). +You can increase the timeout by configuring an environment variable for Gotenberg (see also `here <https://gotenberg.dev/docs/modules/api#properties>`__). If using docker-compose, this is achieved by the following configuration change in the ``docker-compose.yml`` file: .. 
code:: yaml gotenberg: - image: thecodingmachine/gotenberg + image: gotenberg/gotenberg:7 restart: unless-stopped environment: - DISABLE_GOOGLE_CHROME: 1 - DEFAULT_WAIT_TIMEOUT: 30 + CHROMIUM_DISABLE_ROUTES: 1 + API_PROCESS_TIMEOUT: 60 Permission denied errors in the consumption directory ##################################################### diff --git a/scripts/start_services.sh b/scripts/start_services.sh index e2fc740a4..ecc842715 100755 --- a/scripts/start_services.sh +++ b/scripts/start_services.sh @@ -1,4 +1,4 @@ docker run -p 5432:5432 -e POSTGRES_PASSWORD=password -v paperless_pgdata:/var/lib/postgresql/data -d postgres:13 docker run -d -p 6379:6379 redis:latest -docker run -p 3000:3000 -d thecodingmachine/gotenberg +docker run -p 3000:3000 -d gotenberg/gotenberg:7 docker run -p 9998:9998 -d apache/tika diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index 6b0f62ada..e6924ed92 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -67,7 +67,7 @@ class TikaDocumentParser(DocumentParser): def convert_to_pdf(self, document_path, file_name): pdf_path = os.path.join(self.tempdir, "convert.pdf") gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT - url = gotenberg_server + "/convert/office" + url = gotenberg_server + "/forms/libreoffice/convert" self.log("info", f"Converting {document_path} to PDF as {pdf_path}") files = {"files": (file_name or os.path.basename(document_path),