mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	Merge pull request #1262 from Tooa/fix-issue-1250
fix(tika): adapt to Gotenberg 7 API
This commit is contained in:
		@@ -75,10 +75,10 @@ services:
 | 
				
			|||||||
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
 | 
					      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  gotenberg:
 | 
					  gotenberg:
 | 
				
			||||||
    image: thecodingmachine/gotenberg
 | 
					    image: gotenberg/gotenberg:7
 | 
				
			||||||
    restart: unless-stopped
 | 
					    restart: unless-stopped
 | 
				
			||||||
    environment:
 | 
					    environment:
 | 
				
			||||||
      DISABLE_GOOGLE_CHROME: 1
 | 
					      CHROMIUM_DISABLE_ROUTES: 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  tika:
 | 
					  tika:
 | 
				
			||||||
    image: apache/tika
 | 
					    image: apache/tika
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -64,10 +64,10 @@ services:
 | 
				
			|||||||
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
 | 
					      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  gotenberg:
 | 
					  gotenberg:
 | 
				
			||||||
    image: thecodingmachine/gotenberg
 | 
					    image: gotenberg/gotenberg:7
 | 
				
			||||||
    restart: unless-stopped
 | 
					    restart: unless-stopped
 | 
				
			||||||
    environment:
 | 
					    environment:
 | 
				
			||||||
      DISABLE_GOOGLE_CHROME: 1
 | 
					      CHROMIUM_DISABLE_ROUTES: 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  tika:
 | 
					  tika:
 | 
				
			||||||
    image: apache/tika
 | 
					    image: apache/tika
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -402,7 +402,7 @@ Tika settings
 | 
				
			|||||||
#############
 | 
					#############
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Paperless can make use of `Tika <https://tika.apache.org/>`_ and
 | 
					Paperless can make use of `Tika <https://tika.apache.org/>`_ and
 | 
				
			||||||
`Gotenberg <https://thecodingmachine.github.io/gotenberg/>`_ for parsing and
 | 
					`Gotenberg <https://gotenberg.dev/>`_ for parsing and
 | 
				
			||||||
converting "Office" documents (such as ".doc", ".xlsx" and ".odt"). If you
 | 
					converting "Office" documents (such as ".doc", ".xlsx" and ".odt"). If you
 | 
				
			||||||
wish to use this, you must provide a Tika server and a Gotenberg server,
 | 
					wish to use this, you must provide a Tika server and a Gotenberg server,
 | 
				
			||||||
configure their endpoints, and enable the feature.
 | 
					configure their endpoints, and enable the feature.
 | 
				
			||||||
@@ -444,10 +444,10 @@ requires are as follows:
 | 
				
			|||||||
        # ...
 | 
					        # ...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        gotenberg:
 | 
					        gotenberg:
 | 
				
			||||||
            image: thecodingmachine/gotenberg
 | 
					            image: gotenberg/gotenberg:7
 | 
				
			||||||
            restart: unless-stopped
 | 
					            restart: unless-stopped
 | 
				
			||||||
            environment:
 | 
					            environment:
 | 
				
			||||||
                DISABLE_GOOGLE_CHROME: 1
 | 
					                CHROMIUM_DISABLE_ROUTES: 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        tika:
 | 
					        tika:
 | 
				
			||||||
            image: apache/tika
 | 
					            image: apache/tika
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -101,22 +101,22 @@ You may experience these errors when using the optional TIKA integration:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
.. code::
 | 
					.. code::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    requests.exceptions.HTTPError: 504 Server Error: Gateway Timeout for url: http://gotenberg:3000/convert/office
 | 
					    requests.exceptions.HTTPError: 504 Server Error: Gateway Timeout for url: http://gotenberg:3000/forms/libreoffice/convert
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Gotenberg is a server that converts Office documents into PDF documents and has a default timeout of 10 seconds.
 | 
					Gotenberg is a server that converts Office documents into PDF documents and has a default timeout of 30 seconds.
 | 
				
			||||||
When conversion takes longer, Gotenberg raises this error.
 | 
					When conversion takes longer, Gotenberg raises this error.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
You can increase the timeout by configuring an environment variable for gotenberg (see also `here <https://thecodingmachine.github.io/gotenberg/#environment_variables.default_wait_timeout>`__).
 | 
					You can increase the timeout by configuring an environment variable for Gotenberg (see also `here <https://gotenberg.dev/docs/modules/api#properties>`__).
 | 
				
			||||||
If using docker-compose, this is achieved by the following configuration change in the ``docker-compose.yml`` file:
 | 
					If using docker-compose, this is achieved by the following configuration change in the ``docker-compose.yml`` file:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. code:: yaml
 | 
					.. code:: yaml
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    gotenberg:
 | 
					    gotenberg:
 | 
				
			||||||
        image: thecodingmachine/gotenberg
 | 
					        image: gotenberg/gotenberg:7
 | 
				
			||||||
        restart: unless-stopped
 | 
					        restart: unless-stopped
 | 
				
			||||||
        environment:
 | 
					        environment:
 | 
				
			||||||
            DISABLE_GOOGLE_CHROME: 1
 | 
					            CHROMIUM_DISABLE_ROUTES: 1
 | 
				
			||||||
            DEFAULT_WAIT_TIMEOUT: 30
 | 
					            API_PROCESS_TIMEOUT: 60
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Permission denied errors in the consumption directory
 | 
					Permission denied errors in the consumption directory
 | 
				
			||||||
#####################################################
 | 
					#####################################################
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,4 @@
 | 
				
			|||||||
docker run -p 5432:5432 -e POSTGRES_PASSWORD=password -v paperless_pgdata:/var/lib/postgresql/data -d postgres:13
 | 
					docker run -p 5432:5432 -e POSTGRES_PASSWORD=password -v paperless_pgdata:/var/lib/postgresql/data -d postgres:13
 | 
				
			||||||
docker run -d -p 6379:6379 redis:latest
 | 
					docker run -d -p 6379:6379 redis:latest
 | 
				
			||||||
docker run -p 3000:3000 -d thecodingmachine/gotenberg
 | 
					docker run -p 3000:3000 -d gotenberg/gotenberg:7
 | 
				
			||||||
docker run -p 9998:9998 -d apache/tika
 | 
					docker run -p 9998:9998 -d apache/tika
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -67,7 +67,7 @@ class TikaDocumentParser(DocumentParser):
 | 
				
			|||||||
    def convert_to_pdf(self, document_path, file_name):
 | 
					    def convert_to_pdf(self, document_path, file_name):
 | 
				
			||||||
        pdf_path = os.path.join(self.tempdir, "convert.pdf")
 | 
					        pdf_path = os.path.join(self.tempdir, "convert.pdf")
 | 
				
			||||||
        gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT
 | 
					        gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT
 | 
				
			||||||
        url = gotenberg_server + "/convert/office"
 | 
					        url = gotenberg_server + "/forms/libreoffice/convert"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
 | 
					        self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
 | 
				
			||||||
        files = {"files": (file_name or os.path.basename(document_path),
 | 
					        files = {"files": (file_name or os.path.basename(document_path),
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user