Add the new paperless_tika parser

This parser will use an external Tika and Gotenberg server to parse
"Office" documents (.doc, .xls, .odt, etc.)

Signed-off-by: Jo Vandeginste <Jo.Vandeginste@kuleuven.be>
This commit is contained in:
Jo Vandeginste
2020-12-29 01:23:40 +01:00
parent d690b34ee0
commit b8e8bf3dd4
9 changed files with 276 additions and 0 deletions

View File

@@ -0,0 +1,43 @@
version: "3.4"
services:
broker:
image: redis:6.0
restart: always
webserver:
image: jonaswinkler/paperless-ng:0.9.9
restart: always
depends_on:
- broker
ports:
- 8000:8000
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000"]
interval: 30s
timeout: 10s
retries: 5
volumes:
- data:/usr/src/paperless/data
- media:/usr/src/paperless/media
- ./export:/usr/src/paperless/export
- ./consume:/usr/src/paperless/consume
env_file: docker-compose.env
environment:
PAPERLESS_REDIS: redis://broker:6379
PAPERLESS_TIKA: 1
GOTENBERG_SERVER_ENDPOINT: http://gotenberg:3000
TIKA_SERVER_ENDPOINT: http://tika:9998
gotenberg:
image: thecodingmachine/gotenberg
restart: unless-stopped
environment:
DISABLE_GOOGLE_CHROME: 1
tika:
image: apache/tika
restart: unless-stopped
volumes:
data:
media:

View File

@@ -0,0 +1,43 @@
version: "3.4"
services:
broker:
image: redis:6.0
restart: always
webserver:
build: .
restart: always
depends_on:
- broker
ports:
- 8000:8000
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000"]
interval: 30s
timeout: 10s
retries: 5
volumes:
- data:/usr/src/paperless/data
- media:/usr/src/paperless/media
- ./export:/usr/src/paperless/export
- ./consume:/usr/src/paperless/consume
env_file: docker-compose.env
environment:
PAPERLESS_REDIS: redis://broker:6379
PAPERLESS_TIKA: 1
GOTENBERG_SERVER_ENDPOINT: http://gotenberg:3000
TIKA_SERVER_ENDPOINT: http://tika:9998
gotenberg:
image: thecodingmachine/gotenberg
restart: unless-stopped
environment:
DISABLE_GOOGLE_CHROME: 1
tika:
image: apache/tika
restart: unless-stopped
volumes:
data:
media: