mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge remote-tracking branch 'paperless/dev' into feature-consume-eml
This commit is contained in:
@@ -36,3 +36,7 @@
|
||||
# The default language to use for OCR. Set this to the language most of your
|
||||
# documents are written in.
|
||||
#PAPERLESS_OCR_LANGUAGE=eng
|
||||
|
||||
# Set if accessing paperless via a domain subpath e.g. https://domain.com/PATHPREFIX and using a reverse-proxy like traefik or nginx
|
||||
#PAPERLESS_FORCE_SCRIPT_NAME=/PATHPREFIX
|
||||
#PAPERLESS_STATIC_URL=/PATHPREFIX/static/ # trailing slash required
|
||||
|
102
docker/compose/docker-compose.mariadb-tika.yml
Normal file
102
docker/compose/docker-compose.mariadb-tika.yml
Normal file
@@ -0,0 +1,102 @@
|
||||
# docker-compose file for running paperless from the Docker Hub.
|
||||
# This file contains everything paperless needs to run.
|
||||
# Paperless supports amd64, arm and arm64 hardware.
|
||||
#
|
||||
# All compose files of paperless configure paperless in the following way:
|
||||
#
|
||||
# - Paperless is (re)started on system boot, if it was running before shutdown.
|
||||
# - Docker volumes for storing data are managed by Docker.
|
||||
# - Folders for importing and exporting files are created in the same directory
|
||||
# as this file and mounted to the correct folders inside the container.
|
||||
# - Paperless listens on port 8000.
|
||||
#
|
||||
# In addition to that, this docker-compose file adds the following optional
|
||||
# configurations:
|
||||
#
|
||||
# - Instead of SQLite (default), MariaDB is used as the database server.
|
||||
# - Apache Tika and Gotenberg servers are started with paperless and paperless
|
||||
# is configured to use these services. These provide support for consuming
|
||||
# Office documents (Word, Excel, Power Point and their LibreOffice counter-
|
||||
# parts.
|
||||
#
|
||||
# To install and update paperless with this file, do the following:
|
||||
#
|
||||
# - Copy this file as 'docker-compose.yml' and the files 'docker-compose.env'
|
||||
# and '.env' into a folder.
|
||||
# - Run 'docker-compose pull'.
|
||||
# - Run 'docker-compose run --rm webserver createsuperuser' to create a user.
|
||||
# - Run 'docker-compose up -d'.
|
||||
#
|
||||
# For more extensive installation and update instructions, refer to the
|
||||
# documentation.
|
||||
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: docker.io/library/redis:7
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redisdata:/data
|
||||
|
||||
db:
|
||||
image: docker.io/library/mariadb:10
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- dbdata:/var/lib/mysql
|
||||
environment:
|
||||
MARIADB_HOST: paperless
|
||||
MARIADB_DATABASE: paperless
|
||||
MARIADB_USER: paperless
|
||||
MARIADB_PASSWORD: paperless
|
||||
MARIADB_ROOT_PASSWORD: paperless
|
||||
ports:
|
||||
- "3306:3306"
|
||||
|
||||
webserver:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- db
|
||||
- broker
|
||||
- gotenberg
|
||||
- tika
|
||||
ports:
|
||||
- 8000:8000
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
- ./export:/usr/src/paperless/export
|
||||
- ./consume:/usr/src/paperless/consume
|
||||
env_file: docker-compose.env
|
||||
environment:
|
||||
PAPERLESS_REDIS: redis://broker:6379
|
||||
PAPERLESS_DBENGINE: mariadb
|
||||
PAPERLESS_DBHOST: db
|
||||
PAPERLESS_DBUSER: paperless
|
||||
PAPERLESS_DBPASSWORD: paperless
|
||||
PAPERLESS_DBPORT: 3306
|
||||
PAPERLESS_TIKA_ENABLED: 1
|
||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
data:
|
||||
media:
|
||||
dbdata:
|
||||
redisdata:
|
83
docker/compose/docker-compose.mariadb.yml
Normal file
83
docker/compose/docker-compose.mariadb.yml
Normal file
@@ -0,0 +1,83 @@
|
||||
# docker-compose file for running paperless from the Docker Hub.
|
||||
# This file contains everything paperless needs to run.
|
||||
# Paperless supports amd64, arm and arm64 hardware.
|
||||
#
|
||||
# All compose files of paperless configure paperless in the following way:
|
||||
#
|
||||
# - Paperless is (re)started on system boot, if it was running before shutdown.
|
||||
# - Docker volumes for storing data are managed by Docker.
|
||||
# - Folders for importing and exporting files are created in the same directory
|
||||
# as this file and mounted to the correct folders inside the container.
|
||||
# - Paperless listens on port 8000.
|
||||
#
|
||||
# In addition to that, this docker-compose file adds the following optional
|
||||
# configurations:
|
||||
#
|
||||
# - Instead of SQLite (default), MariaDB is used as the database server.
|
||||
#
|
||||
# To install and update paperless with this file, do the following:
|
||||
#
|
||||
# - Copy this file as 'docker-compose.yml' and the files 'docker-compose.env'
|
||||
# and '.env' into a folder.
|
||||
# - Run 'docker-compose pull'.
|
||||
# - Run 'docker-compose run --rm webserver createsuperuser' to create a user.
|
||||
# - Run 'docker-compose up -d'.
|
||||
#
|
||||
# For more extensive installation and update instructions, refer to the
|
||||
# documentation.
|
||||
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: docker.io/library/redis:7
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redisdata:/data
|
||||
|
||||
db:
|
||||
image: docker.io/library/mariadb:10
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- dbdata:/var/lib/mysql
|
||||
environment:
|
||||
MARIADB_HOST: paperless
|
||||
MARIADB_DATABASE: paperless
|
||||
MARIADB_USER: paperless
|
||||
MARIADB_PASSWORD: paperless
|
||||
MARIADB_ROOT_PASSWORD: paperless
|
||||
ports:
|
||||
- "3306:3306"
|
||||
|
||||
webserver:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- db
|
||||
- broker
|
||||
ports:
|
||||
- 8000:8000
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
- ./export:/usr/src/paperless/export
|
||||
- ./consume:/usr/src/paperless/consume
|
||||
env_file: docker-compose.env
|
||||
environment:
|
||||
PAPERLESS_REDIS: redis://broker:6379
|
||||
PAPERLESS_DBENGINE: mariadb
|
||||
PAPERLESS_DBHOST: db
|
||||
PAPERLESS_DBUSER: paperless
|
||||
PAPERLESS_DBPASSWORD: paperless
|
||||
PAPERLESS_DBPORT: 3306
|
||||
|
||||
|
||||
volumes:
|
||||
data:
|
||||
media:
|
||||
dbdata:
|
||||
redisdata:
|
@@ -31,7 +31,7 @@
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: docker.io/library/redis:6.0
|
||||
image: docker.io/library/redis:7
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redisdata:/data
|
||||
|
@@ -33,7 +33,7 @@
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: docker.io/library/redis:6.0
|
||||
image: docker.io/library/redis:7
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redisdata:/data
|
||||
@@ -77,7 +77,7 @@ services:
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.4
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
|
@@ -29,7 +29,7 @@
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: docker.io/library/redis:6.0
|
||||
image: docker.io/library/redis:7
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redisdata:/data
|
||||
|
@@ -33,7 +33,7 @@
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: docker.io/library/redis:6.0
|
||||
image: docker.io/library/redis:7
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redisdata:/data
|
||||
@@ -65,7 +65,7 @@ services:
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.4
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
|
@@ -26,7 +26,7 @@
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: docker.io/library/redis:6.0
|
||||
image: docker.io/library/redis:7
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redisdata:/data
|
||||
|
@@ -50,6 +50,31 @@ map_folders() {
|
||||
# Export these so they can be used in docker-prepare.sh
|
||||
export DATA_DIR="${PAPERLESS_DATA_DIR:-/usr/src/paperless/data}"
|
||||
export MEDIA_ROOT_DIR="${PAPERLESS_MEDIA_ROOT:-/usr/src/paperless/media}"
|
||||
export CONSUME_DIR="${PAPERLESS_CONSUMPTION_DIR:-/usr/src/paperless/consume}"
|
||||
}
|
||||
|
||||
nltk_data () {
|
||||
# Store the NLTK data outside the Docker container
|
||||
local nltk_data_dir="${DATA_DIR}/nltk"
|
||||
readonly truthy_things=("yes y 1 t true")
|
||||
|
||||
# If not set, or it looks truthy
|
||||
if [[ -z "${PAPERLESS_ENABLE_NLTK}" ]] || [[ "${truthy_things[*]}" =~ ${PAPERLESS_ENABLE_NLTK,} ]]; then
|
||||
|
||||
# Download or update the snowball stemmer data
|
||||
python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" snowball_data
|
||||
|
||||
# Download or update the stopwords corpus
|
||||
python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" stopwords
|
||||
|
||||
# Download or update the punkt tokenizer data
|
||||
python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" punkt
|
||||
|
||||
else
|
||||
echo "Skipping NLTK data download"
|
||||
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
initialize() {
|
||||
@@ -62,7 +87,8 @@ initialize() {
|
||||
PAPERLESS_AUTO_LOGIN_USERNAME \
|
||||
PAPERLESS_ADMIN_USER \
|
||||
PAPERLESS_ADMIN_MAIL \
|
||||
PAPERLESS_ADMIN_PASSWORD; do
|
||||
PAPERLESS_ADMIN_PASSWORD \
|
||||
PAPERLESS_REDIS; do
|
||||
# Check for a version of this var with _FILE appended
|
||||
# and convert the contents to the env var value
|
||||
file_env ${env_var}
|
||||
@@ -76,7 +102,11 @@ initialize() {
|
||||
|
||||
local export_dir="/usr/src/paperless/export"
|
||||
|
||||
for dir in "${export_dir}" "${DATA_DIR}" "${DATA_DIR}/index" "${MEDIA_ROOT_DIR}" "${MEDIA_ROOT_DIR}/documents" "${MEDIA_ROOT_DIR}/documents/originals" "${MEDIA_ROOT_DIR}/documents/thumbnails"; do
|
||||
for dir in \
|
||||
"${export_dir}" \
|
||||
"${DATA_DIR}" "${DATA_DIR}/index" \
|
||||
"${MEDIA_ROOT_DIR}" "${MEDIA_ROOT_DIR}/documents" "${MEDIA_ROOT_DIR}/documents/originals" "${MEDIA_ROOT_DIR}/documents/thumbnails" \
|
||||
"${CONSUME_DIR}"; do
|
||||
if [[ ! -d "${dir}" ]]; then
|
||||
echo "Creating directory ${dir}"
|
||||
mkdir "${dir}"
|
||||
@@ -87,15 +117,21 @@ initialize() {
|
||||
echo "Creating directory ${tmp_dir}"
|
||||
mkdir -p "${tmp_dir}"
|
||||
|
||||
nltk_data
|
||||
|
||||
set +e
|
||||
echo "Adjusting permissions of paperless files. This may take a while."
|
||||
chown -R paperless:paperless ${tmp_dir}
|
||||
for dir in "${export_dir}" "${DATA_DIR}" "${MEDIA_ROOT_DIR}"; do
|
||||
for dir in \
|
||||
"${export_dir}" \
|
||||
"${DATA_DIR}" \
|
||||
"${MEDIA_ROOT_DIR}" \
|
||||
"${CONSUME_DIR}"; do
|
||||
find "${dir}" -not \( -user paperless -and -group paperless \) -exec chown paperless:paperless {} +
|
||||
done
|
||||
set -e
|
||||
|
||||
gosu paperless /sbin/docker-prepare.sh
|
||||
"${gosu_cmd[@]}" /sbin/docker-prepare.sh
|
||||
}
|
||||
|
||||
install_languages() {
|
||||
@@ -137,6 +173,11 @@ install_languages() {
|
||||
|
||||
echo "Paperless-ngx docker container starting..."
|
||||
|
||||
gosu_cmd=(gosu paperless)
|
||||
if [ "$(id -u)" == "$(id -u paperless)" ]; then
|
||||
gosu_cmd=()
|
||||
fi
|
||||
|
||||
# Install additional languages if specified
|
||||
if [[ -n "$PAPERLESS_OCR_LANGUAGES" ]]; then
|
||||
install_languages "$PAPERLESS_OCR_LANGUAGES"
|
||||
@@ -146,7 +187,7 @@ initialize
|
||||
|
||||
if [[ "$1" != "/"* ]]; then
|
||||
echo Executing management command "$@"
|
||||
exec gosu paperless python3 manage.py "$@"
|
||||
exec "${gosu_cmd[@]}" python3 manage.py "$@"
|
||||
else
|
||||
echo Executing "$@"
|
||||
exec "$@"
|
||||
|
@@ -28,6 +28,30 @@ wait_for_postgres() {
|
||||
done
|
||||
}
|
||||
|
||||
wait_for_mariadb() {
|
||||
echo "Waiting for MariaDB to start..."
|
||||
|
||||
host="${PAPERLESS_DBHOST:=localhost}"
|
||||
port="${PAPERLESS_DBPORT:=3306}"
|
||||
|
||||
attempt_num=1
|
||||
max_attempts=5
|
||||
|
||||
while ! true > /dev/tcp/$host/$port; do
|
||||
|
||||
if [ $attempt_num -eq $max_attempts ]; then
|
||||
echo "Unable to connect to database."
|
||||
exit 1
|
||||
else
|
||||
echo "Attempt $attempt_num failed! Trying again in 5 seconds..."
|
||||
|
||||
fi
|
||||
|
||||
attempt_num=$(("$attempt_num" + 1))
|
||||
sleep 5
|
||||
done
|
||||
}
|
||||
|
||||
wait_for_redis() {
|
||||
# We use a Python script to send the Redis ping
|
||||
# instead of installing redis-tools just for 1 thing
|
||||
@@ -66,7 +90,9 @@ superuser() {
|
||||
}
|
||||
|
||||
do_work() {
|
||||
if [[ -n "${PAPERLESS_DBHOST}" ]]; then
|
||||
if [[ "${PAPERLESS_DBENGINE}" == "mariadb" ]]; then
|
||||
wait_for_mariadb
|
||||
elif [[ -n "${PAPERLESS_DBHOST}" ]]; then
|
||||
wait_for_postgres
|
||||
fi
|
||||
|
||||
|
15
docker/paperless_cmd.sh
Executable file
15
docker/paperless_cmd.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
rootless_args=()
|
||||
if [ "$(id -u)" == "$(id -u paperless)" ]; then
|
||||
rootless_args=(
|
||||
--user
|
||||
paperless
|
||||
--logfile
|
||||
supervisord.log
|
||||
--pidfile
|
||||
supervisord.pid
|
||||
)
|
||||
fi
|
||||
|
||||
exec /usr/local/bin/supervisord -c /etc/supervisord.conf "${rootless_args[@]}"
|
@@ -19,14 +19,28 @@ stderr_logfile_maxbytes=0
|
||||
[program:consumer]
|
||||
command=python3 manage.py document_consumer
|
||||
user=paperless
|
||||
stopsignal=INT
|
||||
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:scheduler]
|
||||
command=python3 manage.py qcluster
|
||||
[program:celery]
|
||||
|
||||
command = celery --app paperless worker --loglevel INFO
|
||||
user=paperless
|
||||
stopasgroup = true
|
||||
stopwaitsecs = 60
|
||||
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:celery-beat]
|
||||
|
||||
command = celery --app paperless beat --loglevel INFO
|
||||
user=paperless
|
||||
stopasgroup = true
|
||||
|
||||
|
@@ -18,7 +18,7 @@ if __name__ == "__main__":
|
||||
|
||||
REDIS_URL: Final[str] = os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
|
||||
|
||||
print(f"Waiting for Redis: {REDIS_URL}", flush=True)
|
||||
print(f"Waiting for Redis...", flush=True)
|
||||
|
||||
attempt = 0
|
||||
with Redis.from_url(url=REDIS_URL) as client:
|
||||
@@ -37,8 +37,8 @@ if __name__ == "__main__":
|
||||
attempt += 1
|
||||
|
||||
if attempt >= MAX_RETRY_COUNT:
|
||||
print(f"Failed to connect to: {REDIS_URL}")
|
||||
print(f"Failed to connect to redis using environment variable PAPERLESS_REDIS.")
|
||||
sys.exit(os.EX_UNAVAILABLE)
|
||||
else:
|
||||
print(f"Connected to Redis broker: {REDIS_URL}")
|
||||
print(f"Connected to Redis broker.")
|
||||
sys.exit(os.EX_OK)
|
||||
|
Reference in New Issue
Block a user