Merge remote-tracking branch 'paperless/dev' into feature-consume-eml

This commit is contained in:
phail
2022-10-23 20:37:22 +02:00
225 changed files with 19278 additions and 25141 deletions

View File

@@ -36,3 +36,7 @@
# The default language to use for OCR. Set this to the language most of your
# documents are written in.
#PAPERLESS_OCR_LANGUAGE=eng
# Set if accessing paperless via a domain subpath e.g. https://domain.com/PATHPREFIX and using a reverse-proxy like traefik or nginx
#PAPERLESS_FORCE_SCRIPT_NAME=/PATHPREFIX
#PAPERLESS_STATIC_URL=/PATHPREFIX/static/ # trailing slash required

View File

@@ -0,0 +1,102 @@
# docker-compose file for running paperless from the Docker Hub.
# This file contains everything paperless needs to run.
# Paperless supports amd64, arm and arm64 hardware.
#
# All compose files of paperless configure paperless in the following way:
#
# - Paperless is (re)started on system boot, if it was running before shutdown.
# - Docker volumes for storing data are managed by Docker.
# - Folders for importing and exporting files are created in the same directory
# as this file and mounted to the correct folders inside the container.
# - Paperless listens on port 8000.
#
# In addition to that, this docker-compose file adds the following optional
# configurations:
#
# - Instead of SQLite (default), MariaDB is used as the database server.
# - Apache Tika and Gotenberg servers are started with paperless and paperless
# is configured to use these services. These provide support for consuming
# Office documents (Word, Excel, Power Point and their LibreOffice counter-
# parts.
#
# To install and update paperless with this file, do the following:
#
# - Copy this file as 'docker-compose.yml' and the files 'docker-compose.env'
# and '.env' into a folder.
# - Run 'docker-compose pull'.
# - Run 'docker-compose run --rm webserver createsuperuser' to create a user.
# - Run 'docker-compose up -d'.
#
# For more extensive installation and update instructions, refer to the
# documentation.
version: "3.4"
services:
broker:
image: docker.io/library/redis:7
restart: unless-stopped
volumes:
- redisdata:/data
db:
image: docker.io/library/mariadb:10
restart: unless-stopped
volumes:
- dbdata:/var/lib/mysql
environment:
MARIADB_HOST: paperless
MARIADB_DATABASE: paperless
MARIADB_USER: paperless
MARIADB_PASSWORD: paperless
MARIADB_ROOT_PASSWORD: paperless
ports:
- "3306:3306"
webserver:
image: ghcr.io/paperless-ngx/paperless-ngx:latest
restart: unless-stopped
depends_on:
- db
- broker
- gotenberg
- tika
ports:
- 8000:8000
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000"]
interval: 30s
timeout: 10s
retries: 5
volumes:
- data:/usr/src/paperless/data
- media:/usr/src/paperless/media
- ./export:/usr/src/paperless/export
- ./consume:/usr/src/paperless/consume
env_file: docker-compose.env
environment:
PAPERLESS_REDIS: redis://broker:6379
PAPERLESS_DBENGINE: mariadb
PAPERLESS_DBHOST: db
PAPERLESS_DBUSER: paperless
PAPERLESS_DBPASSWORD: paperless
PAPERLESS_DBPORT: 3306
PAPERLESS_TIKA_ENABLED: 1
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
gotenberg:
image: docker.io/gotenberg/gotenberg:7.6
restart: unless-stopped
command:
- "gotenberg"
- "--chromium-disable-routes=true"
tika:
image: ghcr.io/paperless-ngx/tika:latest
restart: unless-stopped
volumes:
data:
media:
dbdata:
redisdata:

View File

@@ -0,0 +1,83 @@
# docker-compose file for running paperless from the Docker Hub.
# This file contains everything paperless needs to run.
# Paperless supports amd64, arm and arm64 hardware.
#
# All compose files of paperless configure paperless in the following way:
#
# - Paperless is (re)started on system boot, if it was running before shutdown.
# - Docker volumes for storing data are managed by Docker.
# - Folders for importing and exporting files are created in the same directory
# as this file and mounted to the correct folders inside the container.
# - Paperless listens on port 8000.
#
# In addition to that, this docker-compose file adds the following optional
# configurations:
#
# - Instead of SQLite (default), MariaDB is used as the database server.
#
# To install and update paperless with this file, do the following:
#
# - Copy this file as 'docker-compose.yml' and the files 'docker-compose.env'
# and '.env' into a folder.
# - Run 'docker-compose pull'.
# - Run 'docker-compose run --rm webserver createsuperuser' to create a user.
# - Run 'docker-compose up -d'.
#
# For more extensive installation and update instructions, refer to the
# documentation.
version: "3.4"
services:
broker:
image: docker.io/library/redis:7
restart: unless-stopped
volumes:
- redisdata:/data
db:
image: docker.io/library/mariadb:10
restart: unless-stopped
volumes:
- dbdata:/var/lib/mysql
environment:
MARIADB_HOST: paperless
MARIADB_DATABASE: paperless
MARIADB_USER: paperless
MARIADB_PASSWORD: paperless
MARIADB_ROOT_PASSWORD: paperless
ports:
- "3306:3306"
webserver:
image: ghcr.io/paperless-ngx/paperless-ngx:latest
restart: unless-stopped
depends_on:
- db
- broker
ports:
- 8000:8000
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000"]
interval: 30s
timeout: 10s
retries: 5
volumes:
- data:/usr/src/paperless/data
- media:/usr/src/paperless/media
- ./export:/usr/src/paperless/export
- ./consume:/usr/src/paperless/consume
env_file: docker-compose.env
environment:
PAPERLESS_REDIS: redis://broker:6379
PAPERLESS_DBENGINE: mariadb
PAPERLESS_DBHOST: db
PAPERLESS_DBUSER: paperless
PAPERLESS_DBPASSWORD: paperless
PAPERLESS_DBPORT: 3306
volumes:
data:
media:
dbdata:
redisdata:

View File

@@ -31,7 +31,7 @@
version: "3.4"
services:
broker:
image: docker.io/library/redis:6.0
image: docker.io/library/redis:7
restart: unless-stopped
volumes:
- redisdata:/data

View File

@@ -33,7 +33,7 @@
version: "3.4"
services:
broker:
image: docker.io/library/redis:6.0
image: docker.io/library/redis:7
restart: unless-stopped
volumes:
- redisdata:/data
@@ -77,7 +77,7 @@ services:
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
gotenberg:
image: docker.io/gotenberg/gotenberg:7.4
image: docker.io/gotenberg/gotenberg:7.6
restart: unless-stopped
# The gotenberg chromium route is used to convert .eml files. We do not

View File

@@ -29,7 +29,7 @@
version: "3.4"
services:
broker:
image: docker.io/library/redis:6.0
image: docker.io/library/redis:7
restart: unless-stopped
volumes:
- redisdata:/data

View File

@@ -33,7 +33,7 @@
version: "3.4"
services:
broker:
image: docker.io/library/redis:6.0
image: docker.io/library/redis:7
restart: unless-stopped
volumes:
- redisdata:/data
@@ -65,7 +65,7 @@ services:
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
gotenberg:
image: docker.io/gotenberg/gotenberg:7.4
image: docker.io/gotenberg/gotenberg:7.6
restart: unless-stopped
# The gotenberg chromium route is used to convert .eml files. We do not

View File

@@ -26,7 +26,7 @@
version: "3.4"
services:
broker:
image: docker.io/library/redis:6.0
image: docker.io/library/redis:7
restart: unless-stopped
volumes:
- redisdata:/data

View File

@@ -50,6 +50,31 @@ map_folders() {
# Export these so they can be used in docker-prepare.sh
export DATA_DIR="${PAPERLESS_DATA_DIR:-/usr/src/paperless/data}"
export MEDIA_ROOT_DIR="${PAPERLESS_MEDIA_ROOT:-/usr/src/paperless/media}"
export CONSUME_DIR="${PAPERLESS_CONSUMPTION_DIR:-/usr/src/paperless/consume}"
}
nltk_data () {
# Store the NLTK data outside the Docker container
local nltk_data_dir="${DATA_DIR}/nltk"
readonly truthy_things=("yes y 1 t true")
# If not set, or it looks truthy
if [[ -z "${PAPERLESS_ENABLE_NLTK}" ]] || [[ "${truthy_things[*]}" =~ ${PAPERLESS_ENABLE_NLTK,} ]]; then
# Download or update the snowball stemmer data
python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" snowball_data
# Download or update the stopwords corpus
python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" stopwords
# Download or update the punkt tokenizer data
python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" punkt
else
echo "Skipping NLTK data download"
fi
}
initialize() {
@@ -62,7 +87,8 @@ initialize() {
PAPERLESS_AUTO_LOGIN_USERNAME \
PAPERLESS_ADMIN_USER \
PAPERLESS_ADMIN_MAIL \
PAPERLESS_ADMIN_PASSWORD; do
PAPERLESS_ADMIN_PASSWORD \
PAPERLESS_REDIS; do
# Check for a version of this var with _FILE appended
# and convert the contents to the env var value
file_env ${env_var}
@@ -76,7 +102,11 @@ initialize() {
local export_dir="/usr/src/paperless/export"
for dir in "${export_dir}" "${DATA_DIR}" "${DATA_DIR}/index" "${MEDIA_ROOT_DIR}" "${MEDIA_ROOT_DIR}/documents" "${MEDIA_ROOT_DIR}/documents/originals" "${MEDIA_ROOT_DIR}/documents/thumbnails"; do
for dir in \
"${export_dir}" \
"${DATA_DIR}" "${DATA_DIR}/index" \
"${MEDIA_ROOT_DIR}" "${MEDIA_ROOT_DIR}/documents" "${MEDIA_ROOT_DIR}/documents/originals" "${MEDIA_ROOT_DIR}/documents/thumbnails" \
"${CONSUME_DIR}"; do
if [[ ! -d "${dir}" ]]; then
echo "Creating directory ${dir}"
mkdir "${dir}"
@@ -87,15 +117,21 @@ initialize() {
echo "Creating directory ${tmp_dir}"
mkdir -p "${tmp_dir}"
nltk_data
set +e
echo "Adjusting permissions of paperless files. This may take a while."
chown -R paperless:paperless ${tmp_dir}
for dir in "${export_dir}" "${DATA_DIR}" "${MEDIA_ROOT_DIR}"; do
for dir in \
"${export_dir}" \
"${DATA_DIR}" \
"${MEDIA_ROOT_DIR}" \
"${CONSUME_DIR}"; do
find "${dir}" -not \( -user paperless -and -group paperless \) -exec chown paperless:paperless {} +
done
set -e
gosu paperless /sbin/docker-prepare.sh
"${gosu_cmd[@]}" /sbin/docker-prepare.sh
}
install_languages() {
@@ -137,6 +173,11 @@ install_languages() {
echo "Paperless-ngx docker container starting..."
gosu_cmd=(gosu paperless)
if [ "$(id -u)" == "$(id -u paperless)" ]; then
gosu_cmd=()
fi
# Install additional languages if specified
if [[ -n "$PAPERLESS_OCR_LANGUAGES" ]]; then
install_languages "$PAPERLESS_OCR_LANGUAGES"
@@ -146,7 +187,7 @@ initialize
if [[ "$1" != "/"* ]]; then
echo Executing management command "$@"
exec gosu paperless python3 manage.py "$@"
exec "${gosu_cmd[@]}" python3 manage.py "$@"
else
echo Executing "$@"
exec "$@"

View File

@@ -28,6 +28,30 @@ wait_for_postgres() {
done
}
wait_for_mariadb() {
echo "Waiting for MariaDB to start..."
host="${PAPERLESS_DBHOST:=localhost}"
port="${PAPERLESS_DBPORT:=3306}"
attempt_num=1
max_attempts=5
while ! true > /dev/tcp/$host/$port; do
if [ $attempt_num -eq $max_attempts ]; then
echo "Unable to connect to database."
exit 1
else
echo "Attempt $attempt_num failed! Trying again in 5 seconds..."
fi
attempt_num=$(("$attempt_num" + 1))
sleep 5
done
}
wait_for_redis() {
# We use a Python script to send the Redis ping
# instead of installing redis-tools just for 1 thing
@@ -66,7 +90,9 @@ superuser() {
}
do_work() {
if [[ -n "${PAPERLESS_DBHOST}" ]]; then
if [[ "${PAPERLESS_DBENGINE}" == "mariadb" ]]; then
wait_for_mariadb
elif [[ -n "${PAPERLESS_DBHOST}" ]]; then
wait_for_postgres
fi

15
docker/paperless_cmd.sh Executable file
View File

@@ -0,0 +1,15 @@
#!/usr/bin/env bash
rootless_args=()
if [ "$(id -u)" == "$(id -u paperless)" ]; then
rootless_args=(
--user
paperless
--logfile
supervisord.log
--pidfile
supervisord.pid
)
fi
exec /usr/local/bin/supervisord -c /etc/supervisord.conf "${rootless_args[@]}"

View File

@@ -19,14 +19,28 @@ stderr_logfile_maxbytes=0
[program:consumer]
command=python3 manage.py document_consumer
user=paperless
stopsignal=INT
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
[program:scheduler]
command=python3 manage.py qcluster
[program:celery]
command = celery --app paperless worker --loglevel INFO
user=paperless
stopasgroup = true
stopwaitsecs = 60
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
[program:celery-beat]
command = celery --app paperless beat --loglevel INFO
user=paperless
stopasgroup = true

View File

@@ -18,7 +18,7 @@ if __name__ == "__main__":
REDIS_URL: Final[str] = os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
print(f"Waiting for Redis: {REDIS_URL}", flush=True)
print(f"Waiting for Redis...", flush=True)
attempt = 0
with Redis.from_url(url=REDIS_URL) as client:
@@ -37,8 +37,8 @@ if __name__ == "__main__":
attempt += 1
if attempt >= MAX_RETRY_COUNT:
print(f"Failed to connect to: {REDIS_URL}")
print(f"Failed to connect to redis using environment variable PAPERLESS_REDIS.")
sys.exit(os.EX_UNAVAILABLE)
else:
print(f"Connected to Redis broker: {REDIS_URL}")
print(f"Connected to Redis broker.")
sys.exit(os.EX_OK)