mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge branch 'dev' into celery-tasks
This commit is contained in:
39
docker/docker-compose.env
Normal file
39
docker/docker-compose.env
Normal file
@@ -0,0 +1,39 @@
|
||||
# The UID and GID of the user used to run paperless in the container. Set this
|
||||
# to your UID and GID on the host so that you have write access to the
|
||||
# consumption directory.
|
||||
#USERMAP_UID=1000
|
||||
#USERMAP_GID=1000
|
||||
|
||||
# Additional languages to install for text recognition, separated by a
|
||||
# whitespace. Note that this is
|
||||
# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the
|
||||
# default language used when guessing the language from the OCR output.
|
||||
# The container installs English, German, Italian, Spanish and French by
|
||||
# default.
|
||||
# See https://packages.debian.org/search?keywords=tesseract-ocr-&searchon=names&suite=buster
|
||||
# for available languages.
|
||||
#PAPERLESS_OCR_LANGUAGES=tur ces
|
||||
|
||||
###############################################################################
|
||||
# Paperless-specific settings #
|
||||
###############################################################################
|
||||
|
||||
# All settings defined in the paperless.conf.example can be used here. The
|
||||
# Docker setup does not use the configuration file.
|
||||
# A few commonly adjusted settings are provided below.
|
||||
|
||||
# Adjust this key if you plan to make paperless available publicly. It should
|
||||
# be a very long sequence of random characters. You don't need to remember it.
|
||||
#PAPERLESS_SECRET_KEY=change-me
|
||||
|
||||
# Use this variable to set a timezone for the Paperless Docker containers. If not specified, defaults to UTC.
|
||||
#PAPERLESS_TIME_ZONE=America/Los_Angeles
|
||||
|
||||
# The default language to use for OCR. Set this to the language most of your
|
||||
# documents are written in.
|
||||
#PAPERLESS_OCR_LANGUAGE=eng
|
||||
|
||||
# By default Paperless does not OCR a document if the text can be retrieved from
|
||||
# the document directly. Set to true to always OCR documents. (i.e., if you
|
||||
# know that some of your documents have faulty/bad OCR data)
|
||||
#PAPERLESS_OCR_ALWAYS=true
|
94
docker/docker-entrypoint.sh
Normal file
94
docker/docker-entrypoint.sh
Normal file
@@ -0,0 +1,94 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# Source: https://github.com/sameersbn/docker-gitlab/
|
||||
map_uidgid() {
|
||||
USERMAP_ORIG_UID=$(id -u paperless)
|
||||
USERMAP_ORIG_GID=$(id -g paperless)
|
||||
USERMAP_NEW_UID=${USERMAP_UID:-$USERMAP_ORIG_UID}
|
||||
USERMAP_NEW_GID=${USERMAP_GID:-${USERMAP_ORIG_GID:-$USERMAP_NEW_UID}}
|
||||
if [[ ${USERMAP_NEW_UID} != "${USERMAP_ORIG_UID}" || ${USERMAP_NEW_GID} != "${USERMAP_ORIG_GID}" ]]; then
|
||||
echo "Mapping UID and GID for paperless:paperless to $USERMAP_NEW_UID:$USERMAP_NEW_GID"
|
||||
usermod -u "${USERMAP_NEW_UID}" paperless
|
||||
groupmod -o -g "${USERMAP_NEW_GID}" paperless
|
||||
fi
|
||||
}
|
||||
|
||||
migrations() {
|
||||
|
||||
(
|
||||
# flock is in place to prevent multiple containers from doing migrations
|
||||
# simultaneously. This also ensures that the db is ready when the command
|
||||
# of the current container starts.
|
||||
flock 200
|
||||
sudo -HEu paperless python3 manage.py migrate
|
||||
) 200>/usr/src/paperless/data/migration_lock
|
||||
|
||||
}
|
||||
|
||||
initialize() {
|
||||
map_uidgid
|
||||
|
||||
for dir in export data data/index media media/documents media/documents/originals media/documents/thumbnails; do
|
||||
if [[ ! -d "../$dir" ]]
|
||||
then
|
||||
echo "creating directory ../$dir"
|
||||
mkdir ../$dir
|
||||
fi
|
||||
done
|
||||
|
||||
chown -R paperless:paperless ../
|
||||
|
||||
migrations
|
||||
|
||||
}
|
||||
|
||||
install_languages() {
|
||||
local langs="$1"
|
||||
read -ra langs <<<"$langs"
|
||||
|
||||
# Check that it is not empty
|
||||
if [ ${#langs[@]} -eq 0 ]; then
|
||||
return
|
||||
fi
|
||||
apt-get update
|
||||
|
||||
for lang in "${langs[@]}"; do
|
||||
pkg="tesseract-ocr-$lang"
|
||||
# English is installed by default
|
||||
#if [[ "$lang" == "eng" ]]; then
|
||||
# continue
|
||||
#fi
|
||||
|
||||
if dpkg -s $pkg &> /dev/null; then
|
||||
echo "package $pkg already installed!"
|
||||
continue
|
||||
fi
|
||||
|
||||
if ! apt-cache show $pkg &> /dev/null; then
|
||||
echo "package $pkg not found! :("
|
||||
continue
|
||||
fi
|
||||
|
||||
echo "Installing package $pkg..."
|
||||
if ! apt-get -y install "$pkg" &> /dev/null; then
|
||||
echo "Could not install $pkg"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
initialize
|
||||
|
||||
# Install additional languages if specified
|
||||
if [[ ! -z "$PAPERLESS_OCR_LANGUAGES" ]]; then
|
||||
install_languages "$PAPERLESS_OCR_LANGUAGES"
|
||||
fi
|
||||
|
||||
if [[ "$1" != "/"* ]]; then
|
||||
exec sudo -HEu paperless python3 manage.py "$@"
|
||||
else
|
||||
exec "$@"
|
||||
fi
|
||||
|
48
docker/gunicorn.conf.py
Normal file
48
docker/gunicorn.conf.py
Normal file
@@ -0,0 +1,48 @@
|
||||
bind = '127.0.0.1:8000'
|
||||
backlog = 2048
|
||||
workers = 3
|
||||
worker_class = 'sync'
|
||||
worker_connections = 1000
|
||||
timeout = 20
|
||||
keepalive = 2
|
||||
spew = False
|
||||
daemon = False
|
||||
pidfile = None
|
||||
umask = 0
|
||||
user = None
|
||||
group = None
|
||||
tmp_upload_dir = None
|
||||
loglevel = 'info'
|
||||
errorlog = '-'
|
||||
accesslog = '-'
|
||||
proc_name = None
|
||||
|
||||
def pre_fork(server, worker):
|
||||
pass
|
||||
|
||||
def pre_exec(server):
|
||||
server.log.info("Forked child, re-executing.")
|
||||
|
||||
def when_ready(server):
|
||||
server.log.info("Server is ready. Spawning workers")
|
||||
|
||||
def worker_int(worker):
|
||||
worker.log.info("worker received INT or QUIT signal")
|
||||
|
||||
## get traceback info
|
||||
import threading, sys, traceback
|
||||
id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
|
||||
code = []
|
||||
for threadId, stack in sys._current_frames().items():
|
||||
code.append("\n# Thread: %s(%d)" % (id2name.get(threadId,""),
|
||||
threadId))
|
||||
for filename, lineno, name, line in traceback.extract_stack(stack):
|
||||
code.append('File: "%s", line %d, in %s' % (filename,
|
||||
lineno, name))
|
||||
if line:
|
||||
code.append(" %s" % (line.strip()))
|
||||
worker.log.debug("\n".join(code))
|
||||
|
||||
def worker_abort(worker):
|
||||
worker.log.info("worker received SIGABRT signal")
|
||||
|
44
docker/hub/docker-compose.postgres.yml
Normal file
44
docker/hub/docker-compose.postgres.yml
Normal file
@@ -0,0 +1,44 @@
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: redis:6.0
|
||||
restart: always
|
||||
|
||||
db:
|
||||
image: postgres:13
|
||||
restart: always
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
environment:
|
||||
POSTGRES_DB: paperless
|
||||
POSTGRES_USER: paperless
|
||||
POSTGRES_PASSWORD: paperless
|
||||
|
||||
webserver:
|
||||
image: jonaswinkler/paperless-ng:0.9.1
|
||||
restart: always
|
||||
depends_on:
|
||||
- db
|
||||
- broker
|
||||
ports:
|
||||
- 8000:8000
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
- ./export:/usr/src/paperless/export
|
||||
- ./consume:/usr/src/paperless/consume
|
||||
env_file: docker-compose.env
|
||||
environment:
|
||||
PAPERLESS_REDIS: redis://broker:6379
|
||||
PAPERLESS_DBHOST: db
|
||||
|
||||
|
||||
volumes:
|
||||
data:
|
||||
media:
|
||||
pgdata:
|
31
docker/hub/docker-compose.sqlite.yml
Normal file
31
docker/hub/docker-compose.sqlite.yml
Normal file
@@ -0,0 +1,31 @@
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: redis:6.0
|
||||
restart: always
|
||||
|
||||
webserver:
|
||||
image: jonaswinkler/paperless-ng:0.9.1
|
||||
restart: always
|
||||
depends_on:
|
||||
- broker
|
||||
ports:
|
||||
- 8000:8000
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
- ./export:/usr/src/paperless/export
|
||||
- ./consume:/usr/src/paperless/consume
|
||||
env_file: docker-compose.env
|
||||
environment:
|
||||
PAPERLESS_REDIS: redis://broker:6379
|
||||
|
||||
|
||||
volumes:
|
||||
data:
|
||||
media:
|
96
docker/imagemagick-policy.xml
Normal file
96
docker/imagemagick-policy.xml
Normal file
@@ -0,0 +1,96 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE policymap [
|
||||
<!ELEMENT policymap (policy)+>
|
||||
<!ATTLIST policymap xmlns CDATA #FIXED ''>
|
||||
<!ELEMENT policy EMPTY>
|
||||
<!ATTLIST policy xmlns CDATA #FIXED '' domain NMTOKEN #REQUIRED
|
||||
name NMTOKEN #IMPLIED pattern CDATA #IMPLIED rights NMTOKEN #IMPLIED
|
||||
stealth NMTOKEN #IMPLIED value CDATA #IMPLIED>
|
||||
]>
|
||||
<!--
|
||||
Configure ImageMagick policies.
|
||||
|
||||
Domains include system, delegate, coder, filter, path, or resource.
|
||||
|
||||
Rights include none, read, write, execute and all. Use | to combine them,
|
||||
for example: "read | write" to permit read from, or write to, a path.
|
||||
|
||||
Use a glob expression as a pattern.
|
||||
|
||||
Suppose we do not want users to process MPEG video images:
|
||||
|
||||
<policy domain="delegate" rights="none" pattern="mpeg:decode" />
|
||||
|
||||
Here we do not want users reading images from HTTP:
|
||||
|
||||
<policy domain="coder" rights="none" pattern="HTTP" />
|
||||
|
||||
The /repository file system is restricted to read only. We use a glob
|
||||
expression to match all paths that start with /repository:
|
||||
|
||||
<policy domain="path" rights="read" pattern="/repository/*" />
|
||||
|
||||
Lets prevent users from executing any image filters:
|
||||
|
||||
<policy domain="filter" rights="none" pattern="*" />
|
||||
|
||||
Any large image is cached to disk rather than memory:
|
||||
|
||||
<policy domain="resource" name="area" value="1GP"/>
|
||||
|
||||
Define arguments for the memory, map, area, width, height and disk resources
|
||||
with SI prefixes (.e.g 100MB). In addition, resource policies are maximums
|
||||
for each instance of ImageMagick (e.g. policy memory limit 1GB, -limit 2GB
|
||||
exceeds policy maximum so memory limit is 1GB).
|
||||
|
||||
Rules are processed in order. Here we want to restrict ImageMagick to only
|
||||
read or write a small subset of proven web-safe image types:
|
||||
|
||||
<policy domain="delegate" rights="none" pattern="*" />
|
||||
<policy domain="filter" rights="none" pattern="*" />
|
||||
<policy domain="coder" rights="none" pattern="*" />
|
||||
<policy domain="coder" rights="read|write" pattern="{GIF,JPEG,PNG,WEBP}" />
|
||||
-->
|
||||
<policymap>
|
||||
<!-- <policy domain="system" name="shred" value="2"/> -->
|
||||
<!-- <policy domain="system" name="precision" value="6"/> -->
|
||||
<!-- <policy domain="system" name="memory-map" value="anonymous"/> -->
|
||||
<!-- <policy domain="system" name="max-memory-request" value="256MiB"/> -->
|
||||
<!-- <policy domain="resource" name="temporary-path" value="/tmp"/> -->
|
||||
<policy domain="resource" name="memory" value="256MiB"/>
|
||||
<policy domain="resource" name="map" value="512MiB"/>
|
||||
<policy domain="resource" name="width" value="16KP"/>
|
||||
<policy domain="resource" name="height" value="16KP"/>
|
||||
<!-- <policy domain="resource" name="list-length" value="128"/> -->
|
||||
<policy domain="resource" name="area" value="128MB"/>
|
||||
<policy domain="resource" name="disk" value="1GiB"/>
|
||||
<!-- <policy domain="resource" name="file" value="768"/> -->
|
||||
<!-- <policy domain="resource" name="thread" value="4"/> -->
|
||||
<!-- <policy domain="resource" name="throttle" value="0"/> -->
|
||||
<!-- <policy domain="resource" name="time" value="3600"/> -->
|
||||
<!-- <policy domain="coder" rights="none" pattern="MVG" /> -->
|
||||
<!-- <policy domain="module" rights="none" pattern="{PS,PDF,XPS}" /> -->
|
||||
<!-- <policy domain="delegate" rights="none" pattern="HTTPS" /> -->
|
||||
<!-- <policy domain="path" rights="none" pattern="@*" /> -->
|
||||
<!-- <policy domain="cache" name="memory-map" value="anonymous"/> -->
|
||||
<!-- <policy domain="cache" name="synchronize" value="True"/> -->
|
||||
<!-- <policy domain="cache" name="shared-secret" value="passphrase" stealth="true"/> -->
|
||||
<!-- <policy domain="system" name="pixel-cache-memory" value="anonymous"/> -->
|
||||
<!-- <policy domain="system" name="shred" value="2"/> -->
|
||||
<!-- <policy domain="system" name="precision" value="6"/> -->
|
||||
<!-- not needed due to the need to use explicitly by mvg: -->
|
||||
<!-- <policy domain="delegate" rights="none" pattern="MVG" /> -->
|
||||
<!-- use curl -->
|
||||
<policy domain="delegate" rights="none" pattern="URL" />
|
||||
<policy domain="delegate" rights="none" pattern="HTTPS" />
|
||||
<policy domain="delegate" rights="none" pattern="HTTP" />
|
||||
<!-- in order to avoid to get image with password text -->
|
||||
<policy domain="path" rights="none" pattern="@*"/>
|
||||
<!-- disable ghostscript format types -->
|
||||
<policy domain="coder" rights="none" pattern="PS" />
|
||||
<policy domain="coder" rights="none" pattern="PS2" />
|
||||
<policy domain="coder" rights="none" pattern="PS3" />
|
||||
<policy domain="coder" rights="none" pattern="EPS" />
|
||||
<policy domain="coder" rights="read|write" pattern="PDF" />
|
||||
<policy domain="coder" rights="none" pattern="XPS" />
|
||||
</policymap>
|
60
docker/local/Dockerfile
Normal file
60
docker/local/Dockerfile
Normal file
@@ -0,0 +1,60 @@
|
||||
FROM python:3.7-slim
|
||||
|
||||
WORKDIR /usr/src/paperless/
|
||||
|
||||
COPY requirements.txt ./
|
||||
|
||||
#Dependencies
|
||||
RUN apt-get update \
|
||||
&& apt-get -y --no-install-recommends install \
|
||||
build-essential \
|
||||
curl \
|
||||
ghostscript \
|
||||
gnupg \
|
||||
imagemagick \
|
||||
libatlas-base-dev \
|
||||
libmagic-dev \
|
||||
libpoppler-cpp-dev \
|
||||
libpq-dev \
|
||||
optipng \
|
||||
sudo \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-eng \
|
||||
tesseract-ocr-deu \
|
||||
tesseract-ocr-fra \
|
||||
tesseract-ocr-ita \
|
||||
tesseract-ocr-spa \
|
||||
tzdata \
|
||||
unpaper \
|
||||
&& pip3 install --upgrade supervisor setuptools \
|
||||
&& pip install --no-cache-dir -r requirements.txt \
|
||||
&& apt-get -y purge build-essential \
|
||||
&& apt-get -y autoremove --purge \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& mkdir /var/log/supervisord /var/run/supervisord
|
||||
|
||||
# copy scripts
|
||||
# this fixes issues with imagemagick and PDF
|
||||
COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
|
||||
COPY docker/gunicorn.conf.py ./
|
||||
COPY docker/supervisord.conf /etc/supervisord.conf
|
||||
COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh
|
||||
|
||||
# copy app
|
||||
COPY src/ ./src/
|
||||
|
||||
# add users, setup scripts
|
||||
RUN addgroup --gid 1000 paperless \
|
||||
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
|
||||
&& chown -R paperless:paperless . \
|
||||
&& chmod 755 /sbin/docker-entrypoint.sh
|
||||
|
||||
WORKDIR /usr/src/paperless/src/
|
||||
|
||||
RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input
|
||||
|
||||
VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
|
||||
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
|
||||
CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisord.conf"]
|
||||
|
||||
LABEL maintainer="Jonas Winkler <dev@jpwinkler.de>"
|
44
docker/local/docker-compose.postgres.yml
Normal file
44
docker/local/docker-compose.postgres.yml
Normal file
@@ -0,0 +1,44 @@
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: redis:6.0
|
||||
restart: always
|
||||
|
||||
db:
|
||||
image: postgres:13
|
||||
restart: always
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
environment:
|
||||
POSTGRES_DB: paperless
|
||||
POSTGRES_USER: paperless
|
||||
POSTGRES_PASSWORD: paperless
|
||||
|
||||
webserver:
|
||||
build: .
|
||||
restart: always
|
||||
depends_on:
|
||||
- db
|
||||
- broker
|
||||
ports:
|
||||
- 8000:8000
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
- ./export:/usr/src/paperless/export
|
||||
- ./consume:/usr/src/paperless/consume
|
||||
env_file: docker-compose.env
|
||||
environment:
|
||||
PAPERLESS_REDIS: redis://broker:6379
|
||||
PAPERLESS_DBHOST: db
|
||||
|
||||
|
||||
volumes:
|
||||
data:
|
||||
media:
|
||||
pgdata:
|
31
docker/local/docker-compose.sqlite.yml
Normal file
31
docker/local/docker-compose.sqlite.yml
Normal file
@@ -0,0 +1,31 @@
|
||||
version: "3.4"
|
||||
services:
|
||||
broker:
|
||||
image: redis:6.0
|
||||
restart: always
|
||||
|
||||
webserver:
|
||||
build: .
|
||||
restart: always
|
||||
depends_on:
|
||||
- broker
|
||||
ports:
|
||||
- 8000:8000
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
- ./export:/usr/src/paperless/export
|
||||
- ./consume:/usr/src/paperless/consume
|
||||
env_file: docker-compose.env
|
||||
environment:
|
||||
PAPERLESS_REDIS: redis://broker:6379
|
||||
|
||||
|
||||
volumes:
|
||||
data:
|
||||
media:
|
35
docker/supervisord.conf
Normal file
35
docker/supervisord.conf
Normal file
@@ -0,0 +1,35 @@
|
||||
[supervisord]
|
||||
nodaemon=true ; start in foreground if true; default false
|
||||
logfile=/var/log/supervisord/supervisord.log ; main log file; default $CWD/supervisord.log
|
||||
pidfile=/var/run/supervisord/supervisord.pid ; supervisord pidfile; default supervisord.pid
|
||||
logfile_maxbytes=50MB ; max main logfile bytes b4 rotation; default 50MB
|
||||
logfile_backups=10 ; # of main logfile backups; 0 means none, default 10
|
||||
loglevel=info ; log level; default info; others: debug,warn,trace
|
||||
user=root
|
||||
|
||||
[program:daphne]
|
||||
command=daphne -b 0.0.0.0 -p 8000 paperless.asgi:application
|
||||
user=paperless
|
||||
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:consumer]
|
||||
command=python3 manage.py document_consumer
|
||||
user=paperless
|
||||
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:scheduler]
|
||||
command=python3 manage.py qcluster
|
||||
user=paperless
|
||||
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
Reference in New Issue
Block a user