paperless-ngx/Dockerfile
2023-07-27 12:30:41 -07:00

268 lines
8.4 KiB
Docker

# syntax=docker/dockerfile:1
# https://github.com/moby/buildkit/blob/master/frontend/dockerfile/docs/reference.md
# Stage: compile-frontend
# Purpose: Compiles the frontend
# Notes:
# - Does NPM stuff with Typescript and such
FROM --platform=$BUILDPLATFORM docker.io/node:16-bookworm-slim AS compile-frontend
COPY ./src-ui /src/src-ui
WORKDIR /src/src-ui
RUN set -eux \
&& npm update npm -g \
&& npm ci --omit=optional
RUN set -eux \
&& ./node_modules/.bin/ng build --configuration production
# Stage: pipenv-base
# Purpose: Generates a requirements.txt file for building
# Comments:
# - pipenv dependencies are not left in the final image
# - pipenv can't touch the final image somehow
FROM --platform=$BUILDPLATFORM docker.io/python:3.9-alpine as pipenv-base
WORKDIR /usr/src/pipenv
COPY Pipfile* ./
RUN set -eux \
&& echo "Installing pipenv" \
&& python3 -m pip install --no-cache-dir --upgrade pipenv==2023.7.23 \
&& echo "Generating requirement.txt" \
&& pipenv requirements > requirements.txt
# Stage: main-app
# Purpose: The final image
# Comments:
# - Don't leave anything extra in here
FROM docker.io/python:3.9-slim-bookworm as main-app
LABEL org.opencontainers.image.authors="paperless-ngx team <hello@paperless-ngx.com>"
LABEL org.opencontainers.image.documentation="https://docs.paperless-ngx.com/"
LABEL org.opencontainers.image.source="https://github.com/paperless-ngx/paperless-ngx"
LABEL org.opencontainers.image.url="https://github.com/paperless-ngx/paperless-ngx"
LABEL org.opencontainers.image.licenses="GPL-3.0-only"
ARG DEBIAN_FRONTEND=noninteractive
#
# Begin installation and configuration
# Order the steps below from least often changed to most
#
# Packages need for running
ARG RUNTIME_PACKAGES="\
# General utils
curl \
# Docker specific
gosu \
# Timezones support
tzdata \
# fonts for text file thumbnail generation
fonts-liberation \
gettext \
ghostscript \
gnupg \
icc-profiles-free \
imagemagick \
# Image processing
liblept5 \
liblcms2-2 \
libtiff6 \
libfreetype6 \
libwebp7 \
libopenjp2-7 \
libimagequant0 \
libraqm0 \
libjpeg62-turbo \
# PostgreSQL
libpq5 \
postgresql-client \
# MySQL / MariaDB
mariadb-client \
# For Numpy
libatlas3-base \
# OCRmyPDF dependencies
tesseract-ocr \
tesseract-ocr-eng \
tesseract-ocr-deu \
tesseract-ocr-fra \
tesseract-ocr-ita \
tesseract-ocr-spa \
unpaper \
pngquant \
# pikepdf / qpdf
jbig2dec \
libxml2 \
libxslt1.1 \
libgnutls30 \
libqpdf29 \
qpdf \
# Mime type detection
file \
libmagic1 \
media-types \
zlib1g \
# Barcode splitter
libzbar0 \
poppler-utils \
# RapidFuzz on armv7
libatomic1"
# Install basic runtime packages.
# These change very infrequently
RUN set -eux \
echo "Installing system packages" \
&& apt-get update \
&& apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \
&& rm -rf /var/lib/apt/lists/* \
&& echo "Installing supervisor" \
&& python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor==4.2.5
# Copy gunicorn config
# Changes very infrequently
WORKDIR /usr/src/paperless/
COPY gunicorn.conf.py .
# setup docker-specific things
# These change sometimes, but rarely
WORKDIR /usr/src/paperless/src/docker/
COPY [ \
"docker/imagemagick-policy.xml", \
"docker/supervisord.conf", \
"docker/docker-entrypoint.sh", \
"docker/docker-prepare.sh", \
"docker/paperless_cmd.sh", \
"docker/wait-for-redis.py", \
"docker/env-from-file.sh", \
"docker/management_script.sh", \
"docker/flower-conditional.sh", \
"docker/install_management_commands.sh", \
"/usr/src/paperless/src/docker/" \
]
RUN set -eux \
&& echo "Configuring ImageMagick" \
&& mv imagemagick-policy.xml /etc/ImageMagick-6/policy.xml \
&& echo "Configuring supervisord" \
&& mkdir /var/log/supervisord /var/run/supervisord \
&& mv supervisord.conf /etc/supervisord.conf \
&& echo "Setting up Docker scripts" \
&& mv docker-entrypoint.sh /sbin/docker-entrypoint.sh \
&& chmod 755 /sbin/docker-entrypoint.sh \
&& mv docker-prepare.sh /sbin/docker-prepare.sh \
&& chmod 755 /sbin/docker-prepare.sh \
&& mv wait-for-redis.py /sbin/wait-for-redis.py \
&& chmod 755 /sbin/wait-for-redis.py \
&& mv env-from-file.sh /sbin/env-from-file.sh \
&& chmod 755 /sbin/env-from-file.sh \
&& mv paperless_cmd.sh /usr/local/bin/paperless_cmd.sh \
&& chmod 755 /usr/local/bin/paperless_cmd.sh \
&& mv flower-conditional.sh /usr/local/bin/flower-conditional.sh \
&& chmod 755 /usr/local/bin/flower-conditional.sh \
&& echo "Installing managment commands" \
&& chmod +x install_management_commands.sh \
&& ./install_management_commands.sh
# Buildx provided, must be defined to use though
ARG TARGETARCH
ARG TARGETVARIANT
# Can be workflow provided, defaults set for manual building
ARG JBIG2ENC_VERSION=0.29
ARG QPDF_VERSION=11.3.0
ARG PIKEPDF_VERSION=7.2.0
ARG PSYCOPG2_VERSION=2.9.6
# Install the built packages from the installer library images
# These change sometimes
RUN set -eux \
&& echo "Getting binaries" \
&& mkdir paperless-ngx \
&& curl --fail --silent --show-error --output paperless-ngx.tar.gz --location https://github.com/paperless-ngx/builder/archive/58bb061b9b3b63009852d6d875f9a305d9ae6ac9.tar.gz \
&& tar -xf paperless-ngx.tar.gz --directory paperless-ngx --strip-components=1 \
&& cd paperless-ngx \
# Setting a specific revision ensures we know what this installed
# and ensures cache breaking on changes
&& echo "Installing jbig2enc" \
&& cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/jbig2 /usr/local/bin/ \
&& cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/libjbig2enc* /usr/local/lib/ \
&& chmod a+x /usr/local/bin/jbig2 \
&& echo "Installing pikepdf and dependencies" \
&& python3 -m pip install --no-cache-dir ./pikepdf/${PIKEPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/*.whl \
&& python3 -m pip list \
&& echo "Installing psycopg2" \
&& python3 -m pip install --no-cache-dir ./psycopg2/${PSYCOPG2_VERSION}/${TARGETARCH}${TARGETVARIANT}/psycopg2*.whl \
&& python3 -m pip list \
&& echo "Cleaning up image layer" \
&& cd ../ \
&& rm -rf paperless-ngx \
&& rm paperless-ngx.tar.gz
WORKDIR /usr/src/paperless/src/
# Python dependencies
# Change pretty frequently
COPY --from=pipenv-base /usr/src/pipenv/requirements.txt ./
# Packages needed only for building a few quick Python
# dependencies
ARG BUILD_PACKAGES="\
build-essential \
git \
default-libmysqlclient-dev \
pkg-config"
# hadolint ignore=DL3042
RUN --mount=type=cache,target=/root/.cache/pip/,id=pip-cache \
set -eux \
&& echo "Installing build system packages" \
&& apt-get update \
&& apt-get install --yes --quiet --no-install-recommends ${BUILD_PACKAGES} \
&& python3 -m pip install --no-cache-dir --upgrade wheel \
&& echo "Installing Python requirements" \
&& python3 -m pip install --default-timeout=1000 --requirement requirements.txt \
&& echo "Installing NLTK data" \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" stopwords \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" punkt \
&& echo "Cleaning up image" \
&& apt-get -y purge ${BUILD_PACKAGES} \
&& apt-get -y autoremove --purge \
&& apt-get clean --yes \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /tmp/* \
&& rm -rf /var/tmp/* \
&& rm -rf /var/cache/apt/archives/* \
&& truncate -s 0 /var/log/*log
# copy backend
COPY ./src ./
# copy frontend
COPY --from=compile-frontend /src/src/documents/static/frontend/ ./documents/static/frontend/
# add users, setup scripts
# Mount the compiled frontend to expected location
RUN set -eux \
&& addgroup --gid 1000 paperless \
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
&& chown -R paperless:paperless /usr/src/paperless \
&& gosu paperless python3 manage.py collectstatic --clear --no-input --link \
&& gosu paperless python3 manage.py compilemessages
VOLUME ["/usr/src/paperless/data", \
"/usr/src/paperless/media", \
"/usr/src/paperless/consume", \
"/usr/src/paperless/export"]
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
EXPOSE 8000
CMD ["/usr/local/bin/paperless_cmd.sh"]