diff --git a/.build-config.json b/.build-config.json new file mode 100644 index 000000000..32cf968d5 --- /dev/null +++ b/.build-config.json @@ -0,0 +1,9 @@ +{ + "qpdf": { + "version": "10.6.3" + }, + "jbig2enc": { + "version": "0.29", + "git_tag": "0.29" + } +} diff --git a/.editorconfig b/.editorconfig index 125108a0c..8111f01d8 100644 --- a/.editorconfig +++ b/.editorconfig @@ -33,5 +33,5 @@ indent_style = space [**/test_*.py] max_line_length = off -[Dockerfile] +[Dockerfile*] indent_style = space diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 10b81bf79..34bcf21a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,73 +45,155 @@ jobs: name: documentation path: docs/_build/html/ - code-checks-backend: - name: "Backend Code Checks" + ci-backend: + uses: ./.github/workflows/reusable-ci-backend.yml + + ci-frontend: + uses: ./.github/workflows/reusable-ci-frontend.yml + + prepare-docker-build: + name: Prepare Docker Pipeline Data + if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/beta' || startsWith(github.ref, 'refs/tags/ngx-') || startsWith(github.ref, 'refs/tags/beta-')) runs-on: ubuntu-20.04 + needs: + - documentation + - ci-backend + - ci-frontend steps: - name: Checkout uses: actions/checkout@v3 - - name: Install checkers - run: | - pipx install reorder-python-imports - pipx install yesqa - pipx install add-trailing-comma - pipx install flake8 - - - name: Run reorder-python-imports - run: | - find src/ -type f -name '*.py' ! -path "*/migrations/*" | xargs reorder-python-imports - - - name: Run yesqa - run: | - find src/ -type f -name '*.py' ! -path "*/migrations/*" | xargs yesqa - - - name: Run add-trailing-comma - run: | - find src/ -type f -name '*.py' ! 
-path "*/migrations/*" | xargs add-trailing-comma - # black is placed after add-trailing-comma because it may format differently - # if a trailing comma is added - - - name: Run black - uses: psf/black@stable + name: Login to Github Container Registry + uses: docker/login-action@v1 with: - options: "--check --diff" - version: "22.3.0" + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - - name: Run flake8 checks - run: | - cd src/ - flake8 --max-line-length=88 --ignore=E203,W503 - - code-checks-frontend: - name: "Frontend Code Checks" - runs-on: ubuntu-20.04 - steps: - - - name: Checkout - uses: actions/checkout@v3 - - uses: actions/setup-node@v3 + name: Set up Python + uses: actions/setup-python@v3 with: - node-version: '16' + python-version: "3.9" - - name: Install prettier + name: Make script executable run: | - npm install prettier + chmod +x ${GITHUB_WORKSPACE}/docker-builders/get-build-json.py - - name: Run prettier - run: - npx prettier --check --ignore-path Pipfile.lock **/*.js **/*.ts *.md **/*.md + name: Setup qpdf image + id: qpdf-setup + run: | + build_json=$(python ${GITHUB_WORKSPACE}/docker-builders/get-build-json.py qpdf) - tests-backend: - needs: [code-checks-backend] - name: "Backend Tests (${{ matrix.python-version }})" - runs-on: ubuntu-20.04 - strategy: - matrix: - python-version: ['3.8', '3.9', '3.10'] - fail-fast: false + echo ${build_json} + + echo ::set-output name=qpdf-json::${build_json} + - + name: Setup psycopg2 image + id: psycopg2-setup + run: | + build_json=$(python ${GITHUB_WORKSPACE}/docker-builders/get-build-json.py psycopg2) + + echo ${build_json} + + echo ::set-output name=psycopg2-json::${build_json} + - + name: Setup pikepdf image + id: pikepdf-setup + run: | + build_json=$(python ${GITHUB_WORKSPACE}/docker-builders/get-build-json.py pikepdf) + + echo ${build_json} + + echo ::set-output name=pikepdf-json::${build_json} + - + name: Setup jbig2enc image + id: jbig2enc-setup + run: | + 
build_json=$(python ${GITHUB_WORKSPACE}/docker-builders/get-build-json.py jbig2enc) + + echo ${build_json} + + echo ::set-output name=jbig2enc-json::${build_json} + - + name: Setup frontend image + id: frontend-setup + run: | + build_json=$(python ${GITHUB_WORKSPACE}/docker-builders/get-build-json.py frontend) + + echo ${build_json} + + echo ::set-output name=frontend-json::${build_json} + + outputs: + + qpdf-json: ${{ steps.qpdf-setup.outputs.qpdf-json }} + + pikepdf-json: ${{ steps.pikepdf-setup.outputs.pikepdf-json }} + + psycopg2-json: ${{ steps.psycopg2-setup.outputs.psycopg2-json }} + + jbig2enc-json: ${{ steps.jbig2enc-setup.outputs.jbig2enc-json}} + + frontend-json: ${{ steps.frontend-setup.outputs.frontend-json}} + + build-qpdf-debs: + name: qpdf + needs: + - prepare-docker-build + uses: ./.github/workflows/reusable-workflow-builder.yml + with: + dockerfile: ./docker-builders/Dockerfile.qpdf + build-json: ${{ needs.prepare-docker-build.outputs.qpdf-json }} + build-args: | + QPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).version }} + + build-jbig2enc: + name: jbig2enc + needs: + - prepare-docker-build + uses: ./.github/workflows/reusable-workflow-builder.yml + with: + dockerfile: ./docker-builders/Dockerfile.jbig2enc + build-json: ${{ needs.prepare-docker-build.outputs.jbig2enc-json }} + build-args: | + JBIG2ENC_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).version }} + + build-psycopg2-wheel: + name: psycopg2 + needs: + - prepare-docker-build + uses: ./.github/workflows/reusable-workflow-builder.yml + with: + dockerfile: ./docker-builders/Dockerfile.psycopg2 + build-json: ${{ needs.prepare-docker-build.outputs.psycopg2-json }} + build-args: | + PSYCOPG2_GIT_TAG=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).git_tag }} + PSYCOPG2_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).version }} + + build-pikepdf-wheel: + name: pikepdf + needs: + - prepare-docker-build + - 
build-qpdf-debs + uses: ./.github/workflows/reusable-workflow-builder.yml + with: + dockerfile: ./docker-builders/Dockerfile.pikepdf + build-json: ${{ needs.prepare-docker-build.outputs.pikepdf-json }} + build-args: | + REPO=${{ github.repository }} + QPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).version }} + PIKEPDF_GIT_TAG=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).git_tag }} + PIKEPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }} + + build-frontend: + name: Compile frontend + concurrency: + group: ${{ github.workflow }}-build-frontend-${{ github.ref_name }} + cancel-in-progress: false + needs: + - prepare-docker-build + runs-on: ubuntu-latest steps: - name: Checkout @@ -119,77 +201,82 @@ jobs: with: fetch-depth: 2 - - name: Install pipenv - run: pipx install pipenv - - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: "${{ matrix.python-version }}" - cache: "pipenv" - cache-dependency-path: 'Pipfile.lock' - - - name: Install system dependencies - run: | - sudo apt-get update -qq - sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript optipng libzbar0 poppler-utils - - - name: Install Python dependencies - run: | - pipenv sync --dev - - - name: Tests - run: | - cd src/ - pipenv run pytest - - - name: Get changed files + name: Get changed frontend files id: changed-files-specific uses: tj-actions/changed-files@v18.1 with: files: | - src/** + src-ui/** - - name: List all changed files - run: | - for file in ${{ steps.changed-files-specific.outputs.all_changed_files }}; do - echo "${file} was changed" - done - - - name: Publish coverage results - if: matrix.python-version == '3.9' && steps.changed-files-specific.outputs.any_changed == 'true' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # https://github.com/coveralls-clients/coveralls-python/issues/251 - run: | - cd src/ - pipenv run coveralls --service=github 
- - tests-frontend: - needs: [code-checks-frontend] - name: "Frontend Tests" - runs-on: ubuntu-20.04 - strategy: - matrix: - node-version: [16.x] - steps: - - uses: actions/checkout@v3 - - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@v3 + name: Login to Github Container Registry + uses: docker/login-action@v1 with: - node-version: ${{ matrix.node-version }} - - run: cd src-ui && npm ci - - run: cd src-ui && npm run test - - run: cd src-ui && npm run e2e:ci + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - + name: Determine if build needed + id: build-skip-check + # Skip building the frontend if the tag exists and no src-ui files changed + run: | + if ! docker manifest inspect ${{ fromJSON(needs.prepare-docker-build.outputs.frontend-json).image_tag }} &> /dev/null ; then + echo "Build required, no existing image" + echo ::set-output name=frontend-build-needed::true + elif ${{ steps.changed-files-specific.outputs.any_changed }} == 'true' ; then + echo "Build required, src-ui changes" + echo ::set-output name=frontend-build-needed::true + else + echo "No build required" + echo ::set-output name=frontend-build-needed::false + fi + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + if: ${{ steps.build-skip-check.outputs.frontend-build-needed == 'true' }} + - + name: Set up QEMU + uses: docker/setup-qemu-action@v1 + if: ${{ steps.build-skip-check.outputs.frontend-build-needed == 'true' }} + - + name: Compile frontend + uses: docker/build-push-action@v2 + if: ${{ steps.build-skip-check.outputs.frontend-build-needed == 'true' }} + with: + context: . 
+ file: ./docker-builders/Dockerfile.frontend + tags: ${{ fromJSON(needs.prepare-docker-build.outputs.frontend-json).image_tag }} + # The compilation is identical between different platforms + # The buildx and QEMU setup is left, just in case that ever changes + # But the platform is set to the runner's native for speedup + platforms: linux/amd64 + push: true + cache-from: type=gha + cache-to: type=gha,mode=max + - + name: Export frontend artifact from docker + run: | + docker create --name frontend-extract ${{ fromJSON(needs.prepare-docker-build.outputs.frontend-json).image_tag }} + docker cp frontend-extract:/src/src/documents/static/frontend src/documents/static/frontend/ + - + name: Upload frontend artifact + uses: actions/upload-artifact@v3 + with: + name: frontend-compiled + path: src/documents/static/frontend/ # build and push image to docker hub. build-docker-image: - if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/beta' || startsWith(github.ref, 'refs/tags/ngx-') || startsWith(github.ref, 'refs/tags/beta-')) - concurrency: - group: ${{ github.workflow }}-build-docker-image-${{ github.ref }} - cancel-in-progress: true runs-on: ubuntu-20.04 - needs: [tests-backend, tests-frontend] + concurrency: + group: ${{ github.workflow }}-build-docker-image-${{ github.ref_name }} + cancel-in-progress: true + needs: + - prepare-docker-build + - build-psycopg2-wheel + - build-jbig2enc + - build-qpdf-debs + - build-pikepdf-wheel + - build-frontend steps: - name: Gather Docker metadata @@ -226,26 +313,23 @@ jobs: push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.docker-meta.outputs.tags }} labels: ${{ steps.docker-meta.outputs.labels }} + build-args: | + REPO=${{ github.repository }} + JBIG2ENC_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).version }} + QPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).version }} + 
PIKEPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }} + PSYCOPG2_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).version }} + FRONTEND_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.frontend-json).version }} cache-from: type=gha cache-to: type=gha,mode=max - name: Inspect image run: | docker buildx imagetools inspect ${{ fromJSON(steps.docker-meta.outputs.json).tags[0] }} - - - name: Export frontend artifact from docker - run: | - docker create --name frontend-extract ${{ fromJSON(steps.docker-meta.outputs.json).tags[0] }} - docker cp frontend-extract:/usr/src/paperless/src/documents/static/frontend src/documents/static/frontend/ - - - name: Upload frontend artifact - uses: actions/upload-artifact@v3 - with: - name: frontend-compiled - path: src/documents/static/frontend/ build-release: - needs: [build-docker-image, documentation] + needs: + - build-docker-image runs-on: ubuntu-20.04 steps: - @@ -313,8 +397,9 @@ jobs: publish-release: runs-on: ubuntu-20.04 - needs: build-release - if: contains(github.ref, 'refs/tags/ngx-') || contains(github.ref, 'refs/tags/beta-') + needs: + - build-release + if: github.ref_type == 'tag' && (startsWith(github.ref_name, 'ngx-') || startsWith(github.ref_name, 'beta-')) steps: - name: Download release artifact diff --git a/.github/workflows/reusable-ci-backend.yml b/.github/workflows/reusable-ci-backend.yml new file mode 100644 index 000000000..28092fcb1 --- /dev/null +++ b/.github/workflows/reusable-ci-backend.yml @@ -0,0 +1,108 @@ +name: Backend CI Jobs + +on: + workflow_call: + +jobs: + + code-checks-backend: + name: "Code Style Checks" + runs-on: ubuntu-20.04 + steps: + - + name: Checkout + uses: actions/checkout@v3 + - + name: Install checkers + run: | + pipx install reorder-python-imports + pipx install yesqa + pipx install add-trailing-comma + pipx install flake8 + - + name: Run reorder-python-imports + run: | + find src/ -type f -name '*.py' ! 
-path "*/migrations/*" | xargs reorder-python-imports + - + name: Run yesqa + run: | + find src/ -type f -name '*.py' ! -path "*/migrations/*" | xargs yesqa + - + name: Run add-trailing-comma + run: | + find src/ -type f -name '*.py' ! -path "*/migrations/*" | xargs add-trailing-comma + # black is placed after add-trailing-comma because it may format differently + # if a trailing comma is added + - + name: Run black + uses: psf/black@stable + with: + options: "--check --diff" + version: "22.3.0" + - + name: Run flake8 checks + run: | + cd src/ + flake8 --max-line-length=88 --ignore=E203,W503 + + tests-backend: + name: "Tests (${{ matrix.python-version }})" + runs-on: ubuntu-20.04 + needs: + - code-checks-backend + strategy: + matrix: + python-version: ['3.8', '3.9', '3.10'] + fail-fast: false + steps: + - + name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 2 + - + name: Install pipenv + run: pipx install pipenv + - + name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: "${{ matrix.python-version }}" + cache: "pipenv" + cache-dependency-path: 'Pipfile.lock' + - + name: Install system dependencies + run: | + sudo apt-get update -qq + sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript optipng libzbar0 poppler-utils + - + name: Install Python dependencies + run: | + pipenv sync --dev + - + name: Tests + run: | + cd src/ + pipenv run pytest + - + name: Get changed files + id: changed-files-specific + uses: tj-actions/changed-files@v18.1 + with: + files: | + src/** + - + name: List all changed files + run: | + for file in ${{ steps.changed-files-specific.outputs.all_changed_files }}; do + echo "${file} was changed" + done + - + name: Publish coverage results + if: matrix.python-version == '3.9' && steps.changed-files-specific.outputs.any_changed == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # https://github.com/coveralls-clients/coveralls-python/issues/251 + run: | + cd 
src/ + pipenv run coveralls --service=github diff --git a/.github/workflows/reusable-ci-frontend.yml b/.github/workflows/reusable-ci-frontend.yml new file mode 100644 index 000000000..cc565775a --- /dev/null +++ b/.github/workflows/reusable-ci-frontend.yml @@ -0,0 +1,42 @@ +name: Frontend CI Jobs + +on: + workflow_call: + +jobs: + + code-checks-frontend: + name: "Code Style Checks" + runs-on: ubuntu-20.04 + steps: + - + name: Checkout + uses: actions/checkout@v3 + - uses: actions/setup-node@v3 + with: + node-version: '16' + - + name: Install prettier + run: | + npm install prettier + - + name: Run prettier + run: + npx prettier --check --ignore-path Pipfile.lock **/*.js **/*.ts *.md **/*.md + tests-frontend: + name: "Tests" + runs-on: ubuntu-20.04 + needs: + - code-checks-frontend + strategy: + matrix: + node-version: [16.x] + steps: + - uses: actions/checkout@v3 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v3 + with: + node-version: ${{ matrix.node-version }} + - run: cd src-ui && npm ci + - run: cd src-ui && npm run test + - run: cd src-ui && npm run e2e:ci diff --git a/.github/workflows/reusable-workflow-builder.yml b/.github/workflows/reusable-workflow-builder.yml new file mode 100644 index 000000000..543cd3d79 --- /dev/null +++ b/.github/workflows/reusable-workflow-builder.yml @@ -0,0 +1,68 @@ +name: Reusable Image Builder + +on: + workflow_call: + inputs: + dockerfile: + required: true + type: string + build-json: + required: true + type: string + build-args: + required: false + default: "" + type: string + +concurrency: + group: ${{ github.workflow }}-${{ fromJSON(inputs.build-json).name }}-${{ fromJSON(inputs.build-json).version }} + cancel-in-progress: false + +jobs: + build-image: + name: Build ${{ fromJSON(inputs.build-json).name }} @ ${{ fromJSON(inputs.build-json).version }} + runs-on: ubuntu-latest + steps: + - + name: Login to Github Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + 
username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - + name: Determine if build needed + id: build-skip-check + run: | + if ! docker manifest inspect ${{ fromJSON(inputs.build-json).image_tag }} &> /dev/null ; then + echo "Building, no image exists with this version" + echo ::set-output name=image-exists::false + else + echo "Not building, image exists with this version" + echo ::set-output name=image-exists::true + fi + - + name: Checkout + uses: actions/checkout@v3 + if: ${{ steps.build-skip-check.outputs.image-exists == 'false' }} + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + if: ${{ steps.build-skip-check.outputs.image-exists == 'false' }} + - + name: Set up QEMU + uses: docker/setup-qemu-action@v1 + if: ${{ steps.build-skip-check.outputs.image-exists == 'false' }} + - + name: Build ${{ fromJSON(inputs.build-json).name }} + uses: docker/build-push-action@v2 + if: ${{ steps.build-skip-check.outputs.image-exists == 'false' }} + with: + context: . 
+ file: ${{ inputs.dockerfile }} + tags: ${{ fromJSON(inputs.build-json).image_tag }} + platforms: linux/amd64,linux/arm64,linux/arm/v7 + build-args: ${{ inputs.build-args }} + push: true + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 65ecc7980..f0bf9bace 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -47,7 +47,7 @@ repos: - id: yesqa exclude: "(migrations)" - repo: https://github.com/asottile/add-trailing-comma - rev: "v2.2.2" + rev: "v2.2.3" hooks: - id: add-trailing-comma exclude: "(migrations)" @@ -63,10 +63,17 @@ repos: hooks: - id: black # Dockerfile hooks - - repo: https://github.com/pryorda/dockerfilelint-precommit-hooks - rev: "v0.1.0" + - repo: https://github.com/AleksaC/hadolint-py + rev: v2.10.0 hooks: - - id: dockerfilelint + - id: hadolint + args: + - --ignore + - DL3008 # https://github.com/hadolint/hadolint/wiki/DL3008 (should probably do this at some point) + - --ignore + - DL3013 # https://github.com/hadolint/hadolint/wiki/DL3013 (should probably do this too at some point) + - --ignore + - DL3003 # https://github.com/hadolint/hadolint/wiki/DL3003 (seems excessive to use WORKDIR so much) # Shell script hooks - repo: https://github.com/lovesegfault/beautysh rev: v6.2.1 diff --git a/Dockerfile b/Dockerfile index 8b46d072b..77417383f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,21 @@ -FROM node:16 AS compile-frontend +# Default to pulling from the main repo registry when manually building +ARG REPO="paperless-ngx/paperless-ngx" -COPY . 
/src +# These are all built previously in the pipeline +# They provide either a .deb, .whl or whatever npm outputs +ARG JBIG2ENC_VERSION +ARG QPDF_VERSION +ARG PIKEPDF_VERSION +ARG PSYCOPG2_VERSION +ARG FRONTEND_VERSION -WORKDIR /src/src-ui -RUN npm update npm -g && npm ci --no-optional -RUN ./node_modules/.bin/ng build --configuration production +FROM ghcr.io/${REPO}/builder/jbig2enc:${JBIG2ENC_VERSION} as jbig2enc-builder +FROM ghcr.io/${REPO}/builder/qpdf:${QPDF_VERSION} as qpdf-builder +FROM ghcr.io/${REPO}/builder/pikepdf:${PIKEPDF_VERSION} as pikepdf-builder +FROM ghcr.io/${REPO}/builder/psycopg2:${PSYCOPG2_VERSION} as psycopg2-builder +FROM ghcr.io/${REPO}/builder/frontend:${FRONTEND_VERSION} as compile-frontend -FROM ghcr.io/paperless-ngx/builder/ngx-base:1.7.0 as main-app +FROM python:3.9-slim-bullseye as main-app LABEL org.opencontainers.image.authors="paperless-ngx team " LABEL org.opencontainers.image.documentation="https://paperless-ngx.readthedocs.io/en/latest/" @@ -14,27 +23,115 @@ LABEL org.opencontainers.image.source="https://github.com/paperless-ngx/paperles LABEL org.opencontainers.image.url="https://github.com/paperless-ngx/paperless-ngx" LABEL org.opencontainers.image.licenses="GPL-3.0-only" +ARG DEBIAN_FRONTEND=noninteractive + +# Packages needed only for building +ARG BUILD_PACKAGES="\ + build-essential \ + git \ + python3-dev" + +# Packages need for running +ARG RUNTIME_PACKAGES="\ + curl \ + file \ + # fonts for text file thumbnail generation + fonts-liberation \ + gettext \ + ghostscript \ + gnupg \ + gosu \ + icc-profiles-free \ + imagemagick \ + media-types \ + liblept5 \ + libpq5 \ + libxml2 \ + libxslt1.1 \ + libgnutls30 \ + libjpeg62-turbo \ + optipng \ + python3 \ + python3-pip \ + python3-setuptools \ + postgresql-client \ + # For Numpy + libatlas3-base \ + # thumbnail size reduction + pngquant \ + # OCRmyPDF dependencies + tesseract-ocr \ + tesseract-ocr-eng \ + tesseract-ocr-deu \ + tesseract-ocr-fra \ + tesseract-ocr-ita \ + 
tesseract-ocr-spa \ + tzdata \ + unpaper \ + # Mime type detection + zlib1g \ + # Barcode splitter + libzbar0 \ + poppler-utils" + WORKDIR /usr/src/paperless/src/ +# Copy qpdf and runtime library +COPY --from=qpdf-builder /usr/src/qpdf/libqpdf28_*.deb ./ +COPY --from=qpdf-builder /usr/src/qpdf/qpdf_*.deb ./ + +# Copy pikepdf wheel and dependencies +COPY --from=pikepdf-builder /usr/src/pikepdf/wheels/*.whl ./ + +# Copy psycopg2 wheel +COPY --from=psycopg2-builder /usr/src/psycopg2/wheels/psycopg2*.whl ./ + +# copy jbig2enc +COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/ +COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/jbig2 /usr/local/bin/ +COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/*.h /usr/local/include/ + COPY requirements.txt ../ # Python dependencies -RUN apt-get update \ - # python-Levenshtein still needs to be compiled here - && apt-get -y --no-install-recommends install \ - build-essential \ - && python3 -m pip install --upgrade --no-cache-dir pip wheel \ - && python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor \ - && python3 -m pip install --default-timeout=1000 --no-cache-dir -r ../requirements.txt \ - && apt-get -y purge build-essential \ - && apt-get -y autoremove --purge \ - && rm -rf /var/lib/apt/lists/* +RUN set -eux \ + && apt-get update \ + && apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} ${BUILD_PACKAGES} \ + && python3 -m pip install --no-cache-dir --upgrade wheel \ + && echo "Installing qpdf" \ + && apt-get install --yes --no-install-recommends ./libqpdf28_*.deb \ + && apt-get install --yes --no-install-recommends ./qpdf_*.deb \ + && echo "Installing pikepdf and dependencies wheel" \ + && python3 -m pip install --no-cache-dir packaging*.whl \ + && python3 -m pip install --no-cache-dir lxml*.whl \ + && python3 -m pip install --no-cache-dir Pillow*.whl \ + && python3 -m pip install --no-cache-dir pyparsing*.whl \ + && python3 -m pip install 
--no-cache-dir pikepdf*.whl \ + && python -m pip list \ + && echo "Installing psycopg2 wheel" \ + && python3 -m pip install --no-cache-dir psycopg2*.whl \ + && python -m pip list \ + && echo "Installing supervisor" \ + && python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor \ + && echo "Installing Python requirements" \ + && python3 -m pip install --default-timeout=1000 --no-cache-dir -r ../requirements.txt \ + && echo "Cleaning up image" \ + && apt-get -y purge ${BUILD_PACKAGES} \ + && apt-get -y autoremove --purge \ + && apt-get clean --yes \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /tmp/* \ + && rm -rf /var/tmp/* \ + && rm -rf /var/cache/apt/archives/* \ + && truncate -s 0 /var/log/*log # setup docker-specific things COPY docker/ ./docker/ -RUN cd docker \ - && cp imagemagick-policy.xml /etc/ImageMagick-6/policy.xml \ +WORKDIR /usr/src/paperless/src/docker/ + +RUN set -eux \ + && cp imagemagick-policy.xml /etc/ImageMagick-6/policy.xml \ && mkdir /var/log/supervisord /var/run/supervisord \ && cp supervisord.conf /etc/supervisord.conf \ && cp docker-entrypoint.sh /sbin/docker-entrypoint.sh \ @@ -42,17 +139,20 @@ RUN cd docker \ && cp docker-prepare.sh /sbin/docker-prepare.sh \ && chmod 755 /sbin/docker-prepare.sh \ && chmod +x install_management_commands.sh \ - && ./install_management_commands.sh \ - && cd .. \ - && rm -rf docker/ + && ./install_management_commands.sh -COPY gunicorn.conf.py ../ +WORKDIR /usr/src/paperless/ + +COPY gunicorn.conf.py . 
+ +WORKDIR /usr/src/paperless/src/ # copy app COPY --from=compile-frontend /src/src/ ./ # add users, setup scripts -RUN addgroup --gid 1000 paperless \ +RUN set -eux \ + && addgroup --gid 1000 paperless \ && useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \ && chown -R paperless:paperless ../ \ && gosu paperless python3 manage.py collectstatic --clear --no-input \ diff --git a/build-docker-image.sh b/build-docker-image.sh new file mode 100755 index 000000000..f7ab62ca6 --- /dev/null +++ b/build-docker-image.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +# Helper script for building the Docker image locally. +# Parses and provides the necessary versions of other images to Docker +# before passing in the rest of script args. A future enhancement +# would be to combine this with the CI script + +# First Argument: The Dockerfile to build +# Other Arguments: Additional arguments to docker build + +# Example Usage: +# ./build-docker-image.sh Dockerfile -t paperless-ngx:my-awesome-feature +# ./build-docker-image.sh docker-builders/Dockerfile.qpdf -t paperless-ngx-build-qpdf:x.y.z + +set -eux + +if ! command -v jq; then + echo "jq required" + exit 1 +elif [ ! 
-f "$1" ]; then + echo "$1 is not a file, please provide the Dockerfile" + exit 1 +fi + +# Parse what we can from Pipfile.lock +pikepdf_version=$(jq ".default.pikepdf.version" Pipfile.lock | sed 's/=//g' | sed 's/"//g') +psycopg2_version=$(jq ".default.psycopg2.version" Pipfile.lock | sed 's/=//g' | sed 's/"//g') +# Read this from the other config file +qpdf_version=$(jq ".qpdf.version" .build-config.json | sed 's/"//g') +jbig2enc_version=$(jq ".jbig2enc.version" .build-config.json | sed 's/"//g') +# Get the branch name +frontend_version=$(git rev-parse --abbrev-ref HEAD) + +# Get Git tags for building +# psycopg2 uses X_Y_Z git tags +psycopg2_git_tag=${psycopg2_version//./_} +# pikepdf uses vX.Y.Z +pikepdf_git_tag="v${pikepdf_version}" + +docker build --file "$1" \ + --build-arg JBIG2ENC_VERSION="${jbig2enc_version}" \ + --build-arg QPDF_VERSION="${qpdf_version}" \ + --build-arg PIKEPDF_VERSION="${pikepdf_version}" \ + --build-arg PIKEPDF_GIT_TAG="${pikepdf_git_tag}" \ + --build-arg PSYCOPG2_VERSION="${psycopg2_version}" \ + --build-arg PSYCOPG2_GIT_TAG="${psycopg2_git_tag}" \ + --build-arg FRONTEND_VERSION="${frontend_version}" "${@:2}" . 
diff --git a/docker-builders/Dockerfile.frontend b/docker-builders/Dockerfile.frontend new file mode 100644 index 000000000..26f6c9e15 --- /dev/null +++ b/docker-builders/Dockerfile.frontend @@ -0,0 +1,14 @@ +# This Dockerfile compiles the frontend +# Inputs: None + +FROM node:16-bullseye-slim AS compile-frontend + +COPY ./src /src/src +COPY ./src-ui /src/src-ui + +WORKDIR /src/src-ui +RUN set -eux \ + && npm update npm -g \ + && npm ci --no-optional +RUN set -eux \ + && ./node_modules/.bin/ng build --configuration production diff --git a/docker-builders/Dockerfile.jbig2enc b/docker-builders/Dockerfile.jbig2enc new file mode 100644 index 000000000..4bc633170 --- /dev/null +++ b/docker-builders/Dockerfile.jbig2enc @@ -0,0 +1,39 @@ +# This Dockerfile compiles the jbig2enc library +# Inputs: +# - JBIG2ENC_VERSION - the Git tag to checkout and build + +FROM debian:bullseye-slim + +LABEL org.opencontainers.image.description="A intermediate image with jbig2enc built" + +ARG DEBIAN_FRONTEND=noninteractive + +ARG BUILD_PACKAGES="\ + build-essential \ + automake \ + libtool \ + libleptonica-dev \ + zlib1g-dev \ + git \ + ca-certificates" + +WORKDIR /usr/src/jbig2enc + +# As this is a base image for a multi-stage final image +# the added size of the install is basically irrelevant +RUN apt-get update --quiet \ + && apt-get install --yes --quiet --no-install-recommends ${BUILD_PACKAGES} \ + && rm -rf /var/lib/apt/lists/* + +# Layers after this point change according to required version +# For better caching, separate the basic installs from +# the building + +ARG JBIG2ENC_VERSION + +RUN set -eux \ + && git clone --quiet --branch $JBIG2ENC_VERSION https://github.com/agl/jbig2enc . 
+RUN set -eux \ + && ./autogen.sh +RUN set -eux \ + && ./configure && make diff --git a/docker-builders/Dockerfile.pikepdf b/docker-builders/Dockerfile.pikepdf new file mode 100644 index 000000000..3d2e5f235 --- /dev/null +++ b/docker-builders/Dockerfile.pikepdf @@ -0,0 +1,69 @@ +# This Dockerfile builds the pikepdf wheel +# Inputs: +# - REPO - Docker repository to pull qpdf from +# - QPDF_VERSION - The image qpdf version to copy .deb files from +# - PIKEPDF_GIT_TAG - The Git tag to clone and build from +# - PIKEPDF_VERSION - Used to force the built pikepdf version to match + +# Default to pulling from the main repo registry when manually building +ARG REPO="paperless-ngx/paperless-ngx" + +ARG QPDF_VERSION +FROM ghcr.io/${REPO}/builder/qpdf:${QPDF_VERSION} as qpdf-builder + +# This does nothing, except provide a name for a copy below + +FROM python:3.9-slim-bullseye + +LABEL org.opencontainers.image.description="A intermediate image with pikepdf wheel built" + +ARG DEBIAN_FRONTEND=noninteractive + +ARG BUILD_PACKAGES="\ + build-essential \ + git \ + libjpeg62-turbo-dev \ + zlib1g-dev \ + libgnutls28-dev \ + libxml2-dev \ + libxslt1-dev \ + python3-dev \ + python3-pip" + +WORKDIR /usr/src + +COPY --from=qpdf-builder /usr/src/qpdf/*.deb ./ + +# As this is a base image for a multi-stage final image +# the added size of the install is basically irrelevant + +RUN set -eux \ + && apt-get update --quiet \ + && apt-get install --yes --quiet --no-install-recommends $BUILD_PACKAGES \ + && dpkg --install libqpdf28_*.deb \ + && dpkg --install libqpdf-dev_*.deb \ + && python3 -m pip install --no-cache-dir --upgrade pip wheel pybind11 \ + && rm -rf /var/lib/apt/lists/* + +# Layers after this point change according to required version +# For better caching, separate the basic installs from +# the building + +ARG PIKEPDF_GIT_TAG +ARG PIKEPDF_VERSION + +RUN set -eux \ + && echo "building pikepdf wheel" \ + # Note the v in the tag name here + && git clone --quiet --depth 1 --branch 
"${PIKEPDF_GIT_TAG}" https://github.com/pikepdf/pikepdf.git \ + && cd pikepdf \ + # pikepdf seems to specify either a next version when built OR + # a post release tag. + # In either case, this won't match what we want from requirements.txt + # Directly modify the setup.py to set the version we just checked out of Git + && sed -i "s/use_scm_version=True/version=\"${PIKEPDF_VERSION}\"/g" setup.py \ + # https://github.com/pikepdf/pikepdf/issues/323 + && rm pyproject.toml \ + && mkdir wheels \ + && python3 -m pip wheel . --wheel-dir wheels \ + && ls -ahl wheels diff --git a/docker-builders/Dockerfile.psycopg2 b/docker-builders/Dockerfile.psycopg2 new file mode 100644 index 000000000..cfd7ff56a --- /dev/null +++ b/docker-builders/Dockerfile.psycopg2 @@ -0,0 +1,44 @@ +# This Dockerfile builds the psycopg2 wheel +# Inputs: +# - PSYCOPG2_GIT_TAG - The Git tag to clone and build from +# - PSYCOPG2_VERSION - Unused, kept for future possible usage + +FROM python:3.9-slim-bullseye + +LABEL org.opencontainers.image.description="A intermediate image with psycopg2 wheel built" + +ARG DEBIAN_FRONTEND=noninteractive + +ARG BUILD_PACKAGES="\ + build-essential \ + git \ + libpq-dev \ + python3-dev \ + python3-pip" + +WORKDIR /usr/src + +# As this is a base image for a multi-stage final image +# the added size of the install is basically irrelevant + +RUN set -eux \ + && apt-get update --quiet \ + && apt-get install --yes --quiet --no-install-recommends $BUILD_PACKAGES \ + && rm -rf /var/lib/apt/lists/* \ + && python3 -m pip install --no-cache-dir --upgrade pip wheel + +# Layers after this point change according to required version +# For better caching, separate the basic installs from +# the building + +ARG PSYCOPG2_GIT_TAG +ARG PSYCOPG2_VERSION + +RUN set -eux \ + && echo "Building psycopg2 wheel" \ + && cd /usr/src \ + && git clone --quiet --depth 1 --branch ${PSYCOPG2_GIT_TAG} https://github.com/psycopg/psycopg2.git \ + && cd psycopg2 \ + && mkdir wheels \ + && python3 -m pip 
wheel . --wheel-dir wheels \ + && ls -ahl wheels/ diff --git a/docker-builders/Dockerfile.qpdf b/docker-builders/Dockerfile.qpdf new file mode 100644 index 000000000..770d8c2ee --- /dev/null +++ b/docker-builders/Dockerfile.qpdf @@ -0,0 +1,52 @@ +FROM debian:bullseye-slim + +LABEL org.opencontainers.image.description="A intermediate image with qpdf built" + +ARG DEBIAN_FRONTEND=noninteractive + +ARG BUILD_PACKAGES="\ + build-essential \ + debhelper \ + debian-keyring \ + devscripts \ + equivs \ + libtool \ + libjpeg62-turbo-dev \ + libgnutls28-dev \ + packaging-dev \ + zlib1g-dev" + +WORKDIR /usr/src + +# As this is a base image for a multi-stage final image +# the added size of the install is basically irrelevant + +RUN set -eux \ + && apt-get update --quiet \ + && apt-get install --yes --quiet --no-install-recommends $BUILD_PACKAGES \ + && rm -rf /var/lib/apt/lists/* + +# Layers after this point change according to required version +# For better caching, separate the basic installs from +# the building + +# This must at least match pikepdf's minimum +ARG QPDF_VERSION + +# In order to get the required version of qpdf, it is backported from bookworm +# and then built from source +RUN set -eux \ + && echo "Building qpdf" \ + && echo "deb-src http://deb.debian.org/debian/ bookworm main" > /etc/apt/sources.list.d/bookworm-src.list \ + && apt-get update \ + && mkdir qpdf \ + && cd qpdf \ + && apt-get source --yes --quiet qpdf=${QPDF_VERSION}-1/bookworm \ + && rm -rf /var/lib/apt/lists/* \ + && cd qpdf-$QPDF_VERSION \ + # We don't need to build the tests (also don't run them) + && rm -rf libtests \ + && DEBEMAIL=hello@paperless-ngx.com debchange --bpo \ + && export DEB_BUILD_OPTIONS="terse nocheck nodoc parallel=2" \ + && dpkg-buildpackage --build=binary --unsigned-source --unsigned-changes \ + && ls -ahl ../*.deb diff --git a/docker-builders/get-build-json.py b/docker-builders/get-build-json.py new file mode 100755 index 000000000..4b96ac8d7 --- /dev/null +++ 
b/docker-builders/get-build-json.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +""" +This is a helper script for the multi-stage Docker image builder. +It provides a single point of configuration for package version control. +The output JSON object is used by the CI workflow to determine what versions +to build and pull into the final Docker image. + +Python package information is obtained from the Pipfile.lock. As this is +kept updated by dependabot, it usually will need no further configuration. +The sole exception currently is pikepdf, which has a dependency on qpdf, +and is configured here to use the latest version of qpdf built by the workflow. + +Other package version information is configured in .build-config.json, generally by +setting the version and Git information, if any. + +""" +import argparse +import json +import os +from pathlib import Path +from typing import Final + + +def _get_image_tag( + repo_name: str, + pkg_name: str, + pkg_version: str, +) -> str: + return f"ghcr.io/{repo_name}/builder/{pkg_name}:{pkg_version}" + + +def _main(): + parser = argparse.ArgumentParser( + description="Generate a JSON object of information required to build the given package, based on the Pipfile.lock", + ) + parser.add_argument( + "package", + help="The name of the package to generate JSON for", + ) + + PIPFILE_LOCK_PATH: Final[Path] = Path("Pipfile.lock") + BUILD_CONFIG_PATH: Final[Path] = Path(".build-config.json") + + # Read the main config file + build_json: Final = json.loads(BUILD_CONFIG_PATH.read_text()) + + # Read Pipfile.lock file + pipfile_data: Final = json.loads(PIPFILE_LOCK_PATH.read_text()) + + args: Final = parser.parse_args() + + repo_name: Final[str] = os.environ["GITHUB_REPOSITORY"] + + # Default output values + version = None + git_tag = None + extra_config = {} + + if args.package == "frontend": + # Version is just the branch or tag name + version = os.environ["GITHUB_REF_NAME"] + elif args.package in pipfile_data["default"]: + # Read the version from 
Pipfile.lock + pkg_data = pipfile_data["default"][args.package] + pkg_version = pkg_data["version"].split("==")[-1] + version = pkg_version + + # Based on the package, generate the expected Git tag name + if args.package == "pikepdf": + git_tag = f"v{pkg_version}" + elif args.package == "psycopg2": + git_tag = pkg_version.replace(".", "_") + + # Any extra/special values needed + if args.package == "pikepdf": + extra_config["qpdf_version"] = build_json["qpdf"]["version"] + + elif args.package in build_json: + version = build_json[args.package]["version"] + + if "git_tag" in build_json[args.package]: + git_tag = build_json[args.package]["git_tag"] + else: + raise NotImplementedError(args.package) + + # The JSON object we'll output + output = { + "name": args.package, + "version": version, + "git_tag": git_tag, + "image_tag": _get_image_tag(repo_name, args.package, version), + } + + # Add anything special a package may need + output.update(extra_config) + + # Output the JSON info to stdout + print(json.dumps(output)) + + +if __name__ == "__main__": + _main() diff --git a/docker/docker-prepare.sh b/docker/docker-prepare.sh index 681ccf5a0..48f0c6b82 100755 --- a/docker/docker-prepare.sh +++ b/docker/docker-prepare.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +set -e + wait_for_postgres() { attempt_num=1 max_attempts=5 diff --git a/docker/install_management_commands.sh b/docker/install_management_commands.sh index 9da795b50..bf8bbeb93 100755 --- a/docker/install_management_commands.sh +++ b/docker/install_management_commands.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +set -eu + for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker manage_superuser; do echo "installing $command..." 
diff --git a/docs/Dockerfile b/docs/Dockerfile index 9fb8bd0cc..bb4b35e2d 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -1,10 +1,10 @@ FROM python:3.5.1 # Install Sphinx and Pygments -RUN pip install Sphinx Pygments +RUN pip install --no-cache-dir Sphinx Pygments \ + # Setup directories, copy data + && mkdir /build -# Setup directories, copy data -RUN mkdir /build COPY . /build WORKDIR /build/docs