Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-04-02 13:45:10 -05:00)

Commit 9aea8a7d7c: Merge remote-tracking branch 'origin/dev'

.github/scripts/cleanup-tags.py (vendored, 71 changes)
@@ -15,6 +15,8 @@ from github import ContainerPackage
 from github import GithubBranchApi
 from github import GithubContainerRegistryApi

+import docker
+
 logger = logging.getLogger("cleanup-tags")

@@ -151,12 +153,16 @@ class RegistryTagsCleaner:
         for tag in sorted(self.tags_to_keep):
+            full_name = f"ghcr.io/{self.repo_owner}/{self.package_name}:{tag}"
+            logger.info(f"Checking manifest for {full_name}")
             # TODO: It would be nice to use RegistryData from docker
             # except the ID doesn't map to anything in the manifest
             try:
                 proc = subprocess.run(
                     [
                         shutil.which("docker"),
-                        "manifest",
+                        "buildx",
+                        "imagetools",
                         "inspect",
+                        "--raw",
                         full_name,
                     ],
                     capture_output=True,

@@ -241,6 +247,65 @@ class RegistryTagsCleaner:
         # By default, keep anything which is tagged
         self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))

+    def check_tags_pull(self):
+        """
+        This method uses the Docker Python SDK to confirm all tags which were
+        kept still pull, for all platforms.
+
+        TODO: This is much slower (although more comprehensive). Maybe a Pool?
+        """
+        logger.info("Beginning confirmation step")
+        client = docker.from_env()
+        imgs = []
+        for tag in sorted(self.tags_to_keep):
+            repository = f"ghcr.io/{self.repo_owner}/{self.package_name}"
+            for arch, variant in [("amd64", None), ("arm64", None), ("arm", "v7")]:
+                # From 11.2.0 onwards, qpdf is cross compiled, so there is a single arch, amd64
+                # skip others in this case
+                if "qpdf" in self.package_name and arch != "amd64" and tag == "11.2.0":
+                    continue
+                # Skip beta and release candidate tags
+                elif "beta" in tag:
+                    continue
+
+                # Build the platform name
+                if variant is not None:
+                    platform = f"linux/{arch}/{variant}"
+                else:
+                    platform = f"linux/{arch}"
+
+                try:
+                    logger.info(f"Pulling {repository}:{tag} for {platform}")
+                    image = client.images.pull(
+                        repository=repository,
+                        tag=tag,
+                        platform=platform,
+                    )
+                    imgs.append(image)
+                except docker.errors.APIError as e:
+                    logger.error(
+                        f"Failed to pull {repository}:{tag}: {e}",
+                    )
+
+            # Prevent out of space errors by removing after a few
+            # pulls
+            if len(imgs) > 50:
+                for image in imgs:
+                    try:
+                        client.images.remove(image.id)
+                    except docker.errors.APIError as e:
+                        err_str = str(e)
+                        # Ignore attempts to remove images that are partly shared
+                        # Ignore images which are somehow gone already
+                        if (
+                            "must be forced" not in err_str
+                            and "No such image" not in err_str
+                        ):
+                            logger.error(
+                                f"Remove image ghcr.io/{self.repo_owner}/{self.package_name}:{tag} failed: {e}",
+                            )
+                imgs = []
+

 class MainImageTagsCleaner(RegistryTagsCleaner):
     def decide_what_tags_to_keep(self):

@@ -397,6 +462,10 @@ def _main():
     # Clean images which are untagged
     cleaner.clean_untagged(args.is_manifest)

+    # Verify remaining tags still pull
+    if args.is_manifest:
+        cleaner.check_tags_pull()
+

 if __name__ == "__main__":
     _main()
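The `Maybe a Pool?` TODO in `check_tags_pull` above hints at parallelizing the verification pulls. A minimal sketch of that idea, not part of this commit: `pull_one` and `check_tags_pull_parallel` are hypothetical names, and it assumes each (tag, platform) pull is independent.

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

import docker


def pull_one(repository: str, tag: str, platform: str):
    # Each worker gets its own client; a shared docker-py client is not
    # guaranteed to be safe across threads.
    client = docker.from_env()
    return client.images.pull(repository=repository, tag=tag, platform=platform)


def check_tags_pull_parallel(repository: str, tags, platforms, max_workers: int = 4):
    # Submit every (tag, platform) pair and surface the first failure.
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {
            pool.submit(pull_one, repository, tag, platform): (tag, platform)
            for tag in tags
            for platform in platforms
        }
        for future in as_completed(futures):
            tag, platform = futures[future]
            future.result()  # re-raises any pull error for this tag/platform
```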
.github/workflows/ci.yml (vendored, 6 changes)

@@ -212,12 +212,6 @@ jobs:
     name: Prepare Docker Pipeline Data
     if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/beta' || contains(github.ref, 'beta.rc') || startsWith(github.ref, 'refs/tags/v'))
     runs-on: ubuntu-22.04
-    # If the push triggered the installer library workflow, wait for it to
-    # complete here. This ensures the required versions for the final
-    # image have been built, while not waiting at all if the versions haven't changed
-    concurrency:
-      group: build-installer-library
-      cancel-in-progress: false
     needs:
       - documentation
       - tests-backend
.github/workflows/cleanup-tags.yml (vendored, 14 changes)

@@ -62,9 +62,9 @@ jobs:
         with:
           python-version: "3.10"
       -
-        name: Install httpx
+        name: Install Python libraries
         run: |
-          python -m pip install httpx
+          python -m pip install httpx docker
       #
       # Clean up primary package
       #

@@ -81,13 +81,3 @@ jobs:
         if: "${{ env.TOKEN != '' }}"
        run: |
           python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --delete "${{ matrix.cache-name }}"
-      #
-      # Verify tags which are left still pull
-      #
-      -
-        name: Check all tags still pull
-        run: |
-          ghcr_name=$(echo "ghcr.io/${GITHUB_REPOSITORY_OWNER}/${{ matrix.primary-name }}" | awk '{ print tolower($0) }')
-          echo "Pulling all tags of ${ghcr_name}"
-          docker pull --quiet --all-tags ${ghcr_name}
-          docker image list
.github/workflows/installer-library.yml (vendored, 139 changes)

@@ -169,3 +169,142 @@ jobs:
           PIKEPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }}
           PILLOW_VERSION=${{ needs.prepare-docker-build.outputs.pillow-version }}
           LXML_VERSION=${{ needs.prepare-docker-build.outputs.lxml-version }}
+
+  commit-binary-files:
+    name: Store installers
+    needs:
+      - prepare-docker-build
+      - build-qpdf-debs
+      - build-jbig2enc
+      - build-psycopg2-wheel
+      - build-pikepdf-wheel
+    runs-on: ubuntu-22.04
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v3
+        with:
+          ref: binary-library
+      -
+        name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      -
+        name: Install system dependencies
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -qq --no-install-recommends tree
+      -
+        name: Extract qpdf files
+        run: |
+          version=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).version }}
+          tag=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).image_tag }}
+
+          docker pull --quiet ${tag}
+          docker create --name qpdf-extract ${tag}
+
+          mkdir --parents qpdf/${version}/amd64
+          docker cp qpdf-extract:/usr/src/qpdf/${version}/amd64 qpdf/${version}
+
+          mkdir --parents qpdf/${version}/arm64
+          docker cp qpdf-extract:/usr/src/qpdf/${version}/arm64 qpdf/${version}
+
+          mkdir --parents qpdf/${version}/armv7
+          docker cp qpdf-extract:/usr/src/qpdf/${version}/armv7 qpdf/${version}
+      -
+        name: Extract psycopg2 files
+        run: |
+          version=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).version }}
+          tag=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).image_tag }}
+
+          docker pull --quiet --platform linux/amd64 ${tag}
+          docker create --platform linux/amd64 --name psycopg2-extract ${tag}
+          mkdir --parents psycopg2/${version}/amd64
+          docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/amd64
+          mv psycopg2/${version}/amd64/wheels/* psycopg2/${version}/amd64
+          rm -r psycopg2/${version}/amd64/wheels/
+          docker rm psycopg2-extract
+
+          docker pull --quiet --platform linux/arm64 ${tag}
+          docker create --platform linux/arm64 --name psycopg2-extract ${tag}
+          mkdir --parents psycopg2/${version}/arm64
+          docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/arm64
+          mv psycopg2/${version}/arm64/wheels/* psycopg2/${version}/arm64
+          rm -r psycopg2/${version}/arm64/wheels/
+          docker rm psycopg2-extract
+
+          docker pull --quiet --platform linux/arm/v7 ${tag}
+          docker create --platform linux/arm/v7 --name psycopg2-extract ${tag}
+          mkdir --parents psycopg2/${version}/armv7
+          docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/armv7
+          mv psycopg2/${version}/armv7/wheels/* psycopg2/${version}/armv7
+          rm -r psycopg2/${version}/armv7/wheels/
+          docker rm psycopg2-extract
+      -
+        name: Extract pikepdf files
+        run: |
+          version=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }}
+          tag=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).image_tag }}
+
+          docker pull --quiet --platform linux/amd64 ${tag}
+          docker create --platform linux/amd64 --name pikepdf-extract ${tag}
+          mkdir --parents pikepdf/${version}/amd64
+          docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/amd64
+          mv pikepdf/${version}/amd64/wheels/* pikepdf/${version}/amd64
+          rm -r pikepdf/${version}/amd64/wheels/
+          docker rm pikepdf-extract
+
+          docker pull --quiet --platform linux/arm64 ${tag}
+          docker create --platform linux/arm64 --name pikepdf-extract ${tag}
+          mkdir --parents pikepdf/${version}/arm64
+          docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/arm64
+          mv pikepdf/${version}/arm64/wheels/* pikepdf/${version}/arm64
+          rm -r pikepdf/${version}/arm64/wheels/
+          docker rm pikepdf-extract
+
+          docker pull --quiet --platform linux/arm/v7 ${tag}
+          docker create --platform linux/arm/v7 --name pikepdf-extract ${tag}
+          mkdir --parents pikepdf/${version}/armv7
+          docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/armv7
+          mv pikepdf/${version}/armv7/wheels/* pikepdf/${version}/armv7
+          rm -r pikepdf/${version}/armv7/wheels/
+          docker rm pikepdf-extract
+      -
+        name: Extract jbig2enc files
+        run: |
+          version=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).version }}
+          tag=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).image_tag }}
+
+          docker pull --quiet --platform linux/amd64 ${tag}
+          docker create --platform linux/amd64 --name jbig2enc-extract ${tag}
+          mkdir --parents jbig2enc/${version}/amd64
+          docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/amd64/
+          mv jbig2enc/${version}/amd64/build/* jbig2enc/${version}/amd64/
+          docker rm jbig2enc-extract
+
+          docker pull --quiet --platform linux/arm64 ${tag}
+          docker create --platform linux/arm64 --name jbig2enc-extract ${tag}
+          mkdir --parents jbig2enc/${version}/arm64
+          docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/arm64
+          mv jbig2enc/${version}/arm64/build/* jbig2enc/${version}/arm64/
+          docker rm jbig2enc-extract
+
+          docker pull --quiet --platform linux/arm/v7 ${tag}
+          docker create --platform linux/arm/v7 --name jbig2enc-extract ${tag}
+          mkdir --parents jbig2enc/${version}/armv7
+          docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/armv7
+          mv jbig2enc/${version}/armv7/build/* jbig2enc/${version}/armv7/
+          docker rm jbig2enc-extract
+      -
+        name: Show file structure
+        run: |
+          tree .
+      -
+        name: Commit files
+        run: |
+          git config --global user.name "github-actions"
+          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add pikepdf/ qpdf/ psycopg2/ jbig2enc/
+          git commit -m "Updating installer packages" || true
+          git push origin || true
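Each extract step above repeats the same pull, create, copy pattern. For illustration only, here is a rough Python equivalent using the Docker SDK this PR already introduces in cleanup-tags.py; the `extract_from_image` helper, image tag, and paths are hypothetical, not values from the workflow.

```python
import tarfile
from pathlib import Path

import docker


def extract_from_image(image_tag: str, src_path: str, dest_dir: str) -> None:
    """Copy a path out of an image without running it, mirroring
    `docker create` followed by `docker cp`."""
    client = docker.from_env()
    client.images.pull(image_tag)
    container = client.containers.create(image_tag)  # created, never started
    try:
        # get_archive streams the requested path as a tar archive
        bits, _stat = container.get_archive(src_path)
        archive = Path(dest_dir) / "extract.tar"
        archive.parent.mkdir(parents=True, exist_ok=True)
        with archive.open("wb") as f:
            for chunk in bits:
                f.write(chunk)
        with tarfile.open(archive) as tar:
            tar.extractall(dest_dir)
        archive.unlink()
    finally:
        container.remove()


# e.g. extract_from_image("ghcr.io/example/builder/qpdf:11.1.0", "/usr/src/qpdf", "qpdf-out")
```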
Dockerfile (53 changes)

@@ -1,19 +1,5 @@
 # syntax=docker/dockerfile:1.4

-# Pull the installer images from the library
-# These are all built previously
-# They provide either a .deb or .whl
-
-ARG JBIG2ENC_VERSION
-ARG QPDF_VERSION
-ARG PIKEPDF_VERSION
-ARG PSYCOPG2_VERSION
-
-FROM ghcr.io/paperless-ngx/paperless-ngx/builder/jbig2enc:${JBIG2ENC_VERSION} as jbig2enc-builder
-FROM --platform=$BUILDPLATFORM ghcr.io/paperless-ngx/paperless-ngx/builder/qpdf:${QPDF_VERSION} as qpdf-builder
-FROM ghcr.io/paperless-ngx/paperless-ngx/builder/pikepdf:${PIKEPDF_VERSION} as pikepdf-builder
-FROM ghcr.io/paperless-ngx/paperless-ngx/builder/psycopg2:${PSYCOPG2_VERSION} as psycopg2-builder
-
 FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend

 # This stage compiles the frontend

@@ -58,24 +44,21 @@ LABEL org.opencontainers.image.url="https://github.com/paperless-ngx/paperless-n
 LABEL org.opencontainers.image.licenses="GPL-3.0-only"

 ARG DEBIAN_FRONTEND=noninteractive
-# Buildx provided
+# Buildx provided, must be defined to use though
 ARG TARGETARCH
 ARG TARGETVARIANT

+# Workflow provided
+ARG JBIG2ENC_VERSION
+ARG QPDF_VERSION
+ARG PIKEPDF_VERSION
+ARG PSYCOPG2_VERSION
+
 #
 # Begin installation and configuration
 # Order the steps below from least often changed to most
 #

-# copy jbig2enc
-# Basically will never change again
-COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
-COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
-COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/*.h /usr/local/include/
-
 # Packages need for running
 ARG RUNTIME_PACKAGES="\
     # Python

@@ -198,19 +181,29 @@ RUN set -eux \
 # Install the built packages from the installer library images
 # Use mounts to avoid copying installer files into the image
 # These change sometimes
-RUN --mount=type=bind,from=qpdf-builder,target=/qpdf \
-  --mount=type=bind,from=psycopg2-builder,target=/psycopg2 \
-  --mount=type=bind,from=pikepdf-builder,target=/pikepdf \
-  set -eux \
+RUN set -eux \
+  && echo "Getting binaries" \
+  && mkdir paperless-ngx \
+  && curl --fail --silent --show-error --output paperless-ngx.tar.gz --location https://github.com/paperless-ngx/paperless-ngx/archive/41d6e7e407af09a0882736d50c89b6e015997bff.tar.gz \
+  && tar -xf paperless-ngx.tar.gz --directory paperless-ngx --strip-components=1 \
+  && cd paperless-ngx \
+  # Setting a specific revision ensures we know what this installed
+  # and ensures cache breaking on changes
+  && echo "Installing jbig2enc" \
+  && cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/jbig2 /usr/local/bin/ \
+  && cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/libjbig2enc* /usr/local/lib/ \
   && echo "Installing qpdf" \
-  && apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/libqpdf29_*.deb \
-  && apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/qpdf_*.deb \
+  && apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/libqpdf29_*.deb \
+  && apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/qpdf_*.deb \
   && echo "Installing pikepdf and dependencies" \
-  && python3 -m pip install --no-cache-dir /pikepdf/usr/src/wheels/*.whl \
+  && python3 -m pip install --no-cache-dir ./pikepdf/${PIKEPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/*.whl \
   && python3 -m pip list \
   && echo "Installing psycopg2" \
-  && python3 -m pip install --no-cache-dir /psycopg2/usr/src/wheels/psycopg2*.whl \
-  && python3 -m pip list
+  && python3 -m pip install --no-cache-dir ./psycopg2/${PSYCOPG2_VERSION}/${TARGETARCH}${TARGETVARIANT}/psycopg2*.whl \
+  && python3 -m pip list \
+  && echo "Cleaning up image layer" \
+  && cd ../ \
+  && rm -rf paperless-ngx

 WORKDIR /usr/src/paperless/src/
@@ -29,7 +29,20 @@ RUN set -eux \
     && ./autogen.sh \
     && ./configure \
     && make \
+    && echo "Gathering package data" \
+    && dpkg-query -f '${Package;-40}${Version}\n' -W > ./pkg-list.txt \
     && echo "Cleaning up image" \
     && apt-get -y purge ${BUILD_PACKAGES} \
     && apt-get -y autoremove --purge \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* \
+    && echo "Moving files around" \
+    && mkdir build \
+    # Unlink a symlink that causes problems
+    && unlink ./src/.libs/libjbig2enc.la \
+    # Move what the link pointed to
+    && mv ./src/libjbig2enc.la ./build/ \
+    # Move the shared library .so files
+    && mv ./src/.libs/libjbig2enc* ./build/ \
+    # And move the cli binary
+    && mv ./src/jbig2 ./build/ \
+    && mv ./pkg-list.txt ./build/
@@ -7,12 +7,17 @@
 # Default to pulling from the main repo registry when manually building
 ARG REPO="paperless-ngx/paperless-ngx"

 # This does nothing, except provide a name for a copy below
 ARG QPDF_VERSION
 FROM --platform=$BUILDPLATFORM ghcr.io/${REPO}/builder/qpdf:${QPDF_VERSION} as qpdf-builder

-# This does nothing, except provide a name for a copy below
-FROM python:3.9-slim-bullseye as main
+#
+# Stage: builder
+# Purpose:
+#  - Build the pikepdf wheel
+#  - Build any dependent wheels which can't be found
+#
+FROM python:3.9-slim-bullseye as builder

 LABEL org.opencontainers.image.description="A intermediate image with pikepdf wheel built"

@@ -100,3 +105,14 @@ RUN set -eux \
     && apt-get -y purge ${BUILD_PACKAGES} \
     && apt-get -y autoremove --purge \
     && rm -rf /var/lib/apt/lists/*
+
+#
+# Stage: package
+# Purpose: Holds the compiled .whl files in a tiny image to pull
+#
+FROM alpine:3.17 as package
+
+WORKDIR /usr/src/wheels/
+
+COPY --from=builder /usr/src/wheels/*.whl ./
+COPY --from=builder /usr/src/wheels/pkg-list.txt ./
@@ -2,7 +2,12 @@
 # Inputs:
 #  - PSYCOPG2_VERSION - Version to build

-FROM python:3.9-slim-bullseye as main
+#
+# Stage: builder
+# Purpose:
+#  - Build the psycopg2 wheel
+#
+FROM python:3.9-slim-bullseye as builder

 LABEL org.opencontainers.image.description="A intermediate image with psycopg2 wheel built"

@@ -48,3 +53,14 @@ RUN set -eux \
     && apt-get -y purge ${BUILD_PACKAGES} \
     && apt-get -y autoremove --purge \
     && rm -rf /var/lib/apt/lists/*
+
+#
+# Stage: package
+# Purpose: Holds the compiled .whl files in a tiny image to pull
+#
+FROM alpine:3.17 as package
+
+WORKDIR /usr/src/wheels/
+
+COPY --from=builder /usr/src/wheels/*.whl ./
+COPY --from=builder /usr/src/wheels/pkg-list.txt ./
docker-builders/README.md (new file, 57 lines)

# Installer Library

This folder contains the Dockerfiles for building certain installers or libraries, which are then pulled into the main image.

## [jbig2enc](https://github.com/agl/jbig2enc)

### Why

JBIG is an image coding standard which can achieve better compression of images for PDFs.

### What

The Docker image builds a shared library file and utility, which is copied into the correct location in the final image.

### Updating

1. Ensure the given qpdf version is present in [Debian bookworm](https://packages.debian.org/bookworm/qpdf)
2. Update `.build-config.json` to the given version
3. If the Debian specific version has incremented, update `Dockerfile.qpdf`

See Also:

- [OCRMyPDF Documentation](https://ocrmypdf.readthedocs.io/en/latest/jbig2.html)

## [psycopg2](https://www.psycopg.org/)

### Why

The pre-built wheels of psycopg2 are built on Debian 9, which provides a quite old version of libpq-dev. This causes issues with authentication methods.

### What

The image builds psycopg2 wheels on Debian 10 and places the produced wheels into `/usr/src/wheels/`.

See Also:

- [Issue 266](https://github.com/paperless-ngx/paperless-ngx/issues/266)
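As a quick way to check which libpq a given psycopg2 wheel was compiled against, something like the following should work (a sketch; it assumes the `psycopg2.__libpq_version__` attribute available in modern psycopg2 releases):

```python
import psycopg2

# psycopg2 records the libpq version it was compiled against as an
# integer, e.g. 90613 decodes to 9.6.13 and 140005 to 14.5.
print(psycopg2.__libpq_version__)
```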
## [qpdf](https://qpdf.readthedocs.io/en/stable/index.html)

### Why

qpdf and its library provide tools to read, manipulate and fix up PDFs. Version 11 is also required by `pikepdf` 6+, and Debian 9 does not provide anything above version 10.

### What

The Docker image cross compiles .deb installers for each supported architecture of the main image. The installers are placed in `/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/`

## [pikepdf](https://pikepdf.readthedocs.io/en/latest/)

### Why

Required by OCRMyPDF, this is a general purpose library for PDF manipulation in Python via the qpdf libraries.

### What

The built wheels are placed into `/usr/src/wheels/`
@ -80,7 +80,7 @@ django_checks() {
|
||||
|
||||
search_index() {
|
||||
|
||||
local -r index_version=1
|
||||
local -r index_version=2
|
||||
local -r index_version_file=${DATA_DIR}/.index_version
|
||||
|
||||
if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then
|
||||
|
@@ -121,7 +121,17 @@ Executed after the consumer sees a new document in the consumption
 folder, but before any processing of the document is performed. This
 script can access the following relevant environment variables set:

-- `DOCUMENT_SOURCE_PATH`
+| Environment Variable    | Description                                                   |
+| ----------------------- | ------------------------------------------------------------- |
+| `DOCUMENT_SOURCE_PATH`  | Original path of the consumed document                        |
+| `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on  |
+
+!!! note
+
+    Pre-consume scripts which modify the document should only change
+    the `DOCUMENT_WORKING_PATH` file or a second consume task may
+    be triggered, leading to failures as two tasks work on the
+    same document path

 A simple but common example for this would be creating a simple script
 like this:

@@ -130,7 +140,7 @@ like this:

 ```bash
 #!/usr/bin/env bash
-pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH}
+pdf2pdfocr.py -i ${DOCUMENT_WORKING_PATH}
 ```

 `/etc/paperless.conf`
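For comparison, a sketch of the same pre-consume hook written in Python; the `pdf2pdfocr.py` invocation mirrors the bash example in the hunk above and is illustrative only:

```python
#!/usr/bin/env python3
import os
import subprocess

# The original file; do not modify it, or a second consume task may fire.
source = os.environ["DOCUMENT_SOURCE_PATH"]
# The temporary copy that consumption will actually process.
working = os.environ["DOCUMENT_WORKING_PATH"]

print(f"Pre-processing {source}")
# Modify only the working copy, in place.
subprocess.run(["pdf2pdfocr.py", "-i", working], check=True)
```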
@@ -157,26 +167,36 @@ Executed after the consumer has successfully processed a document and
 has moved it into paperless. It receives the following environment
 variables:

-- `DOCUMENT_ID`
-- `DOCUMENT_FILE_NAME`
-- `DOCUMENT_CREATED`
-- `DOCUMENT_MODIFIED`
-- `DOCUMENT_ADDED`
-- `DOCUMENT_SOURCE_PATH`
-- `DOCUMENT_ARCHIVE_PATH`
-- `DOCUMENT_THUMBNAIL_PATH`
-- `DOCUMENT_DOWNLOAD_URL`
-- `DOCUMENT_THUMBNAIL_URL`
-- `DOCUMENT_CORRESPONDENT`
-- `DOCUMENT_TAGS`
-- `DOCUMENT_ORIGINAL_FILENAME`
+| Environment Variable         | Description                                   |
+| ---------------------------- | --------------------------------------------- |
+| `DOCUMENT_ID`                | Database primary key of the document          |
+| `DOCUMENT_FILE_NAME`         | Formatted filename, not including paths       |
+| `DOCUMENT_CREATED`           | Date & time when document created             |
+| `DOCUMENT_MODIFIED`          | Date & time when document was last modified   |
+| `DOCUMENT_ADDED`             | Date & time when document was added           |
+| `DOCUMENT_SOURCE_PATH`       | Path to the original document file            |
+| `DOCUMENT_ARCHIVE_PATH`      | Path to the generated archive file (if any)   |
+| `DOCUMENT_THUMBNAIL_PATH`    | Path to the generated thumbnail               |
+| `DOCUMENT_DOWNLOAD_URL`      | URL for document download                     |
+| `DOCUMENT_THUMBNAIL_URL`     | URL for the document thumbnail                |
+| `DOCUMENT_CORRESPONDENT`     | Assigned correspondent (if any)               |
+| `DOCUMENT_TAGS`              | Comma separated list of tags applied (if any) |
+| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document                 |

-The script can be in any language, but for a simple shell script
-example, you can take a look at
-[post-consumption-example.sh](https://github.com/paperless-ngx/paperless-ngx/blob/main/scripts/post-consumption-example.sh)
-in this project.
+The script can be in any language; a simple shell script example:

-The post consumption script cannot cancel the consumption process.
+```bash title="post-consumption-example"
+--8<-- "./scripts/post-consumption-example.sh"
+```
+
+!!! note
+
+    The post consumption script cannot cancel the consumption process.
+
+!!! warning
+
+    The post consumption script should not modify the document files
+    directly

 The script's stdout and stderr will be logged line by line to the
 webserver log, along with the exit code of the script.
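As an illustration of these variables, a hypothetical post-consume script in Python that just logs a few of them (not the example shipped with the project):

```python
#!/usr/bin/env python3
import os

doc_id = os.environ["DOCUMENT_ID"]
correspondent = os.environ.get("DOCUMENT_CORRESPONDENT", "")
tags = os.environ.get("DOCUMENT_TAGS", "")

# stdout is captured line by line into the webserver log
print(f"Consumed document {doc_id}")
print(f"  correspondent: {correspondent or '<none>'}")
print(f"  tags: {tags or '<none>'}")
print(f"  download: {os.environ['DOCUMENT_DOWNLOAD_URL']}")
```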
@@ -2,6 +2,9 @@

 ## paperless-ngx 1.12.1

+_Note: Version 1.12.x introduced searching of comments, which will work for comments added after the upgrade, but a reindex of the search index is required in order to search
+older comments. The Docker image will automatically perform this reindex; bare metal installations will have to perform this manually, see [the docs](https://docs.paperless-ngx.com/administration/#index)._
+
 ### Bug Fixes

 - Fix: comments not showing in search until after manual reindex in v1.12 [@shamoon](https://github.com/shamoon) ([#2513](https://github.com/paperless-ngx/paperless-ngx/pull/2513))
@@ -41,6 +41,7 @@ markdown_extensions:
       anchor_linenums: true
   - pymdownx.superfences
   - pymdownx.inlinehilite
+  - pymdownx.snippets
 strict: true
 nav:
   - index.md
(File diff suppressed because it is too large)
@@ -204,6 +204,10 @@ export class DocumentDetailComponent
         )
         .subscribe({
           next: (titleValue) => {
+            // In the rare case when the field changed just after debounced event was fired.
+            // We dont want to overwrite whats actually in the text field, so just return
+            if (titleValue !== this.titleInput.value) return
+
             this.title = titleValue
             this.documentForm.patchValue({ title: titleValue })
           },
@@ -26,11 +26,11 @@
   </div>
   <p class="card-text">
     <span *ngIf="document.__search_hit__ && document.__search_hit__.highlights" [innerHtml]="document.__search_hit__.highlights"></span>
-    <span *ngIf="document.__search_hit__ && document.__search_hit__.comment_highlights" class="d-block">
+    <span *ngFor="let highlight of searchCommentHighlights" class="d-block">
       <svg width="1em" height="1em" fill="currentColor" class="me-2">
         <use xlink:href="assets/bootstrap-icons.svg#chat-left-text"/>
       </svg>
-      <span [innerHtml]="document.__search_hit__.comment_highlights"></span>
+      <span [innerHtml]="highlight"></span>
     </span>
     <span *ngIf="!document.__search_hit__" class="result-content">{{contentTrimmed}}</span>
   </p>
@@ -70,6 +70,22 @@ export class DocumentCardLargeComponent {
     }
   }

+  get searchCommentHighlights() {
+    let highlights = []
+    if (
+      this.document['__search_hit__'] &&
+      this.document['__search_hit__'].comment_highlights
+    ) {
+      // only show comments with a match
+      highlights = (
+        this.document['__search_hit__'].comment_highlights as string
+      )
+        .split(',')
+        .filter((higlight) => higlight.includes('<span'))
+    }
+    return highlights
+  }
+
   getIsThumbInverted() {
     return this.settingsService.get(SETTINGS_KEYS.DARK_MODE_THUMB_INVERTED)
   }
@@ -143,7 +143,7 @@
 <p i18n>
   <em>No tracking data is collected by the app in any way.</em>
 </p>
-<app-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled" i18n-hint hint="Note that for users of thirdy-party containers e.g. linuxserver.io this notification may be 'ahead' of the current third-party release."></app-input-check>
+<app-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled" i18n-hint hint="Note that for users of third-party containers e.g. linuxserver.io this notification may be 'ahead' of the current third-party release."></app-input-check>
 </div>
 </div>
@@ -5,7 +5,7 @@ export const environment = {
   apiBaseUrl: document.baseURI + 'api/',
   apiVersion: '2',
   appTitle: 'Paperless-ngx',
-  version: '1.12.1',
+  version: '1.12.1-dev',
   webSocketHost: window.location.host,
   webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
   webSocketBaseUrl: base_url.pathname + 'ws/',
@@ -4,7 +4,6 @@ import shutil
 import tempfile
 from dataclasses import dataclass
 from functools import lru_cache
 from math import ceil
 from pathlib import Path
 from typing import List
 from typing import Optional

@@ -12,10 +11,9 @@ from typing import Optional
 import magic
 from django.conf import settings
 from pdf2image import convert_from_path
+from pdf2image.exceptions import PDFPageCountError
 from pikepdf import Page
-from pikepdf import PasswordError
 from pikepdf import Pdf
-from pikepdf import PdfImage
 from PIL import Image
 from PIL import ImageSequence
 from pyzbar import pyzbar

@@ -154,52 +152,15 @@ def scan_file_for_barcodes(
     (page_number, barcode_text) tuples
     """

-    def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]:
-        detected_barcodes = []
-        with Pdf.open(pdf_filepath) as pdf:
-            for page_num, page in enumerate(pdf.pages):
-                for image_key in page.images:
-                    pdfimage = PdfImage(page.images[image_key])
-
-                    # This type is known to have issues:
-                    # https://github.com/pikepdf/pikepdf/issues/401
-                    if "/CCITTFaxDecode" in pdfimage.filters:
-                        raise BarcodeImageFormatError(
-                            "Unable to decode CCITTFaxDecode images",
-                        )
-
-                    # Not all images can be transcoded to a PIL image, which
-                    # is what pyzbar expects to receive, so this may
-                    # raise an exception, triggering fallback
-                    pillow_img = pdfimage.as_pil_image()
-
-                    # Scale the image down
-                    # See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
-                    # TLDR: zbar has issues with larger images
-                    width, height = pillow_img.size
-                    if width > 1024:
-                        scaler = ceil(width / 1024)
-                        new_width = int(width / scaler)
-                        new_height = int(height / scaler)
-                        pillow_img = pillow_img.resize((new_width, new_height))
-
-                    width, height = pillow_img.size
-                    if height > 2048:
-                        scaler = ceil(height / 2048)
-                        new_width = int(width / scaler)
-                        new_height = int(height / scaler)
-                        pillow_img = pillow_img.resize((new_width, new_height))
-
-                    for barcode_value in barcode_reader(pillow_img):
-                        detected_barcodes.append(Barcode(page_num, barcode_value))
-
-        return detected_barcodes
-
     def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
         detected_barcodes = []
         # use a temporary directory in case the file is too big to handle in memory
         with tempfile.TemporaryDirectory() as path:
-            pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
+            pages_from_path = convert_from_path(
+                pdf_filepath,
+                dpi=300,
+                output_folder=path,
+            )
             for current_page_number, page in enumerate(pages_from_path):
                 for barcode_value in barcode_reader(page):
                     detected_barcodes.append(

@@ -219,27 +180,19 @@ def scan_file_for_barcodes(
         # Always try pikepdf first, it's usually fine, faster and
         # uses less memory
         try:
-            barcodes = _pikepdf_barcode_scan(pdf_filepath)
+            barcodes = _pdf2image_barcode_scan(pdf_filepath)
         # Password protected files can't be checked
-        except PasswordError as e:
+        # This is the exception raised for those
+        except PDFPageCountError as e:
             logger.warning(
                 f"File is likely password protected, not checking for barcodes: {e}",
             )
-        # Handle pikepdf related image decoding issues with a fallback to page
-        # by page conversion to images in a temporary directory
-        except Exception as e:
+        # This file is really borked, allow the consumption to continue
+        # but it may fail further on
+        except Exception as e:  # pragma: no cover
             logger.warning(
-                f"Falling back to pdf2image because: {e}",
+                f"Exception during barcode scanning: {e}",
             )
-            try:
-                barcodes = _pdf2image_barcode_scan(pdf_filepath)
-            # This file is really borked, allow the consumption to continue
-            # but it may fail further on
-            except Exception as e:  # pragma: no cover
-                logger.warning(
-                    f"Exception during barcode scanning: {e}",
-                )
     else:
         logger.warning(
             f"Unsupported file format for barcode reader: {str(mime_type)}",
@@ -1,7 +1,10 @@
 import datetime
 import hashlib
 import os
+import shutil
+import tempfile
 import uuid
+from pathlib import Path
 from subprocess import CompletedProcess
 from subprocess import run
 from typing import Optional

@@ -94,7 +97,8 @@ class Consumer(LoggingMixin):

     def __init__(self):
         super().__init__()
-        self.path = None
+        self.path: Optional[Path] = None
+        self.original_path: Optional[Path] = None
         self.filename = None
         self.override_title = None
         self.override_correspondent_id = None

@@ -167,16 +171,18 @@ class Consumer(LoggingMixin):

         self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")

-        filepath_arg = os.path.normpath(self.path)
+        working_file_path = str(self.path)
+        original_file_path = str(self.original_path)

         script_env = os.environ.copy()
-        script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg
+        script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
+        script_env["DOCUMENT_WORKING_PATH"] = working_file_path

         try:
             completed_proc = run(
                 args=[
                     settings.PRE_CONSUME_SCRIPT,
-                    filepath_arg,
+                    original_file_path,
                 ],
                 env=script_env,
                 capture_output=True,

@@ -195,7 +201,7 @@ class Consumer(LoggingMixin):
                 exception=e,
             )

-    def run_post_consume_script(self, document):
+    def run_post_consume_script(self, document: Document):
         if not settings.POST_CONSUME_SCRIPT:
             return

@@ -285,8 +291,8 @@ class Consumer(LoggingMixin):
         Return the document object if it was successfully created.
         """

-        self.path = path
-        self.filename = override_filename or os.path.basename(path)
+        self.path = Path(path).resolve()
+        self.filename = override_filename or self.path.name
         self.override_title = override_title
         self.override_correspondent_id = override_correspondent_id
         self.override_document_type_id = override_document_type_id

@@ -311,6 +317,15 @@ class Consumer(LoggingMixin):

         self.log("info", f"Consuming {self.filename}")

+        # For the actual work, copy the file into a tempdir
+        self.original_path = self.path
+        tempdir = tempfile.TemporaryDirectory(
+            prefix="paperless-ngx",
+            dir=settings.SCRATCH_DIR,
+        )
+        self.path = Path(tempdir.name) / Path(self.filename)
+        shutil.copy(self.original_path, self.path)
+
         # Determine the parser class.

         mime_type = magic.from_file(self.path, mime=True)

@@ -453,11 +468,12 @@ class Consumer(LoggingMixin):
             # Delete the file only if it was successfully consumed
             self.log("debug", f"Deleting file {self.path}")
-            os.unlink(self.path)
+            self.original_path.unlink()

             # https://github.com/jonaswinkler/paperless-ng/discussions/1037
             shadow_file = os.path.join(
-                os.path.dirname(self.path),
-                "._" + os.path.basename(self.path),
+                os.path.dirname(self.original_path),
+                "._" + os.path.basename(self.original_path),
             )

             if os.path.isfile(shadow_file):

@@ -474,6 +490,7 @@ class Consumer(LoggingMixin):
             )
         finally:
             document_parser.cleanup()
+            tempdir.cleanup()

         self.run_post_consume_script(document)
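The copy-then-cleanup flow added above follows the standard TemporaryDirectory pattern; a condensed, self-contained sketch of the same idea (paths are placeholders):

```python
import shutil
import tempfile
from pathlib import Path

original = Path("/data/consume/scan.pdf")  # placeholder input path

# Work on a copy so the watched original is never modified;
# TemporaryDirectory removes everything on cleanup().
tempdir = tempfile.TemporaryDirectory(prefix="paperless-ngx")
try:
    working = Path(tempdir.name) / original.name
    shutil.copy(original, working)
    # ... parse `working` here ...
finally:
    tempdir.cleanup()
```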
(File diff suppressed because it is too large)
@@ -833,7 +833,8 @@ class PreConsumeTestCase(TestCase):
         with tempfile.NamedTemporaryFile() as script:
             with override_settings(PRE_CONSUME_SCRIPT=script.name):
                 c = Consumer()
-                c.path = "path-to-file"
+                c.original_path = "path-to-file"
+                c.path = "/tmp/somewhere/path-to-file"
                 c.run_pre_consume_script()

                 m.assert_called_once()

@@ -841,10 +842,19 @@ class PreConsumeTestCase(TestCase):
                 args, kwargs = m.call_args

                 command = kwargs["args"]
+                environment = kwargs["env"]

                 self.assertEqual(command[0], script.name)
                 self.assertEqual(command[1], "path-to-file")

+                self.assertDictContainsSubset(
+                    {
+                        "DOCUMENT_SOURCE_PATH": c.original_path,
+                        "DOCUMENT_WORKING_PATH": c.path,
+                    },
+                    environment,
+                )
+
     @mock.patch("documents.consumer.Consumer.log")
     def test_script_with_output(self, mocked_log):
         """

@@ -961,9 +971,10 @@ class PostConsumeTestCase(TestCase):

                 m.assert_called_once()

-                args, kwargs = m.call_args
+                _, kwargs = m.call_args

                 command = kwargs["args"]
+                environment = kwargs["env"]

                 self.assertEqual(command[0], script.name)
                 self.assertEqual(command[1], str(doc.pk))

@@ -972,6 +983,17 @@ class PostConsumeTestCase(TestCase):
                 self.assertEqual(command[7], "my_bank")
                 self.assertCountEqual(command[8].split(","), ["a", "b"])

+                self.assertDictContainsSubset(
+                    {
+                        "DOCUMENT_ID": str(doc.pk),
+                        "DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
+                        "DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
+                        "DOCUMENT_CORRESPONDENT": "my_bank",
+                        "DOCUMENT_TAGS": "a,b",
+                    },
+                    environment,
+                )
+
     def test_script_exit_non_zero(self):
         """
         GIVEN:
@@ -3,6 +3,7 @@ import shutil
 import tempfile
 from collections import namedtuple
 from contextlib import contextmanager
+from unittest import mock

 from django.apps import apps
 from django.db import connection

@@ -86,6 +87,30 @@ class DirectoriesMixin:
         remove_dirs(self.dirs)


+class ConsumerProgressMixin:
+    def setUp(self) -> None:
+        self.send_progress_patcher = mock.patch(
+            "documents.consumer.Consumer._send_progress",
+        )
+        self.send_progress_mock = self.send_progress_patcher.start()
+        super().setUp()
+
+    def tearDown(self) -> None:
+        super().tearDown()
+        self.send_progress_patcher.stop()
+
+
+class DocumentConsumeDelayMixin:
+    def setUp(self) -> None:
+        self.consume_file_patcher = mock.patch("documents.tasks.consume_file.delay")
+        self.consume_file_mock = self.consume_file_patcher.start()
+        super().setUp()
+
+    def tearDown(self) -> None:
+        super().tearDown()
+        self.consume_file_patcher.stop()
+
+
 class TestMigrations(TransactionTestCase):
     @property
     def app(self):
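A hypothetical test case showing how these mixins would be combined (illustrative; it assumes the mixins live in a `documents.tests.utils` module, and lists them before the Django base class so their `setUp`/`tearDown` run via `super()`):

```python
from django.test import TestCase

from documents.tests.utils import ConsumerProgressMixin, DocumentConsumeDelayMixin


class MyConsumeTest(ConsumerProgressMixin, DocumentConsumeDelayMixin, TestCase):
    def test_something(self):
        # self.send_progress_mock and self.consume_file_mock are
        # installed by the mixins' setUp() and removed in tearDown()
        ...
```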
@@ -477,21 +477,14 @@ class DocumentViewSet(
 class SearchResultSerializer(DocumentSerializer):
     def to_representation(self, instance):
         doc = Document.objects.get(id=instance["id"])
-        comments = ""
-        if hasattr(instance.results.q, "subqueries"):
-            commentTerm = instance.results.q.subqueries[0]
-            comments = ",".join(
-                [
-                    str(c.comment)
-                    for c in Comment.objects.filter(document=instance["id"])
-                    if commentTerm.text in c.comment
-                ],
-            )
+        comments = ",".join(
+            [str(c.comment) for c in Comment.objects.filter(document=instance["id"])],
+        )
         r = super().to_representation(doc)
         r["__search_hit__"] = {
             "score": instance.score,
             "highlights": instance.highlights("content", text=doc.content),
-            "comment_highlights": instance.highlights("content", text=comments)
+            "comment_highlights": instance.highlights("comments", text=comments)
             if doc
             else None,
             "rank": instance.rank,
@@ -271,6 +271,16 @@ class MailDocumentParser(DocumentParser):
             "paperHeight": "11.7",
             "scale": "1.0",
         }

+        # Set the output format of the resulting PDF
+        # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno
+        if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
+            data["pdfFormat"] = "PDF/A-2b"
+        elif settings.OCR_OUTPUT_TYPE == "pdfa-1":
+            data["pdfFormat"] = "PDF/A-1a"
+        elif settings.OCR_OUTPUT_TYPE == "pdfa-3":
+            data["pdfFormat"] = "PDF/A-3b"
+
         try:
             response = requests.post(
                 url,
@@ -573,8 +573,8 @@ class TestParser(TestCase):
             self.parser.gotenberg_server + "/forms/chromium/convert/html",
             mock_post.call_args.args[0],
         )
-        self.assertEqual({}, mock_post.call_args.kwargs["headers"])
-        self.assertEqual(
+        self.assertDictEqual({}, mock_post.call_args.kwargs["headers"])
+        self.assertDictEqual(
             {
                 "marginTop": "0.1",
                 "marginBottom": "0.1",

@@ -583,6 +583,7 @@ class TestParser(TestCase):
                 "paperWidth": "8.27",
                 "paperHeight": "11.7",
                 "scale": "1.0",
+                "pdfFormat": "PDF/A-2b",
             },
             mock_post.call_args.kwargs["data"],
         )

@@ -663,8 +664,8 @@ class TestParser(TestCase):
             self.parser.gotenberg_server + "/forms/chromium/convert/html",
             mock_post.call_args.args[0],
         )
-        self.assertEqual({}, mock_post.call_args.kwargs["headers"])
-        self.assertEqual(
+        self.assertDictEqual({}, mock_post.call_args.kwargs["headers"])
+        self.assertDictEqual(
             {
                 "marginTop": "0.1",
                 "marginBottom": "0.1",
@@ -95,9 +95,19 @@ class TikaDocumentParser(DocumentParser):
             ),
         }
         headers = {}
+        data = {}
+
+        # Set the output format of the resulting PDF
+        # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno
+        if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
+            data["pdfFormat"] = "PDF/A-2b"
+        elif settings.OCR_OUTPUT_TYPE == "pdfa-1":
+            data["pdfFormat"] = "PDF/A-1a"
+        elif settings.OCR_OUTPUT_TYPE == "pdfa-3":
+            data["pdfFormat"] = "PDF/A-3b"

         try:
-            response = requests.post(url, files=files, headers=headers)
+            response = requests.post(url, files=files, headers=headers, data=data)
             response.raise_for_status()  # ensure we notice bad responses
         except Exception as err:
             raise ParseError(
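The same OCR_OUTPUT_TYPE to pdfFormat mapping now appears in both the mail and Tika parsers. If it were ever factored out, a shared helper might look like this (a sketch only; no such helper exists in this commit, and the function name is hypothetical):

```python
from typing import Dict, Optional

# Valid Gotenberg inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno
_PDF_FORMAT_BY_OUTPUT_TYPE: Dict[str, str] = {
    "pdfa": "PDF/A-2b",
    "pdfa-2": "PDF/A-2b",
    "pdfa-1": "PDF/A-1a",
    "pdfa-3": "PDF/A-3b",
}


def gotenberg_pdf_format(ocr_output_type: str) -> Optional[str]:
    """Map the configured OCR output type to a Gotenberg pdfFormat value,
    or None when no PDF/A conversion was requested."""
    return _PDF_FORMAT_BY_OUTPUT_TYPE.get(ocr_output_type)


# Usage in a parser (hypothetical):
#   data = {}
#   pdf_format = gotenberg_pdf_format(settings.OCR_OUTPUT_TYPE)
#   if pdf_format is not None:
#       data["pdfFormat"] = pdf_format
```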