mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge remote-tracking branch 'origin/dev'
This commit is contained in:
commit
9aea8a7d7c
71
.github/scripts/cleanup-tags.py
vendored
71
.github/scripts/cleanup-tags.py
vendored
@ -15,6 +15,8 @@ from github import ContainerPackage
|
|||||||
from github import GithubBranchApi
|
from github import GithubBranchApi
|
||||||
from github import GithubContainerRegistryApi
|
from github import GithubContainerRegistryApi
|
||||||
|
|
||||||
|
import docker
|
||||||
|
|
||||||
logger = logging.getLogger("cleanup-tags")
|
logger = logging.getLogger("cleanup-tags")
|
||||||
|
|
||||||
|
|
||||||
@ -151,12 +153,16 @@ class RegistryTagsCleaner:
|
|||||||
for tag in sorted(self.tags_to_keep):
|
for tag in sorted(self.tags_to_keep):
|
||||||
full_name = f"ghcr.io/{self.repo_owner}/{self.package_name}:{tag}"
|
full_name = f"ghcr.io/{self.repo_owner}/{self.package_name}:{tag}"
|
||||||
logger.info(f"Checking manifest for {full_name}")
|
logger.info(f"Checking manifest for {full_name}")
|
||||||
|
# TODO: It would be nice to use RegistryData from docker
|
||||||
|
# except the ID doesn't map to anything in the manifest
|
||||||
try:
|
try:
|
||||||
proc = subprocess.run(
|
proc = subprocess.run(
|
||||||
[
|
[
|
||||||
shutil.which("docker"),
|
shutil.which("docker"),
|
||||||
"manifest",
|
"buildx",
|
||||||
|
"imagetools",
|
||||||
"inspect",
|
"inspect",
|
||||||
|
"--raw",
|
||||||
full_name,
|
full_name,
|
||||||
],
|
],
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
@ -241,6 +247,65 @@ class RegistryTagsCleaner:
|
|||||||
# By default, keep anything which is tagged
|
# By default, keep anything which is tagged
|
||||||
self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))
|
self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))
|
||||||
|
|
||||||
|
def check_tags_pull(self) -> None:
    """
    Confirm that every kept tag still pulls, for each supported platform.

    Uses the Docker Python SDK to pull each tag in ``self.tags_to_keep``
    for linux/amd64, linux/arm64 and linux/arm/v7.  A failed pull is logged
    as an error and the check carries on with the next platform/tag.
    Pulled images are removed again in batches so the host does not run
    out of disk space.

    TODO: This is much slower (although more comprehensive). Maybe a Pool?
    """
    logger.info("Beginning confirmation step")
    client = docker.from_env()

    # Neither of these depends on the tag, so build them once up front
    repository = f"ghcr.io/{self.repo_owner}/{self.package_name}"
    platforms = [("amd64", None), ("arm64", None), ("arm", "v7")]

    imgs = []
    for tag in sorted(self.tags_to_keep):
        # Skip beta and release candidate tags.  Only the substring "beta"
        # is tested, so this does not depend on the platform and can be
        # decided once per tag.
        if "beta" in tag:
            continue

        for arch, variant in platforms:
            # qpdf is cross compiled, so there is a single arch, amd64;
            # skip the others in this case.
            # NOTE(review): only the exact tag "11.2.0" is matched here,
            # later qpdf tags are still pulled for every platform.
            if (
                "qpdf" in self.package_name
                and arch != "amd64"
                and tag == "11.2.0"
            ):
                continue

            # Build the platform name
            if variant is not None:
                platform = f"linux/{arch}/{variant}"
            else:
                platform = f"linux/{arch}"

            try:
                logger.info(f"Pulling {repository}:{tag} for {platform}")
                image = client.images.pull(
                    repository=repository,
                    tag=tag,
                    platform=platform,
                )
                imgs.append(image)
            except docker.errors.APIError as e:
                logger.error(
                    f"Failed to pull {repository}:{tag}: {e}",
                )

        # Prevent out of space errors by removing after a few
        # pulls
        if len(imgs) > 50:
            for pulled in imgs:
                try:
                    client.images.remove(pulled.id)
                except docker.errors.APIError as e:
                    err_str = str(e)
                    # Ignore attempts to remove images that are partly shared
                    # Ignore images which are somehow gone already
                    if (
                        "must be forced" not in err_str
                        and "No such image" not in err_str
                    ):
                        # Report the image that actually failed, not the
                        # tag the outer loop happens to be on right now
                        pulled_name = ", ".join(pulled.tags) or pulled.id
                        logger.error(
                            f"Remove image {pulled_name} failed: {e}",
                        )
            imgs = []
|
||||||
|
|
||||||
|
|
||||||
class MainImageTagsCleaner(RegistryTagsCleaner):
|
class MainImageTagsCleaner(RegistryTagsCleaner):
|
||||||
def decide_what_tags_to_keep(self):
|
def decide_what_tags_to_keep(self):
|
||||||
@ -397,6 +462,10 @@ def _main():
|
|||||||
# Clean images which are untagged
|
# Clean images which are untagged
|
||||||
cleaner.clean_untagged(args.is_manifest)
|
cleaner.clean_untagged(args.is_manifest)
|
||||||
|
|
||||||
|
# Verify remaining tags still pull
|
||||||
|
if args.is_manifest:
|
||||||
|
cleaner.check_tags_pull()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
_main()
|
_main()
|
||||||
|
6
.github/workflows/ci.yml
vendored
6
.github/workflows/ci.yml
vendored
@ -212,12 +212,6 @@ jobs:
|
|||||||
name: Prepare Docker Pipeline Data
|
name: Prepare Docker Pipeline Data
|
||||||
if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/beta' || contains(github.ref, 'beta.rc') || startsWith(github.ref, 'refs/tags/v'))
|
if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/beta' || contains(github.ref, 'beta.rc') || startsWith(github.ref, 'refs/tags/v'))
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
# If the push triggered the installer library workflow, wait for it to
|
|
||||||
# complete here. This ensures the required versions for the final
|
|
||||||
# image have been built, while not waiting at all if the versions haven't changed
|
|
||||||
concurrency:
|
|
||||||
group: build-installer-library
|
|
||||||
cancel-in-progress: false
|
|
||||||
needs:
|
needs:
|
||||||
- documentation
|
- documentation
|
||||||
- tests-backend
|
- tests-backend
|
||||||
|
14
.github/workflows/cleanup-tags.yml
vendored
14
.github/workflows/cleanup-tags.yml
vendored
@ -62,9 +62,9 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: "3.10"
|
python-version: "3.10"
|
||||||
-
|
-
|
||||||
name: Install httpx
|
name: Install Python libraries
|
||||||
run: |
|
run: |
|
||||||
python -m pip install httpx
|
python -m pip install httpx docker
|
||||||
#
|
#
|
||||||
# Clean up primary package
|
# Clean up primary package
|
||||||
#
|
#
|
||||||
@ -81,13 +81,3 @@ jobs:
|
|||||||
if: "${{ env.TOKEN != '' }}"
|
if: "${{ env.TOKEN != '' }}"
|
||||||
run: |
|
run: |
|
||||||
python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --delete "${{ matrix.cache-name }}"
|
python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --delete "${{ matrix.cache-name }}"
|
||||||
#
|
|
||||||
# Verify tags which are left still pull
|
|
||||||
#
|
|
||||||
-
|
|
||||||
name: Check all tags still pull
|
|
||||||
run: |
|
|
||||||
ghcr_name=$(echo "ghcr.io/${GITHUB_REPOSITORY_OWNER}/${{ matrix.primary-name }}" | awk '{ print tolower($0) }')
|
|
||||||
echo "Pulling all tags of ${ghcr_name}"
|
|
||||||
docker pull --quiet --all-tags ${ghcr_name}
|
|
||||||
docker image list
|
|
||||||
|
139
.github/workflows/installer-library.yml
vendored
139
.github/workflows/installer-library.yml
vendored
@ -169,3 +169,142 @@ jobs:
|
|||||||
PIKEPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }}
|
PIKEPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }}
|
||||||
PILLOW_VERSION=${{ needs.prepare-docker-build.outputs.pillow-version }}
|
PILLOW_VERSION=${{ needs.prepare-docker-build.outputs.pillow-version }}
|
||||||
LXML_VERSION=${{ needs.prepare-docker-build.outputs.lxml-version }}
|
LXML_VERSION=${{ needs.prepare-docker-build.outputs.lxml-version }}
|
||||||
|
|
||||||
|
commit-binary-files:
|
||||||
|
name: Store installers
|
||||||
|
needs:
|
||||||
|
- prepare-docker-build
|
||||||
|
- build-qpdf-debs
|
||||||
|
- build-jbig2enc
|
||||||
|
- build-psycopg2-wheel
|
||||||
|
- build-pikepdf-wheel
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
steps:
|
||||||
|
-
|
||||||
|
name: Checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
with:
|
||||||
|
ref: binary-library
|
||||||
|
-
|
||||||
|
name: Set up Python
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: "3.9"
|
||||||
|
-
|
||||||
|
name: Install system dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update -qq
|
||||||
|
sudo apt-get install -qq --no-install-recommends tree
|
||||||
|
-
|
||||||
|
name: Extract qpdf files
|
||||||
|
run: |
|
||||||
|
version=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).version }}
|
||||||
|
tag=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).image_tag }}
|
||||||
|
|
||||||
|
docker pull --quiet ${tag}
|
||||||
|
docker create --name qpdf-extract ${tag}
|
||||||
|
|
||||||
|
mkdir --parents qpdf/${version}/amd64
|
||||||
|
docker cp qpdf-extract:/usr/src/qpdf/${version}/amd64 qpdf/${version}
|
||||||
|
|
||||||
|
mkdir --parents qpdf/${version}/arm64
|
||||||
|
docker cp qpdf-extract:/usr/src/qpdf/${version}/arm64 qpdf/${version}
|
||||||
|
|
||||||
|
mkdir --parents qpdf/${version}/armv7
|
||||||
|
docker cp qpdf-extract:/usr/src/qpdf/${version}/armv7 qpdf/${version}
|
||||||
|
-
|
||||||
|
name: Extract psycopg2 files
|
||||||
|
run: |
|
||||||
|
version=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).version }}
|
||||||
|
tag=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).image_tag }}
|
||||||
|
|
||||||
|
docker pull --quiet --platform linux/amd64 ${tag}
|
||||||
|
docker create --platform linux/amd64 --name psycopg2-extract ${tag}
|
||||||
|
mkdir --parents psycopg2/${version}/amd64
|
||||||
|
docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/amd64
|
||||||
|
mv psycopg2/${version}/amd64/wheels/* psycopg2/${version}/amd64
|
||||||
|
rm -r psycopg2/${version}/amd64/wheels/
|
||||||
|
docker rm psycopg2-extract
|
||||||
|
|
||||||
|
docker pull --quiet --platform linux/arm64 ${tag}
|
||||||
|
docker create --platform linux/arm64 --name psycopg2-extract ${tag}
|
||||||
|
mkdir --parents psycopg2/${version}/arm64
|
||||||
|
docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/arm64
|
||||||
|
mv psycopg2/${version}/arm64/wheels/* psycopg2/${version}/arm64
|
||||||
|
rm -r psycopg2/${version}/arm64/wheels/
|
||||||
|
docker rm psycopg2-extract
|
||||||
|
|
||||||
|
docker pull --quiet --platform linux/arm/v7 ${tag}
|
||||||
|
docker create --platform linux/arm/v7 --name psycopg2-extract ${tag}
|
||||||
|
mkdir --parents psycopg2/${version}/armv7
|
||||||
|
docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/armv7
|
||||||
|
mv psycopg2/${version}/armv7/wheels/* psycopg2/${version}/armv7
|
||||||
|
rm -r psycopg2/${version}/armv7/wheels/
|
||||||
|
docker rm psycopg2-extract
|
||||||
|
-
|
||||||
|
name: Extract pikepdf files
|
||||||
|
run: |
|
||||||
|
version=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }}
|
||||||
|
tag=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).image_tag }}
|
||||||
|
|
||||||
|
docker pull --quiet --platform linux/amd64 ${tag}
|
||||||
|
docker create --platform linux/amd64 --name pikepdf-extract ${tag}
|
||||||
|
mkdir --parents pikepdf/${version}/amd64
|
||||||
|
docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/amd64
|
||||||
|
mv pikepdf/${version}/amd64/wheels/* pikepdf/${version}/amd64
|
||||||
|
rm -r pikepdf/${version}/amd64/wheels/
|
||||||
|
docker rm pikepdf-extract
|
||||||
|
|
||||||
|
docker pull --quiet --platform linux/arm64 ${tag}
|
||||||
|
docker create --platform linux/arm64 --name pikepdf-extract ${tag}
|
||||||
|
mkdir --parents pikepdf/${version}/arm64
|
||||||
|
docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/arm64
|
||||||
|
mv pikepdf/${version}/arm64/wheels/* pikepdf/${version}/arm64
|
||||||
|
rm -r pikepdf/${version}/arm64/wheels/
|
||||||
|
docker rm pikepdf-extract
|
||||||
|
|
||||||
|
docker pull --quiet --platform linux/arm/v7 ${tag}
|
||||||
|
docker create --platform linux/arm/v7 --name pikepdf-extract ${tag}
|
||||||
|
mkdir --parents pikepdf/${version}/armv7
|
||||||
|
docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/armv7
|
||||||
|
mv pikepdf/${version}/armv7/wheels/* pikepdf/${version}/armv7
|
||||||
|
rm -r pikepdf/${version}/armv7/wheels/
|
||||||
|
docker rm pikepdf-extract
|
||||||
|
-
|
||||||
|
name: Extract jbig2enc files
|
||||||
|
run: |
|
||||||
|
version=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).version }}
|
||||||
|
tag=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).image_tag }}
|
||||||
|
|
||||||
|
docker pull --quiet --platform linux/amd64 ${tag}
|
||||||
|
docker create --platform linux/amd64 --name jbig2enc-extract ${tag}
|
||||||
|
mkdir --parents jbig2enc/${version}/amd64
|
||||||
|
docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/amd64/
|
||||||
|
mv jbig2enc/${version}/amd64/build/* jbig2enc/${version}/amd64/
|
||||||
|
docker rm jbig2enc-extract
|
||||||
|
|
||||||
|
docker pull --quiet --platform linux/arm64 ${tag}
|
||||||
|
docker create --platform linux/arm64 --name jbig2enc-extract ${tag}
|
||||||
|
mkdir --parents jbig2enc/${version}/arm64
|
||||||
|
docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/arm64
|
||||||
|
mv jbig2enc/${version}/arm64/build/* jbig2enc/${version}/arm64/
|
||||||
|
docker rm jbig2enc-extract
|
||||||
|
|
||||||
|
docker pull --quiet --platform linux/arm/v7 ${tag}
|
||||||
|
docker create --platform linux/arm/v7 --name jbig2enc-extract ${tag}
|
||||||
|
mkdir --parents jbig2enc/${version}/armv7
|
||||||
|
docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/armv7
|
||||||
|
mv jbig2enc/${version}/armv7/build/* jbig2enc/${version}/armv7/
|
||||||
|
docker rm jbig2enc-extract
|
||||||
|
-
|
||||||
|
name: Show file structure
|
||||||
|
run: |
|
||||||
|
tree .
|
||||||
|
-
|
||||||
|
name: Commit files
|
||||||
|
run: |
|
||||||
|
git config --global user.name "github-actions"
|
||||||
|
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||||
|
git add pikepdf/ qpdf/ psycopg2/ jbig2enc/
|
||||||
|
git commit -m "Updating installer packages" || true
|
||||||
|
git push origin || true
|
||||||
|
53
Dockerfile
53
Dockerfile
@ -1,19 +1,5 @@
|
|||||||
# syntax=docker/dockerfile:1.4
|
# syntax=docker/dockerfile:1.4
|
||||||
|
|
||||||
# Pull the installer images from the library
|
|
||||||
# These are all built previously
|
|
||||||
# They provide either a .deb or .whl
|
|
||||||
|
|
||||||
ARG JBIG2ENC_VERSION
|
|
||||||
ARG QPDF_VERSION
|
|
||||||
ARG PIKEPDF_VERSION
|
|
||||||
ARG PSYCOPG2_VERSION
|
|
||||||
|
|
||||||
FROM ghcr.io/paperless-ngx/paperless-ngx/builder/jbig2enc:${JBIG2ENC_VERSION} as jbig2enc-builder
|
|
||||||
FROM --platform=$BUILDPLATFORM ghcr.io/paperless-ngx/paperless-ngx/builder/qpdf:${QPDF_VERSION} as qpdf-builder
|
|
||||||
FROM ghcr.io/paperless-ngx/paperless-ngx/builder/pikepdf:${PIKEPDF_VERSION} as pikepdf-builder
|
|
||||||
FROM ghcr.io/paperless-ngx/paperless-ngx/builder/psycopg2:${PSYCOPG2_VERSION} as psycopg2-builder
|
|
||||||
|
|
||||||
FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend
|
FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend
|
||||||
|
|
||||||
# This stage compiles the frontend
|
# This stage compiles the frontend
|
||||||
@ -58,24 +44,21 @@ LABEL org.opencontainers.image.url="https://github.com/paperless-ngx/paperless-n
|
|||||||
LABEL org.opencontainers.image.licenses="GPL-3.0-only"
|
LABEL org.opencontainers.image.licenses="GPL-3.0-only"
|
||||||
|
|
||||||
ARG DEBIAN_FRONTEND=noninteractive
|
ARG DEBIAN_FRONTEND=noninteractive
|
||||||
# Buildx provided
|
# Buildx provided, must be defined to use though
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
# Workflow provided
|
# Workflow provided
|
||||||
|
ARG JBIG2ENC_VERSION
|
||||||
ARG QPDF_VERSION
|
ARG QPDF_VERSION
|
||||||
|
ARG PIKEPDF_VERSION
|
||||||
|
ARG PSYCOPG2_VERSION
|
||||||
|
|
||||||
#
|
#
|
||||||
# Begin installation and configuration
|
# Begin installation and configuration
|
||||||
# Order the steps below from least often changed to most
|
# Order the steps below from least often changed to most
|
||||||
#
|
#
|
||||||
|
|
||||||
# copy jbig2enc
|
|
||||||
# Basically will never change again
|
|
||||||
COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
|
|
||||||
COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
|
|
||||||
COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/*.h /usr/local/include/
|
|
||||||
|
|
||||||
# Packages need for running
|
# Packages need for running
|
||||||
ARG RUNTIME_PACKAGES="\
|
ARG RUNTIME_PACKAGES="\
|
||||||
# Python
|
# Python
|
||||||
@ -198,19 +181,29 @@ RUN set -eux \
|
|||||||
# Install the built packages from the installer library images
|
# Install the built packages from the installer library images
|
||||||
# Use mounts to avoid copying installer files into the image
|
# Use mounts to avoid copying installer files into the image
|
||||||
# These change sometimes
|
# These change sometimes
|
||||||
RUN --mount=type=bind,from=qpdf-builder,target=/qpdf \
|
RUN set -eux \
|
||||||
--mount=type=bind,from=psycopg2-builder,target=/psycopg2 \
|
&& echo "Getting binaries" \
|
||||||
--mount=type=bind,from=pikepdf-builder,target=/pikepdf \
|
&& mkdir paperless-ngx \
|
||||||
set -eux \
|
&& curl --fail --silent --show-error --output paperless-ngx.tar.gz --location https://github.com/paperless-ngx/paperless-ngx/archive/41d6e7e407af09a0882736d50c89b6e015997bff.tar.gz \
|
||||||
|
&& tar -xf paperless-ngx.tar.gz --directory paperless-ngx --strip-components=1 \
|
||||||
|
&& cd paperless-ngx \
|
||||||
|
# Setting a specific revision ensures we know what this installed
|
||||||
|
# and ensures cache breaking on changes
|
||||||
|
&& echo "Installing jbig2enc" \
|
||||||
|
&& cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/jbig2 /usr/local/bin/ \
|
||||||
|
&& cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/libjbig2enc* /usr/local/lib/ \
|
||||||
&& echo "Installing qpdf" \
|
&& echo "Installing qpdf" \
|
||||||
&& apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/libqpdf29_*.deb \
|
&& apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/libqpdf29_*.deb \
|
||||||
&& apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/qpdf_*.deb \
|
&& apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/qpdf_*.deb \
|
||||||
&& echo "Installing pikepdf and dependencies" \
|
&& echo "Installing pikepdf and dependencies" \
|
||||||
&& python3 -m pip install --no-cache-dir /pikepdf/usr/src/wheels/*.whl \
|
&& python3 -m pip install --no-cache-dir ./pikepdf/${PIKEPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/*.whl \
|
||||||
&& python3 -m pip list \
|
&& python3 -m pip list \
|
||||||
&& echo "Installing psycopg2" \
|
&& echo "Installing psycopg2" \
|
||||||
&& python3 -m pip install --no-cache-dir /psycopg2/usr/src/wheels/psycopg2*.whl \
|
&& python3 -m pip install --no-cache-dir ./psycopg2/${PSYCOPG2_VERSION}/${TARGETARCH}${TARGETVARIANT}/psycopg2*.whl \
|
||||||
&& python3 -m pip list
|
&& python3 -m pip list \
|
||||||
|
&& echo "Cleaning up image layer" \
|
||||||
|
&& cd ../ \
|
||||||
|
&& rm -rf paperless-ngx
|
||||||
|
|
||||||
WORKDIR /usr/src/paperless/src/
|
WORKDIR /usr/src/paperless/src/
|
||||||
|
|
||||||
|
@ -29,7 +29,20 @@ RUN set -eux \
|
|||||||
&& ./autogen.sh \
|
&& ./autogen.sh \
|
||||||
&& ./configure \
|
&& ./configure \
|
||||||
&& make \
|
&& make \
|
||||||
|
&& echo "Gathering package data" \
|
||||||
|
&& dpkg-query -f '${Package;-40}${Version}\n' -W > ./pkg-list.txt \
|
||||||
&& echo "Cleaning up image" \
|
&& echo "Cleaning up image" \
|
||||||
&& apt-get -y purge ${BUILD_PACKAGES} \
|
&& apt-get -y purge ${BUILD_PACKAGES} \
|
||||||
&& apt-get -y autoremove --purge \
|
&& apt-get -y autoremove --purge \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
|
&& echo "Moving files around" \
|
||||||
|
&& mkdir build \
|
||||||
|
# Unlink a symlink that causes problems
|
||||||
|
&& unlink ./src/.libs/libjbig2enc.la \
|
||||||
|
# Move what the link pointed to
|
||||||
|
&& mv ./src/libjbig2enc.la ./build/ \
|
||||||
|
# Move the shared library .so files
|
||||||
|
&& mv ./src/.libs/libjbig2enc* ./build/ \
|
||||||
|
# And move the cli binary
|
||||||
|
&& mv ./src/jbig2 ./build/ \
|
||||||
|
&& mv ./pkg-list.txt ./build/
|
||||||
|
@ -7,12 +7,17 @@
|
|||||||
# Default to pulling from the main repo registry when manually building
|
# Default to pulling from the main repo registry when manually building
|
||||||
ARG REPO="paperless-ngx/paperless-ngx"
|
ARG REPO="paperless-ngx/paperless-ngx"
|
||||||
|
|
||||||
|
# This does nothing, except provide a name for a copy below
|
||||||
ARG QPDF_VERSION
|
ARG QPDF_VERSION
|
||||||
FROM --platform=$BUILDPLATFORM ghcr.io/${REPO}/builder/qpdf:${QPDF_VERSION} as qpdf-builder
|
FROM --platform=$BUILDPLATFORM ghcr.io/${REPO}/builder/qpdf:${QPDF_VERSION} as qpdf-builder
|
||||||
|
|
||||||
# This does nothing, except provide a name for a copy below
|
#
|
||||||
|
# Stage: builder
|
||||||
FROM python:3.9-slim-bullseye as main
|
# Purpose:
|
||||||
|
# - Build the pikepdf wheel
|
||||||
|
# - Build any dependent wheels which can't be found
|
||||||
|
#
|
||||||
|
FROM python:3.9-slim-bullseye as builder
|
||||||
|
|
||||||
LABEL org.opencontainers.image.description="A intermediate image with pikepdf wheel built"
|
LABEL org.opencontainers.image.description="A intermediate image with pikepdf wheel built"
|
||||||
|
|
||||||
@ -100,3 +105,14 @@ RUN set -eux \
|
|||||||
&& apt-get -y purge ${BUILD_PACKAGES} \
|
&& apt-get -y purge ${BUILD_PACKAGES} \
|
||||||
&& apt-get -y autoremove --purge \
|
&& apt-get -y autoremove --purge \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
#
|
||||||
|
# Stage: package
|
||||||
|
# Purpose: Holds the compiled .whl files in a tiny image to pull
|
||||||
|
#
|
||||||
|
FROM alpine:3.17 as package
|
||||||
|
|
||||||
|
WORKDIR /usr/src/wheels/
|
||||||
|
|
||||||
|
COPY --from=builder /usr/src/wheels/*.whl ./
|
||||||
|
COPY --from=builder /usr/src/wheels/pkg-list.txt ./
|
||||||
|
@ -2,7 +2,12 @@
|
|||||||
# Inputs:
|
# Inputs:
|
||||||
# - PSYCOPG2_VERSION - Version to build
|
# - PSYCOPG2_VERSION - Version to build
|
||||||
|
|
||||||
FROM python:3.9-slim-bullseye as main
|
#
|
||||||
|
# Stage: builder
|
||||||
|
# Purpose:
|
||||||
|
# - Build the psycopg2 wheel
|
||||||
|
#
|
||||||
|
FROM python:3.9-slim-bullseye as builder
|
||||||
|
|
||||||
LABEL org.opencontainers.image.description="A intermediate image with psycopg2 wheel built"
|
LABEL org.opencontainers.image.description="A intermediate image with psycopg2 wheel built"
|
||||||
|
|
||||||
@ -48,3 +53,14 @@ RUN set -eux \
|
|||||||
&& apt-get -y purge ${BUILD_PACKAGES} \
|
&& apt-get -y purge ${BUILD_PACKAGES} \
|
||||||
&& apt-get -y autoremove --purge \
|
&& apt-get -y autoremove --purge \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
#
|
||||||
|
# Stage: package
|
||||||
|
# Purpose: Holds the compiled .whl files in a tiny image to pull
|
||||||
|
#
|
||||||
|
FROM alpine:3.17 as package
|
||||||
|
|
||||||
|
WORKDIR /usr/src/wheels/
|
||||||
|
|
||||||
|
COPY --from=builder /usr/src/wheels/*.whl ./
|
||||||
|
COPY --from=builder /usr/src/wheels/pkg-list.txt ./
|
||||||
|
57
docker-builders/README.md
Normal file
57
docker-builders/README.md
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# Installer Library
|
||||||
|
|
||||||
|
This folder contains the Dockerfiles for building certain installers or libraries, which are then pulled into the main image.
|
||||||
|
|
||||||
|
## [jbig2enc](https://github.com/agl/jbig2enc)
|
||||||
|
|
||||||
|
### Why
|
||||||
|
|
||||||
|
JBIG is an image coding which can achieve better compression of images for PDFs.
|
||||||
|
|
||||||
|
### What
|
||||||
|
|
||||||
|
The Docker image builds a shared library file and utility, which is copied into the correct location in the final image.
|
||||||
|
|
||||||
|
### Updating
|
||||||
|
|
||||||
|
1. Ensure the given qpdf version is present in [Debian bookworm](https://packages.debian.org/bookworm/qpdf)
|
||||||
|
2. Update `.build-config.json` to the given version
|
||||||
|
3. If the Debian specific version has incremented, update `Dockerfile.qpdf`
|
||||||
|
|
||||||
|
See Also:
|
||||||
|
|
||||||
|
- [OCRMyPDF Documentation](https://ocrmypdf.readthedocs.io/en/latest/jbig2.html)
|
||||||
|
|
||||||
|
## [psycopg2](https://www.psycopg.org/)
|
||||||
|
|
||||||
|
### Why
|
||||||
|
|
||||||
|
The pre-built wheels of psycopg2 are built on Debian 9, which provides a quite old version of libpq-dev. This causes issues with authentication methods.
|
||||||
|
|
||||||
|
### What
|
||||||
|
|
||||||
|
The image builds psycopg2 wheels on Debian 10 and places the produced wheels into `/usr/src/wheels/`.
|
||||||
|
|
||||||
|
See Also:
|
||||||
|
|
||||||
|
- [Issue 266](https://github.com/paperless-ngx/paperless-ngx/issues/266)
|
||||||
|
|
||||||
|
## [qpdf](https://qpdf.readthedocs.io/en/stable/index.html)
|
||||||
|
|
||||||
|
### Why
|
||||||
|
|
||||||
|
qpdf and its library provide tools to read, manipulate and fix up PDFs. Version 11 is also required by `pikepdf` 6+, and Debian 9 does not provide any version above 10.
|
||||||
|
|
||||||
|
### What
|
||||||
|
|
||||||
|
The Docker image cross compiles .deb installers for each supported architecture of the main image. The installers are placed in `/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/`
|
||||||
|
|
||||||
|
## [pikepdf](https://pikepdf.readthedocs.io/en/latest/)
|
||||||
|
|
||||||
|
### Why
|
||||||
|
|
||||||
|
Required by OCRMyPdf, this is a general purpose library for PDF manipulation in Python via the qpdf libraries.
|
||||||
|
|
||||||
|
### What
|
||||||
|
|
||||||
|
The built wheels are placed into `/usr/src/wheels/`
|
@ -80,7 +80,7 @@ django_checks() {
|
|||||||
|
|
||||||
search_index() {
|
search_index() {
|
||||||
|
|
||||||
local -r index_version=1
|
local -r index_version=2
|
||||||
local -r index_version_file=${DATA_DIR}/.index_version
|
local -r index_version_file=${DATA_DIR}/.index_version
|
||||||
|
|
||||||
if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then
|
if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then
|
||||||
|
@ -121,7 +121,17 @@ Executed after the consumer sees a new document in the consumption
|
|||||||
folder, but before any processing of the document is performed. This
|
folder, but before any processing of the document is performed. This
|
||||||
script can access the following relevant environment variables set:
|
script can access the following relevant environment variables set:
|
||||||
|
|
||||||
- `DOCUMENT_SOURCE_PATH`
|
| Environment Variable | Description |
|
||||||
|
| ----------------------- | ------------------------------------------------------------ |
|
||||||
|
| `DOCUMENT_SOURCE_PATH` | Original path of the consumed document |
|
||||||
|
| `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on |
|
||||||
|
|
||||||
|
!!! note
|
||||||
|
|
||||||
|
Pre-consume scripts which modify the document should only change
|
||||||
|
the `DOCUMENT_WORKING_PATH` file or a second consume task may
|
||||||
|
be triggered, leading to failures as two tasks work on the
|
||||||
|
same document path
|
||||||
|
|
||||||
A simple but common example for this would be creating a simple script
|
A simple but common example for this would be creating a simple script
|
||||||
like this:
|
like this:
|
||||||
@ -130,7 +140,7 @@ like this:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH}
|
pdf2pdfocr.py -i ${DOCUMENT_WORKING_PATH}
|
||||||
```
|
```
|
||||||
|
|
||||||
`/etc/paperless.conf`
|
`/etc/paperless.conf`
|
||||||
@ -157,26 +167,36 @@ Executed after the consumer has successfully processed a document and
|
|||||||
has moved it into paperless. It receives the following environment
|
has moved it into paperless. It receives the following environment
|
||||||
variables:
|
variables:
|
||||||
|
|
||||||
- `DOCUMENT_ID`
|
| Environment Variable | Description |
|
||||||
- `DOCUMENT_FILE_NAME`
|
| ---------------------------- | --------------------------------------------- |
|
||||||
- `DOCUMENT_CREATED`
|
| `DOCUMENT_ID` | Database primary key of the document |
|
||||||
- `DOCUMENT_MODIFIED`
|
| `DOCUMENT_FILE_NAME` | Formatted filename, not including paths |
|
||||||
- `DOCUMENT_ADDED`
|
| `DOCUMENT_CREATED` | Date & time when document created |
|
||||||
- `DOCUMENT_SOURCE_PATH`
|
| `DOCUMENT_MODIFIED` | Date & time when document was last modified |
|
||||||
- `DOCUMENT_ARCHIVE_PATH`
|
| `DOCUMENT_ADDED` | Date & time when document was added |
|
||||||
- `DOCUMENT_THUMBNAIL_PATH`
|
| `DOCUMENT_SOURCE_PATH` | Path to the original document file |
|
||||||
- `DOCUMENT_DOWNLOAD_URL`
|
| `DOCUMENT_ARCHIVE_PATH` | Path to the generated archive file (if any) |
|
||||||
- `DOCUMENT_THUMBNAIL_URL`
|
| `DOCUMENT_THUMBNAIL_PATH` | Path to the generated thumbnail |
|
||||||
- `DOCUMENT_CORRESPONDENT`
|
| `DOCUMENT_DOWNLOAD_URL` | URL for document download |
|
||||||
- `DOCUMENT_TAGS`
|
| `DOCUMENT_THUMBNAIL_URL` | URL for the document thumbnail |
|
||||||
- `DOCUMENT_ORIGINAL_FILENAME`
|
| `DOCUMENT_CORRESPONDENT` | Assigned correspondent (if any) |
|
||||||
|
| `DOCUMENT_TAGS` | Comma separated list of tags applied (if any) |
|
||||||
|
| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document |
|
||||||
|
|
||||||
The script can be in any language, but for a simple shell script
|
The script can be in any language. A simple shell script example:
|
||||||
example, you can take a look at
|
|
||||||
[post-consumption-example.sh](https://github.com/paperless-ngx/paperless-ngx/blob/main/scripts/post-consumption-example.sh)
|
|
||||||
in this project.
|
|
||||||
|
|
||||||
The post consumption script cannot cancel the consumption process.
|
```bash title="post-consumption-example"
|
||||||
|
--8<-- "./scripts/post-consumption-example.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
!!! note
|
||||||
|
|
||||||
|
The post consumption script cannot cancel the consumption process.
|
||||||
|
|
||||||
|
!!! warning
|
||||||
|
|
||||||
|
The post consumption script should not modify the document files
|
||||||
|
directly
|
||||||
|
|
||||||
The script's stdout and stderr will be logged line by line to the
|
The script's stdout and stderr will be logged line by line to the
|
||||||
webserver log, along with the exit code of the script.
|
webserver log, along with the exit code of the script.
|
||||||
|
@ -2,6 +2,9 @@
|
|||||||
|
|
||||||
## paperless-ngx 1.12.1
|
## paperless-ngx 1.12.1
|
||||||
|
|
||||||
|
_Note: Version 1.12.x introduced searching of comments which will work for comments added after the upgrade but a reindex of the search index is required in order to be able to search
|
||||||
|
older comments. The Docker image will automatically perform this reindex, bare metal installations will have to perform this manually, see [the docs](https://docs.paperless-ngx.com/administration/#index)._
|
||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|
||||||
- Fix: comments not showing in search until after manual reindex in v1.12 [@shamoon](https://github.com/shamoon) ([#2513](https://github.com/paperless-ngx/paperless-ngx/pull/2513))
|
- Fix: comments not showing in search until after manual reindex in v1.12 [@shamoon](https://github.com/shamoon) ([#2513](https://github.com/paperless-ngx/paperless-ngx/pull/2513))
|
||||||
|
@ -41,6 +41,7 @@ markdown_extensions:
|
|||||||
anchor_linenums: true
|
anchor_linenums: true
|
||||||
- pymdownx.superfences
|
- pymdownx.superfences
|
||||||
- pymdownx.inlinehilite
|
- pymdownx.inlinehilite
|
||||||
|
- pymdownx.snippets
|
||||||
strict: true
|
strict: true
|
||||||
nav:
|
nav:
|
||||||
- index.md
|
- index.md
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -204,6 +204,10 @@ export class DocumentDetailComponent
|
|||||||
)
|
)
|
||||||
.subscribe({
|
.subscribe({
|
||||||
next: (titleValue) => {
|
next: (titleValue) => {
|
||||||
|
// In the rare case when the field changed just after the debounced event was fired.
|
||||||
|
// We don't want to overwrite what's actually in the text field, so just return
|
||||||
|
if (titleValue !== this.titleInput.value) return
|
||||||
|
|
||||||
this.title = titleValue
|
this.title = titleValue
|
||||||
this.documentForm.patchValue({ title: titleValue })
|
this.documentForm.patchValue({ title: titleValue })
|
||||||
},
|
},
|
||||||
|
@ -26,11 +26,11 @@
|
|||||||
</div>
|
</div>
|
||||||
<p class="card-text">
|
<p class="card-text">
|
||||||
<span *ngIf="document.__search_hit__ && document.__search_hit__.highlights" [innerHtml]="document.__search_hit__.highlights"></span>
|
<span *ngIf="document.__search_hit__ && document.__search_hit__.highlights" [innerHtml]="document.__search_hit__.highlights"></span>
|
||||||
<span *ngIf="document.__search_hit__ && document.__search_hit__.comment_highlights" class="d-block">
|
<span *ngFor="let highlight of searchCommentHighlights" class="d-block">
|
||||||
<svg width="1em" height="1em" fill="currentColor" class="me-2">
|
<svg width="1em" height="1em" fill="currentColor" class="me-2">
|
||||||
<use xlink:href="assets/bootstrap-icons.svg#chat-left-text"/>
|
<use xlink:href="assets/bootstrap-icons.svg#chat-left-text"/>
|
||||||
</svg>
|
</svg>
|
||||||
<span [innerHtml]="document.__search_hit__.comment_highlights"></span>
|
<span [innerHtml]="highlight"></span>
|
||||||
</span>
|
</span>
|
||||||
<span *ngIf="!document.__search_hit__" class="result-content">{{contentTrimmed}}</span>
|
<span *ngIf="!document.__search_hit__" class="result-content">{{contentTrimmed}}</span>
|
||||||
</p>
|
</p>
|
||||||
|
@ -70,6 +70,22 @@ export class DocumentCardLargeComponent {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
get searchCommentHighlights() {
|
||||||
|
let highlights = []
|
||||||
|
if (
|
||||||
|
this.document['__search_hit__'] &&
|
||||||
|
this.document['__search_hit__'].comment_highlights
|
||||||
|
) {
|
||||||
|
// only show comments with a match
|
||||||
|
highlights = (
|
||||||
|
this.document['__search_hit__'].comment_highlights as string
|
||||||
|
)
|
||||||
|
.split(',')
|
||||||
|
.filter((higlight) => higlight.includes('<span'))
|
||||||
|
}
|
||||||
|
return highlights
|
||||||
|
}
|
||||||
|
|
||||||
getIsThumbInverted() {
|
getIsThumbInverted() {
|
||||||
return this.settingsService.get(SETTINGS_KEYS.DARK_MODE_THUMB_INVERTED)
|
return this.settingsService.get(SETTINGS_KEYS.DARK_MODE_THUMB_INVERTED)
|
||||||
}
|
}
|
||||||
|
@ -143,7 +143,7 @@
|
|||||||
<p i18n>
|
<p i18n>
|
||||||
<em>No tracking data is collected by the app in any way.</em>
|
<em>No tracking data is collected by the app in any way.</em>
|
||||||
</p>
|
</p>
|
||||||
<app-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled" i18n-hint hint="Note that for users of thirdy-party containers e.g. linuxserver.io this notification may be 'ahead' of the current third-party release."></app-input-check>
|
<app-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled" i18n-hint hint="Note that for users of third-party containers e.g. linuxserver.io this notification may be 'ahead' of the current third-party release."></app-input-check>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ export const environment = {
|
|||||||
apiBaseUrl: document.baseURI + 'api/',
|
apiBaseUrl: document.baseURI + 'api/',
|
||||||
apiVersion: '2',
|
apiVersion: '2',
|
||||||
appTitle: 'Paperless-ngx',
|
appTitle: 'Paperless-ngx',
|
||||||
version: '1.12.1',
|
version: '1.12.1-dev',
|
||||||
webSocketHost: window.location.host,
|
webSocketHost: window.location.host,
|
||||||
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
|
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
|
||||||
webSocketBaseUrl: base_url.pathname + 'ws/',
|
webSocketBaseUrl: base_url.pathname + 'ws/',
|
||||||
|
@ -4,7 +4,6 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from math import ceil
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@ -12,10 +11,9 @@ from typing import Optional
|
|||||||
import magic
|
import magic
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from pdf2image import convert_from_path
|
from pdf2image import convert_from_path
|
||||||
|
from pdf2image.exceptions import PDFPageCountError
|
||||||
from pikepdf import Page
|
from pikepdf import Page
|
||||||
from pikepdf import PasswordError
|
|
||||||
from pikepdf import Pdf
|
from pikepdf import Pdf
|
||||||
from pikepdf import PdfImage
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from PIL import ImageSequence
|
from PIL import ImageSequence
|
||||||
from pyzbar import pyzbar
|
from pyzbar import pyzbar
|
||||||
@ -154,52 +152,15 @@ def scan_file_for_barcodes(
|
|||||||
(page_number, barcode_text) tuples
|
(page_number, barcode_text) tuples
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]:
|
|
||||||
detected_barcodes = []
|
|
||||||
with Pdf.open(pdf_filepath) as pdf:
|
|
||||||
for page_num, page in enumerate(pdf.pages):
|
|
||||||
for image_key in page.images:
|
|
||||||
pdfimage = PdfImage(page.images[image_key])
|
|
||||||
|
|
||||||
# This type is known to have issues:
|
|
||||||
# https://github.com/pikepdf/pikepdf/issues/401
|
|
||||||
if "/CCITTFaxDecode" in pdfimage.filters:
|
|
||||||
raise BarcodeImageFormatError(
|
|
||||||
"Unable to decode CCITTFaxDecode images",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Not all images can be transcoded to a PIL image, which
|
|
||||||
# is what pyzbar expects to receive, so this may
|
|
||||||
# raise an exception, triggering fallback
|
|
||||||
pillow_img = pdfimage.as_pil_image()
|
|
||||||
|
|
||||||
# Scale the image down
|
|
||||||
# See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
|
|
||||||
# TLDR: zbar has issues with larger images
|
|
||||||
width, height = pillow_img.size
|
|
||||||
if width > 1024:
|
|
||||||
scaler = ceil(width / 1024)
|
|
||||||
new_width = int(width / scaler)
|
|
||||||
new_height = int(height / scaler)
|
|
||||||
pillow_img = pillow_img.resize((new_width, new_height))
|
|
||||||
|
|
||||||
width, height = pillow_img.size
|
|
||||||
if height > 2048:
|
|
||||||
scaler = ceil(height / 2048)
|
|
||||||
new_width = int(width / scaler)
|
|
||||||
new_height = int(height / scaler)
|
|
||||||
pillow_img = pillow_img.resize((new_width, new_height))
|
|
||||||
|
|
||||||
for barcode_value in barcode_reader(pillow_img):
|
|
||||||
detected_barcodes.append(Barcode(page_num, barcode_value))
|
|
||||||
|
|
||||||
return detected_barcodes
|
|
||||||
|
|
||||||
def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
|
def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
|
||||||
detected_barcodes = []
|
detected_barcodes = []
|
||||||
# use a temporary directory in case the file is too big to handle in memory
|
# use a temporary directory in case the file is too big to handle in memory
|
||||||
with tempfile.TemporaryDirectory() as path:
|
with tempfile.TemporaryDirectory() as path:
|
||||||
pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
|
pages_from_path = convert_from_path(
|
||||||
|
pdf_filepath,
|
||||||
|
dpi=300,
|
||||||
|
output_folder=path,
|
||||||
|
)
|
||||||
for current_page_number, page in enumerate(pages_from_path):
|
for current_page_number, page in enumerate(pages_from_path):
|
||||||
for barcode_value in barcode_reader(page):
|
for barcode_value in barcode_reader(page):
|
||||||
detected_barcodes.append(
|
detected_barcodes.append(
|
||||||
@ -219,27 +180,19 @@ def scan_file_for_barcodes(
|
|||||||
# Always try pikepdf first, it's usually fine, faster and
|
# Scan by rendering each page to an image with pdf2image; the
|
||||||
# uses less memory
|
# pikepdf-based image extraction path is no longer used
|
||||||
try:
|
try:
|
||||||
barcodes = _pikepdf_barcode_scan(pdf_filepath)
|
barcodes = _pdf2image_barcode_scan(pdf_filepath)
|
||||||
# Password protected files can't be checked
|
# Password protected files can't be checked
|
||||||
except PasswordError as e:
|
# This is the exception raised for those
|
||||||
|
except PDFPageCountError as e:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"File is likely password protected, not checking for barcodes: {e}",
|
f"File is likely password protected, not checking for barcodes: {e}",
|
||||||
)
|
)
|
||||||
# Handle pikepdf related image decoding issues with a fallback to page
|
# This file is really borked, allow the consumption to continue
|
||||||
# by page conversion to images in a temporary directory
|
# but it may fail further on
|
||||||
except Exception as e:
|
except Exception as e: # pragma: no cover
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Falling back to pdf2image because: {e}",
|
f"Exception during barcode scanning: {e}",
|
||||||
)
|
)
|
||||||
try:
|
|
||||||
barcodes = _pdf2image_barcode_scan(pdf_filepath)
|
|
||||||
# This file is really borked, allow the consumption to continue
|
|
||||||
# but it may fail further on
|
|
||||||
except Exception as e: # pragma: no cover
|
|
||||||
logger.warning(
|
|
||||||
f"Exception during barcode scanning: {e}",
|
|
||||||
)
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Unsupported file format for barcode reader: {str(mime_type)}",
|
f"Unsupported file format for barcode reader: {str(mime_type)}",
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
from subprocess import CompletedProcess
|
from subprocess import CompletedProcess
|
||||||
from subprocess import run
|
from subprocess import run
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@ -94,7 +97,8 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.path = None
|
self.path: Optional[Path] = None
|
||||||
|
self.original_path: Optional[Path] = None
|
||||||
self.filename = None
|
self.filename = None
|
||||||
self.override_title = None
|
self.override_title = None
|
||||||
self.override_correspondent_id = None
|
self.override_correspondent_id = None
|
||||||
@ -167,16 +171,18 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
||||||
|
|
||||||
filepath_arg = os.path.normpath(self.path)
|
working_file_path = str(self.path)
|
||||||
|
original_file_path = str(self.original_path)
|
||||||
|
|
||||||
script_env = os.environ.copy()
|
script_env = os.environ.copy()
|
||||||
script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg
|
script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
|
||||||
|
script_env["DOCUMENT_WORKING_PATH"] = working_file_path
|
||||||
|
|
||||||
try:
|
try:
|
||||||
completed_proc = run(
|
completed_proc = run(
|
||||||
args=[
|
args=[
|
||||||
settings.PRE_CONSUME_SCRIPT,
|
settings.PRE_CONSUME_SCRIPT,
|
||||||
filepath_arg,
|
original_file_path,
|
||||||
],
|
],
|
||||||
env=script_env,
|
env=script_env,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
@ -195,7 +201,7 @@ class Consumer(LoggingMixin):
|
|||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
|
|
||||||
def run_post_consume_script(self, document):
|
def run_post_consume_script(self, document: Document):
|
||||||
if not settings.POST_CONSUME_SCRIPT:
|
if not settings.POST_CONSUME_SCRIPT:
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -285,8 +291,8 @@ class Consumer(LoggingMixin):
|
|||||||
Return the document object if it was successfully created.
|
Return the document object if it was successfully created.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.path = path
|
self.path = Path(path).resolve()
|
||||||
self.filename = override_filename or os.path.basename(path)
|
self.filename = override_filename or self.path.name
|
||||||
self.override_title = override_title
|
self.override_title = override_title
|
||||||
self.override_correspondent_id = override_correspondent_id
|
self.override_correspondent_id = override_correspondent_id
|
||||||
self.override_document_type_id = override_document_type_id
|
self.override_document_type_id = override_document_type_id
|
||||||
@ -311,6 +317,15 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
self.log("info", f"Consuming {self.filename}")
|
self.log("info", f"Consuming {self.filename}")
|
||||||
|
|
||||||
|
# For the actual work, copy the file into a tempdir
|
||||||
|
self.original_path = self.path
|
||||||
|
tempdir = tempfile.TemporaryDirectory(
|
||||||
|
prefix="paperless-ngx",
|
||||||
|
dir=settings.SCRATCH_DIR,
|
||||||
|
)
|
||||||
|
self.path = Path(tempdir.name) / Path(self.filename)
|
||||||
|
shutil.copy(self.original_path, self.path)
|
||||||
|
|
||||||
# Determine the parser class.
|
# Determine the parser class.
|
||||||
|
|
||||||
mime_type = magic.from_file(self.path, mime=True)
|
mime_type = magic.from_file(self.path, mime=True)
|
||||||
@ -453,11 +468,12 @@ class Consumer(LoggingMixin):
|
|||||||
# Delete the file only if it was successfully consumed
|
# Delete the file only if it was successfully consumed
|
||||||
self.log("debug", f"Deleting file {self.path}")
|
self.log("debug", f"Deleting file {self.path}")
|
||||||
os.unlink(self.path)
|
os.unlink(self.path)
|
||||||
|
self.original_path.unlink()
|
||||||
|
|
||||||
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
||||||
shadow_file = os.path.join(
|
shadow_file = os.path.join(
|
||||||
os.path.dirname(self.path),
|
os.path.dirname(self.original_path),
|
||||||
"._" + os.path.basename(self.path),
|
"._" + os.path.basename(self.original_path),
|
||||||
)
|
)
|
||||||
|
|
||||||
if os.path.isfile(shadow_file):
|
if os.path.isfile(shadow_file):
|
||||||
@ -474,6 +490,7 @@ class Consumer(LoggingMixin):
|
|||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
document_parser.cleanup()
|
document_parser.cleanup()
|
||||||
|
tempdir.cleanup()
|
||||||
|
|
||||||
self.run_post_consume_script(document)
|
self.run_post_consume_script(document)
|
||||||
|
|
||||||
|
Before Width: | Height: | Size: 33 KiB After Width: | Height: | Size: 33 KiB |
Before Width: | Height: | Size: 39 KiB After Width: | Height: | Size: 39 KiB |
File diff suppressed because it is too large
Load Diff
@ -833,7 +833,8 @@ class PreConsumeTestCase(TestCase):
|
|||||||
with tempfile.NamedTemporaryFile() as script:
|
with tempfile.NamedTemporaryFile() as script:
|
||||||
with override_settings(PRE_CONSUME_SCRIPT=script.name):
|
with override_settings(PRE_CONSUME_SCRIPT=script.name):
|
||||||
c = Consumer()
|
c = Consumer()
|
||||||
c.path = "path-to-file"
|
c.original_path = "path-to-file"
|
||||||
|
c.path = "/tmp/somewhere/path-to-file"
|
||||||
c.run_pre_consume_script()
|
c.run_pre_consume_script()
|
||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
@ -841,10 +842,19 @@ class PreConsumeTestCase(TestCase):
|
|||||||
args, kwargs = m.call_args
|
args, kwargs = m.call_args
|
||||||
|
|
||||||
command = kwargs["args"]
|
command = kwargs["args"]
|
||||||
|
environment = kwargs["env"]
|
||||||
|
|
||||||
self.assertEqual(command[0], script.name)
|
self.assertEqual(command[0], script.name)
|
||||||
self.assertEqual(command[1], "path-to-file")
|
self.assertEqual(command[1], "path-to-file")
|
||||||
|
|
||||||
|
self.assertDictContainsSubset(
|
||||||
|
{
|
||||||
|
"DOCUMENT_SOURCE_PATH": c.original_path,
|
||||||
|
"DOCUMENT_WORKING_PATH": c.path,
|
||||||
|
},
|
||||||
|
environment,
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch("documents.consumer.Consumer.log")
|
@mock.patch("documents.consumer.Consumer.log")
|
||||||
def test_script_with_output(self, mocked_log):
|
def test_script_with_output(self, mocked_log):
|
||||||
"""
|
"""
|
||||||
@ -961,9 +971,10 @@ class PostConsumeTestCase(TestCase):
|
|||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
args, kwargs = m.call_args
|
_, kwargs = m.call_args
|
||||||
|
|
||||||
command = kwargs["args"]
|
command = kwargs["args"]
|
||||||
|
environment = kwargs["env"]
|
||||||
|
|
||||||
self.assertEqual(command[0], script.name)
|
self.assertEqual(command[0], script.name)
|
||||||
self.assertEqual(command[1], str(doc.pk))
|
self.assertEqual(command[1], str(doc.pk))
|
||||||
@ -972,6 +983,17 @@ class PostConsumeTestCase(TestCase):
|
|||||||
self.assertEqual(command[7], "my_bank")
|
self.assertEqual(command[7], "my_bank")
|
||||||
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
||||||
|
|
||||||
|
self.assertDictContainsSubset(
|
||||||
|
{
|
||||||
|
"DOCUMENT_ID": str(doc.pk),
|
||||||
|
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
|
||||||
|
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
|
||||||
|
"DOCUMENT_CORRESPONDENT": "my_bank",
|
||||||
|
"DOCUMENT_TAGS": "a,b",
|
||||||
|
},
|
||||||
|
environment,
|
||||||
|
)
|
||||||
|
|
||||||
def test_script_exit_non_zero(self):
|
def test_script_exit_non_zero(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
|
@ -3,6 +3,7 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
from django.apps import apps
|
from django.apps import apps
|
||||||
from django.db import connection
|
from django.db import connection
|
||||||
@ -86,6 +87,30 @@ class DirectoriesMixin:
|
|||||||
remove_dirs(self.dirs)
|
remove_dirs(self.dirs)
|
||||||
|
|
||||||
|
|
||||||
|
class ConsumerProgressMixin:
|
||||||
|
def setUp(self) -> None:
|
||||||
|
self.send_progress_patcher = mock.patch(
|
||||||
|
"documents.consumer.Consumer._send_progress",
|
||||||
|
)
|
||||||
|
self.send_progress_mock = self.send_progress_patcher.start()
|
||||||
|
super().setUp()
|
||||||
|
|
||||||
|
def tearDown(self) -> None:
|
||||||
|
super().tearDown()
|
||||||
|
self.send_progress_patcher.stop()
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentConsumeDelayMixin:
|
||||||
|
def setUp(self) -> None:
|
||||||
|
self.consume_file_patcher = mock.patch("documents.tasks.consume_file.delay")
|
||||||
|
self.consume_file_mock = self.consume_file_patcher.start()
|
||||||
|
super().setUp()
|
||||||
|
|
||||||
|
def tearDown(self) -> None:
|
||||||
|
super().tearDown()
|
||||||
|
self.consume_file_patcher.stop()
|
||||||
|
|
||||||
|
|
||||||
class TestMigrations(TransactionTestCase):
|
class TestMigrations(TransactionTestCase):
|
||||||
@property
|
@property
|
||||||
def app(self):
|
def app(self):
|
||||||
|
@ -477,21 +477,14 @@ class DocumentViewSet(
|
|||||||
class SearchResultSerializer(DocumentSerializer):
|
class SearchResultSerializer(DocumentSerializer):
|
||||||
def to_representation(self, instance):
|
def to_representation(self, instance):
|
||||||
doc = Document.objects.get(id=instance["id"])
|
doc = Document.objects.get(id=instance["id"])
|
||||||
comments = ""
|
comments = ",".join(
|
||||||
if hasattr(instance.results.q, "subqueries"):
|
[str(c.comment) for c in Comment.objects.filter(document=instance["id"])],
|
||||||
commentTerm = instance.results.q.subqueries[0]
|
)
|
||||||
comments = ",".join(
|
|
||||||
[
|
|
||||||
str(c.comment)
|
|
||||||
for c in Comment.objects.filter(document=instance["id"])
|
|
||||||
if commentTerm.text in c.comment
|
|
||||||
],
|
|
||||||
)
|
|
||||||
r = super().to_representation(doc)
|
r = super().to_representation(doc)
|
||||||
r["__search_hit__"] = {
|
r["__search_hit__"] = {
|
||||||
"score": instance.score,
|
"score": instance.score,
|
||||||
"highlights": instance.highlights("content", text=doc.content),
|
"highlights": instance.highlights("content", text=doc.content),
|
||||||
"comment_highlights": instance.highlights("content", text=comments)
|
"comment_highlights": instance.highlights("comments", text=comments)
|
||||||
if doc
|
if doc
|
||||||
else None,
|
else None,
|
||||||
"rank": instance.rank,
|
"rank": instance.rank,
|
||||||
|
@ -271,6 +271,16 @@ class MailDocumentParser(DocumentParser):
|
|||||||
"paperHeight": "11.7",
|
"paperHeight": "11.7",
|
||||||
"scale": "1.0",
|
"scale": "1.0",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Set the output format of the resulting PDF
|
||||||
|
# Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno
|
||||||
|
if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
|
||||||
|
data["pdfFormat"] = "PDF/A-2b"
|
||||||
|
elif settings.OCR_OUTPUT_TYPE == "pdfa-1":
|
||||||
|
data["pdfFormat"] = "PDF/A-1a"
|
||||||
|
elif settings.OCR_OUTPUT_TYPE == "pdfa-3":
|
||||||
|
data["pdfFormat"] = "PDF/A-3b"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
url,
|
url,
|
||||||
|
@ -573,8 +573,8 @@ class TestParser(TestCase):
|
|||||||
self.parser.gotenberg_server + "/forms/chromium/convert/html",
|
self.parser.gotenberg_server + "/forms/chromium/convert/html",
|
||||||
mock_post.call_args.args[0],
|
mock_post.call_args.args[0],
|
||||||
)
|
)
|
||||||
self.assertEqual({}, mock_post.call_args.kwargs["headers"])
|
self.assertDictEqual({}, mock_post.call_args.kwargs["headers"])
|
||||||
self.assertEqual(
|
self.assertDictEqual(
|
||||||
{
|
{
|
||||||
"marginTop": "0.1",
|
"marginTop": "0.1",
|
||||||
"marginBottom": "0.1",
|
"marginBottom": "0.1",
|
||||||
@ -583,6 +583,7 @@ class TestParser(TestCase):
|
|||||||
"paperWidth": "8.27",
|
"paperWidth": "8.27",
|
||||||
"paperHeight": "11.7",
|
"paperHeight": "11.7",
|
||||||
"scale": "1.0",
|
"scale": "1.0",
|
||||||
|
"pdfFormat": "PDF/A-2b",
|
||||||
},
|
},
|
||||||
mock_post.call_args.kwargs["data"],
|
mock_post.call_args.kwargs["data"],
|
||||||
)
|
)
|
||||||
@ -663,8 +664,8 @@ class TestParser(TestCase):
|
|||||||
self.parser.gotenberg_server + "/forms/chromium/convert/html",
|
self.parser.gotenberg_server + "/forms/chromium/convert/html",
|
||||||
mock_post.call_args.args[0],
|
mock_post.call_args.args[0],
|
||||||
)
|
)
|
||||||
self.assertEqual({}, mock_post.call_args.kwargs["headers"])
|
self.assertDictEqual({}, mock_post.call_args.kwargs["headers"])
|
||||||
self.assertEqual(
|
self.assertDictEqual(
|
||||||
{
|
{
|
||||||
"marginTop": "0.1",
|
"marginTop": "0.1",
|
||||||
"marginBottom": "0.1",
|
"marginBottom": "0.1",
|
||||||
|
@ -95,9 +95,19 @@ class TikaDocumentParser(DocumentParser):
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
headers = {}
|
headers = {}
|
||||||
|
data = {}
|
||||||
|
|
||||||
|
# Set the output format of the resulting PDF
|
||||||
|
# Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno
|
||||||
|
if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
|
||||||
|
data["pdfFormat"] = "PDF/A-2b"
|
||||||
|
elif settings.OCR_OUTPUT_TYPE == "pdfa-1":
|
||||||
|
data["pdfFormat"] = "PDF/A-1a"
|
||||||
|
elif settings.OCR_OUTPUT_TYPE == "pdfa-3":
|
||||||
|
data["pdfFormat"] = "PDF/A-3b"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.post(url, files=files, headers=headers)
|
response = requests.post(url, files=files, headers=headers, data=data)
|
||||||
response.raise_for_status() # ensure we notice bad responses
|
response.raise_for_status() # ensure we notice bad responses
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
raise ParseError(
|
raise ParseError(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user