Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-10-30 03:56:23 -05:00)

Commit: Merge remote-tracking branch 'origin/dev'
Author: Trenton Holmes

.github/scripts/cleanup-tags.py (vendored): 71 lines changed
							| @@ -15,6 +15,8 @@ from github import ContainerPackage | ||||
| from github import GithubBranchApi | ||||
| from github import GithubContainerRegistryApi | ||||
|  | ||||
| import docker | ||||
|  | ||||
| logger = logging.getLogger("cleanup-tags") | ||||
|  | ||||
|  | ||||
| @@ -151,12 +153,16 @@ class RegistryTagsCleaner: | ||||
|             for tag in sorted(self.tags_to_keep): | ||||
|                 full_name = f"ghcr.io/{self.repo_owner}/{self.package_name}:{tag}" | ||||
|                 logger.info(f"Checking manifest for {full_name}") | ||||
|                 # TODO: It would be nice to use RegistryData from docker | ||||
|                 # except the ID doesn't map to anything in the manifest | ||||
|                 try: | ||||
|                     proc = subprocess.run( | ||||
|                         [ | ||||
|                             shutil.which("docker"), | ||||
|                             "manifest", | ||||
|                             "buildx", | ||||
|                             "imagetools", | ||||
|                             "inspect", | ||||
|                             "--raw", | ||||
|                             full_name, | ||||
|                         ], | ||||
|                         capture_output=True, | ||||
| @@ -241,6 +247,65 @@ class RegistryTagsCleaner: | ||||
|         # By default, keep anything which is tagged | ||||
|         self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys())) | ||||
|  | ||||
|     def check_tags_pull(self): | ||||
|         """ | ||||
|         This method uses the Docker Python SDK to confirm all tags which were | ||||
|         kept still pull, for all platforms. | ||||
|  | ||||
|         TODO: This is much slower (although more comprehensive).  Maybe a Pool? | ||||
|         """ | ||||
|         logger.info("Beginning confirmation step") | ||||
|         client = docker.from_env() | ||||
|         imgs = [] | ||||
|         for tag in sorted(self.tags_to_keep): | ||||
|             repository = f"ghcr.io/{self.repo_owner}/{self.package_name}" | ||||
|             for arch, variant in [("amd64", None), ("arm64", None), ("arm", "v7")]: | ||||
|                 # From 11.2.0 onwards, qpdf is cross compiled, so there is a single arch, amd64 | ||||
|                 # skip others in this case | ||||
|                 if "qpdf" in self.package_name and arch != "amd64" and tag == "11.2.0": | ||||
|                     continue | ||||
|                 # Skip beta and release candidate tags | ||||
|                 elif "beta" in tag: | ||||
|                     continue | ||||
|  | ||||
|                 # Build the platform name | ||||
|                 if variant is not None: | ||||
|                     platform = f"linux/{arch}/{variant}" | ||||
|                 else: | ||||
|                     platform = f"linux/{arch}" | ||||
|  | ||||
|                 try: | ||||
|                     logger.info(f"Pulling {repository}:{tag} for {platform}") | ||||
|                     image = client.images.pull( | ||||
|                         repository=repository, | ||||
|                         tag=tag, | ||||
|                         platform=platform, | ||||
|                     ) | ||||
|                     imgs.append(image) | ||||
|                 except docker.errors.APIError as e: | ||||
|                     logger.error( | ||||
|                         f"Failed to pull {repository}:{tag}: {e}", | ||||
|                     ) | ||||
|  | ||||
|             # Prevent out of space errors by removing after a few | ||||
|             # pulls | ||||
|             if len(imgs) > 50: | ||||
|                 for image in imgs: | ||||
|                     try: | ||||
|                         client.images.remove(image.id) | ||||
|                     except docker.errors.APIError as e: | ||||
|                         err_str = str(e) | ||||
|                         # Ignore attempts to remove images that are partly shared | ||||
|                         # Ignore images which are somehow gone already | ||||
|                         if ( | ||||
|                             "must be forced" not in err_str | ||||
|                             and "No such image" not in err_str | ||||
|                         ): | ||||
|                             logger.error( | ||||
|                                 f"Remove image ghcr.io/{self.repo_owner}/{self.package_name}:{tag} failed: {e}", | ||||
|                             ) | ||||
|                 imgs = [] | ||||
|  | ||||
|  | ||||
| class MainImageTagsCleaner(RegistryTagsCleaner): | ||||
|     def decide_what_tags_to_keep(self): | ||||
| @@ -397,6 +462,10 @@ def _main(): | ||||
|             # Clean images which are untagged | ||||
|             cleaner.clean_untagged(args.is_manifest) | ||||
|  | ||||
|             # Verify remaining tags still pull | ||||
|             if args.is_manifest: | ||||
|                 cleaner.check_tags_pull() | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     _main() | ||||
|   | ||||
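The TODO in `check_tags_pull` above notes the serial pulls are slow and floats the idea of a Pool. A minimal sketch of that idea with a thread pool follows; the helper names and worker count are illustrative, and it assumes one Docker SDK client per call since the client is not documented as thread-safe:

```python
# Illustrative only: a parallel variant of the pull-verification loop above.
from concurrent.futures import ThreadPoolExecutor

import docker


def _verify_pull(repository: str, tag: str, platform: str) -> bool:
    client = docker.from_env()  # one client per call, to avoid sharing sessions
    try:
        client.images.pull(repository=repository, tag=tag, platform=platform)
        return True
    except docker.errors.APIError:
        return False


def check_tags_pull_parallel(targets: list[tuple[str, str, str]]) -> bool:
    # targets: (repository, tag, platform) triples, built exactly as in the
    # method above; a small pool keeps disk and network pressure manageable
    with ThreadPoolExecutor(max_workers=4) as pool:
        return all(pool.map(lambda t: _verify_pull(*t), targets))
```

The disk-space housekeeping from the original loop would still be needed; pulled images accumulate regardless of how they were pulled.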
.github/workflows/ci.yml (vendored): 6 lines changed
							| @@ -212,12 +212,6 @@ jobs: | ||||
|     name: Prepare Docker Pipeline Data | ||||
|     if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/beta' || contains(github.ref, 'beta.rc') || startsWith(github.ref, 'refs/tags/v')) | ||||
|     runs-on: ubuntu-22.04 | ||||
|     # If the push triggered the installer library workflow, wait for it to | ||||
|     # complete here.  This ensures the required versions for the final | ||||
|     # image have been built, while not waiting at all if the versions haven't changed | ||||
|     concurrency: | ||||
|       group: build-installer-library | ||||
|       cancel-in-progress: false | ||||
|     needs: | ||||
|       - documentation | ||||
|       - tests-backend | ||||
|   | ||||
.github/workflows/cleanup-tags.yml (vendored): 14 lines changed
							| @@ -62,9 +62,9 @@ jobs: | ||||
|         with: | ||||
|           python-version: "3.10" | ||||
|       - | ||||
|         name: Install httpx | ||||
|         name: Install Python libraries | ||||
|         run: | | ||||
|           python -m pip install httpx | ||||
|           python -m pip install httpx docker | ||||
|       # | ||||
|       # Clean up primary package | ||||
|       # | ||||
| @@ -81,13 +81,3 @@ jobs: | ||||
|         if: "${{ env.TOKEN != '' }}" | ||||
|         run: | | ||||
|           python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --delete "${{ matrix.cache-name }}" | ||||
|       # | ||||
|       # Verify tags which are left still pull | ||||
|       # | ||||
|       - | ||||
|         name: Check all tags still pull | ||||
|         run: | | ||||
|           ghcr_name=$(echo "ghcr.io/${GITHUB_REPOSITORY_OWNER}/${{ matrix.primary-name }}" | awk '{ print tolower($0) }') | ||||
|           echo "Pulling all tags of ${ghcr_name}" | ||||
|           docker pull --quiet --all-tags ${ghcr_name} | ||||
|           docker image list | ||||
|   | ||||
.github/workflows/installer-library.yml (vendored): 139 lines changed
							| @@ -169,3 +169,142 @@ jobs: | ||||
|         PIKEPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }} | ||||
|         PILLOW_VERSION=${{ needs.prepare-docker-build.outputs.pillow-version }} | ||||
|         LXML_VERSION=${{ needs.prepare-docker-build.outputs.lxml-version }} | ||||
|  | ||||
|   commit-binary-files: | ||||
|     name: Store installers | ||||
|     needs: | ||||
|       - prepare-docker-build | ||||
|       - build-qpdf-debs | ||||
|       - build-jbig2enc | ||||
|       - build-psycopg2-wheel | ||||
|       - build-pikepdf-wheel | ||||
|     runs-on: ubuntu-22.04 | ||||
|     steps: | ||||
|       - | ||||
|         name: Checkout | ||||
|         uses: actions/checkout@v3 | ||||
|         with: | ||||
|           ref: binary-library | ||||
|       - | ||||
|         name: Set up Python | ||||
|         uses: actions/setup-python@v4 | ||||
|         with: | ||||
|           python-version: "3.9" | ||||
|       - | ||||
|         name: Install system dependencies | ||||
|         run: | | ||||
|           sudo apt-get update -qq | ||||
|           sudo apt-get install -qq --no-install-recommends tree | ||||
|       - | ||||
|         name: Extract qpdf files | ||||
|         run: | | ||||
|           version=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).version }} | ||||
|           tag=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).image_tag }} | ||||
|  | ||||
|           docker pull --quiet ${tag} | ||||
|           docker create --name qpdf-extract ${tag} | ||||
|  | ||||
|           mkdir --parents qpdf/${version}/amd64 | ||||
|           docker cp qpdf-extract:/usr/src/qpdf/${version}/amd64 qpdf/${version} | ||||
|  | ||||
|           mkdir --parents qpdf/${version}/arm64 | ||||
|           docker cp qpdf-extract:/usr/src/qpdf/${version}/arm64 qpdf/${version} | ||||
|  | ||||
|           mkdir --parents qpdf/${version}/armv7 | ||||
|           docker cp qpdf-extract:/usr/src/qpdf/${version}/armv7 qpdf/${version} | ||||
|       - | ||||
|         name: Extract psycopg2 files | ||||
|         run: | | ||||
|           version=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).version }} | ||||
|           tag=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).image_tag }} | ||||
|  | ||||
|           docker pull --quiet --platform linux/amd64 ${tag} | ||||
|           docker create --platform linux/amd64 --name psycopg2-extract ${tag} | ||||
|           mkdir --parents psycopg2/${version}/amd64 | ||||
|           docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/amd64 | ||||
|           mv psycopg2/${version}/amd64/wheels/* psycopg2/${version}/amd64 | ||||
|           rm -r psycopg2/${version}/amd64/wheels/ | ||||
|           docker rm psycopg2-extract | ||||
|  | ||||
|           docker pull --quiet --platform linux/arm64 ${tag} | ||||
|           docker create --platform linux/arm64 --name psycopg2-extract ${tag} | ||||
|           mkdir --parents psycopg2/${version}/arm64 | ||||
|           docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/arm64 | ||||
|           mv psycopg2/${version}/arm64/wheels/* psycopg2/${version}/arm64 | ||||
|           rm -r psycopg2/${version}/arm64/wheels/ | ||||
|           docker rm psycopg2-extract | ||||
|  | ||||
|           docker pull --quiet --platform linux/arm/v7 ${tag} | ||||
|           docker create --platform linux/arm/v7 --name psycopg2-extract ${tag} | ||||
|           mkdir --parents psycopg2/${version}/armv7 | ||||
|           docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/armv7 | ||||
|           mv psycopg2/${version}/armv7/wheels/* psycopg2/${version}/armv7 | ||||
|           rm -r psycopg2/${version}/armv7/wheels/ | ||||
|           docker rm psycopg2-extract | ||||
|       - | ||||
|         name: Extract pikepdf files | ||||
|         run: | | ||||
|           version=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }} | ||||
|           tag=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).image_tag }} | ||||
|  | ||||
|           docker pull --quiet --platform linux/amd64 ${tag} | ||||
|           docker create --platform linux/amd64 --name pikepdf-extract ${tag} | ||||
|           mkdir --parents pikepdf/${version}/amd64 | ||||
|           docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/amd64 | ||||
|           mv pikepdf/${version}/amd64/wheels/* pikepdf/${version}/amd64 | ||||
|           rm -r pikepdf/${version}/amd64/wheels/ | ||||
|           docker rm pikepdf-extract | ||||
|  | ||||
|           docker pull --quiet --platform linux/arm64 ${tag} | ||||
|           docker create --platform linux/arm64 --name pikepdf-extract ${tag} | ||||
|           mkdir --parents pikepdf/${version}/arm64 | ||||
|           docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/arm64 | ||||
|           mv pikepdf/${version}/arm64/wheels/* pikepdf/${version}/arm64 | ||||
|           rm -r pikepdf/${version}/arm64/wheels/ | ||||
|           docker rm pikepdf-extract | ||||
|  | ||||
|           docker pull --quiet --platform linux/arm/v7 ${tag} | ||||
|           docker create --platform linux/arm/v7 --name pikepdf-extract ${tag} | ||||
|           mkdir --parents pikepdf/${version}/armv7 | ||||
|           docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/armv7 | ||||
|           mv pikepdf/${version}/armv7/wheels/* pikepdf/${version}/armv7 | ||||
|           rm -r pikepdf/${version}/armv7/wheels/ | ||||
|           docker rm pikepdf-extract | ||||
|       - | ||||
|         name: Extract jbig2enc files | ||||
|         run: | | ||||
|           version=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).version }} | ||||
|           tag=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).image_tag }} | ||||
|  | ||||
|           docker pull --quiet --platform linux/amd64 ${tag} | ||||
|           docker create --platform linux/amd64 --name jbig2enc-extract ${tag} | ||||
|           mkdir --parents jbig2enc/${version}/amd64 | ||||
|           docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/amd64/ | ||||
|           mv jbig2enc/${version}/amd64/build/* jbig2enc/${version}/amd64/ | ||||
|           docker rm jbig2enc-extract | ||||
|  | ||||
|           docker pull --quiet --platform linux/arm64 ${tag} | ||||
|           docker create --platform linux/arm64 --name jbig2enc-extract ${tag} | ||||
|           mkdir --parents jbig2enc/${version}/arm64 | ||||
|           docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/arm64 | ||||
|           mv jbig2enc/${version}/arm64/build/* jbig2enc/${version}/arm64/ | ||||
|           docker rm jbig2enc-extract | ||||
|  | ||||
|           docker pull --quiet --platform linux/arm/v7 ${tag} | ||||
|           docker create --platform linux/arm/v7 --name jbig2enc-extract ${tag} | ||||
|           mkdir --parents jbig2enc/${version}/armv7 | ||||
|           docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/armv7 | ||||
|           mv jbig2enc/${version}/armv7/build/* jbig2enc/${version}/armv7/ | ||||
|           docker rm jbig2enc-extract | ||||
|       - | ||||
|         name: Show file structure | ||||
|         run: | | ||||
|           tree . | ||||
|       - | ||||
|         name: Commit files | ||||
|         run: | | ||||
|           git config --global user.name "github-actions" | ||||
|           git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" | ||||
|           git add pikepdf/ qpdf/ psycopg2/ jbig2enc/ | ||||
|           git commit -m "Updating installer packages" || true | ||||
|           git push origin || true | ||||
|   | ||||
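Each extract step above repeats the same pull/create/cp/rm sequence once per platform. For orientation only, the pattern condenses to a loop like this; the function and container names are illustrative, not part of the workflow:

```python
# Illustrative condensation of the repeated wheel-extraction steps above.
import subprocess
from pathlib import Path

PLATFORMS = [("linux/amd64", "amd64"), ("linux/arm64", "arm64"), ("linux/arm/v7", "armv7")]


def extract_wheels(image_tag: str, package: str, version: str) -> None:
    for platform, arch in PLATFORMS:
        dest = Path(package) / version / arch
        dest.mkdir(parents=True, exist_ok=True)
        subprocess.run(["docker", "pull", "--quiet", "--platform", platform, image_tag], check=True)
        subprocess.run(["docker", "create", "--platform", platform, "--name", "extract", image_tag], check=True)
        # Copy the wheel directory's contents out of the stopped container, then discard it
        subprocess.run(["docker", "cp", "extract:/usr/src/wheels/.", str(dest)], check=True)
        subprocess.run(["docker", "rm", "extract"], check=True)
```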
Dockerfile: 53 lines changed
							| @@ -1,19 +1,5 @@ | ||||
| # syntax=docker/dockerfile:1.4 | ||||
|  | ||||
| # Pull the installer images from the library | ||||
| # These are all built previously | ||||
| # They provide either a .deb or .whl | ||||
|  | ||||
| ARG JBIG2ENC_VERSION | ||||
| ARG QPDF_VERSION | ||||
| ARG PIKEPDF_VERSION | ||||
| ARG PSYCOPG2_VERSION | ||||
|  | ||||
| FROM ghcr.io/paperless-ngx/paperless-ngx/builder/jbig2enc:${JBIG2ENC_VERSION} as jbig2enc-builder | ||||
| FROM --platform=$BUILDPLATFORM ghcr.io/paperless-ngx/paperless-ngx/builder/qpdf:${QPDF_VERSION} as qpdf-builder | ||||
| FROM ghcr.io/paperless-ngx/paperless-ngx/builder/pikepdf:${PIKEPDF_VERSION} as pikepdf-builder | ||||
| FROM ghcr.io/paperless-ngx/paperless-ngx/builder/psycopg2:${PSYCOPG2_VERSION} as psycopg2-builder | ||||
|  | ||||
| FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend | ||||
|  | ||||
| # This stage compiles the frontend | ||||
| @@ -58,24 +44,21 @@ LABEL org.opencontainers.image.url="https://github.com/paperless-ngx/paperless-n | ||||
| LABEL org.opencontainers.image.licenses="GPL-3.0-only" | ||||
|  | ||||
| ARG DEBIAN_FRONTEND=noninteractive | ||||
| # Buildx provided | ||||
| # Buildx provided, must be defined to use though | ||||
| ARG TARGETARCH | ||||
| ARG TARGETVARIANT | ||||
|  | ||||
| # Workflow provided | ||||
| ARG JBIG2ENC_VERSION | ||||
| ARG QPDF_VERSION | ||||
| ARG PIKEPDF_VERSION | ||||
| ARG PSYCOPG2_VERSION | ||||
|  | ||||
| # | ||||
| # Begin installation and configuration | ||||
| # Order the steps below from least often changed to most | ||||
| # | ||||
|  | ||||
| # copy jbig2enc | ||||
| # Basically will never change again | ||||
| COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/ | ||||
| COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/jbig2 /usr/local/bin/ | ||||
| COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/*.h /usr/local/include/ | ||||
|  | ||||
| # Packages needed for running | ||||
| ARG RUNTIME_PACKAGES="\ | ||||
|   # Python | ||||
| @@ -198,19 +181,29 @@ RUN set -eux \ | ||||
| # Install the built packages from the installer library images | ||||
| # Use mounts to avoid copying installer files into the image | ||||
| # These change sometimes | ||||
| RUN --mount=type=bind,from=qpdf-builder,target=/qpdf \ | ||||
|     --mount=type=bind,from=psycopg2-builder,target=/psycopg2 \ | ||||
|     --mount=type=bind,from=pikepdf-builder,target=/pikepdf \ | ||||
|   set -eux \ | ||||
| RUN set -eux \ | ||||
|   && echo "Getting binaries" \ | ||||
|     && mkdir paperless-ngx \ | ||||
|     && curl --fail --silent --show-error --output paperless-ngx.tar.gz --location https://github.com/paperless-ngx/paperless-ngx/archive/41d6e7e407af09a0882736d50c89b6e015997bff.tar.gz \ | ||||
|     && tar -xf paperless-ngx.tar.gz --directory paperless-ngx --strip-components=1 \ | ||||
|     && cd paperless-ngx \ | ||||
|     # Setting a specific revision ensures we know what this installed | ||||
|     # and ensures cache breaking on changes | ||||
|   && echo "Installing jbig2enc" \ | ||||
|     && cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/jbig2 /usr/local/bin/ \ | ||||
|     && cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/libjbig2enc* /usr/local/lib/ \ | ||||
|   && echo "Installing qpdf" \ | ||||
|     && apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/libqpdf29_*.deb \ | ||||
|     && apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/qpdf_*.deb \ | ||||
|     && apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/libqpdf29_*.deb \ | ||||
|     && apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/qpdf_*.deb \ | ||||
|   && echo "Installing pikepdf and dependencies" \ | ||||
|     && python3 -m pip install --no-cache-dir /pikepdf/usr/src/wheels/*.whl \ | ||||
|     && python3 -m pip install --no-cache-dir ./pikepdf/${PIKEPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/*.whl \ | ||||
|     && python3 -m pip list \ | ||||
|   && echo "Installing psycopg2" \ | ||||
|     && python3 -m pip install --no-cache-dir /psycopg2/usr/src/wheels/psycopg2*.whl \ | ||||
|     && python3 -m pip list | ||||
|     && python3 -m pip install --no-cache-dir ./psycopg2/${PSYCOPG2_VERSION}/${TARGETARCH}${TARGETVARIANT}/psycopg2*.whl \ | ||||
|     && python3 -m pip list \ | ||||
|   && echo "Cleaning up image layer" \ | ||||
|     && cd ../ \ | ||||
|     && rm -rf paperless-ngx | ||||
|  | ||||
| WORKDIR /usr/src/paperless/src/ | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,20 @@ RUN set -eux \ | ||||
|     && ./autogen.sh \ | ||||
|     && ./configure \ | ||||
|     && make \ | ||||
|   && echo "Gathering package data" \ | ||||
|     && dpkg-query -f '${Package;-40}${Version}\n' -W > ./pkg-list.txt \ | ||||
|   && echo "Cleaning up image" \ | ||||
|     && apt-get -y purge ${BUILD_PACKAGES} \ | ||||
|     && apt-get -y autoremove --purge \ | ||||
|     && rm -rf /var/lib/apt/lists/* | ||||
|     && rm -rf /var/lib/apt/lists/* \ | ||||
|   && echo "Moving files around" \ | ||||
|     && mkdir build \ | ||||
|     # Unlink a symlink that causes problems | ||||
|     && unlink ./src/.libs/libjbig2enc.la \ | ||||
|     # Move what the link pointed to | ||||
|     && mv ./src/libjbig2enc.la ./build/ \ | ||||
|     # Move the shared library .so files | ||||
|     && mv ./src/.libs/libjbig2enc* ./build/ \ | ||||
|     # And move the cli binary | ||||
|     && mv ./src/jbig2 ./build/ \ | ||||
|     && mv ./pkg-list.txt ./build/ | ||||
|   | ||||
| @@ -7,12 +7,17 @@ | ||||
| # Default to pulling from the main repo registry when manually building | ||||
| ARG REPO="paperless-ngx/paperless-ngx" | ||||
|  | ||||
| # This does nothing, except provide a name for a copy below | ||||
| ARG QPDF_VERSION | ||||
| FROM --platform=$BUILDPLATFORM ghcr.io/${REPO}/builder/qpdf:${QPDF_VERSION} as qpdf-builder | ||||
|  | ||||
| # This does nothing, except provide a name for a copy below | ||||
|  | ||||
| FROM python:3.9-slim-bullseye as main | ||||
| # | ||||
| # Stage: builder | ||||
| # Purpose: | ||||
| #  - Build the pikepdf wheel | ||||
| #  - Build any dependent wheels which can't be found | ||||
| # | ||||
| FROM python:3.9-slim-bullseye as builder | ||||
|  | ||||
| LABEL org.opencontainers.image.description="An intermediate image with the pikepdf wheel built" | ||||
|  | ||||
| @@ -100,3 +105,14 @@ RUN set -eux \ | ||||
|     && apt-get -y purge ${BUILD_PACKAGES} \ | ||||
|     && apt-get -y autoremove --purge \ | ||||
|     && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
| # | ||||
| # Stage: package | ||||
| # Purpose: Holds the compiled .whl files in a tiny image to pull | ||||
| # | ||||
| FROM alpine:3.17 as package | ||||
|  | ||||
| WORKDIR /usr/src/wheels/ | ||||
|  | ||||
| COPY --from=builder /usr/src/wheels/*.whl ./ | ||||
| COPY --from=builder /usr/src/wheels/pkg-list.txt ./ | ||||
|   | ||||
| @@ -2,7 +2,12 @@ | ||||
| # Inputs: | ||||
| #    - PSYCOPG2_VERSION - Version to build | ||||
|  | ||||
| FROM python:3.9-slim-bullseye as main | ||||
| # | ||||
| # Stage: builder | ||||
| # Purpose: | ||||
| #  - Build the psycopg2 wheel | ||||
| # | ||||
| FROM python:3.9-slim-bullseye as builder | ||||
|  | ||||
| LABEL org.opencontainers.image.description="An intermediate image with the psycopg2 wheel built" | ||||
|  | ||||
| @@ -48,3 +53,14 @@ RUN set -eux \ | ||||
|     && apt-get -y purge ${BUILD_PACKAGES} \ | ||||
|     && apt-get -y autoremove --purge \ | ||||
|     && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
| # | ||||
| # Stage: package | ||||
| # Purpose: Holds the compiled .whl files in a tiny image to pull | ||||
| # | ||||
| FROM alpine:3.17 as package | ||||
|  | ||||
| WORKDIR /usr/src/wheels/ | ||||
|  | ||||
| COPY --from=builder /usr/src/wheels/*.whl ./ | ||||
| COPY --from=builder /usr/src/wheels/pkg-list.txt ./ | ||||
|   | ||||
docker-builders/README.md (new file): 57 lines
| # Installer Library | ||||
|  | ||||
| This folder contains the Dockerfiles for building certain installers or libraries, which are then pulled into the main image. | ||||
|  | ||||
| ## [jbig2enc](https://github.com/agl/jbig2enc) | ||||
|  | ||||
| ### Why | ||||
|  | ||||
| JBIG is an image coding standard that can achieve better compression of images in PDFs. | ||||
|  | ||||
| ### What | ||||
|  | ||||
| The Docker image builds a shared library and a command line utility, which are copied into the correct locations in the final image. | ||||
|  | ||||
| See Also: | ||||
|  | ||||
| - [OCRMyPDF Documentation](https://ocrmypdf.readthedocs.io/en/latest/jbig2.html) | ||||
|  | ||||
| ## [psycopg2](https://www.psycopg.org/) | ||||
|  | ||||
| ### Why | ||||
|  | ||||
| The pre-built wheels of psycopg2 are built on Debian 9, which provides quite an old version of libpq-dev. This causes issues with authentication methods. | ||||
|  | ||||
| ### What | ||||
|  | ||||
| The image builds psycopg2 wheels on Debian 10 and places the produced wheels into `/usr/src/wheels/`. | ||||
|  | ||||
| See Also: | ||||
|  | ||||
| - [Issue 266](https://github.com/paperless-ngx/paperless-ngx/issues/266) | ||||
|  | ||||
| ## [qpdf](https://qpdf.readthedocs.io/en/stable/index.html) | ||||
|  | ||||
| ### Why | ||||
|  | ||||
| qpdf and its library provide tools to read, manipulate and fix up PDFs. Version 11 is required by `pikepdf` 6+, and Debian 9 does not provide anything above version 10. | ||||
|  | ||||
| ### What | ||||
|  | ||||
| The Docker image cross-compiles .deb installers for each supported architecture of the main image. The installers are placed in `/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/`. | ||||
|  | ||||
| ### Updating | ||||
|  | ||||
| 1. Ensure the given qpdf version is present in [Debian bookworm](https://packages.debian.org/bookworm/qpdf) | ||||
| 2. Update `.build-config.json` to the given version | ||||
| 3. If the Debian specific version has incremented, update `Dockerfile.qpdf` | ||||
|  | ||||
| ## [pikepdf](https://pikepdf.readthedocs.io/en/latest/) | ||||
|  | ||||
| ### Why | ||||
|  | ||||
| Required by OCRmyPDF, this is a general-purpose library for PDF manipulation in Python via the qpdf libraries. | ||||
|  | ||||
| ### What | ||||
|  | ||||
| The built wheels are placed into `/usr/src/wheels/`. | ||||
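Taken together, the sections above imply a binary-library branch layout along these lines (illustrative, with `<version>` and `<arch>` standing in for the concrete values):

```
jbig2enc/<version>/<arch>/jbig2, libjbig2enc*
pikepdf/<version>/<arch>/*.whl
psycopg2/<version>/<arch>/*.whl
qpdf/<version>/<arch>/*.deb
```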
| @@ -80,7 +80,7 @@ django_checks() { | ||||
|  | ||||
| search_index() { | ||||
|  | ||||
| 	local -r index_version=1 | ||||
| 	local -r index_version=2 | ||||
| 	local -r index_version_file=${DATA_DIR}/.index_version | ||||
|  | ||||
| 	if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then | ||||
|   | ||||
| @@ -121,7 +121,17 @@ Executed after the consumer sees a new document in the consumption | ||||
| folder, but before any processing of the document is performed. This | ||||
| script can access the following relevant environment variables set: | ||||
|  | ||||
| - `DOCUMENT_SOURCE_PATH` | ||||
| | Environment Variable    | Description                                                  | | ||||
| | ----------------------- | ------------------------------------------------------------ | | ||||
| | `DOCUMENT_SOURCE_PATH`  | Original path of the consumed document                       | | ||||
| | `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on | | ||||
|  | ||||
| !!! note | ||||
|  | ||||
|     Pre-consume scripts which modify the document should only change | ||||
|     the `DOCUMENT_WORKING_PATH` file or a second consume task may | ||||
|     be triggered, leading to failures as two tasks work on the | ||||
|     same document path | ||||
|  | ||||
| A simple but common example for this would be creating a simple script | ||||
| like this: | ||||
| @@ -130,7 +140,7 @@ like this: | ||||
|  | ||||
| ```bash | ||||
| #!/usr/bin/env bash | ||||
| pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH} | ||||
| pdf2pdfocr.py -i ${DOCUMENT_WORKING_PATH} | ||||
| ``` | ||||
|  | ||||
| `/etc/paperless.conf` | ||||
| @@ -157,27 +167,37 @@ Executed after the consumer has successfully processed a document and | ||||
| has moved it into paperless. It receives the following environment | ||||
| variables: | ||||
|  | ||||
| - `DOCUMENT_ID` | ||||
| - `DOCUMENT_FILE_NAME` | ||||
| - `DOCUMENT_CREATED` | ||||
| - `DOCUMENT_MODIFIED` | ||||
| - `DOCUMENT_ADDED` | ||||
| - `DOCUMENT_SOURCE_PATH` | ||||
| - `DOCUMENT_ARCHIVE_PATH` | ||||
| - `DOCUMENT_THUMBNAIL_PATH` | ||||
| - `DOCUMENT_DOWNLOAD_URL` | ||||
| - `DOCUMENT_THUMBNAIL_URL` | ||||
| - `DOCUMENT_CORRESPONDENT` | ||||
| - `DOCUMENT_TAGS` | ||||
| - `DOCUMENT_ORIGINAL_FILENAME` | ||||
| | Environment Variable         | Description                                   | | ||||
| | ---------------------------- | --------------------------------------------- | | ||||
| | `DOCUMENT_ID`                | Database primary key of the document          | | ||||
| | `DOCUMENT_FILE_NAME`         | Formatted filename, not including paths       | | ||||
| | `DOCUMENT_CREATED`           | Date & time when document created             | | ||||
| | `DOCUMENT_MODIFIED`          | Date & time when document was last modified   | | ||||
| | `DOCUMENT_ADDED`             | Date & time when document was added           | | ||||
| | `DOCUMENT_SOURCE_PATH`       | Path to the original document file            | | ||||
| `DOCUMENT_ARCHIVE_PATH`      | Path to the generated archive file (if any)   | | ||||
| | `DOCUMENT_THUMBNAIL_PATH`    | Path to the generated thumbnail               | | ||||
| | `DOCUMENT_DOWNLOAD_URL`      | URL for document download                     | | ||||
| | `DOCUMENT_THUMBNAIL_URL`     | URL for the document thumbnail                | | ||||
| | `DOCUMENT_CORRESPONDENT`     | Assigned correspondent (if any)               | | ||||
| | `DOCUMENT_TAGS`              | Comma separated list of tags applied (if any) | | ||||
| | `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document                 | | ||||
|  | ||||
| The script can be in any language, but for a simple shell script | ||||
| example, you can take a look at | ||||
| [post-consumption-example.sh](https://github.com/paperless-ngx/paperless-ngx/blob/main/scripts/post-consumption-example.sh) | ||||
| in this project. | ||||
| The script can be in any language. A simple shell script example: | ||||
|  | ||||
| ```bash title="post-consumption-example" | ||||
| --8<-- "./scripts/post-consumption-example.sh" | ||||
| ``` | ||||
|  | ||||
| !!! note | ||||
|  | ||||
|     The post consumption script cannot cancel the consumption process. | ||||
|  | ||||
| !!! warning | ||||
|  | ||||
|     The post consumption script should not modify the document files | ||||
|     directly | ||||
|  | ||||
| The script's stdout and stderr will be logged line by line to the | ||||
| webserver log, along with the exit code of the script. | ||||
|  | ||||
|   | ||||
| @@ -2,6 +2,9 @@ | ||||
|  | ||||
| ## paperless-ngx 1.12.1 | ||||
|  | ||||
| _Note: Version 1.12.x introduced searching of comments. Search works for comments added after the upgrade, but a reindex of the search index is required to find older comments. The Docker image performs this reindex automatically; bare metal installations must perform it manually, see [the docs](https://docs.paperless-ngx.com/administration/#index)._ | ||||
|  | ||||
| ### Bug Fixes | ||||
|  | ||||
| - Fix: comments not showing in search until after manual reindex in v1.12 [@shamoon](https://github.com/shamoon) ([#2513](https://github.com/paperless-ngx/paperless-ngx/pull/2513)) | ||||
|   | ||||
| @@ -41,6 +41,7 @@ markdown_extensions: | ||||
|       anchor_linenums: true | ||||
|   - pymdownx.superfences | ||||
|   - pymdownx.inlinehilite | ||||
|   - pymdownx.snippets | ||||
| strict: true | ||||
| nav: | ||||
|     - index.md | ||||
|   | ||||
(File diff suppressed because it is too large)
							| @@ -204,6 +204,10 @@ export class DocumentDetailComponent | ||||
|             ) | ||||
|             .subscribe({ | ||||
|               next: (titleValue) => { | ||||
|                 // In the rare case the field changed just after the debounced event fired, | ||||
|                 // we don't want to overwrite what's actually in the text field, so just return | ||||
|                 if (titleValue !== this.titleInput.value) return | ||||
|  | ||||
|                 this.title = titleValue | ||||
|                 this.documentForm.patchValue({ title: titleValue }) | ||||
|               }, | ||||
|   | ||||
| @@ -26,11 +26,11 @@ | ||||
|         </div> | ||||
|         <p class="card-text"> | ||||
|           <span *ngIf="document.__search_hit__ && document.__search_hit__.highlights" [innerHtml]="document.__search_hit__.highlights"></span> | ||||
|           <span *ngIf="document.__search_hit__ && document.__search_hit__.comment_highlights" class="d-block"> | ||||
|           <span *ngFor="let highlight of searchCommentHighlights" class="d-block"> | ||||
|             <svg width="1em" height="1em" fill="currentColor" class="me-2"> | ||||
|               <use xlink:href="assets/bootstrap-icons.svg#chat-left-text"/> | ||||
|             </svg> | ||||
|             <span [innerHtml]="document.__search_hit__.comment_highlights"></span> | ||||
|             <span [innerHtml]="highlight"></span> | ||||
|           </span> | ||||
|           <span *ngIf="!document.__search_hit__" class="result-content">{{contentTrimmed}}</span> | ||||
|         </p> | ||||
|   | ||||
| @@ -70,6 +70,22 @@ export class DocumentCardLargeComponent { | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   get searchCommentHighlights() { | ||||
|     let highlights = [] | ||||
|     if ( | ||||
|       this.document['__search_hit__'] && | ||||
|       this.document['__search_hit__'].comment_highlights | ||||
|     ) { | ||||
|       // only show comments with a match | ||||
|       highlights = ( | ||||
|         this.document['__search_hit__'].comment_highlights as string | ||||
|       ) | ||||
|         .split(',') | ||||
|         .filter((highlight) => highlight.includes('<span')) | ||||
|     } | ||||
|     return highlights | ||||
|   } | ||||
|  | ||||
|   getIsThumbInverted() { | ||||
|     return this.settingsService.get(SETTINGS_KEYS.DARK_MODE_THUMB_INVERTED) | ||||
|   } | ||||
|   | ||||
| @@ -143,7 +143,7 @@ | ||||
|             <p i18n> | ||||
|               <em>No tracking data is collected by the app in any way.</em> | ||||
|             </p> | ||||
|             <app-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled" i18n-hint hint="Note that for users of thirdy-party containers e.g. linuxserver.io this notification may be 'ahead' of the current third-party release."></app-input-check> | ||||
|             <app-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled" i18n-hint hint="Note that for users of third-party containers e.g. linuxserver.io this notification may be 'ahead' of the current third-party release."></app-input-check> | ||||
|           </div> | ||||
|         </div> | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,7 @@ export const environment = { | ||||
|   apiBaseUrl: document.baseURI + 'api/', | ||||
|   apiVersion: '2', | ||||
|   appTitle: 'Paperless-ngx', | ||||
|   version: '1.12.1', | ||||
|   version: '1.12.1-dev', | ||||
|   webSocketHost: window.location.host, | ||||
|   webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:', | ||||
|   webSocketBaseUrl: base_url.pathname + 'ws/', | ||||
|   | ||||
| @@ -4,7 +4,6 @@ import shutil | ||||
| import tempfile | ||||
| from dataclasses import dataclass | ||||
| from functools import lru_cache | ||||
| from math import ceil | ||||
| from pathlib import Path | ||||
| from typing import List | ||||
| from typing import Optional | ||||
| @@ -12,10 +11,9 @@ from typing import Optional | ||||
| import magic | ||||
| from django.conf import settings | ||||
| from pdf2image import convert_from_path | ||||
| from pdf2image.exceptions import PDFPageCountError | ||||
| from pikepdf import Page | ||||
| from pikepdf import PasswordError | ||||
| from pikepdf import Pdf | ||||
| from pikepdf import PdfImage | ||||
| from PIL import Image | ||||
| from PIL import ImageSequence | ||||
| from pyzbar import pyzbar | ||||
| @@ -154,52 +152,15 @@ def scan_file_for_barcodes( | ||||
|     (page_number, barcode_text) tuples | ||||
|     """ | ||||
|  | ||||
|     def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]: | ||||
|         detected_barcodes = [] | ||||
|         with Pdf.open(pdf_filepath) as pdf: | ||||
|             for page_num, page in enumerate(pdf.pages): | ||||
|                 for image_key in page.images: | ||||
|                     pdfimage = PdfImage(page.images[image_key]) | ||||
|  | ||||
|                     # This type is known to have issues: | ||||
|                     # https://github.com/pikepdf/pikepdf/issues/401 | ||||
|                     if "/CCITTFaxDecode" in pdfimage.filters: | ||||
|                         raise BarcodeImageFormatError( | ||||
|                             "Unable to decode CCITTFaxDecode images", | ||||
|                         ) | ||||
|  | ||||
|                     # Not all images can be transcoded to a PIL image, which | ||||
|                     # is what pyzbar expects to receive, so this may | ||||
|                     # raise an exception, triggering fallback | ||||
|                     pillow_img = pdfimage.as_pil_image() | ||||
|  | ||||
|                     # Scale the image down | ||||
|                     # See: https://github.com/paperless-ngx/paperless-ngx/issues/2385 | ||||
|                     # TLDR: zbar has issues with larger images | ||||
|                     width, height = pillow_img.size | ||||
|                     if width > 1024: | ||||
|                         scaler = ceil(width / 1024) | ||||
|                         new_width = int(width / scaler) | ||||
|                         new_height = int(height / scaler) | ||||
|                         pillow_img = pillow_img.resize((new_width, new_height)) | ||||
|  | ||||
|                     width, height = pillow_img.size | ||||
|                     if height > 2048: | ||||
|                         scaler = ceil(height / 2048) | ||||
|                         new_width = int(width / scaler) | ||||
|                         new_height = int(height / scaler) | ||||
|                         pillow_img = pillow_img.resize((new_width, new_height)) | ||||
|  | ||||
|                     for barcode_value in barcode_reader(pillow_img): | ||||
|                         detected_barcodes.append(Barcode(page_num, barcode_value)) | ||||
|  | ||||
|         return detected_barcodes | ||||
|  | ||||
|     def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]: | ||||
|         detected_barcodes = [] | ||||
|         # use a temporary directory in case the file is too big to handle in memory | ||||
|         with tempfile.TemporaryDirectory() as path: | ||||
|             pages_from_path = convert_from_path(pdf_filepath, output_folder=path) | ||||
|             pages_from_path = convert_from_path( | ||||
|                 pdf_filepath, | ||||
|                 dpi=300, | ||||
|                 output_folder=path, | ||||
|             ) | ||||
|             for current_page_number, page in enumerate(pages_from_path): | ||||
|                 for barcode_value in barcode_reader(page): | ||||
|                     detected_barcodes.append( | ||||
| @@ -219,27 +180,19 @@ def scan_file_for_barcodes( | ||||
|         # Always try pikepdf first, it's usually fine, faster and | ||||
|         # uses less memory | ||||
|         try: | ||||
|             barcodes = _pikepdf_barcode_scan(pdf_filepath) | ||||
|             barcodes = _pdf2image_barcode_scan(pdf_filepath) | ||||
|         # Password protected files can't be checked | ||||
|         except PasswordError as e: | ||||
|         # This is the exception raised for those | ||||
|         except PDFPageCountError as e: | ||||
|             logger.warning( | ||||
|                 f"File is likely password protected, not checking for barcodes: {e}", | ||||
|             ) | ||||
|         # Handle pikepdf related image decoding issues with a fallback to page | ||||
|         # by page conversion to images in a temporary directory | ||||
|         except Exception as e: | ||||
|             logger.warning( | ||||
|                 f"Falling back to pdf2image because: {e}", | ||||
|             ) | ||||
|             try: | ||||
|                 barcodes = _pdf2image_barcode_scan(pdf_filepath) | ||||
|         # This file is really borked, allow the consumption to continue | ||||
|         # but it may fail further on | ||||
|         except Exception as e:  # pragma: no cover | ||||
|             logger.warning( | ||||
|                 f"Exception during barcode scanning: {e}", | ||||
|             ) | ||||
|  | ||||
|     else: | ||||
|         logger.warning( | ||||
|             f"Unsupported file format for barcode reader: {str(mime_type)}", | ||||
|   | ||||
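Condensed, the new PDF scanning flow above reads roughly as follows; this is a sketch of the logic, not the verbatim file, and `_pdf2image_barcode_scan` is the helper shown in the diff:

```python
# Sketch of the post-change control flow in scan_file_for_barcodes.
import logging

from pdf2image.exceptions import PDFPageCountError

logger = logging.getLogger("paperless.barcodes")  # illustrative logger name


def scan_pdf(pdf_filepath: str) -> list:
    try:
        # pdf2image is now the only scanning path, rendering pages at 300 DPI
        return _pdf2image_barcode_scan(pdf_filepath)
    except PDFPageCountError as e:
        # pdf2image raises this for password protected files
        logger.warning(f"File is likely password protected, not checking for barcodes: {e}")
    except Exception as e:
        # Truly borked files: log and let consumption continue; it may fail later
        logger.warning(f"Exception during barcode scanning: {e}")
    return []
```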
| @@ -1,7 +1,10 @@ | ||||
| import datetime | ||||
| import hashlib | ||||
| import os | ||||
| import shutil | ||||
| import tempfile | ||||
| import uuid | ||||
| from pathlib import Path | ||||
| from subprocess import CompletedProcess | ||||
| from subprocess import run | ||||
| from typing import Optional | ||||
| @@ -94,7 +97,8 @@ class Consumer(LoggingMixin): | ||||
|  | ||||
|     def __init__(self): | ||||
|         super().__init__() | ||||
|         self.path = None | ||||
|         self.path: Optional[Path] = None | ||||
|         self.original_path: Optional[Path] = None | ||||
|         self.filename = None | ||||
|         self.override_title = None | ||||
|         self.override_correspondent_id = None | ||||
| @@ -167,16 +171,18 @@ class Consumer(LoggingMixin): | ||||
|  | ||||
|         self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}") | ||||
|  | ||||
|         filepath_arg = os.path.normpath(self.path) | ||||
|         working_file_path = str(self.path) | ||||
|         original_file_path = str(self.original_path) | ||||
|  | ||||
|         script_env = os.environ.copy() | ||||
|         script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg | ||||
|         script_env["DOCUMENT_SOURCE_PATH"] = original_file_path | ||||
|         script_env["DOCUMENT_WORKING_PATH"] = working_file_path | ||||
|  | ||||
|         try: | ||||
|             completed_proc = run( | ||||
|                 args=[ | ||||
|                     settings.PRE_CONSUME_SCRIPT, | ||||
|                     filepath_arg, | ||||
|                     original_file_path, | ||||
|                 ], | ||||
|                 env=script_env, | ||||
|                 capture_output=True, | ||||
| @@ -195,7 +201,7 @@ class Consumer(LoggingMixin): | ||||
|                 exception=e, | ||||
|             ) | ||||
|  | ||||
|     def run_post_consume_script(self, document): | ||||
|     def run_post_consume_script(self, document: Document): | ||||
|         if not settings.POST_CONSUME_SCRIPT: | ||||
|             return | ||||
|  | ||||
| @@ -285,8 +291,8 @@ class Consumer(LoggingMixin): | ||||
|         Return the document object if it was successfully created. | ||||
|         """ | ||||
|  | ||||
|         self.path = path | ||||
|         self.filename = override_filename or os.path.basename(path) | ||||
|         self.path = Path(path).resolve() | ||||
|         self.filename = override_filename or self.path.name | ||||
|         self.override_title = override_title | ||||
|         self.override_correspondent_id = override_correspondent_id | ||||
|         self.override_document_type_id = override_document_type_id | ||||
| @@ -311,6 +317,15 @@ class Consumer(LoggingMixin): | ||||
|  | ||||
|         self.log("info", f"Consuming {self.filename}") | ||||
|  | ||||
|         # For the actual work, copy the file into a tempdir | ||||
|         self.original_path = self.path | ||||
|         tempdir = tempfile.TemporaryDirectory( | ||||
|             prefix="paperless-ngx", | ||||
|             dir=settings.SCRATCH_DIR, | ||||
|         ) | ||||
|         self.path = Path(tempdir.name) / Path(self.filename) | ||||
|         shutil.copy(self.original_path, self.path) | ||||
|  | ||||
|         # Determine the parser class. | ||||
|  | ||||
|         mime_type = magic.from_file(self.path, mime=True) | ||||
| @@ -453,11 +468,12 @@ class Consumer(LoggingMixin): | ||||
|                 # Delete the file only if it was successfully consumed | ||||
|                 self.log("debug", f"Deleting file {self.path}") | ||||
|                 os.unlink(self.path) | ||||
|                 self.original_path.unlink() | ||||
|  | ||||
|                 # https://github.com/jonaswinkler/paperless-ng/discussions/1037 | ||||
|                 shadow_file = os.path.join( | ||||
|                     os.path.dirname(self.path), | ||||
|                     "._" + os.path.basename(self.path), | ||||
|                     os.path.dirname(self.original_path), | ||||
|                     "._" + os.path.basename(self.original_path), | ||||
|                 ) | ||||
|  | ||||
|                 if os.path.isfile(shadow_file): | ||||
| @@ -474,6 +490,7 @@ class Consumer(LoggingMixin): | ||||
|             ) | ||||
|         finally: | ||||
|             document_parser.cleanup() | ||||
|             tempdir.cleanup() | ||||
|  | ||||
|         self.run_post_consume_script(document) | ||||
|  | ||||
|   | ||||
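The net effect of the consumer changes above, condensed into a sketch; `parse` stands in for the parser pipeline and `scratch_dir` for `settings.SCRATCH_DIR`:

```python
# Sketch of the new working-copy lifecycle in Consumer.try_consume_file.
import shutil
import tempfile
from pathlib import Path


def consume(source_file: str, scratch_dir: str) -> None:
    original_path = Path(source_file).resolve()
    tempdir = tempfile.TemporaryDirectory(prefix="paperless-ngx", dir=scratch_dir)
    working_path = Path(tempdir.name) / original_path.name
    shutil.copy(original_path, working_path)
    try:
        parse(working_path)     # all parsing/OCR happens on the copy
        original_path.unlink()  # the original is deleted only on success
    finally:
        tempdir.cleanup()       # the working copy always goes away
```

This is why pre-consume scripts now receive both `DOCUMENT_SOURCE_PATH` and `DOCUMENT_WORKING_PATH`: modifications belong on the working copy, as the docs change in this commit explains.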
(Two image files changed: 33 KiB and 39 KiB, same size before and after)
(File diff suppressed because it is too large)
							| @@ -833,7 +833,8 @@ class PreConsumeTestCase(TestCase): | ||||
|         with tempfile.NamedTemporaryFile() as script: | ||||
|             with override_settings(PRE_CONSUME_SCRIPT=script.name): | ||||
|                 c = Consumer() | ||||
|                 c.path = "path-to-file" | ||||
|                 c.original_path = "path-to-file" | ||||
|                 c.path = "/tmp/somewhere/path-to-file" | ||||
|                 c.run_pre_consume_script() | ||||
|  | ||||
|                 m.assert_called_once() | ||||
| @@ -841,10 +842,19 @@ class PreConsumeTestCase(TestCase): | ||||
|                 args, kwargs = m.call_args | ||||
|  | ||||
|                 command = kwargs["args"] | ||||
|                 environment = kwargs["env"] | ||||
|  | ||||
|                 self.assertEqual(command[0], script.name) | ||||
|                 self.assertEqual(command[1], "path-to-file") | ||||
|  | ||||
|                 self.assertDictContainsSubset( | ||||
|                     { | ||||
|                         "DOCUMENT_SOURCE_PATH": c.original_path, | ||||
|                         "DOCUMENT_WORKING_PATH": c.path, | ||||
|                     }, | ||||
|                     environment, | ||||
|                 ) | ||||
|  | ||||
|     @mock.patch("documents.consumer.Consumer.log") | ||||
|     def test_script_with_output(self, mocked_log): | ||||
|         """ | ||||
| @@ -961,9 +971,10 @@ class PostConsumeTestCase(TestCase): | ||||
|  | ||||
|                 m.assert_called_once() | ||||
|  | ||||
|                 args, kwargs = m.call_args | ||||
|                 _, kwargs = m.call_args | ||||
|  | ||||
|                 command = kwargs["args"] | ||||
|                 environment = kwargs["env"] | ||||
|  | ||||
|                 self.assertEqual(command[0], script.name) | ||||
|                 self.assertEqual(command[1], str(doc.pk)) | ||||
| @@ -972,6 +983,17 @@ class PostConsumeTestCase(TestCase): | ||||
|                 self.assertEqual(command[7], "my_bank") | ||||
|                 self.assertCountEqual(command[8].split(","), ["a", "b"]) | ||||
|  | ||||
|                 self.assertDictContainsSubset( | ||||
|                     { | ||||
|                         "DOCUMENT_ID": str(doc.pk), | ||||
|                         "DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/", | ||||
|                         "DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/", | ||||
|                         "DOCUMENT_CORRESPONDENT": "my_bank", | ||||
|                         "DOCUMENT_TAGS": "a,b", | ||||
|                     }, | ||||
|                     environment, | ||||
|                 ) | ||||
|  | ||||
|     def test_script_exit_non_zero(self): | ||||
|         """ | ||||
|         GIVEN: | ||||
|   | ||||
| @@ -3,6 +3,7 @@ import shutil | ||||
| import tempfile | ||||
| from collections import namedtuple | ||||
| from contextlib import contextmanager | ||||
| from unittest import mock | ||||
|  | ||||
| from django.apps import apps | ||||
| from django.db import connection | ||||
| @@ -86,6 +87,30 @@ class DirectoriesMixin: | ||||
|         remove_dirs(self.dirs) | ||||
|  | ||||
|  | ||||
| class ConsumerProgressMixin: | ||||
|     def setUp(self) -> None: | ||||
|         self.send_progress_patcher = mock.patch( | ||||
|             "documents.consumer.Consumer._send_progress", | ||||
|         ) | ||||
|         self.send_progress_mock = self.send_progress_patcher.start() | ||||
|         super().setUp() | ||||
|  | ||||
|     def tearDown(self) -> None: | ||||
|         super().tearDown() | ||||
|         self.send_progress_patcher.stop() | ||||
|  | ||||
|  | ||||
| class DocumentConsumeDelayMixin: | ||||
|     def setUp(self) -> None: | ||||
|         self.consume_file_patcher = mock.patch("documents.tasks.consume_file.delay") | ||||
|         self.consume_file_mock = self.consume_file_patcher.start() | ||||
|         super().setUp() | ||||
|  | ||||
|     def tearDown(self) -> None: | ||||
|         super().tearDown() | ||||
|         self.consume_file_patcher.stop() | ||||
|  | ||||
|  | ||||
| class TestMigrations(TransactionTestCase): | ||||
|     @property | ||||
|     def app(self): | ||||
|   | ||||
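A usage sketch for the new mixins; the test case is hypothetical. Because each mixin patches in `setUp` and unpatches in `tearDown`, they compose with `TestCase` through normal MRO chaining:

```python
# Hypothetical example combining one of the new test mixins with a TestCase.
from django.test import TestCase


class ConsumeEndpointTest(DocumentConsumeDelayMixin, TestCase):
    def test_upload_queues_consume_task(self):
        # ... POST a document to the consume endpoint here ...
        # consume_file.delay was patched out; assert it would have been queued.
        self.consume_file_mock.assert_called_once()
```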
| @@ -477,21 +477,14 @@ class DocumentViewSet( | ||||
| class SearchResultSerializer(DocumentSerializer): | ||||
|     def to_representation(self, instance): | ||||
|         doc = Document.objects.get(id=instance["id"]) | ||||
|         comments = "" | ||||
|         if hasattr(instance.results.q, "subqueries"): | ||||
|             commentTerm = instance.results.q.subqueries[0] | ||||
|         comments = ",".join( | ||||
|                 [ | ||||
|                     str(c.comment) | ||||
|                     for c in Comment.objects.filter(document=instance["id"]) | ||||
|                     if commentTerm.text in c.comment | ||||
|                 ], | ||||
|             [str(c.comment) for c in Comment.objects.filter(document=instance["id"])], | ||||
|         ) | ||||
|         r = super().to_representation(doc) | ||||
|         r["__search_hit__"] = { | ||||
|             "score": instance.score, | ||||
|             "highlights": instance.highlights("content", text=doc.content), | ||||
|             "comment_highlights": instance.highlights("content", text=comments) | ||||
|             "comment_highlights": instance.highlights("comments", text=comments) | ||||
|             if doc | ||||
|             else None, | ||||
|             "rank": instance.rank, | ||||
|   | ||||
| @@ -271,6 +271,16 @@ class MailDocumentParser(DocumentParser): | ||||
|                 "paperHeight": "11.7", | ||||
|                 "scale": "1.0", | ||||
|             } | ||||
|  | ||||
|             # Set the output format of the resulting PDF | ||||
|             # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno | ||||
|             if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}: | ||||
|                 data["pdfFormat"] = "PDF/A-2b" | ||||
|             elif settings.OCR_OUTPUT_TYPE == "pdfa-1": | ||||
|                 data["pdfFormat"] = "PDF/A-1a" | ||||
|             elif settings.OCR_OUTPUT_TYPE == "pdfa-3": | ||||
|                 data["pdfFormat"] = "PDF/A-3b" | ||||
|  | ||||
|             try: | ||||
|                 response = requests.post( | ||||
|                     url, | ||||
|   | ||||
| @@ -573,8 +573,8 @@ class TestParser(TestCase): | ||||
|             self.parser.gotenberg_server + "/forms/chromium/convert/html", | ||||
|             mock_post.call_args.args[0], | ||||
|         ) | ||||
|         self.assertEqual({}, mock_post.call_args.kwargs["headers"]) | ||||
|         self.assertEqual( | ||||
|         self.assertDictEqual({}, mock_post.call_args.kwargs["headers"]) | ||||
|         self.assertDictEqual( | ||||
|             { | ||||
|                 "marginTop": "0.1", | ||||
|                 "marginBottom": "0.1", | ||||
| @@ -583,6 +583,7 @@ class TestParser(TestCase): | ||||
|                 "paperWidth": "8.27", | ||||
|                 "paperHeight": "11.7", | ||||
|                 "scale": "1.0", | ||||
|                 "pdfFormat": "PDF/A-2b", | ||||
|             }, | ||||
|             mock_post.call_args.kwargs["data"], | ||||
|         ) | ||||
| @@ -663,8 +664,8 @@ class TestParser(TestCase): | ||||
|             self.parser.gotenberg_server + "/forms/chromium/convert/html", | ||||
|             mock_post.call_args.args[0], | ||||
|         ) | ||||
|         self.assertEqual({}, mock_post.call_args.kwargs["headers"]) | ||||
|         self.assertEqual( | ||||
|         self.assertDictEqual({}, mock_post.call_args.kwargs["headers"]) | ||||
|         self.assertDictEqual( | ||||
|             { | ||||
|                 "marginTop": "0.1", | ||||
|                 "marginBottom": "0.1", | ||||
|   | ||||
| @@ -95,9 +95,19 @@ class TikaDocumentParser(DocumentParser): | ||||
|                 ), | ||||
|             } | ||||
|             headers = {} | ||||
|             data = {} | ||||
|  | ||||
|             # Set the output format of the resulting PDF | ||||
|             # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno | ||||
|             if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}: | ||||
|                 data["pdfFormat"] = "PDF/A-2b" | ||||
|             elif settings.OCR_OUTPUT_TYPE == "pdfa-1": | ||||
|                 data["pdfFormat"] = "PDF/A-1a" | ||||
|             elif settings.OCR_OUTPUT_TYPE == "pdfa-3": | ||||
|                 data["pdfFormat"] = "PDF/A-3b" | ||||
|  | ||||
|             try: | ||||
|                 response = requests.post(url, files=files, headers=headers) | ||||
|                 response = requests.post(url, files=files, headers=headers, data=data) | ||||
|                 response.raise_for_status()  # ensure we notice bad responses | ||||
|             except Exception as err: | ||||
|                 raise ParseError( | ||||
|   | ||||
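The same `OCR_OUTPUT_TYPE` to `pdfFormat` mapping now appears in both the mail parser and the Tika parser. If it were ever factored out, it could reduce to a helper like this (hypothetical function, not part of the commit):

```python
# Hypothetical shared helper for the mapping duplicated in both parsers above.
from typing import Optional


def gotenberg_pdf_format(ocr_output_type: str) -> Optional[str]:
    """Map paperless OCR_OUTPUT_TYPE values to Gotenberg pdfFormat strings."""
    return {
        "pdfa": "PDF/A-2b",
        "pdfa-2": "PDF/A-2b",
        "pdfa-1": "PDF/A-1a",
        "pdfa-3": "PDF/A-3b",
    }.get(ocr_output_type)
```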