Merge remote-tracking branch 'origin/dev'

.github/scripts/cleanup-tags.py (vendored) | 71

@@ -15,6 +15,8 @@ from github import ContainerPackage
 from github import GithubBranchApi
 from github import GithubContainerRegistryApi
 
+import docker
+
 logger = logging.getLogger("cleanup-tags")
 
 
@@ -151,12 +153,16 @@ class RegistryTagsCleaner:
             for tag in sorted(self.tags_to_keep):
                 full_name = f"ghcr.io/{self.repo_owner}/{self.package_name}:{tag}"
                 logger.info(f"Checking manifest for {full_name}")
+                # TODO: It would be nice to use RegistryData from docker
+                # except the ID doesn't map to anything in the manifest
                 try:
                     proc = subprocess.run(
                         [
                             shutil.which("docker"),
-                            "manifest",
+                            "buildx",
+                            "imagetools",
                             "inspect",
+                            "--raw",
                             full_name,
                         ],
                         capture_output=True,
@@ -241,6 +247,65 @@ class RegistryTagsCleaner:
         # By default, keep anything which is tagged
         self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))
 
+    def check_tags_pull(self):
+        """
+        This method uses the Docker Python SDK to confirm all tags which were
+        kept still pull, for all platforms.
+
+        TODO: This is much slower (although more comprehensive).  Maybe a Pool?
+        """
+        logger.info("Beginning confirmation step")
+        client = docker.from_env()
+        imgs = []
+        for tag in sorted(self.tags_to_keep):
+            repository = f"ghcr.io/{self.repo_owner}/{self.package_name}"
+            for arch, variant in [("amd64", None), ("arm64", None), ("arm", "v7")]:
+                # From 11.2.0 onwards, qpdf is cross compiled, so there is a single arch, amd64
+                # skip others in this case
+                if "qpdf" in self.package_name and arch != "amd64" and tag == "11.2.0":
+                    continue
+                # Skip beta and release candidate tags
+                elif "beta" in tag:
+                    continue
+
+                # Build the platform name
+                if variant is not None:
+                    platform = f"linux/{arch}/{variant}"
+                else:
+                    platform = f"linux/{arch}"
+
+                try:
+                    logger.info(f"Pulling {repository}:{tag} for {platform}")
+                    image = client.images.pull(
+                        repository=repository,
+                        tag=tag,
+                        platform=platform,
+                    )
+                    imgs.append(image)
+                except docker.errors.APIError as e:
+                    logger.error(
+                        f"Failed to pull {repository}:{tag}: {e}",
+                    )
+
+            # Prevent out of space errors by removing after a few
+            # pulls
+            if len(imgs) > 50:
+                for image in imgs:
+                    try:
+                        client.images.remove(image.id)
+                    except docker.errors.APIError as e:
+                        err_str = str(e)
+                        # Ignore attempts to remove images that are partly shared
+                        # Ignore images which are somehow gone already
+                        if (
+                            "must be forced" not in err_str
+                            and "No such image" not in err_str
+                        ):
+                            logger.error(
+                                f"Remove image ghcr.io/{self.repo_owner}/{self.package_name}:{tag} failed: {e}",
+                            )
+                imgs = []
+
+
 class MainImageTagsCleaner(RegistryTagsCleaner):
     def decide_what_tags_to_keep(self):
@@ -397,6 +462,10 @@ def _main():
             # Clean images which are untagged
             cleaner.clean_untagged(args.is_manifest)
 
+            # Verify remaining tags still pull
+            if args.is_manifest:
+                cleaner.check_tags_pull()
+
 
 if __name__ == "__main__":
     _main()
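
For reference, `docker buildx imagetools inspect --raw` prints the tag's manifest as raw JSON (a manifest list for multi-arch images), which is what makes the check above scriptable. A minimal sketch of parsing that output to list the platforms a kept tag provides; the helper name is illustrative and it assumes the docker CLI with buildx is on PATH:

```python
import json
import shutil
import subprocess


def manifest_platforms(full_name: str) -> list:
    """Return the platforms listed in a tag's (multi-arch) manifest."""
    proc = subprocess.run(
        [shutil.which("docker"), "buildx", "imagetools", "inspect", "--raw", full_name],
        capture_output=True,
        check=True,
    )
    index = json.loads(proc.stdout)
    # A manifest list has one entry per platform under "manifests";
    # a single-arch image manifest has no such key
    return [
        f"{m['platform']['os']}/{m['platform']['architecture']}"
        for m in index.get("manifests", [])
        if "platform" in m
    ]
```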

.github/workflows/ci.yml (vendored) | 6

@@ -212,12 +212,6 @@ jobs:
     name: Prepare Docker Pipeline Data
     if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/beta' || contains(github.ref, 'beta.rc') || startsWith(github.ref, 'refs/tags/v'))
     runs-on: ubuntu-22.04
-    # If the push triggered the installer library workflow, wait for it to
-    # complete here.  This ensures the required versions for the final
-    # image have been built, while not waiting at all if the versions haven't changed
-    concurrency:
-      group: build-installer-library
-      cancel-in-progress: false
     needs:
       - documentation
       - tests-backend

.github/workflows/cleanup-tags.yml (vendored) | 14

@@ -62,9 +62,9 @@ jobs:
         with:
           python-version: "3.10"
       -
-        name: Install httpx
+        name: Install Python libraries
         run: |
-          python -m pip install httpx
+          python -m pip install httpx docker
       #
       # Clean up primary package
       #
@@ -81,13 +81,3 @@ jobs:
         if: "${{ env.TOKEN != '' }}"
         run: |
           python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --delete "${{ matrix.cache-name }}"
-      #
-      # Verify tags which are left still pull
-      #
-      -
-        name: Check all tags still pull
-        run: |
-          ghcr_name=$(echo "ghcr.io/${GITHUB_REPOSITORY_OWNER}/${{ matrix.primary-name }}" | awk '{ print tolower($0) }')
-          echo "Pulling all tags of ${ghcr_name}"
-          docker pull --quiet --all-tags ${ghcr_name}
-          docker image list

.github/workflows/installer-library.yml (vendored) | 139

@@ -169,3 +169,142 @@ jobs:
         PIKEPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }}
         PILLOW_VERSION=${{ needs.prepare-docker-build.outputs.pillow-version }}
         LXML_VERSION=${{ needs.prepare-docker-build.outputs.lxml-version }}
+
+  commit-binary-files:
+    name: Store installers
+    needs:
+      - prepare-docker-build
+      - build-qpdf-debs
+      - build-jbig2enc
+      - build-psycopg2-wheel
+      - build-pikepdf-wheel
+    runs-on: ubuntu-22.04
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v3
+        with:
+          ref: binary-library
+      -
+        name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      -
+        name: Install system dependencies
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -qq --no-install-recommends tree
+      -
+        name: Extract qpdf files
+        run: |
+          version=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).version }}
+          tag=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).image_tag }}
+
+          docker pull --quiet ${tag}
+          docker create --name qpdf-extract ${tag}
+
+          mkdir --parents qpdf/${version}/amd64
+          docker cp qpdf-extract:/usr/src/qpdf/${version}/amd64 qpdf/${version}
+
+          mkdir --parents qpdf/${version}/arm64
+          docker cp qpdf-extract:/usr/src/qpdf/${version}/arm64 qpdf/${version}
+
+          mkdir --parents qpdf/${version}/armv7
+          docker cp qpdf-extract:/usr/src/qpdf/${version}/armv7 qpdf/${version}
+      -
+        name: Extract psycopg2 files
+        run: |
+          version=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).version }}
+          tag=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).image_tag }}
+
+          docker pull --quiet --platform linux/amd64 ${tag}
+          docker create --platform linux/amd64 --name psycopg2-extract ${tag}
+          mkdir --parents psycopg2/${version}/amd64
+          docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/amd64
+          mv psycopg2/${version}/amd64/wheels/* psycopg2/${version}/amd64
+          rm -r psycopg2/${version}/amd64/wheels/
+          docker rm psycopg2-extract
+
+          docker pull --quiet --platform linux/arm64 ${tag}
+          docker create --platform linux/arm64 --name psycopg2-extract ${tag}
+          mkdir --parents psycopg2/${version}/arm64
+          docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/arm64
+          mv psycopg2/${version}/arm64/wheels/* psycopg2/${version}/arm64
+          rm -r psycopg2/${version}/arm64/wheels/
+          docker rm psycopg2-extract
+
+          docker pull --quiet --platform linux/arm/v7 ${tag}
+          docker create --platform linux/arm/v7 --name psycopg2-extract ${tag}
+          mkdir --parents psycopg2/${version}/armv7
+          docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/armv7
+          mv psycopg2/${version}/armv7/wheels/* psycopg2/${version}/armv7
+          rm -r psycopg2/${version}/armv7/wheels/
+          docker rm psycopg2-extract
+      -
+        name: Extract pikepdf files
+        run: |
+          version=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }}
+          tag=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).image_tag }}
+
+          docker pull --quiet --platform linux/amd64 ${tag}
+          docker create --platform linux/amd64 --name pikepdf-extract ${tag}
+          mkdir --parents pikepdf/${version}/amd64
+          docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/amd64
+          mv pikepdf/${version}/amd64/wheels/* pikepdf/${version}/amd64
+          rm -r pikepdf/${version}/amd64/wheels/
+          docker rm pikepdf-extract
+
+          docker pull --quiet --platform linux/arm64 ${tag}
+          docker create --platform linux/arm64 --name pikepdf-extract ${tag}
+          mkdir --parents pikepdf/${version}/arm64
+          docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/arm64
+          mv pikepdf/${version}/arm64/wheels/* pikepdf/${version}/arm64
+          rm -r pikepdf/${version}/arm64/wheels/
+          docker rm pikepdf-extract
+
+          docker pull --quiet --platform linux/arm/v7 ${tag}
+          docker create --platform linux/arm/v7 --name pikepdf-extract ${tag}
+          mkdir --parents pikepdf/${version}/armv7
+          docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/armv7
+          mv pikepdf/${version}/armv7/wheels/* pikepdf/${version}/armv7
+          rm -r pikepdf/${version}/armv7/wheels/
+          docker rm pikepdf-extract
+      -
+        name: Extract jbig2enc files
+        run: |
+          version=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).version }}
+          tag=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).image_tag }}
+
+          docker pull --quiet --platform linux/amd64 ${tag}
+          docker create --platform linux/amd64 --name jbig2enc-extract ${tag}
+          mkdir --parents jbig2enc/${version}/amd64
+          docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/amd64/
+          mv jbig2enc/${version}/amd64/build/* jbig2enc/${version}/amd64/
+          docker rm jbig2enc-extract
+
+          docker pull --quiet --platform linux/arm64 ${tag}
+          docker create --platform linux/arm64 --name jbig2enc-extract ${tag}
+          mkdir --parents jbig2enc/${version}/arm64
+          docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/arm64
+          mv jbig2enc/${version}/arm64/build/* jbig2enc/${version}/arm64/
+          docker rm jbig2enc-extract
+
+          docker pull --quiet --platform linux/arm/v7 ${tag}
+          docker create --platform linux/arm/v7 --name jbig2enc-extract ${tag}
+          mkdir --parents jbig2enc/${version}/armv7
+          docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/armv7
+          mv jbig2enc/${version}/armv7/build/* jbig2enc/${version}/armv7/
+          docker rm jbig2enc-extract
+      -
+        name: Show file structure
+        run: |
+          tree .
+      -
+        name: Commit files
+        run: |
+          git config --global user.name "github-actions"
+          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add pikepdf/ qpdf/ psycopg2/ jbig2enc/
+          git commit -m "Updating installer packages" || true
+          git push origin || true
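
Each extraction step above repeats one CLI pattern: pull for a platform, `docker create` a container without starting it, `docker cp` a path out, remove the container. A sketch of the same idea with the Docker Python SDK instead; the function and paths are illustrative, and `get_archive()` streams the requested path out of the stopped container as a tar archive:

```python
import io
import tarfile

import docker


def extract_from_image(tag: str, src_path: str, dest_dir: str) -> None:
    """Copy src_path out of an image without ever starting a container."""
    client = docker.from_env()
    client.images.pull(tag)
    container = client.containers.create(tag)  # created, never started
    try:
        bits, _stat = container.get_archive(src_path)
        archive = io.BytesIO(b"".join(bits))
        with tarfile.open(fileobj=archive) as tar:
            tar.extractall(dest_dir)
    finally:
        container.remove()


# e.g. extract_from_image(tag, "/usr/src/wheels/", f"psycopg2/{version}/amd64")
```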

Dockerfile | 53

@@ -1,19 +1,5 @@
 # syntax=docker/dockerfile:1.4
 
-# Pull the installer images from the library
-# These are all built previously
-# They provide either a .deb or .whl
-
-ARG JBIG2ENC_VERSION
-ARG QPDF_VERSION
-ARG PIKEPDF_VERSION
-ARG PSYCOPG2_VERSION
-
-FROM ghcr.io/paperless-ngx/paperless-ngx/builder/jbig2enc:${JBIG2ENC_VERSION} as jbig2enc-builder
-FROM --platform=$BUILDPLATFORM ghcr.io/paperless-ngx/paperless-ngx/builder/qpdf:${QPDF_VERSION} as qpdf-builder
-FROM ghcr.io/paperless-ngx/paperless-ngx/builder/pikepdf:${PIKEPDF_VERSION} as pikepdf-builder
-FROM ghcr.io/paperless-ngx/paperless-ngx/builder/psycopg2:${PSYCOPG2_VERSION} as psycopg2-builder
-
 FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend
 
 # This stage compiles the frontend
@@ -58,24 +44,21 @@ LABEL org.opencontainers.image.url="https://github.com/paperless-ngx/paperless-n
 LABEL org.opencontainers.image.licenses="GPL-3.0-only"
 
 ARG DEBIAN_FRONTEND=noninteractive
-# Buildx provided
+# Buildx provided, must be defined to use though
 ARG TARGETARCH
 ARG TARGETVARIANT
 
 # Workflow provided
+ARG JBIG2ENC_VERSION
 ARG QPDF_VERSION
+ARG PIKEPDF_VERSION
+ARG PSYCOPG2_VERSION
 
 #
 # Begin installation and configuration
 # Order the steps below from least often changed to most
 #
 
-# copy jbig2enc
-# Basically will never change again
-COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
-COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
-COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/*.h /usr/local/include/
-
 # Packages need for running
 ARG RUNTIME_PACKAGES="\
   # Python
@@ -198,19 +181,29 @@ RUN set -eux \
 # Install the built packages from the installer library images
 # Use mounts to avoid copying installer files into the image
 # These change sometimes
-RUN --mount=type=bind,from=qpdf-builder,target=/qpdf \
-    --mount=type=bind,from=psycopg2-builder,target=/psycopg2 \
-    --mount=type=bind,from=pikepdf-builder,target=/pikepdf \
-  set -eux \
+RUN set -eux \
+  && echo "Getting binaries" \
+    && mkdir paperless-ngx \
+    && curl --fail --silent --show-error --output paperless-ngx.tar.gz --location https://github.com/paperless-ngx/paperless-ngx/archive/41d6e7e407af09a0882736d50c89b6e015997bff.tar.gz \
+    && tar -xf paperless-ngx.tar.gz --directory paperless-ngx --strip-components=1 \
+    && cd paperless-ngx \
+    # Setting a specific revision ensures we know what this installed
+    # and ensures cache breaking on changes
+  && echo "Installing jbig2enc" \
+    && cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/jbig2 /usr/local/bin/ \
+    && cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/libjbig2enc* /usr/local/lib/ \
   && echo "Installing qpdf" \
-    && apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/libqpdf29_*.deb \
-    && apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/qpdf_*.deb \
+    && apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/libqpdf29_*.deb \
+    && apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/qpdf_*.deb \
   && echo "Installing pikepdf and dependencies" \
-    && python3 -m pip install --no-cache-dir /pikepdf/usr/src/wheels/*.whl \
+    && python3 -m pip install --no-cache-dir ./pikepdf/${PIKEPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/*.whl \
     && python3 -m pip list \
   && echo "Installing psycopg2" \
-    && python3 -m pip install --no-cache-dir /psycopg2/usr/src/wheels/psycopg2*.whl \
-    && python3 -m pip list
+    && python3 -m pip install --no-cache-dir ./psycopg2/${PSYCOPG2_VERSION}/${TARGETARCH}${TARGETVARIANT}/psycopg2*.whl \
+    && python3 -m pip list \
+  && echo "Cleaning up image layer" \
+    && cd ../ \
+    && rm -rf paperless-ngx
+
 WORKDIR /usr/src/paperless/src/
 

@@ -29,7 +29,20 @@ RUN set -eux \
     && ./autogen.sh \
     && ./configure \
     && make \
+  && echo "Gathering package data" \
+    && dpkg-query -f '${Package;-40}${Version}\n' -W > ./pkg-list.txt \
   && echo "Cleaning up image" \
     && apt-get -y purge ${BUILD_PACKAGES} \
     && apt-get -y autoremove --purge \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* \
+  && echo "Moving files around" \
+    && mkdir build \
+    # Unlink a symlink that causes problems
+    && unlink ./src/.libs/libjbig2enc.la \
+    # Move what the link pointed to
+    && mv ./src/libjbig2enc.la ./build/ \
+    # Move the shared library .so files
+    && mv ./src/.libs/libjbig2enc* ./build/ \
+    # And move the cli binary
+    && mv ./src/jbig2 ./build/ \
+    && mv ./pkg-list.txt ./build/

@@ -7,12 +7,17 @@
 # Default to pulling from the main repo registry when manually building
 ARG REPO="paperless-ngx/paperless-ngx"
 
+# This does nothing, except provide a name for a copy below
 ARG QPDF_VERSION
 FROM --platform=$BUILDPLATFORM ghcr.io/${REPO}/builder/qpdf:${QPDF_VERSION} as qpdf-builder
 
-# This does nothing, except provide a name for a copy below
-FROM python:3.9-slim-bullseye as main
+#
+# Stage: builder
+# Purpose:
+#  - Build the pikepdf wheel
+#  - Build any dependent wheels which can't be found
+#
+FROM python:3.9-slim-bullseye as builder
 
 LABEL org.opencontainers.image.description="A intermediate image with pikepdf wheel built"
 
@@ -100,3 +105,14 @@ RUN set -eux \
     && apt-get -y purge ${BUILD_PACKAGES} \
     && apt-get -y autoremove --purge \
     && rm -rf /var/lib/apt/lists/*
+
+#
+# Stage: package
+# Purpose: Holds the compiled .whl files in a tiny image to pull
+#
+FROM alpine:3.17 as package
+
+WORKDIR /usr/src/wheels/
+
+COPY --from=builder /usr/src/wheels/*.whl ./
+COPY --from=builder /usr/src/wheels/pkg-list.txt ./

@@ -2,7 +2,12 @@
 # Inputs:
 #    - PSYCOPG2_VERSION - Version to build
 
-FROM python:3.9-slim-bullseye as main
+#
+# Stage: builder
+# Purpose:
+#  - Build the psycopg2 wheel
+#
+FROM python:3.9-slim-bullseye as builder
 
 LABEL org.opencontainers.image.description="A intermediate image with psycopg2 wheel built"
 
@@ -48,3 +53,14 @@ RUN set -eux \
     && apt-get -y purge ${BUILD_PACKAGES} \
     && apt-get -y autoremove --purge \
     && rm -rf /var/lib/apt/lists/*
+
+#
+# Stage: package
+# Purpose: Holds the compiled .whl files in a tiny image to pull
+#
+FROM alpine:3.17 as package
+
+WORKDIR /usr/src/wheels/
+
+COPY --from=builder /usr/src/wheels/*.whl ./
+COPY --from=builder /usr/src/wheels/pkg-list.txt ./

docker-builders/README.md (new file) | 57

@@ -0,0 +1,57 @@
+# Installer Library
+
+This folder contains the Dockerfiles for building certain installers or libraries, which are then pulled into the main image.
+
+## [jbig2enc](https://github.com/agl/jbig2enc)
+
+### Why
+
+JBIG is an image coding format which can achieve better compression of images for PDFs.
+
+### What
+
+The Docker image builds a shared library file and utility, which is copied into the correct location in the final image.
+
+### Updating
+
+1. Ensure the given qpdf version is present in [Debian bookworm](https://packages.debian.org/bookworm/qpdf)
+2. Update `.build-config.json` to the given version
+3. If the Debian specific version has incremented, update `Dockerfile.qpdf`
+
+See Also:
+
+- [OCRMyPDF Documentation](https://ocrmypdf.readthedocs.io/en/latest/jbig2.html)
+
+## [psycopg2](https://www.psycopg.org/)
+
+### Why
+
+The pre-built wheels of psycopg2 are built on Debian 9, which provides a quite old version of libpq-dev. This causes issues with authentication methods.
+
+### What
+
+The image builds psycopg2 wheels on Debian 10 and places the produced wheels into `/usr/src/wheels/`.
+
+See Also:
+
+- [Issue 266](https://github.com/paperless-ngx/paperless-ngx/issues/266)
+
+## [qpdf](https://qpdf.readthedocs.io/en/stable/index.html)
+
+### Why
+
+qpdf and its library provide tools to read, manipulate and fix up PDFs. Version 11 is also required by `pikepdf` 6+ and Debian 9 does not provide a version above 10.
+
+### What
+
+The Docker image cross compiles .deb installers for each supported architecture of the main image. The installers are placed in `/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/`
+
+## [pikepdf](https://pikepdf.readthedocs.io/en/latest/)
+
+### Why
+
+Required by OCRMyPDF, this is a general purpose library for PDF manipulation in Python via the qpdf libraries.
+
+### What
+
+The built wheels are placed into `/usr/src/wheels/`
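
As a quick sanity check for the psycopg2 rationale above, the libpq version a wheel was compiled against can be read at runtime; `__libpq_version__` encodes it as an integer (for PostgreSQL 10+, major * 10000 + minor, e.g. 120010 is 12.10):

```python
import psycopg2

# Compile-time libpq version the installed psycopg2 build links against
v = psycopg2.__libpq_version__
print(f"psycopg2 {psycopg2.__version__} built against libpq {v // 10000}.{v % 10000}")
```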

@@ -80,7 +80,7 @@ django_checks() {
 
 search_index() {
 
-	local -r index_version=1
+	local -r index_version=2
 	local -r index_version_file=${DATA_DIR}/.index_version
 
 	if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then

@@ -121,7 +121,17 @@ Executed after the consumer sees a new document in the consumption
 folder, but before any processing of the document is performed. This
 script can access the following relevant environment variables set:
 
-- `DOCUMENT_SOURCE_PATH`
+| Environment Variable    | Description                                                   |
+| ----------------------- | ------------------------------------------------------------- |
+| `DOCUMENT_SOURCE_PATH`  | Original path of the consumed document                        |
+| `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on  |
+
+!!! note
+
+    Pre-consume scripts which modify the document should only change
+    the `DOCUMENT_WORKING_PATH` file or a second consume task may
+    be triggered, leading to failures as two tasks work on the
+    same document path
+
 A simple but common example for this would be creating a simple script
 like this:
@@ -130,7 +140,7 @@ like this:
 
 ```bash
 #!/usr/bin/env bash
-pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH}
+pdf2pdfocr.py -i ${DOCUMENT_WORKING_PATH}
 ```
 
 `/etc/paperless.conf`
@@ -157,26 +167,36 @@ Executed after the consumer has successfully processed a document and
 has moved it into paperless. It receives the following environment
 variables:
 
-- `DOCUMENT_ID`
-- `DOCUMENT_FILE_NAME`
-- `DOCUMENT_CREATED`
-- `DOCUMENT_MODIFIED`
-- `DOCUMENT_ADDED`
-- `DOCUMENT_SOURCE_PATH`
-- `DOCUMENT_ARCHIVE_PATH`
-- `DOCUMENT_THUMBNAIL_PATH`
-- `DOCUMENT_DOWNLOAD_URL`
-- `DOCUMENT_THUMBNAIL_URL`
-- `DOCUMENT_CORRESPONDENT`
-- `DOCUMENT_TAGS`
-- `DOCUMENT_ORIGINAL_FILENAME`
+| Environment Variable         | Description                                   |
+| ---------------------------- | --------------------------------------------- |
+| `DOCUMENT_ID`                | Database primary key of the document          |
+| `DOCUMENT_FILE_NAME`         | Formatted filename, not including paths       |
+| `DOCUMENT_CREATED`           | Date & time when document was created         |
+| `DOCUMENT_MODIFIED`          | Date & time when document was last modified   |
+| `DOCUMENT_ADDED`             | Date & time when document was added           |
+| `DOCUMENT_SOURCE_PATH`       | Path to the original document file            |
+| `DOCUMENT_ARCHIVE_PATH`      | Path to the generated archive file (if any)   |
+| `DOCUMENT_THUMBNAIL_PATH`    | Path to the generated thumbnail               |
+| `DOCUMENT_DOWNLOAD_URL`      | URL for document download                     |
+| `DOCUMENT_THUMBNAIL_URL`     | URL for the document thumbnail                |
+| `DOCUMENT_CORRESPONDENT`     | Assigned correspondent (if any)               |
+| `DOCUMENT_TAGS`              | Comma separated list of tags applied (if any) |
+| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document                 |
 
-The script can be in any language, but for a simple shell script
-example, you can take a look at
-[post-consumption-example.sh](https://github.com/paperless-ngx/paperless-ngx/blob/main/scripts/post-consumption-example.sh)
-in this project.
-
-The post consumption script cannot cancel the consumption process.
+The script can be in any language. A simple shell script example:
+
+```bash title="post-consumption-example"
+--8<-- "./scripts/post-consumption-example.sh"
+```
+
+!!! note
+
+    The post consumption script cannot cancel the consumption process.
+
+!!! warning
+
+    The post consumption script should not modify the document files
+    directly
+
 The script's stdout and stderr will be logged line by line to the
 webserver log, along with the exit code of the script.
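
Tying the two hooks together: a pre-consume script should transform only the working copy and leave the original untouched, otherwise the consumption folder watcher can pick the document up a second time. A small Python equivalent of the bash example above (illustrative, not part of the commit):

```python
#!/usr/bin/env python3
# Illustrative pre-consume script: modify only the working copy.
import os
import subprocess

original = os.environ["DOCUMENT_SOURCE_PATH"]  # leave untouched
working = os.environ["DOCUMENT_WORKING_PATH"]  # safe to modify

print(f"Pre-processing {original} via working copy {working}")
# Same OCR step as the bash example, pointed at the working copy
subprocess.run(["pdf2pdfocr.py", "-i", working], check=True)
```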

@@ -2,6 +2,9 @@
 
 ## paperless-ngx 1.12.1
 
+_Note: Version 1.12.x introduced searching of comments. This works for comments added after the upgrade, but a reindex of the search index is required to search older comments. The Docker image performs this reindex automatically; bare metal installations have to perform it manually, see [the docs](https://docs.paperless-ngx.com/administration/#index)._
+
 ### Bug Fixes
 
 - Fix: comments not showing in search until after manual reindex in v1.12 [@shamoon](https://github.com/shamoon) ([#2513](https://github.com/paperless-ngx/paperless-ngx/pull/2513))

@@ -41,6 +41,7 @@ markdown_extensions:
       anchor_linenums: true
   - pymdownx.superfences
   - pymdownx.inlinehilite
+  - pymdownx.snippets
 strict: true
 nav:
     - index.md

(File diff suppressed because it is too large)

@@ -204,6 +204,10 @@ export class DocumentDetailComponent
             )
             .subscribe({
               next: (titleValue) => {
+                // In the rare case the field changed just after the debounced event fired,
+                // we don't want to overwrite what's actually in the text field, so just return
+                if (titleValue !== this.titleInput.value) return
+
                 this.title = titleValue
                 this.documentForm.patchValue({ title: titleValue })
               },

@@ -26,11 +26,11 @@
         </div>
         <p class="card-text">
           <span *ngIf="document.__search_hit__ && document.__search_hit__.highlights" [innerHtml]="document.__search_hit__.highlights"></span>
-          <span *ngIf="document.__search_hit__ && document.__search_hit__.comment_highlights" class="d-block">
+          <span *ngFor="let highlight of searchCommentHighlights" class="d-block">
            <svg width="1em" height="1em" fill="currentColor" class="me-2">
              <use xlink:href="assets/bootstrap-icons.svg#chat-left-text"/>
            </svg>
-            <span [innerHtml]="document.__search_hit__.comment_highlights"></span>
+            <span [innerHtml]="highlight"></span>
           </span>
           <span *ngIf="!document.__search_hit__" class="result-content">{{contentTrimmed}}</span>
         </p>

@@ -70,6 +70,22 @@ export class DocumentCardLargeComponent {
     }
   }
 
+  get searchCommentHighlights() {
+    let highlights = []
+    if (
+      this.document['__search_hit__'] &&
+      this.document['__search_hit__'].comment_highlights
+    ) {
+      // only show comments with a match
+      highlights = (
+        this.document['__search_hit__'].comment_highlights as string
+      )
+        .split(',')
+        .filter((highlight) => highlight.includes('<span'))
+    }
+    return highlights
+  }
+
   getIsThumbInverted() {
     return this.settingsService.get(SETTINGS_KEYS.DARK_MODE_THUMB_INVERTED)
   }

@@ -143,7 +143,7 @@
             <p i18n>
               <em>No tracking data is collected by the app in any way.</em>
             </p>
-            <app-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled" i18n-hint hint="Note that for users of thirdy-party containers e.g. linuxserver.io this notification may be 'ahead' of the current third-party release."></app-input-check>
+            <app-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled" i18n-hint hint="Note that for users of third-party containers e.g. linuxserver.io this notification may be 'ahead' of the current third-party release."></app-input-check>
           </div>
         </div>
 

@@ -5,7 +5,7 @@ export const environment = {
   apiBaseUrl: document.baseURI + 'api/',
   apiVersion: '2',
   appTitle: 'Paperless-ngx',
-  version: '1.12.1',
+  version: '1.12.1-dev',
   webSocketHost: window.location.host,
   webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
   webSocketBaseUrl: base_url.pathname + 'ws/',

@@ -4,7 +4,6 @@ import shutil
 import tempfile
 from dataclasses import dataclass
 from functools import lru_cache
-from math import ceil
 from pathlib import Path
 from typing import List
 from typing import Optional
@@ -12,10 +11,9 @@ from typing import Optional
 import magic
 from django.conf import settings
 from pdf2image import convert_from_path
+from pdf2image.exceptions import PDFPageCountError
 from pikepdf import Page
-from pikepdf import PasswordError
 from pikepdf import Pdf
-from pikepdf import PdfImage
 from PIL import Image
 from PIL import ImageSequence
 from pyzbar import pyzbar
@@ -154,52 +152,15 @@ def scan_file_for_barcodes(
     (page_number, barcode_text) tuples
     """
 
-    def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]:
-        detected_barcodes = []
-        with Pdf.open(pdf_filepath) as pdf:
-            for page_num, page in enumerate(pdf.pages):
-                for image_key in page.images:
-                    pdfimage = PdfImage(page.images[image_key])
-
-                    # This type is known to have issues:
-                    # https://github.com/pikepdf/pikepdf/issues/401
-                    if "/CCITTFaxDecode" in pdfimage.filters:
-                        raise BarcodeImageFormatError(
-                            "Unable to decode CCITTFaxDecode images",
-                        )
-
-                    # Not all images can be transcoded to a PIL image, which
-                    # is what pyzbar expects to receive, so this may
-                    # raise an exception, triggering fallback
-                    pillow_img = pdfimage.as_pil_image()
-
-                    # Scale the image down
-                    # See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
-                    # TLDR: zbar has issues with larger images
-                    width, height = pillow_img.size
-                    if width > 1024:
-                        scaler = ceil(width / 1024)
-                        new_width = int(width / scaler)
-                        new_height = int(height / scaler)
-                        pillow_img = pillow_img.resize((new_width, new_height))
-
-                    width, height = pillow_img.size
-                    if height > 2048:
-                        scaler = ceil(height / 2048)
-                        new_width = int(width / scaler)
-                        new_height = int(height / scaler)
-                        pillow_img = pillow_img.resize((new_width, new_height))
-
-                    for barcode_value in barcode_reader(pillow_img):
-                        detected_barcodes.append(Barcode(page_num, barcode_value))
-
-        return detected_barcodes
-
     def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
         detected_barcodes = []
         # use a temporary directory in case the file is too big to handle in memory
         with tempfile.TemporaryDirectory() as path:
-            pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
+            pages_from_path = convert_from_path(
+                pdf_filepath,
+                dpi=300,
+                output_folder=path,
+            )
             for current_page_number, page in enumerate(pages_from_path):
                 for barcode_value in barcode_reader(page):
                     detected_barcodes.append(
@@ -219,27 +180,19 @@ def scan_file_for_barcodes(
         # Always try pikepdf first, it's usually fine, faster and
         # uses less memory
         try:
-            barcodes = _pikepdf_barcode_scan(pdf_filepath)
+            barcodes = _pdf2image_barcode_scan(pdf_filepath)
         # Password protected files can't be checked
-        except PasswordError as e:
+        # This is the exception raised for those
+        except PDFPageCountError as e:
             logger.warning(
                 f"File is likely password protected, not checking for barcodes: {e}",
             )
-        # Handle pikepdf related image decoding issues with a fallback to page
-        # by page conversion to images in a temporary directory
-        except Exception as e:
-            logger.warning(
-                f"Falling back to pdf2image because: {e}",
-            )
-            try:
-                barcodes = _pdf2image_barcode_scan(pdf_filepath)
         # This file is really borked, allow the consumption to continue
         # but it may fail further on
         except Exception as e:  # pragma: no cover
             logger.warning(
                 f"Exception during barcode scanning: {e}",
             )
+
     else:
         logger.warning(
             f"Unsupported file format for barcode reader: {str(mime_type)}",
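
A standalone sketch of the pdf2image path the code now takes (it assumes poppler is installed, which pdf2image requires at runtime). `PDFPageCountError` is raised when pdf2image cannot read the page count, which is why the code above treats it as the likely password-protected case:

```python
import tempfile

from pdf2image import convert_from_path
from pdf2image.exceptions import PDFPageCountError
from pyzbar import pyzbar


def scan(pdf_path: str) -> list:
    """Return (page_number, barcode_text) tuples found in the PDF."""
    found = []
    with tempfile.TemporaryDirectory() as tmp:
        try:
            # Rendering at 300 DPI keeps small barcodes readable by zbar
            pages = convert_from_path(pdf_path, dpi=300, output_folder=tmp)
        except PDFPageCountError:
            return found  # unreadable, e.g. encrypted
        for page_number, page in enumerate(pages):
            for barcode in pyzbar.decode(page):
                found.append((page_number, barcode.data.decode("utf-8")))
    return found
```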
| @@ -1,7 +1,10 @@ | |||||||
| import datetime | import datetime | ||||||
| import hashlib | import hashlib | ||||||
| import os | import os | ||||||
|  | import shutil | ||||||
|  | import tempfile | ||||||
| import uuid | import uuid | ||||||
|  | from pathlib import Path | ||||||
| from subprocess import CompletedProcess | from subprocess import CompletedProcess | ||||||
| from subprocess import run | from subprocess import run | ||||||
| from typing import Optional | from typing import Optional | ||||||
| @@ -94,7 +97,8 @@ class Consumer(LoggingMixin): | |||||||
|  |  | ||||||
|     def __init__(self): |     def __init__(self): | ||||||
|         super().__init__() |         super().__init__() | ||||||
|         self.path = None |         self.path: Optional[Path] = None | ||||||
|  |         self.original_path: Optional[Path] = None | ||||||
|         self.filename = None |         self.filename = None | ||||||
|         self.override_title = None |         self.override_title = None | ||||||
|         self.override_correspondent_id = None |         self.override_correspondent_id = None | ||||||
| @@ -167,16 +171,18 @@ class Consumer(LoggingMixin): | |||||||
|  |  | ||||||
|         self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}") |         self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}") | ||||||
|  |  | ||||||
|         filepath_arg = os.path.normpath(self.path) |         working_file_path = str(self.path) | ||||||
|  |         original_file_path = str(self.original_path) | ||||||
|  |  | ||||||
|         script_env = os.environ.copy() |         script_env = os.environ.copy() | ||||||
|         script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg |         script_env["DOCUMENT_SOURCE_PATH"] = original_file_path | ||||||
|  |         script_env["DOCUMENT_WORKING_PATH"] = working_file_path | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             completed_proc = run( |             completed_proc = run( | ||||||
|                 args=[ |                 args=[ | ||||||
|                     settings.PRE_CONSUME_SCRIPT, |                     settings.PRE_CONSUME_SCRIPT, | ||||||
|                     filepath_arg, |                     original_file_path, | ||||||
|                 ], |                 ], | ||||||
|                 env=script_env, |                 env=script_env, | ||||||
|                 capture_output=True, |                 capture_output=True, | ||||||
| @@ -195,7 +201,7 @@ class Consumer(LoggingMixin): | |||||||
|                 exception=e, |                 exception=e, | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|     def run_post_consume_script(self, document): |     def run_post_consume_script(self, document: Document): | ||||||
|         if not settings.POST_CONSUME_SCRIPT: |         if not settings.POST_CONSUME_SCRIPT: | ||||||
|             return |             return | ||||||
|  |  | ||||||
| @@ -285,8 +291,8 @@ class Consumer(LoggingMixin): | |||||||
|         Return the document object if it was successfully created. |         Return the document object if it was successfully created. | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|         self.path = path |         self.path = Path(path).resolve() | ||||||
|         self.filename = override_filename or os.path.basename(path) |         self.filename = override_filename or self.path.name | ||||||
|         self.override_title = override_title |         self.override_title = override_title | ||||||
|         self.override_correspondent_id = override_correspondent_id |         self.override_correspondent_id = override_correspondent_id | ||||||
|         self.override_document_type_id = override_document_type_id |         self.override_document_type_id = override_document_type_id | ||||||
| @@ -311,6 +317,15 @@ class Consumer(LoggingMixin): | |||||||
|  |  | ||||||
|         self.log("info", f"Consuming {self.filename}") |         self.log("info", f"Consuming {self.filename}") | ||||||
|  |  | ||||||
|  |         # For the actual work, copy the file into a tempdir | ||||||
|  |         self.original_path = self.path | ||||||
|  |         tempdir = tempfile.TemporaryDirectory( | ||||||
|  |             prefix="paperless-ngx", | ||||||
|  |             dir=settings.SCRATCH_DIR, | ||||||
|  |         ) | ||||||
|  |         self.path = Path(tempdir.name) / Path(self.filename) | ||||||
|  |         shutil.copy(self.original_path, self.path) | ||||||
|  |  | ||||||
|         # Determine the parser class. |         # Determine the parser class. | ||||||
|  |  | ||||||
|         mime_type = magic.from_file(self.path, mime=True) |         mime_type = magic.from_file(self.path, mime=True) | ||||||
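The copy-into-scratch step above, isolated as a sketch (scratch_dir stands in for settings.SCRATCH_DIR; the TemporaryDirectory must stay referenced, since its cleanup deletes the copy):

    import shutil
    import tempfile
    from pathlib import Path

    def make_working_copy(original: Path, scratch_dir: Path):
        # Caller keeps the TemporaryDirectory alive; tempdir.cleanup()
        # (invoked in the finally block below) removes the working copy.
        tempdir = tempfile.TemporaryDirectory(prefix="paperless-ngx", dir=scratch_dir)
        working = Path(tempdir.name) / original.name
        shutil.copy(original, working)
        return working, tempdir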
| @@ -453,11 +468,12 @@ class Consumer(LoggingMixin): | |||||||
|                 # Delete the file only if it was successfully consumed |                 # Delete the file only if it was successfully consumed | ||||||
|                 self.log("debug", f"Deleting file {self.path}") |                 self.log("debug", f"Deleting file {self.path}") | ||||||
|                 os.unlink(self.path) |                 os.unlink(self.path) | ||||||
|  |                 self.original_path.unlink() | ||||||
|  |  | ||||||
|                 # https://github.com/jonaswinkler/paperless-ng/discussions/1037 |                 # https://github.com/jonaswinkler/paperless-ng/discussions/1037 | ||||||
|                 shadow_file = os.path.join( |                 shadow_file = os.path.join( | ||||||
|                     os.path.dirname(self.path), |                     os.path.dirname(self.original_path), | ||||||
|                     "._" + os.path.basename(self.path), |                     "._" + os.path.basename(self.original_path), | ||||||
|                 ) |                 ) | ||||||
|  |  | ||||||
|                 if os.path.isfile(shadow_file): |                 if os.path.isfile(shadow_file): | ||||||
| @@ -474,6 +490,7 @@ class Consumer(LoggingMixin): | |||||||
|             ) |             ) | ||||||
|         finally: |         finally: | ||||||
|             document_parser.cleanup() |             document_parser.cleanup() | ||||||
|  |             tempdir.cleanup() | ||||||
|  |  | ||||||
|         self.run_post_consume_script(document) |         self.run_post_consume_script(document) | ||||||
|  |  | ||||||
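Since the working copy lives in the scratch directory, the AppleDouble shadow file is now looked up next to the original rather than next to self.path. The same logic as a pathlib sketch:

    from pathlib import Path

    def remove_shadow_file(original: Path) -> None:
        # macOS Finder can leave "._<name>" companion files beside the original
        shadow = original.parent / f"._{original.name}"
        if shadow.is_file():
            shadow.unlink()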
|   | |||||||
| Two binary images changed: 33 KiB and 39 KiB (sizes unchanged); one further file diff suppressed because it is too large |
							| @@ -833,7 +833,8 @@ class PreConsumeTestCase(TestCase): | |||||||
|         with tempfile.NamedTemporaryFile() as script: |         with tempfile.NamedTemporaryFile() as script: | ||||||
|             with override_settings(PRE_CONSUME_SCRIPT=script.name): |             with override_settings(PRE_CONSUME_SCRIPT=script.name): | ||||||
|                 c = Consumer() |                 c = Consumer() | ||||||
|                 c.path = "path-to-file" |                 c.original_path = "path-to-file" | ||||||
|  |                 c.path = "/tmp/somewhere/path-to-file" | ||||||
|                 c.run_pre_consume_script() |                 c.run_pre_consume_script() | ||||||
|  |  | ||||||
|                 m.assert_called_once() |                 m.assert_called_once() | ||||||
| @@ -841,10 +842,19 @@ class PreConsumeTestCase(TestCase): | |||||||
|                 args, kwargs = m.call_args |                 args, kwargs = m.call_args | ||||||
|  |  | ||||||
|                 command = kwargs["args"] |                 command = kwargs["args"] | ||||||
|  |                 environment = kwargs["env"] | ||||||
|  |  | ||||||
|                 self.assertEqual(command[0], script.name) |                 self.assertEqual(command[0], script.name) | ||||||
|                 self.assertEqual(command[1], "path-to-file") |                 self.assertEqual(command[1], "path-to-file") | ||||||
|  |  | ||||||
|  |                 self.assertDictContainsSubset( | ||||||
|  |                     { | ||||||
|  |                         "DOCUMENT_SOURCE_PATH": c.original_path, | ||||||
|  |                         "DOCUMENT_WORKING_PATH": c.path, | ||||||
|  |                     }, | ||||||
|  |                     environment, | ||||||
|  |                 ) | ||||||
|  |  | ||||||
|     @mock.patch("documents.consumer.Consumer.log") |     @mock.patch("documents.consumer.Consumer.log") | ||||||
|     def test_script_with_output(self, mocked_log): |     def test_script_with_output(self, mocked_log): | ||||||
|         """ |         """ | ||||||
| @@ -961,9 +971,10 @@ class PostConsumeTestCase(TestCase): | |||||||
|  |  | ||||||
|                 m.assert_called_once() |                 m.assert_called_once() | ||||||
|  |  | ||||||
|                 args, kwargs = m.call_args |                 _, kwargs = m.call_args | ||||||
|  |  | ||||||
|                 command = kwargs["args"] |                 command = kwargs["args"] | ||||||
|  |                 environment = kwargs["env"] | ||||||
|  |  | ||||||
|                 self.assertEqual(command[0], script.name) |                 self.assertEqual(command[0], script.name) | ||||||
|                 self.assertEqual(command[1], str(doc.pk)) |                 self.assertEqual(command[1], str(doc.pk)) | ||||||
| @@ -972,6 +983,17 @@ class PostConsumeTestCase(TestCase): | |||||||
|                 self.assertEqual(command[7], "my_bank") |                 self.assertEqual(command[7], "my_bank") | ||||||
|                 self.assertCountEqual(command[8].split(","), ["a", "b"]) |                 self.assertCountEqual(command[8].split(","), ["a", "b"]) | ||||||
|  |  | ||||||
|  |                 self.assertDictContainsSubset( | ||||||
|  |                     { | ||||||
|  |                         "DOCUMENT_ID": str(doc.pk), | ||||||
|  |                         "DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/", | ||||||
|  |                         "DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/", | ||||||
|  |                         "DOCUMENT_CORRESPONDENT": "my_bank", | ||||||
|  |                         "DOCUMENT_TAGS": "a,b", | ||||||
|  |                     }, | ||||||
|  |                     environment, | ||||||
|  |                 ) | ||||||
|  |  | ||||||
|     def test_script_exit_non_zero(self): |     def test_script_exit_non_zero(self): | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
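For context, a minimal post-consume script reading a few of the asserted variables (illustrative; these variables predate this change):

    #!/usr/bin/env python3
    import os

    doc_id = os.environ["DOCUMENT_ID"]
    url = os.environ["DOCUMENT_DOWNLOAD_URL"]
    tags = os.environ.get("DOCUMENT_TAGS", "").split(",")
    print(f"consumed document {doc_id}: {url} tags={tags}")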
|   | |||||||
| @@ -3,6 +3,7 @@ import shutil | |||||||
| import tempfile | import tempfile | ||||||
| from collections import namedtuple | from collections import namedtuple | ||||||
| from contextlib import contextmanager | from contextlib import contextmanager | ||||||
|  | from unittest import mock | ||||||
|  |  | ||||||
| from django.apps import apps | from django.apps import apps | ||||||
| from django.db import connection | from django.db import connection | ||||||
| @@ -86,6 +87,30 @@ class DirectoriesMixin: | |||||||
|         remove_dirs(self.dirs) |         remove_dirs(self.dirs) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ConsumerProgressMixin: | ||||||
|  |     def setUp(self) -> None: | ||||||
|  |         self.send_progress_patcher = mock.patch( | ||||||
|  |             "documents.consumer.Consumer._send_progress", | ||||||
|  |         ) | ||||||
|  |         self.send_progress_mock = self.send_progress_patcher.start() | ||||||
|  |         super().setUp() | ||||||
|  |  | ||||||
|  |     def tearDown(self) -> None: | ||||||
|  |         super().tearDown() | ||||||
|  |         self.send_progress_patcher.stop() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class DocumentConsumeDelayMixin: | ||||||
|  |     def setUp(self) -> None: | ||||||
|  |         self.consume_file_patcher = mock.patch("documents.tasks.consume_file.delay") | ||||||
|  |         self.consume_file_mock = self.consume_file_patcher.start() | ||||||
|  |         super().setUp() | ||||||
|  |  | ||||||
|  |     def tearDown(self) -> None: | ||||||
|  |         super().tearDown() | ||||||
|  |         self.consume_file_patcher.stop() | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestMigrations(TransactionTestCase): | class TestMigrations(TransactionTestCase): | ||||||
|     @property |     @property | ||||||
|     def app(self): |     def app(self): | ||||||
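Both mixins start their patch and then delegate to super().setUp(), so they must be listed before the TestCase base for the cooperative chain to run. A hypothetical usage (class and test names are illustrative):

    class TestConsumeFlow(ConsumerProgressMixin, DocumentConsumeDelayMixin, TransactionTestCase):
        def test_upload_queues_consumption(self):
            ...  # exercise code that calls documents.tasks.consume_file.delay()
            self.consume_file_mock.assert_called_once()
            self.send_progress_mock.assert_called()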
|   | |||||||
| @@ -477,21 +477,14 @@ class DocumentViewSet( | |||||||
| class SearchResultSerializer(DocumentSerializer): | class SearchResultSerializer(DocumentSerializer): | ||||||
|     def to_representation(self, instance): |     def to_representation(self, instance): | ||||||
|         doc = Document.objects.get(id=instance["id"]) |         doc = Document.objects.get(id=instance["id"]) | ||||||
|         comments = "" |  | ||||||
|         if hasattr(instance.results.q, "subqueries"): |  | ||||||
|             commentTerm = instance.results.q.subqueries[0] |  | ||||||
|         comments = ",".join( |         comments = ",".join( | ||||||
|                 [ |             [str(c.comment) for c in Comment.objects.filter(document=instance["id"])], | ||||||
|                     str(c.comment) |  | ||||||
|                     for c in Comment.objects.filter(document=instance["id"]) |  | ||||||
|                     if commentTerm.text in c.comment |  | ||||||
|                 ], |  | ||||||
|         ) |         ) | ||||||
|         r = super().to_representation(doc) |         r = super().to_representation(doc) | ||||||
|         r["__search_hit__"] = { |         r["__search_hit__"] = { | ||||||
|             "score": instance.score, |             "score": instance.score, | ||||||
|             "highlights": instance.highlights("content", text=doc.content), |             "highlights": instance.highlights("content", text=doc.content), | ||||||
|             "comment_highlights": instance.highlights("content", text=comments) |             "comment_highlights": instance.highlights("comments", text=comments) | ||||||
|             if doc |             if doc | ||||||
|             else None, |             else None, | ||||||
|             "rank": instance.rank, |             "rank": instance.rank, | ||||||
|   | |||||||
| @@ -271,6 +271,16 @@ class MailDocumentParser(DocumentParser): | |||||||
|                 "paperHeight": "11.7", |                 "paperHeight": "11.7", | ||||||
|                 "scale": "1.0", |                 "scale": "1.0", | ||||||
|             } |             } | ||||||
|  |  | ||||||
|  |             # Set the output format of the resulting PDF | ||||||
|  |             # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno | ||||||
|  |             if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}: | ||||||
|  |                 data["pdfFormat"] = "PDF/A-2b" | ||||||
|  |             elif settings.OCR_OUTPUT_TYPE == "pdfa-1": | ||||||
|  |                 data["pdfFormat"] = "PDF/A-1a" | ||||||
|  |             elif settings.OCR_OUTPUT_TYPE == "pdfa-3": | ||||||
|  |                 data["pdfFormat"] = "PDF/A-3b" | ||||||
|  |  | ||||||
|             try: |             try: | ||||||
|                 response = requests.post( |                 response = requests.post( | ||||||
|                     url, |                     url, | ||||||
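The same four-way branch is duplicated in the Tika parser below; a shared helper could collapse it. A hypothetical sketch, not part of this change:

    from typing import Optional

    def gotenberg_pdf_format(output_type: str) -> Optional[str]:
        # Maps OCR_OUTPUT_TYPE values onto Gotenberg pdfFormat identifiers
        return {
            "pdfa": "PDF/A-2b",
            "pdfa-2": "PDF/A-2b",
            "pdfa-1": "PDF/A-1a",
            "pdfa-3": "PDF/A-3b",
        }.get(output_type)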
|   | |||||||
| @@ -573,8 +573,8 @@ class TestParser(TestCase): | |||||||
|             self.parser.gotenberg_server + "/forms/chromium/convert/html", |             self.parser.gotenberg_server + "/forms/chromium/convert/html", | ||||||
|             mock_post.call_args.args[0], |             mock_post.call_args.args[0], | ||||||
|         ) |         ) | ||||||
|         self.assertEqual({}, mock_post.call_args.kwargs["headers"]) |         self.assertDictEqual({}, mock_post.call_args.kwargs["headers"]) | ||||||
|         self.assertEqual( |         self.assertDictEqual( | ||||||
|             { |             { | ||||||
|                 "marginTop": "0.1", |                 "marginTop": "0.1", | ||||||
|                 "marginBottom": "0.1", |                 "marginBottom": "0.1", | ||||||
| @@ -583,6 +583,7 @@ class TestParser(TestCase): | |||||||
|                 "paperWidth": "8.27", |                 "paperWidth": "8.27", | ||||||
|                 "paperHeight": "11.7", |                 "paperHeight": "11.7", | ||||||
|                 "scale": "1.0", |                 "scale": "1.0", | ||||||
|  |                 "pdfFormat": "PDF/A-2b", | ||||||
|             }, |             }, | ||||||
|             mock_post.call_args.kwargs["data"], |             mock_post.call_args.kwargs["data"], | ||||||
|         ) |         ) | ||||||
| @@ -663,8 +664,8 @@ class TestParser(TestCase): | |||||||
|             self.parser.gotenberg_server + "/forms/chromium/convert/html", |             self.parser.gotenberg_server + "/forms/chromium/convert/html", | ||||||
|             mock_post.call_args.args[0], |             mock_post.call_args.args[0], | ||||||
|         ) |         ) | ||||||
|         self.assertEqual({}, mock_post.call_args.kwargs["headers"]) |         self.assertDictEqual({}, mock_post.call_args.kwargs["headers"]) | ||||||
|         self.assertEqual( |         self.assertDictEqual( | ||||||
|             { |             { | ||||||
|                 "marginTop": "0.1", |                 "marginTop": "0.1", | ||||||
|                 "marginBottom": "0.1", |                 "marginBottom": "0.1", | ||||||
|   | |||||||
| @@ -95,9 +95,19 @@ class TikaDocumentParser(DocumentParser): | |||||||
|                 ), |                 ), | ||||||
|             } |             } | ||||||
|             headers = {} |             headers = {} | ||||||
|  |             data = {} | ||||||
|  |  | ||||||
|  |             # Set the output format of the resulting PDF | ||||||
|  |             # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno | ||||||
|  |             if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}: | ||||||
|  |                 data["pdfFormat"] = "PDF/A-2b" | ||||||
|  |             elif settings.OCR_OUTPUT_TYPE == "pdfa-1": | ||||||
|  |                 data["pdfFormat"] = "PDF/A-1a" | ||||||
|  |             elif settings.OCR_OUTPUT_TYPE == "pdfa-3": | ||||||
|  |                 data["pdfFormat"] = "PDF/A-3b" | ||||||
|  |  | ||||||
|             try: |             try: | ||||||
|                 response = requests.post(url, files=files, headers=headers) |                 response = requests.post(url, files=files, headers=headers, data=data) | ||||||
|                 response.raise_for_status()  # ensure we notice bad responses |                 response.raise_for_status()  # ensure we notice bad responses | ||||||
|             except Exception as err: |             except Exception as err: | ||||||
|                 raise ParseError( |                 raise ParseError( | ||||||
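With a helper like the hypothetical gotenberg_pdf_format() sketched earlier, the branch in both parsers would reduce to:

    pdf_format = gotenberg_pdf_format(settings.OCR_OUTPUT_TYPE)
    if pdf_format is not None:
        data["pdfFormat"] = pdf_format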
|   | |||||||