Compare commits


8 Commits

Author | SHA1 | Message | Date
shamoon | 770fb2d60e | Update build-and-release.yml | 2025-09-24 15:22:15 -07:00
shamoon | c8ef9e663a | Yikes, try split ci workflow | 2025-09-24 15:03:41 -07:00
shamoon | 2195e4af45 | Ok, lets try manual Codecov comments | 2025-09-24 14:48:06 -07:00
shamoon | c6716905a4 | Revert "Chore: Enable SonarQube scanning (#10904)" (reverts commit 8d1f23e9d6) | 2025-09-24 14:38:22 -07:00
shamoon | 850ee5a415 | Revert "Chore: remove conditional from pre-commit job in CI (#10916)" (reverts commit 53b393dab5) | 2025-09-24 14:38:19 -07:00
shamoon | b25b5abdb0 | Revert "Development: try separating sonar scan" (reverts commit 68e0559053) | 2025-09-24 14:38:13 -07:00
shamoon | 68e0559053 | Development: try separating sonar scan | 2025-09-24 14:26:05 -07:00
DerRockWolf | 4ff09c4cf4 | Enhancement: support workflow path matching of barcode-split documents (#10723) | 2025-09-24 21:03:03 +00:00
23 changed files with 672 additions and 862 deletions

.github/workflows/build-and-release.yml (vendored, new file)
View File

@@ -0,0 +1,430 @@
name: 'Build and Release'
on:
  workflow_run:
    workflows:
      - ci
    types:
      - completed
permissions:
  contents: write
  packages: write
  pull-requests: write
env:
  DEFAULT_UV_VERSION: "0.8.x"
  DEFAULT_PYTHON_VERSION: "3.11"
  NLTK_DATA: "/usr/share/nltk_data"
jobs:
  prepare:
    if: >-
      github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push'
    name: Prepare build context
    runs-on: ubuntu-24.04
    outputs:
      should-build: ${{ steps.determine.outputs.should-build }}
      ref: ${{ steps.determine.outputs.ref }}
      ref-name: ${{ steps.determine.outputs.ref-name }}
      sha: ${{ steps.determine.outputs.sha }}
      is-tag: ${{ steps.determine.outputs.is-tag }}
      is-release-target: ${{ steps.determine.outputs.is-release-target }}
      is-beta-rc: ${{ steps.determine.outputs.is-beta-rc }}
    steps:
      - name: Determine ref information
        id: determine
        uses: actions/github-script@v7
        with:
          script: |
            const run = context.payload.workflow_run;
            const owner = context.repo.owner;
            const repo = context.repo.repo;
            const sha = run.head_sha;
            const branch = run.head_branch;
            let ref = undefined;
            let refName = undefined;
            if (branch) {
              ref = `refs/heads/${branch}`;
              refName = branch;
            } else {
              const iterator = github.paginate.iterator(
                github.rest.repos.listTags,
                {
                  owner,
                  repo,
                  per_page: 100,
                },
              );
              for await (const { data } of iterator) {
                const match = data.find((tag) => tag.commit?.sha === sha);
                if (match) {
                  ref = `refs/tags/${match.name}`;
                  refName = match.name;
                  break;
                }
              }
            }
            const outputs = {
              shouldBuild: false,
              ref: ref ?? '',
              refName: refName ?? '',
              sha,
              isTag: ref?.startsWith('refs/tags/') ?? false,
              isReleaseTarget: false,
              isBetaRc: false,
            };
            if (!ref || !refName) {
              core.info('No matching ref found for workflow run; skipping post-CI workflow.');
            } else {
              const allowed =
                ref.startsWith('refs/heads/feature-') ||
                ref.startsWith('refs/heads/fix-') ||
                ref.startsWith('refs/heads/l10n_') ||
                ref === 'refs/heads/dev' ||
                ref === 'refs/heads/beta' ||
                ref.includes('beta.rc') ||
                ref.startsWith('refs/tags/v');
              const isBetaRc = refName.includes('beta.rc');
              const isReleaseTarget = outputs.isTag && (refName.startsWith('v') || isBetaRc);
              outputs.shouldBuild = allowed;
              outputs.isReleaseTarget = isReleaseTarget;
              outputs.isBetaRc = isBetaRc;
            }
            core.setOutput('should-build', outputs.shouldBuild ? 'true' : 'false');
            core.setOutput('ref', outputs.ref);
            core.setOutput('ref-name', outputs.refName);
            core.setOutput('sha', outputs.sha);
            core.setOutput('is-tag', outputs.isTag ? 'true' : 'false');
            core.setOutput('is-release-target', outputs.isReleaseTarget ? 'true' : 'false');
            core.setOutput('is-beta-rc', outputs.isBetaRc ? 'true' : 'false');
  build-docker-image:
    needs: prepare
    if: needs.prepare.outputs.should-build == 'true'
    name: Build Docker image for ${{ needs.prepare.outputs.ref-name }}
    runs-on: ubuntu-24.04
    concurrency:
      group: ${{ github.workflow }}-build-docker-image-${{ needs.prepare.outputs.ref-name || needs.prepare.outputs.sha }}
      cancel-in-progress: true
    env:
      REF: ${{ needs.prepare.outputs.ref }}
      REF_NAME: ${{ needs.prepare.outputs.ref-name }}
      SHA: ${{ needs.prepare.outputs.sha }}
    steps:
      - name: Checkout
        uses: actions/checkout@v5
        with:
          ref: ${{ env.SHA }}
      - name: Check pushing to Docker Hub
        id: push-other-places
        env:
          REPOSITORY_OWNER: ${{ github.repository_owner }}
          REF_NAME: ${{ env.REF_NAME }}
          REF: ${{ env.REF }}
        run: |
          if [[ "$REPOSITORY_OWNER" == "paperless-ngx" ]] && \
             ([[ "$REF_NAME" == "dev" ]] || [[ "$REF_NAME" == "beta" ]] || [[ "$REF" == refs/tags/v* ]]); then
            echo "Enabling DockerHub image push"
            echo "enable=true" >> "$GITHUB_OUTPUT"
          else
            echo "Not pushing to DockerHub"
            echo "enable=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Set ghcr repository name
        id: set-ghcr-repository
        run: |
          ghcr_name=$(echo "${{ github.repository }}" | awk '{ print tolower($0) }')
          echo "Name is ${ghcr_name}"
          echo "ghcr-repository=${ghcr_name}" >> "$GITHUB_OUTPUT"
      - name: Gather Docker metadata
        id: docker-meta
        uses: docker/metadata-action@v5
        env:
          GITHUB_REF: ${{ env.REF }}
          GITHUB_REF_NAME: ${{ env.REF_NAME }}
          GITHUB_SHA: ${{ env.SHA }}
        with:
          images: |
            ghcr.io/${{ steps.set-ghcr-repository.outputs.ghcr-repository }}
            name=paperlessngx/paperless-ngx,enable=${{ steps.push-other-places.outputs.enable }}
            name=quay.io/paperlessngx/paperless-ngx,enable=${{ steps.push-other-places.outputs.enable }}
          tags: |
            type=ref,event=branch
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
        with:
          platforms: arm64
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Login to Docker Hub
        if: steps.push-other-places.outputs.enable == 'true'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Login to Quay.io
        if: steps.push-other-places.outputs.enable == 'true'
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_ROBOT_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.docker-meta.outputs.tags }}
          labels: ${{ steps.docker-meta.outputs.labels }}
          build-args: |
            PNGX_TAG_VERSION=${{ steps.docker-meta.outputs.version }}
          cache-from: |
            type=registry,ref=ghcr.io/${{ steps.set-ghcr-repository.outputs.ghcr-repository }}/builder/cache/app:${{ env.REF_NAME }}
            type=registry,ref=ghcr.io/${{ steps.set-ghcr-repository.outputs.ghcr-repository }}/builder/cache/app:dev
          cache-to: |
            type=registry,mode=max,ref=ghcr.io/${{ steps.set-ghcr-repository.outputs.ghcr-repository }}/builder/cache/app:${{ env.REF_NAME }}
      - name: Inspect image
        run: |
          docker buildx imagetools inspect ${{ fromJSON(steps.docker-meta.outputs.json).tags[0] }}
      - name: Export frontend artifact from docker
        run: |
          docker create --name frontend-extract ${{ fromJSON(steps.docker-meta.outputs.json).tags[0] }}
          docker cp frontend-extract:/usr/src/paperless/src/documents/static/frontend src/documents/static/frontend/
      - name: Upload frontend artifact
        uses: actions/upload-artifact@v4
        with:
          name: frontend-compiled
          path: src/documents/static/frontend/
          retention-days: 7
  build-release:
    needs:
      - prepare
      - build-docker-image
    if: needs.prepare.outputs.should-build == 'true'
    name: Build release bundle
    runs-on: ubuntu-24.04
    env:
      REF_NAME: ${{ needs.prepare.outputs.ref-name }}
      SHA: ${{ needs.prepare.outputs.sha }}
      CI_RUN_ID: ${{ github.event.workflow_run.id }}
    steps:
      - name: Checkout
        uses: actions/checkout@v5
        with:
          ref: ${{ env.SHA }}
      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install Python dependencies
        run: |
          uv sync --python ${{ steps.setup-python.outputs.python-version }} --dev --frozen
      - name: Install system dependencies
        run: |
          sudo apt-get update -qq
          sudo apt-get install -qq --no-install-recommends gettext liblept5
      - name: Download frontend artifact
        uses: actions/download-artifact@v5
        with:
          name: frontend-compiled
          path: src/documents/static/frontend/
      - name: Download documentation artifact
        uses: actions/download-artifact@v5
        with:
          name: documentation
          path: docs/_build/html/
          run-id: ${{ env.CI_RUN_ID }}
      - name: Generate requirements file
        run: |
          uv export --quiet --no-dev --all-extras --format requirements-txt --output-file requirements.txt
      - name: Compile messages
        run: |
          cd src/
          uv run \
            --python ${{ steps.setup-python.outputs.python-version }} \
            manage.py compilemessages
      - name: Collect static files
        run: |
          cd src/
          uv run \
            --python ${{ steps.setup-python.outputs.python-version }} \
            manage.py collectstatic --no-input
      - name: Move files
        run: |
          echo "Making dist folders"
          for directory in dist \
            dist/paperless-ngx \
            dist/paperless-ngx/scripts;
          do
            mkdir --verbose --parents ${directory}
          done
          echo "Copying basic files"
          for file_name in .dockerignore \
            .env \
            Dockerfile \
            pyproject.toml \
            uv.lock \
            requirements.txt \
            LICENSE \
            README.md \
            paperless.conf.example
          do
            cp --verbose ${file_name} dist/paperless-ngx/
          done
          mv --verbose dist/paperless-ngx/paperless.conf.example dist/paperless-ngx/paperless.conf
          echo "Copying Docker related files"
          cp --recursive docker/ dist/paperless-ngx/docker
          echo "Copying startup scripts"
          cp --verbose scripts/*.service scripts/*.sh scripts/*.socket dist/paperless-ngx/scripts/
          echo "Copying source files"
          cp --recursive src/ dist/paperless-ngx/src
          echo "Copying documentation"
          cp --recursive docs/_build/html/ dist/paperless-ngx/docs
          mv --verbose static dist/paperless-ngx
      - name: Make release package
        run: |
          echo "Creating release archive"
          cd dist
          sudo chown -R 1000:1000 paperless-ngx/
          tar -cJf paperless-ngx.tar.xz paperless-ngx/
      - name: Upload release artifact
        uses: actions/upload-artifact@v4
        with:
          name: release
          path: dist/paperless-ngx.tar.xz
          retention-days: 7
  publish-release:
    needs:
      - prepare
      - build-release
    if: needs.prepare.outputs.is-release-target == 'true'
    name: Publish release
    runs-on: ubuntu-24.04
    outputs:
      prerelease: ${{ steps.get_version.outputs.prerelease }}
      changelog: ${{ steps.create-release.outputs.body }}
      version: ${{ steps.get_version.outputs.version }}
    steps:
      - name: Download release artifact
        uses: actions/download-artifact@v5
        with:
          name: release
          path: ./
      - name: Get version
        id: get_version
        run: |
          echo "version=${{ needs.prepare.outputs.ref-name }}" >> "$GITHUB_OUTPUT"
          if [[ ${{ needs.prepare.outputs.is-beta-rc }} == 'true' ]]; then
            echo "prerelease=true" >> "$GITHUB_OUTPUT"
          else
            echo "prerelease=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Create Release and Changelog
        id: create-release
        uses: release-drafter/release-drafter@v6
        with:
          name: Paperless-ngx ${{ steps.get_version.outputs.version }}
          tag: ${{ steps.get_version.outputs.version }}
          version: ${{ steps.get_version.outputs.version }}
          prerelease: ${{ steps.get_version.outputs.prerelease }}
          publish: true
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Upload release archive
        id: upload-release-asset
        uses: shogo82148/actions-upload-release-asset@v1
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          upload_url: ${{ steps.create-release.outputs.upload_url }}
          asset_path: ./paperless-ngx.tar.xz
          asset_name: paperless-ngx-${{ steps.get_version.outputs.version }}.tar.xz
          asset_content_type: application/x-xz
  append-changelog:
    needs:
      - publish-release
    if: needs.publish-release.outputs.prerelease == 'false'
    name: Append changelog to docs
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
        uses: actions/checkout@v5
        with:
          ref: main
      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Append Changelog to docs
        id: append-Changelog
        working-directory: docs
        run: |
          git branch ${{ needs.publish-release.outputs.version }}-changelog
          git checkout ${{ needs.publish-release.outputs.version }}-changelog
          echo -e "# Changelog\n\n${{ needs.publish-release.outputs.changelog }}\n" > changelog-new.md
          echo "Manually linking usernames"
          sed -i -r 's|@([a-zA-Z0-9_]+) \(\[#|[@\1](https://github.com/\1) ([#|g' changelog-new.md
          echo "Removing unneeded comment tags"
          sed -i -r 's|@<!---->|@|g' changelog-new.md
          CURRENT_CHANGELOG=`tail --lines +2 changelog.md`
          echo -e "$CURRENT_CHANGELOG" >> changelog-new.md
          mv changelog-new.md changelog.md
          uv run \
            --python ${{ steps.setup-python.outputs.python-version }} \
            --dev \
            pre-commit run --files changelog.md || true
          git config --global user.name "github-actions"
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
          git push origin ${{ needs.publish-release.outputs.version }}-changelog
      - name: Create Pull Request
        uses: actions/github-script@v7
        with:
          script: |
            const { repo, owner } = context.repo;
            const result = await github.rest.pulls.create({
              title: 'Documentation: Add ${{ needs.publish-release.outputs.version }} changelog',
              owner,
              repo,
              head: '${{ needs.publish-release.outputs.version }}-changelog',
              base: 'main',
              body: 'This PR is auto-generated by CI.'
            });
            github.rest.issues.addLabels({
              owner,
              repo,
              issue_number: result.data.number,
              labels: ['documentation', 'skip-changelog']
            });
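A note on the trigger: `workflow_run` only fires for a workflow whose `name:` matches the listed `ci`, and only once that workflow file exists on the default branch. A minimal illustrative stub of the paired CI workflow (not part of this diff; the job contents are placeholders):

# Illustrative stub only: the companion workflow must be named exactly "ci"
# for the workflow_run trigger in build-and-release.yml to fire.
name: ci
on:
  push:
  pull_request:
jobs:
  tests:
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v5
      # ... linting, backend/frontend tests, documentation build ...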

View File

@@ -17,52 +17,11 @@ env:
DEFAULT_PYTHON_VERSION: "3.11" DEFAULT_PYTHON_VERSION: "3.11"
NLTK_DATA: "/usr/share/nltk_data" NLTK_DATA: "/usr/share/nltk_data"
jobs: jobs:
detect-duplicate:
name: Detect Duplicate Run
runs-on: ubuntu-24.04
outputs:
should_run: ${{ steps.check.outputs.should_run }}
steps:
- name: Check if workflow should run
id: check
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
if (context.eventName !== 'push') {
core.info('Not a push event; running workflow.');
core.setOutput('should_run', 'true');
return;
}
const ref = context.ref || '';
if (!ref.startsWith('refs/heads/')) {
core.info('Push is not to a branch; running workflow.');
core.setOutput('should_run', 'true');
return;
}
const branch = ref.substring('refs/heads/'.length);
const { owner, repo } = context.repo;
const prs = await github.paginate(github.rest.pulls.list, {
owner,
repo,
state: 'open',
head: `${owner}:${branch}`,
per_page: 100,
});
if (prs.length === 0) {
core.info(`No open PR found for ${branch}; running workflow.`);
core.setOutput('should_run', 'true');
} else {
core.info(`Found ${prs.length} open PR(s) for ${branch}; skipping duplicate push run.`);
core.setOutput('should_run', 'false');
}
pre-commit: pre-commit:
needs: # We want to run on external PRs, but not on our own internal PRs as they'll be run
- detect-duplicate # by the push to the branch. Without this if check, checks are duplicated since
if: needs.detect-duplicate.outputs.should_run == 'true' # internal PRs match both the push and pull_request events.
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
name: Linting Checks name: Linting Checks
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
@@ -192,18 +151,6 @@ jobs:
          token: ${{ secrets.CODECOV_TOKEN }}
          flags: backend-python-${{ matrix.python-version }}
          files: coverage.xml
      - name: Upload coverage artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: backend-coverage-${{ matrix.python-version }}
          path: |
            .coverage
            coverage.xml
            junit.xml
          retention-days: 1
          include-hidden-files: true
          if-no-files-found: error
      - name: Stop containers
        if: always()
        run: |
@@ -286,17 +233,6 @@ jobs:
          token: ${{ secrets.CODECOV_TOKEN }}
          flags: frontend-node-${{ matrix.node-version }}
          directory: src-ui/coverage/
      - name: Upload coverage artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: frontend-coverage-${{ matrix.shard-index }}
          path: |
            src-ui/coverage/lcov.info
            src-ui/coverage/coverage-final.json
            src-ui/junit.xml
          retention-days: 1
          if-no-files-found: error
  tests-frontend-e2e:
    name: "Frontend E2E Tests (Node ${{ matrix.node-version }} - ${{ matrix.shard-index }}/${{ matrix.shard-count }})"
    runs-on: ubuntu-24.04
@@ -377,392 +313,3 @@ jobs:
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        run: cd src-ui && pnpm run build --configuration=production
  sonarqube-analysis:
    name: "SonarQube Analysis"
    runs-on: ubuntu-24.04
    needs:
      - tests-backend
      - tests-frontend
    if: github.repository_owner == 'paperless-ngx'
    steps:
      - name: Checkout
        uses: actions/checkout@v5
        with:
          fetch-depth: 0
      - name: Download all backend coverage
        uses: actions/download-artifact@v5.0.0
        with:
          pattern: backend-coverage-*
          path: ./coverage/
      - name: Download all frontend coverage
        uses: actions/download-artifact@v5.0.0
        with:
          pattern: frontend-coverage-*
          path: ./coverage/
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install coverage tools
        run: |
          pip install coverage
          npm install -g nyc
      # Merge backend coverage from all Python versions
      - name: Merge backend coverage
        run: |
          coverage combine coverage/backend-coverage-*/.coverage
          coverage xml -o merged-backend-coverage.xml
      # Merge frontend coverage from all shards
      - name: Merge frontend coverage
        run: |
          # Find all coverage-final.json files from the shards, exit with error if none found
          shopt -s nullglob
          files=(coverage/frontend-coverage-*/coverage/coverage-final.json)
          if [ ${#files[@]} -eq 0 ]; then
            echo "No frontend coverage JSON found under coverage/" >&2
            exit 1
          fi
          # Create .nyc_output directory and copy each shard's coverage JSON into it with a unique name
          mkdir -p .nyc_output
          for coverage_json in "${files[@]}"; do
            shard=$(basename "$(dirname "$(dirname "$coverage_json")")")
            cp "$coverage_json" ".nyc_output/${shard}.json"
          done
          npx nyc merge .nyc_output .nyc_output/out.json
          npx nyc report --reporter=lcovonly --report-dir coverage
      - name: Upload coverage artifacts
        uses: actions/upload-artifact@v4.6.2
        with:
          name: merged-coverage
          path: |
            merged-backend-coverage.xml
            .nyc_output/*
            coverage/lcov.info
          retention-days: 7
          if-no-files-found: error
          include-hidden-files: true
      - name: SonarQube Analysis
        uses: SonarSource/sonarqube-scan-action@v5
        env:
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
  build-docker-image:
    name: Build Docker image for ${{ github.ref_name }}
    runs-on: ubuntu-24.04
    if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || startsWith(github.ref, 'refs/heads/fix-') || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/beta' || contains(github.ref, 'beta.rc') || startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/l10n_'))
    concurrency:
      group: ${{ github.workflow }}-build-docker-image-${{ github.ref_name }}
      cancel-in-progress: true
    needs:
      - tests-backend
      - tests-frontend
      - tests-frontend-e2e
    steps:
      - name: Check pushing to Docker Hub
        id: push-other-places
        # Only push to Dockerhub from the main repo AND the ref is either:
        #  main
        #  dev
        #  beta
        #  a tag
        # Otherwise forks would require a Docker Hub account and secrets setup
        run: |
          if [[ ${{ github.repository_owner }} == "paperless-ngx" && ( ${{ github.ref_name }} == "dev" || ${{ github.ref_name }} == "beta" || ${{ startsWith(github.ref, 'refs/tags/v') }} == "true" ) ]] ; then
            echo "Enabling DockerHub image push"
            echo "enable=true" >> $GITHUB_OUTPUT
          else
            echo "Not pushing to DockerHub"
            echo "enable=false" >> $GITHUB_OUTPUT
          fi
      - name: Set ghcr repository name
        id: set-ghcr-repository
        run: |
          ghcr_name=$(echo "${{ github.repository }}" | awk '{ print tolower($0) }')
          echo "Name is ${ghcr_name}"
          echo "ghcr-repository=${ghcr_name}" >> $GITHUB_OUTPUT
      - name: Gather Docker metadata
        id: docker-meta
        uses: docker/metadata-action@v5
        with:
          images: |
            ghcr.io/${{ steps.set-ghcr-repository.outputs.ghcr-repository }}
            name=paperlessngx/paperless-ngx,enable=${{ steps.push-other-places.outputs.enable }}
            name=quay.io/paperlessngx/paperless-ngx,enable=${{ steps.push-other-places.outputs.enable }}
          tags: |
            # Tag branches with branch name
            type=ref,event=branch
            # Process semver tags
            # For a tag x.y.z or vX.Y.Z, output an x.y.z and x.y image tag
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
      - name: Checkout
        uses: actions/checkout@v5
      # If https://github.com/docker/buildx/issues/1044 is resolved,
      # the append input with a native arm64 arch could be used to
      # significantly speed up building
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
        with:
          platforms: arm64
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        # Don't attempt to login if not pushing to Docker Hub
        if: steps.push-other-places.outputs.enable == 'true'
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Login to Quay.io
        uses: docker/login-action@v3
        # Don't attempt to login if not pushing to Quay.io
        if: steps.push-other-places.outputs.enable == 'true'
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_ROBOT_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.docker-meta.outputs.tags }}
          labels: ${{ steps.docker-meta.outputs.labels }}
          build-args: |
            PNGX_TAG_VERSION=${{ steps.docker-meta.outputs.version }}
          # Get cache layers from this branch, then dev
          # This allows new branches to get at least some cache benefits, generally from dev
          cache-from: |
            type=registry,ref=ghcr.io/${{ steps.set-ghcr-repository.outputs.ghcr-repository }}/builder/cache/app:${{ github.ref_name }}
            type=registry,ref=ghcr.io/${{ steps.set-ghcr-repository.outputs.ghcr-repository }}/builder/cache/app:dev
          cache-to: |
            type=registry,mode=max,ref=ghcr.io/${{ steps.set-ghcr-repository.outputs.ghcr-repository }}/builder/cache/app:${{ github.ref_name }}
      - name: Inspect image
        run: |
          docker buildx imagetools inspect ${{ fromJSON(steps.docker-meta.outputs.json).tags[0] }}
      - name: Export frontend artifact from docker
        run: |
          docker create --name frontend-extract ${{ fromJSON(steps.docker-meta.outputs.json).tags[0] }}
          docker cp frontend-extract:/usr/src/paperless/src/documents/static/frontend src/documents/static/frontend/
      - name: Upload frontend artifact
        uses: actions/upload-artifact@v4
        with:
          name: frontend-compiled
          path: src/documents/static/frontend/
          retention-days: 7
  build-release:
    name: "Build Release"
    needs:
      - build-docker-image
      - documentation
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
        uses: actions/checkout@v5
      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
          python-version: ${{ steps.setup-python.outputs.python-version }}
      - name: Install Python dependencies
        run: |
          uv sync --python ${{ steps.setup-python.outputs.python-version }} --dev --frozen
      - name: Install system dependencies
        run: |
          sudo apt-get update -qq
          sudo apt-get install -qq --no-install-recommends gettext liblept5
      - name: Download frontend artifact
        uses: actions/download-artifact@v5
        with:
          name: frontend-compiled
          path: src/documents/static/frontend/
      - name: Download documentation artifact
        uses: actions/download-artifact@v5
        with:
          name: documentation
          path: docs/_build/html/
      - name: Generate requirements file
        run: |
          uv export --quiet --no-dev --all-extras --format requirements-txt --output-file requirements.txt
      - name: Compile messages
        run: |
          cd src/
          uv run \
            --python ${{ steps.setup-python.outputs.python-version }} \
            manage.py compilemessages
      - name: Collect static files
        run: |
          cd src/
          uv run \
            --python ${{ steps.setup-python.outputs.python-version }} \
            manage.py collectstatic --no-input
      - name: Move files
        run: |
          echo "Making dist folders"
          for directory in dist \
            dist/paperless-ngx \
            dist/paperless-ngx/scripts;
          do
            mkdir --verbose --parents ${directory}
          done
          echo "Copying basic files"
          for file_name in .dockerignore \
            .env \
            Dockerfile \
            pyproject.toml \
            uv.lock \
            requirements.txt \
            LICENSE \
            README.md \
            paperless.conf.example
          do
            cp --verbose ${file_name} dist/paperless-ngx/
          done
          mv --verbose dist/paperless-ngx/paperless.conf.example dist/paperless-ngx/paperless.conf
          echo "Copying Docker related files"
          cp --recursive docker/ dist/paperless-ngx/docker
          echo "Copying startup scripts"
          cp --verbose scripts/*.service scripts/*.sh scripts/*.socket dist/paperless-ngx/scripts/
          echo "Copying source files"
          cp --recursive src/ dist/paperless-ngx/src
          echo "Copying documentation"
          cp --recursive docs/_build/html/ dist/paperless-ngx/docs
          mv --verbose static dist/paperless-ngx
      - name: Make release package
        run: |
          echo "Creating release archive"
          cd dist
          sudo chown -R 1000:1000 paperless-ngx/
          tar -cJf paperless-ngx.tar.xz paperless-ngx/
      - name: Upload release artifact
        uses: actions/upload-artifact@v4
        with:
          name: release
          path: dist/paperless-ngx.tar.xz
          retention-days: 7
  publish-release:
    name: "Publish Release"
    runs-on: ubuntu-24.04
    outputs:
      prerelease: ${{ steps.get_version.outputs.prerelease }}
      changelog: ${{ steps.create-release.outputs.body }}
      version: ${{ steps.get_version.outputs.version }}
    needs:
      - build-release
    if: github.ref_type == 'tag' && (startsWith(github.ref_name, 'v') || contains(github.ref_name, '-beta.rc'))
    steps:
      - name: Download release artifact
        uses: actions/download-artifact@v5
        with:
          name: release
          path: ./
      - name: Get version
        id: get_version
        run: |
          echo "version=${{ github.ref_name }}" >> $GITHUB_OUTPUT
          if [[ ${{ contains(github.ref_name, '-beta.rc') }} == 'true' ]]; then
            echo "prerelease=true" >> $GITHUB_OUTPUT
          else
            echo "prerelease=false" >> $GITHUB_OUTPUT
          fi
      - name: Create Release and Changelog
        id: create-release
        uses: release-drafter/release-drafter@v6
        with:
          name: Paperless-ngx ${{ steps.get_version.outputs.version }}
          tag: ${{ steps.get_version.outputs.version }}
          version: ${{ steps.get_version.outputs.version }}
          prerelease: ${{ steps.get_version.outputs.prerelease }}
          publish: true # ensures release is not marked as draft
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Upload release archive
        id: upload-release-asset
        uses: shogo82148/actions-upload-release-asset@v1
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          upload_url: ${{ steps.create-release.outputs.upload_url }}
          asset_path: ./paperless-ngx.tar.xz
          asset_name: paperless-ngx-${{ steps.get_version.outputs.version }}.tar.xz
          asset_content_type: application/x-xz
  append-changelog:
    name: "Append Changelog"
    runs-on: ubuntu-24.04
    needs:
      - publish-release
    if: needs.publish-release.outputs.prerelease == 'false'
    steps:
      - name: Checkout
        uses: actions/checkout@v5
        with:
          ref: main
      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Append Changelog to docs
        id: append-Changelog
        working-directory: docs
        run: |
          git branch ${{ needs.publish-release.outputs.version }}-changelog
          git checkout ${{ needs.publish-release.outputs.version }}-changelog
          echo -e "# Changelog\n\n${{ needs.publish-release.outputs.changelog }}\n" > changelog-new.md
          echo "Manually linking usernames"
          sed -i -r 's|@([a-zA-Z0-9_]+) \(\[#|[@\1](https://github.com/\1) ([#|g' changelog-new.md
          echo "Removing unneeded comment tags"
          sed -i -r 's|@<!---->|@|g' changelog-new.md
          CURRENT_CHANGELOG=`tail --lines +2 changelog.md`
          echo -e "$CURRENT_CHANGELOG" >> changelog-new.md
          mv changelog-new.md changelog.md
          uv run \
            --python ${{ steps.setup-python.outputs.python-version }} \
            --dev \
            pre-commit run --files changelog.md || true
          git config --global user.name "github-actions"
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
          git push origin ${{ needs.publish-release.outputs.version }}-changelog
      - name: Create Pull Request
        uses: actions/github-script@v7
        with:
          script: |
            const { repo, owner } = context.repo;
            const result = await github.rest.pulls.create({
              title: 'Documentation: Add ${{ needs.publish-release.outputs.version }} changelog',
              owner,
              repo,
              head: '${{ needs.publish-release.outputs.version }}-changelog',
              base: 'main',
              body: 'This PR is auto-generated by CI.'
            });
            github.rest.issues.addLabels({
              owner,
              repo,
              issue_number: result.data.number,
              labels: ['documentation', 'skip-changelog']
            });

.github/workflows/codecov-comment.yml (vendored, new file)
View File

@@ -0,0 +1,220 @@
name: Codecov PR Comment
on:
  workflow_run:
    workflows:
      - ci
    types:
      - completed
permissions:
  contents: read
  pull-requests: write
jobs:
  comment:
    if: >-
      github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success'
    runs-on: ubuntu-24.04
    steps:
      - name: Gather pull request context
        id: pr
        uses: actions/github-script@v7
        with:
          script: |
            const run = context.payload.workflow_run;
            if (!run.pull_requests || run.pull_requests.length === 0) {
              core.info('No associated pull request. Skipping.');
              return { shouldRun: false };
            }
            const pr = run.pull_requests[0];
            return {
              shouldRun: true,
              prNumber: pr.number,
              headSha: run.head_sha,
            };
      - name: Fetch Codecov coverage
        id: coverage
        if: steps.pr.outputs.shouldRun == 'true'
        uses: actions/github-script@v7
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
          COMMIT_SHA: ${{ steps.pr.outputs.headSha }}
        with:
          script: |
            const token = process.env.CODECOV_TOKEN;
            if (!token) {
              core.warning('CODECOV_TOKEN secret is not available; skipping comment.');
              core.setOutput('shouldComment', 'false');
              return;
            }
            const commitSha = process.env.COMMIT_SHA;
            const owner = context.repo.owner;
            const repo = context.repo.repo;
            const url = `https://codecov.io/api/v2/github/${owner}/repos/${repo}/commits/${commitSha}/report`;
            const maxAttempts = 10;
            const waitMs = 15000;
            const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
            let data;
            for (let attempt = 1; attempt <= maxAttempts; attempt++) {
              core.info(`Fetching Codecov report (attempt ${attempt}/${maxAttempts})`);
              const response = await fetch(url, {
                headers: {
                  Authorization: `Bearer ${token}`,
                  'Content-Type': 'application/json',
                  Accept: 'application/json',
                },
              });
              if (response.status === 404) {
                core.info('Report not ready yet (404). Waiting before retrying.');
                await sleep(waitMs);
                continue;
              }
              if (!response.ok) {
                const text = await response.text();
                throw new Error(`Codecov API returned ${response.status}: ${text}`);
              }
              data = await response.json();
              if (data && Object.keys(data).length > 0) {
                break;
              }
              core.info('Report payload empty. Waiting before retrying.');
              await sleep(waitMs);
            }
            if (!data) {
              core.warning('Unable to retrieve Codecov report after multiple attempts.');
              core.setOutput('shouldComment', 'false');
              return;
            }
            const totals = data.report?.totals ?? data.commit?.totals ?? data.totals;
            if (!totals) {
              core.warning('Codecov response does not contain coverage totals.');
              core.setOutput('shouldComment', 'false');
              return;
            }
            const compareTotals = data.report?.compare?.totals ?? data.compare?.totals;
            const flagsRaw = data.report?.totals_by_flag ?? data.report?.components ?? [];
            const toNumber = (value) => {
              if (value === null || value === undefined || value === '') {
                return undefined;
              }
              const num = Number(value);
              return Number.isFinite(num) ? num : undefined;
            };
            const coverage = toNumber(totals.coverage);
            const baseCoverage = toNumber(compareTotals?.base_coverage ?? compareTotals?.base);
            const delta = toNumber(
              compareTotals?.coverage_change ??
                compareTotals?.coverage_diff ??
                totals.delta ??
                totals.diff ??
                totals.change,
            );
            const formatPercent = (value) => {
              if (value === undefined) return '—';
              return `${value.toFixed(2)}%`;
            };
            const formatDelta = (value) => {
              if (value === undefined) return '—';
              const sign = value >= 0 ? '+' : '';
              return `${sign}${value.toFixed(2)}%`;
            };
            const shortSha = commitSha.slice(0, 7);
            const lines = [
              '<!-- codecov-coverage-comment -->',
              '**Codecov Coverage**',
              '',
              `- Head \`${shortSha}\`: ${formatPercent(coverage)}`,
            ];
            if (baseCoverage !== undefined) {
              lines.push(`- Base: ${formatPercent(baseCoverage)}`);
            }
            if (delta !== undefined) {
              lines.push(`- Change: ${formatDelta(delta)}`);
            }
            const flagEntries = Array.isArray(flagsRaw)
              ? flagsRaw
              : Object.entries(flagsRaw).map(([name, totals]) => ({ name, totals }));
            const flagRows = [];
            for (const entry of flagEntries) {
              const label = entry.flag ?? entry.name ?? entry.component ?? entry.id;
              const entryTotals = entry.totals ?? entry;
              const entryCoverage = toNumber(entryTotals?.coverage);
              const entryDelta = toNumber(
                entryTotals?.coverage_change ??
                  entryTotals?.coverage_diff ??
                  entryTotals?.delta ??
                  entryTotals?.diff,
              );
              if (!label || entryCoverage === undefined) {
                continue;
              }
              flagRows.push(`| ${label} | ${formatPercent(entryCoverage)} | ${formatDelta(entryDelta)} |`);
            }
            if (flagRows.length) {
              lines.push('');
              lines.push('| Flag | Coverage | Change |');
              lines.push('| --- | --- | --- |');
              lines.push(...flagRows);
            }
            const commentBody = lines.join('\n');
            const shouldComment = coverage !== undefined;
            core.setOutput('shouldComment', shouldComment ? 'true' : 'false');
            if (shouldComment) {
              core.setOutput('commentBody', commentBody);
            }
      - name: Upsert coverage comment
        if: steps.pr.outputs.shouldRun == 'true' && steps.coverage.outputs.shouldComment == 'true'
        uses: actions/github-script@v7
        env:
          PR_NUMBER: ${{ steps.pr.outputs.prNumber }}
          COMMENT_BODY: ${{ steps.coverage.outputs.commentBody }}
        with:
          script: |
            const prNumber = Number(process.env.PR_NUMBER);
            const body = process.env.COMMENT_BODY;
            const marker = '<!-- codecov-coverage-comment -->';
            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              per_page: 100,
            });
            const existing = comments.find((comment) => comment.body?.includes(marker));
            if (existing) {
              core.info(`Updating existing coverage comment (id: ${existing.id}).`);
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: existing.id,
                body,
              });
            } else {
              core.info('Creating new coverage comment.');
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body,
              });
            }
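For illustration, the comment body assembled by the script above renders roughly like the following (all numbers invented; flag names follow the `backend-python-*` / `frontend-node-*` flags used in ci.yml):

<!-- codecov-coverage-comment -->
**Codecov Coverage**

- Head `770fb2d`: 84.12%
- Base: 83.90%
- Change: +0.22%

| Flag | Coverage | Change |
| --- | --- | --- |
| backend-python-3.11 | 86.01% | +0.15% |
| frontend-node-20 | 79.44% | +0.31% |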

View File

@@ -1805,23 +1805,3 @@ password. All of these options come from their similarly-named [Django settings]
#### [`PAPERLESS_EMAIL_USE_SSL=<bool>`](#PAPERLESS_EMAIL_USE_SSL) {#PAPERLESS_EMAIL_USE_SSL}

: Defaults to false.

## Remote OCR

#### [`PAPERLESS_REMOTE_OCR_ENGINE=<str>`](#PAPERLESS_REMOTE_OCR_ENGINE) {#PAPERLESS_REMOTE_OCR_ENGINE}

: The remote OCR engine to use. Currently only Azure AI is supported as "azureai".
    Defaults to None, which disables remote OCR.

#### [`PAPERLESS_REMOTE_OCR_API_KEY=<str>`](#PAPERLESS_REMOTE_OCR_API_KEY) {#PAPERLESS_REMOTE_OCR_API_KEY}

: The API key to use for the remote OCR engine.
    Defaults to None.

#### [`PAPERLESS_REMOTE_OCR_ENDPOINT=<str>`](#PAPERLESS_REMOTE_OCR_ENDPOINT) {#PAPERLESS_REMOTE_OCR_ENDPOINT}

: The endpoint to use for the remote OCR engine. This is required for Azure AI.
    Defaults to None.
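All three variables had to be set together for the engine to pass the configuration check (see checks.py further down in this diff); an illustrative sketch with placeholder values, not part of the diff itself:

# Illustrative only - the key and resource name are placeholders
PAPERLESS_REMOTE_OCR_ENGINE=azureai
PAPERLESS_REMOTE_OCR_API_KEY=<your-azure-api-key>
PAPERLESS_REMOTE_OCR_ENDPOINT=https://<resource>.cognitiveservices.azure.com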

View File

@@ -25,10 +25,9 @@ physical documents into a searchable online archive so you can keep, well, _less
## Features

- **Organize and index** your scanned documents with tags, correspondents, types, and more.
- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so.
- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way.
- Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images.
- Utilizes the open-source Tesseract engine to recognize more than 100 languages.
- _New!_ Supports remote OCR with Azure AI (opt-in).
- Documents are saved as PDF/A format which is designed for long term storage, alongside the unaltered originals.
- Uses machine-learning to automatically add tags, correspondents and document types to your documents.
- Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.

View File

@@ -882,21 +882,6 @@ how regularly you intend to scan documents and use paperless.
    performed the task associated with the document, move it to the
    inbox.

## Remote OCR

!!! important

    This feature is disabled by default and will always remain strictly "opt-in".

Paperless-ngx supports performing OCR on documents using remote services. At the moment, this is limited to
[Microsoft's Azure "Document Intelligence" service](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence).
This is of course a paid service (with a free tier) which requires an Azure account and subscription. Azure AI is not affiliated with
Paperless-ngx in any way. When enabled, Paperless-ngx will automatically send appropriate documents to Azure for OCR processing, bypassing
the local OCR engine. See the [configuration](configuration.md#PAPERLESS_REMOTE_OCR_ENGINE) options for more details.

Additionally, when using a commercial service with this feature, consider both potential costs as well as any associated file size
or page limitations (e.g. with a free tier).

## Architecture

Paperless-ngx consists of the following components:
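In deployment terms, the removed feature was driven entirely by environment variables on the application container; an illustrative docker-compose sketch (service name and values are placeholders, not part of this diff):

# docker-compose.override.yml - illustrative sketch only
services:
  webserver:
    environment:
      PAPERLESS_REMOTE_OCR_ENGINE: azureai
      PAPERLESS_REMOTE_OCR_API_KEY: <your-azure-api-key>
      PAPERLESS_REMOTE_OCR_ENDPOINT: https://<resource>.cognitiveservices.azure.com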

View File

@@ -15,7 +15,6 @@ classifiers = [
# This will allow testing to not install a webserver, mysql, etc
dependencies = [
    "azure-ai-documentintelligence>=1.0.2",
    "babel>=2.17",
    "bleach~=6.2.0",
    "celery[redis]~=5.5.1",
@@ -234,7 +233,6 @@ testpaths = [
"src/paperless_tesseract/tests/", "src/paperless_tesseract/tests/",
"src/paperless_tika/tests", "src/paperless_tika/tests",
"src/paperless_text/tests/", "src/paperless_text/tests/",
"src/paperless_remote/tests/",
] ]
addopts = [ addopts = [
"--pythonwarnings=all", "--pythonwarnings=all",
@@ -257,7 +255,6 @@ PAPERLESS_DISABLE_DBHANDLER = "true"
PAPERLESS_CACHE_BACKEND = "django.core.cache.backends.locmem.LocMemCache"
[tool.coverage.run]
relative_files = true
source = [
    "src/",
]

View File

@@ -1,24 +0,0 @@
sonar.projectKey=paperless-ngx_paperless-ngx
sonar.organization=paperless-ngx
sonar.projectName=Paperless-ngx
sonar.projectVersion=1.0
# Source and test directories
sonar.sources=src/,src-ui/
sonar.test.inclusions=**/test_*.py,**/tests.py,**/*.spec.ts,**/*.test.ts
# Language specific settings
sonar.python.version=3.10,3.11,3.12,3.13
# Coverage reports
sonar.python.coverage.reportPaths=merged-backend-coverage.xml
sonar.javascript.lcov.reportPaths=coverage/lcov.info
# Test execution reports
sonar.junit.reportPaths=**/junit.xml,**/test-results.xml
# Encoding
sonar.sourceEncoding=UTF-8
# Exclusions
sonar.exclusions=**/migrations/**,**/node_modules/**,**/static/**,**/venv/**,**/.venv/**,**/dist/**

View File

@@ -164,6 +164,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
                mailrule_id=self.input_doc.mailrule_id,
                # Can't use same folder or the consume might grab it again
                original_file=(tmp_dir / new_document.name).resolve(),
                # Adding optional original_path for later use in
                # workflow matching
                original_path=self.input_doc.original_file,
            ),
            # All the same metadata
            self.metadata,

View File

@@ -156,6 +156,7 @@ class ConsumableDocument:
    source: DocumentSource
    original_file: Path
    original_path: Path | None = None
    mailrule_id: int | None = None
    mime_type: str = dataclasses.field(init=False, default=None)

View File

@@ -314,11 +314,19 @@ def consumable_document_matches_workflow(
    trigger_matched = False

    # Document path vs trigger path
    # Use the original_path if set, else use the original_file
    match_against = (
        document.original_path
        if document.original_path is not None
        else document.original_file
    )

    if (
        trigger.filter_path is not None
        and len(trigger.filter_path) > 0
        and not fnmatch(
            document.original_file,
            match_against,
            trigger.filter_path,
        )
    ):
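The practical effect for barcode-split documents: the split pieces are consumed from a temporary directory, so a path-based workflow trigger would never match them; matching against the stored consume-folder path fixes that. A small illustrative sketch of the fallback (paths are hypothetical):

# Illustrative sketch, not part of the diff; paths are hypothetical
from fnmatch import fnmatch
from pathlib import Path

original_file = Path("/tmp/paperless-split/page-1.pdf")  # temp file created by the barcode split
original_path = Path("/consume/invoices/scan.pdf")       # where the source document was dropped

match_against = original_path if original_path is not None else original_file
print(fnmatch(str(match_against), "/consume/invoices/*"))  # True, thanks to original_path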

View File

@@ -614,14 +614,16 @@ class TestBarcodeNewConsume(
        self.assertIsNotFile(temp_copy)
        # Check the split files exist
        # Check the original_path is set
        # Check the source is unchanged
        # Check the overrides are unchanged
        for (
            new_input_doc,
            new_doc_overrides,
        ) in self.get_all_consume_delay_call_args():
            self.assertEqual(new_input_doc.source, DocumentSource.ConsumeFolder)
            self.assertIsFile(new_input_doc.original_file)
            self.assertEqual(new_input_doc.original_path, temp_copy)
            self.assertEqual(new_input_doc.source, DocumentSource.ConsumeFolder)
            self.assertEqual(overrides, new_doc_overrides)

View File

@@ -322,7 +322,6 @@ INSTALLED_APPS = [
"paperless_tesseract.apps.PaperlessTesseractConfig", "paperless_tesseract.apps.PaperlessTesseractConfig",
"paperless_text.apps.PaperlessTextConfig", "paperless_text.apps.PaperlessTextConfig",
"paperless_mail.apps.PaperlessMailConfig", "paperless_mail.apps.PaperlessMailConfig",
"paperless_remote.apps.PaperlessRemoteParserConfig",
"django.contrib.admin", "django.contrib.admin",
"rest_framework", "rest_framework",
"rest_framework.authtoken", "rest_framework.authtoken",
@@ -1390,10 +1389,3 @@ WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS", "PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true", "true",
) )
###############################################################################
# Remote Parser #
###############################################################################
REMOTE_OCR_ENGINE = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE")
REMOTE_OCR_API_KEY = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY")
REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")

View File

@@ -1,4 +0,0 @@
# this is here so that django finds the checks.
from paperless_remote.checks import check_remote_parser_configured

__all__ = ["check_remote_parser_configured"]

View File

@@ -1,14 +0,0 @@
from django.apps import AppConfig

from paperless_remote.signals import remote_consumer_declaration


class PaperlessRemoteParserConfig(AppConfig):
    name = "paperless_remote"

    def ready(self):
        from documents.signals import document_consumer_declaration

        document_consumer_declaration.connect(remote_consumer_declaration)
        AppConfig.ready(self)

View File

@@ -1,17 +0,0 @@
from django.conf import settings
from django.core.checks import Error
from django.core.checks import register


@register()
def check_remote_parser_configured(app_configs, **kwargs):
    if settings.REMOTE_OCR_ENGINE == "azureai" and not (
        settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY
    ):
        return [
            Error(
                "Azure AI remote parser requires endpoint and API key to be configured.",
            ),
        ]
    return []

View File

@@ -1,113 +0,0 @@
from pathlib import Path

from django.conf import settings

from paperless_tesseract.parsers import RasterisedDocumentParser


class RemoteEngineConfig:
    def __init__(
        self,
        engine: str,
        api_key: str | None = None,
        endpoint: str | None = None,
    ):
        self.engine = engine
        self.api_key = api_key
        self.endpoint = endpoint

    def engine_is_valid(self):
        valid = self.engine in ["azureai"] and self.api_key is not None
        if self.engine == "azureai":
            valid = valid and self.endpoint is not None
        return valid


class RemoteDocumentParser(RasterisedDocumentParser):
    """
    This parser uses a remote OCR engine to parse documents. Currently, it supports Azure AI Vision
    as this is the only service that provides a remote OCR API with text-embedded PDF output.
    """

    logging_name = "paperless.parsing.remote"

    def get_settings(self) -> RemoteEngineConfig:
        """
        Returns the configuration for the remote OCR engine, loaded from Django settings.
        """
        return RemoteEngineConfig(
            engine=settings.REMOTE_OCR_ENGINE,
            api_key=settings.REMOTE_OCR_API_KEY,
            endpoint=settings.REMOTE_OCR_ENDPOINT,
        )

    def supported_mime_types(self):
        if self.settings.engine_is_valid():
            return {
                "application/pdf": ".pdf",
                "image/png": ".png",
                "image/jpeg": ".jpg",
                "image/tiff": ".tiff",
                "image/bmp": ".bmp",
                "image/gif": ".gif",
                "image/webp": ".webp",
            }
        else:
            return {}

    def azure_ai_vision_parse(
        self,
        file: Path,
    ) -> str | None:
        """
        Uses Azure AI Vision to parse the document and return the text content.
        It requests a searchable PDF output with embedded text.
        The PDF is saved to the archive_path attribute.
        Returns the text content extracted from the document.
        If the parsing fails, it returns None.
        """
        from azure.ai.documentintelligence import DocumentIntelligenceClient
        from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
        from azure.ai.documentintelligence.models import AnalyzeOutputOption
        from azure.ai.documentintelligence.models import DocumentContentFormat
        from azure.core.credentials import AzureKeyCredential

        client = DocumentIntelligenceClient(
            endpoint=self.settings.endpoint,
            credential=AzureKeyCredential(self.settings.api_key),
        )
        with file.open("rb") as f:
            analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
            poller = client.begin_analyze_document(
                model_id="prebuilt-read",
                body=analyze_request,
                output_content_format=DocumentContentFormat.TEXT,
                output=[AnalyzeOutputOption.PDF],  # request searchable PDF output
                content_type="application/json",
            )
        poller.wait()
        result_id = poller.details["operation_id"]
        result = poller.result()
        # Download the PDF with embedded text
        self.archive_path = self.tempdir / "archive.pdf"
        with self.archive_path.open("wb") as f:
            for chunk in client.get_analyze_result_pdf(
                model_id="prebuilt-read",
                result_id=result_id,
            ):
                f.write(chunk)
        client.close()
        return result.content

    def parse(self, document_path: Path, mime_type, file_name=None):
        if not self.settings.engine_is_valid():
            self.log.warning(
                "No valid remote parser engine is configured, content will be empty.",
            )
            self.text = ""
        elif self.settings.engine == "azureai":
            self.text = self.azure_ai_vision_parse(document_path)

View File

@@ -1,18 +0,0 @@
def get_parser(*args, **kwargs):
    from paperless_remote.parsers import RemoteDocumentParser

    return RemoteDocumentParser(*args, **kwargs)


def get_supported_mime_types():
    from paperless_remote.parsers import RemoteDocumentParser

    return RemoteDocumentParser(None).supported_mime_types()


def remote_consumer_declaration(sender, **kwargs):
    return {
        "parser": get_parser,
        "weight": 5,
        "mime_types": get_supported_mime_types(),
    }

View File

@@ -1,24 +0,0 @@
from unittest import TestCase

from django.test import override_settings

from paperless_remote import check_remote_parser_configured


class TestChecks(TestCase):
    @override_settings(REMOTE_OCR_ENGINE=None)
    def test_no_engine(self):
        msgs = check_remote_parser_configured(None)
        self.assertEqual(len(msgs), 0)

    @override_settings(REMOTE_OCR_ENGINE="azureai")
    @override_settings(REMOTE_OCR_API_KEY="somekey")
    @override_settings(REMOTE_OCR_ENDPOINT=None)
    def test_azure_no_endpoint(self):
        msgs = check_remote_parser_configured(None)
        self.assertEqual(len(msgs), 1)
        self.assertTrue(
            msgs[0].msg.startswith(
                "Azure AI remote parser requires endpoint and API key to be configured.",
            ),
        )

View File

@@ -1,101 +0,0 @@
import uuid
from pathlib import Path
from unittest import mock

from django.test import TestCase
from django.test import override_settings

from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless_remote.parsers import RemoteDocumentParser
from paperless_remote.signals import get_parser


class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    SAMPLE_FILES = Path(__file__).resolve().parent / "samples"

    def assertContainsStrings(self, content: str, strings: list[str]):
        # Asserts that all strings appear in content, in the given order.
        indices = []
        for s in strings:
            if s in content:
                indices.append(content.index(s))
            else:
                self.fail(f"'{s}' is not in '{content}'")
        self.assertListEqual(indices, sorted(indices))

    @mock.patch("paperless_tesseract.parsers.run_subprocess")
    @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
    def test_get_text_with_azure(self, mock_client_cls, mock_subprocess):
        # Arrange mock Azure client
        mock_client = mock.Mock()
        mock_client_cls.return_value = mock_client
        # Simulate poller result and its `.details`
        mock_poller = mock.Mock()
        mock_poller.wait.return_value = None
        mock_poller.details = {"operation_id": "fake-op-id"}
        mock_client.begin_analyze_document.return_value = mock_poller
        mock_poller.result.return_value.content = "This is a test document."
        # Return dummy PDF bytes
        mock_client.get_analyze_result_pdf.return_value = [
            b"%PDF-",
            b"1.7 ",
            b"FAKEPDF",
        ]

        # Simulate pdftotext by writing dummy text to sidecar file
        def fake_run(cmd, *args, **kwargs):
            with Path(cmd[-1]).open("w", encoding="utf-8") as f:
                f.write("This is a test document.")

        mock_subprocess.side_effect = fake_run
        with override_settings(
            REMOTE_OCR_ENGINE="azureai",
            REMOTE_OCR_API_KEY="somekey",
            REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
        ):
            parser = get_parser(uuid.uuid4())
            parser.parse(
                self.SAMPLE_FILES / "simple-digital.pdf",
                "application/pdf",
            )
            self.assertContainsStrings(
                parser.text.strip(),
                ["This is a test document."],
            )

    @override_settings(
        REMOTE_OCR_ENGINE="azureai",
        REMOTE_OCR_API_KEY="key",
        REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
    )
    def test_supported_mime_types_valid_config(self):
        parser = RemoteDocumentParser(uuid.uuid4())
        expected_types = {
            "application/pdf": ".pdf",
            "image/png": ".png",
            "image/jpeg": ".jpg",
            "image/tiff": ".tiff",
            "image/bmp": ".bmp",
            "image/gif": ".gif",
            "image/webp": ".webp",
        }
        self.assertEqual(parser.supported_mime_types(), expected_types)

    def test_supported_mime_types_invalid_config(self):
        parser = get_parser(uuid.uuid4())
        self.assertEqual(parser.supported_mime_types(), {})

    @override_settings(
        REMOTE_OCR_ENGINE=None,
        REMOTE_OCR_API_KEY=None,
        REMOTE_OCR_ENDPOINT=None,
    )
    def test_parse_with_invalid_config(self):
        parser = get_parser(uuid.uuid4())
        parser.parse(self.SAMPLE_FILES / "simple-digital.pdf", "application/pdf")
        self.assertEqual(parser.text, "")

uv.lock (generated)
View File

@@ -95,34 +95,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/af/cc/55a32a2c98022d88812b5986d2a92c4ff3ee087e83b712ebc703bba452bf/Automat-24.8.1-py3-none-any.whl", hash = "sha256:bf029a7bc3da1e2c24da2343e7598affaa9f10bf0ab63ff808566ce90551e02a", size = 42585, upload-time = "2024-08-19T17:31:56.729Z" }, { url = "https://files.pythonhosted.org/packages/af/cc/55a32a2c98022d88812b5986d2a92c4ff3ee087e83b712ebc703bba452bf/Automat-24.8.1-py3-none-any.whl", hash = "sha256:bf029a7bc3da1e2c24da2343e7598affaa9f10bf0ab63ff808566ce90551e02a", size = 42585, upload-time = "2024-08-19T17:31:56.729Z" },
] ]
[[package]]
name = "azure-ai-documentintelligence"
version = "1.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "azure-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "isodate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005 },
]
[[package]]
name = "azure-core"
version = "1.33.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/75/aa/7c9db8edd626f1a7d99d09ef7926f6f4fb34d5f9fa00dc394afdfe8e2a80/azure_core-1.33.0.tar.gz", hash = "sha256:f367aa07b5e3005fec2c1e184b882b0b039910733907d001c20fb08ebb8c0eb9", size = 295633 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/b7/76b7e144aa53bd206bf1ce34fa75350472c3f69bf30e5c8c18bc9881035d/azure_core-1.33.0-py3-none-any.whl", hash = "sha256:9b5b6d0223a1d38c37500e6971118c1e0f13f54951e6893968b38910bc9cda8f", size = 207071 },
]
[[package]] [[package]]
name = "babel" name = "babel"
version = "2.17.0" version = "2.17.0"
@@ -1440,15 +1412,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/fc/4e5a141c3f7c7bed550ac1f69e599e92b6be449dd4677ec09f325cad0955/inotifyrecursive-0.3.5-py3-none-any.whl", hash = "sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f", size = 8009, upload-time = "2020-11-20T12:38:46.981Z" }, { url = "https://files.pythonhosted.org/packages/c7/fc/4e5a141c3f7c7bed550ac1f69e599e92b6be449dd4677ec09f325cad0955/inotifyrecursive-0.3.5-py3-none-any.whl", hash = "sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f", size = 8009, upload-time = "2020-11-20T12:38:46.981Z" },
] ]
[[package]]
name = "isodate"
version = "0.7.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 },
]
[[package]] [[package]]
name = "jinja2" name = "jinja2"
version = "3.1.6" version = "3.1.6"
@@ -2069,7 +2032,6 @@ name = "paperless-ngx"
version = "2.18.4" version = "2.18.4"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "azure-ai-documentintelligence", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "celery", extra = ["redis"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "celery", extra = ["redis"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2207,7 +2169,6 @@ typing = [
[package.metadata]
requires-dist = [
    { name = "azure-ai-documentintelligence", specifier = ">=1.0.2" },
    { name = "babel", specifier = ">=2.17" },
    { name = "bleach", specifier = "~=6.2.0" },
    { name = "celery", extras = ["redis"], specifier = "~=5.5.1" },