Compare commits


1 commit

Author: shamoon
SHA1: 1b6a9d3816
Message: Add info buttons for core metadata items
Date: 2025-08-04 23:45:50 -04:00
53 changed files with 1281 additions and 2574 deletions


@@ -15,7 +15,6 @@ env:
   DEFAULT_UV_VERSION: "0.8.x"
   # This is the default version of Python to use in most steps which aren't specific
   DEFAULT_PYTHON_VERSION: "3.11"
-  NLTK_DATA: "/usr/share/nltk_data"
 jobs:
   pre-commit:
     # We want to run on external PRs, but not on our own internal PRs as they'll be run
@@ -122,11 +121,8 @@ jobs:
       - name: List installed Python dependencies
         run: |
           uv pip list
-      - name: Install or update NLTK dependencies
-        run: uv run python -m nltk.downloader punkt punkt_tab snowball_data stopwords -d ${{ env.NLTK_DATA }}
       - name: Tests
         env:
-          NLTK_DATA: ${{ env.NLTK_DATA }}
           PAPERLESS_CI_TEST: 1
           # Enable paperless_mail testing against real server
           PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }}


@@ -31,7 +31,7 @@ repos:
       rev: v2.4.1
       hooks:
         - id: codespell
-          exclude: "(^src-ui/src/locale/)|(^src-ui/pnpm-lock.yaml)|(^src-ui/e2e/)|(^src/paperless_mail/tests/samples/)|(^src/documents/tests/samples/)"
+          exclude: "(^src-ui/src/locale/)|(^src-ui/pnpm-lock.yaml)|(^src-ui/e2e/)|(^src/paperless_mail/tests/samples/)"
           exclude_types:
             - pofile
             - json


@@ -5,7 +5,7 @@
 # Purpose: Compiles the frontend
 # Notes:
 #  - Does PNPM stuff with Typescript and such
-FROM --platform=$BUILDPLATFORM docker.io/node:20-trixie-slim AS compile-frontend
+FROM --platform=$BUILDPLATFORM docker.io/node:20-bookworm-slim AS compile-frontend

 COPY ./src-ui /src/src-ui
@@ -32,7 +32,7 @@ RUN set -eux \
 # Purpose: Installs s6-overlay and rootfs
 # Comments:
 #  - Don't leave anything extra in here either
-FROM ghcr.io/astral-sh/uv:0.8.4-python3.12-trixie-slim AS s6-overlay-base
+FROM ghcr.io/astral-sh/uv:0.8.4-python3.12-bookworm-slim AS s6-overlay-base

 WORKDIR /usr/src/s6
@@ -170,8 +170,20 @@ RUN set -eux \
   && apt-get update \
   && apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \
   && echo "Installing pre-built updates" \
-  && curl --fail --silent --no-progress-meter --show-error --location --remote-name-all \
+  && curl --fail --silent --no-progress-meter --show-error --location --remote-name-all --parallel --parallel-max 4 \
+    https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+    https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+    https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+    https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+    https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
     https://github.com/paperless-ngx/builder/releases/download/jbig2enc-${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
+  && echo "Installing qpdf ${QPDF_VERSION}" \
+  && dpkg --install ./libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+  && dpkg --install ./qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+  && echo "Installing Ghostscript ${GS_VERSION}" \
+  && dpkg --install ./libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
+  && dpkg --install ./libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+  && dpkg --install ./ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
   && echo "Installing jbig2enc" \
   && dpkg --install ./jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
   && echo "Configuring imagemagick" \

dev.txt (319 lines deleted)

@@ -1,319 +0,0 @@
adduser 3.134
apt 2.6.1
base-files 12.4+deb12u11
base-passwd 3.6.1
bash 5.2.15-2+b8
bsdutils 1:2.38.1-5+deb12u3
ca-certificates 20230311+deb12u1
coreutils 9.1-1
curl 7.88.1-10+deb12u12
dash 0.5.12-2
debconf 1.5.82
debian-archive-keyring 2023.3+deb12u2
debianutils 5.7-0.5~deb12u1
diffutils 1:3.8-4
dirmngr 2.2.40-1.1
dpkg 1.21.22
e2fsprogs 1.47.0-2
file 1:5.44-3
findutils 4.9.0-4
fontconfig 2.14.1-4
fontconfig-config 2.14.1-4
fonts-liberation 1:1.07.4-11
fonts-urw-base35 20200910-7
gcc-12-base 12.2.0-14+deb12u1
gettext 0.21-12
gettext-base 0.21-12
ghostscript 10.03.1~dfsg-1
gnupg 2.2.40-1.1
gnupg-l10n 2.2.40-1.1
gnupg-utils 2.2.40-1.1
gosu 1.14-1+b10
gpg 2.2.40-1.1
gpg-agent 2.2.40-1.1
gpg-wks-client 2.2.40-1.1
gpg-wks-server 2.2.40-1.1
gpgconf 2.2.40-1.1
gpgsm 2.2.40-1.1
gpgv 2.2.40-1.1
grep 3.8-5
gzip 1.12-1
hicolor-icon-theme 0.17-2
hostname 3.23+nmu1
icc-profiles-free 2.0.1+dfsg-1.1
imagemagick 8:6.9.11.60+dfsg-1.6+deb12u3
imagemagick-6-common 8:6.9.11.60+dfsg-1.6+deb12u3
imagemagick-6.q16 8:6.9.11.60+dfsg-1.6+deb12u3
init-system-helpers 1.65.2
jbig2dec 0.19-3
jbig2enc 0.30-1
libacl1 2.3.1-3
libaom3 3.6.0-1+deb12u1
libapt-pkg6.0 2.6.1
libarchive13 3.6.2-1+deb12u2
libassuan0 2.5.5-5
libattr1 1:2.5.1-4
libaudit-common 1:3.0.9-1
libaudit1 1:3.0.9-1
libavahi-client3 0.8-10+deb12u1
libavahi-common-data 0.8-10+deb12u1
libavahi-common3 0.8-10+deb12u1
libavcodec59 7:5.1.6-0+deb12u1
libavformat59 7:5.1.6-0+deb12u1
libavutil57 7:5.1.6-0+deb12u1
libblkid1 2.38.1-5+deb12u3
libbluray2 1:1.3.4-1
libbrotli1 1.0.9-2+b6
libbsd0 0.11.7-2
libbz2-1.0 1.0.8-5+b1
libc-bin 2.36-9+deb12u10
libc6 2.36-9+deb12u10
libcairo-gobject2 1.16.0-7
libcairo2 1.16.0-7
libcap-ng0 0.8.3-1+b3
libcap2 1:2.66-4+deb12u1
libchromaprint1 1.5.1-2+b1
libcjson1 1.7.15-1+deb12u2
libcodec2-1.0 1.0.5-1
libcom-err2 1.47.0-2
libconfig-inifiles-perl 3.000003-2
libcrypt1 1:4.4.33-2
libcups2 2.4.2-3+deb12u8
libcurl4 7.88.1-10+deb12u12
libdatrie1 0.2.13-2+b1
libdav1d6 1.0.0-2+deb12u1
libdb5.3 5.3.28+dfsg2-1
libdbus-1-3 1.14.10-1~deb12u1
libde265-0 1.0.11-1+deb12u2
libdebconfclient0 0.270
libdeflate0 1.14-1
libdrm-common 2.4.114-1
libdrm2 2.4.114-1+b1
libedit2 3.1-20221030-2
libexpat1 2.5.0-1+deb12u1
libext2fs2 1.47.0-2
libffi8 3.4.4-1
libfftw3-double3 3.3.10-1
libfontconfig1 2.14.1-4
libfontenc1 1:1.1.4-1
libfreetype6 2.12.1+dfsg-5+deb12u4
libfribidi0 1.0.8-2.1
libgcc-s1 12.2.0-14+deb12u1
libgcrypt20 1.10.1-3
libgdbm-compat4 1.23-3
libgdbm6 1.23-3
libgdk-pixbuf-2.0-0 2.42.10+dfsg-1+deb12u2
libgdk-pixbuf2.0-common 2.42.10+dfsg-1+deb12u2
libgif7 5.2.1-2.5
libglib2.0-0 2.74.6-2+deb12u6
libgme0 0.6.3-6
libgmp10 2:6.2.1+dfsg1-1.1
libgnutls30 3.7.9-2+deb12u5
libgomp1 12.2.0-14+deb12u1
libgpg-error0 1.46-1
libgraphite2-3 1.3.14-1
libgs-common 10.0.0~dfsg-11+deb12u7
libgs10 10.03.1~dfsg-1
libgs10-common 10.03.1~dfsg-1
libgsm1 1.0.22-1
libgssapi-krb5-2 1.20.1-2+deb12u3
libharfbuzz0b 6.0.0+dfsg-3
libheif1 1.15.1-1+deb12u1
libhogweed6 3.8.1-2
libhwy1 1.0.3-3+deb12u1
libice6 2:1.0.10-1
libicu72 72.1-3+deb12u1
libidn12 1.41-1
libidn2-0 2.3.3-1+b1
libijs-0.35 0.35-15
libimagequant0 2.17.0-1
libjbig0 2.1-6.1
libjbig2dec0 0.19-3
libjpeg62-turbo 1:2.1.5-2
libjxl0.7 0.7.0-10+deb12u1
libk5crypto3 1.20.1-2+deb12u3
libkeyutils1 1.6.3-2
libkrb5-3 1.20.1-2+deb12u3
libkrb5support0 1.20.1-2+deb12u3
libksba8 1.6.3-2
liblcms2-2 2.14-2
libldap-2.5-0 2.5.13+dfsg-5
liblept5 1.82.0-3+b3
liblerc4 4.0.0+ds-2
liblqr-1-0 0.4.2-2.1
libltdl7 2.4.7-7~deb12u1
liblz4-1 1.9.4-1
liblzma5 5.4.1-1
libmagic-mgc 1:5.44-3
libmagic1 1:5.44-3
libmagickcore-6.q16-6 8:6.9.11.60+dfsg-1.6+deb12u3
libmagickwand-6.q16-6 8:6.9.11.60+dfsg-1.6+deb12u3
libmariadb3 1:10.11.11-0+deb12u1
libmbedcrypto7 2.28.3-1
libmd0 1.0.4-2
libmfx1 22.5.4-1
libmount1 2.38.1-5+deb12u3
libmp3lame0 3.100-6
libmpg123-0 1.31.2-1+deb12u1
libncurses6 6.4-4
libncursesw6 6.4-4
libnettle8 3.8.1-2
libnghttp2-14 1.52.0-1+deb12u2
libnorm1 1.5.9+dfsg-2
libnpth0 1.6-3
libnsl2 1.3.0-2
libnspr4 2:4.35-1
libnss3 2:3.87.1-1+deb12u1
libnuma1 2.0.16-1
libogg0 1.3.5-3
libopenjp2-7 2.5.0-2+deb12u1
libopenmpt0 0.6.9-1
libopus0 1.3.1-3
libp11-kit0 0.24.1-2
libpam-modules 1.5.2-6+deb12u1
libpam-modules-bin 1.5.2-6+deb12u1
libpam-runtime 1.5.2-6+deb12u1
libpam0g 1.5.2-6+deb12u1
libpango-1.0-0 1.50.12+ds-1
libpangocairo-1.0-0 1.50.12+ds-1
libpangoft2-1.0-0 1.50.12+ds-1
libpaper1 1.1.29
libpcre2-8-0 10.42-1
libperl5.36 5.36.0-7+deb12u2
libpgm-5.3-0 5.3.128~dfsg-2
libpixman-1-0 0.42.2-1
libpng16-16 1.6.39-2
libpoppler126 22.12.0-2+deb12u1
libpq5 15.13-0+deb12u1
libpsl5 0.21.2-1
libqpdf29 11.9.0-1
librabbitmq4 0.11.0-1+deb12u1
librav1e0 0.5.1-6
libreadline8 8.2-1.3
librist4 0.2.7+dfsg-1
librsvg2-2 2.54.7+dfsg-1~deb12u1
librtmp1 2.4+20151223.gitfa8646d.1-2+b2
libsasl2-2 2.1.28+dfsg-10
libsasl2-modules-db 2.1.28+dfsg-10
libseccomp2 2.5.4-1+deb12u1
libselinux1 3.4-1+b6
libsemanage-common 3.4-1
libsemanage2 3.4-1+b5
libsepol2 3.4-2.1
libshine3 3.1.1-2
libsm6 2:1.2.3-1
libsmartcols1 2.38.1-5+deb12u3
libsnappy1v5 1.1.9-3
libsodium23 1.0.18-1
libsoxr0 0.1.3-4
libspeex1 1.2.1-2
libsqlite3-0 3.40.1-2+deb12u1
libsrt1.5-gnutls 1.5.1-1+deb12u1
libss2 1.47.0-2
libssh-gcrypt-4 0.10.6-0+deb12u1
libssh2-1 1.10.0-3+b1
libssl3 3.0.17-1~deb12u1
libstdc++6 12.2.0-14+deb12u1
libsvtav1enc1 1.4.1+dfsg-1
libswresample4 7:5.1.6-0+deb12u1
libsystemd0 252.38-1~deb12u1
libtasn1-6 4.19.0-2+deb12u1
libtesseract5 5.3.0-2
libthai-data 0.1.29-1
libthai0 0.1.29-1
libtheora0 1.1.1+dfsg.1-16.1+b1
libtiff6 4.5.0-6+deb12u2
libtinfo6 6.4-4
libtirpc-common 1.3.3+ds-1
libtirpc3 1.3.3+ds-1
libtwolame0 0.4.0-2
libudev1 252.38-1~deb12u1
libudfread0 1.1.2-1
libunistring2 1.0-2
libuuid1 2.38.1-5+deb12u3
libv4l-0 1.22.1-5+b2
libv4lconvert0 1.22.1-5+b2
libva-drm2 2.17.0-1
libva-x11-2 2.17.0-1
libva2 2.17.0-1
libvdpau1 1.5-2
libvorbis0a 1.3.7-1
libvorbisenc2 1.3.7-1
libvorbisfile3 1.3.7-1
libvpx7 1.12.0-1+deb12u4
libwebp7 1.2.4-0.2+deb12u1
libwebpdemux2 1.2.4-0.2+deb12u1
libwebpmux3 1.2.4-0.2+deb12u1
libx11-6 2:1.8.4-2+deb12u2
libx11-data 2:1.8.4-2+deb12u2
libx11-xcb1 2:1.8.4-2+deb12u2
libx264-164 2:0.164.3095+gitbaee400-3
libx265-199 3.5-2+b1
libxau6 1:1.0.9-1
libxcb-dri3-0 1.15-1
libxcb-render0 1.15-1
libxcb-shm0 1.15-1
libxcb1 1.15-1
libxdmcp6 1:1.1.2-3
libxext6 2:1.3.4-1+b1
libxfixes3 1:6.0.0-2
libxml2 2.9.14+dfsg-1.3~deb12u2
libxrender1 1:0.9.10-1.1
libxslt1.1 1.1.35-1+deb12u1
libxt6 1:1.2.1-1.1
libxvidcore4 2:1.3.7-1
libxxhash0 0.8.1-1
libzbar0 0.23.92-7+deb12u1
libzmq5 4.3.4-6
libzstd1 1.5.4+dfsg2-5
libzvbi-common 0.2.41-1
libzvbi0 0.2.41-1
login 1:4.13+dfsg1-1+deb12u1
logsave 1.47.0-2
mariadb-client 1:10.11.11-0+deb12u1
mariadb-client-core 1:10.11.11-0+deb12u1
mariadb-common 1:10.11.11-0+deb12u1
mawk 1.3.4.20200120-3.1
media-types 10.0.0
mount 2.38.1-5+deb12u3
mysql-common 5.8+1.1.0
ncurses-base 6.4-4
ncurses-bin 6.4-4
netbase 6.4
ocl-icd-libopencl1 2.3.1-1
openssl 3.0.17-1~deb12u1
passwd 1:4.13+dfsg1-1+deb12u1
perl 5.36.0-7+deb12u2
perl-base 5.36.0-7+deb12u2
perl-modules-5.36 5.36.0-7+deb12u2
pinentry-curses 1.2.1-1
pngquant 2.17.0-1
poppler-data 0.4.12-1
poppler-utils 22.12.0-2+deb12u1
postgresql-client 15+248
postgresql-client-15 15.13-0+deb12u1
postgresql-client-common 248
qpdf 11.9.0-1
readline-common 8.2-1.3
sed 4.9-1
sensible-utils 0.0.17+nmu1
shared-mime-info 2.2-1
sysvinit-utils 3.06-4
tar 1.34+dfsg-1.2+deb12u1
tesseract-ocr 5.3.0-2
tesseract-ocr-deu 1:4.1.0-2
tesseract-ocr-eng 1:4.1.0-2
tesseract-ocr-fra 1:4.1.0-2
tesseract-ocr-ita 1:4.1.0-2
tesseract-ocr-osd 1:4.1.0-2
tesseract-ocr-spa 1:4.1.0-2
tzdata 2025b-0+deb12u1
ucf 3.0043+nmu1+deb12u1
unpaper 7.0.0-0.1
usr-is-merged 37~deb12u1
util-linux 2.38.1-5+deb12u3
util-linux-extra 2.38.1-5+deb12u3
x11-common 1:7.7+23
xfonts-encodings 1:1.0.4-2.2
xfonts-utils 1:7.7+6
zlib1g 1:1.2.13.dfsg-1


@@ -282,18 +282,6 @@ The following methods are supported:
   - `"merge": true or false` (defaults to false)
   - The `merge` flag determines if the supplied permissions will overwrite all existing permissions (including
     removing them) or be merged with existing permissions.
-- `edit_pdf`
-  - Requires `parameters`:
-    - `"doc_ids": [DOCUMENT_ID]` A list of a single document ID to edit.
-    - `"operations": [OPERATION, ...]` A list of operations to perform on the documents. Each operation is a dictionary
-      with the following keys:
-      - `"page": PAGE_NUMBER` The page number to edit (1-based).
-      - `"rotate": DEGREES` Optional rotation in degrees (90, 180, 270).
-      - `"doc": OUTPUT_DOCUMENT_INDEX` Optional index of the output document for split operations.
-  - Optional `parameters`:
-    - `"delete_original": true` to delete the original documents after editing.
-    - `"update_document": true` to update the existing document with the edited PDF.
-    - `"include_metadata": true` to copy metadata from the original document to the edited document.
 - `merge`
   - No additional `parameters` required.
   - The ordering of the merged document is determined by the list of IDs.
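For reference, a minimal TypeScript sketch of the `edit_pdf` call documented in the lines removed above. The request body follows those parameters exactly; the plain `fetch` wrapper and hard-coded endpoint path are illustrative assumptions (the frontend goes through its own DocumentService instead).

// Hypothetical helper for the `edit_pdf` bulk-edit method; illustration only.
interface EditPdfOperation {
  page: number // 1-based page number to edit
  rotate?: number // optional rotation in degrees (90, 180, 270)
  doc?: number // optional output-document index for split operations
}

async function editPdf(
  documentId: number,
  operations: EditPdfOperation[]
): Promise<void> {
  const response = await fetch('/api/documents/bulk_edit/', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      documents: [documentId],
      method: 'edit_pdf',
      parameters: {
        operations, // e.g. [{ page: 1, rotate: 90, doc: 0 }]
        delete_original: false, // delete the original after editing
        update_document: false, // true would update the existing document
        include_metadata: true, // copy metadata to the edited document(s)
      },
    }),
  })
  if (!response.ok) {
    throw new Error(`bulk_edit failed with status ${response.status}`)
  }
}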


@@ -1282,30 +1282,6 @@ within your documents.
     Defaults to false.
-
-## Workflow webhooks
-
-#### [`PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES=<str>`](#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES) {#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES}
-
-: A comma-separated list of allowed schemes for webhooks. This setting
-  controls which URL schemes are permitted for webhook URLs.
-
-  Defaults to `http,https`.
-
-#### [`PAPERLESS_WEBHOOKS_ALLOWED_PORTS=<str>`](#PAPERLESS_WEBHOOKS_ALLOWED_PORTS) {#PAPERLESS_WEBHOOKS_ALLOWED_PORTS}
-
-: A comma-separated list of allowed ports for webhooks. This setting
-  controls which ports are permitted for webhook URLs. For example, if you
-  set this to `80,443`, webhooks will only be sent to URLs that use these
-  ports.
-
-  Defaults to empty list, which allows all ports.
-
-#### [`PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS=<bool>`](#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS) {#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS}
-
-: If set to false, webhooks cannot be sent to internal URLs (e.g., localhost).
-
-  Defaults to true, which allows internal requests.
-
 ### Polling {#polling}

 #### [`PAPERLESS_CONSUMER_POLLING=<num>`](#PAPERLESS_CONSUMER_POLLING) {#PAPERLESS_CONSUMER_POLLING}
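For illustration, a hypothetical hardened setup using the three webhook settings removed in the hunk above (the values are examples, not defaults):

PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES=https
PAPERLESS_WEBHOOKS_ALLOWED_PORTS=80,443
PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS=false

This would restrict webhooks to HTTPS URLs on ports 80 or 443 and block deliveries to internal hosts such as localhost.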


@@ -499,10 +499,6 @@ The following workflow action types are available:
   - Encoding for the request body, either JSON or form data
   - The request headers as key-value pairs
-
-For security reasons, webhooks can be limited to specific ports and disallowed from connecting to local URLs. See the relevant
-[configuration settings](configuration.md#workflow-webhooks) to change this behavior. If you are allowing non-admins to create workflows,
-you may want to adjust these settings to prevent abuse.

 #### Workflow placeholders

 Some workflow text can include placeholders but the available options differ depending on the type of
@@ -580,14 +576,12 @@ The following custom field types are supported:
 ## PDF Actions

-Paperless-ngx supports basic editing operations for PDFs (these operations currently cannot be performed on non-PDF files). When viewing an individual document you can
-open the 'PDF Editor' to use a simple UI for re-arranging, rotating, deleting pages and splitting documents.
+Paperless-ngx supports four basic editing operations for PDFs (these operations currently cannot be performed on non-PDF files):

 - Merging documents: available when selecting multiple documents for 'bulk editing'.
-- Rotating documents: available when selecting multiple documents for 'bulk editing' and via the pdf editor on an individual document's details page.
-- Splitting documents: via the pdf editor on an individual document's details page.
-- Deleting pages: via the pdf editor on an individual document's details page.
-- Re-arranging pages: via the pdf editor on an individual document's details page.
+- Rotating documents: available when selecting multiple documents for 'bulk editing' and from an individual document's details page.
+- Splitting documents: available from an individual document's details page.
+- Deleting pages: available from an individual document's details page.

 !!! important
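These operations map onto the `bulk_edit` API. A TypeScript sketch of the three per-document request bodies, with literal values taken from the frontend tests later in this diff (`docId` is a hypothetical document ID):

const docId = 123 // hypothetical document ID

// Split pages 1-2 and 3-5 into two new documents.
const splitRequest = {
  documents: [docId],
  method: 'split',
  parameters: { pages: '1-2,3-5', delete_originals: false },
}

// Rotate the document by 90 degrees.
const rotateRequest = {
  documents: [docId],
  method: 'rotate',
  parameters: { degrees: 90 },
}

// Delete pages 1 and 2 from the document.
const deletePagesRequest = {
  documents: [docId],
  method: 'delete_pages',
  parameters: { pages: [1, 2] },
}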


@@ -63,7 +63,7 @@ dependencies = [
   "redis[hiredis]~=5.2.1",
   "scikit-learn~=1.7.0",
   "setproctitle~=1.3.4",
-  "tika-client~=0.10.0",
+  "tika-client~=0.9.0",
   "tqdm~=4.67.1",
   "watchdog~=6.0",
   "whitenoise~=6.9",
@@ -204,9 +204,15 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [
   "INP001",
   "T201",
 ]
+lint.per-file-ignores."src/documents/file_handling.py" = [
+  "PTH",
+] # TODO Enable & remove
 lint.per-file-ignores."src/documents/management/commands/document_consumer.py" = [
   "PTH",
 ] # TODO Enable & remove
+lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [
+  "PTH",
+] # TODO Enable & remove
 lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
   "PTH",
 ] # TODO Enable & remove
@@ -216,6 +222,9 @@ lint.per-file-ignores."src/documents/models.py" = [
 lint.per-file-ignores."src/documents/parsers.py" = [
   "PTH",
 ] # TODO Enable & remove
+lint.per-file-ignores."src/documents/signals/handlers.py" = [
+  "PTH",
+] # TODO Enable & remove
 lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
   "RUF001",
 ]

File diff suppressed because it is too large


@@ -121,26 +121,6 @@ if (!URL.revokeObjectURL) {
 }

 Object.defineProperty(window, 'ResizeObserver', { value: mock() })

-if (typeof IntersectionObserver === 'undefined') {
-  class MockIntersectionObserver {
-    constructor(
-      public callback: IntersectionObserverCallback,
-      public options?: IntersectionObserverInit
-    ) {}
-    observe = jest.fn()
-    unobserve = jest.fn()
-    disconnect = jest.fn()
-    takeRecords = jest.fn()
-  }
-  Object.defineProperty(window, 'IntersectionObserver', {
-    writable: true,
-    configurable: true,
-    value: MockIntersectionObserver,
-  })
-}

 HTMLCanvasElement.prototype.getContext = <
   typeof HTMLCanvasElement.prototype.getContext
 >jest.fn()


@@ -50,7 +50,7 @@
 <div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
 <div class="btn-toolbar" role="toolbar">
   <div class="btn-group me-2">
-    <button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
+    <button type="button" (click)="discardChanges()" class="btn btn-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
   </div>
   <div class="btn-group">
     <button type="submit" class="btn btn-primary" [disabled]="loading || !configForm.valid || (isDirty$ | async) === false" i18n>Save</button>


@@ -358,6 +358,6 @@
 <div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
-<button type="button" (click)="reset()" class="btn btn-outline-secondary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
-<button type="submit" class="btn btn-primary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
+<button type="submit" class="btn btn-primary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
+<button type="button" (click)="reset()" class="btn btn-secondary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
 </form>


@@ -0,0 +1,54 @@
<div class="modal-header">
<h4 class="modal-title" id="modal-basic-title">{{title}}</h4>
<button type="button" class="btn-close" aria-label="Close" (click)="cancel()">
</button>
</div>
<div class="modal-body">
<div class="row">
<div class="col">
<div class="btn-toolbar flex-nowrap">
<div class="input-group input-group-sm">
<div class="input-group-text" i18n>Page</div>
<input class="form-control mw-60" type="number" min="1" [(ngModel)]="currentPage" />
<div class="input-group-text" i18n>of {{totalPages}}</div>
</div>
<div class="input-group input-group-sm ms-auto">
<span class="input-group-text" i18n>Pages to remove</span>
<input [ngModel]="pagesString" class="form-control" disabled />
</div>
</div>
<div class="pdf-viewer-container w-100 mt-3">
<pdf-viewer #pdfViewer [src]="pdfSrc" [(page)]="currentPage"
[original-size]="false"
[zoom]="1"
zoom-scale="page-fit"
[render-text]="false"
(pagerendered)="pageRendered($event)"
(after-load-complete)="pdfPreviewLoaded($event)">
</pdf-viewer>
</div>
</div>
</div>
</div>
<div class="modal-footer flex-nowrap">
<div>
@if (message) {
<p [innerHTML]="message | safeHtml"></p>
}
@if (messageBold) {
<p class="mb-0 small"><b [innerHTML]="messageBold | safeHtml"></b></p>
}
</div>
<button type="button" class="btn" [class]="cancelBtnClass" (click)="cancel()" [disabled]="!buttonsEnabled">
<span class="d-inline-block" style="padding-bottom: 1px;">{{cancelBtnCaption}}</span>
</button>
<button type="button" class="btn" [class]="btnClass" (click)="confirm()" [disabled]="!confirmButtonEnabled || !buttonsEnabled">
{{btnCaption}}
</button>
</div>
<ng-template #pageCheckOverlay let-page="page" let-pages="pages">
<div class="position-absolute top-0 start-0 w-100 h-100 p-2" (click)="pageCheckChanged(page)">
<input type="checkbox" class="form-check-input" />
</div>
</ng-template>


@@ -0,0 +1,28 @@
.pdf-viewer-container {
  background-color: gray;
  height: 550px;

  pdf-viewer {
    width: 100%;
    height: 100%;
  }
}

.mw-60 {
  max-width: 60px;
}

div.position-absolute:has(.form-check-input:checked) {
  background-color: rgba(var(--bs-dark-rgb), 0.4);
}

.form-check-input {
  &:checked {
    background-color: var(--bs-danger);
    border-color: var(--bs-danger);
  }
  &:focus {
    box-shadow: 0 0 0 0.25rem rgba(var(--bs-danger-rgb), var(--pngx-focus-alpha));
    border-color: var(--bs-danger);
  }
}


@@ -0,0 +1,60 @@
import { provideHttpClient, withInterceptorsFromDi } from '@angular/common/http'
import { provideHttpClientTesting } from '@angular/common/http/testing'
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { SafeHtmlPipe } from 'src/app/pipes/safehtml.pipe'
import { DeletePagesConfirmDialogComponent } from './delete-pages-confirm-dialog.component'
describe('DeletePagesConfirmDialogComponent', () => {
let component: DeletePagesConfirmDialogComponent
let fixture: ComponentFixture<DeletePagesConfirmDialogComponent>
beforeEach(async () => {
await TestBed.configureTestingModule({
declarations: [],
imports: [
NgxBootstrapIconsModule.pick(allIcons),
FormsModule,
ReactiveFormsModule,
DeletePagesConfirmDialogComponent,
],
providers: [
NgbActiveModal,
SafeHtmlPipe,
provideHttpClient(withInterceptorsFromDi()),
provideHttpClientTesting(),
],
}).compileComponents()
fixture = TestBed.createComponent(DeletePagesConfirmDialogComponent)
component = fixture.componentInstance
fixture.detectChanges()
})
it('should return a string with comma-separated pages', () => {
component.pages = [1, 2, 3, 4]
expect(component.pagesString).toEqual('1, 2, 3, 4')
})
it('should update totalPages when pdf is loaded', () => {
component.pdfPreviewLoaded({ numPages: 5 } as any)
expect(component.totalPages).toEqual(5)
})
it('should update checks when page is rendered', () => {
const event = {
target: document.createElement('div'),
detail: { pageNumber: 1 },
} as any
component.pageRendered(event)
expect(component['checks'].length).toEqual(1)
})
it('should update pages when page check is changed', () => {
component.pageCheckChanged(1)
expect(component.pages).toEqual([1])
component.pageCheckChanged(1)
expect(component.pages).toEqual([])
})
})


@@ -0,0 +1,69 @@
import { Component, TemplateRef, ViewChild, inject } from '@angular/core'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import {
  PDFDocumentProxy,
  PdfViewerComponent,
  PdfViewerModule,
} from 'ng2-pdf-viewer'
import { SafeHtmlPipe } from 'src/app/pipes/safehtml.pipe'
import { DocumentService } from 'src/app/services/rest/document.service'
import { ConfirmDialogComponent } from '../confirm-dialog.component'

@Component({
  selector: 'pngx-delete-pages-confirm-dialog',
  templateUrl: './delete-pages-confirm-dialog.component.html',
  styleUrl: './delete-pages-confirm-dialog.component.scss',
  imports: [PdfViewerModule, FormsModule, ReactiveFormsModule, SafeHtmlPipe],
})
export class DeletePagesConfirmDialogComponent extends ConfirmDialogComponent {
  private documentService = inject(DocumentService)

  public documentID: number
  public pages: number[] = []
  public currentPage: number = 1
  public totalPages: number

  @ViewChild('pdfViewer') pdfViewer: PdfViewerComponent
  @ViewChild('pageCheckOverlay') pageCheckOverlay!: TemplateRef<any>
  private checks: HTMLElement[] = []

  public get pagesString(): string {
    return this.pages.join(', ')
  }

  public get pdfSrc(): string {
    return this.documentService.getPreviewUrl(this.documentID)
  }

  constructor() {
    super()
  }

  public pdfPreviewLoaded(pdf: PDFDocumentProxy) {
    this.totalPages = pdf.numPages
  }

  pageRendered(event: CustomEvent) {
    const pageDiv = event.target as HTMLDivElement
    const check = this.pageCheckOverlay.createEmbeddedView({
      page: event.detail.pageNumber,
    })
    this.checks[event.detail.pageNumber - 1] = check.rootNodes[0]
    pageDiv?.insertBefore(check.rootNodes[0], pageDiv.firstChild)
    this.updateChecks()
  }

  pageCheckChanged(pageNumber: number) {
    if (!this.pages.includes(pageNumber)) this.pages.push(pageNumber)
    else if (this.pages.includes(pageNumber))
      this.pages.splice(this.pages.indexOf(pageNumber), 1)
    this.updateChecks()
  }

  private updateChecks() {
    this.checks.forEach((check, i) => {
      const input = check.getElementsByTagName('input')[0]
      input.checked = this.pages.includes(i + 1)
    })
  }
}


@@ -0,0 +1,59 @@
<div class="modal-header">
<h4 class="modal-title" id="modal-basic-title">{{title}}</h4>
<button type="button" class="btn-close" aria-label="Close" (click)="cancel()">
</button>
</div>
<div class="modal-body">
<p>{{message}}</p>
<div class="row mb-2">
<div class="col-7">
<div class="input-group input-group-sm">
<div class="input-group-text" i18n>Page</div>
<input class="form-control" type="number" min="1" [(ngModel)]="page" />
<div class="input-group-text" i18n>of {{totalPages}}</div>
</div>
<div class="pdf-viewer-container w-100 mt-3">
<pdf-viewer [src]="pdfSrc" [(page)]="page"
[original-size]="false"
[zoom]="1"
zoom-scale="page-fit"
(after-load-complete)="pdfPreviewLoaded($event)">
</pdf-viewer>
</div>
</div>
<div class="col-5">
<div class="d-grid">
<button class="btn btn-sm btn-primary" (click)="addSplit()" [disabled]="!canSplit">
<i-bs name="plus-circle"></i-bs>&nbsp;
<span i18n>Add Split</span>
</button>
</div>
<ul class="list-group mt-3">
@for (pageStr of pagesString.split(','); track pageStr; let i = $index) {
<li class="list-group-item d-flex align-items-center">
{{pageStr}}
@if (pagesString.split(',').length > 1) {
&nbsp;
<button class="btn btn-sm btn-danger ms-auto" (click)="removeSplit(i)">
<i-bs name="trash"></i-bs>
</button>
}
</li>
}
</ul>
</div>
</div>
</div>
<div class="modal-footer">
<div class="form-check form-switch me-auto">
<input class="form-check-input" type="checkbox" role="switch" id="deleteOriginalSwitch" [(ngModel)]="deleteOriginal" [disabled]="!userOwnsDocument">
<label class="form-check-label" for="deleteOriginalSwitch" i18n>Delete original document after successful split</label>
</div>
<button type="button" class="btn" [class]="cancelBtnClass" (click)="cancel()" [disabled]="!buttonsEnabled">
<span class="d-inline-block" style="padding-bottom: 1px;">{{cancelBtnCaption}}</span>
</button>
<button type="button" class="btn" [class]="btnClass" (click)="confirm()" [disabled]="!confirmButtonEnabled || !buttonsEnabled">
{{btnCaption}}
</button>
</div>


@@ -0,0 +1,9 @@
.pdf-viewer-container {
  background-color: gray;
  height: 500px;

  pdf-viewer {
    width: 100%;
    height: 100%;
  }
}


@@ -0,0 +1,107 @@
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { provideHttpClient, withInterceptorsFromDi } from '@angular/common/http'
import { provideHttpClientTesting } from '@angular/common/http/testing'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { PdfViewerModule } from 'ng2-pdf-viewer'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { of } from 'rxjs'
import { DocumentService } from 'src/app/services/rest/document.service'
import { SplitConfirmDialogComponent } from './split-confirm-dialog.component'
describe('SplitConfirmDialogComponent', () => {
let component: SplitConfirmDialogComponent
let fixture: ComponentFixture<SplitConfirmDialogComponent>
let documentService: DocumentService
beforeEach(async () => {
await TestBed.configureTestingModule({
imports: [
NgxBootstrapIconsModule.pick(allIcons),
ReactiveFormsModule,
FormsModule,
PdfViewerModule,
SplitConfirmDialogComponent,
],
providers: [
NgbActiveModal,
provideHttpClient(withInterceptorsFromDi()),
provideHttpClientTesting(),
],
}).compileComponents()
fixture = TestBed.createComponent(SplitConfirmDialogComponent)
documentService = TestBed.inject(DocumentService)
component = fixture.componentInstance
fixture.detectChanges()
})
it('should load document on init', () => {
const getSpy = jest.spyOn(documentService, 'get')
component.documentID = 1
getSpy.mockReturnValue(of({ id: 1 } as any))
component.ngOnInit()
expect(documentService.get).toHaveBeenCalledWith(1)
})
it('should update pagesString when pages are added', () => {
component.totalPages = 5
component.page = 2
component.addSplit()
expect(component.pagesString).toEqual('1-2,3-5')
component.page = 4
component.addSplit()
expect(component.pagesString).toEqual('1-2,3-4,5')
})
it('should update pagesString when pages are removed', () => {
component.totalPages = 5
component.page = 2
component.addSplit()
component.page = 4
component.addSplit()
expect(component.pagesString).toEqual('1-2,3-4,5')
component.removeSplit(0)
expect(component.pagesString).toEqual('1-4,5')
})
it('should enable confirm button when pages are added', () => {
component.totalPages = 5
component.page = 2
component.addSplit()
expect(component.confirmButtonEnabled).toBeTruthy()
})
it('should disable confirm button when all pages are removed', () => {
component.totalPages = 5
component.page = 2
component.addSplit()
component.removeSplit(0)
expect(component.confirmButtonEnabled).toBeFalsy()
})
it('should not add split if page is the last page', () => {
component.totalPages = 5
component.page = 5
component.addSplit()
expect(component.pagesString).toEqual('1-5')
})
it('should update totalPages when pdf is loaded', () => {
component.pdfPreviewLoaded({ numPages: 5 } as any)
expect(component.totalPages).toEqual(5)
})
it('should correctly disable split button', () => {
component.totalPages = 5
component.page = 1
expect(component.canSplit).toBeTruthy()
component.page = 5
expect(component.canSplit).toBeFalsy()
component.page = 4
expect(component.canSplit).toBeTruthy()
component['pages'] = new Set([1, 2, 3, 4])
expect(component.canSplit).toBeFalsy()
})
})


@@ -0,0 +1,98 @@
import { Component, OnInit, inject } from '@angular/core'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import { PDFDocumentProxy, PdfViewerModule } from 'ng2-pdf-viewer'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { Document } from 'src/app/data/document'
import { PermissionsService } from 'src/app/services/permissions.service'
import { DocumentService } from 'src/app/services/rest/document.service'
import { ConfirmDialogComponent } from '../confirm-dialog.component'

@Component({
  selector: 'pngx-split-confirm-dialog',
  templateUrl: './split-confirm-dialog.component.html',
  styleUrl: './split-confirm-dialog.component.scss',
  imports: [
    FormsModule,
    ReactiveFormsModule,
    NgxBootstrapIconsModule,
    PdfViewerModule,
  ],
})
export class SplitConfirmDialogComponent
  extends ConfirmDialogComponent
  implements OnInit
{
  private documentService = inject(DocumentService)
  private permissionService = inject(PermissionsService)

  public get pagesString(): string {
    let pagesStr = ''
    let lastPage = 1
    for (let i = 1; i <= this.totalPages; i++) {
      if (this.pages.has(i) || i === this.totalPages) {
        if (lastPage === i) {
          pagesStr += `${i},`
          lastPage = Math.min(i + 1, this.totalPages)
        } else {
          pagesStr += `${lastPage}-${i},`
          lastPage = Math.min(i + 1, this.totalPages)
        }
      }
    }
    return pagesStr.replace(/,$/, '')
  }

  private pages: Set<number> = new Set()
  public documentID: number
  private document: Document
  public page: number = 1
  public totalPages: number
  public deleteOriginal: boolean = false

  public get canSplit(): boolean {
    return (
      this.page < this.totalPages &&
      this.pages.size < this.totalPages - 1 &&
      !this.pages.has(this.page)
    )
  }

  public get pdfSrc(): string {
    return this.documentService.getPreviewUrl(this.documentID)
  }

  constructor() {
    super()
    this.confirmButtonEnabled = this.pages.size > 0
  }

  ngOnInit(): void {
    this.documentService.get(this.documentID).subscribe((r) => {
      this.document = r
    })
  }

  pdfPreviewLoaded(pdf: PDFDocumentProxy) {
    this.totalPages = pdf.numPages
  }

  addSplit() {
    if (this.page === this.totalPages) return
    this.pages.add(this.page)
    this.pages = new Set(Array.from(this.pages).sort((a, b) => a - b))
    this.confirmButtonEnabled = this.pages.size > 0
  }

  removeSplit(i: number) {
    let page = Array.from(this.pages)[Math.min(i, this.pages.size - 1)]
    this.pages.delete(page)
    this.confirmButtonEnabled = this.pages.size > 0
  }

  get userOwnsDocument(): boolean {
    return this.permissionService.currentUserOwnsObject(this.document)
  }
}
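As a worked example of the `pagesString` getter above, the same range computation as a standalone function (extracted purely for illustration; the expected outputs match this component's unit tests):

// Same algorithm as SplitConfirmDialogComponent.pagesString, as a free function.
function pagesString(splits: Set<number>, totalPages: number): string {
  let out = ''
  let lastPage = 1
  for (let i = 1; i <= totalPages; i++) {
    // Each split page closes a range; the final page always closes the last one.
    if (splits.has(i) || i === totalPages) {
      out += lastPage === i ? `${i},` : `${lastPage}-${i},`
      lastPage = Math.min(i + 1, totalPages)
    }
  }
  return out.replace(/,$/, '')
}

console.log(pagesString(new Set([2]), 5)) // '1-2,3-5'
console.log(pagesString(new Set([2, 4]), 5)) // '1-2,3-4,5'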


@@ -1,103 +0,0 @@
<pdf-viewer [src]="pdfSrc" [render-text]="false" zoom="0.4" (after-load-complete)="pdfLoaded($event)"></pdf-viewer>
<div class="modal-header">
<h4 class="modal-title">{{ title }}</h4>
<button type="button" class="btn-close" aria-label="Close" (click)="cancel()"></button>
</div>
<div class="modal-body">
<div class="btn-toolbar mb-2">
<div class="btn-group me-3">
<button class="btn btn-sm btn-secondary" (click)="selectAll()" title="Select all pages" i18n-title>
<i-bs name="check-all"></i-bs>
</button>
<button class="btn btn-sm btn-secondary" (click)="deselectAll()" [disabled]="!hasSelection()" title="Deselect all pages" i18n-title>
<i-bs name="x"></i-bs>
</button>
</div>
<div class="btn-group">
<button class="btn btn-sm btn-secondary" (click)="rotateSelected(-90)" [disabled]="!hasSelection()" title="Rotate selected pages counter-clockwise" i18n-title>
<i-bs name="arrow-counterclockwise"></i-bs>
</button>
<button class="btn btn-sm btn-secondary" (click)="rotateSelected(90)" [disabled]="!hasSelection()" title="Rotate selected pages clockwise" i18n-title>
<i-bs name="arrow-clockwise"></i-bs>
</button>
<button class="btn btn-sm btn-danger" (click)="deleteSelected()" [disabled]="!hasSelection()" title="Delete selected pages" i18n-title>
<i-bs name="trash"></i-bs>
</button>
</div>
</div>
<div cdkDropList (cdkDropListDropped)="drop($event)" cdkDropListOrientation="mixed" class="d-flex flex-wrap row-cols-5">
@for (p of pages; track p.page; let i = $index) {
<div class="page-item rounded p-2" cdkDrag (click)="toggleSelection(i)" [class.selected]="p.selected">
<div class="btn-toolbar hover-actions z-10">
<div class="btn-group me-2">
<button class="btn btn-sm btn-dark" (click)="rotate(i); $event.stopPropagation()" title="Rotate page counter-clockwise" i18n-title>
<i-bs name="arrow-counterclockwise"></i-bs>
</button>
<button class="btn btn-sm btn-dark" (click)="rotate(i); $event.stopPropagation()" title="Rotate page clockwise" i18n-title>
<i-bs name="arrow-clockwise"></i-bs>
</button>
</div>
<div class="btn-group">
<button class="btn btn-sm btn-dark text-danger" (click)="remove(i); $event.stopPropagation()" title="Delete page" i18n-title>
<i-bs name="trash"></i-bs>
</button>
<button class="btn btn-sm btn-dark" (click)="toggleSplit(i); $event.stopPropagation()" title="Add / remove document split here" i18n-title>
<i-bs name="scissors"></i-bs>
</button>
</div>
</div>
<div class="border-end border-bottom bg-light py-1 px-2 document-check z-10">
<div class="form-check">
<input type="checkbox" class="form-check-input" id="page{{i}}" [checked]="p.selected" (click)="toggleSelection(i); $event.stopPropagation()">
<label class="form-check-label" for="page{{i}}"></label>
</div>
</div>
<div class="pdf-viewer-container w-100" [class.selected]="p.selected">
@defer (on viewport) {
@if (!p.loaded) {
<div class="placeholder-glow w-100 h-100 z-10">
<span class="placeholder w-100 h-100"></span>
</div>
}
<pdf-viewer class="fade" [class.show]="p.loaded" [src]="pdfSrc" [page]="p.page" [rotation]="p.rotate" [original-size]="false" [show-all]="false" [render-text]="false" (page-rendered)="p.loaded = true"></pdf-viewer>
} @placeholder {
<div class="placeholder-glow w-100 h-100 z-10">
<span class="placeholder w-100 h-100"></span>
</div>
}
</div>
@if (p.splitAfter) {
<div class="split-after rounded position-absolute top-0 end-0 bg-dark text-uppercase text-center h-100 px-1 small fw-bold">&mdash; <span i18n>Split here</span> &mdash;</div>
}
</div>
}
</div>
</div>
<div class="modal-footer flex-column">
<div class="d-flex w-100 justify-content-between align-items-center">
<div class="btn-group" role="group">
<input type="radio" class="btn-check" [(ngModel)]="editMode" [value]="PdfEditorEditMode.Create" id="editModeCreate" name="editmode">
<label for="editModeCreate" class="btn btn-outline-primary btn-sm">
<i-bs name="plus"></i-bs>
<span class="form-check-label ms-1" i18n>Create new document(s)</span>
</label>
<input type="radio" class="btn-check" [(ngModel)]="editMode" [value]="PdfEditorEditMode.Update" id="editModeUpdate" name="editmode" [disabled]="hasSplit()">
<label for="editModeUpdate" class="btn btn-outline-primary btn-sm">
<i-bs name="pencil"></i-bs>
<span class="form-check-label ms-2" i18n>Update existing document</span>
</label>
</div>
@if (editMode === PdfEditorEditMode.Create) {
<div class="form-check ms-3">
<input class="form-check-input" type="checkbox" id="copyMeta" [(ngModel)]="includeMetadata">
<label class="form-check-label" for="copyMeta" i18n>Copy metadata</label>
</div>
<div class="form-check ms-3">
<input class="form-check-input" type="checkbox" id="deleteOriginal" [(ngModel)]="deleteOriginal">
<label class="form-check-label" for="deleteOriginal" i18n>Delete original</label>
</div>
}
<button type="button" class="btn ms-auto me-2" [class]="cancelBtnClass" (click)="cancel()" [disabled]="!buttonsEnabled">{{ cancelBtnCaption }}</button>
<button type="button" class="btn" [class]="btnClass" (click)="confirm()" [disabled]="pages.length === 0">{{ btnCaption }}</button>
</div>
</div>


@@ -1,70 +0,0 @@
.page-item {
  position: relative;
  cursor: pointer;
  border: 1px solid transparent;
  background-origin: border-box;

  &.selected {
    background-color: var(--pngx-primary-darken-5);
  }
}

.pdf-viewer-container {
  background-color: gray;
  height: 240px;

  pdf-viewer {
    width: 100%;
    height: 100%;
  }
}

::ng-deep .ng2-pdf-viewer-container {
  overflow: hidden;
}

.hover-actions {
  position: absolute;
  top: 0;
  right: 0;
  display: none;
}

.page-item:hover .hover-actions {
  display: block;
}

.document-check {
  display: none;
  position: absolute;
  top: 0;
  left: 0;
  padding: 0.5rem;
  border-top-left-radius: 0.25rem;
  border-bottom-right-radius: 0.25rem;
  pointer-events: none;

  .form-check {
    padding: 0;
    min-height: 0;
    margin-bottom: 0;

    .form-check-input {
      margin-left: 0;
    }
  }
}

.page-item:hover .document-check, .selected .document-check {
  display: block;
}

.z-10 {
  z-index: 10;
}

.split-after {
  writing-mode: vertical-rl;
}


@@ -1,142 +0,0 @@
import { provideHttpClient, withInterceptorsFromDi } from '@angular/common/http'
import { provideHttpClientTesting } from '@angular/common/http/testing'
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { PDFEditorComponent } from './pdf-editor.component'
describe('PDFEditorComponent', () => {
let component: PDFEditorComponent
let fixture: ComponentFixture<PDFEditorComponent>
beforeEach(async () => {
await TestBed.configureTestingModule({
imports: [PDFEditorComponent, NgxBootstrapIconsModule.pick(allIcons)],
providers: [
provideHttpClient(withInterceptorsFromDi()),
provideHttpClientTesting(),
{ provide: NgbActiveModal, useValue: {} },
],
}).compileComponents()
fixture = TestBed.createComponent(PDFEditorComponent)
component = fixture.componentInstance
fixture.detectChanges()
})
it('should return correct operations with no changes', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false },
{ page: 2, rotate: 0, splitAfter: false },
{ page: 3, rotate: 0, splitAfter: false },
]
const ops = component.getOperations()
expect(ops).toEqual([
{ page: 1, rotate: 0, doc: 0 },
{ page: 2, rotate: 0, doc: 0 },
{ page: 3, rotate: 0, doc: 0 },
])
})
it('should rotate, delete and reorder pages', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false, selected: false },
{ page: 2, rotate: 0, splitAfter: false, selected: false },
]
component.toggleSelection(0)
component.rotateSelected(90)
expect(component.pages[0].rotate).toBe(90)
component.toggleSelection(0) // deselect
component.toggleSelection(1)
component.deleteSelected()
expect(component.pages.length).toBe(1)
component.pages.push({ page: 2, rotate: 0, splitAfter: false })
component.drop({ previousIndex: 0, currentIndex: 1 } as any)
expect(component.pages[0].page).toBe(2)
component.rotate(0)
expect(component.pages[0].rotate).toBe(90)
})
it('should handle empty pages array', () => {
component.pages = []
expect(component.getOperations()).toEqual([])
})
it('should increment doc index after splitAfter', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: true },
{ page: 2, rotate: 0, splitAfter: false },
{ page: 3, rotate: 0, splitAfter: true },
{ page: 4, rotate: 0, splitAfter: false },
]
const ops = component.getOperations()
expect(ops).toEqual([
{ page: 1, rotate: 0, doc: 0 },
{ page: 2, rotate: 0, doc: 1 },
{ page: 3, rotate: 0, doc: 1 },
{ page: 4, rotate: 0, doc: 2 },
])
})
it('should include rotations in operations', () => {
component.pages = [
{ page: 1, rotate: 90, splitAfter: false },
{ page: 2, rotate: 180, splitAfter: true },
{ page: 3, rotate: 270, splitAfter: false },
]
const ops = component.getOperations()
expect(ops).toEqual([
{ page: 1, rotate: 90, doc: 0 },
{ page: 2, rotate: 180, doc: 0 },
{ page: 3, rotate: 270, doc: 1 },
])
})
it('should handle remove operation', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false, selected: false },
{ page: 2, rotate: 0, splitAfter: false, selected: true },
{ page: 3, rotate: 0, splitAfter: false, selected: false },
]
component.remove(1) // remove page 2
expect(component.pages.length).toBe(2)
expect(component.pages[0].page).toBe(1)
expect(component.pages[1].page).toBe(3)
})
it('should toggle splitAfter correctly', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false },
{ page: 2, rotate: 0, splitAfter: false },
]
component.toggleSplit(0)
expect(component.pages[0].splitAfter).toBeTruthy()
component.toggleSplit(1)
expect(component.pages[1].splitAfter).toBeTruthy()
})
it('should select and deselect all pages', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false, selected: false },
{ page: 2, rotate: 0, splitAfter: false, selected: false },
]
component.selectAll()
expect(component.pages.every((p) => p.selected)).toBeTruthy()
expect(component.hasSelection()).toBeTruthy()
component.deselectAll()
expect(component.pages.every((p) => !p.selected)).toBeTruthy()
expect(component.hasSelection()).toBeFalsy()
})
it('should handle pdf loading and page generation', () => {
const mockPdf = {
numPages: 3,
getPage: (pageNum: number) => Promise.resolve({ pageNumber: pageNum }),
}
component.pdfLoaded(mockPdf as any)
expect(component.totalPages).toBe(3)
expect(component.pages.length).toBe(3)
expect(component.pages[0].page).toBe(1)
expect(component.pages[1].page).toBe(2)
expect(component.pages[2].page).toBe(3)
})
})


@@ -1,133 +0,0 @@
import {
CdkDragDrop,
DragDropModule,
moveItemInArray,
} from '@angular/cdk/drag-drop'
import { Component, inject } from '@angular/core'
import { FormsModule } from '@angular/forms'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { PDFDocumentProxy, PdfViewerModule } from 'ng2-pdf-viewer'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { DocumentService } from 'src/app/services/rest/document.service'
import { ConfirmDialogComponent } from '../confirm-dialog/confirm-dialog.component'
interface PageOperation {
page: number
rotate: number
splitAfter: boolean
selected?: boolean
loaded?: boolean
}
export enum PdfEditorEditMode {
Update = 'update',
Create = 'create',
}
@Component({
selector: 'pngx-pdf-editor',
templateUrl: './pdf-editor.component.html',
styleUrl: './pdf-editor.component.scss',
imports: [
DragDropModule,
FormsModule,
PdfViewerModule,
NgxBootstrapIconsModule,
],
})
export class PDFEditorComponent extends ConfirmDialogComponent {
public PdfEditorEditMode = PdfEditorEditMode
private documentService = inject(DocumentService)
activeModal: NgbActiveModal = inject(NgbActiveModal)
documentID: number
pages: PageOperation[] = []
totalPages = 0
editMode: PdfEditorEditMode = PdfEditorEditMode.Create
deleteOriginal: boolean = false
includeMetadata: boolean = true
get pdfSrc(): string {
return this.documentService.getPreviewUrl(this.documentID)
}
pdfLoaded(pdf: PDFDocumentProxy) {
this.totalPages = pdf.numPages
this.pages = Array.from({ length: this.totalPages }, (_, i) => ({
page: i + 1,
rotate: 0,
splitAfter: false,
selected: false,
loaded: false,
}))
}
toggleSelection(i: number) {
this.pages[i].selected = !this.pages[i].selected
}
rotate(i: number) {
this.pages[i].rotate = (this.pages[i].rotate + 90) % 360
}
rotateSelected(dir: number) {
for (let p of this.pages) {
if (p.selected) {
p.rotate = (p.rotate + dir + 360) % 360
}
}
}
remove(i: number) {
this.pages.splice(i, 1)
}
toggleSplit(i: number) {
this.pages[i].splitAfter = !this.pages[i].splitAfter
if (this.pages[i].splitAfter) {
// force create mode
this.editMode = PdfEditorEditMode.Create
}
}
selectAll() {
this.pages.forEach((p) => (p.selected = true))
}
deselectAll() {
this.pages.forEach((p) => (p.selected = false))
}
deleteSelected() {
this.pages = this.pages.filter((p) => !p.selected)
}
hasSelection(): boolean {
return this.pages.some((p) => p.selected)
}
hasSplit(): boolean {
return this.pages.some((p) => p.splitAfter)
}
drop(event: CdkDragDrop<PageOperation[]>) {
moveItemInArray(this.pages, event.previousIndex, event.currentIndex)
}
getOperations() {
return this.pages.map((p, idx) => ({
page: p.page,
rotate: p.rotate,
doc: this.computeDocIndex(idx),
}))
}
private computeDocIndex(index: number): number {
let docIndex = 0
for (let i = 0; i <= index; i++) {
if (this.pages[i].splitAfter && i < index) docIndex++
}
return docIndex
}
}


@@ -58,8 +58,16 @@
   <i-bs width="1em" height="1em" name="diagram-3"></i-bs>&nbsp;<span i18n>More like this</span>
 </button>
-<button ngbDropdownItem (click)="editPdf()" [disabled]="!userIsOwner || !userCanEdit || originalContentRenderType !== ContentRenderType.PDF">
-  <i-bs name="pencil"></i-bs>&nbsp;<ng-container i18n>PDF Editor</ng-container>
+<button ngbDropdownItem (click)="splitDocument()" [disabled]="!userCanAdd || originalContentRenderType !== ContentRenderType.PDF || previewNumPages === 1">
+  <i-bs width="1em" height="1em" name="scissors"></i-bs>&nbsp;<span i18n>Split</span>
+</button>
+<button ngbDropdownItem (click)="rotateDocument()" [disabled]="!userIsOwner || !userCanEdit || originalContentRenderType !== ContentRenderType.PDF">
+  <i-bs name="arrow-clockwise"></i-bs>&nbsp;<ng-container i18n>Rotate</ng-container>
+</button>
+<button ngbDropdownItem (click)="deletePages()" [disabled]="!userIsOwner || !userCanEdit || originalContentRenderType !== ContentRenderType.PDF || previewNumPages === 1">
+  <i-bs name="file-earmark-minus"></i-bs>&nbsp;<ng-container i18n>Delete page(s)</ng-container>
 </button>
 </div>
 </div>


@@ -1158,43 +1158,81 @@ describe('DocumentDetailComponent', () => {
     ).not.toBeUndefined()
   })
-  it('should support pdf editor, handle error', () => {
+  it('should support split', () => {
     let modal: NgbModalRef
     modalService.activeInstances.subscribe((m) => (modal = m[0]))
-    const closeSpy = jest.spyOn(openDocumentsService, 'closeDocument')
-    const errorSpy = jest.spyOn(toastService, 'showError')
     initNormally()
-    component.editPdf()
+    component.splitDocument()
     expect(modal).not.toBeUndefined()
     modal.componentInstance.documentID = doc.id
-    modal.componentInstance.pages = [{ page: 1, rotate: 0, splitAfter: false }]
+    modal.componentInstance.totalPages = 5
+    modal.componentInstance.page = 2
+    modal.componentInstance.addSplit()
     modal.componentInstance.confirm()
     let req = httpTestingController.expectOne(
       `${environment.apiBaseUrl}documents/bulk_edit/`
     )
     expect(req.request.body).toEqual({
       documents: [doc.id],
-      method: 'edit_pdf',
-      parameters: {
-        operations: [{ page: 1, rotate: 0, doc: 0 }],
-        delete_original: false,
-        update_document: false,
-        include_metadata: true,
-      },
+      method: 'split',
+      parameters: { pages: '1-2,3-5', delete_originals: false },
     })
-    req.error(new ErrorEvent('failed'))
-    expect(errorSpy).toHaveBeenCalled()
-
-    component.editPdf()
-    modal.componentInstance.documentID = doc.id
-    modal.componentInstance.pages = [{ page: 1, rotate: 0, splitAfter: true }]
-    modal.componentInstance.deleteOriginal = true
+    req.error(new ProgressEvent('failed'))
     modal.componentInstance.confirm()
     req = httpTestingController.expectOne(
       `${environment.apiBaseUrl}documents/bulk_edit/`
     )
     req.flush(true)
-    expect(closeSpy).toHaveBeenCalled()
   })
+  it('should support rotate', () => {
+    let modal: NgbModalRef
+    modalService.activeInstances.subscribe((m) => (modal = m[0]))
+    initNormally()
+    component.rotateDocument()
+    expect(modal).not.toBeUndefined()
+    modal.componentInstance.documentID = doc.id
+    modal.componentInstance.rotate()
+    modal.componentInstance.confirm()
+    let req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}documents/bulk_edit/`
+    )
+    expect(req.request.body).toEqual({
+      documents: [doc.id],
+      method: 'rotate',
+      parameters: { degrees: 90 },
+    })
+    req.error(new ProgressEvent('failed'))
+    modal.componentInstance.confirm()
+    req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}documents/bulk_edit/`
+    )
+    req.flush(true)
+  })
+  it('should support delete pages', () => {
+    let modal: NgbModalRef
+    modalService.activeInstances.subscribe((m) => (modal = m[0]))
+    initNormally()
+    component.deletePages()
+    expect(modal).not.toBeUndefined()
+    modal.componentInstance.documentID = doc.id
+    modal.componentInstance.pages = [1, 2]
+    modal.componentInstance.confirm()
+    let req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}documents/bulk_edit/`
+    )
+    expect(req.request.body).toEqual({
+      documents: [doc.id],
+      method: 'delete_pages',
+      parameters: { pages: [1, 2] },
+    })
+    req.error(new ProgressEvent('failed'))
+    modal.componentInstance.confirm()
+    req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}documents/bulk_edit/`
+    )
+    req.flush(true)
+  })

   it('should support keyboard shortcuts', () => {


@@ -82,6 +82,9 @@ import { getFilenameFromContentDisposition } from 'src/app/utils/http'
import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter' import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter'
import * as UTIF from 'utif' import * as UTIF from 'utif'
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component' import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'
import { DeletePagesConfirmDialogComponent } from '../common/confirm-dialog/delete-pages-confirm-dialog/delete-pages-confirm-dialog.component'
import { RotateConfirmDialogComponent } from '../common/confirm-dialog/rotate-confirm-dialog/rotate-confirm-dialog.component'
import { SplitConfirmDialogComponent } from '../common/confirm-dialog/split-confirm-dialog/split-confirm-dialog.component'
import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component' import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component'
import { CorrespondentEditDialogComponent } from '../common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component' import { CorrespondentEditDialogComponent } from '../common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component'
import { DocumentTypeEditDialogComponent } from '../common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component' import { DocumentTypeEditDialogComponent } from '../common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component'
@@ -99,10 +102,6 @@ import { TagsComponent } from '../common/input/tags/tags.component'
import { TextComponent } from '../common/input/text/text.component'
import { UrlComponent } from '../common/input/url/url.component'
import { PageHeaderComponent } from '../common/page-header/page-header.component'
import {
PDFEditorComponent,
PdfEditorEditMode,
} from '../common/pdf-editor/pdf-editor.component'
import { ShareLinksDialogComponent } from '../common/share-links-dialog/share-links-dialog.component'
import { DocumentHistoryComponent } from '../document-history/document-history.component'
import { DocumentNotesComponent } from '../document-notes/document-notes.component'
@@ -1350,13 +1349,13 @@ export class DocumentDetailComponent
this.documentForm.updateValueAndValidity()
}
editPdf() {
let modal = this.modalService.open(PDFEditorComponent, {
splitDocument() {
let modal = this.modalService.open(SplitConfirmDialogComponent, {
backdrop: 'static',
size: 'xl',
scrollable: true,
size: 'lg',
})
modal.componentInstance.title = $localize`PDF Editor`
modal.componentInstance.title = $localize`Split confirm`
modal.componentInstance.messageBold = $localize`This operation will split the selected document(s) into new documents.`
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.documentID = this.document.id
modal.componentInstance.confirmClicked
@@ -1364,30 +1363,103 @@ export class DocumentDetailComponent
.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.documentsService
.bulkEdit([this.document.id], 'edit_pdf', {
operations: modal.componentInstance.getOperations(),
delete_original: modal.componentInstance.deleteOriginal,
update_document:
modal.componentInstance.editMode == PdfEditorEditMode.Update,
include_metadata: modal.componentInstance.includeMetadata,
.bulkEdit([this.document.id], 'split', {
pages: modal.componentInstance.pagesString,
delete_originals: modal.componentInstance.deleteOriginal,
})
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({
next: () => {
this.toastService.showInfo(
$localize`PDF edit operation for "${this.document.title}" will begin in the background.`
$localize`Split operation for "${this.document.title}" will begin in the background.`
)
modal.close()
if (modal.componentInstance.deleteOriginal) {
this.openDocumentService.closeDocument(this.document)
}
},
error: (error) => {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing PDF edit operation`,
$localize`Error executing split operation`,
error
)
},
})
})
}
rotateDocument() {
let modal = this.modalService.open(RotateConfirmDialogComponent, {
backdrop: 'static',
size: 'lg',
})
modal.componentInstance.title = $localize`Rotate confirm`
modal.componentInstance.messageBold = $localize`This operation will permanently rotate the original version of the current document.`
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.documentID = this.document.id
modal.componentInstance.showPDFNote = false
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.documentsService
.bulkEdit([this.document.id], 'rotate', {
degrees: modal.componentInstance.degrees,
})
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({
next: () => {
this.toastService.show({
content: $localize`Rotation of "${this.document.title}" will begin in the background. Close and re-open the document after the operation has completed to see the changes.`,
delay: 8000,
action: this.close.bind(this),
actionName: $localize`Close`,
})
modal.close()
},
error: (error) => {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing rotate operation`,
error
)
},
})
})
}
deletePages() {
let modal = this.modalService.open(DeletePagesConfirmDialogComponent, {
backdrop: 'static',
})
modal.componentInstance.title = $localize`Delete pages confirm`
modal.componentInstance.messageBold = $localize`This operation will permanently delete the selected pages from the original document.`
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.documentID = this.document.id
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.documentsService
.bulkEdit([this.document.id], 'delete_pages', {
pages: modal.componentInstance.pages,
})
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({
next: () => {
this.toastService.showInfo(
$localize`Delete pages operation for "${this.document.title}" will begin in the background. Close and re-open or reload this document after the operation has completed to see the changes.`
)
modal.close()
},
error: (error) => {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing delete pages operation`,
error
)
},
View File
@@ -164,7 +164,7 @@ describe('ManagementListComponent', () => {
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
const reloadSpy = jest.spyOn(component, 'reloadData')
const createButton = fixture.debugElement.queryAll(By.css('button'))[4]
const createButton = fixture.debugElement.queryAll(By.css('button'))[3]
createButton.triggerEventHandler('click')
expect(modal).not.toBeUndefined()
@@ -188,7 +188,7 @@ describe('ManagementListComponent', () => {
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
const reloadSpy = jest.spyOn(component, 'reloadData')
const editButton = fixture.debugElement.queryAll(By.css('button'))[7]
const editButton = fixture.debugElement.queryAll(By.css('button'))[6]
editButton.triggerEventHandler('click')
expect(modal).not.toBeUndefined()
@@ -213,7 +213,7 @@ describe('ManagementListComponent', () => {
const deleteSpy = jest.spyOn(tagService, 'delete')
const reloadSpy = jest.spyOn(component, 'reloadData')
const deleteButton = fixture.debugElement.queryAll(By.css('button'))[8]
const deleteButton = fixture.debugElement.queryAll(By.css('button'))[7]
deleteButton.triggerEventHandler('click')
expect(modal).not.toBeUndefined()
@@ -233,7 +233,7 @@ describe('ManagementListComponent', () => {
it('should support quick filter for objects', () => {
const qfSpy = jest.spyOn(documentListViewService, 'quickFilter')
const filterButton = fixture.debugElement.queryAll(By.css('button'))[9]
const filterButton = fixture.debugElement.queryAll(By.css('button'))[8]
filterButton.triggerEventHandler('click')
expect(qfSpy).toHaveBeenCalledWith([
{ rule_type: FILTER_HAS_TAGS_ALL, value: tags[0].id.toString() },
View File
@@ -70,6 +70,6 @@
}
</ul>
<button type="button" (click)="reset()" class="btn btn-outline-secondary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
<button type="submit" class="btn btn-primary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
<button type="submit" class="btn btn-primary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
<button type="button" (click)="reset()" class="btn btn-secondary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
</form>
View File
@@ -497,103 +497,6 @@ def delete_pages(doc_ids: list[int], pages: list[int]) -> Literal["OK"]:
return "OK" return "OK"
def edit_pdf(
doc_ids: list[int],
operations: list[dict],
*,
delete_original: bool = False,
update_document: bool = False,
include_metadata: bool = True,
user: User | None = None,
) -> Literal["OK"]:
"""
Operations is a list of dictionaries describing the final PDF pages.
Each entry must contain the original page number in `page` and may
specify `rotate` in degrees and `doc` indicating the output
document index (for splitting). Pages omitted from the list are
discarded.
"""
logger.info(
f"Editing PDF of document {doc_ids[0]} with {len(operations)} operations",
)
doc = Document.objects.get(id=doc_ids[0])
import pikepdf
pdf_docs: list[pikepdf.Pdf] = []
try:
with pikepdf.open(doc.source_path) as src:
# prepare output documents
max_idx = max(op.get("doc", 0) for op in operations)
pdf_docs = [pikepdf.new() for _ in range(max_idx + 1)]
if update_document and len(pdf_docs) > 1:
logger.error(
"Update requested but multiple output documents specified",
)
raise ValueError("Multiple output documents specified")
for op in operations:
dst = pdf_docs[op.get("doc", 0)]
page = src.pages[op["page"] - 1]
dst.pages.append(page)
if op.get("rotate"):
dst.pages[-1].rotate(op["rotate"], relative=True)
if update_document:
temp_path = doc.source_path.with_suffix(".tmp.pdf")
pdf = pdf_docs[0]
pdf.remove_unreferenced_resources()
# save the edited PDF to a temporary file in case of errors
pdf.save(temp_path)
# replace the original document with the edited one
temp_path.replace(doc.source_path)
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
doc.page_count = len(pdf.pages)
doc.save()
update_document_content_maybe_archive_file.delay(document_id=doc.id)
else:
consume_tasks = []
overrides = (
DocumentMetadataOverrides().from_document(doc)
if include_metadata
else DocumentMetadataOverrides()
)
if user is not None:
overrides.owner_id = user.id
for idx, pdf in enumerate(pdf_docs, start=1):
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{doc.id}_edit_{idx}.pdf"
)
pdf.remove_unreferenced_resources()
pdf.save(filepath)
consume_tasks.append(
consume_file.s(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
),
overrides,
),
)
if delete_original:
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
else:
group(consume_tasks).delay()
except Exception as e:
logger.exception(f"Error editing document {doc.id}: {e}")
raise ValueError(
f"An error occurred while editing the document: {e}",
) from e
return "OK"
def reflect_doclinks(
document: Document,
field: CustomField,
View File
@@ -1,23 +1,16 @@
from __future__ import annotations
import logging
import pickle
from binascii import hexlify
from collections import OrderedDict
from dataclasses import dataclass
from typing import TYPE_CHECKING
from typing import Any
from typing import Final
from django.conf import settings
from django.core.cache import cache
from django.core.cache import caches
from documents.models import Document
if TYPE_CHECKING:
from django.core.cache.backends.base import BaseCache
from documents.classifier import DocumentClassifier
logger = logging.getLogger("paperless.caching")
@@ -46,80 +39,6 @@ CACHE_1_MINUTE: Final[int] = 60
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE
read_cache = caches["read-cache"]
class LRUCache:
def __init__(self, capacity: int = 128):
self._data = OrderedDict()
self.capacity = capacity
def get(self, key, default=None) -> Any | None:
if key in self._data:
self._data.move_to_end(key)
return self._data[key]
return default
def set(self, key, value) -> None:
self._data[key] = value
self._data.move_to_end(key)
while len(self._data) > self.capacity:
self._data.popitem(last=False)
class StoredLRUCache(LRUCache):
"""
LRU cache that can persist its entire contents as a single entry in a backend cache.
Useful for sharing a cache across multiple workers or processes.
Workflow:
1. Load the cache state from the backend using `load()`.
2. Use `get()` and `set()` locally as usual.
3. Persist changes back to the backend using `save()`.
"""
def __init__(
self,
backend_key: str,
capacity: int = 128,
backend: BaseCache = read_cache,
backend_ttl=settings.CACHALOT_TIMEOUT,
):
if backend_key is None:
raise ValueError("backend_key is mandatory")
super().__init__(capacity)
self._backend_key = backend_key
self._backend = backend
self.backend_ttl = backend_ttl
def load(self) -> None:
"""
Load the whole cache content from backend storage.
If no valid cached data exists in the backend, the local cache is cleared.
"""
serialized_data = self._backend.get(self._backend_key)
try:
self._data = (
pickle.loads(serialized_data) if serialized_data else OrderedDict()
)
except pickle.PickleError:
logger.warning(
"Cache exists in backend but could not be read (possibly invalid format)",
)
def save(self) -> None:
"""Save the entire local cache to the backend as a serialized object.
The backend entry will expire after the configured TTL.
"""
self._backend.set(
self._backend_key,
pickle.dumps(self._data),
self.backend_ttl,
)
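A compact sketch of the load/get/set/save cycle the class docstring describes (key and values are illustrative):

cache = StoredLRUCache("stem_cache_v1", capacity=1000)
cache.load()                 # 1. pull the shared state from the backend
stem = cache.get("amazed")   # 2. local reads and writes as usual
if stem is None:
    cache.set("amazed", "amaz")
cache.save()                 # 3. publish the updated state back to the backend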
def get_suggestion_cache_key(document_id: int) -> str:
"""
View File
@@ -16,29 +16,16 @@ if TYPE_CHECKING:
from django.conf import settings
from django.core.cache import cache
from django.core.cache import caches
from documents.caching import CACHE_5_MINUTES
from documents.caching import CACHE_50_MINUTES
from documents.caching import CLASSIFIER_HASH_KEY
from documents.caching import CLASSIFIER_MODIFIED_KEY
from documents.caching import CLASSIFIER_VERSION_KEY
from documents.caching import StoredLRUCache
from documents.models import Document
from documents.models import MatchingModel
logger = logging.getLogger("paperless.classifier")
ADVANCED_TEXT_PROCESSING_ENABLED = (
settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED
)
read_cache = caches["read-cache"]
RE_DIGIT = re.compile(r"\d")
RE_WORD = re.compile(r"\b[\w]+\b") # words that may contain digits
class IncompatibleClassifierVersionError(Exception):
def __init__(self, message: str, *args: object) -> None:
@@ -105,27 +92,14 @@ class DocumentClassifier:
self.last_auto_type_hash: bytes | None = None
self.data_vectorizer = None
self.data_vectorizer_hash = None
self.tags_binarizer = None
self.tags_classifier = None
self.correspondent_classifier = None
self.document_type_classifier = None
self.storage_path_classifier = None
self._stemmer = None
# 10,000 elements roughly use 200 to 500 KB per worker,
# and also in the shared Redis cache,
# Keep this cache small to minimize lookup and I/O latency.
if ADVANCED_TEXT_PROCESSING_ENABLED:
self._stem_cache = StoredLRUCache(
f"stem_cache_v{self.FORMAT_VERSION}",
capacity=10000,
)
self._stop_words = None
self._stemmer = None
self._stop_words = None
def _update_data_vectorizer_hash(self):
self.data_vectorizer_hash = sha256(
pickle.dumps(self.data_vectorizer),
).hexdigest()
def load(self) -> None:
from sklearn.exceptions import InconsistentVersionWarning
@@ -145,7 +119,6 @@ class DocumentClassifier:
self.last_auto_type_hash = pickle.load(f)
self.data_vectorizer = pickle.load(f)
self._update_data_vectorizer_hash()
self.tags_binarizer = pickle.load(f)
self.tags_classifier = pickle.load(f)
@@ -296,7 +269,7 @@ class DocumentClassifier:
Generates the content for documents, but one at a time
"""
for doc in docs_queryset:
yield self.preprocess_content(doc.content, shared_cache=False)
yield self.preprocess_content(doc.content)
self.data_vectorizer = CountVectorizer(
analyzer="word",
@@ -374,7 +347,6 @@ class DocumentClassifier:
self.last_doc_change_time = latest_doc_change
self.last_auto_type_hash = hasher.digest()
self._update_data_vectorizer_hash()
# Set the classifier information into the cache
# Caching for 50 minutes, so slightly less than the normal retrain time
@@ -384,15 +356,30 @@ class DocumentClassifier:
return True
def _init_advanced_text_processing(self):
if self._stop_words is None or self._stemmer is None:
def preprocess_content(self, content: str) -> str:  # pragma: no cover
"""
Process the contents of a document, distilling it down into
words which are meaningful to the content
"""
# Lower case the document
content = content.lower().strip()
# Reduce spaces
content = re.sub(r"\s+", " ", content)
# Get only the letters
content = re.sub(r"[^\w\s]", " ", content)
# If the NLTK language is supported, do further processing
if settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED:
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize
# Not really hacky, since it isn't private and is documented, but
# set the search path for NLTK data to the single location it should be in
nltk.data.path = [settings.NLTK_DIR]
try:
# Preload the corpus early, to force the lazy loader to transform
stopwords.ensure_loaded()
@@ -400,100 +387,41 @@ class DocumentClassifier:
# Do some one time setup
# Sometimes, somehow, there's multiple threads loading the corpus
# and it's not thread safe, raising an AttributeError
if self._stemmer is None:
self._stemmer = SnowballStemmer(settings.NLTK_LANGUAGE)
if self._stop_words is None:
self._stop_words = set(stopwords.words(settings.NLTK_LANGUAGE))
self._stop_words = frozenset(stopwords.words(settings.NLTK_LANGUAGE))
except AttributeError:
logger.debug("Could not initialize NLTK for advanced text processing.")
return False
return True
def stem_and_skip_stop_words(self, words: list[str], *, shared_cache=True):
"""
Reduce a list of words to their stem. Stop words are converted to empty strings.
:param words: the list of words to stem
"""
def _stem_and_skip_stop_word(word: str):
"""
Reduce a given word to its stem. If it's a stop word, return an empty string.
E.g. "amazement", "amaze" and "amazed" all return "amaz".
"""
cached = self._stem_cache.get(word)
if cached is not None:
return cached
elif word in self._stop_words:
return ""
# Assumption: words that contain numbers are never stemmed
elif RE_DIGIT.search(word):
return word
else:
result = self._stemmer.stem(word)
self._stem_cache.set(word, result)
return result
if shared_cache:
self._stem_cache.load()
# Stem the words and skip stop words
result = " ".join(
filter(None, (_stem_and_skip_stop_word(w) for w in words)),
)
if shared_cache:
self._stem_cache.save()
return result
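Illustratively, and assuming NLTK_LANGUAGE is "english", the method above behaves like this (shared_cache disabled to keep the example local):

# Stop words vanish, stems are cached, and digit-bearing tokens pass through:
classifier.stem_and_skip_stop_words(
    ["we", "were", "amazed", "test0707"],
    shared_cache=False,
)
# -> "amaz test0707"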
def preprocess_content(
self,
content: str,
*,
shared_cache=True,
) -> str:
"""
Process the contents of a document, distilling it down into
words which are meaningful to the content.
A stemmer cache is shared across workers with the parameter "shared_cache".
This is unnecessary when training the classifier.
"""
# Lower case the document, reduce space,
# and keep only letters and digits.
content = " ".join(match.group().lower() for match in RE_WORD.finditer(content))
if ADVANCED_TEXT_PROCESSING_ENABLED:
from nltk.tokenize import word_tokenize
if not self._init_advanced_text_processing():
return content
# Tokenize
# This splits the content into tokens, roughly words
words = word_tokenize(content, language=settings.NLTK_LANGUAGE)
# Stem the words and skip stop words
content = self.stem_and_skip_stop_words(words, shared_cache=shared_cache)
words: list[str] = word_tokenize(
content,
language=settings.NLTK_LANGUAGE,
)
meaningful_words = []
for word in words:
# Skip stop words
# These are words like "a", "and", "the" which add little meaning
if word in self._stop_words:
continue
# Stem the words
# This reduces the words to their stems.
# "amazement" returns "amaz"
# "amaze" returns "amaz
# "amazed" returns "amaz"
meaningful_words.append(self._stemmer.stem(word))
return " ".join(meaningful_words)
except AttributeError:
return content
return content
def _get_vectorizer_cache_key(self, content: str):
hash = sha256(content.encode())
hash.update(
f"|{self.FORMAT_VERSION}|{settings.NLTK_LANGUAGE}|{settings.NLTK_ENABLED}|{self.data_vectorizer_hash}".encode(),
)
return f"vectorized_content_{hash.hexdigest()}"
def _vectorize(self, content: str):
key = self._get_vectorizer_cache_key(content)
serialized_result = read_cache.get(key)
if serialized_result is None:
result = self.data_vectorizer.transform([self.preprocess_content(content)])
read_cache.set(key, pickle.dumps(result), CACHE_5_MINUTES)
else:
read_cache.touch(key, CACHE_5_MINUTES)
result = pickle.loads(serialized_result)
return result
def predict_correspondent(self, content: str) -> int | None:
if self.correspondent_classifier:
X = self._vectorize(content)
X = self.data_vectorizer.transform([self.preprocess_content(content)])
correspondent_id = self.correspondent_classifier.predict(X)
if correspondent_id != -1:
return correspondent_id
@@ -504,7 +432,7 @@ class DocumentClassifier:
def predict_document_type(self, content: str) -> int | None:
if self.document_type_classifier:
X = self._vectorize(content)
X = self.data_vectorizer.transform([self.preprocess_content(content)])
document_type_id = self.document_type_classifier.predict(X)
if document_type_id != -1:
return document_type_id
@@ -517,7 +445,7 @@ class DocumentClassifier:
from sklearn.utils.multiclass import type_of_target
if self.tags_classifier:
X = self._vectorize(content)
X = self.data_vectorizer.transform([self.preprocess_content(content)])
y = self.tags_classifier.predict(X)
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
if type_of_target(y).startswith("multilabel"):
@@ -536,7 +464,7 @@ class DocumentClassifier:
def predict_storage_path(self, content: str) -> int | None:
if self.storage_path_classifier:
X = self._vectorize(content)
X = self.data_vectorizer.transform([self.preprocess_content(content)])
storage_path_id = self.storage_path_classifier.predict(X)
if storage_path_id != -1:
return storage_path_id
View File
@@ -1,5 +1,4 @@
import os
import os import os
from pathlib import Path
from django.conf import settings
@@ -8,15 +7,19 @@ from documents.templating.filepath import validate_filepath_template_and_render
from documents.templating.utils import convert_format_str_to_template_format
def create_source_path_directory(source_path: Path) -> None:
source_path.parent.mkdir(parents=True, exist_ok=True)
def create_source_path_directory(source_path):
os.makedirs(os.path.dirname(source_path), exist_ok=True)
def delete_empty_directories(directory: Path, root: Path) -> None:
def delete_empty_directories(directory, root):
if not directory.is_dir():
if not os.path.isdir(directory):
return
if not directory.is_relative_to(root):
# Go up in the directory hierarchy and try to delete all directories
directory = os.path.normpath(directory)
root = os.path.normpath(root)
if not directory.startswith(root + os.path.sep):
# don't do anything outside our originals folder.
# append os.path.sep so that we avoid these cases:
@@ -24,12 +27,11 @@ def delete_empty_directories(directory: Path, root: Path) -> None:
# root = /home/originals ("/" gets appended and startswith fails)
return
# Go up in the directory hierarchy and try to delete all directories
while directory != root:
if not list(directory.iterdir()):
if not os.listdir(directory):
# it's empty
try:
directory.rmdir()
os.rmdir(directory)
except OSError:
# whatever. empty directories aren't that bad anyway.
return
@@ -38,10 +40,10 @@ def delete_empty_directories(directory: Path, root: Path) -> None:
return
# go one level up
directory = directory.parent
directory = os.path.normpath(os.path.dirname(directory))
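As a worked example of the walk-up behaviour (paths illustrative; the os.path variant accepts plain strings):

# With root=/data/originals and directory=/data/originals/2024/01,
# an empty "01" is removed, then an empty "2024"; the walk stops at the
# first non-empty directory or at root itself.
delete_empty_directories(Path("/data/originals/2024/01"), root=Path("/data/originals"))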
def generate_unique_filename(doc, *, archive_filename=False) -> Path:
def generate_unique_filename(doc, *, archive_filename=False):
"""
Generates a unique filename for doc in settings.ORIGINALS_DIR.
@@ -54,32 +56,21 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
""" """
if archive_filename: if archive_filename:
old_filename: Path | None = (
Path(doc.archive_filename) if doc.archive_filename else None
)
old_filename = doc.archive_filename
root = settings.ARCHIVE_DIR
else:
old_filename = Path(doc.filename) if doc.filename else None
old_filename = doc.filename
root = settings.ORIGINALS_DIR
# If generating archive filenames, try to make a name that is similar to
# the original filename first.
if archive_filename and doc.filename:
# Generate the full path using the same logic as generate_filename
base_generated = generate_filename(doc, archive_filename=archive_filename)
# Try to create a simple PDF version based on the original filename
# but preserve any directory structure from the template
if str(base_generated.parent) != ".":
# Has directory structure, preserve it
simple_pdf_name = base_generated.parent / (Path(doc.filename).stem + ".pdf")
else:
# No directory structure
simple_pdf_name = Path(Path(doc.filename).stem + ".pdf")
if simple_pdf_name == old_filename or not (root / simple_pdf_name).exists():
return simple_pdf_name
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
if new_filename == old_filename or not os.path.exists(
os.path.join(root, new_filename),
):
return new_filename
counter = 0
@@ -93,7 +84,7 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
# still the same as before.
return new_filename
if (root / new_filename).exists():
if os.path.exists(os.path.join(root, new_filename)):
counter += 1
else:
return new_filename
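Concretely, the counter loop above yields the first collision-free candidate (template and collisions hypothetical):

#   "2024/invoice.pdf"      exists
#   "2024/invoice_01.pdf"   exists
#   "2024/invoice_02.pdf"   free -> returned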
@@ -105,8 +96,8 @@ def generate_filename(
counter=0,
append_gpg=True,
archive_filename=False,
) -> Path:
):
base_path: Path | None = None
path = ""
def format_filename(document: Document, template_str: str) -> str | None:
rendered_filename = validate_filepath_template_and_render(
@@ -143,34 +134,17 @@ def generate_filename(
# If we have one, render it
if filename_format is not None:
rendered_path: str | None = format_filename(doc, filename_format)
if rendered_path:
base_path = Path(rendered_path)
path = format_filename(doc, filename_format)
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type
if base_path:
# Split the path into directory and filename parts
directory = base_path.parent
# Use the full name (not just stem) as the base filename
base_filename = base_path.name
# Build the final filename with counter and filetype
final_filename = f"{base_filename}{counter_str}{filetype_str}"
# If we have a directory component, include it
if str(directory) != ".":
full_path = directory / final_filename
else:
full_path = Path(final_filename)
else:
# No template, use document ID
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
full_path = Path(final_filename)
if path:
filename = f"{path}{counter_str}{filetype_str}"
else:
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
# Add GPG extension if needed
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
filename += ".gpg"
return full_path
return filename
View File
@@ -236,7 +236,10 @@ class Command(CryptMixin, BaseCommand):
# now make an archive in the original target, with all files stored
if self.zip_export and temp_dir is not None:
shutil.make_archive(
self.original_target / options["zip_name"],
os.path.join(
self.original_target,
options["zip_name"],
),
format="zip",
root_dir=temp_dir.name,
)
@@ -339,7 +342,7 @@ class Command(CryptMixin, BaseCommand):
)
if self.split_manifest:
manifest_name = base_name.with_name(f"{base_name.stem}-manifest.json")
manifest_name = Path(base_name + "-manifest.json")
if self.use_folder_prefix:
manifest_name = Path("json") / manifest_name
manifest_name = (self.target / manifest_name).resolve()
@@ -413,7 +416,7 @@ class Command(CryptMixin, BaseCommand):
else:
item.unlink()
def generate_base_name(self, document: Document) -> Path:
def generate_base_name(self, document: Document) -> str:
"""
Generates a unique name for the document, one which hasn't already been exported (or will be)
"""
@@ -433,12 +436,12 @@ class Command(CryptMixin, BaseCommand):
break
else:
filename_counter += 1
return Path(base_name)
return base_name
def generate_document_targets(
self,
document: Document,
base_name: Path,
base_name: str,
document_dict: dict,
) -> tuple[Path, Path | None, Path | None]:
""" """
@@ -446,25 +449,25 @@ class Command(CryptMixin, BaseCommand):
""" """
original_name = base_name
if self.use_folder_prefix:
original_name = Path("originals") / original_name
original_name = os.path.join("originals", original_name)
original_target = (self.target / original_name).resolve()
original_target = (self.target / Path(original_name)).resolve()
document_dict[EXPORTER_FILE_NAME] = str(original_name)
document_dict[EXPORTER_FILE_NAME] = original_name
if not self.no_thumbnail:
thumbnail_name = base_name.parent / (base_name.stem + "-thumbnail.webp")
thumbnail_name = base_name + "-thumbnail.webp"
if self.use_folder_prefix:
thumbnail_name = Path("thumbnails") / thumbnail_name
thumbnail_name = os.path.join("thumbnails", thumbnail_name)
thumbnail_target = (self.target / thumbnail_name).resolve()
thumbnail_target = (self.target / Path(thumbnail_name)).resolve()
document_dict[EXPORTER_THUMBNAIL_NAME] = str(thumbnail_name)
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
else:
thumbnail_target = None
if not self.no_archive and document.has_archive_version:
archive_name = base_name.parent / (base_name.stem + "-archive.pdf")
archive_name = base_name + "-archive.pdf"
if self.use_folder_prefix:
archive_name = Path("archive") / archive_name
archive_name = os.path.join("archive", archive_name)
archive_target = (self.target / archive_name).resolve()
archive_target = (self.target / Path(archive_name)).resolve()
document_dict[EXPORTER_ARCHIVE_NAME] = str(archive_name)
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
else:
archive_target = None
@@ -569,7 +572,7 @@ class Command(CryptMixin, BaseCommand):
perform_copy = False
if target.exists():
source_stat = source.stat()
source_stat = os.stat(source)
target_stat = target.stat()
if self.compare_checksums and source_checksum:
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
View File
@@ -63,11 +63,11 @@ class Document:
/ "documents" / "documents"
/ "originals" / "originals"
/ f"{self.pk:07}.{self.file_type}.gpg" / f"{self.pk:07}.{self.file_type}.gpg"
) ).as_posix()
@property @property
def source_file(self): def source_file(self):
return self.source_path.open("rb") return Path(self.source_path).open("rb")
@property @property
def file_name(self): def file_name(self):
View File
@@ -1293,7 +1293,6 @@ class BulkEditSerializer(
"merge", "merge",
"split", "split",
"delete_pages", "delete_pages",
"edit_pdf",
], ],
label="Method", label="Method",
write_only=True, write_only=True,
@@ -1367,10 +1366,7 @@ class BulkEditSerializer(
return bulk_edit.split
elif method == "delete_pages":
return bulk_edit.delete_pages
elif method == "edit_pdf":
return bulk_edit.edit_pdf
else: # pragma: no cover
# This will never happen as it is handled by the ChoiceField
else:
raise serializers.ValidationError("Unsupported method.")
def _validate_parameters_tags(self, parameters):
@@ -1524,47 +1520,6 @@ class BulkEditSerializer(
else:
parameters["archive_fallback"] = False
def _validate_parameters_edit_pdf(self, parameters, document_id):
if "operations" not in parameters:
raise serializers.ValidationError("operations not specified")
if not isinstance(parameters["operations"], list):
raise serializers.ValidationError("operations must be a list")
for op in parameters["operations"]:
if not isinstance(op, dict):
raise serializers.ValidationError("invalid operation entry")
if "page" not in op or not isinstance(op["page"], int):
raise serializers.ValidationError("page must be an integer")
if "rotate" in op and not isinstance(op["rotate"], int):
raise serializers.ValidationError("rotate must be an integer")
if "doc" in op and not isinstance(op["doc"], int):
raise serializers.ValidationError("doc must be an integer")
if "update_document" in parameters:
if not isinstance(parameters["update_document"], bool):
raise serializers.ValidationError("update_document must be a boolean")
else:
parameters["update_document"] = False
if "include_metadata" in parameters:
if not isinstance(parameters["include_metadata"], bool):
raise serializers.ValidationError("include_metadata must be a boolean")
else:
parameters["include_metadata"] = True
if parameters["update_document"]:
max_idx = max(op.get("doc", 0) for op in parameters["operations"])
if max_idx > 0:
raise serializers.ValidationError(
"update_document only allowed with a single output document",
)
doc = Document.objects.get(id=document_id)
# doc existence is already validated
if doc.page_count:
for op in parameters["operations"]:
if op["page"] < 1 or op["page"] > doc.page_count:
raise serializers.ValidationError(
f"Page {op['page']} is out of bounds for document with {doc.page_count} pages.",
)
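For reference, a parameters payload that passes every check above (values illustrative):

{
    "operations": [{"page": 1}, {"page": 2, "rotate": 90}],
    "update_document": True,
    "include_metadata": False,
}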
def validate(self, attrs):
method = attrs["method"]
parameters = attrs["parameters"]
@@ -1599,12 +1554,6 @@ class BulkEditSerializer(
self._validate_parameters_delete_pages(parameters)
elif method == bulk_edit.merge:
self._validate_parameters_merge(parameters)
elif method == bulk_edit.edit_pdf:
if len(attrs["documents"]) > 1:
raise serializers.ValidationError(
"Edit PDF method only supports one document",
)
self._validate_parameters_edit_pdf(parameters, attrs["documents"][0])
return attrs
View File
@@ -1,12 +1,9 @@
from __future__ import annotations
from __future__ import annotations from __future__ import annotations
import ipaddress
import logging
import os
import shutil
import socket
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.parse import urlparse
import httpx
from celery import shared_task
@@ -54,6 +51,8 @@ from documents.permissions import set_permissions_for_object
from documents.templating.workflows import parse_w_workflow_placeholders
if TYPE_CHECKING:
from pathlib import Path
from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
@@ -330,16 +329,15 @@ def cleanup_document_deletion(sender, instance, **kwargs):
# Find a non-conflicting filename in case a document with the same
# name was moved to trash earlier
counter = 0
old_filename = Path(instance.source_path).name
old_filebase = Path(old_filename).stem
old_fileext = Path(old_filename).suffix
old_filename = os.path.split(instance.source_path)[1]
(old_filebase, old_fileext) = os.path.splitext(old_filename)
while True:
new_file_path = settings.EMPTY_TRASH_DIR / (
old_filebase + (f"_{counter:02}" if counter else "") + old_fileext
)
if new_file_path.exists():
if os.path.exists(new_file_path):
counter += 1
else:
break
@@ -363,26 +361,26 @@ def cleanup_document_deletion(sender, instance, **kwargs):
files += (instance.source_path,)
for filename in files:
if filename and filename.is_file():
if filename and os.path.isfile(filename):
try:
filename.unlink()
os.unlink(filename)
logger.debug(f"Deleted file {filename}.")
except OSError as e:
logger.warning(
f"While deleting document {instance!s}, the file "
f"{filename} could not be deleted: {e}",
)
elif filename and not filename.is_file():
elif filename and not os.path.isfile(filename):
logger.warning(f"Expected {filename} to exist, but it did not")
delete_empty_directories(
Path(instance.source_path).parent,
os.path.dirname(instance.source_path),
root=settings.ORIGINALS_DIR,
)
if instance.has_archive_version:
delete_empty_directories(
Path(instance.archive_path).parent,
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR,
)
@@ -403,14 +401,14 @@ def update_filename_and_move_files(
if isinstance(instance, CustomFieldInstance):
instance = instance.document
def validate_move(instance, old_path: Path, new_path: Path):
def validate_move(instance, old_path, new_path):
if not old_path.is_file():
if not os.path.isfile(old_path):
# Can't do anything if the old file does not exist anymore.
msg = f"Document {instance!s}: File {old_path} doesn't exist."
logger.fatal(msg)
raise CannotMoveFilesException(msg)
if new_path.is_file():
if os.path.isfile(new_path):
# Can't do anything if the new file already exists. Skip updating file.
msg = f"Document {instance!s}: Cannot rename file since target path {new_path} already exists."
logger.warning(msg)
@@ -438,20 +436,16 @@ def update_filename_and_move_files(
old_filename = instance.filename
old_source_path = instance.source_path
# Need to convert to string to be able to save it to the db
instance.filename = str(generate_unique_filename(instance))
instance.filename = generate_unique_filename(instance)
move_original = old_filename != instance.filename
old_archive_filename = instance.archive_filename
old_archive_path = instance.archive_path
if instance.has_archive_version:
# Need to convert to string to be able to save it to the db
instance.archive_filename = str(
instance.archive_filename = generate_unique_filename(
generate_unique_filename(
instance,
archive_filename=True,
),
)
move_archive = old_archive_filename != instance.archive_filename
@@ -493,11 +487,11 @@ def update_filename_and_move_files(
# Try to move files to their original location.
try:
if move_original and instance.source_path.is_file():
if move_original and os.path.isfile(instance.source_path):
logger.info("Restoring previous original path")
shutil.move(instance.source_path, old_source_path)
if move_archive and instance.archive_path.is_file():
if move_archive and os.path.isfile(instance.archive_path):
logger.info("Restoring previous archive path")
shutil.move(instance.archive_path, old_archive_path)
@@ -518,15 +512,17 @@ def update_filename_and_move_files(
# finally, remove any empty sub folders. This will do nothing if
# something has failed above.
if not old_source_path.is_file():
if not os.path.isfile(old_source_path):
delete_empty_directories(
Path(old_source_path).parent,
os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR,
)
if instance.has_archive_version and not old_archive_path.is_file():
if instance.has_archive_version and not os.path.isfile(
old_archive_path,
):
delete_empty_directories(
Path(old_archive_path).parent,
os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR,
)
@@ -663,28 +659,6 @@ def run_workflows_updated(sender, document: Document, logging_group=None, **kwar
)
def _is_public_ip(ip: str) -> bool:
try:
obj = ipaddress.ip_address(ip)
return not (
obj.is_private
or obj.is_loopback
or obj.is_link_local
or obj.is_multicast
or obj.is_unspecified
)
except ValueError: # pragma: no cover
return False
def _resolve_first_ip(host: str) -> str | None:
try:
info = socket.getaddrinfo(host, None)
return info[0][4][0] if info else None
except Exception: # pragma: no cover
return None
@shared_task(
retry_backoff=True,
autoretry_for=(httpx.HTTPStatusError,),
@@ -699,35 +673,11 @@ def send_webhook(
*,
as_json: bool = False,
):
p = urlparse(url)
if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
logger.warning("Webhook blocked: invalid scheme/hostname")
raise ValueError("Invalid URL scheme or hostname.")
port = p.port or (443 if p.scheme == "https" else 80)
if (
len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
and port not in settings.WEBHOOKS_ALLOWED_PORTS
):
logger.warning("Webhook blocked: port not permitted")
raise ValueError("Destination port not permitted.")
ip = _resolve_first_ip(p.hostname)
if not ip or (
not _is_public_ip(ip) and not settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS
):
logger.warning("Webhook blocked: destination not allowed")
raise ValueError("Destination host is not allowed.")
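Under the guards above, calls like the following would be rejected (URLs illustrative, and assuming settings that only allow http/https to public hosts):

# send_webhook("ftp://example.com/hook", ...)           -> ValueError (scheme not allowed)
# send_webhook("http://169.254.169.254/meta-data", ...) -> ValueError (link-local target)
# send_webhook("http://internal.svc:9000/hook", ...)    -> ValueError if 9000 is not in
#                                                          WEBHOOKS_ALLOWED_PORTS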
try:
post_args = {
"url": url,
"headers": {
k: v for k, v in (headers or {}).items() if k.lower() != "host"
},
"files": files or None,
"timeout": 5.0,
"follow_redirects": False,
"headers": headers,
"files": files,
}
if as_json:
post_args["json"] = data
@@ -748,6 +698,15 @@ def send_webhook(
)
raise e
logger.info(
f"Webhook sent to {url}",
)
except Exception as e:
logger.error(
f"Failed attempt sending webhook to {url}: {e}",
)
raise e
def run_workflows(
trigger_type: WorkflowTrigger.WorkflowTriggerType,
@@ -1260,7 +1219,10 @@ def run_workflows(
)
files = None
if action.webhook.include_document:
with original_file.open("rb") as f:
with open(
original_file,
"rb",
) as f:
files = {
"file": (
filename,
View File
@@ -1,34 +0,0 @@
Sample textual document content.
Include as many characters as possible, to check the classifier's vectorization.
Hey 00, this is "a" test0707 content.
This is an example document — created on 2025-06-25.
Digits: 0123456789
Punctuation: . , ; : ! ? ' " ( ) [ ] { } —
English text: The quick brown fox jumps over the lazy dog.
English stop words: We've been doing it before.
Accented Latin (diacritics): àâäæçéèêëîïôœùûüÿñ
Arabic: لقد قام المترجم بعمل جيد
Greek: Αλφα, Βήτα, Γάμμα, Δέλτα, Ωμέγα
Cyrillic: Привет, как дела? Добро пожаловать!
Chinese (Simplified): 你好,世界!今天的天气很好。
Chinese (Traditional): 歡迎來到世界,今天天氣很好。
Japanese (Kanji, Hiragana, Katakana): 東京へ行きます。カタカナ、ひらがな、漢字。
Korean (Hangul): 안녕하세요. 오늘 날씨 어때요?
Arabic: مرحبًا، كيف حالك؟
Hebrew: שלום, מה שלומך?
Emoji: 😀 🐍 📘 ✅ ©️ 🇺🇳
Symbols: © ® ™ § ¶ † ‡ ∞ µ ∑ ∆ √
Math: ∫₀^∞ x² dx = ∞, π ≈ 3.14159, ∇·E = ρ/ε₀
Currency: 1$ € ¥ £ ₹
Date formats: 25/06/2025, June 25, 2025, 2025年6月25日
Quote in French: « Bonjour, ça va ? »
Quote in German: „Guten Tag! Wie geht's?“
Newline test:
\r\n
\r
Tab\ttest\tspacing
/ = +) ( []) ~ * #192 +33601010101 § ¤
End of document.
View File
@@ -1 +0,0 @@
sample textual document content include as many characters as possible to check the classifier s vectorization hey 00 this is a test0707 content this is an example document created on 2025 06 25 digits 0123456789 punctuation english text the quick brown fox jumps over the lazy dog english stop words we ve been doing it before accented latin diacritics àâäæçéèêëîïôœùûüÿñ arabic لقد قام المترجم بعمل جيد greek αλφα βήτα γάμμα δέλτα ωμέγα cyrillic привет как дела добро пожаловать chinese simplified 你好 世界 今天的天气很好 chinese traditional 歡迎來到世界 今天天氣很好 japanese kanji hiragana katakana 東京へ行きます カタカナ ひらがな 漢字 korean hangul 안녕하세요 오늘 날씨 어때요 arabic مرحب ا كيف حالك hebrew שלום מה שלומך emoji symbols µ math ₀ x² dx π 3 14159 e ρ ε₀ currency 1 date formats 25 06 2025 june 25 2025 2025年6月25日 quote in french bonjour ça va quote in german guten tag wie geht s newline test r n r tab ttest tspacing 192 33601010101 end of document
View File
@@ -1 +0,0 @@
sampl textual document content includ mani charact possibl check classifi vector hey 00 test0707 content exampl document creat 2025 06 25 digit 0123456789 punctuat english text quick brown fox jump lazi dog english stop word accent latin diacrit àâäæçéèêëîïôœùûüÿñ arab لقد قام المترجم بعمل جيد greek αλφα βήτα γάμμα δέλτα ωμέγα cyril привет как дела добро пожаловать chines simplifi 你好 世界 今天的天气很好 chines tradit 歡迎來到世界 今天天氣很好 japanes kanji hiragana katakana 東京へ行きます カタカナ ひらがな 漢字 korean hangul 안녕하세요 오늘 날씨 어때요 arab مرحب ا كيف حالك hebrew שלום מה שלומך emoji symbol µ math ₀ x² dx π 3 14159 e ρ ε₀ currenc 1 date format 25 06 2025 june 25 2025 2025年6月25日 quot french bonjour ça va quot german guten tag wie geht newlin test r n r tab ttest tspace 192 33601010101 end document
View File
@@ -41,7 +41,6 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
title="B", title="B",
correspondent=self.c1, correspondent=self.c1,
document_type=self.dt1, document_type=self.dt1,
page_count=5,
) )
self.doc3 = Document.objects.create( self.doc3 = Document.objects.create(
checksum="C", checksum="C",
@@ -1370,218 +1369,6 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"pages must be a list of integers", response.content)
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf(self, m):
self.setup_mock(m, "edit_pdf")
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertCountEqual(args[0], [self.doc2.id])
self.assertEqual(kwargs["operations"], [{"page": 1}])
self.assertEqual(kwargs["user"], self.user)
def test_edit_pdf_invalid_params(self):
# multiple documents
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"Edit PDF method only supports one document", response.content)
# no operations specified
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"operations not specified", response.content)
# operations not a list
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": "not_a_list"},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"operations must be a list", response.content)
# invalid operation
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": ["invalid_operation"]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"invalid operation entry", response.content)
# page not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"page must be an integer", response.content)
# rotate not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "rotate": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"rotate must be an integer", response.content)
# doc not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "doc": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"doc must be an integer", response.content)
# update_document not a boolean
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"update_document": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"update_document must be a boolean", response.content)
# include_metadata not a boolean
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"include_metadata": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"include_metadata must be a boolean", response.content)
# update_document True but output would be multiple documents
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"update_document": True,
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(
b"update_document only allowed with a single output document",
response.content,
)
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf_page_out_of_bounds(self, m):
"""
GIVEN:
- API data for editing PDF is called
- The page number is out of bounds
WHEN:
- API is called
THEN:
- The API fails with a correct error code
"""
self.setup_mock(m, "edit_pdf")
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 99}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"out of bounds", response.content)
@override_settings(AUDIT_LOG_ENABLED=True)
def test_bulk_edit_audit_log_enabled_simple_field(self):
    """

View File

@@ -909,156 +909,3 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_str = "Error deleting pages from document"
self.assertIn(expected_str, error_str)
mock_update_archive_file.assert_not_called()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_basic_operations(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with two operations to split the doc and rotate pages
THEN:
- A grouped task is generated and delay() is called
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1, "rotate": 90}]
result = bulk_edit.edit_pdf(doc_ids, operations)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_user_override(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with user override
THEN:
- Task is created with user context
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1}]
user = User.objects.create(username="editor")
result = bulk_edit.edit_pdf(doc_ids, operations, user=user)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_delete_original(self, mock_consume_file, mock_chord):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with delete_original=True
THEN:
- Task group is triggered
"""
mock_chord.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
result = bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
self.assertEqual(result, "OK")
mock_chord.assert_called_once()
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
def test_edit_pdf_with_update_document(self, mock_update_document):
"""
GIVEN:
- A single existing PDF document
WHEN:
- edit_pdf is called with update_document=True and a single output
THEN:
- The original document is updated in-place
- The update_document_content_maybe_archive_file task is triggered
"""
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
original_checksum = self.doc2.checksum
original_page_count = self.doc2.page_count
result = bulk_edit.edit_pdf(
doc_ids,
operations=operations,
update_document=True,
delete_original=False,
)
self.assertEqual(result, "OK")
self.doc2.refresh_from_db()
self.assertNotEqual(self.doc2.checksum, original_checksum)
self.assertNotEqual(self.doc2.page_count, original_page_count)
mock_update_document.assert_called_once_with(document_id=self.doc2.id)
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_without_metadata(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with include_metadata=False
THEN:
- Tasks are created with empty metadata
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}]
result = bulk_edit.edit_pdf(doc_ids, operations, include_metadata=False)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_open_failure(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf fails to open PDF
THEN:
- Task group is not called
"""
doc_ids = [self.doc2.id]
operations = [
{"page": 9999}, # invalid page, forces error during PDF load
]
with self.assertLogs("paperless.bulk_edit", level="ERROR"):
with self.assertRaises(Exception):
bulk_edit.edit_pdf(doc_ids, operations)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_multiple_outputs_with_update_flag_errors(
self,
mock_consume_file,
mock_group,
):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with multiple outputs and update_document=True
THEN:
- An error is logged and task group is not called
"""
doc_ids = [self.doc2.id]
operations = [
{"page": 1, "doc": 0},
{"page": 2, "doc": 1},
]
with self.assertLogs("paperless.bulk_edit", level="ERROR"):
with self.assertRaises(ValueError):
bulk_edit.edit_pdf(doc_ids, operations, update_document=True)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()
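The mocks in these tests outline the dispatch model: each output PDF becomes a consume_file task signature, and the signatures run as a celery group, or as a chord when the originals must be deleted afterwards. A minimal sketch of that branching, with hypothetical signature lists standing in for the real consume_file.s(...) calls:

    from celery import chord, group

    def dispatch_outputs(consume_sigs, *, delete_original=False, cleanup_sig=None):
        # consume_sigs: one task signature per output document (hypothetical)
        if delete_original:
            # chord: run every consume task, then a cleanup callback that
            # removes the original document(s)
            chord(consume_sigs)(cleanup_sig)
        else:
            group(consume_sigs).delay()
        return "OK"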

View File

@@ -1,45 +0,0 @@
import pickle
from documents.caching import StoredLRUCache
def test_lru_cache_entries():
CACHE_TTL = 1
# LRU cache with a capacity of 2 elements
cache = StoredLRUCache("test_lru_cache_key", 2, backend_ttl=CACHE_TTL)
cache.set(1, 1)
cache.set(2, 2)
assert cache.get(2) == 2
assert cache.get(1) == 1
# The oldest entry (2) should be removed
cache.set(3, 3)
assert cache.get(3) == 3
assert not cache.get(2)
assert cache.get(1) == 1
# Save the cache, restore it and check it overwrites the current cache in memory
cache.save()
cache.set(4, 4)
assert not cache.get(3)
cache.load()
assert not cache.get(4)
assert cache.get(3) == 3
assert cache.get(1) == 1
def test_stored_lru_cache_key_ttl(mocker):
mock_backend = mocker.Mock()
cache = StoredLRUCache("test_key", backend=mock_backend, backend_ttl=321)
# Simulate storing values
cache.set("x", "X")
cache.set("y", "Y")
cache.save()
# Assert backend.set was called with pickled data, key and TTL
mock_backend.set.assert_called_once()
key, data, timeout = mock_backend.set.call_args[0]
assert key == "test_key"
assert timeout == 321
assert pickle.loads(data) == {"x": "X", "y": "Y"}
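The deleted test pins down a small contract for StoredLRUCache: LRU eviction at a fixed capacity, and save()/load() that round-trip the mapping through a backend as a pickled dict with a TTL. A minimal sketch consistent with those assertions, assuming a Django-cache-style backend with a positional set(key, value, timeout); the real class lives in documents.caching:

    import pickle
    from collections import OrderedDict

    from django.core.cache import cache as default_backend  # assumed default

    class StoredLRUCacheSketch:
        def __init__(self, key, capacity=128, *, backend=None, backend_ttl=None):
            self._key = key
            self._capacity = capacity
            self._backend = backend or default_backend
            self._backend_ttl = backend_ttl
            self._data = OrderedDict()

        def get(self, key, default=None):
            if key in self._data:
                self._data.move_to_end(key)  # mark as most recently used
                return self._data[key]
            return default

        def set(self, key, value):
            self._data[key] = value
            self._data.move_to_end(key)
            if len(self._data) > self._capacity:
                self._data.popitem(last=False)  # evict least recently used entry

        def save(self):
            # Matches the (key, pickled data, timeout) positional call the
            # test asserts on
            self._backend.set(self._key, pickle.dumps(dict(self._data)), self._backend_ttl)

        def load(self):
            data = self._backend.get(self._key)
            if data is not None:
                self._data = OrderedDict(pickle.loads(data))  # overwrite in-memory state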

View File

@@ -21,7 +21,7 @@ from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
-def dummy_preprocess(content: str, **kwargs):
+def dummy_preprocess(content: str):
    """
    Simpler, faster pre-processing for testing purposes
    """
@@ -223,26 +223,11 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.generate_test_data()
self.classifier.train()
-with (
-    mock.patch.object(
-        self.classifier.data_vectorizer,
-        "transform",
-        wraps=self.classifier.data_vectorizer.transform,
-    ) as mock_transform,
-    mock.patch.object(
-        self.classifier,
-        "preprocess_content",
-        wraps=self.classifier.preprocess_content,
-    ) as mock_preprocess_content,
-):
self.assertEqual(
    self.classifier.predict_correspondent(self.doc1.content),
    self.c1.pk,
)
-self.assertEqual(
-    self.classifier.predict_correspondent(self.doc2.content),
-    None,
-)
+self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
self.assertListEqual(
    self.classifier.predict_tags(self.doc1.content),
    [self.t1.pk],
@@ -255,15 +240,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
    self.classifier.predict_document_type(self.doc1.content),
    self.dt.pk,
)
-self.assertEqual(
-    self.classifier.predict_document_type(self.doc2.content),
-    None,
-)
+self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
-# Check that the classifier vectorized content and text preprocessing has been cached
-# It should be called once per document (doc1 and doc2)
-self.assertEqual(mock_preprocess_content.call_count, 2)
-self.assertEqual(mock_transform.call_count, 2)
def test_no_retrain_if_no_change(self):
    """
@@ -717,67 +694,3 @@ class TestClassifier(DirectoriesMixin, TestCase):
mock_load.side_effect = Exception()
with self.assertRaises(Exception):
    load_classifier(raise_exception=True)
def test_preprocess_content():
"""
GIVEN:
- Advanced text processing is enabled (default)
WHEN:
- Classifier preprocesses a document's content
THEN:
- Processed content matches the expected output (stemmed words)
"""
with (Path(__file__).parent / "samples" / "content.txt").open("r") as f:
content = f.read()
with (Path(__file__).parent / "samples" / "preprocessed_content_advanced.txt").open(
"r",
) as f:
expected_preprocess_content = f.read().rstrip()
classifier = DocumentClassifier()
result = classifier.preprocess_content(content)
assert result == expected_preprocess_content
def test_preprocess_content_nltk_disabled():
"""
GIVEN:
- Advanced text processing is disabled
WHEN:
- Classifier preprocesses a document's content
THEN:
- Processed content matches the expected output (unstemmed words)
"""
with (Path(__file__).parent / "samples" / "content.txt").open("r") as f:
content = f.read()
with (Path(__file__).parent / "samples" / "preprocessed_content.txt").open(
"r",
) as f:
expected_preprocess_content = f.read().rstrip()
classifier = DocumentClassifier()
with mock.patch("documents.classifier.ADVANCED_TEXT_PROCESSING_ENABLED", new=False):
result = classifier.preprocess_content(content)
assert result == expected_preprocess_content
def test_preprocess_content_nltk_load_fail(mocker):
"""
GIVEN:
- NLTK stop words fail to load
WHEN:
- Classifier preprocesses a document's content
THEN:
- Processed content matches the expected output (unstemmed words)
"""
_module = mocker.MagicMock(name="nltk_corpus_mock")
_module.stopwords.words.side_effect = AttributeError()
mocker.patch.dict("sys.modules", {"nltk.corpus": _module})
classifier = DocumentClassifier()
with (Path(__file__).parent / "samples" / "content.txt").open("r") as f:
content = f.read()
with (Path(__file__).parent / "samples" / "preprocessed_content.txt").open(
"r",
) as f:
expected_preprocess_content = f.read().rstrip()
result = classifier.preprocess_content(content)
assert result == expected_preprocess_content
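Together these deleted tests specify the preprocessing contract: content is always normalised, while stemming and stop-word removal apply only when advanced text processing is enabled and the NLTK data actually loads. A rough standalone sketch of that control flow (the normalisation details and module-level flag are assumptions; the real method is DocumentClassifier.preprocess_content):

    import re

    ADVANCED_TEXT_PROCESSING_ENABLED = True  # mirrors the flag the tests patch

    def preprocess_content(content: str) -> str:
        # Unconditional normalisation (assumed: lowercase, collapse whitespace)
        content = re.sub(r"\s+", " ", content.lower()).strip()
        if not ADVANCED_TEXT_PROCESSING_ENABLED:
            return content
        try:
            from nltk.corpus import stopwords
            from nltk.stem import SnowballStemmer
            from nltk.tokenize import word_tokenize

            stops = set(stopwords.words("english"))
            stemmer = SnowballStemmer("english")
            tokens = word_tokenize(content)
            return " ".join(stemmer.stem(w) for w in tokens if w not in stops)
        except AttributeError:
            # NLTK data failed to load: return the unstemmed text, as
            # test_preprocess_content_nltk_load_fail expects
            return content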

View File

@@ -41,9 +41,11 @@ class TestDocument(TestCase):
Path(file_path).touch()
Path(thumb_path).touch()
-with mock.patch("documents.signals.handlers.Path.unlink") as mock_unlink:
+with mock.patch("documents.signals.handlers.os.unlink") as mock_unlink:
    document.delete()
    empty_trash([document.pk])
+   mock_unlink.assert_any_call(file_path)
+   mock_unlink.assert_any_call(thumb_path)
    self.assertEqual(mock_unlink.call_count, 2)
def test_document_soft_delete(self):
@@ -61,7 +63,7 @@
Path(file_path).touch()
Path(thumb_path).touch()
-with mock.patch("documents.signals.handlers.Path.unlink") as mock_unlink:
+with mock.patch("documents.signals.handlers.os.unlink") as mock_unlink:
    document.delete()
    self.assertEqual(mock_unlink.call_count, 0)

View File

@@ -34,12 +34,12 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
-self.assertEqual(generate_filename(document), Path(f"{document.pk:07d}.pdf"))
+self.assertEqual(generate_filename(document), f"{document.pk:07d}.pdf")
document.storage_type = Document.STORAGE_TYPE_GPG
self.assertEqual(
    generate_filename(document),
-   Path(f"{document.pk:07d}.pdf.gpg"),
+   f"{document.pk:07d}.pdf.gpg",
)
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
@@ -58,12 +58,12 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.filename = generate_filename(document)
# Ensure that filename is properly generated
-self.assertEqual(document.filename, Path("none/none.pdf"))
+self.assertEqual(document.filename, "none/none.pdf")
# Enable encryption and check again
document.storage_type = Document.STORAGE_TYPE_GPG
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("none/none.pdf.gpg"))
+self.assertEqual(document.filename, "none/none.pdf.gpg")
document.save()
@@ -96,7 +96,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("none/none.pdf"))
+self.assertEqual(document.filename, "none/none.pdf")
create_source_path_directory(document.source_path)
document.source_path.touch()
@@ -137,7 +137,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("none/none.pdf"))
+self.assertEqual(document.filename, "none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
@@ -247,7 +247,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("none/none.pdf"))
+self.assertEqual(document.filename, "none/none.pdf")
create_source_path_directory(document.source_path)
@@ -269,11 +269,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
dt = DocumentType.objects.create(name="my_doc_type")
d = Document.objects.create(title="the_doc", mime_type="application/pdf")
-self.assertEqual(generate_filename(d), Path("none - the_doc.pdf"))
+self.assertEqual(generate_filename(d), "none - the_doc.pdf")
d.document_type = dt
-self.assertEqual(generate_filename(d), Path("my_doc_type - the_doc.pdf"))
+self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
@override_settings(FILENAME_FORMAT="{asn} - {title}")
def test_asn(self):
@@ -289,8 +289,8 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
archive_serial_number=None,
checksum="B",
)
-self.assertEqual(generate_filename(d1), Path("652 - the_doc.pdf"))
-self.assertEqual(generate_filename(d2), Path("none - the_doc.pdf"))
+self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
+self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
@override_settings(FILENAME_FORMAT="{title} {tag_list}")
def test_tag_list(self):
@@ -298,7 +298,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc.tags.create(name="tag2")
doc.tags.create(name="tag1")
-self.assertEqual(generate_filename(doc), Path("doc1 tag1,tag2.pdf"))
+self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")
doc = Document.objects.create(
    title="doc2",
@@ -306,7 +306,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    mime_type="application/pdf",
)
-self.assertEqual(generate_filename(doc), Path("doc2.pdf"))
+self.assertEqual(generate_filename(doc), "doc2.pdf")
@override_settings(FILENAME_FORMAT="//etc/something/{title}")
def test_filename_relative(self):
@@ -330,11 +330,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    created=d1,
)
-self.assertEqual(generate_filename(doc1), Path("2020-03-06.pdf"))
+self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
doc1.created = datetime.date(2020, 11, 16)
-self.assertEqual(generate_filename(doc1), Path("2020-11-16.pdf"))
+self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
    FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
@@ -347,11 +347,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    added=d1,
)
-self.assertEqual(generate_filename(doc1), Path("232-01-09.pdf"))
+self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
-self.assertEqual(generate_filename(doc1), Path("2020-11-16.pdf"))
+self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
    FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
@@ -389,11 +389,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
-self.assertEqual(generate_filename(document), Path("0000001.pdf"))
+self.assertEqual(generate_filename(document), "0000001.pdf")
document.pk = 13579
-self.assertEqual(generate_filename(document), Path("0013579.pdf"))
+self.assertEqual(generate_filename(document), "0013579.pdf")
@override_settings(FILENAME_FORMAT=None)
def test_format_none(self):
@@ -402,7 +402,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
-self.assertEqual(generate_filename(document), Path("0000001.pdf"))
+self.assertEqual(generate_filename(document), "0000001.pdf")
def test_try_delete_empty_directories(self):
    # Create our working directory
@@ -428,7 +428,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
-self.assertEqual(generate_filename(document), Path("0000001.pdf"))
+self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(FILENAME_FORMAT="{created__year}")
def test_invalid_format_key(self):
@@ -437,7 +437,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
-self.assertEqual(generate_filename(document), Path("0000001.pdf"))
+self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(FILENAME_FORMAT="{title}")
def test_duplicates(self):
@@ -564,7 +564,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    value_select="abc123",
)
-self.assertEqual(generate_filename(doc), Path("document_apple.pdf"))
+self.assertEqual(generate_filename(doc), "document_apple.pdf")
# handler should not have been called
self.assertEqual(m.call_count, 0)
@@ -576,7 +576,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    ],
}
cf.save()
-self.assertEqual(generate_filename(doc), Path("document_aubergine.pdf"))
+self.assertEqual(generate_filename(doc), "document_aubergine.pdf")
# handler should have been called
self.assertEqual(m.call_count, 1)
@@ -897,7 +897,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=1,
    checksum="1",
)
-self.assertEqual(generate_filename(doc), Path("This. is the title.pdf"))
+self.assertEqual(generate_filename(doc), "This. is the title.pdf")
doc = Document.objects.create(
    title="my\\invalid/../title:yay",
@@ -905,7 +905,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=2,
    checksum="2",
)
-self.assertEqual(generate_filename(doc), Path("my-invalid-..-title-yay.pdf"))
+self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
@override_settings(FILENAME_FORMAT="{created}")
def test_date(self):
@@ -916,7 +916,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=2,
    checksum="2",
)
-self.assertEqual(generate_filename(doc), Path("2020-05-21.pdf"))
+self.assertEqual(generate_filename(doc), "2020-05-21.pdf")
def test_dynamic_path(self):
    """
@@ -935,7 +935,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    checksum="2",
    storage_path=StoragePath.objects.create(path="TestFolder/{{created}}"),
)
-self.assertEqual(generate_filename(doc), Path("TestFolder/2020-06-25.pdf"))
+self.assertEqual(generate_filename(doc), "TestFolder/2020-06-25.pdf")
def test_dynamic_path_with_none(self):
    """
@@ -956,7 +956,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    checksum="2",
    storage_path=StoragePath.objects.create(path="{{asn}} - {{created}}"),
)
-self.assertEqual(generate_filename(doc), Path("none - 2020-06-25.pdf"))
+self.assertEqual(generate_filename(doc), "none - 2020-06-25.pdf")
@override_settings(
    FILENAME_FORMAT_REMOVE_NONE=True,
@@ -984,7 +984,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    checksum="2",
    storage_path=sp,
)
-self.assertEqual(generate_filename(doc), Path("TestFolder/2020-06-25.pdf"))
+self.assertEqual(generate_filename(doc), "TestFolder/2020-06-25.pdf")
# Special case, undefined variable, then defined at the start of the template
# This could lead to an absolute path after we remove the leading -none-, but leave the leading /
@@ -993,7 +993,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    "{{ owner_username }}/{{ created_year }}/{{ correspondent }}/{{ title }}"
)
sp.save()
-self.assertEqual(generate_filename(doc), Path("2020/does not matter.pdf"))
+self.assertEqual(generate_filename(doc), "2020/does not matter.pdf")
def test_multiple_doc_paths(self):
    """
@@ -1028,14 +1028,8 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    ),
)
-self.assertEqual(
-    generate_filename(doc_a),
-    Path("ThisIsAFolder/4/2020-06-25.pdf"),
-)
-self.assertEqual(
-    generate_filename(doc_b),
-    Path("SomeImportantNone/2020-07-25.pdf"),
-)
+self.assertEqual(generate_filename(doc_a), "ThisIsAFolder/4/2020-06-25.pdf")
+self.assertEqual(generate_filename(doc_b), "SomeImportantNone/2020-07-25.pdf")
@override_settings(
    FILENAME_FORMAT=None,
@@ -1070,11 +1064,8 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    ),
)
-self.assertEqual(generate_filename(doc_a), Path("0000002.pdf"))
-self.assertEqual(
-    generate_filename(doc_b),
-    Path("SomeImportantNone/2020-07-25.pdf"),
-)
+self.assertEqual(generate_filename(doc_a), "0000002.pdf")
+self.assertEqual(generate_filename(doc_b), "SomeImportantNone/2020-07-25.pdf")
@override_settings(
    FILENAME_FORMAT="{created_year_short}/{created_month_name_short}/{created_month_name}/{title}",
@@ -1087,7 +1078,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=2,
    checksum="2",
)
-self.assertEqual(generate_filename(doc), Path("89/Dec/December/The Title.pdf"))
+self.assertEqual(generate_filename(doc), "89/Dec/December/The Title.pdf")
@override_settings(
    FILENAME_FORMAT="{added_year_short}/{added_month_name}/{added_month_name_short}/{title}",
@@ -1100,7 +1091,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=2,
    checksum="2",
)
-self.assertEqual(generate_filename(doc), Path("84/August/Aug/The Title.pdf"))
+self.assertEqual(generate_filename(doc), "84/August/Aug/The Title.pdf")
@override_settings(
    FILENAME_FORMAT="{owner_username}/{title}",
@@ -1133,8 +1124,8 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    checksum="3",
)
-self.assertEqual(generate_filename(owned_doc), Path("user1/The Title.pdf"))
-self.assertEqual(generate_filename(no_owner_doc), Path("none/does matter.pdf"))
+self.assertEqual(generate_filename(owned_doc), "user1/The Title.pdf")
+self.assertEqual(generate_filename(no_owner_doc), "none/does matter.pdf")
@override_settings(
    FILENAME_FORMAT="{original_name}",
@@ -1180,20 +1171,17 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    original_filename="logs.txt",
)
-self.assertEqual(generate_filename(doc_with_original), Path("someepdf.pdf"))
+self.assertEqual(generate_filename(doc_with_original), "someepdf.pdf")
self.assertEqual(
    generate_filename(tricky_with_original),
-   Path("some pdf with spaces and stuff.pdf"),
+   "some pdf with spaces and stuff.pdf",
)
-self.assertEqual(generate_filename(no_original), Path("none.pdf"))
+self.assertEqual(generate_filename(no_original), "none.pdf")
-self.assertEqual(generate_filename(text_doc), Path("logs.txt"))
+self.assertEqual(generate_filename(text_doc), "logs.txt")
-self.assertEqual(
-    generate_filename(text_doc, archive_filename=True),
-    Path("logs.pdf"),
-)
+self.assertEqual(generate_filename(text_doc, archive_filename=True), "logs.pdf")
@override_settings(
    FILENAME_FORMAT="XX{correspondent}/{title}",
@@ -1218,7 +1206,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("XX/doc1.pdf"))
+self.assertEqual(document.filename, "XX/doc1.pdf")
def test_complex_template_strings(self):
    """
@@ -1256,19 +1244,19 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
self.assertEqual(
    generate_filename(doc_a),
-   Path("somepath/some where/2020-06-25/Does Matter.pdf"),
+   "somepath/some where/2020-06-25/Does Matter.pdf",
)
doc_a.checksum = "5"
self.assertEqual(
    generate_filename(doc_a),
-   Path("somepath/2024-10-01/Does Matter.pdf"),
+   "somepath/2024-10-01/Does Matter.pdf",
)
sp.path = "{{ document.title|lower }}{{ document.archive_serial_number - 2 }}"
sp.save()
-self.assertEqual(generate_filename(doc_a), Path("does matter23.pdf"))
+self.assertEqual(generate_filename(doc_a), "does matter23.pdf")
sp.path = """
somepath/
@@ -1287,13 +1275,13 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
sp.save()
self.assertEqual(
    generate_filename(doc_a),
-   Path("somepath/asn-000-200/Does Matter/Does Matter.pdf"),
+   "somepath/asn-000-200/Does Matter/Does Matter.pdf",
)
doc_a.archive_serial_number = 301
doc_a.save()
self.assertEqual(
    generate_filename(doc_a),
-   Path("somepath/asn-201-400/asn-3xx/Does Matter.pdf"),
+   "somepath/asn-201-400/asn-3xx/Does Matter.pdf",
)
@override_settings(
@@ -1322,7 +1310,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
with self.assertLogs(level=logging.WARNING) as capture:
    self.assertEqual(
        generate_filename(doc_a),
-       Path("0000002.pdf"),
+       "0000002.pdf",
    )
    self.assertEqual(len(capture.output), 1)
@@ -1357,7 +1345,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
with self.assertLogs(level=logging.WARNING) as capture:
    self.assertEqual(
        generate_filename(doc_a),
-       Path("0000002.pdf"),
+       "0000002.pdf",
    )
    self.assertEqual(len(capture.output), 1)
@@ -1425,7 +1413,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("invoices/1234.pdf"),
+       "invoices/1234.pdf",
    )
with override_settings(
@@ -1439,7 +1427,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("Some Title_ChoiceOne.pdf"),
+       "Some Title_ChoiceOne.pdf",
    )
# Check for handling Nones well
@@ -1448,7 +1436,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
self.assertEqual(
    generate_filename(doc_a),
-   Path("Some Title_Default Value.pdf"),
+   "Some Title_Default Value.pdf",
)
cf.name = "Invoice Number"
@@ -1461,7 +1449,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("invoices/4567.pdf"),
+       "invoices/4567.pdf",
    )
with override_settings(
@@ -1469,7 +1457,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("invoices/0.pdf"),
+       "invoices/0.pdf",
    )
def test_datetime_filter(self):
@@ -1508,7 +1496,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("2020/Some Title.pdf"),
+       "2020/Some Title.pdf",
    )
with override_settings(
@@ -1516,7 +1504,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("2020-06-25/Some Title.pdf"),
+       "2020-06-25/Some Title.pdf",
    )
with override_settings(
@@ -1524,7 +1512,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("2024-10-01/Some Title.pdf"),
+       "2024-10-01/Some Title.pdf",
    )
def test_slugify_filter(self):
@@ -1551,7 +1539,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc),
-       Path("some-title-with-special-characters.pdf"),
+       "some-title-with-special-characters.pdf",
    )
# Test with correspondent name containing spaces and special chars
@@ -1565,7 +1553,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc),
-       Path("johns-office-workplace/some-title-with-special-characters.pdf"),
+       "johns-office-workplace/some-title-with-special-characters.pdf",
    )
# Test with custom fields
@@ -1584,5 +1572,5 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc),
-       Path("brussels-belgium/some-title-with-special-characters.pdf"),
+       "brussels-belgium/some-title-with-special-characters.pdf",
    )

View File

@@ -209,7 +209,7 @@ class TestExportImport(
    4,
)
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
self.assertEqual(
    self._get_document_from_manifest(manifest, self.d1.id)["fields"]["title"],
@@ -235,7 +235,9 @@
).as_posix()
self.assertIsFile(fname)
self.assertIsFile(
-   self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME],
+   (
+       self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME]
+   ).as_posix(),
)
with Path(fname).open("rb") as f:
@@ -250,7 +252,7 @@
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
    fname = (
        self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]
-   )
+   ).as_posix()
    self.assertIsFile(fname)
with Path(fname).open("rb") as f:
@@ -310,7 +312,7 @@
)
self._do_export()
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
st_mtime_1 = (self.target / "manifest.json").stat().st_mtime
@@ -320,7 +322,7 @@
self._do_export()
m.assert_not_called()
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
st_mtime_2 = (self.target / "manifest.json").stat().st_mtime
Path(self.d1.source_path).touch()
@@ -332,7 +334,7 @@
self.assertEqual(m.call_count, 1)
st_mtime_3 = (self.target / "manifest.json").stat().st_mtime
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
self.assertNotEqual(st_mtime_1, st_mtime_2)
self.assertNotEqual(st_mtime_2, st_mtime_3)
@@ -350,7 +352,7 @@
self._do_export()
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
with mock.patch(
    "documents.management.commands.document_exporter.copy_file_with_basic_stats",
@@ -358,7 +360,7 @@
self._do_export()
m.assert_not_called()
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
self.d2.checksum = "asdfasdgf3"
self.d2.save()
@@ -369,7 +371,7 @@
self._do_export(compare_checksums=True)
self.assertEqual(m.call_count, 1)
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
def test_update_export_deleted_document(self):
    shutil.rmtree(Path(self.dirs.media_dir) / "documents")
@@ -383,7 +385,7 @@
self.assertTrue(len(manifest), 7)
doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
self.assertIsFile(
-   str(self.target / doc_from_manifest[EXPORTER_FILE_NAME]),
+   (self.target / doc_from_manifest[EXPORTER_FILE_NAME]).as_posix(),
)
self.d3.delete()
@@ -395,12 +397,12 @@
    self.d3.id,
)
self.assertIsFile(
-   self.target / doc_from_manifest[EXPORTER_FILE_NAME],
+   (self.target / doc_from_manifest[EXPORTER_FILE_NAME]).as_posix(),
)
manifest = self._do_export(delete=True)
self.assertIsNotFile(
-   self.target / doc_from_manifest[EXPORTER_FILE_NAME],
+   (self.target / doc_from_manifest[EXPORTER_FILE_NAME]).as_posix(),
)
self.assertTrue(len(manifest), 6)
@@ -414,20 +416,20 @@
)
self._do_export(use_filename_format=True)
-self.assertIsFile(self.target / "wow1" / "c.pdf")
+self.assertIsFile((self.target / "wow1" / "c.pdf").as_posix())
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
self.d1.title = "new_title"
self.d1.save()
self._do_export(use_filename_format=True, delete=True)
-self.assertIsNotFile(self.target / "wow1" / "c.pdf")
+self.assertIsNotFile((self.target / "wow1" / "c.pdf").as_posix())
-self.assertIsNotDir(self.target / "wow1")
+self.assertIsNotDir((self.target / "wow1").as_posix())
-self.assertIsFile(self.target / "new_title" / "c.pdf")
+self.assertIsFile((self.target / "new_title" / "c.pdf").as_posix())
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
-self.assertIsFile(self.target / "wow2" / "none.pdf")
+self.assertIsFile((self.target / "wow2" / "none.pdf").as_posix())
self.assertIsFile(
-   self.target / "wow2" / "none_01.pdf",
+   (self.target / "wow2" / "none_01.pdf").as_posix(),
)
def test_export_missing_files(self):

View File

@@ -20,7 +20,7 @@ def source_path_before(self):
if self.storage_type == STORAGE_TYPE_GPG:
    fname += ".gpg"
-return Path(settings.ORIGINALS_DIR) / fname
+return (Path(settings.ORIGINALS_DIR) / fname).as_posix()
def file_type_after(self):
@@ -35,7 +35,7 @@ def source_path_after(doc):
if doc.storage_type == STORAGE_TYPE_GPG:
    fname += ".gpg"  # pragma: no cover
-return Path(settings.ORIGINALS_DIR) / fname
+return (Path(settings.ORIGINALS_DIR) / fname).as_posix()
@override_settings(PASSPHRASE="test")

View File

@@ -1,10 +1,8 @@
import shutil
-import socket
from datetime import timedelta
from typing import TYPE_CHECKING
from unittest import mock
-import pytest
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.test import override_settings
@@ -12,7 +10,6 @@ from django.utils import timezone
from guardian.shortcuts import assign_perm
from guardian.shortcuts import get_groups_with_perms
from guardian.shortcuts import get_users_with_perms
-from httpx import HTTPError
from httpx import HTTPStatusError
from pytest_httpx import HTTPXMock
from rest_framework.test import APITestCase
@@ -2828,8 +2825,6 @@ class TestWorkflows(
    content="Test message",
    headers={},
    files=None,
-   follow_redirects=False,
-   timeout=5,
)
expected_str = "Webhook sent to http://paperless-ngx.com"
@@ -2847,8 +2842,6 @@
    data={"message": "Test message"},
    headers={},
    files=None,
-   follow_redirects=False,
-   timeout=5,
)
@mock.patch("httpx.post")
@@ -2969,164 +2962,3 @@ class TestWebhookSend:
    as_json=True,
)
assert httpx_mock.get_request().headers["Content-Type"] == "application/json"
@pytest.fixture
def resolve_to(monkeypatch):
"""
Force DNS resolution to a specific IP for any hostname.
"""
def _set(ip: str):
def fake_getaddrinfo(host, *_args, **_kwargs):
return [(socket.AF_INET, None, None, "", (ip, 0))]
monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo)
return _set
class TestWebhookSecurity:
def test_blocks_invalid_scheme_or_hostname(self, httpx_mock: HTTPXMock):
"""
GIVEN:
- Invalid URL schemes or hostnames
WHEN:
- send_webhook is called with such URLs
THEN:
- ValueError is raised
"""
with pytest.raises(ValueError):
send_webhook(
"ftp://example.com",
data="",
headers={},
files=None,
as_json=False,
)
with pytest.raises(ValueError):
send_webhook(
"http:///nohost",
data="",
headers={},
files=None,
as_json=False,
)
@override_settings(WEBHOOKS_ALLOWED_PORTS=[80, 443])
def test_blocks_disallowed_port(self, httpx_mock: HTTPXMock):
"""
GIVEN:
- URL with a disallowed port
WHEN:
- send_webhook is called with such URL
THEN:
- ValueError is raised
"""
with pytest.raises(ValueError):
send_webhook(
"http://paperless-ngx.com:8080",
data="",
headers={},
files=None,
as_json=False,
)
assert httpx_mock.get_request() is None
@override_settings(WEBHOOKS_ALLOW_INTERNAL_REQUESTS=False)
def test_blocks_private_loopback_linklocal(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- URL with a private, loopback, or link-local IP address
- WEBHOOKS_ALLOW_INTERNAL_REQUESTS is False
WHEN:
- send_webhook is called with such URL
THEN:
- ValueError is raised
"""
resolve_to("127.0.0.1")
with pytest.raises(ValueError):
send_webhook(
"http://paperless-ngx.com",
data="",
headers={},
files=None,
as_json=False,
)
def test_allows_public_ip_and_sends(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- URL with a public IP address
WHEN:
- send_webhook is called with such URL
THEN:
- Request is sent successfully
"""
resolve_to("52.207.186.75")
httpx_mock.add_response(content=b"ok")
send_webhook(
url="http://paperless-ngx.com",
data="hi",
headers={},
files=None,
as_json=False,
)
req = httpx_mock.get_request()
assert req.url.host == "paperless-ngx.com"
def test_follow_redirects_disabled(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- A URL that redirects
WHEN:
- send_webhook is called with follow_redirects=False
THEN:
- Request is made to the original URL and does not follow the redirect
"""
resolve_to("52.207.186.75")
# Return a redirect and ensure we don't follow it (only one request recorded)
httpx_mock.add_response(
status_code=302,
headers={"location": "http://internal-service.local"},
content=b"",
)
with pytest.raises(HTTPError):
send_webhook(
"http://paperless-ngx.com",
data="",
headers={},
files=None,
as_json=False,
)
assert len(httpx_mock.get_requests()) == 1
def test_strips_user_supplied_host_header(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- A URL with a user-supplied Host header
WHEN:
- send_webhook is called with a malicious Host header
THEN:
- The Host header is stripped and replaced with the resolved hostname
"""
resolve_to("52.207.186.75")
httpx_mock.add_response(content=b"ok")
send_webhook(
url="http://paperless-ngx.com",
data="ok",
headers={"Host": "evil.test"},
files=None,
as_json=False,
)
req = httpx_mock.get_request()
assert req.headers["Host"] == "paperless-ngx.com"
assert "evil.test" not in req.headers.get("Host", "")

View File

@@ -1321,7 +1321,6 @@ class BulkEditView(PassUserMixin):
"delete_pages": "checksum", "delete_pages": "checksum",
"split": None, "split": None,
"merge": None, "merge": None,
"edit_pdf": "checksum",
"reprocess": "checksum", "reprocess": "checksum",
} }
@@ -1340,7 +1339,6 @@ class BulkEditView(PassUserMixin):
if method in [ if method in [
bulk_edit.split, bulk_edit.split,
bulk_edit.merge, bulk_edit.merge,
bulk_edit.edit_pdf,
]: ]:
parameters["user"] = user parameters["user"] = user
@@ -1360,7 +1358,6 @@ class BulkEditView(PassUserMixin):
# check ownership for methods that change original document # check ownership for methods that change original document
if ( if (
(
has_perms has_perms
and method and method
in [ in [
@@ -1368,28 +1365,20 @@ class BulkEditView(PassUserMixin):
bulk_edit.delete, bulk_edit.delete,
bulk_edit.rotate, bulk_edit.rotate,
bulk_edit.delete_pages, bulk_edit.delete_pages,
bulk_edit.edit_pdf,
] ]
) ) or (
or (
method in [bulk_edit.merge, bulk_edit.split] method in [bulk_edit.merge, bulk_edit.split]
and parameters["delete_originals"] and parameters["delete_originals"]
)
or (method == bulk_edit.edit_pdf and parameters["update_document"])
): ):
has_perms = user_is_owner_of_all_documents has_perms = user_is_owner_of_all_documents
# check global add permissions for methods that create documents # check global add permissions for methods that create documents
if ( if (
has_perms has_perms
and ( and method in [bulk_edit.split, bulk_edit.merge]
method in [bulk_edit.split, bulk_edit.merge] and not user.has_perm(
or ( "documents.add_document",
method == bulk_edit.edit_pdf
and not parameters["update_document"]
) )
)
and not user.has_perm("documents.add_document")
): ):
has_perms = False has_perms = False
@@ -1427,6 +1416,7 @@ class BulkEditView(PassUserMixin):
) )
} }
# TODO: parameter validation
result = method(documents, **parameters) result = method(documents, **parameters)
if settings.AUDIT_LOG_ENABLED and modified_field: if settings.AUDIT_LOG_ENABLED and modified_field:

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2025-08-11 17:31+0000\n"
+"POT-Creation-Date: 2025-08-02 12:55+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -1185,12 +1185,12 @@
msgid "Invalid color."
msgstr ""
-#: documents/serialisers.py:1700
+#: documents/serialisers.py:1649
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
-#: documents/serialisers.py:1794
+#: documents/serialisers.py:1743
msgid "Invalid variable detected."
msgstr ""

View File

@@ -1421,25 +1421,3 @@ OUTLOOK_OAUTH_ENABLED = bool(
and OUTLOOK_OAUTH_CLIENT_ID
and OUTLOOK_OAUTH_CLIENT_SECRET,
)
###############################################################################
# Webhooks
###############################################################################
WEBHOOKS_ALLOWED_SCHEMES = set(
s.lower()
for s in __get_list(
"PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
["http", "https"],
)
)
WEBHOOKS_ALLOWED_PORTS = set(
int(p)
for p in __get_list(
"PAPERLESS_WEBHOOKS_ALLOWED_PORTS",
[],
)
)
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true",
)
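These removed settings are read from the environment; an illustrative configuration (values are examples, and comma-separated lists are assumed for __get_list):

    PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES=http,https
    PAPERLESS_WEBHOOKS_ALLOWED_PORTS=80,443
    PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS=false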

uv.lock (generated)
View File

@@ -1,5 +1,5 @@
version = 1 version = 1
revision = 3 revision = 2
requires-python = ">=3.10" requires-python = ">=3.10"
resolution-markers = [ resolution-markers = [
"sys_platform == 'darwin'", "sys_platform == 'darwin'",
@@ -312,15 +312,15 @@ wheels = [
[[package]] [[package]]
name = "channels" name = "channels"
version = "4.3.1" version = "4.3.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "asgiref", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "asgiref", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/12/a0/46450fcf9e56af18a6b0440ba49db6635419bb7bc84142c35f4143b1a66c/channels-4.3.1.tar.gz", hash = "sha256:97413ffd674542db08e16a9ef09cd86ec0113e5f8125fbd33cf0854adcf27cdb", size = 26896, upload-time = "2025-08-01T13:25:19.952Z" } sdist = { url = "https://files.pythonhosted.org/packages/72/04/6768c7a887f9c593c4d49f99130c8aec4ea06e750bc17c306b689f6caf3b/channels-4.3.0.tar.gz", hash = "sha256:7db32c61dcd88eada1647e6c6f6ad2eb724b75d4852eeff26ad1c51ccd1a37f7", size = 26816, upload-time = "2025-07-28T13:52:50.334Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/89/1c/eae1c2a8c195760376e7f65d0bdcc3e966695d29cfbe5c54841ce5c71408/channels-4.3.1-py3-none-any.whl", hash = "sha256:b091d4b26f91d807de3e84aead7ba785314f27eaf5bac31dd51b1c956b883859", size = 31286, upload-time = "2025-08-01T13:25:18.845Z" }, { url = "https://files.pythonhosted.org/packages/7c/59/0866202ee593e1b0dab0b472ebb8169e1b2b7886ad3008d193da2bbe10cb/channels-4.3.0-py3-none-any.whl", hash = "sha256:0497f3affb95e621b37d6bae1b6a5d9e8e1e1221007a2566f280091cf30ffcce", size = 31238, upload-time = "2025-07-28T13:52:49.117Z" },
] ]
[[package]] [[package]]
@@ -1946,7 +1946,6 @@ dependencies = [
{ name = "ocrmypdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "ocrmypdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pdf2image", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pdf2image", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psycopg-pool", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dotenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-dotenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-gnupg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-gnupg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1976,7 +1975,7 @@ postgres = [
{ name = "psycopg-c", version = "3.2.9", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, { name = "psycopg-c", version = "3.2.9", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
{ name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'" },
{ name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "psycopg-pool", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "psycopg-pool", version = "3.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or sys_platform == 'darwin'" },
] ]
webserver = [ webserver = [
{ name = "granian", extra = ["uvloop"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "granian", extra = ["uvloop"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2085,8 +2084,7 @@ requires-dist = [
    { name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" },
    { name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" },
    { name = "psycopg-c", marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and extra == 'postgres') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and extra == 'postgres') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'postgres') or (sys_platform != 'linux' and extra == 'postgres')", specifier = "==3.2.9" },
-    { name = "psycopg-pool" },
-    { name = "psycopg-pool", marker = "extra == 'postgres'", specifier = "==3.2.6" },
+    { name = "psycopg-pool", marker = "extra == 'postgres'" },
    { name = "python-dateutil", specifier = "~=2.9.0" },
    { name = "python-dotenv", specifier = "~=1.1.0" },
    { name = "python-gnupg", specifier = "~=0.5.4" },
@@ -2097,7 +2095,7 @@ requires-dist = [
    { name = "redis", extras = ["hiredis"], specifier = "~=5.2.1" },
    { name = "scikit-learn", specifier = "~=1.7.0" },
    { name = "setproctitle", specifier = "~=1.3.4" },
-    { name = "tika-client", specifier = "~=0.10.0" },
+    { name = "tika-client", specifier = "~=0.9.0" },
    { name = "tqdm", specifier = "~=4.67.1" },
    { name = "watchdog", specifier = "~=6.0" },
    { name = "whitenoise", specifier = "~=6.9" },
@@ -2438,7 +2436,7 @@ c = [
    { name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and implementation_name != 'pypy' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
]
pool = [
-    { name = "psycopg-pool", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "psycopg-pool", version = "3.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or sys_platform == 'darwin'" },
]

[[package]]
@@ -2477,14 +2475,12 @@ wheels = [
name = "psycopg-pool"
version = "3.2.6"
source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
sdist = { url = "https://files.pythonhosted.org/packages/cf/13/1e7850bb2c69a63267c3dbf37387d3f71a00fd0e2fa55c5db14d64ba1af4/psycopg_pool-3.2.6.tar.gz", hash = "sha256:0f92a7817719517212fbfe2fd58b8c35c1850cdd2a80d36b581ba2085d9148e5", size = 29770, upload-time = "2025-02-26T12:03:47.129Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/47/fd/4feb52a55c1a4bd748f2acaed1903ab54a723c47f6d0242780f4d97104d4/psycopg_pool-3.2.6-py3-none-any.whl", hash = "sha256:5887318a9f6af906d041a0b1dc1c60f8f0dda8340c2572b74e10907b51ed5da7", size = 38252, upload-time = "2025-02-26T12:03:45.073Z" },
]

[[package]]
name = "pyasn1"
version = "0.6.1"
@@ -2708,11 +2704,11 @@ wheels = [

[[package]]
name = "python-gnupg"
-version = "0.5.5"
+version = "0.5.4"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/42/d0/72a14a79f26c6119b281f6ccc475a787432ef155560278e60df97ce68a86/python-gnupg-0.5.5.tar.gz", hash = "sha256:3fdcaf76f60a1b948ff8e37dc398d03cf9ce7427065d583082b92da7a4ff5a63", size = 66467, upload-time = "2025-08-04T19:26:55.778Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/3e/ba0dc69c9f4e0aeb24d93175230ef057c151790a7516012f61014918992d/python-gnupg-0.5.4.tar.gz", hash = "sha256:f2fdb5fb29615c77c2743e1cb3d9314353a6e87b10c37d238d91ae1c6feae086", size = 65705, upload-time = "2025-01-07T11:58:34.073Z" }
wheels = [
-    { url = "https://files.pythonhosted.org/packages/aa/19/c147f78cc18c8788f54d4a16a22f6c05deba85ead5672d3ddf6dcba5a5fe/python_gnupg-0.5.5-py2.py3-none-any.whl", hash = "sha256:51fa7b8831ff0914bc73d74c59b99c613de7247b91294323c39733bb85ac3fc1", size = 21916, upload-time = "2025-08-04T19:26:54.307Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/5b/6666ed5a0d3ce4d5444af62e373d5ba8ab253a03487c86f2f9f1078e7c31/python_gnupg-0.5.4-py2.py3-none-any.whl", hash = "sha256:40ce25cde9df29af91fe931ce9df3ce544e14a37f62b13ca878c897217b2de6c", size = 21730, upload-time = "2025-01-07T11:58:32.249Z" },
]

[[package]]
@@ -3358,16 +3354,16 @@ wheels = [

[[package]]
name = "tika-client"
-version = "0.10.0"
+version = "0.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'darwin') or (python_full_version < '3.11' and sys_platform == 'linux')" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/21/be/65bfc47e4689ecd5ead20cf47dc0084fd767b7e71e8cfabf5fddc42aae3c/tika_client-0.10.0.tar.gz", hash = "sha256:3101e8b2482ae4cb7f87be13ada970ff691bdc3404d94cd52f5e57a09c99370c", size = 2178257, upload-time = "2025-08-04T17:47:30.414Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/ad/3508e42b470a037b3f5c19ca9993893d0faa30ba7ec7e6ac33db9bc3bf51/tika_client-0.9.0.tar.gz", hash = "sha256:c10bba8e40ede23c039f84ccd821fb2d290d339cc26cbd267ab9b561a1e83659", size = 2175246, upload-time = "2025-01-15T18:46:23.901Z" }
wheels = [
-    { url = "https://files.pythonhosted.org/packages/b1/31/002e0fa5bca67d6a19da8c294273486f6c46cbcc83d6879719a38a181461/tika_client-0.10.0-py3-none-any.whl", hash = "sha256:f5486cc884e4522575662aa295bda761bf9f101ac8d92840155b58ab8b96f6e2", size = 18237, upload-time = "2025-08-04T17:47:28.966Z" },
+    { url = "https://files.pythonhosted.org/packages/36/8c/90ba51e014fb548ee34dd5ed14e85ec4a205ff97b89ca393e4de321304ac/tika_client-0.9.0-py3-none-any.whl", hash = "sha256:2464e8335b5e92c276641c729e7707f1e894a2bfb51cc59abdd3bdfb532da8a0", size = 17963, upload-time = "2025-01-15T18:46:21.143Z" },
]

[[package]]