Compare commits


1 commit

Author: shamoon
SHA1: 1b6a9d3816
Message: Add info buttons for core metadata items
Date: 2025-08-04 23:45:50 -04:00
53 changed files with 1281 additions and 2574 deletions


@@ -15,7 +15,6 @@ env:
   DEFAULT_UV_VERSION: "0.8.x"
   # This is the default version of Python to use in most steps which aren't specific
   DEFAULT_PYTHON_VERSION: "3.11"
-  NLTK_DATA: "/usr/share/nltk_data"
 jobs:
   pre-commit:
     # We want to run on external PRs, but not on our own internal PRs as they'll be run
@@ -122,11 +121,8 @@ jobs:
       - name: List installed Python dependencies
         run: |
           uv pip list
-      - name: Install or update NLTK dependencies
-        run: uv run python -m nltk.downloader punkt punkt_tab snowball_data stopwords -d ${{ env.NLTK_DATA }}
       - name: Tests
         env:
-          NLTK_DATA: ${{ env.NLTK_DATA }}
           PAPERLESS_CI_TEST: 1
           # Enable paperless_mail testing against real server
           PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }}


@@ -31,7 +31,7 @@ repos:
       rev: v2.4.1
       hooks:
         - id: codespell
-          exclude: "(^src-ui/src/locale/)|(^src-ui/pnpm-lock.yaml)|(^src-ui/e2e/)|(^src/paperless_mail/tests/samples/)|(^src/documents/tests/samples/)"
+          exclude: "(^src-ui/src/locale/)|(^src-ui/pnpm-lock.yaml)|(^src-ui/e2e/)|(^src/paperless_mail/tests/samples/)"
           exclude_types:
             - pofile
             - json


@@ -5,7 +5,7 @@
 # Purpose: Compiles the frontend
 # Notes:
 #  - Does PNPM stuff with Typescript and such
-FROM --platform=$BUILDPLATFORM docker.io/node:20-trixie-slim AS compile-frontend
+FROM --platform=$BUILDPLATFORM docker.io/node:20-bookworm-slim AS compile-frontend

 COPY ./src-ui /src/src-ui
@@ -32,7 +32,7 @@ RUN set -eux \
 # Purpose: Installs s6-overlay and rootfs
 # Comments:
 #  - Don't leave anything extra in here either
-FROM ghcr.io/astral-sh/uv:0.8.4-python3.12-trixie-slim AS s6-overlay-base
+FROM ghcr.io/astral-sh/uv:0.8.4-python3.12-bookworm-slim AS s6-overlay-base

 WORKDIR /usr/src/s6
@@ -170,8 +170,20 @@ RUN set -eux \
   && apt-get update \
   && apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \
   && echo "Installing pre-built updates" \
-  && curl --fail --silent --no-progress-meter --show-error --location --remote-name-all \
+  && curl --fail --silent --no-progress-meter --show-error --location --remote-name-all --parallel --parallel-max 4 \
+    https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+    https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+    https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+    https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+    https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
     https://github.com/paperless-ngx/builder/releases/download/jbig2enc-${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
+  && echo "Installing qpdf ${QPDF_VERSION}" \
+  && dpkg --install ./libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+  && dpkg --install ./qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+  && echo "Installing Ghostscript ${GS_VERSION}" \
+  && dpkg --install ./libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
+  && dpkg --install ./libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+  && dpkg --install ./ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
   && echo "Installing jbig2enc" \
   && dpkg --install ./jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
   && echo "Configuring imagemagick" \

dev.txt (319 lines deleted)

@@ -1,319 +0,0 @@
adduser 3.134
apt 2.6.1
base-files 12.4+deb12u11
base-passwd 3.6.1
bash 5.2.15-2+b8
bsdutils 1:2.38.1-5+deb12u3
ca-certificates 20230311+deb12u1
coreutils 9.1-1
curl 7.88.1-10+deb12u12
dash 0.5.12-2
debconf 1.5.82
debian-archive-keyring 2023.3+deb12u2
debianutils 5.7-0.5~deb12u1
diffutils 1:3.8-4
dirmngr 2.2.40-1.1
dpkg 1.21.22
e2fsprogs 1.47.0-2
file 1:5.44-3
findutils 4.9.0-4
fontconfig 2.14.1-4
fontconfig-config 2.14.1-4
fonts-liberation 1:1.07.4-11
fonts-urw-base35 20200910-7
gcc-12-base 12.2.0-14+deb12u1
gettext 0.21-12
gettext-base 0.21-12
ghostscript 10.03.1~dfsg-1
gnupg 2.2.40-1.1
gnupg-l10n 2.2.40-1.1
gnupg-utils 2.2.40-1.1
gosu 1.14-1+b10
gpg 2.2.40-1.1
gpg-agent 2.2.40-1.1
gpg-wks-client 2.2.40-1.1
gpg-wks-server 2.2.40-1.1
gpgconf 2.2.40-1.1
gpgsm 2.2.40-1.1
gpgv 2.2.40-1.1
grep 3.8-5
gzip 1.12-1
hicolor-icon-theme 0.17-2
hostname 3.23+nmu1
icc-profiles-free 2.0.1+dfsg-1.1
imagemagick 8:6.9.11.60+dfsg-1.6+deb12u3
imagemagick-6-common 8:6.9.11.60+dfsg-1.6+deb12u3
imagemagick-6.q16 8:6.9.11.60+dfsg-1.6+deb12u3
init-system-helpers 1.65.2
jbig2dec 0.19-3
jbig2enc 0.30-1
libacl1 2.3.1-3
libaom3 3.6.0-1+deb12u1
libapt-pkg6.0 2.6.1
libarchive13 3.6.2-1+deb12u2
libassuan0 2.5.5-5
libattr1 1:2.5.1-4
libaudit-common 1:3.0.9-1
libaudit1 1:3.0.9-1
libavahi-client3 0.8-10+deb12u1
libavahi-common-data 0.8-10+deb12u1
libavahi-common3 0.8-10+deb12u1
libavcodec59 7:5.1.6-0+deb12u1
libavformat59 7:5.1.6-0+deb12u1
libavutil57 7:5.1.6-0+deb12u1
libblkid1 2.38.1-5+deb12u3
libbluray2 1:1.3.4-1
libbrotli1 1.0.9-2+b6
libbsd0 0.11.7-2
libbz2-1.0 1.0.8-5+b1
libc-bin 2.36-9+deb12u10
libc6 2.36-9+deb12u10
libcairo-gobject2 1.16.0-7
libcairo2 1.16.0-7
libcap-ng0 0.8.3-1+b3
libcap2 1:2.66-4+deb12u1
libchromaprint1 1.5.1-2+b1
libcjson1 1.7.15-1+deb12u2
libcodec2-1.0 1.0.5-1
libcom-err2 1.47.0-2
libconfig-inifiles-perl 3.000003-2
libcrypt1 1:4.4.33-2
libcups2 2.4.2-3+deb12u8
libcurl4 7.88.1-10+deb12u12
libdatrie1 0.2.13-2+b1
libdav1d6 1.0.0-2+deb12u1
libdb5.3 5.3.28+dfsg2-1
libdbus-1-3 1.14.10-1~deb12u1
libde265-0 1.0.11-1+deb12u2
libdebconfclient0 0.270
libdeflate0 1.14-1
libdrm-common 2.4.114-1
libdrm2 2.4.114-1+b1
libedit2 3.1-20221030-2
libexpat1 2.5.0-1+deb12u1
libext2fs2 1.47.0-2
libffi8 3.4.4-1
libfftw3-double3 3.3.10-1
libfontconfig1 2.14.1-4
libfontenc1 1:1.1.4-1
libfreetype6 2.12.1+dfsg-5+deb12u4
libfribidi0 1.0.8-2.1
libgcc-s1 12.2.0-14+deb12u1
libgcrypt20 1.10.1-3
libgdbm-compat4 1.23-3
libgdbm6 1.23-3
libgdk-pixbuf-2.0-0 2.42.10+dfsg-1+deb12u2
libgdk-pixbuf2.0-common 2.42.10+dfsg-1+deb12u2
libgif7 5.2.1-2.5
libglib2.0-0 2.74.6-2+deb12u6
libgme0 0.6.3-6
libgmp10 2:6.2.1+dfsg1-1.1
libgnutls30 3.7.9-2+deb12u5
libgomp1 12.2.0-14+deb12u1
libgpg-error0 1.46-1
libgraphite2-3 1.3.14-1
libgs-common 10.0.0~dfsg-11+deb12u7
libgs10 10.03.1~dfsg-1
libgs10-common 10.03.1~dfsg-1
libgsm1 1.0.22-1
libgssapi-krb5-2 1.20.1-2+deb12u3
libharfbuzz0b 6.0.0+dfsg-3
libheif1 1.15.1-1+deb12u1
libhogweed6 3.8.1-2
libhwy1 1.0.3-3+deb12u1
libice6 2:1.0.10-1
libicu72 72.1-3+deb12u1
libidn12 1.41-1
libidn2-0 2.3.3-1+b1
libijs-0.35 0.35-15
libimagequant0 2.17.0-1
libjbig0 2.1-6.1
libjbig2dec0 0.19-3
libjpeg62-turbo 1:2.1.5-2
libjxl0.7 0.7.0-10+deb12u1
libk5crypto3 1.20.1-2+deb12u3
libkeyutils1 1.6.3-2
libkrb5-3 1.20.1-2+deb12u3
libkrb5support0 1.20.1-2+deb12u3
libksba8 1.6.3-2
liblcms2-2 2.14-2
libldap-2.5-0 2.5.13+dfsg-5
liblept5 1.82.0-3+b3
liblerc4 4.0.0+ds-2
liblqr-1-0 0.4.2-2.1
libltdl7 2.4.7-7~deb12u1
liblz4-1 1.9.4-1
liblzma5 5.4.1-1
libmagic-mgc 1:5.44-3
libmagic1 1:5.44-3
libmagickcore-6.q16-6 8:6.9.11.60+dfsg-1.6+deb12u3
libmagickwand-6.q16-6 8:6.9.11.60+dfsg-1.6+deb12u3
libmariadb3 1:10.11.11-0+deb12u1
libmbedcrypto7 2.28.3-1
libmd0 1.0.4-2
libmfx1 22.5.4-1
libmount1 2.38.1-5+deb12u3
libmp3lame0 3.100-6
libmpg123-0 1.31.2-1+deb12u1
libncurses6 6.4-4
libncursesw6 6.4-4
libnettle8 3.8.1-2
libnghttp2-14 1.52.0-1+deb12u2
libnorm1 1.5.9+dfsg-2
libnpth0 1.6-3
libnsl2 1.3.0-2
libnspr4 2:4.35-1
libnss3 2:3.87.1-1+deb12u1
libnuma1 2.0.16-1
libogg0 1.3.5-3
libopenjp2-7 2.5.0-2+deb12u1
libopenmpt0 0.6.9-1
libopus0 1.3.1-3
libp11-kit0 0.24.1-2
libpam-modules 1.5.2-6+deb12u1
libpam-modules-bin 1.5.2-6+deb12u1
libpam-runtime 1.5.2-6+deb12u1
libpam0g 1.5.2-6+deb12u1
libpango-1.0-0 1.50.12+ds-1
libpangocairo-1.0-0 1.50.12+ds-1
libpangoft2-1.0-0 1.50.12+ds-1
libpaper1 1.1.29
libpcre2-8-0 10.42-1
libperl5.36 5.36.0-7+deb12u2
libpgm-5.3-0 5.3.128~dfsg-2
libpixman-1-0 0.42.2-1
libpng16-16 1.6.39-2
libpoppler126 22.12.0-2+deb12u1
libpq5 15.13-0+deb12u1
libpsl5 0.21.2-1
libqpdf29 11.9.0-1
librabbitmq4 0.11.0-1+deb12u1
librav1e0 0.5.1-6
libreadline8 8.2-1.3
librist4 0.2.7+dfsg-1
librsvg2-2 2.54.7+dfsg-1~deb12u1
librtmp1 2.4+20151223.gitfa8646d.1-2+b2
libsasl2-2 2.1.28+dfsg-10
libsasl2-modules-db 2.1.28+dfsg-10
libseccomp2 2.5.4-1+deb12u1
libselinux1 3.4-1+b6
libsemanage-common 3.4-1
libsemanage2 3.4-1+b5
libsepol2 3.4-2.1
libshine3 3.1.1-2
libsm6 2:1.2.3-1
libsmartcols1 2.38.1-5+deb12u3
libsnappy1v5 1.1.9-3
libsodium23 1.0.18-1
libsoxr0 0.1.3-4
libspeex1 1.2.1-2
libsqlite3-0 3.40.1-2+deb12u1
libsrt1.5-gnutls 1.5.1-1+deb12u1
libss2 1.47.0-2
libssh-gcrypt-4 0.10.6-0+deb12u1
libssh2-1 1.10.0-3+b1
libssl3 3.0.17-1~deb12u1
libstdc++6 12.2.0-14+deb12u1
libsvtav1enc1 1.4.1+dfsg-1
libswresample4 7:5.1.6-0+deb12u1
libsystemd0 252.38-1~deb12u1
libtasn1-6 4.19.0-2+deb12u1
libtesseract5 5.3.0-2
libthai-data 0.1.29-1
libthai0 0.1.29-1
libtheora0 1.1.1+dfsg.1-16.1+b1
libtiff6 4.5.0-6+deb12u2
libtinfo6 6.4-4
libtirpc-common 1.3.3+ds-1
libtirpc3 1.3.3+ds-1
libtwolame0 0.4.0-2
libudev1 252.38-1~deb12u1
libudfread0 1.1.2-1
libunistring2 1.0-2
libuuid1 2.38.1-5+deb12u3
libv4l-0 1.22.1-5+b2
libv4lconvert0 1.22.1-5+b2
libva-drm2 2.17.0-1
libva-x11-2 2.17.0-1
libva2 2.17.0-1
libvdpau1 1.5-2
libvorbis0a 1.3.7-1
libvorbisenc2 1.3.7-1
libvorbisfile3 1.3.7-1
libvpx7 1.12.0-1+deb12u4
libwebp7 1.2.4-0.2+deb12u1
libwebpdemux2 1.2.4-0.2+deb12u1
libwebpmux3 1.2.4-0.2+deb12u1
libx11-6 2:1.8.4-2+deb12u2
libx11-data 2:1.8.4-2+deb12u2
libx11-xcb1 2:1.8.4-2+deb12u2
libx264-164 2:0.164.3095+gitbaee400-3
libx265-199 3.5-2+b1
libxau6 1:1.0.9-1
libxcb-dri3-0 1.15-1
libxcb-render0 1.15-1
libxcb-shm0 1.15-1
libxcb1 1.15-1
libxdmcp6 1:1.1.2-3
libxext6 2:1.3.4-1+b1
libxfixes3 1:6.0.0-2
libxml2 2.9.14+dfsg-1.3~deb12u2
libxrender1 1:0.9.10-1.1
libxslt1.1 1.1.35-1+deb12u1
libxt6 1:1.2.1-1.1
libxvidcore4 2:1.3.7-1
libxxhash0 0.8.1-1
libzbar0 0.23.92-7+deb12u1
libzmq5 4.3.4-6
libzstd1 1.5.4+dfsg2-5
libzvbi-common 0.2.41-1
libzvbi0 0.2.41-1
login 1:4.13+dfsg1-1+deb12u1
logsave 1.47.0-2
mariadb-client 1:10.11.11-0+deb12u1
mariadb-client-core 1:10.11.11-0+deb12u1
mariadb-common 1:10.11.11-0+deb12u1
mawk 1.3.4.20200120-3.1
media-types 10.0.0
mount 2.38.1-5+deb12u3
mysql-common 5.8+1.1.0
ncurses-base 6.4-4
ncurses-bin 6.4-4
netbase 6.4
ocl-icd-libopencl1 2.3.1-1
openssl 3.0.17-1~deb12u1
passwd 1:4.13+dfsg1-1+deb12u1
perl 5.36.0-7+deb12u2
perl-base 5.36.0-7+deb12u2
perl-modules-5.36 5.36.0-7+deb12u2
pinentry-curses 1.2.1-1
pngquant 2.17.0-1
poppler-data 0.4.12-1
poppler-utils 22.12.0-2+deb12u1
postgresql-client 15+248
postgresql-client-15 15.13-0+deb12u1
postgresql-client-common 248
qpdf 11.9.0-1
readline-common 8.2-1.3
sed 4.9-1
sensible-utils 0.0.17+nmu1
shared-mime-info 2.2-1
sysvinit-utils 3.06-4
tar 1.34+dfsg-1.2+deb12u1
tesseract-ocr 5.3.0-2
tesseract-ocr-deu 1:4.1.0-2
tesseract-ocr-eng 1:4.1.0-2
tesseract-ocr-fra 1:4.1.0-2
tesseract-ocr-ita 1:4.1.0-2
tesseract-ocr-osd 1:4.1.0-2
tesseract-ocr-spa 1:4.1.0-2
tzdata 2025b-0+deb12u1
ucf 3.0043+nmu1+deb12u1
unpaper 7.0.0-0.1
usr-is-merged 37~deb12u1
util-linux 2.38.1-5+deb12u3
util-linux-extra 2.38.1-5+deb12u3
x11-common 1:7.7+23
xfonts-encodings 1:1.0.4-2.2
xfonts-utils 1:7.7+6
zlib1g 1:1.2.13.dfsg-1


@@ -282,18 +282,6 @@ The following methods are supported:
   - `"merge": true or false` (defaults to false)
   - The `merge` flag determines if the supplied permissions will overwrite all existing permissions (including
     removing them) or be merged with existing permissions.
-- `edit_pdf`
-  - Requires `parameters`:
-    - `"doc_ids": [DOCUMENT_ID]` A list of a single document ID to edit.
-    - `"operations": [OPERATION, ...]` A list of operations to perform on the documents. Each operation is a dictionary
-      with the following keys:
-      - `"page": PAGE_NUMBER` The page number to edit (1-based).
-      - `"rotate": DEGREES` Optional rotation in degrees (90, 180, 270).
-      - `"doc": OUTPUT_DOCUMENT_INDEX` Optional index of the output document for split operations.
-  - Optional `parameters`:
-    - `"delete_original": true` to delete the original documents after editing.
-    - `"update_document": true` to update the existing document with the edited PDF.
-    - `"include_metadata": true` to copy metadata from the original document to the edited document.
 - `merge`
   - No additional `parameters` required.
   - The ordering of the merged document is determined by the list of IDs.
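For reference, a minimal TypeScript sketch of the `edit_pdf` call documented in the lines removed above. The request body follows those parameters exactly; the plain `fetch` wrapper and hard-coded endpoint path are illustrative assumptions (the frontend goes through its own DocumentService instead).

// Hypothetical helper for the `edit_pdf` bulk-edit method; illustration only.
interface EditPdfOperation {
  page: number // 1-based page number to edit
  rotate?: number // optional rotation in degrees (90, 180, 270)
  doc?: number // optional output-document index for split operations
}

async function editPdf(
  documentId: number,
  operations: EditPdfOperation[]
): Promise<void> {
  const response = await fetch('/api/documents/bulk_edit/', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      documents: [documentId],
      method: 'edit_pdf',
      parameters: {
        operations, // e.g. [{ page: 1, rotate: 90, doc: 0 }]
        delete_original: false, // delete the original after editing
        update_document: false, // true would update the existing document
        include_metadata: true, // copy metadata to the edited document(s)
      },
    }),
  })
  if (!response.ok) {
    throw new Error(`bulk_edit failed with status ${response.status}`)
  }
}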


@@ -1282,30 +1282,6 @@ within your documents.
     Defaults to false.
-
-## Workflow webhooks
-
-#### [`PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES=<str>`](#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES) {#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES}
-
-: A comma-separated list of allowed schemes for webhooks. This setting
-  controls which URL schemes are permitted for webhook URLs.
-
-  Defaults to `http,https`.
-
-#### [`PAPERLESS_WEBHOOKS_ALLOWED_PORTS=<str>`](#PAPERLESS_WEBHOOKS_ALLOWED_PORTS) {#PAPERLESS_WEBHOOKS_ALLOWED_PORTS}
-
-: A comma-separated list of allowed ports for webhooks. This setting
-  controls which ports are permitted for webhook URLs. For example, if you
-  set this to `80,443`, webhooks will only be sent to URLs that use these
-  ports.
-
-  Defaults to empty list, which allows all ports.
-
-#### [`PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS=<bool>`](#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS) {#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS}
-
-: If set to false, webhooks cannot be sent to internal URLs (e.g., localhost).
-
-  Defaults to true, which allows internal requests.
-
 ### Polling {#polling}

 #### [`PAPERLESS_CONSUMER_POLLING=<num>`](#PAPERLESS_CONSUMER_POLLING) {#PAPERLESS_CONSUMER_POLLING}
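For illustration, a hypothetical hardened setup using the three webhook settings removed in the hunk above (the values are examples, not defaults):

PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES=https
PAPERLESS_WEBHOOKS_ALLOWED_PORTS=80,443
PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS=false

This would restrict webhooks to HTTPS URLs on ports 80 or 443 and block deliveries to internal hosts such as localhost.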


@@ -499,10 +499,6 @@ The following workflow action types are available:
   - Encoding for the request body, either JSON or form data
   - The request headers as key-value pairs
-
-For security reasons, webhooks can be limited to specific ports and disallowed from connecting to local URLs. See the relevant
-[configuration settings](configuration.md#workflow-webhooks) to change this behavior. If you are allowing non-admins to create workflows,
-you may want to adjust these settings to prevent abuse.

 #### Workflow placeholders

 Some workflow text can include placeholders but the available options differ depending on the type of
@@ -580,14 +576,12 @@ The following custom field types are supported:
 ## PDF Actions

-Paperless-ngx supports basic editing operations for PDFs (these operations currently cannot be performed on non-PDF files). When viewing an individual document you can
-open the 'PDF Editor' to use a simple UI for re-arranging, rotating, deleting pages and splitting documents.
+Paperless-ngx supports four basic editing operations for PDFs (these operations currently cannot be performed on non-PDF files):

 - Merging documents: available when selecting multiple documents for 'bulk editing'.
-- Rotating documents: available when selecting multiple documents for 'bulk editing' and via the pdf editor on an individual document's details page.
-- Splitting documents: via the pdf editor on an individual document's details page.
-- Deleting pages: via the pdf editor on an individual document's details page.
-- Re-arranging pages: via the pdf editor on an individual document's details page.
+- Rotating documents: available when selecting multiple documents for 'bulk editing' and from an individual document's details page.
+- Splitting documents: available from an individual document's details page.
+- Deleting pages: available from an individual document's details page.

 !!! important
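These operations map onto the `bulk_edit` API. A TypeScript sketch of the three per-document request bodies, with literal values taken from the frontend tests later in this diff (`docId` is a hypothetical document ID):

const docId = 123 // hypothetical document ID

// Split pages 1-2 and 3-5 into two new documents.
const splitRequest = {
  documents: [docId],
  method: 'split',
  parameters: { pages: '1-2,3-5', delete_originals: false },
}

// Rotate the document by 90 degrees.
const rotateRequest = {
  documents: [docId],
  method: 'rotate',
  parameters: { degrees: 90 },
}

// Delete pages 1 and 2 from the document.
const deletePagesRequest = {
  documents: [docId],
  method: 'delete_pages',
  parameters: { pages: [1, 2] },
}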


@@ -63,7 +63,7 @@ dependencies = [
   "redis[hiredis]~=5.2.1",
   "scikit-learn~=1.7.0",
   "setproctitle~=1.3.4",
-  "tika-client~=0.10.0",
+  "tika-client~=0.9.0",
   "tqdm~=4.67.1",
   "watchdog~=6.0",
   "whitenoise~=6.9",
@@ -204,9 +204,15 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [
   "INP001",
   "T201",
 ]
+lint.per-file-ignores."src/documents/file_handling.py" = [
+  "PTH",
+] # TODO Enable & remove
 lint.per-file-ignores."src/documents/management/commands/document_consumer.py" = [
   "PTH",
 ] # TODO Enable & remove
+lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [
+  "PTH",
+] # TODO Enable & remove
 lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
   "PTH",
 ] # TODO Enable & remove
@@ -216,6 +222,9 @@ lint.per-file-ignores."src/documents/models.py" = [
 lint.per-file-ignores."src/documents/parsers.py" = [
   "PTH",
 ] # TODO Enable & remove
+lint.per-file-ignores."src/documents/signals/handlers.py" = [
+  "PTH",
+] # TODO Enable & remove
 lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
   "RUF001",
 ]

File diff suppressed because it is too large


@@ -121,26 +121,6 @@ if (!URL.revokeObjectURL) {
 }

 Object.defineProperty(window, 'ResizeObserver', { value: mock() })

-if (typeof IntersectionObserver === 'undefined') {
-  class MockIntersectionObserver {
-    constructor(
-      public callback: IntersectionObserverCallback,
-      public options?: IntersectionObserverInit
-    ) {}
-    observe = jest.fn()
-    unobserve = jest.fn()
-    disconnect = jest.fn()
-    takeRecords = jest.fn()
-  }
-  Object.defineProperty(window, 'IntersectionObserver', {
-    writable: true,
-    configurable: true,
-    value: MockIntersectionObserver,
-  })
-}

 HTMLCanvasElement.prototype.getContext = <
   typeof HTMLCanvasElement.prototype.getContext
 >jest.fn()


@@ -50,7 +50,7 @@
 <div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
 <div class="btn-toolbar" role="toolbar">
   <div class="btn-group me-2">
-    <button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
+    <button type="button" (click)="discardChanges()" class="btn btn-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
   </div>
   <div class="btn-group">
     <button type="submit" class="btn btn-primary" [disabled]="loading || !configForm.valid || (isDirty$ | async) === false" i18n>Save</button>


@@ -358,6 +358,6 @@
 <div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
-<button type="button" (click)="reset()" class="btn btn-outline-secondary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
-<button type="submit" class="btn btn-primary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
+<button type="submit" class="btn btn-primary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
+<button type="button" (click)="reset()" class="btn btn-secondary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
 </form>


@@ -0,0 +1,54 @@
<div class="modal-header">
<h4 class="modal-title" id="modal-basic-title">{{title}}</h4>
<button type="button" class="btn-close" aria-label="Close" (click)="cancel()">
</button>
</div>
<div class="modal-body">
<div class="row">
<div class="col">
<div class="btn-toolbar flex-nowrap">
<div class="input-group input-group-sm">
<div class="input-group-text" i18n>Page</div>
<input class="form-control mw-60" type="number" min="1" [(ngModel)]="currentPage" />
<div class="input-group-text" i18n>of {{totalPages}}</div>
</div>
<div class="input-group input-group-sm ms-auto">
<span class="input-group-text" i18n>Pages to remove</span>
<input [ngModel]="pagesString" class="form-control" disabled />
</div>
</div>
<div class="pdf-viewer-container w-100 mt-3">
<pdf-viewer #pdfViewer [src]="pdfSrc" [(page)]="currentPage"
[original-size]="false"
[zoom]="1"
zoom-scale="page-fit"
[render-text]="false"
(pagerendered)="pageRendered($event)"
(after-load-complete)="pdfPreviewLoaded($event)">
</pdf-viewer>
</div>
</div>
</div>
</div>
<div class="modal-footer flex-nowrap">
<div>
@if (message) {
<p [innerHTML]="message | safeHtml"></p>
}
@if (messageBold) {
<p class="mb-0 small"><b [innerHTML]="messageBold | safeHtml"></b></p>
}
</div>
<button type="button" class="btn" [class]="cancelBtnClass" (click)="cancel()" [disabled]="!buttonsEnabled">
<span class="d-inline-block" style="padding-bottom: 1px;">{{cancelBtnCaption}}</span>
</button>
<button type="button" class="btn" [class]="btnClass" (click)="confirm()" [disabled]="!confirmButtonEnabled || !buttonsEnabled">
{{btnCaption}}
</button>
</div>
<ng-template #pageCheckOverlay let-page="page" let-pages="pages">
<div class="position-absolute top-0 start-0 w-100 h-100 p-2" (click)="pageCheckChanged(page)">
<input type="checkbox" class="form-check-input" />
</div>
</ng-template>


@@ -0,0 +1,28 @@
.pdf-viewer-container {
  background-color: gray;
  height: 550px;

  pdf-viewer {
    width: 100%;
    height: 100%;
  }
}

.mw-60 {
  max-width: 60px;
}

div.position-absolute:has(.form-check-input:checked) {
  background-color: rgba(var(--bs-dark-rgb), 0.4);
}

.form-check-input {
  &:checked {
    background-color: var(--bs-danger);
    border-color: var(--bs-danger);
  }
  &:focus {
    box-shadow: 0 0 0 0.25rem rgba(var(--bs-danger-rgb), var(--pngx-focus-alpha));
    border-color: var(--bs-danger);
  }
}


@@ -0,0 +1,60 @@
import { provideHttpClient, withInterceptorsFromDi } from '@angular/common/http'
import { provideHttpClientTesting } from '@angular/common/http/testing'
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { SafeHtmlPipe } from 'src/app/pipes/safehtml.pipe'
import { DeletePagesConfirmDialogComponent } from './delete-pages-confirm-dialog.component'
describe('DeletePagesConfirmDialogComponent', () => {
let component: DeletePagesConfirmDialogComponent
let fixture: ComponentFixture<DeletePagesConfirmDialogComponent>
beforeEach(async () => {
await TestBed.configureTestingModule({
declarations: [],
imports: [
NgxBootstrapIconsModule.pick(allIcons),
FormsModule,
ReactiveFormsModule,
DeletePagesConfirmDialogComponent,
],
providers: [
NgbActiveModal,
SafeHtmlPipe,
provideHttpClient(withInterceptorsFromDi()),
provideHttpClientTesting(),
],
}).compileComponents()
fixture = TestBed.createComponent(DeletePagesConfirmDialogComponent)
component = fixture.componentInstance
fixture.detectChanges()
})
it('should return a string with comma-separated pages', () => {
component.pages = [1, 2, 3, 4]
expect(component.pagesString).toEqual('1, 2, 3, 4')
})
it('should update totalPages when pdf is loaded', () => {
component.pdfPreviewLoaded({ numPages: 5 } as any)
expect(component.totalPages).toEqual(5)
})
it('should update checks when page is rendered', () => {
const event = {
target: document.createElement('div'),
detail: { pageNumber: 1 },
} as any
component.pageRendered(event)
expect(component['checks'].length).toEqual(1)
})
it('should update pages when page check is changed', () => {
component.pageCheckChanged(1)
expect(component.pages).toEqual([1])
component.pageCheckChanged(1)
expect(component.pages).toEqual([])
})
})


@@ -0,0 +1,69 @@
import { Component, TemplateRef, ViewChild, inject } from '@angular/core'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import {
  PDFDocumentProxy,
  PdfViewerComponent,
  PdfViewerModule,
} from 'ng2-pdf-viewer'
import { SafeHtmlPipe } from 'src/app/pipes/safehtml.pipe'
import { DocumentService } from 'src/app/services/rest/document.service'
import { ConfirmDialogComponent } from '../confirm-dialog.component'

@Component({
  selector: 'pngx-delete-pages-confirm-dialog',
  templateUrl: './delete-pages-confirm-dialog.component.html',
  styleUrl: './delete-pages-confirm-dialog.component.scss',
  imports: [PdfViewerModule, FormsModule, ReactiveFormsModule, SafeHtmlPipe],
})
export class DeletePagesConfirmDialogComponent extends ConfirmDialogComponent {
  private documentService = inject(DocumentService)

  public documentID: number
  public pages: number[] = []
  public currentPage: number = 1
  public totalPages: number

  @ViewChild('pdfViewer') pdfViewer: PdfViewerComponent
  @ViewChild('pageCheckOverlay') pageCheckOverlay!: TemplateRef<any>
  private checks: HTMLElement[] = []

  public get pagesString(): string {
    return this.pages.join(', ')
  }

  public get pdfSrc(): string {
    return this.documentService.getPreviewUrl(this.documentID)
  }

  constructor() {
    super()
  }

  public pdfPreviewLoaded(pdf: PDFDocumentProxy) {
    this.totalPages = pdf.numPages
  }

  pageRendered(event: CustomEvent) {
    const pageDiv = event.target as HTMLDivElement
    const check = this.pageCheckOverlay.createEmbeddedView({
      page: event.detail.pageNumber,
    })
    this.checks[event.detail.pageNumber - 1] = check.rootNodes[0]
    pageDiv?.insertBefore(check.rootNodes[0], pageDiv.firstChild)
    this.updateChecks()
  }

  pageCheckChanged(pageNumber: number) {
    if (!this.pages.includes(pageNumber)) this.pages.push(pageNumber)
    else if (this.pages.includes(pageNumber))
      this.pages.splice(this.pages.indexOf(pageNumber), 1)
    this.updateChecks()
  }

  private updateChecks() {
    this.checks.forEach((check, i) => {
      const input = check.getElementsByTagName('input')[0]
      input.checked = this.pages.includes(i + 1)
    })
  }
}


@@ -0,0 +1,59 @@
<div class="modal-header">
<h4 class="modal-title" id="modal-basic-title">{{title}}</h4>
<button type="button" class="btn-close" aria-label="Close" (click)="cancel()">
</button>
</div>
<div class="modal-body">
<p>{{message}}</p>
<div class="row mb-2">
<div class="col-7">
<div class="input-group input-group-sm">
<div class="input-group-text" i18n>Page</div>
<input class="form-control" type="number" min="1" [(ngModel)]="page" />
<div class="input-group-text" i18n>of {{totalPages}}</div>
</div>
<div class="pdf-viewer-container w-100 mt-3">
<pdf-viewer [src]="pdfSrc" [(page)]="page"
[original-size]="false"
[zoom]="1"
zoom-scale="page-fit"
(after-load-complete)="pdfPreviewLoaded($event)">
</pdf-viewer>
</div>
</div>
<div class="col-5">
<div class="d-grid">
<button class="btn btn-sm btn-primary" (click)="addSplit()" [disabled]="!canSplit">
<i-bs name="plus-circle"></i-bs>&nbsp;
<span i18n>Add Split</span>
</button>
</div>
<ul class="list-group mt-3">
@for (pageStr of pagesString.split(','); track pageStr; let i = $index) {
<li class="list-group-item d-flex align-items-center">
{{pageStr}}
@if (pagesString.split(',').length > 1) {
&nbsp;
<button class="btn btn-sm btn-danger ms-auto" (click)="removeSplit(i)">
<i-bs name="trash"></i-bs>
</button>
}
</li>
}
</ul>
</div>
</div>
</div>
<div class="modal-footer">
<div class="form-check form-switch me-auto">
<input class="form-check-input" type="checkbox" role="switch" id="deleteOriginalSwitch" [(ngModel)]="deleteOriginal" [disabled]="!userOwnsDocument">
<label class="form-check-label" for="deleteOriginalSwitch" i18n>Delete original document after successful split</label>
</div>
<button type="button" class="btn" [class]="cancelBtnClass" (click)="cancel()" [disabled]="!buttonsEnabled">
<span class="d-inline-block" style="padding-bottom: 1px;">{{cancelBtnCaption}}</span>
</button>
<button type="button" class="btn" [class]="btnClass" (click)="confirm()" [disabled]="!confirmButtonEnabled || !buttonsEnabled">
{{btnCaption}}
</button>
</div>


@@ -0,0 +1,9 @@
.pdf-viewer-container {
  background-color: gray;
  height: 500px;

  pdf-viewer {
    width: 100%;
    height: 100%;
  }
}


@@ -0,0 +1,107 @@
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { provideHttpClient, withInterceptorsFromDi } from '@angular/common/http'
import { provideHttpClientTesting } from '@angular/common/http/testing'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { PdfViewerModule } from 'ng2-pdf-viewer'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { of } from 'rxjs'
import { DocumentService } from 'src/app/services/rest/document.service'
import { SplitConfirmDialogComponent } from './split-confirm-dialog.component'
describe('SplitConfirmDialogComponent', () => {
let component: SplitConfirmDialogComponent
let fixture: ComponentFixture<SplitConfirmDialogComponent>
let documentService: DocumentService
beforeEach(async () => {
await TestBed.configureTestingModule({
imports: [
NgxBootstrapIconsModule.pick(allIcons),
ReactiveFormsModule,
FormsModule,
PdfViewerModule,
SplitConfirmDialogComponent,
],
providers: [
NgbActiveModal,
provideHttpClient(withInterceptorsFromDi()),
provideHttpClientTesting(),
],
}).compileComponents()
fixture = TestBed.createComponent(SplitConfirmDialogComponent)
documentService = TestBed.inject(DocumentService)
component = fixture.componentInstance
fixture.detectChanges()
})
it('should load document on init', () => {
const getSpy = jest.spyOn(documentService, 'get')
component.documentID = 1
getSpy.mockReturnValue(of({ id: 1 } as any))
component.ngOnInit()
expect(documentService.get).toHaveBeenCalledWith(1)
})
it('should update pagesString when pages are added', () => {
component.totalPages = 5
component.page = 2
component.addSplit()
expect(component.pagesString).toEqual('1-2,3-5')
component.page = 4
component.addSplit()
expect(component.pagesString).toEqual('1-2,3-4,5')
})
it('should update pagesString when pages are removed', () => {
component.totalPages = 5
component.page = 2
component.addSplit()
component.page = 4
component.addSplit()
expect(component.pagesString).toEqual('1-2,3-4,5')
component.removeSplit(0)
expect(component.pagesString).toEqual('1-4,5')
})
it('should enable confirm button when pages are added', () => {
component.totalPages = 5
component.page = 2
component.addSplit()
expect(component.confirmButtonEnabled).toBeTruthy()
})
it('should disable confirm button when all pages are removed', () => {
component.totalPages = 5
component.page = 2
component.addSplit()
component.removeSplit(0)
expect(component.confirmButtonEnabled).toBeFalsy()
})
it('should not add split if page is the last page', () => {
component.totalPages = 5
component.page = 5
component.addSplit()
expect(component.pagesString).toEqual('1-5')
})
it('should update totalPages when pdf is loaded', () => {
component.pdfPreviewLoaded({ numPages: 5 } as any)
expect(component.totalPages).toEqual(5)
})
it('should correctly disable split button', () => {
component.totalPages = 5
component.page = 1
expect(component.canSplit).toBeTruthy()
component.page = 5
expect(component.canSplit).toBeFalsy()
component.page = 4
expect(component.canSplit).toBeTruthy()
component['pages'] = new Set([1, 2, 3, 4])
expect(component.canSplit).toBeFalsy()
})
})


@@ -0,0 +1,98 @@
import { Component, OnInit, inject } from '@angular/core'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import { PDFDocumentProxy, PdfViewerModule } from 'ng2-pdf-viewer'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { Document } from 'src/app/data/document'
import { PermissionsService } from 'src/app/services/permissions.service'
import { DocumentService } from 'src/app/services/rest/document.service'
import { ConfirmDialogComponent } from '../confirm-dialog.component'

@Component({
  selector: 'pngx-split-confirm-dialog',
  templateUrl: './split-confirm-dialog.component.html',
  styleUrl: './split-confirm-dialog.component.scss',
  imports: [
    FormsModule,
    ReactiveFormsModule,
    NgxBootstrapIconsModule,
    PdfViewerModule,
  ],
})
export class SplitConfirmDialogComponent
  extends ConfirmDialogComponent
  implements OnInit
{
  private documentService = inject(DocumentService)
  private permissionService = inject(PermissionsService)

  public get pagesString(): string {
    let pagesStr = ''
    let lastPage = 1
    for (let i = 1; i <= this.totalPages; i++) {
      if (this.pages.has(i) || i === this.totalPages) {
        if (lastPage === i) {
          pagesStr += `${i},`
          lastPage = Math.min(i + 1, this.totalPages)
        } else {
          pagesStr += `${lastPage}-${i},`
          lastPage = Math.min(i + 1, this.totalPages)
        }
      }
    }
    return pagesStr.replace(/,$/, '')
  }

  private pages: Set<number> = new Set()
  public documentID: number
  private document: Document
  public page: number = 1
  public totalPages: number
  public deleteOriginal: boolean = false

  public get canSplit(): boolean {
    return (
      this.page < this.totalPages &&
      this.pages.size < this.totalPages - 1 &&
      !this.pages.has(this.page)
    )
  }

  public get pdfSrc(): string {
    return this.documentService.getPreviewUrl(this.documentID)
  }

  constructor() {
    super()
    this.confirmButtonEnabled = this.pages.size > 0
  }

  ngOnInit(): void {
    this.documentService.get(this.documentID).subscribe((r) => {
      this.document = r
    })
  }

  pdfPreviewLoaded(pdf: PDFDocumentProxy) {
    this.totalPages = pdf.numPages
  }

  addSplit() {
    if (this.page === this.totalPages) return
    this.pages.add(this.page)
    this.pages = new Set(Array.from(this.pages).sort((a, b) => a - b))
    this.confirmButtonEnabled = this.pages.size > 0
  }

  removeSplit(i: number) {
    let page = Array.from(this.pages)[Math.min(i, this.pages.size - 1)]
    this.pages.delete(page)
    this.confirmButtonEnabled = this.pages.size > 0
  }

  get userOwnsDocument(): boolean {
    return this.permissionService.currentUserOwnsObject(this.document)
  }
}
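As a worked example of the `pagesString` getter above, the same range computation as a standalone function (extracted purely for illustration; the expected outputs match this component's unit tests):

// Same algorithm as SplitConfirmDialogComponent.pagesString, as a free function.
function pagesString(splits: Set<number>, totalPages: number): string {
  let out = ''
  let lastPage = 1
  for (let i = 1; i <= totalPages; i++) {
    // Each split page closes a range; the final page always closes the last one.
    if (splits.has(i) || i === totalPages) {
      out += lastPage === i ? `${i},` : `${lastPage}-${i},`
      lastPage = Math.min(i + 1, totalPages)
    }
  }
  return out.replace(/,$/, '')
}

console.log(pagesString(new Set([2]), 5)) // '1-2,3-5'
console.log(pagesString(new Set([2, 4]), 5)) // '1-2,3-4,5'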


@@ -1,103 +0,0 @@
<pdf-viewer [src]="pdfSrc" [render-text]="false" zoom="0.4" (after-load-complete)="pdfLoaded($event)"></pdf-viewer>
<div class="modal-header">
<h4 class="modal-title">{{ title }}</h4>
<button type="button" class="btn-close" aria-label="Close" (click)="cancel()"></button>
</div>
<div class="modal-body">
<div class="btn-toolbar mb-2">
<div class="btn-group me-3">
<button class="btn btn-sm btn-secondary" (click)="selectAll()" title="Select all pages" i18n-title>
<i-bs name="check-all"></i-bs>
</button>
<button class="btn btn-sm btn-secondary" (click)="deselectAll()" [disabled]="!hasSelection()" title="Deselect all pages" i18n-title>
<i-bs name="x"></i-bs>
</button>
</div>
<div class="btn-group">
<button class="btn btn-sm btn-secondary" (click)="rotateSelected(-90)" [disabled]="!hasSelection()" title="Rotate selected pages counter-clockwise" i18n-title>
<i-bs name="arrow-counterclockwise"></i-bs>
</button>
<button class="btn btn-sm btn-secondary" (click)="rotateSelected(90)" [disabled]="!hasSelection()" title="Rotate selected pages clockwise" i18n-title>
<i-bs name="arrow-clockwise"></i-bs>
</button>
<button class="btn btn-sm btn-danger" (click)="deleteSelected()" [disabled]="!hasSelection()" title="Delete selected pages" i18n-title>
<i-bs name="trash"></i-bs>
</button>
</div>
</div>
<div cdkDropList (cdkDropListDropped)="drop($event)" cdkDropListOrientation="mixed" class="d-flex flex-wrap row-cols-5">
@for (p of pages; track p.page; let i = $index) {
<div class="page-item rounded p-2" cdkDrag (click)="toggleSelection(i)" [class.selected]="p.selected">
<div class="btn-toolbar hover-actions z-10">
<div class="btn-group me-2">
<button class="btn btn-sm btn-dark" (click)="rotate(i); $event.stopPropagation()" title="Rotate page counter-clockwise" i18n-title>
<i-bs name="arrow-counterclockwise"></i-bs>
</button>
<button class="btn btn-sm btn-dark" (click)="rotate(i); $event.stopPropagation()" title="Rotate page clockwise" i18n-title>
<i-bs name="arrow-clockwise"></i-bs>
</button>
</div>
<div class="btn-group">
<button class="btn btn-sm btn-dark text-danger" (click)="remove(i); $event.stopPropagation()" title="Delete page" i18n-title>
<i-bs name="trash"></i-bs>
</button>
<button class="btn btn-sm btn-dark" (click)="toggleSplit(i); $event.stopPropagation()" title="Add / remove document split here" i18n-title>
<i-bs name="scissors"></i-bs>
</button>
</div>
</div>
<div class="border-end border-bottom bg-light py-1 px-2 document-check z-10">
<div class="form-check">
<input type="checkbox" class="form-check-input" id="page{{i}}" [checked]="p.selected" (click)="toggleSelection(i); $event.stopPropagation()">
<label class="form-check-label" for="page{{i}}"></label>
</div>
</div>
<div class="pdf-viewer-container w-100" [class.selected]="p.selected">
@defer (on viewport) {
@if (!p.loaded) {
<div class="placeholder-glow w-100 h-100 z-10">
<span class="placeholder w-100 h-100"></span>
</div>
}
<pdf-viewer class="fade" [class.show]="p.loaded" [src]="pdfSrc" [page]="p.page" [rotation]="p.rotate" [original-size]="false" [show-all]="false" [render-text]="false" (page-rendered)="p.loaded = true"></pdf-viewer>
} @placeholder {
<div class="placeholder-glow w-100 h-100 z-10">
<span class="placeholder w-100 h-100"></span>
</div>
}
</div>
@if (p.splitAfter) {
<div class="split-after rounded position-absolute top-0 end-0 bg-dark text-uppercase text-center h-100 px-1 small fw-bold">&mdash; <span i18n>Split here</span> &mdash;</div>
}
</div>
}
</div>
</div>
<div class="modal-footer flex-column">
<div class="d-flex w-100 justify-content-between align-items-center">
<div class="btn-group" role="group">
<input type="radio" class="btn-check" [(ngModel)]="editMode" [value]="PdfEditorEditMode.Create" id="editModeCreate" name="editmode">
<label for="editModeCreate" class="btn btn-outline-primary btn-sm">
<i-bs name="plus"></i-bs>
<span class="form-check-label ms-1" i18n>Create new document(s)</span>
</label>
<input type="radio" class="btn-check" [(ngModel)]="editMode" [value]="PdfEditorEditMode.Update" id="editModeUpdate" name="editmode" [disabled]="hasSplit()">
<label for="editModeUpdate" class="btn btn-outline-primary btn-sm">
<i-bs name="pencil"></i-bs>
<span class="form-check-label ms-2" i18n>Update existing document</span>
</label>
</div>
@if (editMode === PdfEditorEditMode.Create) {
<div class="form-check ms-3">
<input class="form-check-input" type="checkbox" id="copyMeta" [(ngModel)]="includeMetadata">
<label class="form-check-label" for="copyMeta" i18n>Copy metadata</label>
</div>
<div class="form-check ms-3">
<input class="form-check-input" type="checkbox" id="deleteOriginal" [(ngModel)]="deleteOriginal">
<label class="form-check-label" for="deleteOriginal" i18n>Delete original</label>
</div>
}
<button type="button" class="btn ms-auto me-2" [class]="cancelBtnClass" (click)="cancel()" [disabled]="!buttonsEnabled">{{ cancelBtnCaption }}</button>
<button type="button" class="btn" [class]="btnClass" (click)="confirm()" [disabled]="pages.length === 0">{{ btnCaption }}</button>
</div>
</div>


@@ -1,70 +0,0 @@
.page-item {
  position: relative;
  cursor: pointer;
  border: 1px solid transparent;
  background-origin: border-box;

  &.selected {
    background-color: var(--pngx-primary-darken-5);
  }
}

.pdf-viewer-container {
  background-color: gray;
  height: 240px;

  pdf-viewer {
    width: 100%;
    height: 100%;
  }
}

::ng-deep .ng2-pdf-viewer-container {
  overflow: hidden;
}

.hover-actions {
  position: absolute;
  top: 0;
  right: 0;
  display: none;
}

.page-item:hover .hover-actions {
  display: block;
}

.document-check {
  display: none;
  position: absolute;
  top: 0;
  left: 0;
  padding: 0.5rem;
  border-top-left-radius: 0.25rem;
  border-bottom-right-radius: 0.25rem;
  pointer-events: none;

  .form-check {
    padding: 0;
    min-height: 0;
    margin-bottom: 0;

    .form-check-input {
      margin-left: 0;
    }
  }
}

.page-item:hover .document-check, .selected .document-check {
  display: block;
}

.z-10 {
  z-index: 10;
}

.split-after {
  writing-mode: vertical-rl;
}


@@ -1,142 +0,0 @@
import { provideHttpClient, withInterceptorsFromDi } from '@angular/common/http'
import { provideHttpClientTesting } from '@angular/common/http/testing'
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { PDFEditorComponent } from './pdf-editor.component'
describe('PDFEditorComponent', () => {
let component: PDFEditorComponent
let fixture: ComponentFixture<PDFEditorComponent>
beforeEach(async () => {
await TestBed.configureTestingModule({
imports: [PDFEditorComponent, NgxBootstrapIconsModule.pick(allIcons)],
providers: [
provideHttpClient(withInterceptorsFromDi()),
provideHttpClientTesting(),
{ provide: NgbActiveModal, useValue: {} },
],
}).compileComponents()
fixture = TestBed.createComponent(PDFEditorComponent)
component = fixture.componentInstance
fixture.detectChanges()
})
it('should return correct operations with no changes', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false },
{ page: 2, rotate: 0, splitAfter: false },
{ page: 3, rotate: 0, splitAfter: false },
]
const ops = component.getOperations()
expect(ops).toEqual([
{ page: 1, rotate: 0, doc: 0 },
{ page: 2, rotate: 0, doc: 0 },
{ page: 3, rotate: 0, doc: 0 },
])
})
it('should rotate, delete and reorder pages', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false, selected: false },
{ page: 2, rotate: 0, splitAfter: false, selected: false },
]
component.toggleSelection(0)
component.rotateSelected(90)
expect(component.pages[0].rotate).toBe(90)
component.toggleSelection(0) // deselect
component.toggleSelection(1)
component.deleteSelected()
expect(component.pages.length).toBe(1)
component.pages.push({ page: 2, rotate: 0, splitAfter: false })
component.drop({ previousIndex: 0, currentIndex: 1 } as any)
expect(component.pages[0].page).toBe(2)
component.rotate(0)
expect(component.pages[0].rotate).toBe(90)
})
it('should handle empty pages array', () => {
component.pages = []
expect(component.getOperations()).toEqual([])
})
it('should increment doc index after splitAfter', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: true },
{ page: 2, rotate: 0, splitAfter: false },
{ page: 3, rotate: 0, splitAfter: true },
{ page: 4, rotate: 0, splitAfter: false },
]
const ops = component.getOperations()
expect(ops).toEqual([
{ page: 1, rotate: 0, doc: 0 },
{ page: 2, rotate: 0, doc: 1 },
{ page: 3, rotate: 0, doc: 1 },
{ page: 4, rotate: 0, doc: 2 },
])
})
it('should include rotations in operations', () => {
component.pages = [
{ page: 1, rotate: 90, splitAfter: false },
{ page: 2, rotate: 180, splitAfter: true },
{ page: 3, rotate: 270, splitAfter: false },
]
const ops = component.getOperations()
expect(ops).toEqual([
{ page: 1, rotate: 90, doc: 0 },
{ page: 2, rotate: 180, doc: 0 },
{ page: 3, rotate: 270, doc: 1 },
])
})
it('should handle remove operation', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false, selected: false },
{ page: 2, rotate: 0, splitAfter: false, selected: true },
{ page: 3, rotate: 0, splitAfter: false, selected: false },
]
component.remove(1) // remove page 2
expect(component.pages.length).toBe(2)
expect(component.pages[0].page).toBe(1)
expect(component.pages[1].page).toBe(3)
})
it('should toggle splitAfter correctly', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false },
{ page: 2, rotate: 0, splitAfter: false },
]
component.toggleSplit(0)
expect(component.pages[0].splitAfter).toBeTruthy()
component.toggleSplit(1)
expect(component.pages[1].splitAfter).toBeTruthy()
})
it('should select and deselect all pages', () => {
component.pages = [
{ page: 1, rotate: 0, splitAfter: false, selected: false },
{ page: 2, rotate: 0, splitAfter: false, selected: false },
]
component.selectAll()
expect(component.pages.every((p) => p.selected)).toBeTruthy()
expect(component.hasSelection()).toBeTruthy()
component.deselectAll()
expect(component.pages.every((p) => !p.selected)).toBeTruthy()
expect(component.hasSelection()).toBeFalsy()
})
it('should handle pdf loading and page generation', () => {
const mockPdf = {
numPages: 3,
getPage: (pageNum: number) => Promise.resolve({ pageNumber: pageNum }),
}
component.pdfLoaded(mockPdf as any)
expect(component.totalPages).toBe(3)
expect(component.pages.length).toBe(3)
expect(component.pages[0].page).toBe(1)
expect(component.pages[1].page).toBe(2)
expect(component.pages[2].page).toBe(3)
})
})


@@ -1,133 +0,0 @@
import {
CdkDragDrop,
DragDropModule,
moveItemInArray,
} from '@angular/cdk/drag-drop'
import { Component, inject } from '@angular/core'
import { FormsModule } from '@angular/forms'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { PDFDocumentProxy, PdfViewerModule } from 'ng2-pdf-viewer'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { DocumentService } from 'src/app/services/rest/document.service'
import { ConfirmDialogComponent } from '../confirm-dialog/confirm-dialog.component'
interface PageOperation {
page: number
rotate: number
splitAfter: boolean
selected?: boolean
loaded?: boolean
}
export enum PdfEditorEditMode {
Update = 'update',
Create = 'create',
}
@Component({
selector: 'pngx-pdf-editor',
templateUrl: './pdf-editor.component.html',
styleUrl: './pdf-editor.component.scss',
imports: [
DragDropModule,
FormsModule,
PdfViewerModule,
NgxBootstrapIconsModule,
],
})
export class PDFEditorComponent extends ConfirmDialogComponent {
public PdfEditorEditMode = PdfEditorEditMode
private documentService = inject(DocumentService)
activeModal: NgbActiveModal = inject(NgbActiveModal)
documentID: number
pages: PageOperation[] = []
totalPages = 0
editMode: PdfEditorEditMode = PdfEditorEditMode.Create
deleteOriginal: boolean = false
includeMetadata: boolean = true
get pdfSrc(): string {
return this.documentService.getPreviewUrl(this.documentID)
}
pdfLoaded(pdf: PDFDocumentProxy) {
this.totalPages = pdf.numPages
this.pages = Array.from({ length: this.totalPages }, (_, i) => ({
page: i + 1,
rotate: 0,
splitAfter: false,
selected: false,
loaded: false,
}))
}
toggleSelection(i: number) {
this.pages[i].selected = !this.pages[i].selected
}
rotate(i: number) {
this.pages[i].rotate = (this.pages[i].rotate + 90) % 360
}
rotateSelected(dir: number) {
for (let p of this.pages) {
if (p.selected) {
p.rotate = (p.rotate + dir + 360) % 360
}
}
}
remove(i: number) {
this.pages.splice(i, 1)
}
toggleSplit(i: number) {
this.pages[i].splitAfter = !this.pages[i].splitAfter
if (this.pages[i].splitAfter) {
// force create mode
this.editMode = PdfEditorEditMode.Create
}
}
selectAll() {
this.pages.forEach((p) => (p.selected = true))
}
deselectAll() {
this.pages.forEach((p) => (p.selected = false))
}
deleteSelected() {
this.pages = this.pages.filter((p) => !p.selected)
}
hasSelection(): boolean {
return this.pages.some((p) => p.selected)
}
hasSplit(): boolean {
return this.pages.some((p) => p.splitAfter)
}
drop(event: CdkDragDrop<PageOperation[]>) {
moveItemInArray(this.pages, event.previousIndex, event.currentIndex)
}
getOperations() {
return this.pages.map((p, idx) => ({
page: p.page,
rotate: p.rotate,
doc: this.computeDocIndex(idx),
}))
}
private computeDocIndex(index: number): number {
let docIndex = 0
for (let i = 0; i <= index; i++) {
if (this.pages[i].splitAfter && i < index) docIndex++
}
return docIndex
}
}


@@ -58,8 +58,16 @@
   <i-bs width="1em" height="1em" name="diagram-3"></i-bs>&nbsp;<span i18n>More like this</span>
 </button>
-<button ngbDropdownItem (click)="editPdf()" [disabled]="!userIsOwner || !userCanEdit || originalContentRenderType !== ContentRenderType.PDF">
-  <i-bs name="pencil"></i-bs>&nbsp;<ng-container i18n>PDF Editor</ng-container>
+<button ngbDropdownItem (click)="splitDocument()" [disabled]="!userCanAdd || originalContentRenderType !== ContentRenderType.PDF || previewNumPages === 1">
+  <i-bs width="1em" height="1em" name="scissors"></i-bs>&nbsp;<span i18n>Split</span>
+</button>
+<button ngbDropdownItem (click)="rotateDocument()" [disabled]="!userIsOwner || !userCanEdit || originalContentRenderType !== ContentRenderType.PDF">
+  <i-bs name="arrow-clockwise"></i-bs>&nbsp;<ng-container i18n>Rotate</ng-container>
+</button>
+<button ngbDropdownItem (click)="deletePages()" [disabled]="!userIsOwner || !userCanEdit || originalContentRenderType !== ContentRenderType.PDF || previewNumPages === 1">
+  <i-bs name="file-earmark-minus"></i-bs>&nbsp;<ng-container i18n>Delete page(s)</ng-container>
 </button>
 </div>
 </div>


@@ -1158,43 +1158,81 @@ describe('DocumentDetailComponent', () => {
     ).not.toBeUndefined()
   })
-  it('should support pdf editor, handle error', () => {
+  it('should support split', () => {
     let modal: NgbModalRef
     modalService.activeInstances.subscribe((m) => (modal = m[0]))
-    const closeSpy = jest.spyOn(openDocumentsService, 'closeDocument')
-    const errorSpy = jest.spyOn(toastService, 'showError')
     initNormally()
-    component.editPdf()
+    component.splitDocument()
     expect(modal).not.toBeUndefined()
     modal.componentInstance.documentID = doc.id
-    modal.componentInstance.pages = [{ page: 1, rotate: 0, splitAfter: false }]
+    modal.componentInstance.totalPages = 5
+    modal.componentInstance.page = 2
+    modal.componentInstance.addSplit()
     modal.componentInstance.confirm()
     let req = httpTestingController.expectOne(
       `${environment.apiBaseUrl}documents/bulk_edit/`
     )
     expect(req.request.body).toEqual({
       documents: [doc.id],
-      method: 'edit_pdf',
-      parameters: {
-        operations: [{ page: 1, rotate: 0, doc: 0 }],
-        delete_original: false,
-        update_document: false,
-        include_metadata: true,
-      },
+      method: 'split',
+      parameters: { pages: '1-2,3-5', delete_originals: false },
     })
-    req.error(new ErrorEvent('failed'))
-    expect(errorSpy).toHaveBeenCalled()
-
-    component.editPdf()
-    modal.componentInstance.documentID = doc.id
-    modal.componentInstance.pages = [{ page: 1, rotate: 0, splitAfter: true }]
-    modal.componentInstance.deleteOriginal = true
+    req.error(new ProgressEvent('failed'))
     modal.componentInstance.confirm()
     req = httpTestingController.expectOne(
       `${environment.apiBaseUrl}documents/bulk_edit/`
     )
     req.flush(true)
-    expect(closeSpy).toHaveBeenCalled()
   })
+  it('should support rotate', () => {
+    let modal: NgbModalRef
+    modalService.activeInstances.subscribe((m) => (modal = m[0]))
+    initNormally()
+    component.rotateDocument()
+    expect(modal).not.toBeUndefined()
+    modal.componentInstance.documentID = doc.id
+    modal.componentInstance.rotate()
+    modal.componentInstance.confirm()
+    let req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}documents/bulk_edit/`
+    )
+    expect(req.request.body).toEqual({
+      documents: [doc.id],
+      method: 'rotate',
+      parameters: { degrees: 90 },
+    })
+    req.error(new ProgressEvent('failed'))
+    modal.componentInstance.confirm()
+    req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}documents/bulk_edit/`
+    )
+    req.flush(true)
+  })
+  it('should support delete pages', () => {
+    let modal: NgbModalRef
+    modalService.activeInstances.subscribe((m) => (modal = m[0]))
+    initNormally()
+    component.deletePages()
+    expect(modal).not.toBeUndefined()
+    modal.componentInstance.documentID = doc.id
+    modal.componentInstance.pages = [1, 2]
+    modal.componentInstance.confirm()
+    let req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}documents/bulk_edit/`
+    )
+    expect(req.request.body).toEqual({
+      documents: [doc.id],
+      method: 'delete_pages',
+      parameters: { pages: [1, 2] },
+    })
+    req.error(new ProgressEvent('failed'))
+    modal.componentInstance.confirm()
+    req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}documents/bulk_edit/`
+    )
+    req.flush(true)
+  })

   it('should support keyboard shortcuts', () => {


@@ -82,6 +82,9 @@ import { getFilenameFromContentDisposition } from 'src/app/utils/http'
import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter' import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter'
import * as UTIF from 'utif' import * as UTIF from 'utif'
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component' import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'
import { DeletePagesConfirmDialogComponent } from '../common/confirm-dialog/delete-pages-confirm-dialog/delete-pages-confirm-dialog.component'
import { RotateConfirmDialogComponent } from '../common/confirm-dialog/rotate-confirm-dialog/rotate-confirm-dialog.component'
import { SplitConfirmDialogComponent } from '../common/confirm-dialog/split-confirm-dialog/split-confirm-dialog.component'
import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component' import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component'
import { CorrespondentEditDialogComponent } from '../common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component' import { CorrespondentEditDialogComponent } from '../common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component'
import { DocumentTypeEditDialogComponent } from '../common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component' import { DocumentTypeEditDialogComponent } from '../common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component'
@@ -99,10 +102,6 @@ import { TagsComponent } from '../common/input/tags/tags.component'
import { TextComponent } from '../common/input/text/text.component'
import { UrlComponent } from '../common/input/url/url.component'
import { PageHeaderComponent } from '../common/page-header/page-header.component'
import {
PDFEditorComponent,
PdfEditorEditMode,
} from '../common/pdf-editor/pdf-editor.component'
import { ShareLinksDialogComponent } from '../common/share-links-dialog/share-links-dialog.component'
import { DocumentHistoryComponent } from '../document-history/document-history.component'
import { DocumentNotesComponent } from '../document-notes/document-notes.component'
@@ -1350,13 +1349,13 @@ export class DocumentDetailComponent
this.documentForm.updateValueAndValidity()
}
editPdf() {
let modal = this.modalService.open(PDFEditorComponent, {
splitDocument() {
let modal = this.modalService.open(SplitConfirmDialogComponent, {
backdrop: 'static',
size: 'xl',
scrollable: true,
size: 'lg',
})
modal.componentInstance.title = $localize`PDF Editor`
modal.componentInstance.title = $localize`Split confirm`
modal.componentInstance.messageBold = $localize`This operation will split the selected document(s) into new documents.`
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.documentID = this.document.id
modal.componentInstance.confirmClicked
@@ -1364,30 +1363,103 @@ export class DocumentDetailComponent
.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.documentsService
.bulkEdit([this.document.id], 'edit_pdf', {
operations: modal.componentInstance.getOperations(),
delete_original: modal.componentInstance.deleteOriginal,
update_document:
modal.componentInstance.editMode == PdfEditorEditMode.Update,
include_metadata: modal.componentInstance.includeMetadata,
.bulkEdit([this.document.id], 'split', {
pages: modal.componentInstance.pagesString,
delete_originals: modal.componentInstance.deleteOriginal,
})
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({
next: () => {
this.toastService.showInfo(
$localize`PDF edit operation for "${this.document.title}" will begin in the background.`
$localize`Split operation for "${this.document.title}" will begin in the background.`
)
modal.close()
if (modal.componentInstance.deleteOriginal) {
this.openDocumentService.closeDocument(this.document)
}
},
error: (error) => {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing PDF edit operation`,
$localize`Error executing split operation`,
error
)
},
})
})
}
rotateDocument() {
let modal = this.modalService.open(RotateConfirmDialogComponent, {
backdrop: 'static',
size: 'lg',
})
modal.componentInstance.title = $localize`Rotate confirm`
modal.componentInstance.messageBold = $localize`This operation will permanently rotate the original version of the current document.`
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.documentID = this.document.id
modal.componentInstance.showPDFNote = false
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.documentsService
.bulkEdit([this.document.id], 'rotate', {
degrees: modal.componentInstance.degrees,
})
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({
next: () => {
this.toastService.show({
content: $localize`Rotation of "${this.document.title}" will begin in the background. Close and re-open the document after the operation has completed to see the changes.`,
delay: 8000,
action: this.close.bind(this),
actionName: $localize`Close`,
})
modal.close()
},
error: (error) => {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing rotate operation`,
error
)
},
})
})
}
deletePages() {
let modal = this.modalService.open(DeletePagesConfirmDialogComponent, {
backdrop: 'static',
})
modal.componentInstance.title = $localize`Delete pages confirm`
modal.componentInstance.messageBold = $localize`This operation will permanently delete the selected pages from the original document.`
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.documentID = this.document.id
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.documentsService
.bulkEdit([this.document.id], 'delete_pages', {
pages: modal.componentInstance.pages,
})
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({
next: () => {
this.toastService.showInfo(
$localize`Delete pages operation for "${this.document.title}" will begin in the background. Close and re-open or reload this document after the operation has completed to see the changes.`
)
modal.close()
},
error: (error) => {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing delete pages operation`,
error
)
},
View File
@@ -164,7 +164,7 @@ describe('ManagementListComponent', () => {
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
const reloadSpy = jest.spyOn(component, 'reloadData')
const createButton = fixture.debugElement.queryAll(By.css('button'))[4]
const createButton = fixture.debugElement.queryAll(By.css('button'))[3]
createButton.triggerEventHandler('click')
expect(modal).not.toBeUndefined()
@@ -188,7 +188,7 @@ describe('ManagementListComponent', () => {
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
const reloadSpy = jest.spyOn(component, 'reloadData')
const editButton = fixture.debugElement.queryAll(By.css('button'))[7]
const editButton = fixture.debugElement.queryAll(By.css('button'))[6]
editButton.triggerEventHandler('click')
expect(modal).not.toBeUndefined()
@@ -213,7 +213,7 @@ describe('ManagementListComponent', () => {
const deleteSpy = jest.spyOn(tagService, 'delete')
const reloadSpy = jest.spyOn(component, 'reloadData')
const deleteButton = fixture.debugElement.queryAll(By.css('button'))[8]
const deleteButton = fixture.debugElement.queryAll(By.css('button'))[7]
deleteButton.triggerEventHandler('click')
expect(modal).not.toBeUndefined()
@@ -233,7 +233,7 @@ describe('ManagementListComponent', () => {
it('should support quick filter for objects', () => {
const qfSpy = jest.spyOn(documentListViewService, 'quickFilter')
const filterButton = fixture.debugElement.queryAll(By.css('button'))[9]
const filterButton = fixture.debugElement.queryAll(By.css('button'))[8]
filterButton.triggerEventHandler('click')
expect(qfSpy).toHaveBeenCalledWith([
{ rule_type: FILTER_HAS_TAGS_ALL, value: tags[0].id.toString() },
View File
@@ -70,6 +70,6 @@
}
</ul>
<button type="button" (click)="reset()" class="btn btn-outline-secondary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
<button type="submit" class="btn btn-primary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
<button type="submit" class="btn btn-primary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
<button type="button" (click)="reset()" class="btn btn-secondary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
</form>
View File
@@ -497,103 +497,6 @@ def delete_pages(doc_ids: list[int], pages: list[int]) -> Literal["OK"]:
return "OK" return "OK"
def edit_pdf(
doc_ids: list[int],
operations: list[dict],
*,
delete_original: bool = False,
update_document: bool = False,
include_metadata: bool = True,
user: User | None = None,
) -> Literal["OK"]:
"""
Operations is a list of dictionaries describing the final PDF pages.
Each entry must contain the original page number in `page` and may
specify `rotate` in degrees and `doc` indicating the output
document index (for splitting). Pages omitted from the list are
discarded.
"""
logger.info(
f"Editing PDF of document {doc_ids[0]} with {len(operations)} operations",
)
doc = Document.objects.get(id=doc_ids[0])
import pikepdf
pdf_docs: list[pikepdf.Pdf] = []
try:
with pikepdf.open(doc.source_path) as src:
# prepare output documents
max_idx = max(op.get("doc", 0) for op in operations)
pdf_docs = [pikepdf.new() for _ in range(max_idx + 1)]
if update_document and len(pdf_docs) > 1:
logger.error(
"Update requested but multiple output documents specified",
)
raise ValueError("Multiple output documents specified")
for op in operations:
dst = pdf_docs[op.get("doc", 0)]
page = src.pages[op["page"] - 1]
dst.pages.append(page)
if op.get("rotate"):
dst.pages[-1].rotate(op["rotate"], relative=True)
if update_document:
temp_path = doc.source_path.with_suffix(".tmp.pdf")
pdf = pdf_docs[0]
pdf.remove_unreferenced_resources()
# save the edited PDF to a temporary file in case of errors
pdf.save(temp_path)
# replace the original document with the edited one
temp_path.replace(doc.source_path)
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
doc.page_count = len(pdf.pages)
doc.save()
update_document_content_maybe_archive_file.delay(document_id=doc.id)
else:
consume_tasks = []
overrides = (
DocumentMetadataOverrides().from_document(doc)
if include_metadata
else DocumentMetadataOverrides()
)
if user is not None:
overrides.owner_id = user.id
for idx, pdf in enumerate(pdf_docs, start=1):
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{doc.id}_edit_{idx}.pdf"
)
pdf.remove_unreferenced_resources()
pdf.save(filepath)
consume_tasks.append(
consume_file.s(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
),
overrides,
),
)
if delete_original:
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
else:
group(consume_tasks).delay()
except Exception as e:
logger.exception(f"Error editing document {doc.id}: {e}")
raise ValueError(
f"An error occurred while editing the document: {e}",
) from e
return "OK"
def reflect_doclinks(
document: Document,
field: CustomField,
View File
@@ -1,23 +1,16 @@
from __future__ import annotations
import logging
import pickle
from binascii import hexlify
from collections import OrderedDict
from dataclasses import dataclass
from typing import TYPE_CHECKING
from typing import Any
from typing import Final
from django.conf import settings
from django.core.cache import cache
from django.core.cache import caches
from documents.models import Document
if TYPE_CHECKING:
from django.core.cache.backends.base import BaseCache
from documents.classifier import DocumentClassifier
logger = logging.getLogger("paperless.caching")
@@ -46,80 +39,6 @@ CACHE_1_MINUTE: Final[int] = 60
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE
read_cache = caches["read-cache"]
class LRUCache:
def __init__(self, capacity: int = 128):
self._data = OrderedDict()
self.capacity = capacity
def get(self, key, default=None) -> Any | None:
if key in self._data:
self._data.move_to_end(key)
return self._data[key]
return default
def set(self, key, value) -> None:
self._data[key] = value
self._data.move_to_end(key)
while len(self._data) > self.capacity:
self._data.popitem(last=False)
class StoredLRUCache(LRUCache):
"""
LRU cache that can persist its entire contents as a single entry in a backend cache.
Useful for sharing a cache across multiple workers or processes.
Workflow:
1. Load the cache state from the backend using `load()`.
2. Use `get()` and `set()` locally as usual.
3. Persist changes back to the backend using `save()`.
"""
def __init__(
self,
backend_key: str,
capacity: int = 128,
backend: BaseCache = read_cache,
backend_ttl=settings.CACHALOT_TIMEOUT,
):
if backend_key is None:
raise ValueError("backend_key is mandatory")
super().__init__(capacity)
self._backend_key = backend_key
self._backend = backend
self.backend_ttl = backend_ttl
def load(self) -> None:
"""
Load the whole cache content from backend storage.
If no valid cached data exists in the backend, the local cache is cleared.
"""
serialized_data = self._backend.get(self._backend_key)
try:
self._data = (
pickle.loads(serialized_data) if serialized_data else OrderedDict()
)
except pickle.PickleError:
logger.warning(
"Cache exists in backend but could not be read (possibly invalid format)",
)
def save(self) -> None:
"""Save the entire local cache to the backend as a serialized object.
The backend entry will expire after the configured TTL.
"""
self._backend.set(
self._backend_key,
pickle.dumps(self._data),
self.backend_ttl,
)
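A compact sketch of the load/get/set/save cycle the class docstring describes (key and values are illustrative):

cache = StoredLRUCache("stem_cache_v1", capacity=1000)
cache.load()                 # 1. pull the shared state from the backend
stem = cache.get("amazed")   # 2. local reads and writes as usual
if stem is None:
    cache.set("amazed", "amaz")
cache.save()                 # 3. publish the updated state back to the backend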
def get_suggestion_cache_key(document_id: int) -> str:
"""
View File
@@ -16,29 +16,16 @@ if TYPE_CHECKING:
from django.conf import settings
from django.core.cache import cache
from django.core.cache import caches
from documents.caching import CACHE_5_MINUTES
from documents.caching import CACHE_50_MINUTES
from documents.caching import CLASSIFIER_HASH_KEY
from documents.caching import CLASSIFIER_MODIFIED_KEY
from documents.caching import CLASSIFIER_VERSION_KEY
from documents.caching import StoredLRUCache
from documents.models import Document
from documents.models import MatchingModel
logger = logging.getLogger("paperless.classifier")
ADVANCED_TEXT_PROCESSING_ENABLED = (
settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED
)
read_cache = caches["read-cache"]
RE_DIGIT = re.compile(r"\d")
RE_WORD = re.compile(r"\b[\w]+\b") # words that may contain digits
class IncompatibleClassifierVersionError(Exception):
def __init__(self, message: str, *args: object) -> None:
@@ -105,27 +92,14 @@ class DocumentClassifier:
self.last_auto_type_hash: bytes | None = None
self.data_vectorizer = None
self.data_vectorizer_hash = None
self.tags_binarizer = None
self.tags_classifier = None
self.correspondent_classifier = None
self.document_type_classifier = None
self.storage_path_classifier = None
self._stemmer = None
# 10,000 elements roughly use 200 to 500 KB per worker,
# and also in the shared Redis cache,
# Keep this cache small to minimize lookup and I/O latency.
if ADVANCED_TEXT_PROCESSING_ENABLED:
self._stem_cache = StoredLRUCache(
f"stem_cache_v{self.FORMAT_VERSION}",
capacity=10000,
)
self._stop_words = None
self._stemmer = None
self._stop_words = None
def _update_data_vectorizer_hash(self):
self.data_vectorizer_hash = sha256(
pickle.dumps(self.data_vectorizer),
).hexdigest()
def load(self) -> None:
from sklearn.exceptions import InconsistentVersionWarning
@@ -145,7 +119,6 @@ class DocumentClassifier:
self.last_auto_type_hash = pickle.load(f)
self.data_vectorizer = pickle.load(f)
self._update_data_vectorizer_hash()
self.tags_binarizer = pickle.load(f)
self.tags_classifier = pickle.load(f)
@@ -296,7 +269,7 @@ class DocumentClassifier:
Generates the content for documents, but one at a time
"""
for doc in docs_queryset:
yield self.preprocess_content(doc.content, shared_cache=False)
yield self.preprocess_content(doc.content)
self.data_vectorizer = CountVectorizer(
analyzer="word",
@@ -374,7 +347,6 @@ class DocumentClassifier:
self.last_doc_change_time = latest_doc_change
self.last_auto_type_hash = hasher.digest()
self._update_data_vectorizer_hash()
# Set the classifier information into the cache
# Caching for 50 minutes, so slightly less than the normal retrain time
@@ -384,15 +356,30 @@ class DocumentClassifier:
return True
def _init_advanced_text_processing(self):
if self._stop_words is None or self._stemmer is None:
def preprocess_content(self, content: str) -> str:  # pragma: no cover
"""
Process the contents of a document, distilling it down into
words which are meaningful to the content
"""
# Lower case the document
content = content.lower().strip()
# Reduce spaces
content = re.sub(r"\s+", " ", content)
# Get only the letters
content = re.sub(r"[^\w\s]", " ", content)
# If the NLTK language is supported, do further processing
if settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED:
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize
# Not really hacky, since it isn't private and is documented, but
# set the search path for NLTK data to the single location it should be in
nltk.data.path = [settings.NLTK_DIR]
try:
# Preload the corpus early, to force the lazy loader to transform
stopwords.ensure_loaded()
@@ -400,100 +387,41 @@ class DocumentClassifier:
# Do some one time setup
# Sometimes, somehow, there's multiple threads loading the corpus
# and it's not thread safe, raising an AttributeError
if self._stemmer is None:
self._stemmer = SnowballStemmer(settings.NLTK_LANGUAGE)
if self._stop_words is None:
self._stop_words = set(stopwords.words(settings.NLTK_LANGUAGE))
self._stop_words = frozenset(stopwords.words(settings.NLTK_LANGUAGE))
except AttributeError:
logger.debug("Could not initialize NLTK for advanced text processing.")
return False
return True
def stem_and_skip_stop_words(self, words: list[str], *, shared_cache=True):
"""
Reduce a list of words to their stem. Stop words are converted to empty strings.
:param words: the list of words to stem
"""
def _stem_and_skip_stop_word(word: str):
"""
Reduce a given word to its stem. If it's a stop word, return an empty string.
E.g. "amazement", "amaze" and "amazed" all return "amaz".
"""
cached = self._stem_cache.get(word)
if cached is not None:
return cached
elif word in self._stop_words:
return ""
# Assumption: words that contain numbers are never stemmed
elif RE_DIGIT.search(word):
return word
else:
result = self._stemmer.stem(word)
self._stem_cache.set(word, result)
return result
if shared_cache:
self._stem_cache.load()
# Stem the words and skip stop words
result = " ".join(
filter(None, (_stem_and_skip_stop_word(w) for w in words)),
)
if shared_cache:
self._stem_cache.save()
return result
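Illustratively, and assuming NLTK_LANGUAGE is "english", the method above behaves like this (shared_cache disabled to keep the example local):

# Stop words vanish, stems are cached, and digit-bearing tokens pass through:
classifier.stem_and_skip_stop_words(
    ["we", "were", "amazed", "test0707"],
    shared_cache=False,
)
# -> "amaz test0707"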
def preprocess_content(
self,
content: str,
*,
shared_cache=True,
) -> str:
"""
Process the contents of a document, distilling it down into
words which are meaningful to the content.
A stemmer cache is shared across workers with the parameter "shared_cache".
This is unnecessary when training the classifier.
"""
# Lower case the document, reduce space,
# and keep only letters and digits.
content = " ".join(match.group().lower() for match in RE_WORD.finditer(content))
if ADVANCED_TEXT_PROCESSING_ENABLED:
from nltk.tokenize import word_tokenize
if not self._init_advanced_text_processing():
return content
# Tokenize
# This splits the content into tokens, roughly words
words = word_tokenize(content, language=settings.NLTK_LANGUAGE)
# Stem the words and skip stop words
content = self.stem_and_skip_stop_words(words, shared_cache=shared_cache)
words: list[str] = word_tokenize(
content,
language=settings.NLTK_LANGUAGE,
)
meaningful_words = []
for word in words:
# Skip stop words
# These are words like "a", "and", "the" which add little meaning
if word in self._stop_words:
continue
# Stem the words
# This reduces the words to their stems.
# "amazement" returns "amaz"
# "amaze" returns "amaz
# "amazed" returns "amaz"
meaningful_words.append(self._stemmer.stem(word))
return " ".join(meaningful_words)
except AttributeError:
return content
return content
def _get_vectorizer_cache_key(self, content: str):
hash = sha256(content.encode())
hash.update(
f"|{self.FORMAT_VERSION}|{settings.NLTK_LANGUAGE}|{settings.NLTK_ENABLED}|{self.data_vectorizer_hash}".encode(),
)
return f"vectorized_content_{hash.hexdigest()}"
def _vectorize(self, content: str):
key = self._get_vectorizer_cache_key(content)
serialized_result = read_cache.get(key)
if serialized_result is None:
result = self.data_vectorizer.transform([self.preprocess_content(content)])
read_cache.set(key, pickle.dumps(result), CACHE_5_MINUTES)
else:
read_cache.touch(key, CACHE_5_MINUTES)
result = pickle.loads(serialized_result)
return result
def predict_correspondent(self, content: str) -> int | None:
if self.correspondent_classifier:
X = self._vectorize(content)
X = self.data_vectorizer.transform([self.preprocess_content(content)])
correspondent_id = self.correspondent_classifier.predict(X)
if correspondent_id != -1:
return correspondent_id
@@ -504,7 +432,7 @@ class DocumentClassifier:
def predict_document_type(self, content: str) -> int | None:
if self.document_type_classifier:
X = self._vectorize(content)
X = self.data_vectorizer.transform([self.preprocess_content(content)])
document_type_id = self.document_type_classifier.predict(X)
if document_type_id != -1:
return document_type_id
@@ -517,7 +445,7 @@ class DocumentClassifier:
from sklearn.utils.multiclass import type_of_target
if self.tags_classifier:
X = self._vectorize(content)
X = self.data_vectorizer.transform([self.preprocess_content(content)])
y = self.tags_classifier.predict(X)
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
if type_of_target(y).startswith("multilabel"):
@@ -536,7 +464,7 @@ class DocumentClassifier:
def predict_storage_path(self, content: str) -> int | None:
if self.storage_path_classifier:
X = self._vectorize(content)
X = self.data_vectorizer.transform([self.preprocess_content(content)])
storage_path_id = self.storage_path_classifier.predict(X)
if storage_path_id != -1:
return storage_path_id
View File
@@ -1,5 +1,4 @@
import os
import os import os
from pathlib import Path
from django.conf import settings
@@ -8,15 +7,19 @@ from documents.templating.filepath import validate_filepath_template_and_render
from documents.templating.utils import convert_format_str_to_template_format
def create_source_path_directory(source_path: Path) -> None:
source_path.parent.mkdir(parents=True, exist_ok=True)
def create_source_path_directory(source_path):
os.makedirs(os.path.dirname(source_path), exist_ok=True)
def delete_empty_directories(directory: Path, root: Path) -> None:
def delete_empty_directories(directory, root):
if not directory.is_dir():
if not os.path.isdir(directory):
return
if not directory.is_relative_to(root):
# Go up in the directory hierarchy and try to delete all directories
directory = os.path.normpath(directory)
root = os.path.normpath(root)
if not directory.startswith(root + os.path.sep):
# don't do anything outside our originals folder.
# append os.path.sep so that we avoid these cases:
@@ -24,12 +27,11 @@ def delete_empty_directories(directory: Path, root: Path) -> None:
# root = /home/originals ("/" gets appended and startswith fails)
return
# Go up in the directory hierarchy and try to delete all directories
while directory != root:
if not list(directory.iterdir()):
if not os.listdir(directory):
# it's empty
try:
directory.rmdir()
os.rmdir(directory)
except OSError:
# whatever. empty directories aren't that bad anyway.
return
@@ -38,10 +40,10 @@ def delete_empty_directories(directory: Path, root: Path) -> None:
return
# go one level up
directory = directory.parent
directory = os.path.normpath(os.path.dirname(directory))
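As a worked example of the walk-up behaviour (paths illustrative; the os.path variant accepts plain strings):

# With root=/data/originals and directory=/data/originals/2024/01,
# an empty "01" is removed, then an empty "2024"; the walk stops at the
# first non-empty directory or at root itself.
delete_empty_directories(Path("/data/originals/2024/01"), root=Path("/data/originals"))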
def generate_unique_filename(doc, *, archive_filename=False) -> Path:
def generate_unique_filename(doc, *, archive_filename=False):
"""
Generates a unique filename for doc in settings.ORIGINALS_DIR.
@@ -54,32 +56,21 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
""" """
if archive_filename: if archive_filename:
old_filename: Path | None = (
Path(doc.archive_filename) if doc.archive_filename else None
)
old_filename = doc.archive_filename
root = settings.ARCHIVE_DIR
else:
old_filename = Path(doc.filename) if doc.filename else None
old_filename = doc.filename
root = settings.ORIGINALS_DIR
# If generating archive filenames, try to make a name that is similar to
# the original filename first.
if archive_filename and doc.filename:
# Generate the full path using the same logic as generate_filename
base_generated = generate_filename(doc, archive_filename=archive_filename)
# Try to create a simple PDF version based on the original filename
# but preserve any directory structure from the template
if str(base_generated.parent) != ".":
# Has directory structure, preserve it
simple_pdf_name = base_generated.parent / (Path(doc.filename).stem + ".pdf")
else:
# No directory structure
simple_pdf_name = Path(Path(doc.filename).stem + ".pdf")
if simple_pdf_name == old_filename or not (root / simple_pdf_name).exists():
return simple_pdf_name
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
if new_filename == old_filename or not os.path.exists(
os.path.join(root, new_filename),
):
return new_filename
counter = 0
@@ -93,7 +84,7 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
# still the same as before.
return new_filename
if (root / new_filename).exists():
if os.path.exists(os.path.join(root, new_filename)):
counter += 1
else:
return new_filename
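Concretely, the counter loop above yields the first collision-free candidate (template and collisions hypothetical):

#   "2024/invoice.pdf"      exists
#   "2024/invoice_01.pdf"   exists
#   "2024/invoice_02.pdf"   free -> returned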
@@ -105,8 +96,8 @@ def generate_filename(
counter=0,
append_gpg=True,
archive_filename=False,
) -> Path:
):
base_path: Path | None = None
path = ""
def format_filename(document: Document, template_str: str) -> str | None:
rendered_filename = validate_filepath_template_and_render(
@@ -143,34 +134,17 @@ def generate_filename(
# If we have one, render it
if filename_format is not None:
rendered_path: str | None = format_filename(doc, filename_format)
if rendered_path:
base_path = Path(rendered_path)
path = format_filename(doc, filename_format)
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type
if base_path:
# Split the path into directory and filename parts
directory = base_path.parent
# Use the full name (not just stem) as the base filename
base_filename = base_path.name
# Build the final filename with counter and filetype
final_filename = f"{base_filename}{counter_str}{filetype_str}"
# If we have a directory component, include it
if str(directory) != ".":
full_path = directory / final_filename
else:
full_path = Path(final_filename)
else:
# No template, use document ID
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
full_path = Path(final_filename)
if path:
filename = f"{path}{counter_str}{filetype_str}"
else:
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
# Add GPG extension if needed
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
filename += ".gpg"
return full_path
return filename
View File
@@ -236,7 +236,10 @@ class Command(CryptMixin, BaseCommand):
# now make an archive in the original target, with all files stored
if self.zip_export and temp_dir is not None:
shutil.make_archive(
self.original_target / options["zip_name"],
os.path.join(
self.original_target,
options["zip_name"],
),
format="zip",
root_dir=temp_dir.name,
)
@@ -339,7 +342,7 @@ class Command(CryptMixin, BaseCommand):
)
if self.split_manifest:
manifest_name = base_name.with_name(f"{base_name.stem}-manifest.json")
manifest_name = Path(base_name + "-manifest.json")
if self.use_folder_prefix:
manifest_name = Path("json") / manifest_name
manifest_name = (self.target / manifest_name).resolve()
@@ -413,7 +416,7 @@ class Command(CryptMixin, BaseCommand):
else:
item.unlink()
def generate_base_name(self, document: Document) -> Path:
def generate_base_name(self, document: Document) -> str:
"""
Generates a unique name for the document, one which hasn't already been exported (or will be)
"""
@@ -433,12 +436,12 @@ class Command(CryptMixin, BaseCommand):
break
else:
filename_counter += 1
return Path(base_name)
return base_name
def generate_document_targets(
self,
document: Document,
base_name: Path,
base_name: str,
document_dict: dict,
) -> tuple[Path, Path | None, Path | None]:
""" """
@@ -446,25 +449,25 @@ class Command(CryptMixin, BaseCommand):
""" """
original_name = base_name
if self.use_folder_prefix:
original_name = Path("originals") / original_name
original_name = os.path.join("originals", original_name)
original_target = (self.target / original_name).resolve()
original_target = (self.target / Path(original_name)).resolve()
document_dict[EXPORTER_FILE_NAME] = str(original_name)
document_dict[EXPORTER_FILE_NAME] = original_name
if not self.no_thumbnail:
thumbnail_name = base_name.parent / (base_name.stem + "-thumbnail.webp")
thumbnail_name = base_name + "-thumbnail.webp"
if self.use_folder_prefix:
thumbnail_name = Path("thumbnails") / thumbnail_name
thumbnail_name = os.path.join("thumbnails", thumbnail_name)
thumbnail_target = (self.target / thumbnail_name).resolve()
thumbnail_target = (self.target / Path(thumbnail_name)).resolve()
document_dict[EXPORTER_THUMBNAIL_NAME] = str(thumbnail_name)
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
else:
thumbnail_target = None
if not self.no_archive and document.has_archive_version:
archive_name = base_name.parent / (base_name.stem + "-archive.pdf")
archive_name = base_name + "-archive.pdf"
if self.use_folder_prefix:
archive_name = Path("archive") / archive_name
archive_name = os.path.join("archive", archive_name)
archive_target = (self.target / archive_name).resolve()
archive_target = (self.target / Path(archive_name)).resolve()
document_dict[EXPORTER_ARCHIVE_NAME] = str(archive_name)
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
else:
archive_target = None
@@ -569,7 +572,7 @@ class Command(CryptMixin, BaseCommand):
perform_copy = False
if target.exists():
source_stat = source.stat()
source_stat = os.stat(source)
target_stat = target.stat()
if self.compare_checksums and source_checksum:
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
View File
@@ -63,11 +63,11 @@ class Document:
/ "documents" / "documents"
/ "originals" / "originals"
/ f"{self.pk:07}.{self.file_type}.gpg" / f"{self.pk:07}.{self.file_type}.gpg"
) ).as_posix()
@property @property
def source_file(self): def source_file(self):
return self.source_path.open("rb") return Path(self.source_path).open("rb")
@property @property
def file_name(self): def file_name(self):
View File
@@ -1293,7 +1293,6 @@ class BulkEditSerializer(
"merge", "merge",
"split", "split",
"delete_pages", "delete_pages",
"edit_pdf",
], ],
label="Method", label="Method",
write_only=True, write_only=True,
@@ -1367,10 +1366,7 @@ class BulkEditSerializer(
return bulk_edit.split
elif method == "delete_pages":
return bulk_edit.delete_pages
elif method == "edit_pdf":
return bulk_edit.edit_pdf
else: # pragma: no cover
# This will never happen as it is handled by the ChoiceField
else:
raise serializers.ValidationError("Unsupported method.")
def _validate_parameters_tags(self, parameters):
@@ -1524,47 +1520,6 @@ class BulkEditSerializer(
else:
parameters["archive_fallback"] = False
def _validate_parameters_edit_pdf(self, parameters, document_id):
if "operations" not in parameters:
raise serializers.ValidationError("operations not specified")
if not isinstance(parameters["operations"], list):
raise serializers.ValidationError("operations must be a list")
for op in parameters["operations"]:
if not isinstance(op, dict):
raise serializers.ValidationError("invalid operation entry")
if "page" not in op or not isinstance(op["page"], int):
raise serializers.ValidationError("page must be an integer")
if "rotate" in op and not isinstance(op["rotate"], int):
raise serializers.ValidationError("rotate must be an integer")
if "doc" in op and not isinstance(op["doc"], int):
raise serializers.ValidationError("doc must be an integer")
if "update_document" in parameters:
if not isinstance(parameters["update_document"], bool):
raise serializers.ValidationError("update_document must be a boolean")
else:
parameters["update_document"] = False
if "include_metadata" in parameters:
if not isinstance(parameters["include_metadata"], bool):
raise serializers.ValidationError("include_metadata must be a boolean")
else:
parameters["include_metadata"] = True
if parameters["update_document"]:
max_idx = max(op.get("doc", 0) for op in parameters["operations"])
if max_idx > 0:
raise serializers.ValidationError(
"update_document only allowed with a single output document",
)
doc = Document.objects.get(id=document_id)
# doc existence is already validated
if doc.page_count:
for op in parameters["operations"]:
if op["page"] < 1 or op["page"] > doc.page_count:
raise serializers.ValidationError(
f"Page {op['page']} is out of bounds for document with {doc.page_count} pages.",
)
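For reference, a parameters payload that passes every check above (values illustrative):

{
    "operations": [{"page": 1}, {"page": 2, "rotate": 90}],
    "update_document": True,
    "include_metadata": False,
}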
def validate(self, attrs):
method = attrs["method"]
parameters = attrs["parameters"]
@@ -1599,12 +1554,6 @@ class BulkEditSerializer(
self._validate_parameters_delete_pages(parameters)
elif method == bulk_edit.merge:
self._validate_parameters_merge(parameters)
elif method == bulk_edit.edit_pdf:
if len(attrs["documents"]) > 1:
raise serializers.ValidationError(
"Edit PDF method only supports one document",
)
self._validate_parameters_edit_pdf(parameters, attrs["documents"][0])
return attrs
View File
@@ -1,12 +1,9 @@
from __future__ import annotations
from __future__ import annotations from __future__ import annotations
import ipaddress
import logging
import os
import shutil
import socket
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.parse import urlparse
import httpx
from celery import shared_task
@@ -54,6 +51,8 @@ from documents.permissions import set_permissions_for_object
from documents.templating.workflows import parse_w_workflow_placeholders
if TYPE_CHECKING:
from pathlib import Path
from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
@@ -330,16 +329,15 @@ def cleanup_document_deletion(sender, instance, **kwargs):
# Find a non-conflicting filename in case a document with the same
# name was moved to trash earlier
counter = 0
old_filename = Path(instance.source_path).name
old_filebase = Path(old_filename).stem
old_fileext = Path(old_filename).suffix
old_filename = os.path.split(instance.source_path)[1]
(old_filebase, old_fileext) = os.path.splitext(old_filename)
while True:
new_file_path = settings.EMPTY_TRASH_DIR / (
old_filebase + (f"_{counter:02}" if counter else "") + old_fileext
)
if new_file_path.exists():
if os.path.exists(new_file_path):
counter += 1
else:
break
@@ -363,26 +361,26 @@ def cleanup_document_deletion(sender, instance, **kwargs):
files += (instance.source_path,)
for filename in files:
if filename and filename.is_file():
if filename and os.path.isfile(filename):
try:
filename.unlink()
os.unlink(filename)
logger.debug(f"Deleted file {filename}.")
except OSError as e:
logger.warning(
f"While deleting document {instance!s}, the file "
f"{filename} could not be deleted: {e}",
)
elif filename and not filename.is_file():
elif filename and not os.path.isfile(filename):
logger.warning(f"Expected {filename} to exist, but it did not")
delete_empty_directories(
Path(instance.source_path).parent,
os.path.dirname(instance.source_path),
root=settings.ORIGINALS_DIR,
)
if instance.has_archive_version:
delete_empty_directories(
Path(instance.archive_path).parent,
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR,
)
@@ -403,14 +401,14 @@ def update_filename_and_move_files(
if isinstance(instance, CustomFieldInstance):
instance = instance.document
def validate_move(instance, old_path: Path, new_path: Path):
def validate_move(instance, old_path, new_path):
if not old_path.is_file():
if not os.path.isfile(old_path):
# Can't do anything if the old file does not exist anymore.
msg = f"Document {instance!s}: File {old_path} doesn't exist."
logger.fatal(msg)
raise CannotMoveFilesException(msg)
if new_path.is_file():
if os.path.isfile(new_path):
# Can't do anything if the new file already exists. Skip updating file.
msg = f"Document {instance!s}: Cannot rename file since target path {new_path} already exists."
logger.warning(msg)
@@ -438,20 +436,16 @@ def update_filename_and_move_files(
old_filename = instance.filename
old_source_path = instance.source_path
# Need to convert to string to be able to save it to the db
instance.filename = str(generate_unique_filename(instance))
instance.filename = generate_unique_filename(instance)
move_original = old_filename != instance.filename
old_archive_filename = instance.archive_filename
old_archive_path = instance.archive_path
if instance.has_archive_version:
# Need to convert to string to be able to save it to the db
instance.archive_filename = str(
instance.archive_filename = generate_unique_filename(
generate_unique_filename(
instance,
archive_filename=True,
),
)
move_archive = old_archive_filename != instance.archive_filename
@@ -493,11 +487,11 @@ def update_filename_and_move_files(
# Try to move files to their original location.
try:
if move_original and instance.source_path.is_file():
if move_original and os.path.isfile(instance.source_path):
logger.info("Restoring previous original path")
shutil.move(instance.source_path, old_source_path)
if move_archive and instance.archive_path.is_file():
if move_archive and os.path.isfile(instance.archive_path):
logger.info("Restoring previous archive path")
shutil.move(instance.archive_path, old_archive_path)
@@ -518,15 +512,17 @@ def update_filename_and_move_files(
# finally, remove any empty sub folders. This will do nothing if
# something has failed above.
if not old_source_path.is_file():
if not os.path.isfile(old_source_path):
delete_empty_directories(
Path(old_source_path).parent,
os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR,
)
if instance.has_archive_version and not old_archive_path.is_file():
if instance.has_archive_version and not os.path.isfile(
old_archive_path,
):
delete_empty_directories(
Path(old_archive_path).parent,
os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR,
)
@@ -663,28 +659,6 @@ def run_workflows_updated(sender, document: Document, logging_group=None, **kwar
)
def _is_public_ip(ip: str) -> bool:
try:
obj = ipaddress.ip_address(ip)
return not (
obj.is_private
or obj.is_loopback
or obj.is_link_local
or obj.is_multicast
or obj.is_unspecified
)
except ValueError: # pragma: no cover
return False
def _resolve_first_ip(host: str) -> str | None:
try:
info = socket.getaddrinfo(host, None)
return info[0][4][0] if info else None
except Exception: # pragma: no cover
return None
@shared_task(
retry_backoff=True,
autoretry_for=(httpx.HTTPStatusError,),
@@ -699,35 +673,11 @@ def send_webhook(
*,
as_json: bool = False,
):
p = urlparse(url)
if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
logger.warning("Webhook blocked: invalid scheme/hostname")
raise ValueError("Invalid URL scheme or hostname.")
port = p.port or (443 if p.scheme == "https" else 80)
if (
len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
and port not in settings.WEBHOOKS_ALLOWED_PORTS
):
logger.warning("Webhook blocked: port not permitted")
raise ValueError("Destination port not permitted.")
ip = _resolve_first_ip(p.hostname)
if not ip or (
not _is_public_ip(ip) and not settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS
):
logger.warning("Webhook blocked: destination not allowed")
raise ValueError("Destination host is not allowed.")
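Under the guards above, calls like the following would be rejected (URLs illustrative, and assuming settings that only allow http/https to public hosts):

# send_webhook("ftp://example.com/hook", ...)           -> ValueError (scheme not allowed)
# send_webhook("http://169.254.169.254/meta-data", ...) -> ValueError (link-local target)
# send_webhook("http://internal.svc:9000/hook", ...)    -> ValueError if 9000 is not in
#                                                          WEBHOOKS_ALLOWED_PORTS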
try:
post_args = {
"url": url,
"headers": {
k: v for k, v in (headers or {}).items() if k.lower() != "host"
},
"files": files or None,
"timeout": 5.0,
"follow_redirects": False,
"headers": headers,
"files": files,
}
if as_json:
post_args["json"] = data
@@ -748,6 +698,15 @@ def send_webhook(
)
raise e
logger.info(
f"Webhook sent to {url}",
)
except Exception as e:
logger.error(
f"Failed attempt sending webhook to {url}: {e}",
)
raise e
def run_workflows(
trigger_type: WorkflowTrigger.WorkflowTriggerType,
@@ -1260,7 +1219,10 @@ def run_workflows(
)
files = None
if action.webhook.include_document:
with original_file.open("rb") as f:
with open(
original_file,
"rb",
) as f:
files = {
"file": (
filename,
View File
@@ -1,34 +0,0 @@
Sample textual document content.
Include as many characters as possible, to check the classifier's vectorization.
Hey 00, this is "a" test0707 content.
This is an example document — created on 2025-06-25.
Digits: 0123456789
Punctuation: . , ; : ! ? ' " ( ) [ ] { } —
English text: The quick brown fox jumps over the lazy dog.
English stop words: We've been doing it before.
Accented Latin (diacritics): àâäæçéèêëîïôœùûüÿñ
Arabic: لقد قام المترجم بعمل جيد
Greek: Αλφα, Βήτα, Γάμμα, Δέλτα, Ωμέγα
Cyrillic: Привет, как дела? Добро пожаловать!
Chinese (Simplified): 你好,世界!今天的天气很好。
Chinese (Traditional): 歡迎來到世界,今天天氣很好。
Japanese (Kanji, Hiragana, Katakana): 東京へ行きます。カタカナ、ひらがな、漢字。
Korean (Hangul): 안녕하세요. 오늘 날씨 어때요?
Arabic: مرحبًا، كيف حالك؟
Hebrew: שלום, מה שלומך?
Emoji: 😀 🐍 📘 ✅ ©️ 🇺🇳
Symbols: © ® ™ § ¶ † ‡ ∞ µ ∑ ∆ √
Math: ∫₀^∞ x² dx = ∞, π ≈ 3.14159, ∇·E = ρ/ε₀
Currency: 1$ € ¥ £ ₹
Date formats: 25/06/2025, June 25, 2025, 2025年6月25日
Quote in French: « Bonjour, ça va ? »
Quote in German: „Guten Tag! Wie geht's?“
Newline test:
\r\n
\r
Tab\ttest\tspacing
/ = +) ( []) ~ * #192 +33601010101 § ¤
End of document.
View File
@@ -1 +0,0 @@
sample textual document content include as many characters as possible to check the classifier s vectorization hey 00 this is a test0707 content this is an example document created on 2025 06 25 digits 0123456789 punctuation english text the quick brown fox jumps over the lazy dog english stop words we ve been doing it before accented latin diacritics àâäæçéèêëîïôœùûüÿñ arabic لقد قام المترجم بعمل جيد greek αλφα βήτα γάμμα δέλτα ωμέγα cyrillic привет как дела добро пожаловать chinese simplified 你好 世界 今天的天气很好 chinese traditional 歡迎來到世界 今天天氣很好 japanese kanji hiragana katakana 東京へ行きます カタカナ ひらがな 漢字 korean hangul 안녕하세요 오늘 날씨 어때요 arabic مرحب ا كيف حالك hebrew שלום מה שלומך emoji symbols µ math ₀ x² dx π 3 14159 e ρ ε₀ currency 1 date formats 25 06 2025 june 25 2025 2025年6月25日 quote in french bonjour ça va quote in german guten tag wie geht s newline test r n r tab ttest tspacing 192 33601010101 end of document
View File
@@ -1 +0,0 @@
sampl textual document content includ mani charact possibl check classifi vector hey 00 test0707 content exampl document creat 2025 06 25 digit 0123456789 punctuat english text quick brown fox jump lazi dog english stop word accent latin diacrit àâäæçéèêëîïôœùûüÿñ arab لقد قام المترجم بعمل جيد greek αλφα βήτα γάμμα δέλτα ωμέγα cyril привет как дела добро пожаловать chines simplifi 你好 世界 今天的天气很好 chines tradit 歡迎來到世界 今天天氣很好 japanes kanji hiragana katakana 東京へ行きます カタカナ ひらがな 漢字 korean hangul 안녕하세요 오늘 날씨 어때요 arab مرحب ا كيف حالك hebrew שלום מה שלומך emoji symbol µ math ₀ x² dx π 3 14159 e ρ ε₀ currenc 1 date format 25 06 2025 june 25 2025 2025年6月25日 quot french bonjour ça va quot german guten tag wie geht newlin test r n r tab ttest tspace 192 33601010101 end document
View File
@@ -41,7 +41,6 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
title="B", title="B",
correspondent=self.c1, correspondent=self.c1,
document_type=self.dt1, document_type=self.dt1,
page_count=5,
) )
self.doc3 = Document.objects.create( self.doc3 = Document.objects.create(
checksum="C", checksum="C",
@@ -1370,218 +1369,6 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"pages must be a list of integers", response.content)
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf(self, m):
self.setup_mock(m, "edit_pdf")
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertCountEqual(args[0], [self.doc2.id])
self.assertEqual(kwargs["operations"], [{"page": 1}])
self.assertEqual(kwargs["user"], self.user)
def test_edit_pdf_invalid_params(self):
# multiple documents
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"Edit PDF method only supports one document", response.content)
# no operations specified
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"operations not specified", response.content)
# operations not a list
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": "not_a_list"},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"operations must be a list", response.content)
# invalid operation
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": ["invalid_operation"]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"invalid operation entry", response.content)
# page not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"page must be an integer", response.content)
# rotate not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "rotate": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"rotate must be an integer", response.content)
# doc not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "doc": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"doc must be an integer", response.content)
# update_document not a boolean
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"update_document": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"update_document must be a boolean", response.content)
# include_metadata not a boolean
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"include_metadata": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"include_metadata must be a boolean", response.content)
# update_document True but output would be multiple documents
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"update_document": True,
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(
b"update_document only allowed with a single output document",
response.content,
)
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf_page_out_of_bounds(self, m):
"""
GIVEN:
- API data for editing PDF is called
- The page number is out of bounds
WHEN:
- API is called
THEN:
- The API fails with a correct error code
"""
self.setup_mock(m, "edit_pdf")
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 99}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"out of bounds", response.content)
@override_settings(AUDIT_LOG_ENABLED=True)
def test_bulk_edit_audit_log_enabled_simple_field(self):
    """

View File

@@ -909,156 +909,3 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_str = "Error deleting pages from document"
self.assertIn(expected_str, error_str)
mock_update_archive_file.assert_not_called()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_basic_operations(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with two operations to split the doc and rotate pages
THEN:
- A grouped task is generated and delay() is called
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1, "rotate": 90}]
result = bulk_edit.edit_pdf(doc_ids, operations)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_user_override(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with user override
THEN:
- Task is created with user context
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1}]
user = User.objects.create(username="editor")
result = bulk_edit.edit_pdf(doc_ids, operations, user=user)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_delete_original(self, mock_consume_file, mock_chord):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with delete_original=True
THEN:
- Task group is triggered
"""
mock_chord.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
result = bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
self.assertEqual(result, "OK")
mock_chord.assert_called_once()
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
def test_edit_pdf_with_update_document(self, mock_update_document):
"""
GIVEN:
- A single existing PDF document
WHEN:
- edit_pdf is called with update_document=True and a single output
THEN:
- The original document is updated in-place
- The update_document_content_maybe_archive_file task is triggered
"""
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
original_checksum = self.doc2.checksum
original_page_count = self.doc2.page_count
result = bulk_edit.edit_pdf(
doc_ids,
operations=operations,
update_document=True,
delete_original=False,
)
self.assertEqual(result, "OK")
self.doc2.refresh_from_db()
self.assertNotEqual(self.doc2.checksum, original_checksum)
self.assertNotEqual(self.doc2.page_count, original_page_count)
mock_update_document.assert_called_once_with(document_id=self.doc2.id)
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_without_metadata(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with include_metadata=False
THEN:
- Tasks are created with empty metadata
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}]
result = bulk_edit.edit_pdf(doc_ids, operations, include_metadata=False)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_open_failure(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf fails to open PDF
THEN:
- Task group is not called
"""
doc_ids = [self.doc2.id]
operations = [
{"page": 9999}, # invalid page, forces error during PDF load
]
with self.assertLogs("paperless.bulk_edit", level="ERROR"):
with self.assertRaises(Exception):
bulk_edit.edit_pdf(doc_ids, operations)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_multiple_outputs_with_update_flag_errors(
self,
mock_consume_file,
mock_group,
):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with multiple outputs and update_document=True
THEN:
- An error is logged and task group is not called
"""
doc_ids = [self.doc2.id]
operations = [
{"page": 1, "doc": 0},
{"page": 2, "doc": 1},
]
with self.assertLogs("paperless.bulk_edit", level="ERROR"):
with self.assertRaises(ValueError):
bulk_edit.edit_pdf(doc_ids, operations, update_document=True)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()
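The mocks in these tests outline the dispatch model: each output PDF becomes a consume_file task signature, and the signatures run as a celery group, or as a chord when the originals must be deleted afterwards. A minimal sketch of that branching, with hypothetical signature lists standing in for the real consume_file.s(...) calls:

    from celery import chord, group

    def dispatch_outputs(consume_sigs, *, delete_original=False, cleanup_sig=None):
        # consume_sigs: one task signature per output document (hypothetical)
        if delete_original:
            # chord: run every consume task, then a cleanup callback that
            # removes the original document(s)
            chord(consume_sigs)(cleanup_sig)
        else:
            group(consume_sigs).delay()
        return "OK"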

View File

@@ -1,45 +0,0 @@
import pickle
from documents.caching import StoredLRUCache
def test_lru_cache_entries():
CACHE_TTL = 1
# LRU cache with a capacity of 2 elements
cache = StoredLRUCache("test_lru_cache_key", 2, backend_ttl=CACHE_TTL)
cache.set(1, 1)
cache.set(2, 2)
assert cache.get(2) == 2
assert cache.get(1) == 1
# The oldest entry (2) should be removed
cache.set(3, 3)
assert cache.get(3) == 3
assert not cache.get(2)
assert cache.get(1) == 1
# Save the cache, restore it and check it overwrites the current cache in memory
cache.save()
cache.set(4, 4)
assert not cache.get(3)
cache.load()
assert not cache.get(4)
assert cache.get(3) == 3
assert cache.get(1) == 1
def test_stored_lru_cache_key_ttl(mocker):
mock_backend = mocker.Mock()
cache = StoredLRUCache("test_key", backend=mock_backend, backend_ttl=321)
# Simulate storing values
cache.set("x", "X")
cache.set("y", "Y")
cache.save()
# Assert backend.set was called with pickled data, key and TTL
mock_backend.set.assert_called_once()
key, data, timeout = mock_backend.set.call_args[0]
assert key == "test_key"
assert timeout == 321
assert pickle.loads(data) == {"x": "X", "y": "Y"}
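The deleted test pins down a small contract for StoredLRUCache: LRU eviction at a fixed capacity, and save()/load() that round-trip the mapping through a backend as a pickled dict with a TTL. A minimal sketch consistent with those assertions, assuming a Django-cache-style backend with a positional set(key, value, timeout); the real class lives in documents.caching:

    import pickle
    from collections import OrderedDict

    from django.core.cache import cache as default_backend  # assumed default

    class StoredLRUCacheSketch:
        def __init__(self, key, capacity=128, *, backend=None, backend_ttl=None):
            self._key = key
            self._capacity = capacity
            self._backend = backend or default_backend
            self._backend_ttl = backend_ttl
            self._data = OrderedDict()

        def get(self, key, default=None):
            if key in self._data:
                self._data.move_to_end(key)  # mark as most recently used
                return self._data[key]
            return default

        def set(self, key, value):
            self._data[key] = value
            self._data.move_to_end(key)
            if len(self._data) > self._capacity:
                self._data.popitem(last=False)  # evict least recently used entry

        def save(self):
            # Matches the (key, pickled data, timeout) positional call the
            # test asserts on
            self._backend.set(self._key, pickle.dumps(dict(self._data)), self._backend_ttl)

        def load(self):
            data = self._backend.get(self._key)
            if data is not None:
                self._data = OrderedDict(pickle.loads(data))  # overwrite in-memory state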

View File

@@ -21,7 +21,7 @@ from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
-def dummy_preprocess(content: str, **kwargs):
+def dummy_preprocess(content: str):
    """
    Simpler, faster pre-processing for testing purposes
    """
@@ -223,26 +223,11 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.generate_test_data()
self.classifier.train()
-with (
-    mock.patch.object(
-        self.classifier.data_vectorizer,
-        "transform",
-        wraps=self.classifier.data_vectorizer.transform,
-    ) as mock_transform,
-    mock.patch.object(
-        self.classifier,
-        "preprocess_content",
-        wraps=self.classifier.preprocess_content,
-    ) as mock_preprocess_content,
-):
self.assertEqual(
    self.classifier.predict_correspondent(self.doc1.content),
    self.c1.pk,
)
-self.assertEqual(
-    self.classifier.predict_correspondent(self.doc2.content),
-    None,
-)
+self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
self.assertListEqual(
    self.classifier.predict_tags(self.doc1.content),
    [self.t1.pk],
@@ -255,15 +240,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
    self.classifier.predict_document_type(self.doc1.content),
    self.dt.pk,
)
-self.assertEqual(
-    self.classifier.predict_document_type(self.doc2.content),
-    None,
-)
+self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
-# Check that the classifier vectorized content and text preprocessing has been cached
-# It should be called once per document (doc1 and doc2)
-self.assertEqual(mock_preprocess_content.call_count, 2)
-self.assertEqual(mock_transform.call_count, 2)
def test_no_retrain_if_no_change(self):
    """
@@ -717,67 +694,3 @@ class TestClassifier(DirectoriesMixin, TestCase):
mock_load.side_effect = Exception()
with self.assertRaises(Exception):
    load_classifier(raise_exception=True)
def test_preprocess_content():
"""
GIVEN:
- Advanced text processing is enabled (default)
WHEN:
- Classifier preprocesses a document's content
THEN:
- Processed content matches the expected output (stemmed words)
"""
with (Path(__file__).parent / "samples" / "content.txt").open("r") as f:
content = f.read()
with (Path(__file__).parent / "samples" / "preprocessed_content_advanced.txt").open(
"r",
) as f:
expected_preprocess_content = f.read().rstrip()
classifier = DocumentClassifier()
result = classifier.preprocess_content(content)
assert result == expected_preprocess_content
def test_preprocess_content_nltk_disabled():
"""
GIVEN:
- Advanced text processing is disabled
WHEN:
- Classifier preprocesses a document's content
THEN:
- Processed content matches the expected output (unstemmed words)
"""
with (Path(__file__).parent / "samples" / "content.txt").open("r") as f:
content = f.read()
with (Path(__file__).parent / "samples" / "preprocessed_content.txt").open(
"r",
) as f:
expected_preprocess_content = f.read().rstrip()
classifier = DocumentClassifier()
with mock.patch("documents.classifier.ADVANCED_TEXT_PROCESSING_ENABLED", new=False):
result = classifier.preprocess_content(content)
assert result == expected_preprocess_content
def test_preprocess_content_nltk_load_fail(mocker):
"""
GIVEN:
- NLTK stop words fail to load
WHEN:
- Classifier preprocesses a document's content
THEN:
- Processed content matches the expected output (unstemmed words)
"""
_module = mocker.MagicMock(name="nltk_corpus_mock")
_module.stopwords.words.side_effect = AttributeError()
mocker.patch.dict("sys.modules", {"nltk.corpus": _module})
classifier = DocumentClassifier()
with (Path(__file__).parent / "samples" / "content.txt").open("r") as f:
content = f.read()
with (Path(__file__).parent / "samples" / "preprocessed_content.txt").open(
"r",
) as f:
expected_preprocess_content = f.read().rstrip()
result = classifier.preprocess_content(content)
assert result == expected_preprocess_content
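Together these deleted tests specify the preprocessing contract: content is always normalised, while stemming and stop-word removal apply only when advanced text processing is enabled and the NLTK data actually loads. A rough standalone sketch of that control flow (the normalisation details and module-level flag are assumptions; the real method is DocumentClassifier.preprocess_content):

    import re

    ADVANCED_TEXT_PROCESSING_ENABLED = True  # mirrors the flag the tests patch

    def preprocess_content(content: str) -> str:
        # Unconditional normalisation (assumed: lowercase, collapse whitespace)
        content = re.sub(r"\s+", " ", content.lower()).strip()
        if not ADVANCED_TEXT_PROCESSING_ENABLED:
            return content
        try:
            from nltk.corpus import stopwords
            from nltk.stem import SnowballStemmer
            from nltk.tokenize import word_tokenize

            stops = set(stopwords.words("english"))
            stemmer = SnowballStemmer("english")
            tokens = word_tokenize(content)
            return " ".join(stemmer.stem(w) for w in tokens if w not in stops)
        except AttributeError:
            # NLTK data failed to load: return the unstemmed text, as
            # test_preprocess_content_nltk_load_fail expects
            return content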

View File

@@ -41,9 +41,11 @@ class TestDocument(TestCase):
Path(file_path).touch()
Path(thumb_path).touch()
-with mock.patch("documents.signals.handlers.Path.unlink") as mock_unlink:
+with mock.patch("documents.signals.handlers.os.unlink") as mock_unlink:
    document.delete()
    empty_trash([document.pk])
+   mock_unlink.assert_any_call(file_path)
+   mock_unlink.assert_any_call(thumb_path)
    self.assertEqual(mock_unlink.call_count, 2)
def test_document_soft_delete(self):
@@ -61,7 +63,7 @@
Path(file_path).touch()
Path(thumb_path).touch()
-with mock.patch("documents.signals.handlers.Path.unlink") as mock_unlink:
+with mock.patch("documents.signals.handlers.os.unlink") as mock_unlink:
    document.delete()
    self.assertEqual(mock_unlink.call_count, 0)

View File

@@ -34,12 +34,12 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
-self.assertEqual(generate_filename(document), Path(f"{document.pk:07d}.pdf"))
+self.assertEqual(generate_filename(document), f"{document.pk:07d}.pdf")
document.storage_type = Document.STORAGE_TYPE_GPG
self.assertEqual(
    generate_filename(document),
-   Path(f"{document.pk:07d}.pdf.gpg"),
+   f"{document.pk:07d}.pdf.gpg",
)
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
@@ -58,12 +58,12 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.filename = generate_filename(document)
# Ensure that filename is properly generated
-self.assertEqual(document.filename, Path("none/none.pdf"))
+self.assertEqual(document.filename, "none/none.pdf")
# Enable encryption and check again
document.storage_type = Document.STORAGE_TYPE_GPG
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("none/none.pdf.gpg"))
+self.assertEqual(document.filename, "none/none.pdf.gpg")
document.save()
@@ -96,7 +96,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("none/none.pdf"))
+self.assertEqual(document.filename, "none/none.pdf")
create_source_path_directory(document.source_path)
document.source_path.touch()
@@ -137,7 +137,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("none/none.pdf"))
+self.assertEqual(document.filename, "none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
@@ -247,7 +247,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("none/none.pdf"))
+self.assertEqual(document.filename, "none/none.pdf")
create_source_path_directory(document.source_path)
@@ -269,11 +269,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
dt = DocumentType.objects.create(name="my_doc_type")
d = Document.objects.create(title="the_doc", mime_type="application/pdf")
-self.assertEqual(generate_filename(d), Path("none - the_doc.pdf"))
+self.assertEqual(generate_filename(d), "none - the_doc.pdf")
d.document_type = dt
-self.assertEqual(generate_filename(d), Path("my_doc_type - the_doc.pdf"))
+self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
@override_settings(FILENAME_FORMAT="{asn} - {title}")
def test_asn(self):
@@ -289,8 +289,8 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
archive_serial_number=None,
checksum="B",
)
-self.assertEqual(generate_filename(d1), Path("652 - the_doc.pdf"))
-self.assertEqual(generate_filename(d2), Path("none - the_doc.pdf"))
+self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
+self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
@override_settings(FILENAME_FORMAT="{title} {tag_list}")
def test_tag_list(self):
@@ -298,7 +298,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc.tags.create(name="tag2")
doc.tags.create(name="tag1")
-self.assertEqual(generate_filename(doc), Path("doc1 tag1,tag2.pdf"))
+self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")
doc = Document.objects.create(
    title="doc2",
@@ -306,7 +306,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    mime_type="application/pdf",
)
-self.assertEqual(generate_filename(doc), Path("doc2.pdf"))
+self.assertEqual(generate_filename(doc), "doc2.pdf")
@override_settings(FILENAME_FORMAT="//etc/something/{title}")
def test_filename_relative(self):
@@ -330,11 +330,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    created=d1,
)
-self.assertEqual(generate_filename(doc1), Path("2020-03-06.pdf"))
+self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
doc1.created = datetime.date(2020, 11, 16)
-self.assertEqual(generate_filename(doc1), Path("2020-11-16.pdf"))
+self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
    FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
@@ -347,11 +347,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    added=d1,
)
-self.assertEqual(generate_filename(doc1), Path("232-01-09.pdf"))
+self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
-self.assertEqual(generate_filename(doc1), Path("2020-11-16.pdf"))
+self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
    FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
@@ -389,11 +389,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
-self.assertEqual(generate_filename(document), Path("0000001.pdf"))
+self.assertEqual(generate_filename(document), "0000001.pdf")
document.pk = 13579
-self.assertEqual(generate_filename(document), Path("0013579.pdf"))
+self.assertEqual(generate_filename(document), "0013579.pdf")
@override_settings(FILENAME_FORMAT=None)
def test_format_none(self):
@@ -402,7 +402,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
-self.assertEqual(generate_filename(document), Path("0000001.pdf"))
+self.assertEqual(generate_filename(document), "0000001.pdf")
def test_try_delete_empty_directories(self):
    # Create our working directory
@@ -428,7 +428,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
-self.assertEqual(generate_filename(document), Path("0000001.pdf"))
+self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(FILENAME_FORMAT="{created__year}")
def test_invalid_format_key(self):
@@ -437,7 +437,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
-self.assertEqual(generate_filename(document), Path("0000001.pdf"))
+self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(FILENAME_FORMAT="{title}")
def test_duplicates(self):
@@ -564,7 +564,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    value_select="abc123",
)
-self.assertEqual(generate_filename(doc), Path("document_apple.pdf"))
+self.assertEqual(generate_filename(doc), "document_apple.pdf")
# handler should not have been called
self.assertEqual(m.call_count, 0)
@@ -576,7 +576,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    ],
}
cf.save()
-self.assertEqual(generate_filename(doc), Path("document_aubergine.pdf"))
+self.assertEqual(generate_filename(doc), "document_aubergine.pdf")
# handler should have been called
self.assertEqual(m.call_count, 1)
@@ -897,7 +897,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=1,
    checksum="1",
)
-self.assertEqual(generate_filename(doc), Path("This. is the title.pdf"))
+self.assertEqual(generate_filename(doc), "This. is the title.pdf")
doc = Document.objects.create(
    title="my\\invalid/../title:yay",
@@ -905,7 +905,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=2,
    checksum="2",
)
-self.assertEqual(generate_filename(doc), Path("my-invalid-..-title-yay.pdf"))
+self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
@override_settings(FILENAME_FORMAT="{created}")
def test_date(self):
@@ -916,7 +916,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=2,
    checksum="2",
)
-self.assertEqual(generate_filename(doc), Path("2020-05-21.pdf"))
+self.assertEqual(generate_filename(doc), "2020-05-21.pdf")
def test_dynamic_path(self):
    """
@@ -935,7 +935,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    checksum="2",
    storage_path=StoragePath.objects.create(path="TestFolder/{{created}}"),
)
-self.assertEqual(generate_filename(doc), Path("TestFolder/2020-06-25.pdf"))
+self.assertEqual(generate_filename(doc), "TestFolder/2020-06-25.pdf")
def test_dynamic_path_with_none(self):
    """
@@ -956,7 +956,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    checksum="2",
    storage_path=StoragePath.objects.create(path="{{asn}} - {{created}}"),
)
-self.assertEqual(generate_filename(doc), Path("none - 2020-06-25.pdf"))
+self.assertEqual(generate_filename(doc), "none - 2020-06-25.pdf")
@override_settings(
    FILENAME_FORMAT_REMOVE_NONE=True,
@@ -984,7 +984,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    checksum="2",
    storage_path=sp,
)
-self.assertEqual(generate_filename(doc), Path("TestFolder/2020-06-25.pdf"))
+self.assertEqual(generate_filename(doc), "TestFolder/2020-06-25.pdf")
# Special case, undefined variable, then defined at the start of the template
# This could lead to an absolute path after we remove the leading -none-, but leave the leading /
@@ -993,7 +993,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    "{{ owner_username }}/{{ created_year }}/{{ correspondent }}/{{ title }}"
)
sp.save()
-self.assertEqual(generate_filename(doc), Path("2020/does not matter.pdf"))
+self.assertEqual(generate_filename(doc), "2020/does not matter.pdf")
def test_multiple_doc_paths(self):
    """
@@ -1028,14 +1028,8 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    ),
)
-self.assertEqual(
-    generate_filename(doc_a),
-    Path("ThisIsAFolder/4/2020-06-25.pdf"),
-)
-self.assertEqual(
-    generate_filename(doc_b),
-    Path("SomeImportantNone/2020-07-25.pdf"),
-)
+self.assertEqual(generate_filename(doc_a), "ThisIsAFolder/4/2020-06-25.pdf")
+self.assertEqual(generate_filename(doc_b), "SomeImportantNone/2020-07-25.pdf")
@override_settings(
    FILENAME_FORMAT=None,
@@ -1070,11 +1064,8 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    ),
)
-self.assertEqual(generate_filename(doc_a), Path("0000002.pdf"))
-self.assertEqual(
-    generate_filename(doc_b),
-    Path("SomeImportantNone/2020-07-25.pdf"),
-)
+self.assertEqual(generate_filename(doc_a), "0000002.pdf")
+self.assertEqual(generate_filename(doc_b), "SomeImportantNone/2020-07-25.pdf")
@override_settings(
    FILENAME_FORMAT="{created_year_short}/{created_month_name_short}/{created_month_name}/{title}",
@@ -1087,7 +1078,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=2,
    checksum="2",
)
-self.assertEqual(generate_filename(doc), Path("89/Dec/December/The Title.pdf"))
+self.assertEqual(generate_filename(doc), "89/Dec/December/The Title.pdf")
@override_settings(
    FILENAME_FORMAT="{added_year_short}/{added_month_name}/{added_month_name_short}/{title}",
@@ -1100,7 +1091,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    pk=2,
    checksum="2",
)
-self.assertEqual(generate_filename(doc), Path("84/August/Aug/The Title.pdf"))
+self.assertEqual(generate_filename(doc), "84/August/Aug/The Title.pdf")
@override_settings(
    FILENAME_FORMAT="{owner_username}/{title}",
@@ -1133,8 +1124,8 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    checksum="3",
)
-self.assertEqual(generate_filename(owned_doc), Path("user1/The Title.pdf"))
-self.assertEqual(generate_filename(no_owner_doc), Path("none/does matter.pdf"))
+self.assertEqual(generate_filename(owned_doc), "user1/The Title.pdf")
+self.assertEqual(generate_filename(no_owner_doc), "none/does matter.pdf")
@override_settings(
    FILENAME_FORMAT="{original_name}",
@@ -1180,20 +1171,17 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    original_filename="logs.txt",
)
-self.assertEqual(generate_filename(doc_with_original), Path("someepdf.pdf"))
+self.assertEqual(generate_filename(doc_with_original), "someepdf.pdf")
self.assertEqual(
    generate_filename(tricky_with_original),
-   Path("some pdf with spaces and stuff.pdf"),
+   "some pdf with spaces and stuff.pdf",
)
-self.assertEqual(generate_filename(no_original), Path("none.pdf"))
+self.assertEqual(generate_filename(no_original), "none.pdf")
-self.assertEqual(generate_filename(text_doc), Path("logs.txt"))
+self.assertEqual(generate_filename(text_doc), "logs.txt")
-self.assertEqual(
-    generate_filename(text_doc, archive_filename=True),
-    Path("logs.pdf"),
-)
+self.assertEqual(generate_filename(text_doc, archive_filename=True), "logs.pdf")
@override_settings(
    FILENAME_FORMAT="XX{correspondent}/{title}",
@@ -1218,7 +1206,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
-self.assertEqual(document.filename, Path("XX/doc1.pdf"))
+self.assertEqual(document.filename, "XX/doc1.pdf")
def test_complex_template_strings(self):
    """
@@ -1256,19 +1244,19 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
self.assertEqual(
    generate_filename(doc_a),
-   Path("somepath/some where/2020-06-25/Does Matter.pdf"),
+   "somepath/some where/2020-06-25/Does Matter.pdf",
)
doc_a.checksum = "5"
self.assertEqual(
    generate_filename(doc_a),
-   Path("somepath/2024-10-01/Does Matter.pdf"),
+   "somepath/2024-10-01/Does Matter.pdf",
)
sp.path = "{{ document.title|lower }}{{ document.archive_serial_number - 2 }}"
sp.save()
-self.assertEqual(generate_filename(doc_a), Path("does matter23.pdf"))
+self.assertEqual(generate_filename(doc_a), "does matter23.pdf")
sp.path = """
somepath/
@@ -1287,13 +1275,13 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
sp.save()
self.assertEqual(
    generate_filename(doc_a),
-   Path("somepath/asn-000-200/Does Matter/Does Matter.pdf"),
+   "somepath/asn-000-200/Does Matter/Does Matter.pdf",
)
doc_a.archive_serial_number = 301
doc_a.save()
self.assertEqual(
    generate_filename(doc_a),
-   Path("somepath/asn-201-400/asn-3xx/Does Matter.pdf"),
+   "somepath/asn-201-400/asn-3xx/Does Matter.pdf",
)
@override_settings(
@@ -1322,7 +1310,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
with self.assertLogs(level=logging.WARNING) as capture:
    self.assertEqual(
        generate_filename(doc_a),
-       Path("0000002.pdf"),
+       "0000002.pdf",
    )
    self.assertEqual(len(capture.output), 1)
@@ -1357,7 +1345,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
with self.assertLogs(level=logging.WARNING) as capture:
    self.assertEqual(
        generate_filename(doc_a),
-       Path("0000002.pdf"),
+       "0000002.pdf",
    )
    self.assertEqual(len(capture.output), 1)
@@ -1425,7 +1413,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("invoices/1234.pdf"),
+       "invoices/1234.pdf",
    )
with override_settings(
@@ -1439,7 +1427,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("Some Title_ChoiceOne.pdf"),
+       "Some Title_ChoiceOne.pdf",
    )
# Check for handling Nones well
@@ -1448,7 +1436,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
self.assertEqual(
    generate_filename(doc_a),
-   Path("Some Title_Default Value.pdf"),
+   "Some Title_Default Value.pdf",
)
cf.name = "Invoice Number"
@@ -1461,7 +1449,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("invoices/4567.pdf"),
+       "invoices/4567.pdf",
    )
with override_settings(
@@ -1469,7 +1457,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("invoices/0.pdf"),
+       "invoices/0.pdf",
    )
def test_datetime_filter(self):
@@ -1508,7 +1496,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("2020/Some Title.pdf"),
+       "2020/Some Title.pdf",
    )
with override_settings(
@@ -1516,7 +1504,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("2020-06-25/Some Title.pdf"),
+       "2020-06-25/Some Title.pdf",
    )
with override_settings(
@@ -1524,7 +1512,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc_a),
-       Path("2024-10-01/Some Title.pdf"),
+       "2024-10-01/Some Title.pdf",
    )
def test_slugify_filter(self):
@@ -1551,7 +1539,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc),
-       Path("some-title-with-special-characters.pdf"),
+       "some-title-with-special-characters.pdf",
    )
# Test with correspondent name containing spaces and special chars
@@ -1565,7 +1553,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc),
-       Path("johns-office-workplace/some-title-with-special-characters.pdf"),
+       "johns-office-workplace/some-title-with-special-characters.pdf",
    )
# Test with custom fields
@@ -1584,5 +1572,5 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
):
    self.assertEqual(
        generate_filename(doc),
-       Path("brussels-belgium/some-title-with-special-characters.pdf"),
+       "brussels-belgium/some-title-with-special-characters.pdf",
    )

View File

@@ -209,7 +209,7 @@ class TestExportImport(
    4,
)
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
self.assertEqual(
    self._get_document_from_manifest(manifest, self.d1.id)["fields"]["title"],
@@ -235,7 +235,9 @@
).as_posix()
self.assertIsFile(fname)
self.assertIsFile(
-   self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME],
+   (
+       self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME]
+   ).as_posix(),
)
with Path(fname).open("rb") as f:
@@ -250,7 +252,7 @@
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
    fname = (
        self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]
-   )
+   ).as_posix()
    self.assertIsFile(fname)
with Path(fname).open("rb") as f:
@@ -310,7 +312,7 @@
)
self._do_export()
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
st_mtime_1 = (self.target / "manifest.json").stat().st_mtime
@@ -320,7 +322,7 @@
self._do_export()
m.assert_not_called()
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
st_mtime_2 = (self.target / "manifest.json").stat().st_mtime
Path(self.d1.source_path).touch()
@@ -332,7 +334,7 @@
self.assertEqual(m.call_count, 1)
st_mtime_3 = (self.target / "manifest.json").stat().st_mtime
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
self.assertNotEqual(st_mtime_1, st_mtime_2)
self.assertNotEqual(st_mtime_2, st_mtime_3)
@@ -350,7 +352,7 @@
self._do_export()
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
with mock.patch(
    "documents.management.commands.document_exporter.copy_file_with_basic_stats",
@@ -358,7 +360,7 @@
self._do_export()
m.assert_not_called()
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
self.d2.checksum = "asdfasdgf3"
self.d2.save()
@@ -369,7 +371,7 @@
self._do_export(compare_checksums=True)
self.assertEqual(m.call_count, 1)
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
def test_update_export_deleted_document(self):
    shutil.rmtree(Path(self.dirs.media_dir) / "documents")
@@ -383,7 +385,7 @@
self.assertTrue(len(manifest), 7)
doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
self.assertIsFile(
-   str(self.target / doc_from_manifest[EXPORTER_FILE_NAME]),
+   (self.target / doc_from_manifest[EXPORTER_FILE_NAME]).as_posix(),
)
self.d3.delete()
@@ -395,12 +397,12 @@
    self.d3.id,
)
self.assertIsFile(
-   self.target / doc_from_manifest[EXPORTER_FILE_NAME],
+   (self.target / doc_from_manifest[EXPORTER_FILE_NAME]).as_posix(),
)
manifest = self._do_export(delete=True)
self.assertIsNotFile(
-   self.target / doc_from_manifest[EXPORTER_FILE_NAME],
+   (self.target / doc_from_manifest[EXPORTER_FILE_NAME]).as_posix(),
)
self.assertTrue(len(manifest), 6)
@@ -414,20 +416,20 @@
)
self._do_export(use_filename_format=True)
-self.assertIsFile(self.target / "wow1" / "c.pdf")
+self.assertIsFile((self.target / "wow1" / "c.pdf").as_posix())
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
self.d1.title = "new_title"
self.d1.save()
self._do_export(use_filename_format=True, delete=True)
-self.assertIsNotFile(self.target / "wow1" / "c.pdf")
+self.assertIsNotFile((self.target / "wow1" / "c.pdf").as_posix())
-self.assertIsNotDir(self.target / "wow1")
+self.assertIsNotDir((self.target / "wow1").as_posix())
-self.assertIsFile(self.target / "new_title" / "c.pdf")
+self.assertIsFile((self.target / "new_title" / "c.pdf").as_posix())
-self.assertIsFile(self.target / "manifest.json")
+self.assertIsFile((self.target / "manifest.json").as_posix())
-self.assertIsFile(self.target / "wow2" / "none.pdf")
+self.assertIsFile((self.target / "wow2" / "none.pdf").as_posix())
self.assertIsFile(
-   self.target / "wow2" / "none_01.pdf",
+   (self.target / "wow2" / "none_01.pdf").as_posix(),
)
def test_export_missing_files(self):

View File

@@ -20,7 +20,7 @@ def source_path_before(self):
if self.storage_type == STORAGE_TYPE_GPG:
    fname += ".gpg"
-return Path(settings.ORIGINALS_DIR) / fname
+return (Path(settings.ORIGINALS_DIR) / fname).as_posix()
def file_type_after(self):
@@ -35,7 +35,7 @@ def source_path_after(doc):
if doc.storage_type == STORAGE_TYPE_GPG:
    fname += ".gpg"  # pragma: no cover
-return Path(settings.ORIGINALS_DIR) / fname
+return (Path(settings.ORIGINALS_DIR) / fname).as_posix()
@override_settings(PASSPHRASE="test")

View File

@@ -1,10 +1,8 @@
import shutil
-import socket
from datetime import timedelta
from typing import TYPE_CHECKING
from unittest import mock
-import pytest
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.test import override_settings
@@ -12,7 +10,6 @@ from django.utils import timezone
from guardian.shortcuts import assign_perm
from guardian.shortcuts import get_groups_with_perms
from guardian.shortcuts import get_users_with_perms
-from httpx import HTTPError
from httpx import HTTPStatusError
from pytest_httpx import HTTPXMock
from rest_framework.test import APITestCase
@@ -2828,8 +2825,6 @@ class TestWorkflows(
    content="Test message",
    headers={},
    files=None,
-   follow_redirects=False,
-   timeout=5,
)
expected_str = "Webhook sent to http://paperless-ngx.com"
@@ -2847,8 +2842,6 @@
    data={"message": "Test message"},
    headers={},
    files=None,
-   follow_redirects=False,
-   timeout=5,
)
@mock.patch("httpx.post")
@@ -2969,164 +2962,3 @@ class TestWebhookSend:
    as_json=True,
)
assert httpx_mock.get_request().headers["Content-Type"] == "application/json"
@pytest.fixture
def resolve_to(monkeypatch):
"""
Force DNS resolution to a specific IP for any hostname.
"""
def _set(ip: str):
def fake_getaddrinfo(host, *_args, **_kwargs):
return [(socket.AF_INET, None, None, "", (ip, 0))]
monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo)
return _set
class TestWebhookSecurity:
def test_blocks_invalid_scheme_or_hostname(self, httpx_mock: HTTPXMock):
"""
GIVEN:
- Invalid URL schemes or hostnames
WHEN:
- send_webhook is called with such URLs
THEN:
- ValueError is raised
"""
with pytest.raises(ValueError):
send_webhook(
"ftp://example.com",
data="",
headers={},
files=None,
as_json=False,
)
with pytest.raises(ValueError):
send_webhook(
"http:///nohost",
data="",
headers={},
files=None,
as_json=False,
)
@override_settings(WEBHOOKS_ALLOWED_PORTS=[80, 443])
def test_blocks_disallowed_port(self, httpx_mock: HTTPXMock):
"""
GIVEN:
- URL with a disallowed port
WHEN:
- send_webhook is called with such URL
THEN:
- ValueError is raised
"""
with pytest.raises(ValueError):
send_webhook(
"http://paperless-ngx.com:8080",
data="",
headers={},
files=None,
as_json=False,
)
assert httpx_mock.get_request() is None
@override_settings(WEBHOOKS_ALLOW_INTERNAL_REQUESTS=False)
def test_blocks_private_loopback_linklocal(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- URL with a private, loopback, or link-local IP address
- WEBHOOKS_ALLOW_INTERNAL_REQUESTS is False
WHEN:
- send_webhook is called with such URL
THEN:
- ValueError is raised
"""
resolve_to("127.0.0.1")
with pytest.raises(ValueError):
send_webhook(
"http://paperless-ngx.com",
data="",
headers={},
files=None,
as_json=False,
)
def test_allows_public_ip_and_sends(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- URL with a public IP address
WHEN:
- send_webhook is called with such URL
THEN:
- Request is sent successfully
"""
resolve_to("52.207.186.75")
httpx_mock.add_response(content=b"ok")
send_webhook(
url="http://paperless-ngx.com",
data="hi",
headers={},
files=None,
as_json=False,
)
req = httpx_mock.get_request()
assert req.url.host == "paperless-ngx.com"
def test_follow_redirects_disabled(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- A URL that redirects
WHEN:
- send_webhook is called with follow_redirects=False
THEN:
- Request is made to the original URL and does not follow the redirect
"""
resolve_to("52.207.186.75")
# Return a redirect and ensure we don't follow it (only one request recorded)
httpx_mock.add_response(
status_code=302,
headers={"location": "http://internal-service.local"},
content=b"",
)
with pytest.raises(HTTPError):
send_webhook(
"http://paperless-ngx.com",
data="",
headers={},
files=None,
as_json=False,
)
assert len(httpx_mock.get_requests()) == 1
def test_strips_user_supplied_host_header(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- A URL with a user-supplied Host header
WHEN:
- send_webhook is called with a malicious Host header
THEN:
- The Host header is stripped and replaced with the resolved hostname
"""
resolve_to("52.207.186.75")
httpx_mock.add_response(content=b"ok")
send_webhook(
url="http://paperless-ngx.com",
data="ok",
headers={"Host": "evil.test"},
files=None,
as_json=False,
)
req = httpx_mock.get_request()
assert req.headers["Host"] == "paperless-ngx.com"
assert "evil.test" not in req.headers.get("Host", "")

View File

@@ -1321,7 +1321,6 @@ class BulkEditView(PassUserMixin):
"delete_pages": "checksum", "delete_pages": "checksum",
"split": None, "split": None,
"merge": None, "merge": None,
"edit_pdf": "checksum",
"reprocess": "checksum", "reprocess": "checksum",
} }
@@ -1340,7 +1339,6 @@ class BulkEditView(PassUserMixin):
if method in [ if method in [
bulk_edit.split, bulk_edit.split,
bulk_edit.merge, bulk_edit.merge,
bulk_edit.edit_pdf,
]: ]:
parameters["user"] = user parameters["user"] = user
@@ -1360,7 +1358,6 @@ class BulkEditView(PassUserMixin):
# check ownership for methods that change original document # check ownership for methods that change original document
if ( if (
(
has_perms has_perms
and method and method
in [ in [
@@ -1368,28 +1365,20 @@ class BulkEditView(PassUserMixin):
bulk_edit.delete, bulk_edit.delete,
bulk_edit.rotate, bulk_edit.rotate,
bulk_edit.delete_pages, bulk_edit.delete_pages,
bulk_edit.edit_pdf,
] ]
) ) or (
or (
method in [bulk_edit.merge, bulk_edit.split] method in [bulk_edit.merge, bulk_edit.split]
and parameters["delete_originals"] and parameters["delete_originals"]
)
or (method == bulk_edit.edit_pdf and parameters["update_document"])
): ):
has_perms = user_is_owner_of_all_documents has_perms = user_is_owner_of_all_documents
# check global add permissions for methods that create documents # check global add permissions for methods that create documents
if ( if (
has_perms has_perms
and ( and method in [bulk_edit.split, bulk_edit.merge]
method in [bulk_edit.split, bulk_edit.merge] and not user.has_perm(
or ( "documents.add_document",
method == bulk_edit.edit_pdf
and not parameters["update_document"]
) )
)
and not user.has_perm("documents.add_document")
): ):
has_perms = False has_perms = False
@@ -1427,6 +1416,7 @@ class BulkEditView(PassUserMixin):
) )
} }
# TODO: parameter validation
result = method(documents, **parameters) result = method(documents, **parameters)
if settings.AUDIT_LOG_ENABLED and modified_field: if settings.AUDIT_LOG_ENABLED and modified_field:

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2025-08-11 17:31+0000\n"
+"POT-Creation-Date: 2025-08-02 12:55+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -1185,12 +1185,12 @@
msgid "Invalid color."
msgstr ""
-#: documents/serialisers.py:1700
+#: documents/serialisers.py:1649
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
-#: documents/serialisers.py:1794
+#: documents/serialisers.py:1743
msgid "Invalid variable detected."
msgstr ""

View File

@@ -1421,25 +1421,3 @@ OUTLOOK_OAUTH_ENABLED = bool(
and OUTLOOK_OAUTH_CLIENT_ID
and OUTLOOK_OAUTH_CLIENT_SECRET,
)
###############################################################################
# Webhooks
###############################################################################
WEBHOOKS_ALLOWED_SCHEMES = set(
s.lower()
for s in __get_list(
"PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
["http", "https"],
)
)
WEBHOOKS_ALLOWED_PORTS = set(
int(p)
for p in __get_list(
"PAPERLESS_WEBHOOKS_ALLOWED_PORTS",
[],
)
)
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true",
)
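These removed settings are read from the environment; an illustrative configuration (values are examples, and comma-separated lists are assumed for __get_list):

    PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES=http,https
    PAPERLESS_WEBHOOKS_ALLOWED_PORTS=80,443
    PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS=false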

uv.lock (generated)
View File

@@ -1,5 +1,5 @@
version = 1 version = 1
revision = 3 revision = 2
requires-python = ">=3.10" requires-python = ">=3.10"
resolution-markers = [ resolution-markers = [
"sys_platform == 'darwin'", "sys_platform == 'darwin'",
@@ -312,15 +312,15 @@ wheels = [
[[package]] [[package]]
name = "channels" name = "channels"
version = "4.3.1" version = "4.3.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "asgiref", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "asgiref", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/12/a0/46450fcf9e56af18a6b0440ba49db6635419bb7bc84142c35f4143b1a66c/channels-4.3.1.tar.gz", hash = "sha256:97413ffd674542db08e16a9ef09cd86ec0113e5f8125fbd33cf0854adcf27cdb", size = 26896, upload-time = "2025-08-01T13:25:19.952Z" } sdist = { url = "https://files.pythonhosted.org/packages/72/04/6768c7a887f9c593c4d49f99130c8aec4ea06e750bc17c306b689f6caf3b/channels-4.3.0.tar.gz", hash = "sha256:7db32c61dcd88eada1647e6c6f6ad2eb724b75d4852eeff26ad1c51ccd1a37f7", size = 26816, upload-time = "2025-07-28T13:52:50.334Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/89/1c/eae1c2a8c195760376e7f65d0bdcc3e966695d29cfbe5c54841ce5c71408/channels-4.3.1-py3-none-any.whl", hash = "sha256:b091d4b26f91d807de3e84aead7ba785314f27eaf5bac31dd51b1c956b883859", size = 31286, upload-time = "2025-08-01T13:25:18.845Z" }, { url = "https://files.pythonhosted.org/packages/7c/59/0866202ee593e1b0dab0b472ebb8169e1b2b7886ad3008d193da2bbe10cb/channels-4.3.0-py3-none-any.whl", hash = "sha256:0497f3affb95e621b37d6bae1b6a5d9e8e1e1221007a2566f280091cf30ffcce", size = 31238, upload-time = "2025-07-28T13:52:49.117Z" },
] ]
[[package]] [[package]]
@@ -1946,7 +1946,6 @@ dependencies = [
{ name = "ocrmypdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "ocrmypdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pdf2image", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pdf2image", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psycopg-pool", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dotenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-dotenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-gnupg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-gnupg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1976,7 +1975,7 @@ postgres = [
{ name = "psycopg-c", version = "3.2.9", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, { name = "psycopg-c", version = "3.2.9", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
{ name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'" },
{ name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "psycopg-pool", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "psycopg-pool", version = "3.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or sys_platform == 'darwin'" },
] ]
webserver = [ webserver = [
{ name = "granian", extra = ["uvloop"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "granian", extra = ["uvloop"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2085,8 +2084,7 @@ requires-dist = [
    { name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" },
    { name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" },
    { name = "psycopg-c", marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and extra == 'postgres') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and extra == 'postgres') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'postgres') or (sys_platform != 'linux' and extra == 'postgres')", specifier = "==3.2.9" },
-    { name = "psycopg-pool" },
-    { name = "psycopg-pool", marker = "extra == 'postgres'", specifier = "==3.2.6" },
+    { name = "psycopg-pool", marker = "extra == 'postgres'" },
    { name = "python-dateutil", specifier = "~=2.9.0" },
    { name = "python-dotenv", specifier = "~=1.1.0" },
    { name = "python-gnupg", specifier = "~=0.5.4" },
@@ -2097,7 +2095,7 @@ requires-dist = [
    { name = "redis", extras = ["hiredis"], specifier = "~=5.2.1" },
    { name = "scikit-learn", specifier = "~=1.7.0" },
    { name = "setproctitle", specifier = "~=1.3.4" },
-    { name = "tika-client", specifier = "~=0.10.0" },
+    { name = "tika-client", specifier = "~=0.9.0" },
    { name = "tqdm", specifier = "~=4.67.1" },
    { name = "watchdog", specifier = "~=6.0" },
    { name = "whitenoise", specifier = "~=6.9" },
@@ -2438,7 +2436,7 @@ c = [
    { name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and implementation_name != 'pypy' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
]
pool = [
-    { name = "psycopg-pool", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "psycopg-pool", version = "3.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or sys_platform == 'darwin'" },
]

[[package]]
@@ -2477,14 +2475,12 @@ wheels = [
name = "psycopg-pool"
version = "3.2.6"
source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
sdist = { url = "https://files.pythonhosted.org/packages/cf/13/1e7850bb2c69a63267c3dbf37387d3f71a00fd0e2fa55c5db14d64ba1af4/psycopg_pool-3.2.6.tar.gz", hash = "sha256:0f92a7817719517212fbfe2fd58b8c35c1850cdd2a80d36b581ba2085d9148e5", size = 29770, upload-time = "2025-02-26T12:03:47.129Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/47/fd/4feb52a55c1a4bd748f2acaed1903ab54a723c47f6d0242780f4d97104d4/psycopg_pool-3.2.6-py3-none-any.whl", hash = "sha256:5887318a9f6af906d041a0b1dc1c60f8f0dda8340c2572b74e10907b51ed5da7", size = 38252, upload-time = "2025-02-26T12:03:45.073Z" },
]

[[package]]
name = "pyasn1"
version = "0.6.1"
@@ -2708,11 +2704,11 @@ wheels = [

[[package]]
name = "python-gnupg"
-version = "0.5.5"
+version = "0.5.4"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/42/d0/72a14a79f26c6119b281f6ccc475a787432ef155560278e60df97ce68a86/python-gnupg-0.5.5.tar.gz", hash = "sha256:3fdcaf76f60a1b948ff8e37dc398d03cf9ce7427065d583082b92da7a4ff5a63", size = 66467, upload-time = "2025-08-04T19:26:55.778Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/3e/ba0dc69c9f4e0aeb24d93175230ef057c151790a7516012f61014918992d/python-gnupg-0.5.4.tar.gz", hash = "sha256:f2fdb5fb29615c77c2743e1cb3d9314353a6e87b10c37d238d91ae1c6feae086", size = 65705, upload-time = "2025-01-07T11:58:34.073Z" }
wheels = [
-    { url = "https://files.pythonhosted.org/packages/aa/19/c147f78cc18c8788f54d4a16a22f6c05deba85ead5672d3ddf6dcba5a5fe/python_gnupg-0.5.5-py2.py3-none-any.whl", hash = "sha256:51fa7b8831ff0914bc73d74c59b99c613de7247b91294323c39733bb85ac3fc1", size = 21916, upload-time = "2025-08-04T19:26:54.307Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/5b/6666ed5a0d3ce4d5444af62e373d5ba8ab253a03487c86f2f9f1078e7c31/python_gnupg-0.5.4-py2.py3-none-any.whl", hash = "sha256:40ce25cde9df29af91fe931ce9df3ce544e14a37f62b13ca878c897217b2de6c", size = 21730, upload-time = "2025-01-07T11:58:32.249Z" },
]

[[package]]
@@ -3358,16 +3354,16 @@ wheels = [

[[package]]
name = "tika-client"
-version = "0.10.0"
+version = "0.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'darwin') or (python_full_version < '3.11' and sys_platform == 'linux')" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/21/be/65bfc47e4689ecd5ead20cf47dc0084fd767b7e71e8cfabf5fddc42aae3c/tika_client-0.10.0.tar.gz", hash = "sha256:3101e8b2482ae4cb7f87be13ada970ff691bdc3404d94cd52f5e57a09c99370c", size = 2178257, upload-time = "2025-08-04T17:47:30.414Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/ad/3508e42b470a037b3f5c19ca9993893d0faa30ba7ec7e6ac33db9bc3bf51/tika_client-0.9.0.tar.gz", hash = "sha256:c10bba8e40ede23c039f84ccd821fb2d290d339cc26cbd267ab9b561a1e83659", size = 2175246, upload-time = "2025-01-15T18:46:23.901Z" }
wheels = [
-    { url = "https://files.pythonhosted.org/packages/b1/31/002e0fa5bca67d6a19da8c294273486f6c46cbcc83d6879719a38a181461/tika_client-0.10.0-py3-none-any.whl", hash = "sha256:f5486cc884e4522575662aa295bda761bf9f101ac8d92840155b58ab8b96f6e2", size = 18237, upload-time = "2025-08-04T17:47:28.966Z" },
+    { url = "https://files.pythonhosted.org/packages/36/8c/90ba51e014fb548ee34dd5ed14e85ec4a205ff97b89ca393e4de321304ac/tika_client-0.9.0-py3-none-any.whl", hash = "sha256:2464e8335b5e92c276641c729e7707f1e894a2bfb51cc59abdd3bdfb532da8a0", size = 17963, upload-time = "2025-01-15T18:46:21.143Z" },
]

[[package]]