Compare commits

..

45 Commits

Author SHA1 Message Date
shamoon
0d87f94b9b Merge branch 'dev' into feature-remote-ocr-2 2025-09-14 14:01:35 -07:00
GitHub Actions
4905edbf79 Auto translate strings 2025-09-14 03:22:02 +00:00
jojo2357
feb5d534b5 Enhancement: long text custom field (#10846)
---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2025-09-14 03:19:00 +00:00
shamoon
d230514dd3 Fix: fix pdf editor hover rotate counterclockwise button (#10848) 2025-09-13 14:19:50 -07:00
shamoon
1709aee903 Development: fix localization failing tests (#10840)
---------

Co-authored-by: Trenton H <797416+stumpylog@users.noreply.github.com>
2025-09-12 16:42:52 -07:00
shamoon
c4346124c3 Fix: warp long words in toast content (#10839) 2025-09-12 06:56:40 -07:00
shamoon
315b90f8e5 Add typing to assertContainsStrings test util 2025-09-11 13:56:14 -07:00
shamoon
47b2d2964b Use regular testcase instead of django, config check test 2025-09-11 13:52:10 -07:00
shamoon
e05639ae4e tempdir already a path 2025-09-11 13:49:30 -07:00
shamoon
f400a8cb2f Close client 2025-09-11 13:49:06 -07:00
shamoon
26abcf5612 Also ensure API key is set 2025-09-11 13:48:06 -07:00
shamoon
afde52430d Merge branch 'dev' into feature-remote-ocr-2 2025-09-11 13:25:53 -07:00
shamoon
44b8c4881a Fix: fix error when bulk adding empty doc link custom fields (#10832) 2025-09-11 13:19:23 -07:00
GitHub Actions
d3d8eef0b6 Auto translate strings 2025-09-11 18:00:58 +00:00
Mattia Paletti
a283c1c320 Enhancement: Add print button (#10626)
---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2025-09-11 17:59:11 +00:00
GitHub Actions
f3220ce981 Auto translate strings 2025-09-11 17:44:22 +00:00
david-loe
2dc4f1f49b Enhancement: add storage path as workflow trigger filter (#10771)
---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2025-09-11 17:41:04 +00:00
GitHub Actions
17509171bb Auto translate strings 2025-09-11 13:58:01 +00:00
sidey79
9e11e7fd05 Enhancement: jinja template support for workflow title assignment (#10700)
---------

Co-authored-by: Trenton Holmes <797416+stumpylog@users.noreply.github.com>
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2025-09-11 06:56:16 -07:00
shamoon
716f2da652 Merge branch 'dev' into feature-remote-ocr-2 2025-09-08 11:36:49 -07:00
shamoon
c54073b7c2 Merge branch 'dev' into feature-remote-ocr-2 2025-09-04 09:16:59 -07:00
shamoon
247e6f39dc Merge branch 'dev' into feature-remote-ocr-2 2025-09-01 20:10:40 -07:00
shamoon
1e6dfc4481 Merge branch 'dev' into feature-remote-ocr-2 2025-08-26 13:30:39 -07:00
shamoon
7cc0750066 Add note on costs and limitations for Azure OCR 2025-08-24 05:47:07 -07:00
shamoon
bd6585d3b4 Merge branch 'dev' into feature-remote-ocr-2 2025-08-22 08:54:26 -07:00
shamoon
717e828a1d Merge branch 'dev' into feature-remote-ocr-2 2025-08-17 21:25:14 -07:00
shamoon
07381d48e6 Merge branch 'dev' into feature-remote-ocr-2 2025-08-17 07:49:58 -07:00
shamoon
dd0ffaf312 Merge branch 'dev' into feature-remote-ocr-2 2025-08-11 10:48:36 -07:00
shamoon
264504affc Fix consumer declaration file extensions 2025-08-10 05:32:52 -07:00
shamoon
4feedf2add Merge branch 'dev' into feature-remote-ocr-2 2025-08-06 16:04:25 -04:00
shamoon
2f76cf9831 Merge branch 'dev' into feature-remote-ocr-2 2025-08-01 23:55:49 -04:00
shamoon
1002d37f6b Update test_parser.py 2025-07-09 11:05:37 -07:00
shamoon
d260a94740 Update parsers.py 2025-07-09 11:02:57 -07:00
shamoon
88c69b83ea Update index.md 2025-07-09 11:00:12 -07:00
shamoon
2557ee2014 Update docs to mention remote OCR with Azure AI 2025-07-09 09:53:30 -07:00
shamoon
3c75deed80 Add paperless_remote tests to testpaths 2025-07-08 14:19:45 -07:00
shamoon
d05343c927 Test fixes / coverage 2025-07-08 14:19:45 -07:00
shamoon
e7972b7eaf Coverage 2025-07-08 14:19:45 -07:00
shamoon
75a091cc0d Fix test 2025-07-08 14:19:44 -07:00
shamoon
dca74803fd Use output_content_format poller.result to get clean content 2025-07-08 14:19:44 -07:00
shamoon
3cf3d868d0 Some docs 2025-07-08 14:19:43 -07:00
shamoon
bf4fc6604a Test 2025-07-08 14:19:43 -07:00
shamoon
e8c1eb86fa This actually works
[ci skip]
2025-07-08 14:19:43 -07:00
shamoon
c3dad3cf69 Basic parse 2025-07-08 14:19:42 -07:00
shamoon
811bd66088 Ok, restart implementing this with just azure
[ci skip]
2025-07-08 14:19:42 -07:00
57 changed files with 1935 additions and 953 deletions

View File

@@ -5,7 +5,7 @@
# Purpose: Compiles the frontend
# Notes:
# - Does PNPM stuff with Typescript and such
FROM --platform=$BUILDPLATFORM docker.io/node:20-trixie-slim AS compile-frontend
FROM --platform=$BUILDPLATFORM docker.io/node:20-bookworm-slim AS compile-frontend
COPY ./src-ui /src/src-ui
@@ -32,7 +32,7 @@ RUN set -eux \
# Purpose: Installs s6-overlay and rootfs
# Comments:
# - Don't leave anything extra in here either
FROM ghcr.io/astral-sh/uv:0.8.16-python3.12-trixie-slim AS s6-overlay-base
FROM ghcr.io/astral-sh/uv:0.8.15-python3.12-bookworm-slim AS s6-overlay-base
WORKDIR /usr/src/s6
@@ -170,8 +170,20 @@ RUN set -eux \
&& apt-get update \
&& apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \
&& echo "Installing pre-built updates" \
&& curl --fail --silent --no-progress-meter --show-error --location --remote-name-all \
https://github.com/paperless-ngx/builder/releases/download/jbig2enc-v${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
&& curl --fail --silent --no-progress-meter --show-error --location --remote-name-all --parallel --parallel-max 4 \
https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
https://github.com/paperless-ngx/builder/releases/download/jbig2enc-${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
&& echo "Installing qpdf ${QPDF_VERSION}" \
&& dpkg --install ./libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
&& dpkg --install ./qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
&& echo "Installing Ghostscript ${GS_VERSION}" \
&& dpkg --install ./libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
&& dpkg --install ./libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
&& dpkg --install ./ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
&& echo "Installing jbig2enc" \
&& dpkg --install ./jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
&& echo "Configuring imagemagick" \

319
dev.txt
View File

@@ -1,319 +0,0 @@
adduser 3.134
apt 2.6.1
base-files 12.4+deb12u11
base-passwd 3.6.1
bash 5.2.15-2+b8
bsdutils 1:2.38.1-5+deb12u3
ca-certificates 20230311+deb12u1
coreutils 9.1-1
curl 7.88.1-10+deb12u12
dash 0.5.12-2
debconf 1.5.82
debian-archive-keyring 2023.3+deb12u2
debianutils 5.7-0.5~deb12u1
diffutils 1:3.8-4
dirmngr 2.2.40-1.1
dpkg 1.21.22
e2fsprogs 1.47.0-2
file 1:5.44-3
findutils 4.9.0-4
fontconfig 2.14.1-4
fontconfig-config 2.14.1-4
fonts-liberation 1:1.07.4-11
fonts-urw-base35 20200910-7
gcc-12-base 12.2.0-14+deb12u1
gettext 0.21-12
gettext-base 0.21-12
ghostscript 10.03.1~dfsg-1
gnupg 2.2.40-1.1
gnupg-l10n 2.2.40-1.1
gnupg-utils 2.2.40-1.1
gosu 1.14-1+b10
gpg 2.2.40-1.1
gpg-agent 2.2.40-1.1
gpg-wks-client 2.2.40-1.1
gpg-wks-server 2.2.40-1.1
gpgconf 2.2.40-1.1
gpgsm 2.2.40-1.1
gpgv 2.2.40-1.1
grep 3.8-5
gzip 1.12-1
hicolor-icon-theme 0.17-2
hostname 3.23+nmu1
icc-profiles-free 2.0.1+dfsg-1.1
imagemagick 8:6.9.11.60+dfsg-1.6+deb12u3
imagemagick-6-common 8:6.9.11.60+dfsg-1.6+deb12u3
imagemagick-6.q16 8:6.9.11.60+dfsg-1.6+deb12u3
init-system-helpers 1.65.2
jbig2dec 0.19-3
jbig2enc 0.30-1
libacl1 2.3.1-3
libaom3 3.6.0-1+deb12u1
libapt-pkg6.0 2.6.1
libarchive13 3.6.2-1+deb12u2
libassuan0 2.5.5-5
libattr1 1:2.5.1-4
libaudit-common 1:3.0.9-1
libaudit1 1:3.0.9-1
libavahi-client3 0.8-10+deb12u1
libavahi-common-data 0.8-10+deb12u1
libavahi-common3 0.8-10+deb12u1
libavcodec59 7:5.1.6-0+deb12u1
libavformat59 7:5.1.6-0+deb12u1
libavutil57 7:5.1.6-0+deb12u1
libblkid1 2.38.1-5+deb12u3
libbluray2 1:1.3.4-1
libbrotli1 1.0.9-2+b6
libbsd0 0.11.7-2
libbz2-1.0 1.0.8-5+b1
libc-bin 2.36-9+deb12u10
libc6 2.36-9+deb12u10
libcairo-gobject2 1.16.0-7
libcairo2 1.16.0-7
libcap-ng0 0.8.3-1+b3
libcap2 1:2.66-4+deb12u1
libchromaprint1 1.5.1-2+b1
libcjson1 1.7.15-1+deb12u2
libcodec2-1.0 1.0.5-1
libcom-err2 1.47.0-2
libconfig-inifiles-perl 3.000003-2
libcrypt1 1:4.4.33-2
libcups2 2.4.2-3+deb12u8
libcurl4 7.88.1-10+deb12u12
libdatrie1 0.2.13-2+b1
libdav1d6 1.0.0-2+deb12u1
libdb5.3 5.3.28+dfsg2-1
libdbus-1-3 1.14.10-1~deb12u1
libde265-0 1.0.11-1+deb12u2
libdebconfclient0 0.270
libdeflate0 1.14-1
libdrm-common 2.4.114-1
libdrm2 2.4.114-1+b1
libedit2 3.1-20221030-2
libexpat1 2.5.0-1+deb12u1
libext2fs2 1.47.0-2
libffi8 3.4.4-1
libfftw3-double3 3.3.10-1
libfontconfig1 2.14.1-4
libfontenc1 1:1.1.4-1
libfreetype6 2.12.1+dfsg-5+deb12u4
libfribidi0 1.0.8-2.1
libgcc-s1 12.2.0-14+deb12u1
libgcrypt20 1.10.1-3
libgdbm-compat4 1.23-3
libgdbm6 1.23-3
libgdk-pixbuf-2.0-0 2.42.10+dfsg-1+deb12u2
libgdk-pixbuf2.0-common 2.42.10+dfsg-1+deb12u2
libgif7 5.2.1-2.5
libglib2.0-0 2.74.6-2+deb12u6
libgme0 0.6.3-6
libgmp10 2:6.2.1+dfsg1-1.1
libgnutls30 3.7.9-2+deb12u5
libgomp1 12.2.0-14+deb12u1
libgpg-error0 1.46-1
libgraphite2-3 1.3.14-1
libgs-common 10.0.0~dfsg-11+deb12u7
libgs10 10.03.1~dfsg-1
libgs10-common 10.03.1~dfsg-1
libgsm1 1.0.22-1
libgssapi-krb5-2 1.20.1-2+deb12u3
libharfbuzz0b 6.0.0+dfsg-3
libheif1 1.15.1-1+deb12u1
libhogweed6 3.8.1-2
libhwy1 1.0.3-3+deb12u1
libice6 2:1.0.10-1
libicu72 72.1-3+deb12u1
libidn12 1.41-1
libidn2-0 2.3.3-1+b1
libijs-0.35 0.35-15
libimagequant0 2.17.0-1
libjbig0 2.1-6.1
libjbig2dec0 0.19-3
libjpeg62-turbo 1:2.1.5-2
libjxl0.7 0.7.0-10+deb12u1
libk5crypto3 1.20.1-2+deb12u3
libkeyutils1 1.6.3-2
libkrb5-3 1.20.1-2+deb12u3
libkrb5support0 1.20.1-2+deb12u3
libksba8 1.6.3-2
liblcms2-2 2.14-2
libldap-2.5-0 2.5.13+dfsg-5
liblept5 1.82.0-3+b3
liblerc4 4.0.0+ds-2
liblqr-1-0 0.4.2-2.1
libltdl7 2.4.7-7~deb12u1
liblz4-1 1.9.4-1
liblzma5 5.4.1-1
libmagic-mgc 1:5.44-3
libmagic1 1:5.44-3
libmagickcore-6.q16-6 8:6.9.11.60+dfsg-1.6+deb12u3
libmagickwand-6.q16-6 8:6.9.11.60+dfsg-1.6+deb12u3
libmariadb3 1:10.11.11-0+deb12u1
libmbedcrypto7 2.28.3-1
libmd0 1.0.4-2
libmfx1 22.5.4-1
libmount1 2.38.1-5+deb12u3
libmp3lame0 3.100-6
libmpg123-0 1.31.2-1+deb12u1
libncurses6 6.4-4
libncursesw6 6.4-4
libnettle8 3.8.1-2
libnghttp2-14 1.52.0-1+deb12u2
libnorm1 1.5.9+dfsg-2
libnpth0 1.6-3
libnsl2 1.3.0-2
libnspr4 2:4.35-1
libnss3 2:3.87.1-1+deb12u1
libnuma1 2.0.16-1
libogg0 1.3.5-3
libopenjp2-7 2.5.0-2+deb12u1
libopenmpt0 0.6.9-1
libopus0 1.3.1-3
libp11-kit0 0.24.1-2
libpam-modules 1.5.2-6+deb12u1
libpam-modules-bin 1.5.2-6+deb12u1
libpam-runtime 1.5.2-6+deb12u1
libpam0g 1.5.2-6+deb12u1
libpango-1.0-0 1.50.12+ds-1
libpangocairo-1.0-0 1.50.12+ds-1
libpangoft2-1.0-0 1.50.12+ds-1
libpaper1 1.1.29
libpcre2-8-0 10.42-1
libperl5.36 5.36.0-7+deb12u2
libpgm-5.3-0 5.3.128~dfsg-2
libpixman-1-0 0.42.2-1
libpng16-16 1.6.39-2
libpoppler126 22.12.0-2+deb12u1
libpq5 15.13-0+deb12u1
libpsl5 0.21.2-1
libqpdf29 11.9.0-1
librabbitmq4 0.11.0-1+deb12u1
librav1e0 0.5.1-6
libreadline8 8.2-1.3
librist4 0.2.7+dfsg-1
librsvg2-2 2.54.7+dfsg-1~deb12u1
librtmp1 2.4+20151223.gitfa8646d.1-2+b2
libsasl2-2 2.1.28+dfsg-10
libsasl2-modules-db 2.1.28+dfsg-10
libseccomp2 2.5.4-1+deb12u1
libselinux1 3.4-1+b6
libsemanage-common 3.4-1
libsemanage2 3.4-1+b5
libsepol2 3.4-2.1
libshine3 3.1.1-2
libsm6 2:1.2.3-1
libsmartcols1 2.38.1-5+deb12u3
libsnappy1v5 1.1.9-3
libsodium23 1.0.18-1
libsoxr0 0.1.3-4
libspeex1 1.2.1-2
libsqlite3-0 3.40.1-2+deb12u1
libsrt1.5-gnutls 1.5.1-1+deb12u1
libss2 1.47.0-2
libssh-gcrypt-4 0.10.6-0+deb12u1
libssh2-1 1.10.0-3+b1
libssl3 3.0.17-1~deb12u1
libstdc++6 12.2.0-14+deb12u1
libsvtav1enc1 1.4.1+dfsg-1
libswresample4 7:5.1.6-0+deb12u1
libsystemd0 252.38-1~deb12u1
libtasn1-6 4.19.0-2+deb12u1
libtesseract5 5.3.0-2
libthai-data 0.1.29-1
libthai0 0.1.29-1
libtheora0 1.1.1+dfsg.1-16.1+b1
libtiff6 4.5.0-6+deb12u2
libtinfo6 6.4-4
libtirpc-common 1.3.3+ds-1
libtirpc3 1.3.3+ds-1
libtwolame0 0.4.0-2
libudev1 252.38-1~deb12u1
libudfread0 1.1.2-1
libunistring2 1.0-2
libuuid1 2.38.1-5+deb12u3
libv4l-0 1.22.1-5+b2
libv4lconvert0 1.22.1-5+b2
libva-drm2 2.17.0-1
libva-x11-2 2.17.0-1
libva2 2.17.0-1
libvdpau1 1.5-2
libvorbis0a 1.3.7-1
libvorbisenc2 1.3.7-1
libvorbisfile3 1.3.7-1
libvpx7 1.12.0-1+deb12u4
libwebp7 1.2.4-0.2+deb12u1
libwebpdemux2 1.2.4-0.2+deb12u1
libwebpmux3 1.2.4-0.2+deb12u1
libx11-6 2:1.8.4-2+deb12u2
libx11-data 2:1.8.4-2+deb12u2
libx11-xcb1 2:1.8.4-2+deb12u2
libx264-164 2:0.164.3095+gitbaee400-3
libx265-199 3.5-2+b1
libxau6 1:1.0.9-1
libxcb-dri3-0 1.15-1
libxcb-render0 1.15-1
libxcb-shm0 1.15-1
libxcb1 1.15-1
libxdmcp6 1:1.1.2-3
libxext6 2:1.3.4-1+b1
libxfixes3 1:6.0.0-2
libxml2 2.9.14+dfsg-1.3~deb12u2
libxrender1 1:0.9.10-1.1
libxslt1.1 1.1.35-1+deb12u1
libxt6 1:1.2.1-1.1
libxvidcore4 2:1.3.7-1
libxxhash0 0.8.1-1
libzbar0 0.23.92-7+deb12u1
libzmq5 4.3.4-6
libzstd1 1.5.4+dfsg2-5
libzvbi-common 0.2.41-1
libzvbi0 0.2.41-1
login 1:4.13+dfsg1-1+deb12u1
logsave 1.47.0-2
mariadb-client 1:10.11.11-0+deb12u1
mariadb-client-core 1:10.11.11-0+deb12u1
mariadb-common 1:10.11.11-0+deb12u1
mawk 1.3.4.20200120-3.1
media-types 10.0.0
mount 2.38.1-5+deb12u3
mysql-common 5.8+1.1.0
ncurses-base 6.4-4
ncurses-bin 6.4-4
netbase 6.4
ocl-icd-libopencl1 2.3.1-1
openssl 3.0.17-1~deb12u1
passwd 1:4.13+dfsg1-1+deb12u1
perl 5.36.0-7+deb12u2
perl-base 5.36.0-7+deb12u2
perl-modules-5.36 5.36.0-7+deb12u2
pinentry-curses 1.2.1-1
pngquant 2.17.0-1
poppler-data 0.4.12-1
poppler-utils 22.12.0-2+deb12u1
postgresql-client 15+248
postgresql-client-15 15.13-0+deb12u1
postgresql-client-common 248
qpdf 11.9.0-1
readline-common 8.2-1.3
sed 4.9-1
sensible-utils 0.0.17+nmu1
shared-mime-info 2.2-1
sysvinit-utils 3.06-4
tar 1.34+dfsg-1.2+deb12u1
tesseract-ocr 5.3.0-2
tesseract-ocr-deu 1:4.1.0-2
tesseract-ocr-eng 1:4.1.0-2
tesseract-ocr-fra 1:4.1.0-2
tesseract-ocr-ita 1:4.1.0-2
tesseract-ocr-osd 1:4.1.0-2
tesseract-ocr-spa 1:4.1.0-2
tzdata 2025b-0+deb12u1
ucf 3.0043+nmu1+deb12u1
unpaper 7.0.0-0.1
usr-is-merged 37~deb12u1
util-linux 2.38.1-5+deb12u3
util-linux-extra 2.38.1-5+deb12u3
x11-common 1:7.7+23
xfonts-encodings 1:1.0.4-2.2
xfonts-utils 1:7.7+6
zlib1g 1:1.2.13.dfsg-1

View File

@@ -506,6 +506,7 @@ for the possible codes and their meanings.
The `localize_date` filter formats a date or datetime object into a localized string using Babel internationalization.
This takes into account the provided locale for translation. Since this must be used on a date or datetime object,
you must access the field directly, i.e. `document.created`.
An ISO string can also be provided to control the output format.
###### Syntax
@@ -516,7 +517,7 @@ you must access the field directly, i.e. `document.created`.
###### Parameters
- `value` (date | datetime): Date or datetime object to format (datetime should be timezone-aware)
- `value` (date | datetime | str): Date, datetime object or ISO string to format (datetime should be timezone-aware)
- `format` (str): Format type - either a Babel preset ('short', 'medium', 'long', 'full') or custom pattern
- `locale` (str): Locale code for localization (e.g., 'en_US', 'fr_FR', 'de_DE')

View File

@@ -1800,3 +1800,23 @@ password. All of these options come from their similarly-named [Django settings]
#### [`PAPERLESS_EMAIL_USE_SSL=<bool>`](#PAPERLESS_EMAIL_USE_SSL) {#PAPERLESS_EMAIL_USE_SSL}
: Defaults to false.
## Remote OCR
#### [`PAPERLESS_REMOTE_OCR_ENGINE=<str>`](#PAPERLESS_REMOTE_OCR_ENGINE) {#PAPERLESS_REMOTE_OCR_ENGINE}
: The remote OCR engine to use. Currently only Azure AI is supported as "azureai".
Defaults to None, which disables remote OCR.
#### [`PAPERLESS_REMOTE_OCR_API_KEY=<str>`](#PAPERLESS_REMOTE_OCR_API_KEY) {#PAPERLESS_REMOTE_OCR_API_KEY}
: The API key to use for the remote OCR engine.
Defaults to None.
#### [`PAPERLESS_REMOTE_OCR_ENDPOINT=<str>`](#PAPERLESS_REMOTE_OCR_ENDPOINT) {#PAPERLESS_REMOTE_OCR_ENDPOINT}
: The endpoint to use for the remote OCR engine. This is required for Azure AI.
Defaults to None.

View File

@@ -25,9 +25,10 @@ physical documents into a searchable online archive so you can keep, well, _less
## Features
- **Organize and index** your scanned documents with tags, correspondents, types, and more.
- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way.
- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so.
- Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images.
- Utilizes the open-source Tesseract engine to recognize more than 100 languages.
- Utilizes the open-source Tesseract engine to recognize more than 100 languages.
- _New!_ Supports remote OCR with Azure AI (opt-in).
- Documents are saved as PDF/A format which is designed for long term storage, alongside the unaltered originals.
- Uses machine-learning to automatically add tags, correspondents and document types to your documents.
- Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.

View File

@@ -408,7 +408,7 @@ Currently, there are three events that correspond to workflow trigger 'types':
but the document content has been extracted and metadata such as document type, tags, etc. have been set, so these can now
be used for filtering.
3. **Document Updated**: when a document is updated. Similar to 'added' events, triggers can include filtering by content matching,
tags, doc type, or correspondent.
tags, doc type, correspondent or storage path.
4. **Scheduled**: a scheduled trigger that can be used to run workflows at a specific time. The date used can be either the document
added, created, updated date or you can specify a (date) custom field. You can also specify a day offset from the date (positive
offsets will trigger after the date, negative offsets will trigger before).
@@ -452,10 +452,11 @@ Workflows allow you to filter by:
- File path, including wildcards. Note that enabling `PAPERLESS_CONSUMER_RECURSIVE` would allow, for
example, automatically assigning documents to different owners based on the upload directory.
- Mail rule. Choosing this option will force 'mail fetch' to be the workflow source.
- Content matching (`Added` and `Updated` triggers only). Filter document content using the matching settings.
- Tags (`Added` and `Updated` triggers only). Filter for documents with any of the specified tags
- Document type (`Added` and `Updated` triggers only). Filter documents with this doc type
- Correspondent (`Added` and `Updated` triggers only). Filter documents with this correspondent
- Content matching (`Added`, `Updated` and `Scheduled` triggers only). Filter document content using the matching settings.
- Tags (`Added`, `Updated` and `Scheduled` triggers only). Filter for documents with any of the specified tags
- Document type (`Added`, `Updated` and `Scheduled` triggers only). Filter documents with this doc type
- Correspondent (`Added`, `Updated` and `Scheduled` triggers only). Filter documents with this correspondent
- Storage path (`Added`, `Updated` and `Scheduled` triggers only). Filter documents with this storage path
### Workflow Actions
@@ -505,35 +506,52 @@ you may want to adjust these settings to prevent abuse.
#### Workflow placeholders
Some workflow text can include placeholders but the available options differ depending on the type of
workflow trigger. This is because at the time of consumption (when the text is to be set), no automatic tags etc. have been
applied. You can use the following placeholders with any trigger type:
Titles can be assigned by workflows using [Jinja templates](https://jinja.palletsprojects.com/en/3.1.x/templates/).
This allows for complex logic to be used to generate the title, including [logical structures](https://jinja.palletsprojects.com/en/3.1.x/templates/#list-of-control-structures)
and [filters](https://jinja.palletsprojects.com/en/3.1.x/templates/#id11).
The template is provided as a string.
- `{correspondent}`: assigned correspondent name
- `{document_type}`: assigned document type name
- `{owner_username}`: assigned owner username
- `{added}`: added datetime
- `{added_year}`: added year
- `{added_year_short}`: added year
- `{added_month}`: added month
- `{added_month_name}`: added month name
- `{added_month_name_short}`: added month short name
- `{added_day}`: added day
- `{added_time}`: added time in HH:MM format
- `{original_filename}`: original file name without extension
- `{filename}`: current file name without extension
Using Jinja2 Templates is also useful for [Date localization](advanced_usage.md#Date-Localization) in the title.
The available inputs differ depending on the type of workflow trigger.
This is because at the time of consumption (when the text is to be set), no automatic tags etc. have been
applied. You can use the following placeholders in the template with any trigger type:
- `{{correspondent}}`: assigned correspondent name
- `{{document_type}}`: assigned document type name
- `{{owner_username}}`: assigned owner username
- `{{added}}`: added datetime
- `{{added_year}}`: added year
- `{{added_year_short}}`: added year
- `{{added_month}}`: added month
- `{{added_month_name}}`: added month name
- `{{added_month_name_short}}`: added month short name
- `{{added_day}}`: added day
- `{{added_time}}`: added time in HH:MM format
- `{{original_filename}}`: original file name without extension
- `{{filename}}`: current file name without extension
The following placeholders are only available for "added" or "updated" triggers
- `{created}`: created datetime
- `{created_year}`: created year
- `{created_year_short}`: created year
- `{created_month}`: created month
- `{created_month_name}`: created month name
- `{created_month_name_short}`: created month short name
- `{created_day}`: created day
- `{created_time}`: created time in HH:MM format
- `{doc_url}`: URL to the document in the web UI. Requires the `PAPERLESS_URL` setting to be set.
- `{{created}}`: created datetime
- `{{created_year}}`: created year
- `{{created_year_short}}`: created year
- `{{created_month}}`: created month
- `{{created_month_name}}`: created month name
- `{created_month_name_short}}`: created month short name
- `{{created_day}}`: created day
- `{{created_time}}`: created time in HH:MM format
- `{{doc_url}}`: URL to the document in the web UI. Requires the `PAPERLESS_URL` setting to be set.
##### Examples
```jinja2
{{ created | localize_date('MMMM', 'en_US') }}
<!-- Output: "January" -->
{{ added | localize_date('MMMM', 'de_DE') }}
<!-- Output: "Juni" --> # codespell:ignore
```
### Workflow permissions
@@ -850,6 +868,21 @@ how regularly you intend to scan documents and use paperless.
performed the task associated with the document, move it to the
inbox.
## Remote OCR
!!! important
This feature is disabled by default and will always remain strictly "opt-in".
Paperless-ngx supports performing OCR on documents using remote services. At the moment, this is limited to
[Microsoft's Azure "Document Intelligence" service](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence).
This is of course a paid service (with a free tier) which requires an Azure account and subscription. Azure AI is not affiliated with
Paperless-ngx in any way. When enabled, Paperless-ngx will automatically send appropriate documents to Azure for OCR processing, bypassing
the local OCR engine. See the [configuration](configuration.md#PAPERLESS_REMOTE_OCR_ENGINE) options for more details.
Additionally, when using a commercial service with this feature, consider both potential costs as well as any associated file size
or page limitations (e.g. with a free tier).
## Architecture
Paperless-ngx consists of the following components:

View File

@@ -15,6 +15,7 @@ classifiers = [
# This will allow testing to not install a webserver, mysql, etc
dependencies = [
"azure-ai-documentintelligence>=1.0.2",
"babel>=2.17",
"bleach~=6.2.0",
"celery[redis]~=5.5.1",
@@ -232,6 +233,7 @@ testpaths = [
"src/paperless_tesseract/tests/",
"src/paperless_tika/tests",
"src/paperless_text/tests/",
"src/paperless_remote/tests/",
]
addopts = [
"--pythonwarnings=all",

File diff suppressed because it is too large Load Diff

View File

@@ -35,6 +35,9 @@
@case (CustomFieldDataType.Select) {
<span [ngbTooltip]="nameTooltip">{{getSelectValue(field, value)}}</span>
}
@case (CustomFieldDataType.LongText) {
<p class="mb-0" [ngbTooltip]="nameTooltip">{{value | slice:0:20}}{{value.length > 20 ? '...' : ''}}</p>
}
@default {
<span [ngbTooltip]="nameTooltip">{{value}}</span>
}

View File

@@ -1,5 +1,5 @@
import { CurrencyPipe, getLocaleCurrencyCode } from '@angular/common'
import { Component, Input, LOCALE_ID, OnInit, inject } from '@angular/core'
import { CurrencyPipe, getLocaleCurrencyCode, SlicePipe } from '@angular/common'
import { Component, inject, Input, LOCALE_ID, OnInit } from '@angular/core'
import { NgbTooltipModule } from '@ng-bootstrap/ng-bootstrap'
import { takeUntil } from 'rxjs'
import { CustomField, CustomFieldDataType } from 'src/app/data/custom-field'
@@ -14,7 +14,7 @@ import { LoadingComponentWithPermissions } from '../../loading-component/loading
selector: 'pngx-custom-field-display',
templateUrl: './custom-field-display.component.html',
styleUrl: './custom-field-display.component.scss',
imports: [CustomDatePipe, CurrencyPipe, NgbTooltipModule],
imports: [CustomDatePipe, CurrencyPipe, NgbTooltipModule, SlicePipe],
})
export class CustomFieldDisplayComponent
extends LoadingComponentWithPermissions

View File

@@ -177,6 +177,7 @@
<pngx-input-tags [allowCreate]="false" i18n-title title="Has any of tags" formControlName="filter_has_tags"></pngx-input-tags>
<pngx-input-select i18n-title title="Has correspondent" [items]="correspondents" [allowNull]="true" formControlName="filter_has_correspondent"></pngx-input-select>
<pngx-input-select i18n-title title="Has document type" [items]="documentTypes" [allowNull]="true" formControlName="filter_has_document_type"></pngx-input-select>
<pngx-input-select i18n-title title="Has storage path" [items]="storagePaths" [allowNull]="true" formControlName="filter_has_storage_path"></pngx-input-select>
</div>
}
</div>

View File

@@ -412,6 +412,9 @@ export class WorkflowEditDialogComponent
filter_has_document_type: new FormControl(
trigger.filter_has_document_type
),
filter_has_storage_path: new FormControl(
trigger.filter_has_storage_path
),
schedule_offset_days: new FormControl(trigger.schedule_offset_days),
schedule_is_recurring: new FormControl(trigger.schedule_is_recurring),
schedule_recurring_interval_days: new FormControl(
@@ -536,6 +539,7 @@ export class WorkflowEditDialogComponent
filter_has_tags: [],
filter_has_correspondent: null,
filter_has_document_type: null,
filter_has_storage_path: null,
matching_algorithm: MATCH_NONE,
match: '',
is_insensitive: true,

View File

@@ -68,6 +68,11 @@
[allowNull]="true"
[horizontal]="true"></pngx-input-select>
}
@case (CustomFieldDataType.LongText) {
<pngx-input-textarea [(ngModel)]="value[fieldId]" (ngModelChange)="onChange(value)"
[title]="getCustomField(fieldId)?.name"
class="flex-grow-1"></pngx-input-textarea>
}
}
<button type="button" class="btn btn-link text-danger" (click)="removeSelectedField.next(fieldId)">
<i-bs name="trash"></i-bs>

View File

@@ -24,6 +24,7 @@ import { MonetaryComponent } from '../monetary/monetary.component'
import { NumberComponent } from '../number/number.component'
import { SelectComponent } from '../select/select.component'
import { TextComponent } from '../text/text.component'
import { TextAreaComponent } from '../textarea/textarea.component'
import { UrlComponent } from '../url/url.component'
@Component({
@@ -51,6 +52,7 @@ import { UrlComponent } from '../url/url.component'
ReactiveFormsModule,
RouterModule,
NgxBootstrapIconsModule,
TextAreaComponent,
],
})
export class CustomFieldsValuesComponent extends AbstractInputComponent<Object> {

View File

@@ -4,6 +4,7 @@ import {
NG_VALUE_ACCESSOR,
ReactiveFormsModule,
} from '@angular/forms'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { SafeHtmlPipe } from 'src/app/pipes/safehtml.pipe'
import { AbstractInputComponent } from '../abstract-input'
@@ -18,7 +19,12 @@ import { AbstractInputComponent } from '../abstract-input'
selector: 'pngx-input-textarea',
templateUrl: './textarea.component.html',
styleUrls: ['./textarea.component.scss'],
imports: [FormsModule, ReactiveFormsModule, SafeHtmlPipe],
imports: [
FormsModule,
ReactiveFormsModule,
SafeHtmlPipe,
NgxBootstrapIconsModule,
],
})
export class TextAreaComponent extends AbstractInputComponent<string> {
@Input()

View File

@@ -30,7 +30,7 @@
<div class="page-item rounded p-2" cdkDrag (click)="toggleSelection(i)" [class.selected]="p.selected">
<div class="btn-toolbar hover-actions z-10">
<div class="btn-group me-2">
<button class="btn btn-sm btn-dark" (click)="rotate(i); $event.stopPropagation()" title="Rotate page counter-clockwise" i18n-title>
<button class="btn btn-sm btn-dark" (click)="rotate(i, true); $event.stopPropagation()" title="Rotate page counter-clockwise" i18n-title>
<i-bs name="arrow-counterclockwise"></i-bs>
</button>
<button class="btn btn-sm btn-dark" (click)="rotate(i); $event.stopPropagation()" title="Rotate page clockwise" i18n-title>

View File

@@ -67,8 +67,9 @@ export class PDFEditorComponent extends ConfirmDialogComponent {
this.pages[i].selected = !this.pages[i].selected
}
rotate(i: number) {
this.pages[i].rotate = (this.pages[i].rotate + 90) % 360
rotate(i: number, counterclockwise: boolean = false) {
this.pages[i].rotate =
(this.pages[i].rotate + (counterclockwise ? -90 : 90) + 360) % 360
}
rotateSelected(dir: number) {

View File

@@ -17,7 +17,7 @@
<i-bs width="0.9em" height="0.9em" name="exclamation-triangle"></i-bs>
}
<div>
<p class="ms-2 mb-0">{{toast.content}}</p>
<p class="ms-2 mb-0 text-break">{{toast.content}}</p>
@if (toast.error) {
<details class="ms-2">
<div class="mt-2 ms-n4 me-n2 small">

View File

@@ -54,6 +54,10 @@
<i-bs width="1em" height="1em" name="arrow-counterclockwise"></i-bs>&nbsp;<span i18n>Reprocess</span>
</button>
<button ngbDropdownItem (click)="printDocument()" [hidden]="useNativePdfViewer || isMobile">
<i-bs width="1em" height="1em" name="printer"></i-bs>&nbsp;<span i18n>Print</span>
</button>
<button ngbDropdownItem (click)="moreLike()">
<i-bs width="1em" height="1em" name="diagram-3"></i-bs>&nbsp;<span i18n>More like this</span>
</button>
@@ -212,6 +216,14 @@
(removed)="removeField(fieldInstance)"
[error]="getCustomFieldError(i)"></pngx-input-select>
}
@case (CustomFieldDataType.LongText) {
<pngx-input-textarea formControlName="value"
[title]="getCustomFieldFromInstance(fieldInstance)?.name"
[removable]="userCanEdit"
(removed)="removeField(fieldInstance)"
[horizontal]="true"
[error]="getCustomFieldError(i)"></pngx-input-textarea>
}
}
</div>
}

View File

@@ -1415,4 +1415,151 @@ describe('DocumentDetailComponent', () => {
.flush('fail', { status: 500, statusText: 'Server Error' })
expect(component.previewText).toContain('An error occurred loading content')
})
it('should print document successfully', fakeAsync(() => {
initNormally()
const appendChildSpy = jest
.spyOn(document.body, 'appendChild')
.mockImplementation((node: Node) => node)
const removeChildSpy = jest
.spyOn(document.body, 'removeChild')
.mockImplementation((node: Node) => node)
const createObjectURLSpy = jest
.spyOn(URL, 'createObjectURL')
.mockReturnValue('blob:mock-url')
const revokeObjectURLSpy = jest
.spyOn(URL, 'revokeObjectURL')
.mockImplementation(() => {})
const mockContentWindow = {
focus: jest.fn(),
print: jest.fn(),
onafterprint: null,
}
const mockIframe = {
style: {},
src: '',
onload: null,
contentWindow: mockContentWindow,
}
const createElementSpy = jest
.spyOn(document, 'createElement')
.mockReturnValue(mockIframe as any)
const blob = new Blob(['test'], { type: 'application/pdf' })
component.printDocument()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/${doc.id}/download/`
)
req.flush(blob)
tick()
expect(createElementSpy).toHaveBeenCalledWith('iframe')
expect(appendChildSpy).toHaveBeenCalledWith(mockIframe)
expect(createObjectURLSpy).toHaveBeenCalledWith(blob)
if (mockIframe.onload) {
mockIframe.onload({} as any)
}
expect(mockContentWindow.focus).toHaveBeenCalled()
expect(mockContentWindow.print).toHaveBeenCalled()
if (mockIframe.onload) {
mockIframe.onload(new Event('load'))
}
if (mockContentWindow.onafterprint) {
mockContentWindow.onafterprint(new Event('afterprint'))
}
expect(removeChildSpy).toHaveBeenCalledWith(mockIframe)
expect(revokeObjectURLSpy).toHaveBeenCalledWith('blob:mock-url')
createElementSpy.mockRestore()
appendChildSpy.mockRestore()
removeChildSpy.mockRestore()
createObjectURLSpy.mockRestore()
revokeObjectURLSpy.mockRestore()
}))
it('should show error toast if print document fails', () => {
initNormally()
const toastSpy = jest.spyOn(toastService, 'showError')
component.printDocument()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/${doc.id}/download/`
)
req.error(new ErrorEvent('failed'))
expect(toastSpy).toHaveBeenCalledWith(
'Error loading document for printing.'
)
})
it('should show error toast if printing throws inside iframe', fakeAsync(() => {
initNormally()
const appendChildSpy = jest
.spyOn(document.body, 'appendChild')
.mockImplementation((node: Node) => node)
const removeChildSpy = jest
.spyOn(document.body, 'removeChild')
.mockImplementation((node: Node) => node)
const createObjectURLSpy = jest
.spyOn(URL, 'createObjectURL')
.mockReturnValue('blob:mock-url')
const revokeObjectURLSpy = jest
.spyOn(URL, 'revokeObjectURL')
.mockImplementation(() => {})
const toastSpy = jest.spyOn(toastService, 'showError')
const mockContentWindow = {
focus: jest.fn().mockImplementation(() => {
throw new Error('focus failed')
}),
print: jest.fn(),
onafterprint: null,
}
const mockIframe: any = {
style: {},
src: '',
onload: null,
contentWindow: mockContentWindow,
}
const createElementSpy = jest
.spyOn(document, 'createElement')
.mockReturnValue(mockIframe as any)
const blob = new Blob(['test'], { type: 'application/pdf' })
component.printDocument()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/${doc.id}/download/`
)
req.flush(blob)
tick()
if (mockIframe.onload) {
mockIframe.onload(new Event('load'))
}
expect(toastSpy).toHaveBeenCalled()
expect(removeChildSpy).toHaveBeenCalledWith(mockIframe)
expect(revokeObjectURLSpy).toHaveBeenCalledWith('blob:mock-url')
createElementSpy.mockRestore()
appendChildSpy.mockRestore()
removeChildSpy.mockRestore()
createObjectURLSpy.mockRestore()
revokeObjectURLSpy.mockRestore()
}))
})

View File

@@ -98,6 +98,7 @@ import { PermissionsFormComponent } from '../common/input/permissions/permission
import { SelectComponent } from '../common/input/select/select.component'
import { TagsComponent } from '../common/input/tags/tags.component'
import { TextComponent } from '../common/input/text/text.component'
import { TextAreaComponent } from '../common/input/textarea/textarea.component'
import { UrlComponent } from '../common/input/url/url.component'
import { PageHeaderComponent } from '../common/page-header/page-header.component'
import {
@@ -173,6 +174,7 @@ export enum ZoomSetting {
NgbDropdownModule,
NgxBootstrapIconsModule,
PdfViewerModule,
TextAreaComponent,
],
})
export class DocumentDetailComponent
@@ -291,6 +293,10 @@ export class DocumentDetailComponent
return this.settings.get(SETTINGS_KEYS.USE_NATIVE_PDF_VIEWER)
}
get isMobile(): boolean {
return this.deviceDetectorService.isMobile()
}
get archiveContentRenderType(): ContentRenderType {
return this.document?.archived_file_name
? this.getRenderType('application/pdf')
@@ -1419,6 +1425,44 @@ export class DocumentDetailComponent
})
}
printDocument() {
const printUrl = this.documentsService.getDownloadUrl(
this.document.id,
false
)
this.http
.get(printUrl, { responseType: 'blob' })
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe({
next: (blob) => {
const blobUrl = URL.createObjectURL(blob)
const iframe = document.createElement('iframe')
iframe.style.display = 'none'
iframe.src = blobUrl
document.body.appendChild(iframe)
iframe.onload = () => {
try {
iframe.contentWindow.focus()
iframe.contentWindow.print()
iframe.contentWindow.onafterprint = () => {
document.body.removeChild(iframe)
URL.revokeObjectURL(blobUrl)
}
} catch (err) {
this.toastService.showError($localize`Print failed.`, err)
document.body.removeChild(iframe)
URL.revokeObjectURL(blobUrl)
}
}
},
error: () => {
this.toastService.showError(
$localize`Error loading document for printing.`
)
},
})
}
public openShareLinks() {
const modal = this.modalService.open(ShareLinksDialogComponent)
modal.componentInstance.documentId = this.document.id

View File

@@ -56,6 +56,10 @@
[items]="field.extra_data.select_options" bindLabel="label" [allowNull]="true" [horizontal]="true">
</pngx-input-select>
}
@case (CustomFieldDataType.LongText) {
<pngx-input-textarea formControlName="{{field.id}}" class="w-100" [title]="field.name" [horizontal]="true">
</pngx-input-textarea>
}
}
<button type="button" class="btn btn-outline-danger mb-3" (click)="removeField(field.id)">
<i-bs name="x"></i-bs>

View File

@@ -18,6 +18,7 @@ import { TextComponent } from 'src/app/components/common/input/text/text.compone
import { UrlComponent } from 'src/app/components/common/input/url/url.component'
import { CustomField, CustomFieldDataType } from 'src/app/data/custom-field'
import { DocumentService } from 'src/app/services/rest/document.service'
import { TextAreaComponent } from '../../../common/input/textarea/textarea.component'
@Component({
selector: 'pngx-custom-fields-bulk-edit-dialog',
@@ -35,6 +36,7 @@ import { DocumentService } from 'src/app/services/rest/document.service'
FormsModule,
ReactiveFormsModule,
NgxBootstrapIconsModule,
TextAreaComponent,
],
})
export class CustomFieldsBulkEditDialogComponent {

View File

@@ -114,6 +114,10 @@ export const CUSTOM_FIELD_QUERY_OPERATOR_GROUPS_BY_TYPE = {
CustomFieldQueryOperatorGroups.Exact,
CustomFieldQueryOperatorGroups.Subset,
],
[CustomFieldDataType.LongText]: [
CustomFieldQueryOperatorGroups.Basic,
CustomFieldQueryOperatorGroups.String,
],
}
export const CUSTOM_FIELD_QUERY_VALUE_TYPES_BY_OPERATOR = {

View File

@@ -10,6 +10,7 @@ export enum CustomFieldDataType {
Monetary = 'monetary',
DocumentLink = 'documentlink',
Select = 'select',
LongText = 'longtext',
}
export const DATA_TYPE_LABELS = [
@@ -49,6 +50,10 @@ export const DATA_TYPE_LABELS = [
id: CustomFieldDataType.Select,
name: $localize`Select`,
},
{
id: CustomFieldDataType.LongText,
name: $localize`Long Text`,
},
]
export interface CustomField extends ObjectWithId {

View File

@@ -44,6 +44,8 @@ export interface WorkflowTrigger extends ObjectWithId {
filter_has_document_type?: number // DocumentType.id
filter_has_storage_path?: number // StoragePath.id
schedule_offset_days?: number
schedule_is_recurring?: boolean

View File

@@ -110,6 +110,7 @@ import {
playFill,
plus,
plusCircle,
printer,
questionCircle,
scissors,
search,
@@ -319,6 +320,7 @@ const icons = {
playFill,
plus,
plusCircle,
printer,
questionCircle,
scissors,
search,

View File

@@ -181,6 +181,7 @@ def modify_custom_fields(
defaults[value_field] = value
if (
custom_field.data_type == CustomField.FieldDataType.DOCUMENTLINK
and value
and doc_id in value
):
# Prevent self-linking

View File

@@ -230,6 +230,7 @@ class CustomFieldsFilter(Filter):
| qs.filter(custom_fields__value_monetary__icontains=value)
| qs.filter(custom_fields__value_document_ids__icontains=value)
| qs.filter(custom_fields__value_select__in=option_ids)
| qs.filter(custom_fields__value_long_text__icontains=value)
)
else:
return qs
@@ -314,6 +315,7 @@ class CustomFieldQueryParser:
CustomField.FieldDataType.MONETARY: ("basic", "string", "arithmetic"),
CustomField.FieldDataType.DOCUMENTLINK: ("basic", "containment"),
CustomField.FieldDataType.SELECT: ("basic",),
CustomField.FieldDataType.LONG_TEXT: ("basic", "string"),
}
DATE_COMPONENTS = [
@@ -845,7 +847,10 @@ class DocumentsOrderingFilter(OrderingFilter):
annotation = None
match field.data_type:
case CustomField.FieldDataType.STRING:
case (
CustomField.FieldDataType.STRING
| CustomField.FieldDataType.LONG_TEXT
):
annotation = Subquery(
CustomFieldInstance.objects.filter(
document_id=OuterRef("id"),

View File

@@ -386,6 +386,16 @@ def existing_document_matches_workflow(
)
trigger_matched = False
# Document storage_path vs trigger has_storage_path
if (
trigger.filter_has_storage_path is not None
and document.storage_path != trigger.filter_has_storage_path
):
reason = (
f"Document storage path {document.storage_path} does not match {trigger.filter_has_storage_path}",
)
trigger_matched = False
# Document original_filename vs trigger filename
if (
trigger.filter_filename is not None
@@ -430,6 +440,11 @@ def prefilter_documents_by_workflowtrigger(
document_type=trigger.filter_has_document_type,
)
if trigger.filter_has_storage_path is not None:
documents = documents.filter(
storage_path=trigger.filter_has_storage_path,
)
if trigger.filter_filename is not None and len(trigger.filter_filename) > 0:
# the true fnmatch will actually run later so we just want a loose filter here
regex = fnmatch_translate(trigger.filter_filename).lstrip("^").rstrip("$")

View File

@@ -0,0 +1,35 @@
# Generated by Django 5.2.6 on 2025-09-11 17:29
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1068_alter_document_created"),
]
operations = [
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_storage_path",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.storagepath",
verbose_name="has this storage path",
),
),
migrations.AlterField(
model_name="workflowaction",
name="assign_title",
field=models.TextField(
blank=True,
help_text="Assign a document title, must be a Jinja2 template, see documentation.",
null=True,
verbose_name="assign title",
),
),
]

View File

@@ -0,0 +1,39 @@
# Generated by Django 5.2.6 on 2025-09-13 17:11
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1069_workflowtrigger_filter_has_storage_path_and_more"),
]
operations = [
migrations.AddField(
model_name="customfieldinstance",
name="value_long_text",
field=models.TextField(null=True),
),
migrations.AlterField(
model_name="customfield",
name="data_type",
field=models.CharField(
choices=[
("string", "String"),
("url", "URL"),
("date", "Date"),
("boolean", "Boolean"),
("integer", "Integer"),
("float", "Float"),
("monetary", "Monetary"),
("documentlink", "Document Link"),
("select", "Select"),
("longtext", "Long Text"),
],
editable=False,
max_length=50,
verbose_name="data type",
),
),
]

View File

@@ -759,6 +759,7 @@ class CustomField(models.Model):
MONETARY = ("monetary", _("Monetary"))
DOCUMENTLINK = ("documentlink", _("Document Link"))
SELECT = ("select", _("Select"))
LONG_TEXT = ("longtext", _("Long Text"))
created = models.DateTimeField(
_("created"),
@@ -816,6 +817,7 @@ class CustomFieldInstance(SoftDeleteModel):
CustomField.FieldDataType.MONETARY: "value_monetary",
CustomField.FieldDataType.DOCUMENTLINK: "value_document_ids",
CustomField.FieldDataType.SELECT: "value_select",
CustomField.FieldDataType.LONG_TEXT: "value_long_text",
}
created = models.DateTimeField(
@@ -883,6 +885,8 @@ class CustomFieldInstance(SoftDeleteModel):
value_select = models.CharField(null=True, max_length=16)
value_long_text = models.TextField(null=True)
class Meta:
ordering = ("created",)
verbose_name = _("custom field instance")
@@ -1044,6 +1048,14 @@ class WorkflowTrigger(models.Model):
verbose_name=_("has this correspondent"),
)
filter_has_storage_path = models.ForeignKey(
StoragePath,
null=True,
blank=True,
on_delete=models.SET_NULL,
verbose_name=_("has this storage path"),
)
schedule_offset_days = models.IntegerField(
_("schedule offset days"),
default=0,
@@ -1207,14 +1219,12 @@ class WorkflowAction(models.Model):
default=WorkflowActionType.ASSIGNMENT,
)
assign_title = models.CharField(
assign_title = models.TextField(
_("assign title"),
max_length=256,
null=True,
blank=True,
help_text=_(
"Assign a document title, can include some placeholders, "
"see documentation.",
"Assign a document title, must be a Jinja2 template, see documentation.",
),
)

View File

@@ -2054,6 +2054,7 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
"filter_has_tags",
"filter_has_correspondent",
"filter_has_document_type",
"filter_has_storage_path",
"schedule_offset_days",
"schedule_is_recurring",
"schedule_recurring_interval_days",

View File

@@ -0,0 +1,27 @@
from jinja2.sandbox import SandboxedEnvironment
class JinjaEnvironment(SandboxedEnvironment):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.undefined_tracker = None
def is_safe_callable(self, obj):
# Block access to .save() and .delete() methods
if callable(obj) and getattr(obj, "__name__", None) in (
"save",
"delete",
"update",
):
return False
# Call the parent method for other cases
return super().is_safe_callable(obj)
_template_environment = JinjaEnvironment(
trim_blocks=True,
lstrip_blocks=True,
keep_trailing_newline=False,
autoescape=False,
extensions=["jinja2.ext.loopcontrols"],
)

View File

@@ -2,22 +2,16 @@ import logging
import os
import re
from collections.abc import Iterable
from datetime import date
from datetime import datetime
from pathlib import PurePath
import pathvalidate
from babel import Locale
from babel import dates
from django.utils import timezone
from django.utils.dateparse import parse_date
from django.utils.text import slugify as django_slugify
from jinja2 import StrictUndefined
from jinja2 import Template
from jinja2 import TemplateSyntaxError
from jinja2 import UndefinedError
from jinja2 import make_logging_undefined
from jinja2.sandbox import SandboxedEnvironment
from jinja2.sandbox import SecurityError
from documents.models import Correspondent
@@ -27,39 +21,16 @@ from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.templating.environment import _template_environment
from documents.templating.filters import format_datetime
from documents.templating.filters import get_cf_value
from documents.templating.filters import localize_date
logger = logging.getLogger("paperless.templating")
_LogStrictUndefined = make_logging_undefined(logger, StrictUndefined)
class FilePathEnvironment(SandboxedEnvironment):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.undefined_tracker = None
def is_safe_callable(self, obj):
# Block access to .save() and .delete() methods
if callable(obj) and getattr(obj, "__name__", None) in (
"save",
"delete",
"update",
):
return False
# Call the parent method for other cases
return super().is_safe_callable(obj)
_template_environment = FilePathEnvironment(
trim_blocks=True,
lstrip_blocks=True,
keep_trailing_newline=False,
autoescape=False,
extensions=["jinja2.ext.loopcontrols"],
undefined=_LogStrictUndefined,
)
class FilePathTemplate(Template):
def render(self, *args, **kwargs) -> str:
def clean_filepath(value: str) -> str:
@@ -81,54 +52,7 @@ class FilePathTemplate(Template):
return clean_filepath(original_render)
def get_cf_value(
custom_field_data: dict[str, dict[str, str]],
name: str,
default: str | None = None,
) -> str | None:
if name in custom_field_data and custom_field_data[name]["value"] is not None:
return custom_field_data[name]["value"]
elif default is not None:
return default
return None
def format_datetime(value: str | datetime, format: str) -> str:
if isinstance(value, str):
value = parse_date(value)
return value.strftime(format=format)
def localize_date(value: date | datetime, format: str, locale: str) -> str:
"""
Format a date or datetime object into a localized string using Babel.
Args:
value (date | datetime): The date or datetime to format. If a datetime
is provided, it should be timezone-aware (e.g., UTC from a Django DB object).
format (str): The format to use. Can be one of Babel's preset formats
('short', 'medium', 'long', 'full') or a custom pattern string.
locale (str): The locale code (e.g., 'en_US', 'fr_FR') to use for
localization.
Returns:
str: The localized, formatted date string.
Raises:
TypeError: If `value` is not a date or datetime instance.
"""
try:
Locale.parse(locale)
except Exception as e:
raise ValueError(f"Invalid locale identifier: {locale}") from e
if isinstance(value, datetime):
return dates.format_datetime(value, format=format, locale=locale)
elif isinstance(value, date):
return dates.format_date(value, format=format, locale=locale)
else:
raise TypeError(f"Unsupported type {type(value)} for localize_date")
_template_environment.undefined = _LogStrictUndefined
_template_environment.filters["get_cf_value"] = get_cf_value
@@ -278,6 +202,7 @@ def get_custom_fields_context(
CustomField.FieldDataType.MONETARY,
CustomField.FieldDataType.STRING,
CustomField.FieldDataType.URL,
CustomField.FieldDataType.LONG_TEXT,
}:
value = pathvalidate.sanitize_filename(
field_instance.value,

View File

@@ -0,0 +1,60 @@
from datetime import date
from datetime import datetime
from babel import Locale
from babel import dates
from django.utils.dateparse import parse_date
from django.utils.dateparse import parse_datetime
def localize_date(value: date | datetime | str, format: str, locale: str) -> str:
"""
Format a date, datetime or str object into a localized string using Babel.
Args:
value (date | datetime | str): The date or datetime to format. If a datetime
is provided, it should be timezone-aware (e.g., UTC from a Django DB object).
if str is provided is is parsed as date.
format (str): The format to use. Can be one of Babel's preset formats
('short', 'medium', 'long', 'full') or a custom pattern string.
locale (str): The locale code (e.g., 'en_US', 'fr_FR') to use for
localization.
Returns:
str: The localized, formatted date string.
Raises:
TypeError: If `value` is not a date, datetime or str instance.
"""
if isinstance(value, str):
value = parse_datetime(value)
try:
Locale.parse(locale)
except Exception as e:
raise ValueError(f"Invalid locale identifier: {locale}") from e
if isinstance(value, datetime):
return dates.format_datetime(value, format=format, locale=locale)
elif isinstance(value, date):
return dates.format_date(value, format=format, locale=locale)
else:
raise TypeError(f"Unsupported type {type(value)} for localize_date")
def format_datetime(value: str | datetime, format: str) -> str:
if isinstance(value, str):
value = parse_date(value)
return value.strftime(format=format)
def get_cf_value(
custom_field_data: dict[str, dict[str, str]],
name: str,
default: str | None = None,
) -> str | None:
if name in custom_field_data and custom_field_data[name]["value"] is not None:
return custom_field_data[name]["value"]
elif default is not None:
return default
return None

View File

@@ -1,7 +1,33 @@
import logging
from datetime import date
from datetime import datetime
from pathlib import Path
from django.utils.text import slugify as django_slugify
from jinja2 import StrictUndefined
from jinja2 import Template
from jinja2 import TemplateSyntaxError
from jinja2 import UndefinedError
from jinja2 import make_logging_undefined
from jinja2.sandbox import SecurityError
from documents.templating.environment import _template_environment
from documents.templating.filters import format_datetime
from documents.templating.filters import localize_date
logger = logging.getLogger("paperless.templating")
_LogStrictUndefined = make_logging_undefined(logger, StrictUndefined)
_template_environment.undefined = _LogStrictUndefined
_template_environment.filters["datetime"] = format_datetime
_template_environment.filters["slugify"] = django_slugify
_template_environment.filters["localize_date"] = localize_date
def parse_w_workflow_placeholders(
text: str,
@@ -20,6 +46,7 @@ def parse_w_workflow_placeholders(
e.g. for pre-consumption triggers created will not have been parsed yet, but it will
for added / updated triggers
"""
formatting = {
"correspondent": correspondent_name,
"document_type": doc_type_name,
@@ -52,4 +79,28 @@ def parse_w_workflow_placeholders(
formatting.update({"doc_title": doc_title})
if doc_url is not None:
formatting.update({"doc_url": doc_url})
return text.format(**formatting).strip()
logger.debug(f"Jinja Template is : {text}")
try:
template = _template_environment.from_string(
text,
template_class=Template,
)
rendered_template = template.render(formatting)
# We're good!
return rendered_template
except UndefinedError as e:
# The undefined class logs this already for us
raise e
except TemplateSyntaxError as e:
logger.warning(f"Template syntax error in title generation: {e}")
except SecurityError as e:
logger.warning(f"Template attempted restricted operation: {e}")
except Exception as e:
logger.warning(f"Unknown error in title generation: {e}")
logger.warning(
f"Invalid title format '{text}', workflow not applied: {e}",
)
raise e
return None

View File

@@ -186,6 +186,7 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
"filter_has_tags": [self.t1.id],
"filter_has_document_type": self.dt.id,
"filter_has_correspondent": self.c.id,
"filter_has_storage_path": self.sp.id,
},
],
"actions": [

View File

@@ -304,22 +304,6 @@ class TestConsumer(
self.assertEqual(document.title, "Override Title")
self._assert_first_last_send_progress()
def testOverrideTitleInvalidPlaceholders(self):
with self.assertLogs("paperless.consumer", level="ERROR") as cm:
with self.get_consumer(
self.get_test_file(),
DocumentMetadataOverrides(title="Override {correspondent]"),
) as consumer:
consumer.run()
document = Document.objects.first()
self.assertIsNotNone(document)
self.assertEqual(document.title, "sample")
expected_str = "Error occurred parsing title override 'Override {correspondent]', falling back to original"
self.assertIn(expected_str, cm.output[0])
def testOverrideCorrespondent(self):
c = Correspondent.objects.create(name="test")
@@ -437,7 +421,7 @@ class TestConsumer(
DocumentMetadataOverrides(
correspondent_id=c.pk,
document_type_id=dt.pk,
title="{correspondent}{document_type} {added_month}-{added_year_short}",
title="{{correspondent}}{{document_type}} {{added_month}}-{{added_year_short}}",
),
) as consumer:
consumer.run()

View File

@@ -18,14 +18,17 @@ class TestDocument(TestCase):
self.originals_dir = tempfile.mkdtemp()
self.thumb_dir = tempfile.mkdtemp()
override_settings(
self.overrides = override_settings(
ORIGINALS_DIR=self.originals_dir,
THUMBNAIL_DIR=self.thumb_dir,
).enable()
)
self.overrides.enable()
def tearDown(self) -> None:
shutil.rmtree(self.originals_dir)
shutil.rmtree(self.thumb_dir)
self.overrides.disable()
def test_file_deletion(self):
document = Document.objects.create(

View File

@@ -23,7 +23,6 @@ from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.tasks import empty_trash
from documents.templating.filepath import localize_date
from documents.tests.factories import DocumentFactory
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
@@ -1591,166 +1590,13 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
)
class TestDateLocalization:
class TestPathDateLocalization:
"""
Groups all tests related to the `localize_date` function.
"""
TEST_DATE = datetime.date(2023, 10, 26)
TEST_DATETIME = datetime.datetime(
2023,
10,
26,
14,
30,
5,
tzinfo=datetime.timezone.utc,
)
@pytest.mark.parametrize(
"value, format_style, locale_str, expected_output",
[
pytest.param(
TEST_DATE,
"EEEE, MMM d, yyyy",
"en_US",
"Thursday, Oct 26, 2023",
id="date-en_US-custom",
),
pytest.param(
TEST_DATE,
"dd.MM.yyyy",
"de_DE",
"26.10.2023",
id="date-de_DE-custom",
),
# German weekday and month name translation
pytest.param(
TEST_DATE,
"EEEE",
"de_DE",
"Donnerstag",
id="weekday-de_DE",
),
pytest.param(
TEST_DATE,
"MMMM",
"de_DE",
"Oktober",
id="month-de_DE",
),
# French weekday and month name translation
pytest.param(
TEST_DATE,
"EEEE",
"fr_FR",
"jeudi",
id="weekday-fr_FR",
),
pytest.param(
TEST_DATE,
"MMMM",
"fr_FR",
"octobre",
id="month-fr_FR",
),
],
)
def test_localize_date_with_date_objects(
self,
value: datetime.date,
format_style: str,
locale_str: str,
expected_output: str,
):
"""
Tests `localize_date` with `date` objects across different locales and formats.
"""
assert localize_date(value, format_style, locale_str) == expected_output
@pytest.mark.parametrize(
"value, format_style, locale_str, expected_output",
[
pytest.param(
TEST_DATETIME,
"yyyy.MM.dd G 'at' HH:mm:ss zzz",
"en_US",
"2023.10.26 AD at 14:30:05 UTC",
id="datetime-en_US-custom",
),
pytest.param(
TEST_DATETIME,
"dd.MM.yyyy",
"fr_FR",
"26.10.2023",
id="date-fr_FR-custom",
),
# Spanish weekday and month translation
pytest.param(
TEST_DATETIME,
"EEEE",
"es_ES",
"jueves",
id="weekday-es_ES",
),
pytest.param(
TEST_DATETIME,
"MMMM",
"es_ES",
"octubre",
id="month-es_ES",
),
# Italian weekday and month translation
pytest.param(
TEST_DATETIME,
"EEEE",
"it_IT",
"giovedì",
id="weekday-it_IT",
),
pytest.param(
TEST_DATETIME,
"MMMM",
"it_IT",
"ottobre",
id="month-it_IT",
),
],
)
def test_localize_date_with_datetime_objects(
self,
value: datetime.datetime,
format_style: str,
locale_str: str,
expected_output: str,
):
# To handle the non-breaking space in French and other locales
result = localize_date(value, format_style, locale_str)
assert result.replace("\u202f", " ") == expected_output.replace("\u202f", " ")
@pytest.mark.parametrize(
"invalid_value",
[
"2023-10-26",
1698330605,
None,
[],
{},
],
)
def test_localize_date_raises_type_error_for_invalid_input(self, invalid_value):
with pytest.raises(TypeError) as excinfo:
localize_date(invalid_value, "medium", "en_US")
assert f"Unsupported type {type(invalid_value)}" in str(excinfo.value)
def test_localize_date_raises_error_for_invalid_locale(self):
with pytest.raises(ValueError) as excinfo:
localize_date(self.TEST_DATE, "medium", "invalid_locale_code")
assert "Invalid locale identifier" in str(excinfo.value)
@pytest.mark.django_db
@pytest.mark.parametrize(
"filename_format,expected_filename",

View File

@@ -0,0 +1,296 @@
import datetime
from typing import Any
from typing import Literal
import pytest
from documents.templating.filters import localize_date
class TestDateLocalization:
"""
Groups all tests related to the `localize_date` function.
"""
TEST_DATE = datetime.date(2023, 10, 26)
TEST_DATETIME = datetime.datetime(
2023,
10,
26,
14,
30,
5,
tzinfo=datetime.timezone.utc,
)
TEST_DATETIME_STRING: str = "2023-10-26T14:30:05+00:00"
TEST_DATE_STRING: str = "2023-10-26"
@pytest.mark.parametrize(
"value, format_style, locale_str, expected_output",
[
pytest.param(
TEST_DATE,
"EEEE, MMM d, yyyy",
"en_US",
"Thursday, Oct 26, 2023",
id="date-en_US-custom",
),
pytest.param(
TEST_DATE,
"dd.MM.yyyy",
"de_DE",
"26.10.2023",
id="date-de_DE-custom",
),
# German weekday and month name translation
pytest.param(
TEST_DATE,
"EEEE",
"de_DE",
"Donnerstag",
id="weekday-de_DE",
),
pytest.param(
TEST_DATE,
"MMMM",
"de_DE",
"Oktober",
id="month-de_DE",
),
# French weekday and month name translation
pytest.param(
TEST_DATE,
"EEEE",
"fr_FR",
"jeudi",
id="weekday-fr_FR",
),
pytest.param(
TEST_DATE,
"MMMM",
"fr_FR",
"octobre",
id="month-fr_FR",
),
],
)
def test_localize_date_with_date_objects(
self,
value: datetime.date,
format_style: str,
locale_str: str,
expected_output: str,
):
"""
Tests `localize_date` with `date` objects across different locales and formats.
"""
assert localize_date(value, format_style, locale_str) == expected_output
@pytest.mark.parametrize(
"value, format_style, locale_str, expected_output",
[
pytest.param(
TEST_DATETIME,
"yyyy.MM.dd G 'at' HH:mm:ss zzz",
"en_US",
"2023.10.26 AD at 14:30:05 UTC",
id="datetime-en_US-custom",
),
pytest.param(
TEST_DATETIME,
"dd.MM.yyyy",
"fr_FR",
"26.10.2023",
id="date-fr_FR-custom",
),
# Spanish weekday and month translation
pytest.param(
TEST_DATETIME,
"EEEE",
"es_ES",
"jueves",
id="weekday-es_ES",
),
pytest.param(
TEST_DATETIME,
"MMMM",
"es_ES",
"octubre",
id="month-es_ES",
),
# Italian weekday and month translation
pytest.param(
TEST_DATETIME,
"EEEE",
"it_IT",
"giovedì",
id="weekday-it_IT",
),
pytest.param(
TEST_DATETIME,
"MMMM",
"it_IT",
"ottobre",
id="month-it_IT",
),
],
)
def test_localize_date_with_datetime_objects(
self,
value: datetime.datetime,
format_style: str,
locale_str: str,
expected_output: str,
):
# To handle the non-breaking space in French and other locales
result = localize_date(value, format_style, locale_str)
assert result.replace("\u202f", " ") == expected_output.replace("\u202f", " ")
@pytest.mark.parametrize(
"invalid_value",
[
1698330605,
None,
[],
{},
],
)
def test_localize_date_raises_type_error_for_invalid_input(
self,
invalid_value: None | list[object] | dict[Any, Any] | Literal[1698330605],
):
with pytest.raises(TypeError) as excinfo:
localize_date(invalid_value, "medium", "en_US")
assert f"Unsupported type {type(invalid_value)}" in str(excinfo.value)
def test_localize_date_raises_error_for_invalid_locale(self):
with pytest.raises(ValueError) as excinfo:
localize_date(self.TEST_DATE, "medium", "invalid_locale_code")
assert "Invalid locale identifier" in str(excinfo.value)
@pytest.mark.parametrize(
"value, format_style, locale_str, expected_output",
[
pytest.param(
TEST_DATETIME_STRING,
"EEEE, MMM d, yyyy",
"en_US",
"Thursday, Oct 26, 2023",
id="date-en_US-custom",
),
pytest.param(
TEST_DATETIME_STRING,
"dd.MM.yyyy",
"de_DE",
"26.10.2023",
id="date-de_DE-custom",
),
# German weekday and month name translation
pytest.param(
TEST_DATETIME_STRING,
"EEEE",
"de_DE",
"Donnerstag",
id="weekday-de_DE",
),
pytest.param(
TEST_DATETIME_STRING,
"MMMM",
"de_DE",
"Oktober",
id="month-de_DE",
),
# French weekday and month name translation
pytest.param(
TEST_DATETIME_STRING,
"EEEE",
"fr_FR",
"jeudi",
id="weekday-fr_FR",
),
pytest.param(
TEST_DATETIME_STRING,
"MMMM",
"fr_FR",
"octobre",
id="month-fr_FR",
),
],
)
def test_localize_date_with_datetime_string(
self,
value: str,
format_style: str,
locale_str: str,
expected_output: str,
):
"""
Tests `localize_date` with `date` string across different locales and formats.
"""
assert localize_date(value, format_style, locale_str) == expected_output
@pytest.mark.parametrize(
"value, format_style, locale_str, expected_output",
[
pytest.param(
TEST_DATE_STRING,
"EEEE, MMM d, yyyy",
"en_US",
"Thursday, Oct 26, 2023",
id="date-en_US-custom",
),
pytest.param(
TEST_DATE_STRING,
"dd.MM.yyyy",
"de_DE",
"26.10.2023",
id="date-de_DE-custom",
),
# German weekday and month name translation
pytest.param(
TEST_DATE_STRING,
"EEEE",
"de_DE",
"Donnerstag",
id="weekday-de_DE",
),
pytest.param(
TEST_DATE_STRING,
"MMMM",
"de_DE",
"Oktober",
id="month-de_DE",
),
# French weekday and month name translation
pytest.param(
TEST_DATE_STRING,
"EEEE",
"fr_FR",
"jeudi",
id="weekday-fr_FR",
),
pytest.param(
TEST_DATE_STRING,
"MMMM",
"fr_FR",
"octobre",
id="month-fr_FR",
),
],
)
def test_localize_date_with_date_string(
self,
value: str,
format_style: str,
locale_str: str,
expected_output: str,
):
"""
Tests `localize_date` with `date` string across different locales and formats.
"""
assert localize_date(value, format_style, locale_str) == expected_output

View File

@@ -97,12 +97,6 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
class TestDecryptDocuments(FileSystemAssertsMixin, TestCase):
@override_settings(
ORIGINALS_DIR=(Path(__file__).parent / "samples" / "originals"),
THUMBNAIL_DIR=(Path(__file__).parent / "samples" / "thumb"),
PASSPHRASE="test",
FILENAME_FORMAT=None,
)
@mock.patch("documents.management.commands.decrypt_documents.input")
def test_decrypt(self, m):
media_dir = tempfile.mkdtemp()
@@ -111,55 +105,55 @@ class TestDecryptDocuments(FileSystemAssertsMixin, TestCase):
originals_dir.mkdir(parents=True, exist_ok=True)
thumb_dir.mkdir(parents=True, exist_ok=True)
override_settings(
with override_settings(
ORIGINALS_DIR=originals_dir,
THUMBNAIL_DIR=thumb_dir,
PASSPHRASE="test",
).enable()
FILENAME_FORMAT=None,
):
doc = Document.objects.create(
checksum="82186aaa94f0b98697d704b90fd1c072",
title="wow",
filename="0000004.pdf.gpg",
mime_type="application/pdf",
storage_type=Document.STORAGE_TYPE_GPG,
)
doc = Document.objects.create(
checksum="82186aaa94f0b98697d704b90fd1c072",
title="wow",
filename="0000004.pdf.gpg",
mime_type="application/pdf",
storage_type=Document.STORAGE_TYPE_GPG,
)
shutil.copy(
(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000004.pdf.gpg"
),
originals_dir / "0000004.pdf.gpg",
)
shutil.copy(
(
Path(__file__).parent
/ "samples"
/ "documents"
/ "thumbnails"
/ "0000004.webp.gpg"
),
thumb_dir / f"{doc.id:07}.webp.gpg",
)
shutil.copy(
(
Path(__file__).parent
/ "samples"
/ "documents"
/ "originals"
/ "0000004.pdf.gpg"
),
originals_dir / "0000004.pdf.gpg",
)
shutil.copy(
(
Path(__file__).parent
/ "samples"
/ "documents"
/ "thumbnails"
/ "0000004.webp.gpg"
),
thumb_dir / f"{doc.id:07}.webp.gpg",
)
call_command("decrypt_documents")
call_command("decrypt_documents")
doc.refresh_from_db()
doc.refresh_from_db()
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
self.assertEqual(doc.filename, "0000004.pdf")
self.assertIsFile(Path(originals_dir) / "0000004.pdf")
self.assertIsFile(doc.source_path)
self.assertIsFile(Path(thumb_dir) / f"{doc.id:07}.webp")
self.assertIsFile(doc.thumbnail_path)
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
self.assertEqual(doc.filename, "0000004.pdf")
self.assertIsFile(Path(originals_dir) / "0000004.pdf")
self.assertIsFile(doc.source_path)
self.assertIsFile(Path(thumb_dir) / f"{doc.id:07}.webp")
self.assertIsFile(doc.thumbnail_path)
with doc.source_file as f:
checksum: str = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, doc.checksum)
with doc.source_file as f:
checksum: str = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, doc.checksum)
class TestMakeIndex(TestCase):

View File

@@ -1,6 +1,8 @@
import datetime
import shutil
import socket
from datetime import timedelta
from pathlib import Path
from typing import TYPE_CHECKING
from unittest import mock
@@ -15,6 +17,7 @@ from guardian.shortcuts import get_users_with_perms
from httpx import HTTPError
from httpx import HTTPStatusError
from pytest_httpx import HTTPXMock
from rest_framework.test import APIClient
from rest_framework.test import APITestCase
from documents.signals.handlers import run_workflows
@@ -22,7 +25,7 @@ from documents.signals.handlers import send_webhook
if TYPE_CHECKING:
from django.db.models import QuerySet
from pytest_django.fixtures import SettingsWrapper
from documents import tasks
from documents.data_models import ConsumableDocument
@@ -122,7 +125,7 @@ class TestWorkflows(
filter_path=f"*/{self.dirs.scratch_dir.parts[-1]}/*",
)
action = WorkflowAction.objects.create(
assign_title="Doc from {correspondent}",
assign_title="Doc from {{correspondent}}",
assign_correspondent=self.c,
assign_document_type=self.dt,
assign_storage_path=self.sp,
@@ -241,7 +244,7 @@ class TestWorkflows(
)
action = WorkflowAction.objects.create(
assign_title="Doc from {correspondent}",
assign_title="Doc from {{correspondent}}",
assign_correspondent=self.c,
assign_document_type=self.dt,
assign_storage_path=self.sp,
@@ -892,7 +895,7 @@ class TestWorkflows(
filter_filename="*sample*",
)
action = WorkflowAction.objects.create(
assign_title="Doc created in {created_year}",
assign_title="Doc created in {{created_year}}",
assign_correspondent=self.c2,
assign_document_type=self.dt,
assign_storage_path=self.sp,
@@ -1147,6 +1150,38 @@ class TestWorkflows(
expected_str = f"Document correspondent {doc.correspondent} does not match {trigger.filter_has_correspondent}"
self.assertIn(expected_str, cm.output[1])
def test_document_added_no_match_storage_path(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
filter_has_storage_path=self.sp,
)
action = WorkflowAction.objects.create(
assign_title="Doc assign owner",
assign_owner=self.user2,
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
doc = Document.objects.create(
title="sample test",
original_filename="sample.pdf",
)
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
document_consumption_finished.send(
sender=self.__class__,
document=doc,
)
expected_str = f"Document did not match {w}"
self.assertIn(expected_str, cm.output[0])
expected_str = f"Document storage path {doc.storage_path} does not match {trigger.filter_has_storage_path}"
self.assertIn(expected_str, cm.output[1])
def test_document_added_invalid_title_placeholders(self):
"""
GIVEN:
@@ -1155,7 +1190,7 @@ class TestWorkflows(
WHEN:
- File that matches is added
THEN:
- Title is not updated, error is output
- Title is updated but the placeholder isn't replaced
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
@@ -1181,15 +1216,12 @@ class TestWorkflows(
created=created,
)
with self.assertLogs("paperless.handlers", level="ERROR") as cm:
document_consumption_finished.send(
sender=self.__class__,
document=doc,
)
expected_str = f"Error occurred parsing title assignment '{action.assign_title}', falling back to original"
self.assertIn(expected_str, cm.output[0])
document_consumption_finished.send(
sender=self.__class__,
document=doc,
)
self.assertEqual(doc.title, "sample test")
self.assertEqual(doc.title, "Doc {created_year]")
def test_document_updated_workflow(self):
trigger = WorkflowTrigger.objects.create(
@@ -1223,6 +1255,45 @@ class TestWorkflows(
self.assertEqual(doc.custom_fields.all().count(), 1)
def test_document_consumption_workflow_month_placeholder_addded(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
sources=f"{DocumentSource.ApiUpload}",
filter_filename="simple*",
)
action = WorkflowAction.objects.create(
assign_title="Doc added in {{added_month_name_short}}",
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
superuser = User.objects.create_superuser("superuser")
self.client.force_authenticate(user=superuser)
test_file = shutil.copy(
self.SAMPLE_DIR / "simple.pdf",
self.dirs.scratch_dir / "simple.pdf",
)
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ApiUpload,
original_file=test_file,
),
None,
)
document = Document.objects.first()
self.assertRegex(
document.title,
r"Doc added in \w{3,}",
) # Match any 3-letter month name
def test_document_updated_workflow_existing_custom_field(self):
"""
GIVEN:
@@ -1777,6 +1848,7 @@ class TestWorkflows(
filter_filename="*sample*",
filter_has_document_type=self.dt,
filter_has_correspondent=self.c,
filter_has_storage_path=self.sp,
)
trigger.filter_has_tags.set([self.t1])
trigger.save()
@@ -1797,6 +1869,7 @@ class TestWorkflows(
title=f"sample test {i}",
checksum=f"checksum{i}",
correspondent=self.c,
storage_path=self.sp,
original_filename=f"sample_{i}.pdf",
document_type=self.dt if i % 2 == 0 else None,
)
@@ -2035,7 +2108,7 @@ class TestWorkflows(
filter_filename="*simple*",
)
action = WorkflowAction.objects.create(
assign_title="Doc from {correspondent}",
assign_title="Doc from {{correspondent}}",
assign_correspondent=self.c,
assign_document_type=self.dt,
assign_storage_path=self.sp,
@@ -2614,7 +2687,7 @@ class TestWorkflows(
)
webhook_action = WorkflowActionWebhook.objects.create(
use_params=False,
body="Test message: {doc_url}",
body="Test message: {{doc_url}}",
url="http://paperless-ngx.com",
include_document=False,
)
@@ -2673,7 +2746,7 @@ class TestWorkflows(
)
webhook_action = WorkflowActionWebhook.objects.create(
use_params=False,
body="Test message: {doc_url}",
body="Test message: {{doc_url}}",
url="http://paperless-ngx.com",
include_document=True,
)
@@ -3130,3 +3203,238 @@ class TestWebhookSecurity:
req = httpx_mock.get_request()
assert req.headers["Host"] == "paperless-ngx.com"
assert "evil.test" not in req.headers.get("Host", "")
@pytest.mark.django_db
class TestDateWorkflowLocalization(
SampleDirMixin,
):
"""Test cases for workflows that use date localization in templates."""
TEST_DATETIME = datetime.datetime(
2023,
6,
26,
14,
30,
5,
tzinfo=datetime.timezone.utc,
)
@pytest.mark.parametrize(
"title_template,expected_title",
[
pytest.param(
"Created at {{ created | localize_date('MMMM', 'es_ES') }}",
"Created at junio",
id="spanish_month",
),
pytest.param(
"Created at {{ created | localize_date('MMMM', 'de_DE') }}",
"Created at Juni", # codespell:ignore
id="german_month",
),
pytest.param(
"Created at {{ created | localize_date('dd/MM/yyyy', 'en_GB') }}",
"Created at 26/06/2023",
id="british_date_format",
),
],
)
def test_document_added_workflow_localization(
self,
title_template: str,
expected_title: str,
):
"""
GIVEN:
- Document added workflow with title template using localize_date filter
WHEN:
- Document is consumed
THEN:
- Document title is set with localized date
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
filter_filename="*sample*",
)
action = WorkflowAction.objects.create(
assign_title=title_template,
)
workflow = Workflow.objects.create(
name="Workflow 1",
order=0,
)
workflow.triggers.add(trigger)
workflow.actions.add(action)
workflow.save()
doc = Document.objects.create(
title="sample test",
correspondent=None,
original_filename="sample.pdf",
created=self.TEST_DATETIME,
)
document_consumption_finished.send(
sender=self.__class__,
document=doc,
)
doc.refresh_from_db()
assert doc.title == expected_title
@pytest.mark.parametrize(
"title_template,expected_title",
[
pytest.param(
"Created at {{ created | localize_date('MMMM', 'es_ES') }}",
"Created at junio",
id="spanish_month",
),
pytest.param(
"Created at {{ created | localize_date('MMMM', 'de_DE') }}",
"Created at Juni", # codespell:ignore
id="german_month",
),
pytest.param(
"Created at {{ created | localize_date('dd/MM/yyyy', 'en_GB') }}",
"Created at 26/06/2023",
id="british_date_format",
),
],
)
def test_document_updated_workflow_localization(
self,
title_template: str,
expected_title: str,
):
"""
GIVEN:
- Document updated workflow with title template using localize_date filter
WHEN:
- Document is updated via API
THEN:
- Document title is set with localized date
"""
# Setup test data
dt = DocumentType.objects.create(name="DocType Name")
c = Correspondent.objects.create(name="Correspondent Name")
client = APIClient()
superuser = User.objects.create_superuser("superuser")
client.force_authenticate(user=superuser)
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
filter_has_document_type=dt,
)
doc = Document.objects.create(
title="sample test",
correspondent=c,
original_filename="sample.pdf",
created=self.TEST_DATETIME,
)
action = WorkflowAction.objects.create(
assign_title=title_template,
)
workflow = Workflow.objects.create(
name="Workflow 1",
order=0,
)
workflow.triggers.add(trigger)
workflow.actions.add(action)
workflow.save()
client.patch(
f"/api/documents/{doc.id}/",
{"document_type": dt.id},
format="json",
)
doc.refresh_from_db()
assert doc.title == expected_title
@pytest.mark.parametrize(
"title_template,expected_title",
[
pytest.param(
"Added at {{ added | localize_date('MMMM', 'es_ES') }}",
"Added at junio",
id="spanish_month",
),
pytest.param(
"Added at {{ added | localize_date('MMMM', 'de_DE') }}",
"Added at Juni", # codespell:ignore
id="german_month",
),
pytest.param(
"Added at {{ added | localize_date('dd/MM/yyyy', 'en_GB') }}",
"Added at 26/06/2023",
id="british_date_format",
),
],
)
def test_document_consumption_workflow_localization(
self,
tmp_path: Path,
settings: SettingsWrapper,
title_template: str,
expected_title: str,
):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
sources=f"{DocumentSource.ApiUpload}",
filter_filename="simple*",
)
test_file = shutil.copy(
self.SAMPLE_DIR / "simple.pdf",
tmp_path / "simple.pdf",
)
action = WorkflowAction.objects.create(
assign_title=title_template,
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
(tmp_path / "scratch").mkdir(parents=True, exist_ok=True)
(tmp_path / "thumbnails").mkdir(parents=True, exist_ok=True)
# Temporarily override "now" for the environment so templates using
# added/created placeholders behave as if it's a different system date.
with (
mock.patch(
"documents.tasks.ProgressManager",
DummyProgressManager,
),
mock.patch(
"django.utils.timezone.now",
return_value=self.TEST_DATETIME,
),
override_settings(
SCRATCH_DIR=tmp_path / "scratch",
THUMBNAIL_DIR=tmp_path / "thumbnails",
),
):
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ApiUpload,
original_file=test_file,
),
None,
)
document = Document.objects.first()
assert document.title == expected_title

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-09-09 20:04+0000\n"
"POT-Creation-Date: 2025-09-14 03:21+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -21,39 +21,39 @@ msgstr ""
msgid "Documents"
msgstr ""
#: documents/filters.py:384
#: documents/filters.py:386
msgid "Value must be valid JSON."
msgstr ""
#: documents/filters.py:403
#: documents/filters.py:405
msgid "Invalid custom field query expression"
msgstr ""
#: documents/filters.py:413
#: documents/filters.py:415
msgid "Invalid expression list. Must be nonempty."
msgstr ""
#: documents/filters.py:434
#: documents/filters.py:436
msgid "Invalid logical operator {op!r}"
msgstr ""
#: documents/filters.py:448
#: documents/filters.py:450
msgid "Maximum number of query conditions exceeded."
msgstr ""
#: documents/filters.py:513
#: documents/filters.py:515
msgid "{name!r} is not a valid custom field."
msgstr ""
#: documents/filters.py:550
#: documents/filters.py:552
msgid "{data_type} does not support query expr {expr!r}."
msgstr ""
#: documents/filters.py:658
#: documents/filters.py:660
msgid "Maximum nesting depth exceeded."
msgstr ""
#: documents/filters.py:843
#: documents/filters.py:845
msgid "Custom field not found"
msgstr ""
@@ -61,27 +61,27 @@ msgstr ""
msgid "owner"
msgstr ""
#: documents/models.py:53 documents/models.py:946
#: documents/models.py:53 documents/models.py:950
msgid "None"
msgstr ""
#: documents/models.py:54 documents/models.py:947
#: documents/models.py:54 documents/models.py:951
msgid "Any word"
msgstr ""
#: documents/models.py:55 documents/models.py:948
#: documents/models.py:55 documents/models.py:952
msgid "All words"
msgstr ""
#: documents/models.py:56 documents/models.py:949
#: documents/models.py:56 documents/models.py:953
msgid "Exact match"
msgstr ""
#: documents/models.py:57 documents/models.py:950
#: documents/models.py:57 documents/models.py:954
msgid "Regular expression"
msgstr ""
#: documents/models.py:58 documents/models.py:951
#: documents/models.py:58 documents/models.py:955
msgid "Fuzzy word"
msgstr ""
@@ -89,20 +89,20 @@ msgstr ""
msgid "Automatic"
msgstr ""
#: documents/models.py:62 documents/models.py:423 documents/models.py:1441
#: documents/models.py:62 documents/models.py:423 documents/models.py:1451
#: paperless_mail/models.py:23 paperless_mail/models.py:143
msgid "name"
msgstr ""
#: documents/models.py:64 documents/models.py:1015
#: documents/models.py:64 documents/models.py:1019
msgid "match"
msgstr ""
#: documents/models.py:67 documents/models.py:1018
#: documents/models.py:67 documents/models.py:1022
msgid "matching algorithm"
msgstr ""
#: documents/models.py:72 documents/models.py:1023
#: documents/models.py:72 documents/models.py:1027
msgid "is insensitive"
msgstr ""
@@ -207,7 +207,7 @@ msgid "The number of pages of the document."
msgstr ""
#: documents/models.py:217 documents/models.py:655 documents/models.py:693
#: documents/models.py:764 documents/models.py:822
#: documents/models.py:765 documents/models.py:824
msgid "created"
msgstr ""
@@ -256,7 +256,7 @@ msgid "The position of this document in your physical document archive."
msgstr ""
#: documents/models.py:294 documents/models.py:666 documents/models.py:720
#: documents/models.py:1484
#: documents/models.py:1494
msgid "document"
msgstr ""
@@ -280,11 +280,11 @@ msgstr ""
msgid "Title"
msgstr ""
#: documents/models.py:410 documents/models.py:967
#: documents/models.py:410 documents/models.py:971
msgid "Created"
msgstr ""
#: documents/models.py:411 documents/models.py:966
#: documents/models.py:411 documents/models.py:970
msgid "Added"
msgstr ""
@@ -752,427 +752,434 @@ msgstr ""
msgid "Select"
msgstr ""
#: documents/models.py:773
#: documents/models.py:762
msgid "Long Text"
msgstr ""
#: documents/models.py:774
msgid "data type"
msgstr ""
#: documents/models.py:780
#: documents/models.py:781
msgid "extra data"
msgstr ""
#: documents/models.py:784
#: documents/models.py:785
msgid "Extra data for the custom field, such as select options"
msgstr ""
#: documents/models.py:790
#: documents/models.py:791
msgid "custom field"
msgstr ""
#: documents/models.py:791
#: documents/models.py:792
msgid "custom fields"
msgstr ""
#: documents/models.py:888
#: documents/models.py:892
msgid "custom field instance"
msgstr ""
#: documents/models.py:889
#: documents/models.py:893
msgid "custom field instances"
msgstr ""
#: documents/models.py:954
#: documents/models.py:958
msgid "Consumption Started"
msgstr ""
#: documents/models.py:955
#: documents/models.py:959
msgid "Document Added"
msgstr ""
#: documents/models.py:956
#: documents/models.py:960
msgid "Document Updated"
msgstr ""
#: documents/models.py:957
#: documents/models.py:961
msgid "Scheduled"
msgstr ""
#: documents/models.py:960
#: documents/models.py:964
msgid "Consume Folder"
msgstr ""
#: documents/models.py:961
#: documents/models.py:965
msgid "Api Upload"
msgstr ""
#: documents/models.py:962
#: documents/models.py:966
msgid "Mail Fetch"
msgstr ""
#: documents/models.py:963
#: documents/models.py:967
msgid "Web UI"
msgstr ""
#: documents/models.py:968
#: documents/models.py:972
msgid "Modified"
msgstr ""
#: documents/models.py:969
#: documents/models.py:973
msgid "Custom Field"
msgstr ""
#: documents/models.py:972
#: documents/models.py:976
msgid "Workflow Trigger Type"
msgstr ""
#: documents/models.py:984
#: documents/models.py:988
msgid "filter path"
msgstr ""
#: documents/models.py:989
#: documents/models.py:993
msgid ""
"Only consume documents with a path that matches this if specified. Wildcards "
"specified as * are allowed. Case insensitive."
msgstr ""
#: documents/models.py:996
#: documents/models.py:1000
msgid "filter filename"
msgstr ""
#: documents/models.py:1001 paperless_mail/models.py:200
#: documents/models.py:1005 paperless_mail/models.py:200
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
#: documents/models.py:1012
#: documents/models.py:1016
msgid "filter documents from this mail rule"
msgstr ""
#: documents/models.py:1028
#: documents/models.py:1032
msgid "has these tag(s)"
msgstr ""
#: documents/models.py:1036
#: documents/models.py:1040
msgid "has this document type"
msgstr ""
#: documents/models.py:1044
#: documents/models.py:1048
msgid "has this correspondent"
msgstr ""
#: documents/models.py:1048
#: documents/models.py:1056
msgid "has this storage path"
msgstr ""
#: documents/models.py:1060
msgid "schedule offset days"
msgstr ""
#: documents/models.py:1051
#: documents/models.py:1063
msgid "The number of days to offset the schedule trigger by."
msgstr ""
#: documents/models.py:1056
#: documents/models.py:1068
msgid "schedule is recurring"
msgstr ""
#: documents/models.py:1059
#: documents/models.py:1071
msgid "If the schedule should be recurring."
msgstr ""
#: documents/models.py:1064
#: documents/models.py:1076
msgid "schedule recurring delay in days"
msgstr ""
#: documents/models.py:1068
#: documents/models.py:1080
msgid "The number of days between recurring schedule triggers."
msgstr ""
#: documents/models.py:1073
#: documents/models.py:1085
msgid "schedule date field"
msgstr ""
#: documents/models.py:1078
#: documents/models.py:1090
msgid "The field to check for a schedule trigger."
msgstr ""
#: documents/models.py:1087
#: documents/models.py:1099
msgid "schedule date custom field"
msgstr ""
#: documents/models.py:1091
#: documents/models.py:1103
msgid "workflow trigger"
msgstr ""
#: documents/models.py:1092
#: documents/models.py:1104
msgid "workflow triggers"
msgstr ""
#: documents/models.py:1100
#: documents/models.py:1112
msgid "email subject"
msgstr ""
#: documents/models.py:1104
#: documents/models.py:1116
msgid ""
"The subject of the email, can include some placeholders, see documentation."
msgstr ""
#: documents/models.py:1110
#: documents/models.py:1122
msgid "email body"
msgstr ""
#: documents/models.py:1113
#: documents/models.py:1125
msgid ""
"The body (message) of the email, can include some placeholders, see "
"documentation."
msgstr ""
#: documents/models.py:1119
#: documents/models.py:1131
msgid "emails to"
msgstr ""
#: documents/models.py:1122
#: documents/models.py:1134
msgid "The destination email addresses, comma separated."
msgstr ""
#: documents/models.py:1128
#: documents/models.py:1140
msgid "include document in email"
msgstr ""
#: documents/models.py:1139
#: documents/models.py:1151
msgid "webhook url"
msgstr ""
#: documents/models.py:1142
#: documents/models.py:1154
msgid "The destination URL for the notification."
msgstr ""
#: documents/models.py:1147
#: documents/models.py:1159
msgid "use parameters"
msgstr ""
#: documents/models.py:1152
#: documents/models.py:1164
msgid "send as JSON"
msgstr ""
#: documents/models.py:1156
#: documents/models.py:1168
msgid "webhook parameters"
msgstr ""
#: documents/models.py:1159
#: documents/models.py:1171
msgid "The parameters to send with the webhook URL if body not used."
msgstr ""
#: documents/models.py:1163
#: documents/models.py:1175
msgid "webhook body"
msgstr ""
#: documents/models.py:1166
#: documents/models.py:1178
msgid "The body to send with the webhook URL if parameters not used."
msgstr ""
#: documents/models.py:1170
#: documents/models.py:1182
msgid "webhook headers"
msgstr ""
#: documents/models.py:1173
#: documents/models.py:1185
msgid "The headers to send with the webhook URL."
msgstr ""
#: documents/models.py:1178
#: documents/models.py:1190
msgid "include document in webhook"
msgstr ""
#: documents/models.py:1189
#: documents/models.py:1201
msgid "Assignment"
msgstr ""
#: documents/models.py:1193
#: documents/models.py:1205
msgid "Removal"
msgstr ""
#: documents/models.py:1197 documents/templates/account/password_reset.html:15
#: documents/models.py:1209 documents/templates/account/password_reset.html:15
msgid "Email"
msgstr ""
#: documents/models.py:1201
#: documents/models.py:1213
msgid "Webhook"
msgstr ""
#: documents/models.py:1205
#: documents/models.py:1217
msgid "Workflow Action Type"
msgstr ""
#: documents/models.py:1211
#: documents/models.py:1223
msgid "assign title"
msgstr ""
#: documents/models.py:1216
msgid ""
"Assign a document title, can include some placeholders, see documentation."
#: documents/models.py:1227
msgid "Assign a document title, must be a Jinja2 template, see documentation."
msgstr ""
#: documents/models.py:1225 paperless_mail/models.py:274
#: documents/models.py:1235 paperless_mail/models.py:274
msgid "assign this tag"
msgstr ""
#: documents/models.py:1234 paperless_mail/models.py:282
#: documents/models.py:1244 paperless_mail/models.py:282
msgid "assign this document type"
msgstr ""
#: documents/models.py:1243 paperless_mail/models.py:296
#: documents/models.py:1253 paperless_mail/models.py:296
msgid "assign this correspondent"
msgstr ""
#: documents/models.py:1252
#: documents/models.py:1262
msgid "assign this storage path"
msgstr ""
#: documents/models.py:1261
#: documents/models.py:1271
msgid "assign this owner"
msgstr ""
#: documents/models.py:1268
#: documents/models.py:1278
msgid "grant view permissions to these users"
msgstr ""
#: documents/models.py:1275
#: documents/models.py:1285
msgid "grant view permissions to these groups"
msgstr ""
#: documents/models.py:1282
#: documents/models.py:1292
msgid "grant change permissions to these users"
msgstr ""
#: documents/models.py:1289
#: documents/models.py:1299
msgid "grant change permissions to these groups"
msgstr ""
#: documents/models.py:1296
#: documents/models.py:1306
msgid "assign these custom fields"
msgstr ""
#: documents/models.py:1300
#: documents/models.py:1310
msgid "custom field values"
msgstr ""
#: documents/models.py:1304
#: documents/models.py:1314
msgid "Optional values to assign to the custom fields."
msgstr ""
#: documents/models.py:1313
#: documents/models.py:1323
msgid "remove these tag(s)"
msgstr ""
#: documents/models.py:1318
#: documents/models.py:1328
msgid "remove all tags"
msgstr ""
#: documents/models.py:1325
#: documents/models.py:1335
msgid "remove these document type(s)"
msgstr ""
#: documents/models.py:1330
#: documents/models.py:1340
msgid "remove all document types"
msgstr ""
#: documents/models.py:1337
#: documents/models.py:1347
msgid "remove these correspondent(s)"
msgstr ""
#: documents/models.py:1342
#: documents/models.py:1352
msgid "remove all correspondents"
msgstr ""
#: documents/models.py:1349
#: documents/models.py:1359
msgid "remove these storage path(s)"
msgstr ""
#: documents/models.py:1354
#: documents/models.py:1364
msgid "remove all storage paths"
msgstr ""
#: documents/models.py:1361
#: documents/models.py:1371
msgid "remove these owner(s)"
msgstr ""
#: documents/models.py:1366
#: documents/models.py:1376
msgid "remove all owners"
msgstr ""
#: documents/models.py:1373
#: documents/models.py:1383
msgid "remove view permissions for these users"
msgstr ""
#: documents/models.py:1380
#: documents/models.py:1390
msgid "remove view permissions for these groups"
msgstr ""
#: documents/models.py:1387
#: documents/models.py:1397
msgid "remove change permissions for these users"
msgstr ""
#: documents/models.py:1394
#: documents/models.py:1404
msgid "remove change permissions for these groups"
msgstr ""
#: documents/models.py:1399
#: documents/models.py:1409
msgid "remove all permissions"
msgstr ""
#: documents/models.py:1406
#: documents/models.py:1416
msgid "remove these custom fields"
msgstr ""
#: documents/models.py:1411
#: documents/models.py:1421
msgid "remove all custom fields"
msgstr ""
#: documents/models.py:1420
#: documents/models.py:1430
msgid "email"
msgstr ""
#: documents/models.py:1429
#: documents/models.py:1439
msgid "webhook"
msgstr ""
#: documents/models.py:1433
#: documents/models.py:1443
msgid "workflow action"
msgstr ""
#: documents/models.py:1434
#: documents/models.py:1444
msgid "workflow actions"
msgstr ""
#: documents/models.py:1443 paperless_mail/models.py:145
#: documents/models.py:1453 paperless_mail/models.py:145
msgid "order"
msgstr ""
#: documents/models.py:1449
#: documents/models.py:1459
msgid "triggers"
msgstr ""
#: documents/models.py:1456
#: documents/models.py:1466
msgid "actions"
msgstr ""
#: documents/models.py:1459 paperless_mail/models.py:154
#: documents/models.py:1469 paperless_mail/models.py:154
msgid "enabled"
msgstr ""
#: documents/models.py:1470
#: documents/models.py:1480
msgid "workflow"
msgstr ""
#: documents/models.py:1474
#: documents/models.py:1484
msgid "workflow trigger type"
msgstr ""
#: documents/models.py:1488
#: documents/models.py:1498
msgid "date run"
msgstr ""
#: documents/models.py:1494
#: documents/models.py:1504
msgid "workflow run"
msgstr ""
#: documents/models.py:1495
#: documents/models.py:1505
msgid "workflow runs"
msgstr ""

View File

@@ -322,6 +322,7 @@ INSTALLED_APPS = [
"paperless_tesseract.apps.PaperlessTesseractConfig",
"paperless_text.apps.PaperlessTextConfig",
"paperless_mail.apps.PaperlessMailConfig",
"paperless_remote.apps.PaperlessRemoteParserConfig",
"django.contrib.admin",
"rest_framework",
"rest_framework.authtoken",
@@ -1388,3 +1389,10 @@ WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true",
)
###############################################################################
# Remote Parser #
###############################################################################
REMOTE_OCR_ENGINE = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE")
REMOTE_OCR_API_KEY = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY")
REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")

View File

@@ -0,0 +1,4 @@
# this is here so that django finds the checks.
from paperless_remote.checks import check_remote_parser_configured
__all__ = ["check_remote_parser_configured"]

View File

@@ -0,0 +1,14 @@
from django.apps import AppConfig
from paperless_remote.signals import remote_consumer_declaration
class PaperlessRemoteParserConfig(AppConfig):
name = "paperless_remote"
def ready(self):
from documents.signals import document_consumer_declaration
document_consumer_declaration.connect(remote_consumer_declaration)
AppConfig.ready(self)

View File

@@ -0,0 +1,17 @@
from django.conf import settings
from django.core.checks import Error
from django.core.checks import register
@register()
def check_remote_parser_configured(app_configs, **kwargs):
if settings.REMOTE_OCR_ENGINE == "azureai" and not (
settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY
):
return [
Error(
"Azure AI remote parser requires endpoint and API key to be configured.",
),
]
return []

View File

@@ -0,0 +1,114 @@
from pathlib import Path
from django.conf import settings
from paperless_tesseract.parsers import RasterisedDocumentParser
class RemoteEngineConfig:
def __init__(
self,
engine: str,
api_key: str | None = None,
endpoint: str | None = None,
):
self.engine = engine
self.api_key = api_key
self.endpoint = endpoint
def engine_is_valid(self):
valid = self.engine in ["azureai"] and self.api_key is not None
if self.engine == "azureai":
valid = valid and self.endpoint is not None
return valid
class RemoteDocumentParser(RasterisedDocumentParser):
"""
This parser uses a remote OCR engine to parse documents. Currently, it supports Azure AI Vision
as this is the only service that provides a remote OCR API with text-embedded PDF output.
"""
logging_name = "paperless.parsing.remote"
def get_settings(self) -> RemoteEngineConfig:
"""
Returns the configuration for the remote OCR engine, loaded from Django settings.
"""
return RemoteEngineConfig(
engine=settings.REMOTE_OCR_ENGINE,
api_key=settings.REMOTE_OCR_API_KEY,
endpoint=settings.REMOTE_OCR_ENDPOINT,
)
def supported_mime_types(self):
if self.settings.engine_is_valid():
return {
"application/pdf": ".pdf",
"image/png": ".png",
"image/jpeg": ".jpg",
"image/tiff": ".tiff",
"image/bmp": ".bmp",
"image/gif": ".gif",
"image/webp": ".webp",
}
else:
return {}
def azure_ai_vision_parse(
self,
file: Path,
) -> str | None:
"""
Uses Azure AI Vision to parse the document and return the text content.
It requests a searchable PDF output with embedded text.
The PDF is saved to the archive_path attribute.
Returns the text content extracted from the document.
If the parsing fails, it returns None.
"""
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
from azure.ai.documentintelligence.models import AnalyzeOutputOption
from azure.ai.documentintelligence.models import DocumentContentFormat
from azure.core.credentials import AzureKeyCredential
client = DocumentIntelligenceClient(
endpoint=self.settings.endpoint,
credential=AzureKeyCredential(self.settings.api_key),
)
with file.open("rb") as f:
analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
poller = client.begin_analyze_document(
model_id="prebuilt-read",
body=analyze_request,
output_content_format=DocumentContentFormat.TEXT,
output=[AnalyzeOutputOption.PDF], # request searchable PDF output
content_type="application/json",
)
poller.wait()
result_id = poller.details["operation_id"]
result = poller.result()
# Download the PDF with embedded text
self.archive_path = self.tempdir / "archive.pdf"
with self.archive_path.open("wb") as f:
for chunk in client.get_analyze_result_pdf(
model_id="prebuilt-read",
result_id=result_id,
):
f.write(chunk)
client.close()
return result.content
def parse(self, document_path: Path, mime_type, file_name=None):
if not self.settings.engine_is_valid():
self.log.warning(
"No valid remote parser engine is configured, content will be empty.",
)
self.text = ""
return
elif self.settings.engine == "azureai":
self.text = self.azure_ai_vision_parse(document_path)

View File

@@ -0,0 +1,18 @@
def get_parser(*args, **kwargs):
from paperless_remote.parsers import RemoteDocumentParser
return RemoteDocumentParser(*args, **kwargs)
def get_supported_mime_types():
from paperless_remote.parsers import RemoteDocumentParser
return RemoteDocumentParser(None).supported_mime_types()
def remote_consumer_declaration(sender, **kwargs):
return {
"parser": get_parser,
"weight": 5,
"mime_types": get_supported_mime_types(),
}

View File

Binary file not shown.

View File

@@ -0,0 +1,30 @@
from unittest import TestCase
from django.test import override_settings
from paperless_remote import check_remote_parser_configured
class TestChecks(TestCase):
@override_settings(REMOTE_OCR_ENGINE=None)
def test_no_engine(self):
msgs = check_remote_parser_configured(None)
self.assertEqual(len(msgs), 0)
@override_settings(REMOTE_OCR_ENGINE="azureai")
@override_settings(REMOTE_OCR_API_KEY="somekey")
@override_settings(REMOTE_OCR_ENDPOINT=None)
def test_azure_no_endpoint(self):
msgs = check_remote_parser_configured(None)
self.assertEqual(len(msgs), 1)
self.assertTrue(
msgs[0].msg.startswith(
"Azure AI remote parser requires endpoint and API key to be configured.",
),
)
@override_settings(REMOTE_OCR_ENGINE="something")
@override_settings(REMOTE_OCR_API_KEY="somekey")
def test_valid_configuration(self):
msgs = check_remote_parser_configured(None)
self.assertEqual(len(msgs), 0)

View File

@@ -0,0 +1,101 @@
import uuid
from pathlib import Path
from unittest import mock
from django.test import TestCase
from django.test import override_settings
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless_remote.parsers import RemoteDocumentParser
from paperless_remote.signals import get_parser
class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
SAMPLE_FILES = Path(__file__).resolve().parent / "samples"
def assertContainsStrings(self, content: str, strings: list[str]):
# Asserts that all strings appear in content, in the given order.
indices = []
for s in strings:
if s in content:
indices.append(content.index(s))
else:
self.fail(f"'{s}' is not in '{content}'")
self.assertListEqual(indices, sorted(indices))
@mock.patch("paperless_tesseract.parsers.run_subprocess")
@mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
def test_get_text_with_azure(self, mock_client_cls, mock_subprocess):
# Arrange mock Azure client
mock_client = mock.Mock()
mock_client_cls.return_value = mock_client
# Simulate poller result and its `.details`
mock_poller = mock.Mock()
mock_poller.wait.return_value = None
mock_poller.details = {"operation_id": "fake-op-id"}
mock_client.begin_analyze_document.return_value = mock_poller
mock_poller.result.return_value.content = "This is a test document."
# Return dummy PDF bytes
mock_client.get_analyze_result_pdf.return_value = [
b"%PDF-",
b"1.7 ",
b"FAKEPDF",
]
# Simulate pdftotext by writing dummy text to sidecar file
def fake_run(cmd, *args, **kwargs):
with Path(cmd[-1]).open("w", encoding="utf-8") as f:
f.write("This is a test document.")
mock_subprocess.side_effect = fake_run
with override_settings(
REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="somekey",
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
):
parser = get_parser(uuid.uuid4())
parser.parse(
self.SAMPLE_FILES / "simple-digital.pdf",
"application/pdf",
)
self.assertContainsStrings(
parser.text.strip(),
["This is a test document."],
)
@override_settings(
REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="key",
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
)
def test_supported_mime_types_valid_config(self):
parser = RemoteDocumentParser(uuid.uuid4())
expected_types = {
"application/pdf": ".pdf",
"image/png": ".png",
"image/jpeg": ".jpg",
"image/tiff": ".tiff",
"image/bmp": ".bmp",
"image/gif": ".gif",
"image/webp": ".webp",
}
self.assertEqual(parser.supported_mime_types(), expected_types)
def test_supported_mime_types_invalid_config(self):
parser = get_parser(uuid.uuid4())
self.assertEqual(parser.supported_mime_types(), {})
@override_settings(
REMOTE_OCR_ENGINE=None,
REMOTE_OCR_API_KEY=None,
REMOTE_OCR_ENDPOINT=None,
)
def test_parse_with_invalid_config(self):
parser = get_parser(uuid.uuid4())
parser.parse(self.SAMPLE_FILES / "simple-digital.pdf", "application/pdf")
self.assertEqual(parser.text, "")

39
uv.lock generated
View File

@@ -95,6 +95,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/af/cc/55a32a2c98022d88812b5986d2a92c4ff3ee087e83b712ebc703bba452bf/Automat-24.8.1-py3-none-any.whl", hash = "sha256:bf029a7bc3da1e2c24da2343e7598affaa9f10bf0ab63ff808566ce90551e02a", size = 42585, upload-time = "2024-08-19T17:31:56.729Z" },
]
[[package]]
name = "azure-ai-documentintelligence"
version = "1.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "azure-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "isodate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005 },
]
[[package]]
name = "azure-core"
version = "1.33.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/75/aa/7c9db8edd626f1a7d99d09ef7926f6f4fb34d5f9fa00dc394afdfe8e2a80/azure_core-1.33.0.tar.gz", hash = "sha256:f367aa07b5e3005fec2c1e184b882b0b039910733907d001c20fb08ebb8c0eb9", size = 295633 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/b7/76b7e144aa53bd206bf1ce34fa75350472c3f69bf30e5c8c18bc9881035d/azure_core-1.33.0-py3-none-any.whl", hash = "sha256:9b5b6d0223a1d38c37500e6971118c1e0f13f54951e6893968b38910bc9cda8f", size = 207071 },
]
[[package]]
name = "babel"
version = "2.17.0"
@@ -1403,6 +1431,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/fc/4e5a141c3f7c7bed550ac1f69e599e92b6be449dd4677ec09f325cad0955/inotifyrecursive-0.3.5-py3-none-any.whl", hash = "sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f", size = 8009, upload-time = "2020-11-20T12:38:46.981Z" },
]
[[package]]
name = "isodate"
version = "0.7.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 },
]
[[package]]
name = "jinja2"
version = "3.1.6"
@@ -2023,6 +2060,7 @@ name = "paperless-ngx"
version = "2.18.4"
source = { virtual = "." }
dependencies = [
{ name = "azure-ai-documentintelligence", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "celery", extra = ["redis"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2159,6 +2197,7 @@ typing = [
[package.metadata]
requires-dist = [
{ name = "azure-ai-documentintelligence", specifier = ">=1.0.2" },
{ name = "babel", specifier = ">=2.17" },
{ name = "bleach", specifier = "~=6.2.0" },
{ name = "celery", extras = ["redis"], specifier = "~=5.5.1" },