Merge branch 'dev' into feature-remote-ocr-2

Wrap in try/catch
2025-11-21 04:36:53 -06:00 · 2025-11-19 23:49:11 -08:00 · 2025-11-18 12:08:38 -08:00 · 2025-11-18 12:07:16 -08:00 · 2025-11-17 20:54:37 -08:00 · 2025-11-17 18:49:01 -08:00
18 changed files with 454 additions and 327 deletions
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:1

-FROM --platform=$BUILDPLATFORM docker.io/node:20-trixie-slim as main-app
+FROM --platform=$BUILDPLATFORM docker.io/node:20-bookworm-slim as main-app

 ARG DEBIAN_FRONTEND=noninteractive

@@ -9,6 +9,8 @@ ARG TARGETARCH

 # Can be workflow provided, defaults set for manual building
 ARG JBIG2ENC_VERSION=0.29
+ARG QPDF_VERSION=11.9.0
+ARG GS_VERSION=10.03.1

 # Set Python environment variables
 ENV PYTHONDONTWRITEBYTECODE=1 \
@@ -86,10 +88,32 @@ COPY --from=ghcr.io/astral-sh/uv:0.7.8 /uv /bin/uv

 RUN set -eux \
  && echo "Installing pre-built updates" \
+    && echo "Installing qpdf ${QPDF_VERSION}" \
+      && curl --fail --silent --show-error --location \
+        --output libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+        https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+      && curl --fail --silent --show-error --location \
+        --output qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+        https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+      && dpkg --install ./libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+      && dpkg --install ./qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+    && echo "Installing Ghostscript ${GS_VERSION}" \
+      && curl --fail --silent --show-error --location \
+          --output libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+          https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+      && curl --fail --silent --show-error --location \
+          --output ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+          https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+      && curl --fail --silent --show-error --location \
+          --output libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
+          https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
+        && dpkg --install ./libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
+        && dpkg --install ./libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+        && dpkg --install ./ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
    && echo "Installing jbig2enc" \
      && curl --fail --silent --show-error --location \
        --output jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
-        https://github.com/paperless-ngx/builder/releases/download/jbig2enc-trixie-v${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
+        https://github.com/paperless-ngx/builder/releases/download/jbig2enc-${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
      && dpkg --install ./jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb

 # setup docker-specific things
--- a/22
+++ b/22
@@ -5,7 +5,7 @@
 # Purpose: Compiles the frontend
 # Notes:
 #  - Does PNPM stuff with Typescript and such
-FROM --platform=$BUILDPLATFORM docker.io/node:20-trixie-slim AS compile-frontend
+FROM --platform=$BUILDPLATFORM docker.io/node:20-bookworm-slim AS compile-frontend

 COPY ./src-ui /src/src-ui

@@ -32,7 +32,7 @@ RUN set -eux \
 # Purpose: Installs s6-overlay and rootfs
 # Comments:
 #  - Don't leave anything extra in here either
-FROM ghcr.io/astral-sh/uv:0.9.10-python3.12-trixie-slim AS s6-overlay-base
+FROM ghcr.io/astral-sh/uv:0.9.10-python3.12-bookworm-slim AS s6-overlay-base

 WORKDIR /usr/src/s6

@@ -102,6 +102,8 @@ ARG TARGETARCH

 # Can be workflow provided, defaults set for manual building
 ARG JBIG2ENC_VERSION=0.30
+ARG QPDF_VERSION=11.9.0
+ARG GS_VERSION=10.03.1

 # Set Python environment variables
 ENV PYTHONDONTWRITEBYTECODE=1 \
@@ -168,8 +170,20 @@ RUN set -eux \
    && apt-get update \
    && apt-get install --yes --quiet --no-install-recommends ${RUNTIME_PACKAGES} \
    && echo "Installing pre-built updates" \
-      && curl --fail --silent --no-progress-meter --show-error --location --remote-name-all \
-        https://github.com/paperless-ngx/builder/releases/download/jbig2enc-trixie-v${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
+      && curl --fail --silent --no-progress-meter --show-error --location --remote-name-all --parallel --parallel-max 4 \
+        https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+        https://github.com/paperless-ngx/builder/releases/download/qpdf-${QPDF_VERSION}/qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+        https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+        https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+        https://github.com/paperless-ngx/builder/releases/download/ghostscript-${GS_VERSION}/libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
+        https://github.com/paperless-ngx/builder/releases/download/jbig2enc-${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
+      && echo "Installing qpdf ${QPDF_VERSION}" \
+        && dpkg --install ./libqpdf29_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+        && dpkg --install ./qpdf_${QPDF_VERSION}-1_${TARGETARCH}.deb \
+      && echo "Installing Ghostscript ${GS_VERSION}" \
+        && dpkg --install ./libgs10-common_${GS_VERSION}.dfsg-1_all.deb \
+        && dpkg --install ./libgs10_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
+        && dpkg --install ./ghostscript_${GS_VERSION}.dfsg-1_${TARGETARCH}.deb \
      && echo "Installing jbig2enc" \
        && dpkg --install ./jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
      && echo "Configuring imagemagick" \
--- a/dev.txt
+++ b/dev.txt
@@ -1,319 +0,0 @@
-adduser 3.134
-apt 2.6.1
-base-files 12.4+deb12u11
-base-passwd 3.6.1
-bash 5.2.15-2+b8
-bsdutils 1:2.38.1-5+deb12u3
-ca-certificates 20230311+deb12u1
-coreutils 9.1-1
-curl 7.88.1-10+deb12u12
-dash 0.5.12-2
-debconf 1.5.82
-debian-archive-keyring 2023.3+deb12u2
-debianutils 5.7-0.5~deb12u1
-diffutils 1:3.8-4
-dirmngr 2.2.40-1.1
-dpkg 1.21.22
-e2fsprogs 1.47.0-2
-file 1:5.44-3
-findutils 4.9.0-4
-fontconfig 2.14.1-4
-fontconfig-config 2.14.1-4
-fonts-liberation 1:1.07.4-11
-fonts-urw-base35 20200910-7
-gcc-12-base 12.2.0-14+deb12u1
-gettext 0.21-12
-gettext-base 0.21-12
-ghostscript 10.03.1~dfsg-1
-gnupg 2.2.40-1.1
-gnupg-l10n 2.2.40-1.1
-gnupg-utils 2.2.40-1.1
-gosu 1.14-1+b10
-gpg 2.2.40-1.1
-gpg-agent 2.2.40-1.1
-gpg-wks-client 2.2.40-1.1
-gpg-wks-server 2.2.40-1.1
-gpgconf 2.2.40-1.1
-gpgsm 2.2.40-1.1
-gpgv 2.2.40-1.1
-grep 3.8-5
-gzip 1.12-1
-hicolor-icon-theme 0.17-2
-hostname 3.23+nmu1
-icc-profiles-free 2.0.1+dfsg-1.1
-imagemagick 8:6.9.11.60+dfsg-1.6+deb12u3
-imagemagick-6-common 8:6.9.11.60+dfsg-1.6+deb12u3
-imagemagick-6.q16 8:6.9.11.60+dfsg-1.6+deb12u3
-init-system-helpers 1.65.2
-jbig2dec 0.19-3
-jbig2enc 0.30-1
-libacl1 2.3.1-3
-libaom3 3.6.0-1+deb12u1
-libapt-pkg6.0 2.6.1
-libarchive13 3.6.2-1+deb12u2
-libassuan0 2.5.5-5
-libattr1 1:2.5.1-4
-libaudit-common 1:3.0.9-1
-libaudit1 1:3.0.9-1
-libavahi-client3 0.8-10+deb12u1
-libavahi-common-data 0.8-10+deb12u1
-libavahi-common3 0.8-10+deb12u1
-libavcodec59 7:5.1.6-0+deb12u1
-libavformat59 7:5.1.6-0+deb12u1
-libavutil57 7:5.1.6-0+deb12u1
-libblkid1 2.38.1-5+deb12u3
-libbluray2 1:1.3.4-1
-libbrotli1 1.0.9-2+b6
-libbsd0 0.11.7-2
-libbz2-1.0 1.0.8-5+b1
-libc-bin 2.36-9+deb12u10
-libc6 2.36-9+deb12u10
-libcairo-gobject2 1.16.0-7
-libcairo2 1.16.0-7
-libcap-ng0 0.8.3-1+b3
-libcap2 1:2.66-4+deb12u1
-libchromaprint1 1.5.1-2+b1
-libcjson1 1.7.15-1+deb12u2
-libcodec2-1.0 1.0.5-1
-libcom-err2 1.47.0-2
-libconfig-inifiles-perl 3.000003-2
-libcrypt1 1:4.4.33-2
-libcups2 2.4.2-3+deb12u8
-libcurl4 7.88.1-10+deb12u12
-libdatrie1 0.2.13-2+b1
-libdav1d6 1.0.0-2+deb12u1
-libdb5.3 5.3.28+dfsg2-1
-libdbus-1-3 1.14.10-1~deb12u1
-libde265-0 1.0.11-1+deb12u2
-libdebconfclient0 0.270
-libdeflate0 1.14-1
-libdrm-common 2.4.114-1
-libdrm2 2.4.114-1+b1
-libedit2 3.1-20221030-2
-libexpat1 2.5.0-1+deb12u1
-libext2fs2 1.47.0-2
-libffi8 3.4.4-1
-libfftw3-double3 3.3.10-1
-libfontconfig1 2.14.1-4
-libfontenc1 1:1.1.4-1
-libfreetype6 2.12.1+dfsg-5+deb12u4
-libfribidi0 1.0.8-2.1
-libgcc-s1 12.2.0-14+deb12u1
-libgcrypt20 1.10.1-3
-libgdbm-compat4 1.23-3
-libgdbm6 1.23-3
-libgdk-pixbuf-2.0-0 2.42.10+dfsg-1+deb12u2
-libgdk-pixbuf2.0-common 2.42.10+dfsg-1+deb12u2
-libgif7 5.2.1-2.5
-libglib2.0-0 2.74.6-2+deb12u6
-libgme0 0.6.3-6
-libgmp10 2:6.2.1+dfsg1-1.1
-libgnutls30 3.7.9-2+deb12u5
-libgomp1 12.2.0-14+deb12u1
-libgpg-error0 1.46-1
-libgraphite2-3 1.3.14-1
-libgs-common 10.0.0~dfsg-11+deb12u7
-libgs10 10.03.1~dfsg-1
-libgs10-common 10.03.1~dfsg-1
-libgsm1 1.0.22-1
-libgssapi-krb5-2 1.20.1-2+deb12u3
-libharfbuzz0b 6.0.0+dfsg-3
-libheif1 1.15.1-1+deb12u1
-libhogweed6 3.8.1-2
-libhwy1 1.0.3-3+deb12u1
-libice6 2:1.0.10-1
-libicu72 72.1-3+deb12u1
-libidn12 1.41-1
-libidn2-0 2.3.3-1+b1
-libijs-0.35 0.35-15
-libimagequant0 2.17.0-1
-libjbig0 2.1-6.1
-libjbig2dec0 0.19-3
-libjpeg62-turbo 1:2.1.5-2
-libjxl0.7 0.7.0-10+deb12u1
-libk5crypto3 1.20.1-2+deb12u3
-libkeyutils1 1.6.3-2
-libkrb5-3 1.20.1-2+deb12u3
-libkrb5support0 1.20.1-2+deb12u3
-libksba8 1.6.3-2
-liblcms2-2 2.14-2
-libldap-2.5-0 2.5.13+dfsg-5
-liblept5 1.82.0-3+b3
-liblerc4 4.0.0+ds-2
-liblqr-1-0 0.4.2-2.1
-libltdl7 2.4.7-7~deb12u1
-liblz4-1 1.9.4-1
-liblzma5 5.4.1-1
-libmagic-mgc 1:5.44-3
-libmagic1 1:5.44-3
-libmagickcore-6.q16-6 8:6.9.11.60+dfsg-1.6+deb12u3
-libmagickwand-6.q16-6 8:6.9.11.60+dfsg-1.6+deb12u3
-libmariadb3 1:10.11.11-0+deb12u1
-libmbedcrypto7 2.28.3-1
-libmd0 1.0.4-2
-libmfx1 22.5.4-1
-libmount1 2.38.1-5+deb12u3
-libmp3lame0 3.100-6
-libmpg123-0 1.31.2-1+deb12u1
-libncurses6 6.4-4
-libncursesw6 6.4-4
-libnettle8 3.8.1-2
-libnghttp2-14 1.52.0-1+deb12u2
-libnorm1 1.5.9+dfsg-2
-libnpth0 1.6-3
-libnsl2 1.3.0-2
-libnspr4 2:4.35-1
-libnss3 2:3.87.1-1+deb12u1
-libnuma1 2.0.16-1
-libogg0 1.3.5-3
-libopenjp2-7 2.5.0-2+deb12u1
-libopenmpt0 0.6.9-1
-libopus0 1.3.1-3
-libp11-kit0 0.24.1-2
-libpam-modules 1.5.2-6+deb12u1
-libpam-modules-bin 1.5.2-6+deb12u1
-libpam-runtime 1.5.2-6+deb12u1
-libpam0g 1.5.2-6+deb12u1
-libpango-1.0-0 1.50.12+ds-1
-libpangocairo-1.0-0 1.50.12+ds-1
-libpangoft2-1.0-0 1.50.12+ds-1
-libpaper1 1.1.29
-libpcre2-8-0 10.42-1
-libperl5.36 5.36.0-7+deb12u2
-libpgm-5.3-0 5.3.128~dfsg-2
-libpixman-1-0 0.42.2-1
-libpng16-16 1.6.39-2
-libpoppler126 22.12.0-2+deb12u1
-libpq5 15.13-0+deb12u1
-libpsl5 0.21.2-1
-libqpdf29 11.9.0-1
-librabbitmq4 0.11.0-1+deb12u1
-librav1e0 0.5.1-6
-libreadline8 8.2-1.3
-librist4 0.2.7+dfsg-1
-librsvg2-2 2.54.7+dfsg-1~deb12u1
-librtmp1 2.4+20151223.gitfa8646d.1-2+b2
-libsasl2-2 2.1.28+dfsg-10
-libsasl2-modules-db 2.1.28+dfsg-10
-libseccomp2 2.5.4-1+deb12u1
-libselinux1 3.4-1+b6
-libsemanage-common 3.4-1
-libsemanage2 3.4-1+b5
-libsepol2 3.4-2.1
-libshine3 3.1.1-2
-libsm6 2:1.2.3-1
-libsmartcols1 2.38.1-5+deb12u3
-libsnappy1v5 1.1.9-3
-libsodium23 1.0.18-1
-libsoxr0 0.1.3-4
-libspeex1 1.2.1-2
-libsqlite3-0 3.40.1-2+deb12u1
-libsrt1.5-gnutls 1.5.1-1+deb12u1
-libss2 1.47.0-2
-libssh-gcrypt-4 0.10.6-0+deb12u1
-libssh2-1 1.10.0-3+b1
-libssl3 3.0.17-1~deb12u1
-libstdc++6 12.2.0-14+deb12u1
-libsvtav1enc1 1.4.1+dfsg-1
-libswresample4 7:5.1.6-0+deb12u1
-libsystemd0 252.38-1~deb12u1
-libtasn1-6 4.19.0-2+deb12u1
-libtesseract5 5.3.0-2
-libthai-data 0.1.29-1
-libthai0 0.1.29-1
-libtheora0 1.1.1+dfsg.1-16.1+b1
-libtiff6 4.5.0-6+deb12u2
-libtinfo6 6.4-4
-libtirpc-common 1.3.3+ds-1
-libtirpc3 1.3.3+ds-1
-libtwolame0 0.4.0-2
-libudev1 252.38-1~deb12u1
-libudfread0 1.1.2-1
-libunistring2 1.0-2
-libuuid1 2.38.1-5+deb12u3
-libv4l-0 1.22.1-5+b2
-libv4lconvert0 1.22.1-5+b2
-libva-drm2 2.17.0-1
-libva-x11-2 2.17.0-1
-libva2 2.17.0-1
-libvdpau1 1.5-2
-libvorbis0a 1.3.7-1
-libvorbisenc2 1.3.7-1
-libvorbisfile3 1.3.7-1
-libvpx7 1.12.0-1+deb12u4
-libwebp7 1.2.4-0.2+deb12u1
-libwebpdemux2 1.2.4-0.2+deb12u1
-libwebpmux3 1.2.4-0.2+deb12u1
-libx11-6 2:1.8.4-2+deb12u2
-libx11-data 2:1.8.4-2+deb12u2
-libx11-xcb1 2:1.8.4-2+deb12u2
-libx264-164 2:0.164.3095+gitbaee400-3
-libx265-199 3.5-2+b1
-libxau6 1:1.0.9-1
-libxcb-dri3-0 1.15-1
-libxcb-render0 1.15-1
-libxcb-shm0 1.15-1
-libxcb1 1.15-1
-libxdmcp6 1:1.1.2-3
-libxext6 2:1.3.4-1+b1
-libxfixes3 1:6.0.0-2
-libxml2 2.9.14+dfsg-1.3~deb12u2
-libxrender1 1:0.9.10-1.1
-libxslt1.1 1.1.35-1+deb12u1
-libxt6 1:1.2.1-1.1
-libxvidcore4 2:1.3.7-1
-libxxhash0 0.8.1-1
-libzbar0 0.23.92-7+deb12u1
-libzmq5 4.3.4-6
-libzstd1 1.5.4+dfsg2-5
-libzvbi-common 0.2.41-1
-libzvbi0 0.2.41-1
-login 1:4.13+dfsg1-1+deb12u1
-logsave 1.47.0-2
-mariadb-client 1:10.11.11-0+deb12u1
-mariadb-client-core 1:10.11.11-0+deb12u1
-mariadb-common 1:10.11.11-0+deb12u1
-mawk 1.3.4.20200120-3.1
-media-types 10.0.0
-mount 2.38.1-5+deb12u3
-mysql-common 5.8+1.1.0
-ncurses-base 6.4-4
-ncurses-bin 6.4-4
-netbase 6.4
-ocl-icd-libopencl1 2.3.1-1
-openssl 3.0.17-1~deb12u1
-passwd 1:4.13+dfsg1-1+deb12u1
-perl 5.36.0-7+deb12u2
-perl-base 5.36.0-7+deb12u2
-perl-modules-5.36 5.36.0-7+deb12u2
-pinentry-curses 1.2.1-1
-pngquant 2.17.0-1
-poppler-data 0.4.12-1
-poppler-utils 22.12.0-2+deb12u1
-postgresql-client 15+248
-postgresql-client-15 15.13-0+deb12u1
-postgresql-client-common 248
-qpdf 11.9.0-1
-readline-common 8.2-1.3
-sed 4.9-1
-sensible-utils 0.0.17+nmu1
-shared-mime-info 2.2-1
-sysvinit-utils 3.06-4
-tar 1.34+dfsg-1.2+deb12u1
-tesseract-ocr 5.3.0-2
-tesseract-ocr-deu 1:4.1.0-2
-tesseract-ocr-eng 1:4.1.0-2
-tesseract-ocr-fra 1:4.1.0-2
-tesseract-ocr-ita 1:4.1.0-2
-tesseract-ocr-osd 1:4.1.0-2
-tesseract-ocr-spa 1:4.1.0-2
-tzdata 2025b-0+deb12u1
-ucf 3.0043+nmu1+deb12u1
-unpaper 7.0.0-0.1
-usr-is-merged 37~deb12u1
-util-linux 2.38.1-5+deb12u3
-util-linux-extra 2.38.1-5+deb12u3
-x11-common 1:7.7+23
-xfonts-encodings 1:1.0.4-2.2
-xfonts-utils 1:7.7+6
-zlib1g 1:1.2.13.dfsg-1
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1794,3 +1794,23 @@ password. All of these options come from their similarly-named [Django settings]
 #### [`PAPERLESS_EMAIL_USE_SSL=<bool>`](#PAPERLESS_EMAIL_USE_SSL) {#PAPERLESS_EMAIL_USE_SSL}

 : Defaults to false.
+
+## Remote OCR
+
+#### [`PAPERLESS_REMOTE_OCR_ENGINE=<str>`](#PAPERLESS_REMOTE_OCR_ENGINE) {#PAPERLESS_REMOTE_OCR_ENGINE}
+
+: The remote OCR engine to use. Currently, the only supported value is "azureai" (Azure AI Document Intelligence).
+
+    Defaults to None, which disables remote OCR.
+
+#### [`PAPERLESS_REMOTE_OCR_API_KEY=<str>`](#PAPERLESS_REMOTE_OCR_API_KEY) {#PAPERLESS_REMOTE_OCR_API_KEY}
+
+: The API key to use for the remote OCR engine. This is required for Azure AI.
+
+    Defaults to None.
+
+#### [`PAPERLESS_REMOTE_OCR_ENDPOINT=<str>`](#PAPERLESS_REMOTE_OCR_ENDPOINT) {#PAPERLESS_REMOTE_OCR_ENDPOINT}
+
+: The endpoint to use for the remote OCR engine. This is required for Azure AI.
+
+    Defaults to None.
--- a/docs/index.md
+++ b/docs/index.md
@@ -25,9 +25,10 @@ physical documents into a searchable online archive so you can keep, well, _less
 ## Features

 -   **Organize and index** your scanned documents with tags, correspondents, types, and more.
-   _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way.
+-   _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so.
 -   Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images.
-   Utilizes the open-source Tesseract engine to recognize more than 100 languages.
+    -   Utilizes the open-source Tesseract engine to recognize more than 100 languages.
+    -   _New!_ Supports remote OCR with Azure AI (opt-in).
 -   Documents are saved as PDF/A format which is designed for long term storage, alongside the unaltered originals.
 -   Uses machine-learning to automatically add tags, correspondents and document types to your documents.
 -   Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -892,6 +892,21 @@ how regularly you intend to scan documents and use paperless.
    performed the task associated with the document, move it to the
    inbox.

+## Remote OCR
+
+!!! important
+
+    This feature is disabled by default and will always remain strictly "opt-in".
+
+Paperless-ngx supports performing OCR on documents using remote services. At the moment, this is limited to
+[Microsoft's Azure "Document Intelligence" service](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence).
+This is of course a paid service (with a free tier) which requires an Azure account and subscription. Azure AI is not affiliated with
+Paperless-ngx in any way. When enabled, Paperless-ngx will automatically send appropriate documents to Azure for OCR processing, bypassing
+the local OCR engine. See the [configuration](configuration.md#PAPERLESS_REMOTE_OCR_ENGINE) options for more details.
+
+Additionally, when using a commercial service with this feature, consider both the potential costs and any associated file size
+or page limitations (e.g. with a free tier).
+
 ## Architecture

 Paperless-ngx consists of the following components:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ classifiers = [
 # This will allow testing to not install a webserver, mysql, etc

 dependencies = [
+  "azure-ai-documentintelligence>=1.0.2",
  "babel>=2.17",
  "bleach~=6.3.0",
  "celery[redis]~=5.5.1",
@@ -252,6 +253,7 @@ testpaths = [
  "src/paperless_tesseract/tests/",
  "src/paperless_tika/tests",
  "src/paperless_text/tests/",
+  "src/paperless_remote/tests/",
 ]
 addopts = [
  "--pythonwarnings=all",
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -322,6 +322,7 @@ INSTALLED_APPS = [
    "paperless_tesseract.apps.PaperlessTesseractConfig",
    "paperless_text.apps.PaperlessTextConfig",
    "paperless_mail.apps.PaperlessMailConfig",
+    "paperless_remote.apps.PaperlessRemoteParserConfig",
    "django.contrib.admin",
    "rest_framework",
    "rest_framework.authtoken",
@@ -1401,3 +1402,10 @@ WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
    "PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
    "true",
 )
+
+###############################################################################
+# Remote Parser                                                               #
+###############################################################################
+REMOTE_OCR_ENGINE = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE")
+REMOTE_OCR_API_KEY = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY")
+REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
--- a/src/paperless_remote/__init__.py
+++ b/src/paperless_remote/__init__.py
@@ -0,0 +1,4 @@
+# this is here so that django finds the checks.
+from paperless_remote.checks import check_remote_parser_configured
+
+__all__ = ["check_remote_parser_configured"]
--- a/src/paperless_remote/apps.py
+++ b/src/paperless_remote/apps.py
@@ -0,0 +1,14 @@
+from django.apps import AppConfig
+
+from paperless_remote.signals import remote_consumer_declaration
+
+
+class PaperlessRemoteParserConfig(AppConfig):
+    """Django application configuration for the ``paperless_remote`` app."""
+
+    name = "paperless_remote"
+
+    def ready(self):
+        """Connect the remote parser declaration to the consumer declaration signal."""
+        # Deferred import: only resolved when ready() is actually invoked.
+        from documents.signals import (
+            document_consumer_declaration as declaration_signal,
+        )
+
+        declaration_signal.connect(remote_consumer_declaration)
+
+        super().ready()
--- a/src/paperless_remote/checks.py
+++ b/src/paperless_remote/checks.py
@@ -0,0 +1,17 @@
+from django.conf import settings
+from django.core.checks import Error
+from django.core.checks import register
+
+
+@register()
+def check_remote_parser_configured(app_configs, **kwargs):
+    """
+    System check for the remote OCR settings.
+
+    Returns a list with one Error when the "azureai" engine is selected but
+    the endpoint or the API key is unset or empty, and an empty list in
+    every other case.
+    """
+    # Any engine other than "azureai" (including None) has nothing to verify.
+    if settings.REMOTE_OCR_ENGINE != "azureai":
+        return []
+
+    # Both values must be truthy for the configuration to be complete.
+    if settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY:
+        return []
+
+    return [
+        Error(
+            "Azure AI remote parser requires endpoint and API key to be configured.",
+        ),
+    ]
--- a/src/paperless_remote/parsers.py
+++ b/src/paperless_remote/parsers.py
@@ -0,0 +1,118 @@
+from pathlib import Path
+
+from django.conf import settings
+
+from paperless_tesseract.parsers import RasterisedDocumentParser
+
+
+class RemoteEngineConfig:
+    """
+    Plain container for the remote OCR engine name and its credentials.
+    """
+
+    def __init__(
+        self,
+        engine: str,
+        api_key: str | None = None,
+        endpoint: str | None = None,
+    ):
+        # Engine identifier; "azureai" is the only value accepted as valid.
+        self.engine = engine
+        # Credential and service URL; either may be None when not configured.
+        self.api_key = api_key
+        self.endpoint = endpoint
+
+    def engine_is_valid(self):
+        """
+        Returns True only when the engine is supported and fully configured.
+
+        A value that is None or an empty string counts as "not configured",
+        which is the same truthiness rule the remote parser system check
+        applies to the endpoint and API key settings.
+        """
+        valid = self.engine in ["azureai"] and bool(self.api_key)
+        if self.engine == "azureai":
+            # Azure additionally needs the service endpoint.
+            valid = valid and bool(self.endpoint)
+        return valid
+
+
+class RemoteDocumentParser(RasterisedDocumentParser):
+    """
+    This parser uses a remote OCR engine to parse documents. Currently, the only
+    supported engine is "azureai", which calls the Azure AI Document Intelligence
+    client and requests a searchable PDF with embedded text.
+    """
+
+    logging_name = "paperless.parsing.remote"
+
+    def get_settings(self) -> RemoteEngineConfig:
+        """
+        Returns the configuration for the remote OCR engine, loaded from Django settings.
+        """
+        return RemoteEngineConfig(
+            engine=settings.REMOTE_OCR_ENGINE,
+            api_key=settings.REMOTE_OCR_API_KEY,
+            endpoint=settings.REMOTE_OCR_ENDPOINT,
+        )
+
+    def supported_mime_types(self):
+        """
+        Returns a mapping of MIME type to default file extension when the remote
+        engine is validly configured, and an empty mapping otherwise.
+        """
+        # NOTE(review): self.settings is presumably populated by the base class
+        # from get_settings() - confirm against RasterisedDocumentParser.
+        if self.settings.engine_is_valid():
+            return {
+                "application/pdf": ".pdf",
+                "image/png": ".png",
+                "image/jpeg": ".jpg",
+                "image/tiff": ".tiff",
+                "image/bmp": ".bmp",
+                "image/gif": ".gif",
+                "image/webp": ".webp",
+            }
+        else:
+            return {}
+
+    def azure_ai_vision_parse(
+        self,
+        file: Path,
+    ) -> str | None:
+        """
+        Sends the document to Azure AI Document Intelligence ("prebuilt-read"
+        model) and returns the extracted text content.
+
+        A searchable PDF with embedded text is requested as additional output
+        and written to "archive.pdf" inside the parser's temporary directory.
+        The archive_path attribute is only set once that file has been written
+        completely, so a failed download never leaves it pointing at a
+        partial file.
+
+        Any exception is logged and swallowed; in that case None is returned.
+        The client is always closed.
+        """
+        from azure.ai.documentintelligence import DocumentIntelligenceClient
+        from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
+        from azure.ai.documentintelligence.models import AnalyzeOutputOption
+        from azure.ai.documentintelligence.models import DocumentContentFormat
+        from azure.core.credentials import AzureKeyCredential
+
+        client = DocumentIntelligenceClient(
+            endpoint=self.settings.endpoint,
+            credential=AzureKeyCredential(self.settings.api_key),
+        )
+
+        try:
+            # Read the whole document up front so no file handle stays open
+            # while the request is in flight.
+            analyze_request = AnalyzeDocumentRequest(bytes_source=file.read_bytes())
+            poller = client.begin_analyze_document(
+                model_id="prebuilt-read",
+                body=analyze_request,
+                output_content_format=DocumentContentFormat.TEXT,
+                output=[AnalyzeOutputOption.PDF],  # request searchable PDF output
+                content_type="application/json",
+            )
+
+            poller.wait()
+            result_id = poller.details["operation_id"]
+            result = poller.result()
+
+            # Download the PDF with embedded text
+            archive_path = self.tempdir / "archive.pdf"
+            with archive_path.open("wb") as f:
+                for chunk in client.get_analyze_result_pdf(
+                    model_id="prebuilt-read",
+                    result_id=result_id,
+                ):
+                    f.write(chunk)
+
+            # Publish the archive only after the download finished successfully.
+            self.archive_path = archive_path
+            return result.content
+        except Exception as e:
+            self.log.error(f"Azure AI Vision parsing failed: {e}")
+        finally:
+            client.close()
+
+        return None
+
+    def parse(self, document_path: Path, mime_type, file_name=None):
+        """
+        Parses the document with the configured remote engine and stores the
+        result in self.text.
+
+        With no valid engine configured a warning is logged and the text is set
+        to an empty string. With the "azureai" engine the text is whatever
+        azure_ai_vision_parse returns, which is None when the remote call failed.
+        """
+        if not self.settings.engine_is_valid():
+            self.log.warning(
+                "No valid remote parser engine is configured, content will be empty.",
+            )
+            self.text = ""
+        elif self.settings.engine == "azureai":
+            self.text = self.azure_ai_vision_parse(document_path)
--- a/src/paperless_remote/signals.py
+++ b/src/paperless_remote/signals.py
@@ -0,0 +1,18 @@
+def get_parser(*args, **kwargs):
+    """Create a RemoteDocumentParser, forwarding every argument to its constructor."""
+    # The parsers module is imported inside the function, not at module import time.
+    from paperless_remote import parsers
+
+    return parsers.RemoteDocumentParser(*args, **kwargs)
+
+
+def get_supported_mime_types():
+    """Return the MIME type mapping reported by a throwaway RemoteDocumentParser."""
+    from paperless_remote import parsers
+
+    # The parser is constructed with None as its only argument, purely to
+    # query which MIME types it currently supports.
+    probe = parsers.RemoteDocumentParser(None)
+    return probe.supported_mime_types()
+
+
+def remote_consumer_declaration(sender, **kwargs):
+    """
+    Signal receiver describing the remote parser: its factory, its weight and
+    the MIME types it supports at the time of the call.
+    """
+    declaration = {
+        "parser": get_parser,
+        "weight": 5,
+    }
+    declaration["mime_types"] = get_supported_mime_types()
+    return declaration
--- a/src/paperless_remote/tests/__init__.py
+++ b/src/paperless_remote/tests/__init__.py
--- a/src/paperless_remote/tests/samples/simple-digital.pdf
+++ b/src/paperless_remote/tests/samples/simple-digital.pdf
--- a/src/paperless_remote/tests/test_checks.py
+++ b/src/paperless_remote/tests/test_checks.py
@@ -0,0 +1,24 @@
+from unittest import TestCase
+
+from django.test import override_settings
+
+from paperless_remote import check_remote_parser_configured
+
+
+class TestChecks(TestCase):
+    """Exercises the remote parser system check under different settings."""
+
+    @override_settings(REMOTE_OCR_ENGINE=None)
+    def test_no_engine(self):
+        # With no engine selected the check reports nothing.
+        reported = check_remote_parser_configured(None)
+        self.assertEqual(len(reported), 0)
+
+    @override_settings(
+        REMOTE_OCR_ENGINE="azureai",
+        REMOTE_OCR_API_KEY="somekey",
+        REMOTE_OCR_ENDPOINT=None,
+    )
+    def test_azure_no_endpoint(self):
+        # Azure selected with a key but no endpoint: exactly one error expected.
+        reported = check_remote_parser_configured(None)
+        self.assertEqual(len(reported), 1)
+
+        first_message = reported[0].msg
+        self.assertTrue(
+            first_message.startswith(
+                "Azure AI remote parser requires endpoint and API key to be configured.",
+            ),
+        )
--- a/src/paperless_remote/tests/test_parser.py
+++ b/src/paperless_remote/tests/test_parser.py
@@ -0,0 +1,128 @@
+import uuid
+from pathlib import Path
+from unittest import mock
+
+from django.test import TestCase
+from django.test import override_settings
+
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import FileSystemAssertsMixin
+from paperless_remote.parsers import RemoteDocumentParser
+from paperless_remote.signals import get_parser
+
+
+class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
+    """
+    Tests for RemoteDocumentParser. The Azure client is always replaced by a
+    mock, so no network access takes place.
+    """
+
+    SAMPLE_FILES = Path(__file__).resolve().parent / "samples"
+
+    def assertContainsStrings(self, content: str, strings: list[str]):
+        # Asserts that all strings appear in content, in the given order.
+        indices = []
+        for s in strings:
+            if s in content:
+                indices.append(content.index(s))
+            else:
+                self.fail(f"'{s}' is not in '{content}'")
+        self.assertListEqual(indices, sorted(indices))
+
+    @mock.patch("paperless_tesseract.parsers.run_subprocess")
+    @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
+    def test_get_text_with_azure(self, mock_client_cls, mock_subprocess):
+        """A successful Azure round trip yields the text returned by the service."""
+        # Arrange mock Azure client
+        mock_client = mock.Mock()
+        mock_client_cls.return_value = mock_client
+
+        # Simulate poller result and its `.details`
+        mock_poller = mock.Mock()
+        mock_poller.wait.return_value = None
+        mock_poller.details = {"operation_id": "fake-op-id"}
+        mock_client.begin_analyze_document.return_value = mock_poller
+        mock_poller.result.return_value.content = "This is a test document."
+
+        # Return dummy PDF bytes
+        mock_client.get_analyze_result_pdf.return_value = [
+            b"%PDF-",
+            b"1.7 ",
+            b"FAKEPDF",
+        ]
+
+        # Simulate pdftotext by writing dummy text to sidecar file
+        def fake_run(cmd, *args, **kwargs):
+            with Path(cmd[-1]).open("w", encoding="utf-8") as f:
+                f.write("This is a test document.")
+
+        mock_subprocess.side_effect = fake_run
+
+        with override_settings(
+            REMOTE_OCR_ENGINE="azureai",
+            REMOTE_OCR_API_KEY="somekey",
+            REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
+        ):
+            parser = get_parser(uuid.uuid4())
+            parser.parse(
+                self.SAMPLE_FILES / "simple-digital.pdf",
+                "application/pdf",
+            )
+
+            self.assertContainsStrings(
+                parser.text.strip(),
+                ["This is a test document."],
+            )
+
+    @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
+    def test_get_text_with_azure_error_logged_and_returns_none(self, mock_client_cls):
+        """A failing Azure call is logged, the client is closed and the text is None."""
+        mock_client = mock.Mock()
+        mock_client.begin_analyze_document.side_effect = RuntimeError("fail")
+        mock_client_cls.return_value = mock_client
+
+        with override_settings(
+            REMOTE_OCR_ENGINE="azureai",
+            REMOTE_OCR_API_KEY="somekey",
+            REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
+        ):
+            parser = get_parser(uuid.uuid4())
+            with mock.patch.object(parser.log, "error") as mock_log_error:
+                parser.parse(
+                    self.SAMPLE_FILES / "simple-digital.pdf",
+                    "application/pdf",
+                )
+
+        self.assertIsNone(parser.text)
+        mock_client.begin_analyze_document.assert_called_once()
+        mock_client.close.assert_called_once()
+        mock_log_error.assert_called_once()
+        self.assertIn(
+            "Azure AI Vision parsing failed",
+            mock_log_error.call_args[0][0],
+        )
+
+    @override_settings(
+        REMOTE_OCR_ENGINE="azureai",
+        REMOTE_OCR_API_KEY="key",
+        REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
+    )
+    def test_supported_mime_types_valid_config(self):
+        """A complete Azure configuration exposes the full MIME type mapping."""
+        parser = RemoteDocumentParser(uuid.uuid4())
+        expected_types = {
+            "application/pdf": ".pdf",
+            "image/png": ".png",
+            "image/jpeg": ".jpg",
+            "image/tiff": ".tiff",
+            "image/bmp": ".bmp",
+            "image/gif": ".gif",
+            "image/webp": ".webp",
+        }
+        self.assertEqual(parser.supported_mime_types(), expected_types)
+
+    # Pin the settings explicitly so the outcome does not depend on whatever
+    # PAPERLESS_REMOTE_OCR_* values the surrounding environment provides.
+    @override_settings(
+        REMOTE_OCR_ENGINE=None,
+        REMOTE_OCR_API_KEY=None,
+        REMOTE_OCR_ENDPOINT=None,
+    )
+    def test_supported_mime_types_invalid_config(self):
+        """Without a configured engine no MIME types are supported."""
+        parser = get_parser(uuid.uuid4())
+        self.assertEqual(parser.supported_mime_types(), {})
+
+    @override_settings(
+        REMOTE_OCR_ENGINE=None,
+        REMOTE_OCR_API_KEY=None,
+        REMOTE_OCR_ENDPOINT=None,
+    )
+    def test_parse_with_invalid_config(self):
+        """Parsing without a configured engine leaves the text empty."""
+        parser = get_parser(uuid.uuid4())
+        parser.parse(self.SAMPLE_FILES / "simple-digital.pdf", "application/pdf")
+        self.assertEqual(parser.text, "")
--- a/uv.lock
+++ b/uv.lock
@@ -95,6 +95,34 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/02/ff/1175b0b7371e46244032d43a56862d0af455823b5280a50c63d99cc50f18/automat-25.4.16-py3-none-any.whl", hash = "sha256:04e9bce696a8d5671ee698005af6e5a9fa15354140a87f4870744604dcdd3ba1", size = 42842, upload-time = "2025-04-16T20:12:14.447Z" },
 ]

+[[package]]
+name = "azure-ai-documentintelligence"
+version = "1.0.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "azure-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "isodate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005 },
+]
+
+[[package]]
+name = "azure-core"
+version = "1.33.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/75/aa/7c9db8edd626f1a7d99d09ef7926f6f4fb34d5f9fa00dc394afdfe8e2a80/azure_core-1.33.0.tar.gz", hash = "sha256:f367aa07b5e3005fec2c1e184b882b0b039910733907d001c20fb08ebb8c0eb9", size = 295633 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/b7/76b7e144aa53bd206bf1ce34fa75350472c3f69bf30e5c8c18bc9881035d/azure_core-1.33.0-py3-none-any.whl", hash = "sha256:9b5b6d0223a1d38c37500e6971118c1e0f13f54951e6893968b38910bc9cda8f", size = 207071 },
+]
+
 [[package]]
 name = "babel"
 version = "2.17.0"
@@ -1451,6 +1479,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c7/fc/4e5a141c3f7c7bed550ac1f69e599e92b6be449dd4677ec09f325cad0955/inotifyrecursive-0.3.5-py3-none-any.whl", hash = "sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f", size = 8009, upload-time = "2020-11-20T12:38:46.981Z" },
 ]

+[[package]]
+name = "isodate"
+version = "0.7.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -2118,6 +2155,7 @@ name = "paperless-ngx"
 version = "2.19.6"
 source = { virtual = "." }
 dependencies = [
+    { name = "azure-ai-documentintelligence", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "celery", extra = ["redis"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2254,6 +2292,7 @@ typing = [

 [package.metadata]
 requires-dist = [
+    { name = "azure-ai-documentintelligence", specifier = ">=1.0.2" },
    { name = "babel", specifier = ">=2.17" },
    { name = "bleach", specifier = "~=6.3.0" },
    { name = "celery", extras = ["redis"], specifier = "~=5.5.1" },
Author	SHA1	Message	Date
shamoon	7d2fe630a5	Merge branch 'dev' into feature-remote-ocr-2	2025-11-19 23:49:11 -08:00
shamoon	c29dd5485b	Merge branch 'dev' into feature-remote-ocr-2	2025-11-18 12:08:38 -08:00
shamoon	cef100a955	Wrap in try/catch	2025-11-18 12:07:16 -08:00
shamoon	4f53d1b6ee	Merge branch 'dev' into feature-remote-ocr-2	2025-11-17 20:54:37 -08:00
shamoon	23cea77548	Merge branch 'dev' into feature-remote-ocr-2	2025-11-17 18:49:01 -08:00
shamoon	4900af93c6	Merge branch 'dev' into feature-remote-ocr-2	2025-11-15 13:49:39 -08:00
shamoon	ef834ae808	Merge branch 'dev' into feature-remote-ocr-2	2025-11-13 15:45:08 -08:00
shamoon	0537e87cb5	Merge branch 'dev' into feature-remote-ocr-2	2025-11-06 11:46:02 -08:00
shamoon	b4da5c3cd1	Merge branch 'dev' into feature-remote-ocr-2	2025-11-04 16:24:26 -08:00
shamoon	251b0fb3d6	Merge branch 'dev' into feature-remote-ocr-2	2025-11-04 08:24:02 -08:00
shamoon	32bdf11f7f	Merge branch 'dev' into feature-remote-ocr-2	2025-11-02 08:14:04 -08:00
shamoon	0627ca69f5	Merge branch 'dev' into feature-remote-ocr-2	2025-10-29 11:13:53 -07:00
shamoon	f5525bbdff	Merge branch 'dev' into feature-remote-ocr-2	2025-10-27 21:22:42 -07:00
shamoon	a21a2a41a8	Merge branch 'dev' into feature-remote-ocr-2	2025-10-26 07:41:51 -07:00
shamoon	cc73ed8b86	Merge branch 'dev' into feature-remote-ocr-2	2025-10-24 16:48:07 -07:00
shamoon	0c706b2316	Merge branch 'dev' into feature-remote-ocr-2	2025-10-23 16:38:35 -07:00
shamoon	85b7b6874d	Merge branch 'dev' into feature-remote-ocr-2	2025-10-22 21:53:07 -07:00
shamoon	56b26185fa	Merge branch 'dev' into feature-remote-ocr-2	2025-10-21 08:23:20 -07:00
shamoon	6537fade7b	Merge branch 'dev' into feature-remote-ocr-2	2025-10-15 16:04:02 -07:00
shamoon	9f8090816f	Merge branch 'dev' into feature-remote-ocr-2	2025-10-09 12:54:58 -07:00
shamoon	1de7c52478	Merge branch 'dev' into feature-remote-ocr-2	2025-10-01 19:24:38 -07:00
shamoon	9aaaa6f069	Merge branch 'dev' into feature-remote-ocr-2	2025-09-30 09:14:56 -07:00
shamoon	c3a20b7797	Merge branch 'dev' into feature-remote-ocr-2	2025-09-28 15:06:37 -07:00
shamoon	476556379b	Merge branch 'dev' into feature-remote-ocr-2	2025-09-24 13:46:49 -07:00
shamoon	e5cafff043	Merge branch 'dev' into feature-remote-ocr-2	2025-09-22 13:42:55 -07:00
shamoon	8e0d574e99	Merge branch 'dev' into feature-remote-ocr-2	2025-09-21 16:18:13 -07:00
shamoon	8a5820328e	Sonar suggestions	2025-09-17 19:18:47 -07:00
shamoon	809d62a2f4	Merge branch 'dev' into feature-remote-ocr-2	2025-09-17 16:51:23 -07:00
shamoon	0d87f94b9b	Merge branch 'dev' into feature-remote-ocr-2	2025-09-14 14:01:35 -07:00
shamoon	315b90f8e5	Add typing to assertContainsStrings test util	2025-09-11 13:56:14 -07:00
shamoon	47b2d2964b	Use regular testcase instead of django, config check test	2025-09-11 13:52:10 -07:00
shamoon	e05639ae4e	tempdir already a path	2025-09-11 13:49:30 -07:00
shamoon	f400a8cb2f	Close client	2025-09-11 13:49:06 -07:00
shamoon	26abcf5612	Also ensure API key is set	2025-09-11 13:48:06 -07:00
shamoon	afde52430d	Merge branch 'dev' into feature-remote-ocr-2	2025-09-11 13:25:53 -07:00
shamoon	716f2da652	Merge branch 'dev' into feature-remote-ocr-2	2025-09-08 11:36:49 -07:00
shamoon	c54073b7c2	Merge branch 'dev' into feature-remote-ocr-2	2025-09-04 09:16:59 -07:00
shamoon	247e6f39dc	Merge branch 'dev' into feature-remote-ocr-2	2025-09-01 20:10:40 -07:00
shamoon	1e6dfc4481	Merge branch 'dev' into feature-remote-ocr-2	2025-08-26 13:30:39 -07:00
shamoon	7cc0750066	Add note on costs and limitations for Azure OCR	2025-08-24 05:47:07 -07:00
shamoon	bd6585d3b4	Merge branch 'dev' into feature-remote-ocr-2	2025-08-22 08:54:26 -07:00
shamoon	717e828a1d	Merge branch 'dev' into feature-remote-ocr-2	2025-08-17 21:25:14 -07:00
shamoon	07381d48e6	Merge branch 'dev' into feature-remote-ocr-2	2025-08-17 07:49:58 -07:00
shamoon	dd0ffaf312	Merge branch 'dev' into feature-remote-ocr-2	2025-08-11 10:48:36 -07:00
shamoon	264504affc	Fix consumer declaration file extensions	2025-08-10 05:32:52 -07:00
shamoon	4feedf2add	Merge branch 'dev' into feature-remote-ocr-2	2025-08-06 16:04:25 -04:00
shamoon	2f76cf9831	Merge branch 'dev' into feature-remote-ocr-2	2025-08-01 23:55:49 -04:00
shamoon	1002d37f6b	Update test_parser.py	2025-07-09 11:05:37 -07:00
shamoon	d260a94740	Update parsers.py	2025-07-09 11:02:57 -07:00
shamoon	88c69b83ea	Update index.md	2025-07-09 11:00:12 -07:00
shamoon	2557ee2014	Update docs to mention remote OCR with Azure AI	2025-07-09 09:53:30 -07:00
shamoon	3c75deed80	Add paperless_remote tests to testpaths	2025-07-08 14:19:45 -07:00
shamoon	d05343c927	Test fixes / coverage	2025-07-08 14:19:45 -07:00
shamoon	e7972b7eaf	Coverage	2025-07-08 14:19:45 -07:00
shamoon	75a091cc0d	Fix test	2025-07-08 14:19:44 -07:00
shamoon	dca74803fd	Use output_content_format poller.result to get clean content	2025-07-08 14:19:44 -07:00
shamoon	3cf3d868d0	Some docs	2025-07-08 14:19:43 -07:00
shamoon	bf4fc6604a	Test	2025-07-08 14:19:43 -07:00
shamoon	e8c1eb86fa	This actually works [ci skip]	2025-07-08 14:19:43 -07:00
shamoon	c3dad3cf69	Basic parse	2025-07-08 14:19:42 -07:00
shamoon	811bd66088	Ok, restart implementing this with just azure [ci skip]	2025-07-08 14:19:42 -07:00