Chore: Bulk backend dependency updates (#8212)

2026-01-02 14:28:14 -06:00 · 2024-11-11 11:54:51 -08:00
parent a6f4c75a72
commit d1f255a22e
8 changed files with 1883 additions and 1514 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -16,7 +16,7 @@ on:
 env:
  # This is the version of pipenv all the steps will use
  # If changing this, change Dockerfile
-  DEFAULT_PIP_ENV_VERSION: "2024.0.3"
+  DEFAULT_PIP_ENV_VERSION: "2024.4.0"
  # This is the default version of Python to use in most steps which aren't specific
  DEFAULT_PYTHON_VERSION: "3.11"

--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,7 +5,7 @@
 repos:
  # General hooks
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
    hooks:
      - id: check-docstring-first
      - id: check-json
@@ -48,7 +48,7 @@ repos:
        exclude: "(^Pipfile\\.lock$)"
  # Python hooks
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.6.8'
+    rev: 'v0.7.3'
    hooks:
      - id: ruff
      - id: ruff-format
--- a/Dockerfile
+++ b/Dockerfile
@@ -39,7 +39,7 @@ COPY Pipfile* ./

 RUN set -eux \
  && echo "Installing pipenv" \
-    && python3 -m pip install --no-cache-dir --upgrade pipenv==2024.0.3 \
+    && python3 -m pip install --no-cache-dir --upgrade pipenv==2024.4.0 \
  && echo "Generating requirement.txt" \
    && pipenv requirements > requirements.txt

@@ -233,11 +233,11 @@ RUN --mount=type=cache,target=/root/.cache/pip/,id=pip-cache \
    && python3 -m pip install --no-cache-dir --upgrade wheel \
  && echo "Installing Python requirements" \
    && curl --fail --silent --show-error --location \
-    --output psycopg_c-3.2.2-cp312-cp312-linux_x86_64.whl \
-    https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.2/psycopg_c-3.2.2-cp312-cp312-linux_x86_64.whl \
+    --output psycopg_c-3.2.3-cp312-cp312-linux_x86_64.whl \
+    https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.3/psycopg_c-3.2.3-cp312-cp312-linux_x86_64.whl \
    && curl --fail --silent --show-error --location \
-    --output psycopg_c-3.2.2-cp312-cp312-linux_aarch64.whl  \
-    https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.2/psycopg_c-3.2.2-cp312-cp312-linux_aarch64.whl \
+    --output psycopg_c-3.2.3-cp312-cp312-linux_aarch64.whl  \
+    https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.3/psycopg_c-3.2.3-cp312-cp312-linux_aarch64.whl \
    && python3 -m pip install --default-timeout=1000 --find-links . --requirement requirements.txt \
  && echo "Installing NLTK data" \
    && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \
--- a/Pipfile
+++ b/Pipfile
@@ -7,7 +7,7 @@ name = "pypi"
 dateparser = "~=1.2"
 # WARNING: django does not use semver.
 #          Only patch versions are guaranteed to not introduce breaking changes.
-django = "~=5.1.1"
+django = "~=5.1.3"
 django-allauth = {extras = ["socialaccount"], version = "*"}
 django-auditlog = "*"
 django-celery-results = "*"
@@ -18,7 +18,7 @@ django-filter = "~=24.3"
 django-guardian = "*"
 django-multiselectfield = "*"
 django-soft-delete = "*"
-djangorestframework = "==3.15.2"
+djangorestframework = "~=3.15.2"
 djangorestframework-guardian = "*"
 drf-writable-nested = "*"
 bleach = "*"
@@ -37,7 +37,7 @@ jinja2 = "~=3.1"
 langdetect = "*"
 mysqlclient = "*"
 nltk = "*"
-ocrmypdf = "~=16.5"
+ocrmypdf = "~=16.6"
 pathvalidate = "*"
 pdf2image = "*"
 psycopg = {version = "*", extras = ["c"]}
@@ -55,7 +55,7 @@ tika-client = "*"
 tqdm = "*"
 # See https://github.com/paperless-ngx/paperless-ngx/issues/5494
 uvicorn = {extras = ["standard"], version = "==0.25.0"}
-watchdog = "~=4.0"
+watchdog = "~=5.0"
 whitenoise = "~=6.8"
 whoosh = "~=2.7"
 zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"}
--- a/Pipfile.lock
+++ b/Pipfile.lock
--- a/src/documents/tests/test_api_status.py
+++ b/src/documents/tests/test_api_status.py
@@ -243,21 +243,29 @@ class TestSystemStatus(APITestCase):
        THEN:
            - The response contains an ERROR classifier status
        """
-        does_exist = tempfile.NamedTemporaryFile(
-            dir="/tmp",
-            delete=False,
-        )
-        with override_settings(MODEL_FILE=does_exist):
+        with (
+            tempfile.NamedTemporaryFile(
+                dir="/tmp",
+                delete=False,
+            ) as does_exist,
+            override_settings(MODEL_FILE=does_exist),
+        ):
            with mock.patch("documents.classifier.load_classifier") as mock_load:
                mock_load.side_effect = ClassifierModelCorruptError()
                Document.objects.create(
                    title="Test Document",
                )
-                Tag.objects.create(name="Test Tag", matching_algorithm=Tag.MATCH_AUTO)
+                Tag.objects.create(
+                    name="Test Tag",
+                    matching_algorithm=Tag.MATCH_AUTO,
+                )
                self.client.force_login(self.user)
                response = self.client.get(self.ENDPOINT)
                self.assertEqual(response.status_code, status.HTTP_200_OK)
-                self.assertEqual(response.data["tasks"]["classifier_status"], "ERROR")
+                self.assertEqual(
+                    response.data["tasks"]["classifier_status"],
+                    "ERROR",
+                )
                self.assertIsNotNone(response.data["tasks"]["classifier_error"])

    def test_system_status_classifier_ok_no_objects(self):
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -1499,7 +1499,7 @@ class BulkDownloadView(GenericAPIView):
        follow_filename_format = serializer.validated_data.get("follow_formatting")

        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
-        temp = tempfile.NamedTemporaryFile(
+        temp = tempfile.NamedTemporaryFile(  # noqa: SIM115
            dir=settings.SCRATCH_DIR,
            suffix="-compressed-archive",
            delete=False,
@@ -1517,6 +1517,7 @@ class BulkDownloadView(GenericAPIView):
            for document in Document.objects.filter(pk__in=ids):
                strategy.add_document(document)

+        # TODO(stumpylog): Investigate using FileResponse here
        with open(temp.name, "rb") as f:
            response = HttpResponse(f, content_type="application/zip")
            response["Content-Disposition"] = '{}; filename="{}"'.format(
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -365,6 +365,7 @@ class RasterisedDocumentParser(DocumentParser):
        from ocrmypdf import EncryptedPdfError
        from ocrmypdf import InputFileError
        from ocrmypdf import SubprocessOutputError
+        from ocrmypdf.exceptions import DigitalSignatureError

        archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
        sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
@@ -387,9 +388,9 @@ class RasterisedDocumentParser(DocumentParser):

            if not self.text:
                raise NoTextFoundException("No text was found in the original document")
-        except EncryptedPdfError:
+        except (DigitalSignatureError, EncryptedPdfError):
            self.log.warning(
-                "This file is encrypted, OCR is impossible. Using "
+                "This file is encrypted and/or signed, OCR is impossible. Using "
                "any text present in the original file.",
            )
            if original_has_text: