Chore: Bulk backend dependency updates (#8212)

This commit is contained in:
Trenton H 2024-11-11 11:54:51 -08:00 committed by GitHub
parent a6f4c75a72
commit d1f255a22e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1883 additions and 1514 deletions

View File

@ -16,7 +16,7 @@ on:
env:
# This is the version of pipenv all the steps will use
# If changing this, also update the pipenv version pinned in the Dockerfile
DEFAULT_PIP_ENV_VERSION: "2024.0.3"
DEFAULT_PIP_ENV_VERSION: "2024.4.0"
# This is the default version of Python, used by most steps that don't require a specific version
DEFAULT_PYTHON_VERSION: "3.11"

View File

@ -5,7 +5,7 @@
repos:
# General hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-docstring-first
- id: check-json
@ -48,7 +48,7 @@ repos:
exclude: "(^Pipfile\\.lock$)"
# Python hooks
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: 'v0.6.8'
rev: 'v0.7.3'
hooks:
- id: ruff
- id: ruff-format

View File

@ -39,7 +39,7 @@ COPY Pipfile* ./
RUN set -eux \
&& echo "Installing pipenv" \
&& python3 -m pip install --no-cache-dir --upgrade pipenv==2024.0.3 \
&& python3 -m pip install --no-cache-dir --upgrade pipenv==2024.4.0 \
&& echo "Generating requirement.txt" \
&& pipenv requirements > requirements.txt
@ -233,11 +233,11 @@ RUN --mount=type=cache,target=/root/.cache/pip/,id=pip-cache \
&& python3 -m pip install --no-cache-dir --upgrade wheel \
&& echo "Installing Python requirements" \
&& curl --fail --silent --show-error --location \
--output psycopg_c-3.2.2-cp312-cp312-linux_x86_64.whl \
https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.2/psycopg_c-3.2.2-cp312-cp312-linux_x86_64.whl \
--output psycopg_c-3.2.3-cp312-cp312-linux_x86_64.whl \
https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.3/psycopg_c-3.2.3-cp312-cp312-linux_x86_64.whl \
&& curl --fail --silent --show-error --location \
--output psycopg_c-3.2.2-cp312-cp312-linux_aarch64.whl \
https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.2/psycopg_c-3.2.2-cp312-cp312-linux_aarch64.whl \
--output psycopg_c-3.2.3-cp312-cp312-linux_aarch64.whl \
https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.3/psycopg_c-3.2.3-cp312-cp312-linux_aarch64.whl \
&& python3 -m pip install --default-timeout=1000 --find-links . --requirement requirements.txt \
&& echo "Installing NLTK data" \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \

View File

@ -7,7 +7,7 @@ name = "pypi"
dateparser = "~=1.2"
# WARNING: django does not use semver.
# Only patch versions are guaranteed to not introduce breaking changes.
django = "~=5.1.1"
django = "~=5.1.3"
django-allauth = {extras = ["socialaccount"], version = "*"}
django-auditlog = "*"
django-celery-results = "*"
@ -18,7 +18,7 @@ django-filter = "~=24.3"
django-guardian = "*"
django-multiselectfield = "*"
django-soft-delete = "*"
djangorestframework = "==3.15.2"
djangorestframework = "~=3.15.2"
djangorestframework-guardian = "*"
drf-writable-nested = "*"
bleach = "*"
@ -37,7 +37,7 @@ jinja2 = "~=3.1"
langdetect = "*"
mysqlclient = "*"
nltk = "*"
ocrmypdf = "~=16.5"
ocrmypdf = "~=16.6"
pathvalidate = "*"
pdf2image = "*"
psycopg = {version = "*", extras = ["c"]}
@ -55,7 +55,7 @@ tika-client = "*"
tqdm = "*"
# See https://github.com/paperless-ngx/paperless-ngx/issues/5494
uvicorn = {extras = ["standard"], version = "==0.25.0"}
watchdog = "~=4.0"
watchdog = "~=5.0"
whitenoise = "~=6.8"
whoosh = "~=2.7"
zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"}

3343
Pipfile.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -243,21 +243,29 @@ class TestSystemStatus(APITestCase):
THEN:
- The response contains an ERROR classifier status
"""
does_exist = tempfile.NamedTemporaryFile(
dir="/tmp",
delete=False,
)
with override_settings(MODEL_FILE=does_exist):
with (
tempfile.NamedTemporaryFile(
dir="/tmp",
delete=False,
) as does_exist,
override_settings(MODEL_FILE=does_exist),
):
with mock.patch("documents.classifier.load_classifier") as mock_load:
mock_load.side_effect = ClassifierModelCorruptError()
Document.objects.create(
title="Test Document",
)
Tag.objects.create(name="Test Tag", matching_algorithm=Tag.MATCH_AUTO)
Tag.objects.create(
name="Test Tag",
matching_algorithm=Tag.MATCH_AUTO,
)
self.client.force_login(self.user)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["tasks"]["classifier_status"], "ERROR")
self.assertEqual(
response.data["tasks"]["classifier_status"],
"ERROR",
)
self.assertIsNotNone(response.data["tasks"]["classifier_error"])
def test_system_status_classifier_ok_no_objects(self):

View File

@ -1499,7 +1499,7 @@ class BulkDownloadView(GenericAPIView):
follow_filename_format = serializer.validated_data.get("follow_formatting")
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
temp = tempfile.NamedTemporaryFile(
temp = tempfile.NamedTemporaryFile( # noqa: SIM115
dir=settings.SCRATCH_DIR,
suffix="-compressed-archive",
delete=False,
@ -1517,6 +1517,7 @@ class BulkDownloadView(GenericAPIView):
for document in Document.objects.filter(pk__in=ids):
strategy.add_document(document)
# TODO(stumpylog): Investigate using FileResponse here
with open(temp.name, "rb") as f:
response = HttpResponse(f, content_type="application/zip")
response["Content-Disposition"] = '{}; filename="{}"'.format(

View File

@ -365,6 +365,7 @@ class RasterisedDocumentParser(DocumentParser):
from ocrmypdf import EncryptedPdfError
from ocrmypdf import InputFileError
from ocrmypdf import SubprocessOutputError
from ocrmypdf.exceptions import DigitalSignatureError
archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
@ -387,9 +388,9 @@ class RasterisedDocumentParser(DocumentParser):
if not self.text:
raise NoTextFoundException("No text was found in the original document")
except EncryptedPdfError:
except (DigitalSignatureError, EncryptedPdfError):
self.log.warning(
"This file is encrypted, OCR is impossible. Using "
"This file is encrypted and/or signed, OCR is impossible. Using "
"any text present in the original file.",
)
if original_has_text: