Chore: Drop Python 3.9 support (#7774)

Trenton H, 2024-09-26 12:22:24 -07:00, committed by GitHub
parent 5e687d9a93
commit e6f59472e4
44 changed files with 970 additions and 1066 deletions


@@ -16,9 +16,9 @@ on:
 env:
 # This is the version of pipenv all the steps will use
 # If changing this, change Dockerfile
-DEFAULT_PIP_ENV_VERSION: "2024.0.1"
+DEFAULT_PIP_ENV_VERSION: "2024.0.3"
 # This is the default version of Python to use in most steps which aren't specific
-DEFAULT_PYTHON_VERSION: "3.10"
+DEFAULT_PYTHON_VERSION: "3.11"
 jobs:
 pre-commit:
@@ -100,7 +100,7 @@ jobs:
 - pre-commit
 strategy:
 matrix:
-python-version: ['3.9', '3.10', '3.11']
+python-version: ['3.10', '3.11', '3.12']
 fail-fast: false
 steps:
 -
@@ -486,7 +486,7 @@ jobs:
 name: Patch whitenoise
 run: |
 curl --fail --silent --show-error --location --output 484.patch https://github.com/evansd/whitenoise/pull/484.patch
-patch -d $(pipenv --venv)/lib/python3.10/site-packages --verbose -p2 < 484.patch
+patch -d $(pipenv --venv)/lib/python3.11/site-packages --verbose -p2 < 484.patch
 rm 484.patch
 -
 name: Install system dependencies


@@ -48,7 +48,7 @@ repos:
 exclude: "(^Pipfile\\.lock$)"
 # Python hooks
 - repo: https://github.com/astral-sh/ruff-pre-commit
-rev: 'v0.6.5'
+rev: 'v0.6.8'
 hooks:
 - id: ruff
 - id: ruff-format
@@ -62,6 +62,9 @@ repos:
 rev: v6.2.1
 hooks:
 - id: beautysh
+language_version: '3.10'
+additional_dependencies:
+- setuptools
 args:
 - "--tab"
 - repo: https://github.com/shellcheck-py/shellcheck-py


@@ -1 +1 @@
-3.9.19
+3.10.15


@@ -2,7 +2,7 @@ fix = true
 line-length = 88
 respect-gitignore = true
 src = ["src"]
-target-version = "py39"
+target-version = "py310"
 output-format = "grouped"
 show-fixes = true


@@ -11,7 +11,7 @@ If you want to implement something big:
 ## Python
-Paperless supports python 3.9 - 3.11 at this time. We format Python code with [ruff](https://docs.astral.sh/ruff/formatter/).
+Paperless supports python 3.10 - 3.12 at this time. We format Python code with [ruff](https://docs.astral.sh/ruff/formatter/).
 ## Branches


@@ -39,7 +39,7 @@ COPY Pipfile* ./
 RUN set -eux \
 && echo "Installing pipenv" \
-&& python3 -m pip install --no-cache-dir --upgrade pipenv==2024.0.1 \
+&& python3 -m pip install --no-cache-dir --upgrade pipenv==2024.0.3 \
 && echo "Generating requirement.txt" \
 && pipenv requirements > requirements.txt
@@ -233,11 +233,11 @@ RUN --mount=type=cache,target=/root/.cache/pip/,id=pip-cache \
 && python3 -m pip install --no-cache-dir --upgrade wheel \
 && echo "Installing Python requirements" \
 && curl --fail --silent --show-error --location \
---output psycopg_c-3.2.1-cp311-cp311-linux_x86_64.whl \
-https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.1/psycopg_c-3.2.1-cp311-cp311-linux_x86_64.whl \
+--output psycopg_c-3.2.2-cp311-cp311-linux_x86_64.whl \
+https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.2/psycopg_c-3.2.2-cp311-cp311-linux_x86_64.whl \
 && curl --fail --silent --show-error --location \
---output psycopg_c-3.2.1-cp311-cp311-linux_aarch64.whl \
-https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.1/psycopg_c-3.2.1-cp311-cp311-linux_aarch64.whl \
+--output psycopg_c-3.2.2-cp311-cp311-linux_aarch64.whl \
+https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.2/psycopg_c-3.2.2-cp311-cp311-linux_aarch64.whl \
 && python3 -m pip install --default-timeout=1000 --find-links . --requirement requirements.txt \
 && echo "Patching whitenoise for compression speedup" \
 && curl --fail --silent --show-error --location --output 484.patch https://github.com/evansd/whitenoise/pull/484.patch \

Pipfile.lock (generated): 1674 changed lines; diff suppressed because it is too large.


@@ -250,7 +250,7 @@ a minimal installation of Debian/Buster, which is the current stable
 release at the time of writing. Windows is not and will never be
 supported.
-Paperless requires Python 3. At this time, 3.9 - 3.11 are tested versions.
+Paperless requires Python 3. At this time, 3.10 - 3.12 are tested versions.
 Newer versions may work, but some dependencies may not fully support newer versions.
 Support for older Python versions may be dropped as they reach end of life or as newer versions
 are released, dependency support is confirmed, etc.


@@ -3,7 +3,6 @@ import re
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Optional
 from django.conf import settings
 from pdf2image import convert_from_path
@@ -81,7 +80,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
 self._tiff_conversion_done = False
 self.barcodes: list[Barcode] = []
-def run(self) -> Optional[str]:
+def run(self) -> str | None:
 # Some operations may use PIL, override pixel setting if needed
 maybe_override_pixel_limit()
@@ -299,7 +298,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
 )
 @property
-def asn(self) -> Optional[int]:
+def asn(self) -> int | None:
 """
 Search the parsed barcodes for any ASNs.
 The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
@@ -334,7 +333,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
 return asn
 @property
-def tags(self) -> Optional[list[int]]:
+def tags(self) -> list[int] | None:
 """
 Search the parsed barcodes for any tags.
 Returns the detected tag ids (or empty list)
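
For context, this is the pattern repeated across most files in this commit: with Python 3.10 as the new minimum, PEP 604 union syntax is available in annotations at runtime, so typing.Optional[X] becomes X | None (and typing.Union[X, Y] becomes X | Y), and the now-unused typing imports are dropped. A minimal before/after sketch (the function is hypothetical, not from this commit):

    from typing import Optional, Union

    def parse_asn_old(value: Union[str, int]) -> Optional[int]:  # Python 3.9 style
        try:
            return int(value)
        except ValueError:
            return None

    def parse_asn_new(value: str | int) -> int | None:  # PEP 604, Python 3.10+
        try:
            return int(value)
        except ValueError:
            return None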


@@ -3,7 +3,6 @@ import itertools
 import logging
 import os
 import tempfile
-from typing import Optional
 from celery import chain
 from celery import chord
@@ -242,7 +241,7 @@ def rotate(doc_ids: list[int], degrees: int):
 def merge(
 doc_ids: list[int],
-metadata_document_id: Optional[int] = None,
+metadata_document_id: int | None = None,
 delete_originals: bool = False,
 user: User = None,
 ):


@@ -19,8 +19,8 @@ logger = logging.getLogger("paperless.caching")
 class MetadataCacheData:
 original_checksum: str
 original_metadata: list
-archive_checksum: Optional[str]
-archive_metadata: Optional[list]
+archive_checksum: str | None
+archive_metadata: list | None
 @dataclass(frozen=True)
@@ -46,7 +46,7 @@ def get_suggestion_cache_key(document_id: int) -> str:
 return f"doc_{document_id}_suggest"
-def get_suggestion_cache(document_id: int) -> Optional[SuggestionCacheData]:
+def get_suggestion_cache(document_id: int) -> SuggestionCacheData | None:
 """
 If possible, return the cached suggestions for the given document ID.
 The classifier needs to be matching in format and hash and the suggestions need to
@@ -121,13 +121,13 @@ def get_metadata_cache_key(document_id: int) -> str:
 return f"doc_{document_id}_metadata"
-def get_metadata_cache(document_id: int) -> Optional[MetadataCacheData]:
+def get_metadata_cache(document_id: int) -> MetadataCacheData | None:
 """
 Returns the cached document metadata for the given document ID, as long as the metadata
 was cached once and the checksums have not changed
 """
 doc_key = get_metadata_cache_key(document_id)
-doc_metadata: Optional[MetadataCacheData] = cache.get(doc_key)
+doc_metadata: MetadataCacheData | None = cache.get(doc_key)
 # The metadata exists in the cache
 if doc_metadata is not None:
 try:
@@ -161,7 +161,7 @@ def get_metadata_cache(document_id: int) -> Optional[MetadataCacheData]:
 def set_metadata_cache(
 document: Document,
 original_metadata: list,
-archive_metadata: Optional[list],
+archive_metadata: list | None,
 *,
 timeout=CACHE_50_MINUTES,
 ) -> None:


@@ -78,9 +78,9 @@ class DocumentClassifier:
 def __init__(self):
 # last time a document changed and therefore training might be required
-self.last_doc_change_time: Optional[datetime] = None
+self.last_doc_change_time: datetime | None = None
 # Hash of primary keys of AUTO matching values last used in training
-self.last_auto_type_hash: Optional[bytes] = None
+self.last_auto_type_hash: bytes | None = None
 self.data_vectorizer = None
 self.tags_binarizer = None
@@ -408,7 +408,7 @@ class DocumentClassifier:
 return content
-def predict_correspondent(self, content: str) -> Optional[int]:
+def predict_correspondent(self, content: str) -> int | None:
 if self.correspondent_classifier:
 X = self.data_vectorizer.transform([self.preprocess_content(content)])
 correspondent_id = self.correspondent_classifier.predict(X)
@@ -419,7 +419,7 @@ class DocumentClassifier:
 else:
 return None
-def predict_document_type(self, content: str) -> Optional[int]:
+def predict_document_type(self, content: str) -> int | None:
 if self.document_type_classifier:
 X = self.data_vectorizer.transform([self.preprocess_content(content)])
 document_type_id = self.document_type_classifier.predict(X)
@@ -451,7 +451,7 @@ class DocumentClassifier:
 else:
 return []
-def predict_storage_path(self, content: str) -> Optional[int]:
+def predict_storage_path(self, content: str) -> int | None:
 if self.storage_path_classifier:
 X = self.data_vectorizer.transform([self.preprocess_content(content)])
 storage_path_id = self.storage_path_classifier.predict(X)


@@ -1,6 +1,5 @@
 from datetime import datetime
 from datetime import timezone
-from typing import Optional
 from django.conf import settings
 from django.core.cache import cache
@@ -15,7 +14,7 @@ from documents.classifier import DocumentClassifier
 from documents.models import Document
-def suggestions_etag(request, pk: int) -> Optional[str]:
+def suggestions_etag(request, pk: int) -> str | None:
 """
 Returns an optional string for the ETag, allowing browser caching of
 suggestions if the classifier has not been changed and the suggested dates
@@ -42,7 +41,7 @@ def suggestions_etag(request, pk: int) -> Optional[str]:
 return None
-def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
+def suggestions_last_modified(request, pk: int) -> datetime | None:
 """
 Returns the datetime of classifier last modification. This is slightly off,
 as there is not way to track the suggested date setting modification, but it seems
@@ -67,7 +66,7 @@ def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
 return None
-def metadata_etag(request, pk: int) -> Optional[str]:
+def metadata_etag(request, pk: int) -> str | None:
 """
 Metadata is extracted from the original file, so use its checksum as the
 ETag
@@ -80,7 +79,7 @@ def metadata_etag(request, pk: int) -> Optional[str]:
 return None
-def metadata_last_modified(request, pk: int) -> Optional[datetime]:
+def metadata_last_modified(request, pk: int) -> datetime | None:
 """
 Metadata is extracted from the original file, so use its modified. Strictly speaking, this is
 not the modification of the original file, but of the database object, but might as well
@@ -94,7 +93,7 @@ def metadata_last_modified(request, pk: int) -> Optional[datetime]:
 return None
-def preview_etag(request, pk: int) -> Optional[str]:
+def preview_etag(request, pk: int) -> str | None:
 """
 ETag for the document preview, using the original or archive checksum, depending on the request
 """
@@ -110,7 +109,7 @@ def preview_etag(request, pk: int) -> Optional[str]:
 return None
-def preview_last_modified(request, pk: int) -> Optional[datetime]:
+def preview_last_modified(request, pk: int) -> datetime | None:
 """
 Uses the documents modified time to set the Last-Modified header. Not strictly
 speaking correct, but close enough and quick
@@ -123,7 +122,7 @@ def preview_last_modified(request, pk: int) -> Optional[datetime]:
 return None
-def thumbnail_last_modified(request, pk: int) -> Optional[datetime]:
+def thumbnail_last_modified(request, pk: int) -> datetime | None:
 """
 Returns the filesystem last modified either from cache or from filesystem.
 Cache should be (slightly?) faster than filesystem
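
These helpers all share the (request, pk) signature and return None when no validator can be computed, which matches the contract of Django's condition decorator. A sketch of how such functions are typically wired to a view (the view itself is hypothetical; how paperless-ngx actually attaches them may differ):

    from django.views.decorators.http import condition

    @condition(etag_func=suggestions_etag, last_modified_func=suggestions_last_modified)
    def suggestions_view(request, pk):
        # Django only runs the view body when the ETag / Last-Modified
        # validators indicate the client's cached copy is stale.
        ...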


@@ -5,8 +5,6 @@ import tempfile
 from enum import Enum
 from pathlib import Path
 from typing import TYPE_CHECKING
-from typing import Optional
-from typing import Union
 import magic
 from django.conf import settings
@@ -61,7 +59,7 @@ class WorkflowTriggerPlugin(
 ):
 NAME: str = "WorkflowTriggerPlugin"
-def run(self) -> Optional[str]:
+def run(self) -> str | None:
 """
 Get overrides from matching workflows
 """
@@ -278,7 +276,7 @@ class ConsumerPlugin(
 current_progress: int,
 max_progress: int,
 status: ProgressStatusOptions,
-message: Optional[Union[ConsumerStatusShortMessage, str]] = None,
+message: ConsumerStatusShortMessage | str | None = None,
 document_id=None,
 ): # pragma: no cover
 self.status_mgr.send_progress(
@@ -294,10 +292,10 @@ class ConsumerPlugin(
 def _fail(
 self,
-message: Union[ConsumerStatusShortMessage, str],
-log_message: Optional[str] = None,
+message: ConsumerStatusShortMessage | str,
+log_message: str | None = None,
 exc_info=None,
-exception: Optional[Exception] = None,
+exception: Exception | None = None,
 ):
 self._send_progress(100, 100, ProgressStatusOptions.FAILED, message)
 self.log.error(log_message or message, exc_info=exc_info)
@@ -572,10 +570,8 @@ class ConsumerPlugin(
 self.log.error(f"Error attempting to clean PDF: {e}")
 # Based on the mime type, get the parser for that type
-parser_class: Optional[type[DocumentParser]] = (
-get_parser_class_for_mime_type(
-mime_type,
-)
+parser_class: type[DocumentParser] | None = get_parser_class_for_mime_type(
+mime_type,
+)
 if not parser_class:
 tempdir.cleanup()
@@ -832,8 +828,8 @@ class ConsumerPlugin(
 def _store(
 self,
 text: str,
-date: Optional[datetime.datetime],
-page_count: Optional[int],
+date: datetime.datetime | None,
+page_count: int | None,
 mime_type: str,
 ) -> Document:
 # If someone gave us the original filename, use it instead of doc.
@@ -961,7 +957,7 @@ def parse_doc_title_w_placeholders(
 owner_username: str,
 local_added: datetime.datetime,
 original_filename: str,
-created: Optional[datetime.datetime] = None,
+created: datetime.datetime | None = None,
 ) -> str:
 """
 Available title placeholders for Workflows depend on what has already been assigned,


@@ -2,7 +2,6 @@ import dataclasses
 import datetime
 from enum import IntEnum
 from pathlib import Path
-from typing import Optional
 import magic
 from guardian.shortcuts import get_groups_with_perms
@@ -17,20 +16,20 @@ class DocumentMetadataOverrides:
 meaning no override is happening
 """
-filename: Optional[str] = None
-title: Optional[str] = None
-correspondent_id: Optional[int] = None
-document_type_id: Optional[int] = None
-tag_ids: Optional[list[int]] = None
-storage_path_id: Optional[int] = None
-created: Optional[datetime.datetime] = None
-asn: Optional[int] = None
-owner_id: Optional[int] = None
-view_users: Optional[list[int]] = None
-view_groups: Optional[list[int]] = None
-change_users: Optional[list[int]] = None
-change_groups: Optional[list[int]] = None
-custom_field_ids: Optional[list[int]] = None
+filename: str | None = None
+title: str | None = None
+correspondent_id: int | None = None
+document_type_id: int | None = None
+tag_ids: list[int] | None = None
+storage_path_id: int | None = None
+created: datetime.datetime | None = None
+asn: int | None = None
+owner_id: int | None = None
+view_users: list[int] | None = None
+view_groups: list[int] | None = None
+change_users: list[int] | None = None
+change_groups: list[int] | None = None
+custom_field_ids: list[int] | None = None
 def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
 """
@@ -156,7 +155,7 @@ class ConsumableDocument:
 source: DocumentSource
 original_file: Path
-mailrule_id: Optional[int] = None
+mailrule_id: int | None = None
 mime_type: str = dataclasses.field(init=False, default=None)
 def __post_init__(self):


@@ -4,7 +4,6 @@ import os
 import shutil
 from pathlib import Path
 from typing import Final
-from typing import Optional
 from django.conf import settings
 from pikepdf import Pdf
@@ -37,7 +36,7 @@ class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin)
 in self.input_doc.original_file.parts
 )
-def run(self) -> Optional[str]:
+def run(self) -> str | None:
 """
 Tries to collate pages from 2 single sided scans of a double sided
 document.


@@ -2,9 +2,8 @@ import functools
 import inspect
 import json
 import operator
+from collections.abc import Callable
 from contextlib import contextmanager
-from typing import Callable
-from typing import Union
 from django.contrib.contenttypes.models import ContentType
 from django.db.models import CharField
@@ -332,7 +331,7 @@ class CustomFieldLookupParser:
 `max_query_depth` and `max_atom_count` can be set to guard against generating arbitrarily
 complex SQL queries.
 """
-self._custom_fields: dict[Union[int, str], CustomField] = {}
+self._custom_fields: dict[int | str, CustomField] = {}
 self._validation_prefix = validation_prefix
 # Dummy ModelSerializer used to convert a Django models.Field to serializers.Field.
 self._model_serializer = serializers.ModelSerializer()
@@ -366,7 +365,7 @@ class CustomFieldLookupParser:
 Applies rule (1, 2, 3) or (4, 5, 6) based on the length of the expr.
 """
 with self._track_query_depth():
-if isinstance(expr, (list, tuple)):
+if isinstance(expr, list | tuple):
 if len(expr) == 2:
 return self._parse_logical_expr(*expr)
 elif len(expr) == 3:
@@ -380,7 +379,7 @@ class CustomFieldLookupParser:
 """
 Handles [`q0`, `q1`, ..., `qn`] in rule 4 & 5.
 """
-if not isinstance(exprs, (list, tuple)) or not exprs:
+if not isinstance(exprs, list | tuple) or not exprs:
 raise serializers.ValidationError(
 [_("Invalid expression list. Must be nonempty.")],
 )
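
Two other Python 3.10-era cleanups show up in this file: isinstance() accepts a PEP 604 union directly (so list | tuple replaces the (list, tuple) tuple form), and Callable is now imported from collections.abc, since the typing aliases were deprecated by PEP 585. A small illustration (the function is hypothetical, not from this commit):

    from collections.abc import Callable  # preferred over typing.Callable (PEP 585)

    def apply_all(fn: Callable[[int], int], values: list | tuple) -> list[int]:
        # Python 3.10+: a union type works as the second isinstance() argument
        if not isinstance(values, list | tuple):
            raise TypeError("expected a list or tuple")
        return [fn(v) for v in values]

    print(apply_all(lambda x: x * 2, (1, 2, 3)))  # [2, 4, 6]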


@@ -6,7 +6,6 @@ from contextlib import contextmanager
 from datetime import datetime
 from datetime import timezone
 from shutil import rmtree
-from typing import Optional
 from django.conf import settings
 from django.db.models import QuerySet
@@ -389,7 +388,7 @@ def autocomplete(
 ix: FileIndex,
 term: str,
 limit: int = 10,
-user: Optional[User] = None,
+user: User | None = None,
 ):
 """
 Mimics whoosh.reading.IndexReader.most_distinctive_terms with permissions
@@ -425,7 +424,7 @@ def autocomplete(
 return terms
-def get_permissions_criterias(user: Optional[User] = None):
+def get_permissions_criterias(user: User | None = None):
 user_criterias = [query.Term("has_owner", False)]
 if user is not None:
 if user.is_superuser: # superusers see all docs


@@ -251,7 +251,7 @@ class Command(BaseCommand):
 self.handle_inotify(directory, recursive, options["testing"])
 else:
 if INotify is None and settings.CONSUMER_POLLING == 0: # pragma: no cover
-logger.warn("Using polling as INotify import failed")
+logger.warning("Using polling as INotify import failed")
 self.handle_polling(directory, recursive, options["testing"])
 logger.debug("Consumer exiting.")
@@ -267,7 +267,7 @@ class Command(BaseCommand):
 polling_interval = settings.CONSUMER_POLLING
 if polling_interval == 0: # pragma: no cover
 # Only happens if INotify failed to import
-logger.warn("Using polling of 10s, consider setting this")
+logger.warning("Using polling of 10s, consider setting this")
 polling_interval = 10
 with ThreadPoolExecutor(max_workers=4) as pool:
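
Logger.warn() is an undocumented alias of warning() that has been deprecated since Python 3.3, so these call sites are switched to warning(). For example (illustrative logger name):

    import logging

    logger = logging.getLogger(__name__)
    logger.warning("Using polling as INotify import failed")  # warn() is a deprecated alias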


@@ -6,7 +6,6 @@ import tempfile
 import time
 from pathlib import Path
 from typing import TYPE_CHECKING
-from typing import Optional
 import tqdm
 from django.conf import settings
@@ -183,7 +182,7 @@ class Command(CryptMixin, BaseCommand):
 self.zip_export: bool = options["zip"]
 self.data_only: bool = options["data_only"]
 self.no_progress_bar: bool = options["no_progress_bar"]
-self.passphrase: Optional[str] = options.get("passphrase")
+self.passphrase: str | None = options.get("passphrase")
 self.files_in_export_dir: set[Path] = set()
 self.exported_files: set[str] = set()
@@ -427,7 +426,7 @@ class Command(CryptMixin, BaseCommand):
 document: Document,
 base_name: str,
 document_dict: dict,
-) -> tuple[Path, Optional[Path], Optional[Path]]:
+) -> tuple[Path, Path | None, Path | None]:
 """
 Generates the targets for a given document, including the original file, archive file and thumbnail (depending on settings).
 """
@@ -461,8 +460,8 @@ class Command(CryptMixin, BaseCommand):
 self,
 document: Document,
 original_target: Path,
-thumbnail_target: Optional[Path],
-archive_target: Optional[Path],
+thumbnail_target: Path | None,
+archive_target: Path | None,
 ) -> None:
 """
 Copies files from the document storage location to the specified target location.
@@ -512,7 +511,7 @@ class Command(CryptMixin, BaseCommand):
 def check_and_copy(
 self,
 source: Path,
-source_checksum: Optional[str],
+source_checksum: str | None,
 target: Path,
 ):
 """


@@ -3,7 +3,6 @@ import logging
 import os
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Optional
 import tqdm
 from django.conf import settings
@@ -228,8 +227,8 @@ class Command(CryptMixin, BaseCommand):
 self.data_only: bool = options["data_only"]
 self.no_progress_bar: bool = options["no_progress_bar"]
 self.passphrase: str | None = options.get("passphrase")
-self.version: Optional[str] = None
-self.salt: Optional[str] = None
+self.version: str | None = None
+self.salt: str | None = None
 self.manifest_paths = []
 self.manifest = []


@@ -1,9 +1,7 @@
 import base64
 import os
 from argparse import ArgumentParser
-from typing import Optional
 from typing import TypedDict
-from typing import Union
 from cryptography.fernet import Fernet
 from cryptography.hazmat.primitives import hashes
@@ -103,7 +101,7 @@ class CryptMixin:
 },
 ]
-def get_crypt_params(self) -> dict[str, dict[str, Union[str, int]]]:
+def get_crypt_params(self) -> dict[str, dict[str, str | int]]:
 return {
 EXPORTER_CRYPTO_SETTINGS_NAME: {
 EXPORTER_CRYPTO_ALGO_NAME: self.kdf_algorithm,
@@ -128,7 +126,7 @@ class CryptMixin:
 EXPORTER_CRYPTO_SALT_NAME
 ]
-def setup_crypto(self, *, passphrase: str, salt: Optional[str] = None):
+def setup_crypto(self, *, passphrase: str, salt: str | None = None):
 """
 Constructs a class for encryption or decryption using the specified passphrase and salt


@@ -1,7 +1,6 @@
 import logging
 import re
 from fnmatch import fnmatch
-from typing import Union
 from documents.classifier import DocumentClassifier
 from documents.data_models import ConsumableDocument
@@ -20,7 +19,7 @@ logger = logging.getLogger("paperless.matching")
 def log_reason(
-matching_model: Union[MatchingModel, WorkflowTrigger],
+matching_model: MatchingModel | WorkflowTrigger,
 document: Document,
 reason: str,
 ):
@@ -386,7 +385,7 @@ def existing_document_matches_workflow(
 def document_matches_workflow(
-document: Union[ConsumableDocument, Document],
+document: ConsumableDocument | Document,
 workflow: Workflow,
 trigger_type: WorkflowTrigger.WorkflowTriggerType,
 ) -> bool:


@@ -5,7 +5,6 @@ import re
 from collections import OrderedDict
 from pathlib import Path
 from typing import Final
-from typing import Optional
 import dateutil.parser
 import pathvalidate
@@ -326,7 +325,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
 return self.archive_filename is not None
 @property
-def archive_path(self) -> Optional[Path]:
+def archive_path(self) -> Path | None:
 if self.has_archive_version:
 return (settings.ARCHIVE_DIR / Path(str(self.archive_filename))).resolve()
 else:


@@ -10,7 +10,6 @@ from collections.abc import Iterator
 from functools import lru_cache
 from pathlib import Path
 from re import Match
-from typing import Optional
 from django.conf import settings
 from django.utils import timezone
@@ -107,7 +106,7 @@ def get_supported_file_extensions() -> set[str]:
 return extensions
-def get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentParser"]]:
+def get_parser_class_for_mime_type(mime_type: str) -> type["DocumentParser"] | None:
 """
 Returns the best parser (by weight) for the given mimetype or
 None if no parser exists
@@ -252,7 +251,7 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> Path:
 return out_path
-def parse_date(filename, text) -> Optional[datetime.datetime]:
+def parse_date(filename, text) -> datetime.datetime | None:
 return next(parse_date_generator(filename, text), None)
@@ -277,7 +276,7 @@ def parse_date_generator(filename, text) -> Iterator[datetime.datetime]:
 },
 )
-def __filter(date: datetime.datetime) -> Optional[datetime.datetime]:
+def __filter(date: datetime.datetime) -> datetime.datetime | None:
 if (
 date is not None
 and date.year > 1900
@@ -290,7 +289,7 @@ def parse_date_generator(filename, text) -> Iterator[datetime.datetime]:
 def __process_match(
 match: Match[str],
 date_order: str,
-) -> Optional[datetime.datetime]:
+) -> datetime.datetime | None:
 date_string = match.group(0)
 try:
@@ -339,7 +338,7 @@ class DocumentParser(LoggingMixin):
 self.archive_path = None
 self.text = None
-self.date: Optional[datetime.datetime] = None
+self.date: datetime.datetime | None = None
 self.progress_callback = progress_callback
 def progress(self, current_progress, max_progress):
@@ -385,7 +384,7 @@ class DocumentParser(LoggingMixin):
 def get_text(self):
 return self.text
-def get_date(self) -> Optional[datetime.datetime]:
+def get_date(self) -> datetime.datetime | None:
 return self.date
 def cleanup(self):


@@ -1,7 +1,6 @@
 import abc
 from pathlib import Path
 from typing import Final
-from typing import Optional
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
@@ -88,7 +87,7 @@ class ConsumeTaskPlugin(abc.ABC):
 """
 @abc.abstractmethod
-def run(self) -> Optional[str]:
+def run(self) -> str | None:
 """
 The bulk of plugin processing, this does whatever action the plugin is for.


@@ -1,7 +1,5 @@
 import enum
 from typing import TYPE_CHECKING
-from typing import Optional
-from typing import Union
 from asgiref.sync import async_to_sync
 from channels.layers import get_channel_layer
@@ -23,9 +21,9 @@ class ProgressManager:
 of the open/close of the layer to ensure messages go out and everything is cleaned up
 """
-def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
+def __init__(self, filename: str, task_id: str | None = None) -> None:
 self.filename = filename
-self._channel: Optional[RedisPubSubChannelLayer] = None
+self._channel: RedisPubSubChannelLayer | None = None
 self.task_id = task_id
 def __enter__(self):
@@ -57,7 +55,7 @@ class ProgressManager:
 message: str,
 current_progress: int,
 max_progress: int,
-extra_args: Optional[dict[str, Union[str, int, None]]] = None,
+extra_args: dict[str, str | int | None] | None = None,
 ) -> None:
 # Ensure the layer is open
 self.open()


@@ -1,7 +1,6 @@
 import logging
 import os
 import shutil
-from typing import Optional
 from celery import states
 from celery.signals import before_task_publish
@@ -62,7 +61,7 @@ def _suggestion_printer(
 suggestion_type: str,
 document: Document,
 selected: MatchingModel,
-base_url: Optional[str] = None,
+base_url: str | None = None,
 ):
 """
 Smaller helper to reduce duplication when just outputting suggestions to the console
@@ -80,7 +79,7 @@ def set_correspondent(
 sender,
 document: Document,
 logging_group=None,
-classifier: Optional[DocumentClassifier] = None,
+classifier: DocumentClassifier | None = None,
 replace=False,
 use_first=True,
 suggest=False,
@@ -135,7 +134,7 @@ def set_document_type(
 sender,
 document: Document,
 logging_group=None,
-classifier: Optional[DocumentClassifier] = None,
+classifier: DocumentClassifier | None = None,
 replace=False,
 use_first=True,
 suggest=False,
@@ -191,7 +190,7 @@ def set_tags(
 sender,
 document: Document,
 logging_group=None,
-classifier: Optional[DocumentClassifier] = None,
+classifier: DocumentClassifier | None = None,
 replace=False,
 suggest=False,
 base_url=None,
@@ -246,7 +245,7 @@ def set_storage_path(
 sender,
 document: Document,
 logging_group=None,
-classifier: Optional[DocumentClassifier] = None,
+classifier: DocumentClassifier | None = None,
 replace=False,
 use_first=True,
 suggest=False,


@@ -5,7 +5,6 @@ import uuid
 from datetime import timedelta
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Optional
 import tqdm
 from celery import Task
@@ -106,7 +105,7 @@ def train_classifier():
 def consume_file(
 self: Task,
 input_doc: ConsumableDocument,
-overrides: Optional[DocumentMetadataOverrides] = None,
+overrides: DocumentMetadataOverrides | None = None,
 ):
 # Default no overrides
 if overrides is None:


@@ -1,7 +1,7 @@
 import json
 import re
+from collections.abc import Callable
 from datetime import date
-from typing import Callable
 from unittest.mock import Mock
 from urllib.parse import quote


@@ -2,7 +2,6 @@ import datetime as dt
 import os
 import shutil
 from pathlib import Path
-from typing import Union
 from unittest import mock
 from django.test import TestCase
@@ -34,7 +33,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 self.dirs.double_sided_dir.mkdir()
 self.staging_file = self.dirs.scratch_dir / STAGING_FILE_NAME
-def consume_file(self, srcname, dstname: Union[str, Path] = "foo.pdf"):
+def consume_file(self, srcname, dstname: str | Path = "foo.pdf"):
 """
 Starts the consume process and also ensures the
 destination file does not exist afterwards


@@ -3,7 +3,6 @@ import importlib
 import os
 import shutil
 from pathlib import Path
-from typing import Optional
 from unittest import mock
 from django.conf import settings
@@ -66,8 +65,8 @@ def make_test_document(
 mime_type: str,
 original: str,
 original_filename: str,
-archive: Optional[str] = None,
-archive_filename: Optional[str] = None,
+archive: str | None = None,
+archive_filename: str | None = None,
 ):
 doc = document_class()
 doc.filename = original_filename


@@ -1,10 +1,9 @@
 import importlib
 import shutil
 import tempfile
+from collections.abc import Callable
 from collections.abc import Iterable
 from pathlib import Path
-from typing import Callable
-from typing import Union
 from unittest import mock
 from django.test import override_settings
@@ -115,7 +114,7 @@ class TestMigrateToEncrytpedWebPThumbnails(TestMigrations):
 def assert_file_count_by_extension(
 self,
 ext: str,
-dir: Union[str, Path],
+dir: str | Path,
 expected_count: int,
 ):
 """


@@ -1,10 +1,9 @@
 import importlib
 import shutil
 import tempfile
+from collections.abc import Callable
 from collections.abc import Iterable
 from pathlib import Path
-from typing import Callable
-from typing import Union
 from unittest import mock
 from django.test import override_settings
@@ -86,7 +85,7 @@ class TestMigrateWebPThumbnails(TestMigrations):
 def assert_file_count_by_extension(
 self,
 ext: str,
-dir: Union[str, Path],
+dir: str | Path,
 expected_count: int,
 ):
 """


@@ -3,15 +3,13 @@ import tempfile
 import time
 import warnings
 from collections import namedtuple
+from collections.abc import Callable
 from collections.abc import Generator
 from collections.abc import Iterator
 from contextlib import contextmanager
 from os import PathLike
 from pathlib import Path
 from typing import Any
-from typing import Callable
-from typing import Optional
-from typing import Union
 from unittest import mock
 import httpx
@@ -91,7 +89,7 @@ def paperless_environment():
 def util_call_with_backoff(
 method_or_callable: Callable,
-args: Union[list, tuple],
+args: list | tuple,
 *,
 skip_on_50x_err=True,
 ) -> tuple[bool, Any]:
@@ -170,22 +168,22 @@ class FileSystemAssertsMixin:
 Utilities for checks various state information of the file system
 """
-def assertIsFile(self, path: Union[PathLike, str]):
+def assertIsFile(self, path: PathLike | str):
 self.assertTrue(Path(path).resolve().is_file(), f"File does not exist: {path}")
-def assertIsNotFile(self, path: Union[PathLike, str]):
+def assertIsNotFile(self, path: PathLike | str):
 self.assertFalse(Path(path).resolve().is_file(), f"File does exist: {path}")
-def assertIsDir(self, path: Union[PathLike, str]):
+def assertIsDir(self, path: PathLike | str):
 self.assertTrue(Path(path).resolve().is_dir(), f"Dir does not exist: {path}")
-def assertIsNotDir(self, path: Union[PathLike, str]):
+def assertIsNotDir(self, path: PathLike | str):
 self.assertFalse(Path(path).resolve().is_dir(), f"Dir does exist: {path}")
 def assertFilesEqual(
 self,
-path1: Union[PathLike, str],
-path2: Union[PathLike, str],
+path1: PathLike | str,
+path2: PathLike | str,
 ):
 path1 = Path(path1)
 path2 = Path(path2)
@@ -196,7 +194,7 @@ class FileSystemAssertsMixin:
 self.assertEqual(hash1, hash2, "File SHA256 mismatch")
-def assertFileCountInDir(self, path: Union[PathLike, str], count: int):
+def assertFileCountInDir(self, path: PathLike | str, count: int):
 path = Path(path).resolve()
 self.assertTrue(path.is_dir(), f"Path {path} is not a directory")
 files = [x for x in path.iterdir() if x.is_file()]
@@ -340,7 +338,7 @@ class GetConsumerMixin:
 def get_consumer(
 self,
 filepath: Path,
-overrides: Union[DocumentMetadataOverrides, None] = None,
+overrides: DocumentMetadataOverrides | None = None,
 source: DocumentSource = DocumentSource.ConsumeFolder,
 ) -> Generator[ConsumerPlugin, None, None]:
 # Store this for verification
@@ -368,7 +366,7 @@ class DummyProgressManager:
 mock.patch("documents.tasks.ProgressManager", DummyProgressManager)
 """
-def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
+def __init__(self, filename: str, task_id: str | None = None) -> None:
 self.filename = filename
 self.task_id = task_id
 self.payloads = []
@@ -392,7 +390,7 @@ class DummyProgressManager:
 message: str,
 current_progress: int,
 max_progress: int,
-extra_args: Optional[dict[str, Union[str, int]]] = None,
+extra_args: dict[str, str | int] | None = None,
 ) -> None:
 # Ensure the layer is open
 self.open()


@@ -4,21 +4,19 @@ from os import utime
 from pathlib import Path
 from subprocess import CompletedProcess
 from subprocess import run
-from typing import Optional
-from typing import Union
 from django.conf import settings
 from PIL import Image
 def _coerce_to_path(
-source: Union[Path, str],
-dest: Union[Path, str],
+source: Path | str,
+dest: Path | str,
 ) -> tuple[Path, Path]:
 return Path(source).resolve(), Path(dest).resolve()
-def copy_basic_file_stats(source: Union[Path, str], dest: Union[Path, str]) -> None:
+def copy_basic_file_stats(source: Path | str, dest: Path | str) -> None:
 """
 Copies only the m_time and a_time attributes from source to destination.
 Both are expected to exist.
@@ -33,8 +31,8 @@ def copy_basic_file_stats(source: Union[Path, str], dest: Union[Path, str]) -> N
 def copy_file_with_basic_stats(
-source: Union[Path, str],
-dest: Union[Path, str],
+source: Path | str,
+dest: Path | str,
 ) -> None:
 """
 A sort of simpler copy2 that doesn't copy extended file attributes,
@@ -53,7 +51,7 @@ def maybe_override_pixel_limit() -> None:
 """
 Maybe overrides the PIL limit on pixel count, if configured to allow it
 """
-limit: Optional[Union[float, int]] = settings.MAX_IMAGE_PIXELS
+limit: float | int | None = settings.MAX_IMAGE_PIXELS
 if limit is not None and limit >= 0:
 pixel_count = limit
 if pixel_count == 0:
@@ -63,8 +61,8 @@ def maybe_override_pixel_limit() -> None:
 def run_subprocess(
 arguments: list[str],
-env: Optional[dict[str, str]] = None,
-logger: Optional[logging.Logger] = None,
+env: dict[str, str] | None = None,
+logger: logging.Logger | None = None,
 *,
 check_exit_code: bool = True,
 log_stdout: bool = True,


@@ -1638,9 +1638,8 @@ class RemoteVersionView(GenericAPIView):
 try:
 remote_json = json.loads(remote)
 remote_version = remote_json["tag_name"]
-# Basically PEP 616 but that only went in 3.9
-if remote_version.startswith("ngx-"):
-remote_version = remote_version[len("ngx-") :]
+# Some early tags used ngx-x.y.z
+remote_version = remote_version.removeprefix("ngx-")
 except ValueError:
 logger.debug("An error occurred parsing remote version json")
 except urllib.error.URLError:
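
str.removeprefix() (PEP 616) landed in Python 3.9, so with the minimum now at 3.10 it can safely replace the manual startswith/slice workaround; it returns the string unchanged when the prefix is absent. Illustrative values:

    print("ngx-2.12.1".removeprefix("ngx-"))  # 2.12.1
    print("2.12.1".removeprefix("ngx-"))      # unchanged: 2.12.1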


@@ -1,6 +1,5 @@
 import dataclasses
 import json
-from typing import Optional
 from django.conf import settings
@@ -44,18 +43,18 @@ class OcrConfig(OutputTypeConfig):
 correspond almost directly to the OCRMyPDF options
 """
-pages: Optional[int] = dataclasses.field(init=False)
+pages: int | None = dataclasses.field(init=False)
 language: str = dataclasses.field(init=False)
 mode: str = dataclasses.field(init=False)
 skip_archive_file: str = dataclasses.field(init=False)
-image_dpi: Optional[int] = dataclasses.field(init=False)
+image_dpi: int | None = dataclasses.field(init=False)
 clean: str = dataclasses.field(init=False)
 deskew: bool = dataclasses.field(init=False)
 rotate: bool = dataclasses.field(init=False)
 rotate_threshold: float = dataclasses.field(init=False)
-max_image_pixel: Optional[float] = dataclasses.field(init=False)
+max_image_pixel: float | None = dataclasses.field(init=False)
 color_conversion_strategy: str = dataclasses.field(init=False)
-user_args: Optional[dict[str, str]] = dataclasses.field(init=False)
+user_args: dict[str, str] | None = dataclasses.field(init=False)
 def __post_init__(self) -> None:
 super().__post_init__()


@@ -9,8 +9,6 @@ from os import PathLike
 from pathlib import Path
 from platform import machine
 from typing import Final
-from typing import Optional
-from typing import Union
 from urllib.parse import urlparse
 from celery.schedules import crontab
@@ -57,7 +55,7 @@ def __get_int(key: str, default: int) -> int:
 return int(os.getenv(key, default))
-def __get_optional_int(key: str) -> Optional[int]:
+def __get_optional_int(key: str) -> int | None:
 """
 Returns None if the environment key is not present, otherwise an integer
 """
@@ -75,7 +73,7 @@ def __get_float(key: str, default: float) -> float:
 def __get_path(
 key: str,
-default: Union[PathLike, str],
+default: PathLike | str,
 ) -> Path:
 """
 Return a normalized, absolute path based on the environment variable or a default,
@@ -86,7 +84,7 @@ def __get_path(
 return Path(default).resolve()
-def __get_optional_path(key: str) -> Optional[Path]:
+def __get_optional_path(key: str) -> Path | None:
 """
 Returns None if the environment key is not present, otherwise a fully resolved Path
 """
@@ -97,7 +95,7 @@ def __get_optional_path(key: str) -> Optional[Path]:
 def __get_list(
 key: str,
-default: Optional[list[str]] = None,
+default: list[str] | None = None,
 sep: str = ",",
 ) -> list[str]:
 """
@@ -112,7 +110,7 @@ def __get_list(
 return []
-def _parse_redis_url(env_redis: Optional[str]) -> tuple[str, str]:
+def _parse_redis_url(env_redis: str | None) -> tuple[str, str]:
 """
 Gets the Redis information from the environment or a default and handles
 converting from incompatible django_channels and celery formats.
@@ -989,7 +987,7 @@ OCR_ROTATE_PAGES_THRESHOLD: Final[float] = __get_float(
 12.0,
 )
-OCR_MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
+OCR_MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
 "PAPERLESS_OCR_MAX_IMAGE_PIXELS",
 )
@@ -1000,7 +998,7 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
 OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
-MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
+MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
 "PAPERLESS_MAX_IMAGE_PIXELS",
 )
@@ -1128,7 +1126,7 @@ APP_LOGO = os.getenv("PAPERLESS_APP_LOGO", None)
 ###############################################################################
-def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
+def _get_nltk_language_setting(ocr_lang: str) -> str | None:
 """
 Maps an ISO-639-1 language code supported by Tesseract into
 an optional NLTK language name. This is the set of common supported
@@ -1165,7 +1163,7 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
 NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")
-NLTK_LANGUAGE: Optional[str] = _get_nltk_language_setting(OCR_LANGUAGE)
+NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
 ###############################################################################
 # Email (SMTP) Backend #
@@ -1187,7 +1185,7 @@ if DEBUG: # pragma: no cover
 # Email Preprocessors #
 ###############################################################################
-EMAIL_GNUPG_HOME: Final[Optional[str]] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME")
+EMAIL_GNUPG_HOME: Final[str | None] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME")
 EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
 "PAPERLESS_ENABLE_GPG_DECRYPTOR",
 )
)


@@ -10,8 +10,6 @@ from datetime import timedelta
 from fnmatch import fnmatch
 from pathlib import Path
 from typing import TYPE_CHECKING
-from typing import Optional
-from typing import Union
 import magic
 import pathvalidate
@@ -84,7 +82,7 @@ class BaseMailAction:
 read mails when the action is to mark mails as read).
 """
-def get_criteria(self) -> Union[dict, LogicOperator]:
+def get_criteria(self) -> dict | LogicOperator:
 """
 Returns filtering criteria/query for this mail action.
 """
@@ -453,7 +451,7 @@ class MailAccountHandler(LoggingMixin):
 else:
 self.log.debug(f"Skipping mail preprocessor {preprocessor_type.NAME}")
-def _correspondent_from_name(self, name: str) -> Optional[Correspondent]:
+def _correspondent_from_name(self, name: str) -> Correspondent | None:
 try:
 return Correspondent.objects.get_or_create(name=name)[0]
 except DatabaseError as e:
@@ -465,7 +463,7 @@ class MailAccountHandler(LoggingMixin):
 message: MailMessage,
 att: MailAttachment,
 rule: MailRule,
-) -> Optional[str]:
+) -> str | None:
 if rule.assign_title_from == MailRule.TitleSource.FROM_SUBJECT:
 return message.subject
@@ -484,7 +482,7 @@ class MailAccountHandler(LoggingMixin):
 self,
 message: MailMessage,
 rule: MailRule,
-) -> Optional[Correspondent]:
+) -> Correspondent | None:
 c_from = rule.assign_correspondent_from
 if c_from == MailRule.CorrespondentSource.FROM_NOTHING:
@@ -688,7 +686,7 @@ class MailAccountHandler(LoggingMixin):
 def filename_inclusion_matches(
 self,
-filter_attachment_filename_include: Optional[str],
+filter_attachment_filename_include: str | None,
 filename: str,
 ) -> bool:
 if filter_attachment_filename_include:
@@ -707,7 +705,7 @@ class MailAccountHandler(LoggingMixin):
 def filename_exclusion_matches(
 self,
-filter_attachment_filename_exclude: Optional[str],
+filter_attachment_filename_exclude: str | None,
 filename: str,
 ) -> bool:
 if filter_attachment_filename_exclude:


@@ -1,7 +1,6 @@
 import re
 from html import escape
 from pathlib import Path
-from typing import Optional
 from bleach import clean
 from bleach import linkify
@@ -33,7 +32,7 @@ class MailDocumentParser(DocumentParser):
 logging_name = "paperless.parsing.mail"
-def _settings_to_gotenberg_pdfa(self) -> Optional[PdfAFormat]:
+def _settings_to_gotenberg_pdfa(self) -> PdfAFormat | None:
 """
 Converts our requested PDF/A output into the Gotenberg API
 format
@@ -44,7 +43,7 @@ class MailDocumentParser(DocumentParser):
 }:
 return PdfAFormat.A2b
 elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1: # pragma: no cover
-self.log.warn(
+self.log.warning(
 "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
 )
 return PdfAFormat.A2b


@@ -4,8 +4,6 @@ import random
 import uuid
 from collections import namedtuple
 from contextlib import AbstractContextManager
-from typing import Optional
-from typing import Union
 from unittest import mock
 import pytest
@@ -199,11 +197,11 @@ class MessageBuilder:
 def create_message(
 self,
-attachments: Union[int, list[_AttachmentDef]] = 1,
+attachments: int | list[_AttachmentDef] = 1,
 body: str = "",
 subject: str = "the subject",
 from_: str = "no_one@mail.com",
-to: Optional[list[str]] = None,
+to: list[str] | None = None,
 seen: bool = False,
 flagged: bool = False,
 processed: bool = False,
@@ -622,8 +620,8 @@ class TestMail(
 @dataclasses.dataclass(frozen=True)
 class FilterTestCase:
 name: str
-include_pattern: Optional[str]
-exclude_pattern: Optional[str]
+include_pattern: str | None
+exclude_pattern: str | None
 expected_matches: list[str]
 tests = [


@@ -3,7 +3,6 @@ import re
 import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING
-from typing import Optional
 from django.conf import settings
 from PIL import Image
@@ -124,7 +123,7 @@ class RasterisedDocumentParser(DocumentParser):
 )
 return no_alpha_image
-def get_dpi(self, image) -> Optional[int]:
+def get_dpi(self, image) -> int | None:
 try:
 with Image.open(image) as im:
 x, y = im.info["dpi"]
@@ -133,7 +132,7 @@ class RasterisedDocumentParser(DocumentParser):
 self.log.warning(f"Error while getting DPI from image {image}: {e}")
 return None
-def calculate_a4_dpi(self, image) -> Optional[int]:
+def calculate_a4_dpi(self, image) -> int | None:
 try:
 with Image.open(image) as im:
 width, height = im.size
@@ -148,9 +147,9 @@ class RasterisedDocumentParser(DocumentParser):
 def extract_text(
 self,
-sidecar_file: Optional[Path],
+sidecar_file: Path | None,
 pdf_file: Path,
-) -> Optional[str]:
+) -> str | None:
 # When re-doing OCR, the sidecar contains ONLY the new text, not
 # the whole text, so do not utilize it in that case
 if (


@@ -102,7 +102,7 @@ class TikaDocumentParser(DocumentParser):
 }:
 route.pdf_format(PdfAFormat.A2b)
 elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1:
-self.log.warn(
+self.log.warning(
 "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
 )
 route.pdf_format(PdfAFormat.A2b)