Mirror of https://github.com/paperless-ngx/paperless-ngx.git
Synced 2025-10-30 03:56:23 -05:00

Chore: Switch from os.path to pathlib.Path (#8325)

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>

Authored by Sebastian Steinbeißer; committed by GitHub.

parent d06aac947d
commit 935d077836
@@ -38,20 +38,14 @@ ignore = ["DJ001", "SIM105", "RUF012"]
 [lint.per-file-ignores]
 ".github/scripts/*.py" = ["E501", "INP001", "SIM117"]
 "docker/wait-for-redis.py" = ["INP001", "T201"]
-"src/documents/barcodes.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/classifier.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/consumer.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/file_handling.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/index.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/management/commands/decrypt_documents.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/management/commands/document_consumer.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/management/commands/document_exporter.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/management/commands/document_importer.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/migrations/0012_auto_20160305_0040.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/migrations/0014_document_checksum.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/migrations/1003_mime_types.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/migrations/1012_fix_archive_files.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/migrations/1037_webp_encrypted_thumbnail_conversion.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/models.py" = ["SIM115", "PTH"]  # TODO PTH Enable & remove
 "src/documents/parsers.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/signals/handlers.py" = ["PTH"]  # TODO Enable & remove
src/documents/barcodes.py
@@ -3,6 +3,7 @@ import re
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 from django.conf import settings
 from pdf2image import convert_from_path
@@ -21,6 +22,9 @@ from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import maybe_override_pixel_limit
 
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
 logger = logging.getLogger("paperless.barcodes")
 
 
@@ -61,7 +65,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
           - Barcode support is enabled and the mime type is supported
         """
         if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
-            supported_mimes = {"application/pdf", "image/tiff"}
+            supported_mimes: set[str] = {"application/pdf", "image/tiff"}
         else:
             supported_mimes = {"application/pdf"}
 
@@ -71,16 +75,16 @@ class BarcodePlugin(ConsumeTaskPlugin):
             or settings.CONSUMER_ENABLE_TAG_BARCODE
         ) and self.input_doc.mime_type in supported_mimes
 
-    def setup(self):
+    def setup(self) -> None:
         self.temp_dir = tempfile.TemporaryDirectory(
             dir=self.base_tmp_dir,
             prefix="barcode",
         )
-        self.pdf_file = self.input_doc.original_file
+        self.pdf_file: Path = self.input_doc.original_file
         self._tiff_conversion_done = False
         self.barcodes: list[Barcode] = []
 
-    def run(self) -> str | None:
+    def run(self) -> None:
         # Some operations may use PIL, override pixel setting if needed
         maybe_override_pixel_limit()
 
@@ -158,7 +162,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
     def cleanup(self) -> None:
         self.temp_dir.cleanup()
 
-    def convert_from_tiff_to_pdf(self):
+    def convert_from_tiff_to_pdf(self) -> None:
         """
         May convert a TIFF image into a PDF, if the input is a TIFF and
         the TIFF has not been made into a PDF
@@ -223,7 +227,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
 
         # Choose the library for reading
         if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
-            reader = self.read_barcodes_pyzbar
+            reader: Callable[[Image.Image], list[str]] = self.read_barcodes_pyzbar
             logger.debug("Scanning for barcodes using PYZBAR")
         else:
             reader = self.read_barcodes_zxing
@@ -236,7 +240,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
             logger.debug(f"PDF has {num_of_pages} pages")
 
             # Get limit from configuration
-            barcode_max_pages = (
+            barcode_max_pages: int = (
                 num_of_pages
                 if settings.CONSUMER_BARCODE_MAX_PAGES == 0
                 else settings.CONSUMER_BARCODE_MAX_PAGES
@@ -311,7 +315,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
         self.detect()
 
         # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
-        asn_text = next(
+        asn_text: str | None = next(
             (x.value for x in self.barcodes if x.is_asn),
             None,
         )
@@ -333,36 +337,36 @@ class BarcodePlugin(ConsumeTaskPlugin):
         return asn
 
     @property
-    def tags(self) -> list[int] | None:
+    def tags(self) -> list[int]:
         """
         Search the parsed barcodes for any tags.
         Returns the detected tag ids (or empty list)
         """
-        tags = []
+        tags: list[int] = []
 
         # Ensure the barcodes have been read
         self.detect()
 
         for x in self.barcodes:
-            tag_texts = x.value
+            tag_texts: str = x.value
 
             for raw in tag_texts.split(","):
                 try:
-                    tag = None
+                    tag_str: str | None = None
                     for regex in settings.CONSUMER_TAG_BARCODE_MAPPING:
                         if re.match(regex, raw, flags=re.IGNORECASE):
                             sub = settings.CONSUMER_TAG_BARCODE_MAPPING[regex]
-                            tag = (
+                            tag_str = (
                                 re.sub(regex, sub, raw, flags=re.IGNORECASE)
                                 if sub
                                 else raw
                             )
                             break
 
-                    if tag:
+                    if tag_str:
                         tag, _ = Tag.objects.get_or_create(
-                            name__iexact=tag,
-                            defaults={"name": tag},
+                            name__iexact=tag_str,
+                            defaults={"name": tag_str},
                         )
 
                         logger.debug(
@@ -413,7 +417,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
         """
 
         document_paths = []
-        fname = self.input_doc.original_file.stem
+        fname: str = self.input_doc.original_file.stem
         with Pdf.open(self.pdf_file) as input_pdf:
             # Start with an empty document
             current_document: list[Page] = []
@@ -432,7 +436,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
                 logger.debug(f"Starting new document at idx {idx}")
                 current_document = []
                 documents.append(current_document)
-                keep_page = pages_to_split_on[idx]
+                keep_page: bool = pages_to_split_on[idx]
                 if keep_page:
                     # Keep the page
                     # (new document is started by asn barcode)
@@ -451,7 +455,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
 
                 logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
                 savepath = Path(self.temp_dir.name) / output_filename
-                with open(savepath, "wb") as out:
+                with savepath.open("wb") as out:
                     dst.save(out)
 
                 copy_basic_file_stats(self.input_doc.original_file, savepath)
src/documents/classifier.py
@@ -1,16 +1,17 @@
 import logging
-import os
 import pickle
 import re
 import warnings
 from collections.abc import Iterator
 from hashlib import sha256
+from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Optional
 
 if TYPE_CHECKING:
     from datetime import datetime
-    from pathlib import Path
+
+    from numpy import ndarray
 
 from django.conf import settings
 from django.core.cache import cache
@@ -28,7 +29,7 @@ logger = logging.getLogger("paperless.classifier")
 
 class IncompatibleClassifierVersionError(Exception):
     def __init__(self, message: str, *args: object) -> None:
-        self.message = message
+        self.message: str = message
         super().__init__(*args)
 
 
@@ -36,8 +37,8 @@ class ClassifierModelCorruptError(Exception):
     pass
 
 
-def load_classifier() -> Optional["DocumentClassifier"]:
-    if not os.path.isfile(settings.MODEL_FILE):
+def load_classifier(*, raise_exception: bool = False) -> Optional["DocumentClassifier"]:
+    if not settings.MODEL_FILE.is_file():
         logger.debug(
             "Document classification model does not exist (yet), not "
             "performing automatic matching.",
@@ -50,22 +51,30 @@ def load_classifier() -> Optional["DocumentClassifier"]:
 
     except IncompatibleClassifierVersionError as e:
         logger.info(f"Classifier version incompatible: {e.message}, will re-train")
-        os.unlink(settings.MODEL_FILE)
+        Path(settings.MODEL_FILE).unlink()
         classifier = None
-    except ClassifierModelCorruptError:
+        if raise_exception:
+            raise e
+    except ClassifierModelCorruptError as e:
         # there's something wrong with the model file.
         logger.exception(
             "Unrecoverable error while loading document "
             "classification model, deleting model file.",
         )
-        os.unlink(settings.MODEL_FILE)
+        Path(settings.MODEL_FILE).unlink()
         classifier = None
-    except OSError:
+        if raise_exception:
+            raise e
+    except OSError as e:
         logger.exception("IO error while loading document classification model")
         classifier = None
-    except Exception:  # pragma: no cover
+        if raise_exception:
+            raise e
+    except Exception as e:  # pragma: no cover
         logger.exception("Unknown error while loading document classification model")
         classifier = None
+        if raise_exception:
+            raise e
+
     return classifier
 
@@ -76,7 +85,7 @@ class DocumentClassifier:
     # v9 - Changed from hashing to time/ids for re-train check
     FORMAT_VERSION = 9
 
-    def __init__(self):
+    def __init__(self) -> None:
         # last time a document changed and therefore training might be required
         self.last_doc_change_time: datetime | None = None
         # Hash of primary keys of AUTO matching values last used in training
@@ -95,7 +104,7 @@ class DocumentClassifier:
     def load(self) -> None:
         # Catch warnings for processing
         with warnings.catch_warnings(record=True) as w:
-            with open(settings.MODEL_FILE, "rb") as f:
+            with Path(settings.MODEL_FILE).open("rb") as f:
                 schema_version = pickle.load(f)
 
                 if schema_version != self.FORMAT_VERSION:
@@ -132,11 +141,11 @@ class DocumentClassifier:
                 ):
                     raise IncompatibleClassifierVersionError("sklearn version update")
 
-    def save(self):
+    def save(self) -> None:
         target_file: Path = settings.MODEL_FILE
-        target_file_temp = target_file.with_suffix(".pickle.part")
+        target_file_temp: Path = target_file.with_suffix(".pickle.part")
 
-        with open(target_file_temp, "wb") as f:
+        with target_file_temp.open("wb") as f:
             pickle.dump(self.FORMAT_VERSION, f)
 
             pickle.dump(self.last_doc_change_time, f)
@@ -153,7 +162,7 @@ class DocumentClassifier:
 
         target_file_temp.rename(target_file)
 
-    def train(self):
+    def train(self) -> bool:
         # Get non-inbox documents
         docs_queryset = (
             Document.objects.exclude(
@@ -190,7 +199,7 @@ class DocumentClassifier:
             hasher.update(y.to_bytes(4, "little", signed=True))
             labels_correspondent.append(y)
 
-            tags = sorted(
+            tags: list[int] = sorted(
                 tag.pk
                 for tag in doc.tags.filter(
                     matching_algorithm=MatchingModel.MATCH_AUTO,
@@ -236,9 +245,9 @@ class DocumentClassifier:
         # union with {-1} accounts for cases where all documents have
         # correspondents and types assigned, so -1 isn't part of labels_x, which
         # it usually is.
-        num_correspondents = len(set(labels_correspondent) | {-1}) - 1
-        num_document_types = len(set(labels_document_type) | {-1}) - 1
-        num_storage_paths = len(set(labels_storage_path) | {-1}) - 1
+        num_correspondents: int = len(set(labels_correspondent) | {-1}) - 1
+        num_document_types: int = len(set(labels_document_type) | {-1}) - 1
+        num_storage_paths: int = len(set(labels_storage_path) | {-1}) - 1
 
         logger.debug(
             f"{docs_queryset.count()} documents, {num_tags} tag(s), {num_correspondents} correspondent(s), "
@@ -266,7 +275,9 @@ class DocumentClassifier:
             min_df=0.01,
         )
 
-        data_vectorized = self.data_vectorizer.fit_transform(content_generator())
+        data_vectorized: ndarray = self.data_vectorizer.fit_transform(
+            content_generator(),
+        )
 
         # See the notes here:
         # https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html
@@ -284,7 +295,7 @@ class DocumentClassifier:
                     label[0] if len(label) == 1 else -1 for label in labels_tags
                 ]
                 self.tags_binarizer = LabelBinarizer()
-                labels_tags_vectorized = self.tags_binarizer.fit_transform(
+                labels_tags_vectorized: ndarray = self.tags_binarizer.fit_transform(
                     labels_tags,
                 ).ravel()
             else:
src/documents/index.py
@@ -1,11 +1,11 @@
 import logging
 import math
-import os
 from collections import Counter
 from contextlib import contextmanager
 from datetime import datetime
 from datetime import timezone
 from shutil import rmtree
+from typing import Literal
 
 from django.conf import settings
 from django.db.models import QuerySet
@@ -47,7 +47,7 @@ from documents.models import User
 logger = logging.getLogger("paperless.index")
 
 
-def get_schema():
+def get_schema() -> Schema:
     return Schema(
         id=NUMERIC(stored=True, unique=True),
         title=TEXT(sortable=True),
@@ -93,7 +93,7 @@ def open_index(recreate=False) -> FileIndex:
         logger.exception("Error while opening the index, recreating.")
 
     # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
-    if os.path.isdir(settings.INDEX_DIR):
+    if settings.INDEX_DIR.is_dir():
         rmtree(settings.INDEX_DIR)
     settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
 
@@ -123,7 +123,7 @@ def open_index_searcher() -> Searcher:
         searcher.close()
 
 
-def update_document(writer: AsyncWriter, doc: Document):
+def update_document(writer: AsyncWriter, doc: Document) -> None:
     tags = ",".join([t.name for t in doc.tags.all()])
     tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
     notes = ",".join([str(c.note) for c in Note.objects.filter(document=doc)])
@@ -133,7 +133,7 @@ def update_document(writer: AsyncWriter, doc: Document):
     custom_fields_ids = ",".join(
         [str(f.field.id) for f in CustomFieldInstance.objects.filter(document=doc)],
     )
-    asn = doc.archive_serial_number
+    asn: int | None = doc.archive_serial_number
     if asn is not None and (
         asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
         or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
@@ -149,7 +149,7 @@ def update_document(writer: AsyncWriter, doc: Document):
         doc,
         only_with_perms_in=["view_document"],
     )
-    viewer_ids = ",".join([str(u.id) for u in users_with_perms])
+    viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
     writer.update_document(
         id=doc.pk,
         title=doc.title,
@@ -187,20 +187,20 @@ def update_document(writer: AsyncWriter, doc: Document):
     )
 
 
-def remove_document(writer: AsyncWriter, doc: Document):
+def remove_document(writer: AsyncWriter, doc: Document) -> None:
     remove_document_by_id(writer, doc.pk)
 
 
-def remove_document_by_id(writer: AsyncWriter, doc_id):
+def remove_document_by_id(writer: AsyncWriter, doc_id) -> None:
     writer.delete_by_term("id", doc_id)
 
 
-def add_or_update_document(document: Document):
+def add_or_update_document(document: Document) -> None:
     with open_index_writer() as writer:
         update_document(writer, document)
 
 
-def remove_document_from_index(document: Document):
+def remove_document_from_index(document: Document) -> None:
     with open_index_writer() as writer:
         remove_document(writer, document)
 
@@ -218,11 +218,11 @@ class MappedDocIdSet(DocIdSet):
         self.document_ids = BitSet(document_ids, size=max_id)
         self.ixreader = ixreader
 
-    def __contains__(self, docnum):
+    def __contains__(self, docnum) -> bool:
         document_id = self.ixreader.stored_fields(docnum)["id"]
         return document_id in self.document_ids
 
-    def __bool__(self):
+    def __bool__(self) -> Literal[True]:
         # searcher.search ignores a filter if it's "falsy".
         # We use this hack so this DocIdSet, when used as a filter, is never ignored.
         return True
@@ -232,13 +232,13 @@ class DelayedQuery:
     def _get_query(self):
         raise NotImplementedError  # pragma: no cover
 
-    def _get_query_sortedby(self):
+    def _get_query_sortedby(self) -> tuple[None, Literal[False]] | tuple[str, bool]:
         if "ordering" not in self.query_params:
             return None, False
 
         field: str = self.query_params["ordering"]
 
-        sort_fields_map = {
+        sort_fields_map: dict[str, str] = {
             "created": "created",
             "modified": "modified",
             "added": "added",
@@ -268,7 +268,7 @@ class DelayedQuery:
         query_params,
         page_size,
         filter_queryset: QuerySet,
-    ):
+    ) -> None:
         self.searcher = searcher
         self.query_params = query_params
         self.page_size = page_size
@@ -276,7 +276,7 @@ class DelayedQuery:
         self.first_score = None
         self.filter_queryset = filter_queryset
 
-    def __len__(self):
+    def __len__(self) -> int:
         page = self[0:1]
         return len(page)
 
@@ -334,7 +334,7 @@ class LocalDateParser(English):
 
 
 class DelayedFullTextQuery(DelayedQuery):
-    def _get_query(self):
+    def _get_query(self) -> tuple:
         q_str = self.query_params["query"]
         qp = MultifieldParser(
             [
@@ -364,7 +364,7 @@ class DelayedFullTextQuery(DelayedQuery):
 
 
 class DelayedMoreLikeThisQuery(DelayedQuery):
-    def _get_query(self):
+    def _get_query(self) -> tuple:
         more_like_doc_id = int(self.query_params["more_like_id"])
         content = Document.objects.get(id=more_like_doc_id).content
 
@@ -379,7 +379,7 @@ class DelayedMoreLikeThisQuery(DelayedQuery):
         q = query.Or(
             [query.Term("content", word, boost=weight) for word, weight in kts],
         )
-        mask = {docnum}
+        mask: set = {docnum}
 
         return q, mask
 
@@ -389,7 +389,7 @@ def autocomplete(
     term: str,
     limit: int = 10,
     user: User | None = None,
-):
+) -> list:
     """
     Mimics whoosh.reading.IndexReader.most_distinctive_terms with permissions
    and without scoring
@@ -402,7 +402,7 @@ def autocomplete(
         # content field query instead and return bogus, not text data
         qp.remove_plugin_class(FieldsPlugin)
         q = qp.parse(f"{term.lower()}*")
-        user_criterias = get_permissions_criterias(user)
+        user_criterias: list = get_permissions_criterias(user)
 
         results = s.search(
             q,
@@ -417,14 +417,14 @@ def autocomplete(
                     termCounts[match] += 1
             terms = [t for t, _ in termCounts.most_common(limit)]
 
-        term_encoded = term.encode("UTF-8")
+        term_encoded: bytes = term.encode("UTF-8")
         if term_encoded in terms:
             terms.insert(0, terms.pop(terms.index(term_encoded)))
 
     return terms
 
 
-def get_permissions_criterias(user: User | None = None):
+def get_permissions_criterias(user: User | None = None) -> list:
     user_criterias = [query.Term("has_owner", False)]
     if user is not None:
         if user.is_superuser:  # superusers see all docs
src/documents/management/commands/decrypt_documents.py
@@ -1,4 +1,4 @@
-import os
+from pathlib import Path
 
 from django.conf import settings
 from django.core.management.base import BaseCommand
@@ -14,7 +14,7 @@ class Command(BaseCommand):
         "state to an unencrypted one (or vice-versa)"
     )
 
-    def add_arguments(self, parser):
+    def add_arguments(self, parser) -> None:
         parser.add_argument(
             "--passphrase",
             help=(
@@ -23,7 +23,7 @@ class Command(BaseCommand):
             ),
         )
 
-    def handle(self, *args, **options):
+    def handle(self, *args, **options) -> None:
         try:
             self.stdout.write(
                 self.style.WARNING(
@@ -52,7 +52,7 @@ class Command(BaseCommand):
 
         self.__gpg_to_unencrypted(passphrase)
 
-    def __gpg_to_unencrypted(self, passphrase: str):
+    def __gpg_to_unencrypted(self, passphrase: str) -> None:
         encrypted_files = Document.objects.filter(
             storage_type=Document.STORAGE_TYPE_GPG,
         )
@@ -69,7 +69,7 @@ class Command(BaseCommand):
 
             document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 
-            ext = os.path.splitext(document.filename)[1]
+            ext: str = Path(document.filename).suffix
 
             if not ext == ".gpg":
                 raise CommandError(
@@ -77,12 +77,12 @@ class Command(BaseCommand):
                     f"end with .gpg",
                 )
 
-            document.filename = os.path.splitext(document.filename)[0]
+            document.filename = Path(document.filename).stem
 
-            with open(document.source_path, "wb") as f:
+            with document.source_path.open("wb") as f:
                 f.write(raw_document)
 
-            with open(document.thumbnail_path, "wb") as f:
+            with document.thumbnail_path.open("wb") as f:
                 f.write(raw_thumb)
 
             Document.objects.filter(id=document.id).update(
@@ -91,4 +91,4 @@ class Command(BaseCommand):
             )
 
             for path in old_paths:
-                os.unlink(path)
+                path.unlink()
src/documents/management/commands/document_importer.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+from collections.abc import Generator
 from contextlib import contextmanager
 from pathlib import Path
 
@@ -44,7 +45,7 @@ if settings.AUDIT_LOG_ENABLED:
 
 
 @contextmanager
-def disable_signal(sig, receiver, sender):
+def disable_signal(sig, receiver, sender) -> Generator:
     try:
         sig.disconnect(receiver=receiver, sender=sender)
         yield
@@ -58,7 +59,7 @@ class Command(CryptMixin, BaseCommand):
         "documents it refers to."
     )
 
-    def add_arguments(self, parser):
+    def add_arguments(self, parser) -> None:
         parser.add_argument("source")
 
         parser.add_argument(
@@ -90,7 +91,7 @@ class Command(CryptMixin, BaseCommand):
         - Are there existing users or documents in the database?
         """
 
-        def pre_check_maybe_not_empty():
+        def pre_check_maybe_not_empty() -> None:
             # Skip this check if operating only on the database
             # We can expect data to exist in that case
             if not self.data_only:
@@ -122,7 +123,7 @@ class Command(CryptMixin, BaseCommand):
                     ),
                 )
 
-        def pre_check_manifest_exists():
+        def pre_check_manifest_exists() -> None:
             if not (self.source / "manifest.json").exists():
                 raise CommandError(
                     "That directory doesn't appear to contain a manifest.json file.",
@@ -141,7 +142,7 @@ class Command(CryptMixin, BaseCommand):
         """
         Loads manifest data from the various JSON files for parsing and loading the database
         """
-        main_manifest_path = self.source / "manifest.json"
+        main_manifest_path: Path = self.source / "manifest.json"
 
         with main_manifest_path.open() as infile:
             self.manifest = json.load(infile)
@@ -158,8 +159,8 @@ class Command(CryptMixin, BaseCommand):
 
         Must account for the old style of export as well, with just version.json
         """
-        version_path = self.source / "version.json"
-        metadata_path = self.source / "metadata.json"
+        version_path: Path = self.source / "version.json"
+        metadata_path: Path = self.source / "metadata.json"
         if not version_path.exists() and not metadata_path.exists():
             self.stdout.write(
                 self.style.NOTICE("No version.json or metadata.json file located"),
@@ -221,7 +222,7 @@ class Command(CryptMixin, BaseCommand):
                 )
                 raise e
 
-    def handle(self, *args, **options):
+    def handle(self, *args, **options) -> None:
         logging.getLogger().handlers[0].level = logging.ERROR
 
         self.source = Path(options["source"]).resolve()
@@ -290,13 +291,13 @@ class Command(CryptMixin, BaseCommand):
             no_progress_bar=self.no_progress_bar,
         )
 
-    def check_manifest_validity(self):
+    def check_manifest_validity(self) -> None:
         """
         Attempts to verify the manifest is valid.  Namely checking the files
         referred to exist and the files can be read from
         """
 
-        def check_document_validity(document_record: dict):
+        def check_document_validity(document_record: dict) -> None:
             if EXPORTER_FILE_NAME not in document_record:
                 raise CommandError(
                     "The manifest file contains a record which does not "
@@ -341,7 +342,7 @@ class Command(CryptMixin, BaseCommand):
             if not self.data_only and record["model"] == "documents.document":
                 check_document_validity(record)
 
-    def _import_files_from_manifest(self):
+    def _import_files_from_manifest(self) -> None:
         settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
         settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
         settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
@@ -356,24 +357,24 @@ class Command(CryptMixin, BaseCommand):
             document = Document.objects.get(pk=record["pk"])
 
             doc_file = record[EXPORTER_FILE_NAME]
-            document_path = os.path.join(self.source, doc_file)
+            document_path = self.source / doc_file
 
             if EXPORTER_THUMBNAIL_NAME in record:
                 thumb_file = record[EXPORTER_THUMBNAIL_NAME]
-                thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()
+                thumbnail_path = (self.source / thumb_file).resolve()
             else:
                 thumbnail_path = None
 
             if EXPORTER_ARCHIVE_NAME in record:
                 archive_file = record[EXPORTER_ARCHIVE_NAME]
-                archive_path = os.path.join(self.source, archive_file)
+                archive_path = self.source / archive_file
             else:
                 archive_path = None
 
             document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 
             with FileLock(settings.MEDIA_LOCK):
-                if os.path.isfile(document.source_path):
+                if Path(document.source_path).is_file():
                     raise FileExistsError(document.source_path)
 
                 create_source_path_directory(document.source_path)
@@ -418,8 +419,8 @@ class Command(CryptMixin, BaseCommand):
             had_at_least_one_record = False
 
             for crypt_config in self.CRYPT_FIELDS:
-                importer_model = crypt_config["model_name"]
-                crypt_fields = crypt_config["fields"]
+                importer_model: str = crypt_config["model_name"]
+                crypt_fields: str = crypt_config["fields"]
                 for record in filter(
                     lambda x: x["model"] == importer_model,
                     self.manifest,
| @@ -15,7 +15,7 @@ from documents.parsers import run_convert | |||||||
| logger = logging.getLogger("paperless.migrations") | logger = logging.getLogger("paperless.migrations") | ||||||
|  |  | ||||||
|  |  | ||||||
| def _do_convert(work_package): | def _do_convert(work_package) -> None: | ||||||
|     ( |     ( | ||||||
|         existing_encrypted_thumbnail, |         existing_encrypted_thumbnail, | ||||||
|         converted_encrypted_thumbnail, |         converted_encrypted_thumbnail, | ||||||
| @@ -30,13 +30,13 @@ def _do_convert(work_package): | |||||||
|         # Decrypt png |         # Decrypt png | ||||||
|         decrypted_thumbnail = existing_encrypted_thumbnail.with_suffix("").resolve() |         decrypted_thumbnail = existing_encrypted_thumbnail.with_suffix("").resolve() | ||||||
|  |  | ||||||
|         with open(existing_encrypted_thumbnail, "rb") as existing_encrypted_file: |         with existing_encrypted_thumbnail.open("rb") as existing_encrypted_file: | ||||||
|             raw_thumb = gpg.decrypt_file( |             raw_thumb = gpg.decrypt_file( | ||||||
|                 existing_encrypted_file, |                 existing_encrypted_file, | ||||||
|                 passphrase=passphrase, |                 passphrase=passphrase, | ||||||
|                 always_trust=True, |                 always_trust=True, | ||||||
|             ).data |             ).data | ||||||
|             with open(decrypted_thumbnail, "wb") as decrypted_file: |             with Path(decrypted_thumbnail).open("wb") as decrypted_file: | ||||||
|                 decrypted_file.write(raw_thumb) |                 decrypted_file.write(raw_thumb) | ||||||
|  |  | ||||||
|         converted_decrypted_thumbnail = Path( |         converted_decrypted_thumbnail = Path( | ||||||
| @@ -62,7 +62,7 @@ def _do_convert(work_package): | |||||||
|         ) |         ) | ||||||
|  |  | ||||||
|         # Encrypt webp |         # Encrypt webp | ||||||
|         with open(converted_decrypted_thumbnail, "rb") as converted_decrypted_file: |         with Path(converted_decrypted_thumbnail).open("rb") as converted_decrypted_file: | ||||||
|             encrypted = gpg.encrypt_file( |             encrypted = gpg.encrypt_file( | ||||||
|                 fileobj_or_path=converted_decrypted_file, |                 fileobj_or_path=converted_decrypted_file, | ||||||
|                 recipients=None, |                 recipients=None, | ||||||
| @@ -71,7 +71,9 @@ def _do_convert(work_package): | |||||||
|                 always_trust=True, |                 always_trust=True, | ||||||
|             ).data |             ).data | ||||||
|  |  | ||||||
|             with open(converted_encrypted_thumbnail, "wb") as converted_encrypted_file: |             with Path(converted_encrypted_thumbnail).open( | ||||||
|  |                 "wb", | ||||||
|  |             ) as converted_encrypted_file: | ||||||
|                 converted_encrypted_file.write(encrypted) |                 converted_encrypted_file.write(encrypted) | ||||||
|  |  | ||||||
|         # Copy newly created thumbnail to thumbnail directory |         # Copy newly created thumbnail to thumbnail directory | ||||||
| @@ -95,8 +97,8 @@ def _do_convert(work_package): | |||||||
|         logger.error(f"Error converting thumbnail (existing file unchanged): {e}") |         logger.error(f"Error converting thumbnail (existing file unchanged): {e}") | ||||||
|  |  | ||||||
|  |  | ||||||
| def _convert_encrypted_thumbnails_to_webp(apps, schema_editor): | def _convert_encrypted_thumbnails_to_webp(apps, schema_editor) -> None: | ||||||
|     start = time.time() |     start: float = time.time() | ||||||
|  |  | ||||||
|     with tempfile.TemporaryDirectory() as tempdir: |     with tempfile.TemporaryDirectory() as tempdir: | ||||||
|         work_packages = [] |         work_packages = [] | ||||||
| @@ -111,15 +113,15 @@ def _convert_encrypted_thumbnails_to_webp(apps, schema_editor): | |||||||
|                 ) |                 ) | ||||||
|  |  | ||||||
|             for file in Path(settings.THUMBNAIL_DIR).glob("*.png.gpg"): |             for file in Path(settings.THUMBNAIL_DIR).glob("*.png.gpg"): | ||||||
|                 existing_thumbnail = file.resolve() |                 existing_thumbnail: Path = file.resolve() | ||||||
|  |  | ||||||
|                 # Change the existing filename suffix from png to webp |                 # Change the existing filename suffix from png to webp | ||||||
|                 converted_thumbnail_name = Path( |                 converted_thumbnail_name: str = Path( | ||||||
|                     str(existing_thumbnail).replace(".png.gpg", ".webp.gpg"), |                     str(existing_thumbnail).replace(".png.gpg", ".webp.gpg"), | ||||||
|                 ).name |                 ).name | ||||||
|  |  | ||||||
|                 # Create the expected output filename in the tempdir |                 # Create the expected output filename in the tempdir | ||||||
|                 converted_thumbnail = ( |                 converted_thumbnail: Path = ( | ||||||
|                     Path(tempdir) / Path(converted_thumbnail_name) |                     Path(tempdir) / Path(converted_thumbnail_name) | ||||||
|                 ).resolve() |                 ).resolve() | ||||||
|  |  | ||||||
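The suffix swap in this hunk goes through str(...).replace(...) because the thumbnails carry a double suffix (.png.gpg), which Path.with_suffix() alone cannot rewrite. A small sketch of that detail, using a hypothetical path:

from pathlib import Path

existing = Path("/data/thumbnails/0000004.png.gpg")

# str(...).replace(...) followed by .name, as in the migration:
converted_name = Path(str(existing).replace(".png.gpg", ".webp.gpg")).name
assert converted_name == "0000004.webp.gpg"

# An equivalent formulation that only rewrites the final component,
# avoiding the round-trip through the full path string:
converted_name_alt = existing.name.replace(".png.gpg", ".webp.gpg")
assert converted_name_alt == converted_name

# .suffixes shows why .with_suffix() is not enough here: the file
# carries two suffixes and only the inner one changes.
assert existing.suffixes == [".png", ".gpg"]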
| @@ -143,8 +145,8 @@ def _convert_encrypted_thumbnails_to_webp(apps, schema_editor): | |||||||
|                 ) as pool: |                 ) as pool: | ||||||
|                     pool.map(_do_convert, work_packages) |                     pool.map(_do_convert, work_packages) | ||||||
|  |  | ||||||
|                     end = time.time() |                     end: float = time.time() | ||||||
|                     duration = end - start |                     duration: float = end - start | ||||||
|  |  | ||||||
|                 logger.info(f"Conversion completed in {duration:.3f}s") |                 logger.info(f"Conversion completed in {duration:.3f}s") | ||||||
|  |  | ||||||
|   | |||||||
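The final hunk of the migration keeps the existing fan-out: collect work packages from a glob, map a worker over them in a process pool, and log the elapsed time. A stripped-down sketch of that flow, with the thumbnail conversion replaced by a trivial stat() call and hypothetical file names:

import multiprocessing
import tempfile
import time
from pathlib import Path

def _do_work(package: Path) -> int:
    # Stand-in for _do_convert(); returns the file size in bytes.
    return package.stat().st_size

if __name__ == "__main__":
    start: float = time.time()
    with tempfile.TemporaryDirectory() as tempdir:
        for i in range(3):
            (Path(tempdir) / f"{i:07}.png.gpg").write_bytes(b"x" * i)

        # Path.glob() yields Path objects directly, so no os.path
        # juggling is needed when building the work list.
        work_packages = sorted(Path(tempdir).glob("*.png.gpg"))

        with multiprocessing.Pool(processes=2) as pool:
            sizes = pool.map(_do_work, work_packages)

    duration: float = time.time() - start
    print(f"Processed {len(sizes)} files in {duration:.3f}s")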
| @@ -173,7 +173,7 @@ class TestSystemStatus(APITestCase): | |||||||
|         self.assertEqual(response.data["tasks"]["index_status"], "OK") |         self.assertEqual(response.data["tasks"]["index_status"], "OK") | ||||||
|         self.assertIsNotNone(response.data["tasks"]["index_last_modified"]) |         self.assertIsNotNone(response.data["tasks"]["index_last_modified"]) | ||||||
|  |  | ||||||
|     @override_settings(INDEX_DIR="/tmp/index/") |     @override_settings(INDEX_DIR=Path("/tmp/index/")) | ||||||
|     @mock.patch("documents.index.open_index", autospec=True) |     @mock.patch("documents.index.open_index", autospec=True) | ||||||
|     def test_system_status_index_error(self, mock_open_index): |     def test_system_status_index_error(self, mock_open_index): | ||||||
|         """ |         """ | ||||||
| @@ -193,7 +193,7 @@ class TestSystemStatus(APITestCase): | |||||||
|         self.assertEqual(response.data["tasks"]["index_status"], "ERROR") |         self.assertEqual(response.data["tasks"]["index_status"], "ERROR") | ||||||
|         self.assertIsNotNone(response.data["tasks"]["index_error"]) |         self.assertIsNotNone(response.data["tasks"]["index_error"]) | ||||||
|  |  | ||||||
|     @override_settings(DATA_DIR="/tmp/does_not_exist/data/") |     @override_settings(DATA_DIR=Path("/tmp/does_not_exist/data/")) | ||||||
|     def test_system_status_classifier_ok(self): |     def test_system_status_classifier_ok(self): | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
| @@ -222,7 +222,7 @@ class TestSystemStatus(APITestCase): | |||||||
|         THEN: |         THEN: | ||||||
|             - The response contains a WARNING classifier status |             - The response contains a WARNING classifier status | ||||||
|         """ |         """ | ||||||
|         with override_settings(MODEL_FILE="does_not_exist"): |         with override_settings(MODEL_FILE=Path("does_not_exist")): | ||||||
|             Document.objects.create( |             Document.objects.create( | ||||||
|                 title="Test Document", |                 title="Test Document", | ||||||
|             ) |             ) | ||||||
| @@ -233,7 +233,11 @@ class TestSystemStatus(APITestCase): | |||||||
|             self.assertEqual(response.data["tasks"]["classifier_status"], "WARNING") |             self.assertEqual(response.data["tasks"]["classifier_status"], "WARNING") | ||||||
|             self.assertIsNotNone(response.data["tasks"]["classifier_error"]) |             self.assertIsNotNone(response.data["tasks"]["classifier_error"]) | ||||||
|  |  | ||||||
|     def test_system_status_classifier_error(self): |     @mock.patch( | ||||||
|  |         "documents.classifier.load_classifier", | ||||||
|  |         side_effect=ClassifierModelCorruptError(), | ||||||
|  |     ) | ||||||
|  |     def test_system_status_classifier_error(self, mock_load_classifier): | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
|             - The classifier does exist but is corrupt |             - The classifier does exist but is corrupt | ||||||
| @@ -248,25 +252,23 @@ class TestSystemStatus(APITestCase): | |||||||
|                 dir="/tmp", |                 dir="/tmp", | ||||||
|                 delete=False, |                 delete=False, | ||||||
|             ) as does_exist, |             ) as does_exist, | ||||||
|             override_settings(MODEL_FILE=does_exist), |             override_settings(MODEL_FILE=Path(does_exist.name)), | ||||||
|         ): |         ): | ||||||
|             with mock.patch("documents.classifier.load_classifier") as mock_load: |             Document.objects.create( | ||||||
|                 mock_load.side_effect = ClassifierModelCorruptError() |                 title="Test Document", | ||||||
|                 Document.objects.create( |             ) | ||||||
|                     title="Test Document", |             Tag.objects.create( | ||||||
|                 ) |                 name="Test Tag", | ||||||
|                 Tag.objects.create( |                 matching_algorithm=Tag.MATCH_AUTO, | ||||||
|                     name="Test Tag", |             ) | ||||||
|                     matching_algorithm=Tag.MATCH_AUTO, |             self.client.force_login(self.user) | ||||||
|                 ) |             response = self.client.get(self.ENDPOINT) | ||||||
|                 self.client.force_login(self.user) |             self.assertEqual(response.status_code, status.HTTP_200_OK) | ||||||
|                 response = self.client.get(self.ENDPOINT) |             self.assertEqual( | ||||||
|                 self.assertEqual(response.status_code, status.HTTP_200_OK) |                 response.data["tasks"]["classifier_status"], | ||||||
|                 self.assertEqual( |                 "ERROR", | ||||||
|                     response.data["tasks"]["classifier_status"], |             ) | ||||||
|                     "ERROR", |             self.assertIsNotNone(response.data["tasks"]["classifier_error"]) | ||||||
|                 ) |  | ||||||
|                 self.assertIsNotNone(response.data["tasks"]["classifier_error"]) |  | ||||||
|  |  | ||||||
|     def test_system_status_classifier_ok_no_objects(self): |     def test_system_status_classifier_ok_no_objects(self): | ||||||
|         """ |         """ | ||||||
| @@ -278,7 +280,7 @@ class TestSystemStatus(APITestCase): | |||||||
|         THEN: |         THEN: | ||||||
|             - The response contains an OK classifier status |             - The response contains an OK classifier status | ||||||
|         """ |         """ | ||||||
|         with override_settings(MODEL_FILE="does_not_exist"): |         with override_settings(MODEL_FILE=Path("does_not_exist")): | ||||||
|             self.client.force_login(self.user) |             self.client.force_login(self.user) | ||||||
|             response = self.client.get(self.ENDPOINT) |             response = self.client.get(self.ENDPOINT) | ||||||
|             self.assertEqual(response.status_code, status.HTTP_200_OK) |             self.assertEqual(response.status_code, status.HTTP_200_OK) | ||||||
|   | |||||||
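Two themes run through the test changes above: settings such as INDEX_DIR, DATA_DIR, and MODEL_FILE are now overridden with Path objects, matching production code that calls Path methods on them, and the corrupt-classifier test moves its mock.patch from a nested context manager to a decorator with side_effect, flattening the test body by one indentation level. A minimal sketch of the two equivalent patch styles, using os.getcwd as a stand-in target and a hypothetical exception type so the example runs anywhere:

import os
import unittest
from unittest import mock

class DummyCorruptError(Exception):
    pass

class PatchStyles(unittest.TestCase):
    # Decorator form: the mock is active for the whole test body and
    # arrives as an extra positional argument.
    @mock.patch("os.getcwd", side_effect=DummyCorruptError())
    def test_decorator_form(self, mock_getcwd):
        with self.assertRaises(DummyCorruptError):
            os.getcwd()
        # The call is recorded before the side effect is raised.
        mock_getcwd.assert_called_once()

    def test_context_manager_form(self):
        # Equivalent inline form, as the test read before the refactor.
        with mock.patch("os.getcwd") as mock_getcwd:
            mock_getcwd.side_effect = DummyCorruptError()
            with self.assertRaises(DummyCorruptError):
                os.getcwd()

if __name__ == "__main__":
    unittest.main()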
| @@ -650,7 +650,7 @@ class TestClassifier(DirectoriesMixin, TestCase): | |||||||
|         Path(settings.MODEL_FILE).touch() |         Path(settings.MODEL_FILE).touch() | ||||||
|         self.assertTrue(os.path.exists(settings.MODEL_FILE)) |         self.assertTrue(os.path.exists(settings.MODEL_FILE)) | ||||||
|  |  | ||||||
|         load.side_effect = IncompatibleClassifierVersionError("Dummey Error") |         load.side_effect = IncompatibleClassifierVersionError("Dummy Error") | ||||||
|         self.assertIsNone(load_classifier()) |         self.assertIsNone(load_classifier()) | ||||||
|         self.assertFalse(os.path.exists(settings.MODEL_FILE)) |         self.assertFalse(os.path.exists(settings.MODEL_FILE)) | ||||||
|  |  | ||||||
| @@ -673,3 +673,25 @@ class TestClassifier(DirectoriesMixin, TestCase): | |||||||
|         ): |         ): | ||||||
|             classifier = load_classifier() |             classifier = load_classifier() | ||||||
|             self.assertIsNone(classifier) |             self.assertIsNone(classifier) | ||||||
|  |  | ||||||
|  |     @mock.patch("documents.classifier.DocumentClassifier.load") | ||||||
|  |     def test_load_classifier_raise_exception(self, mock_load): | ||||||
|  |         Path(settings.MODEL_FILE).touch() | ||||||
|  |         mock_load.side_effect = IncompatibleClassifierVersionError("Dummy Error") | ||||||
|  |         with self.assertRaises(IncompatibleClassifierVersionError): | ||||||
|  |             load_classifier(raise_exception=True) | ||||||
|  |  | ||||||
|  |         Path(settings.MODEL_FILE).touch() | ||||||
|  |         mock_load.side_effect = ClassifierModelCorruptError() | ||||||
|  |         with self.assertRaises(ClassifierModelCorruptError): | ||||||
|  |             load_classifier(raise_exception=True) | ||||||
|  |  | ||||||
|  |         Path(settings.MODEL_FILE).touch() | ||||||
|  |         mock_load.side_effect = OSError() | ||||||
|  |         with self.assertRaises(OSError): | ||||||
|  |             load_classifier(raise_exception=True) | ||||||
|  |  | ||||||
|  |         Path(settings.MODEL_FILE).touch() | ||||||
|  |         mock_load.side_effect = Exception() | ||||||
|  |         with self.assertRaises(Exception): | ||||||
|  |             load_classifier(raise_exception=True) | ||||||
|   | |||||||
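The new test drives a raise_exception flag on load_classifier(). A hedged sketch of how such a flag might be threaded through a loader; the signature, logger name, and unlink-on-corruption behavior here are assumptions for illustration, not the actual paperless-ngx implementation:

import logging
from pathlib import Path

logger = logging.getLogger("example.classifier")

class IncompatibleClassifierVersionError(Exception):
    pass

class ClassifierModelCorruptError(Exception):
    pass

def _load_model(model_file: Path) -> object:
    # Stand-in for the real deserialization step.
    raise ClassifierModelCorruptError("corrupt model")

def load_classifier(model_file: Path, *, raise_exception: bool = False):
    if not model_file.exists():
        return None
    try:
        return _load_model(model_file)
    except (IncompatibleClassifierVersionError, ClassifierModelCorruptError):
        # A stale or corrupt model is normally discarded so it can be
        # retrained, which is why the test re-touches the file between
        # cases; callers can opt in to seeing the error instead.
        model_file.unlink(missing_ok=True)
        if raise_exception:
            raise
        return None
    except Exception:
        logger.exception("Unknown error while loading the classifier")
        if raise_exception:
            raise
        return None

Under these assumptions, the default call keeps its old contract (return None and clean up), while status-reporting callers can opt in to the exception.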
| @@ -108,18 +108,18 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|  |  | ||||||
| class TestDecryptDocuments(FileSystemAssertsMixin, TestCase): | class TestDecryptDocuments(FileSystemAssertsMixin, TestCase): | ||||||
|     @override_settings( |     @override_settings( | ||||||
|         ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"), |         ORIGINALS_DIR=(Path(__file__).parent / "samples" / "originals"), | ||||||
|         THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"), |         THUMBNAIL_DIR=(Path(__file__).parent / "samples" / "thumb"), | ||||||
|         PASSPHRASE="test", |         PASSPHRASE="test", | ||||||
|         FILENAME_FORMAT=None, |         FILENAME_FORMAT=None, | ||||||
|     ) |     ) | ||||||
|     @mock.patch("documents.management.commands.decrypt_documents.input") |     @mock.patch("documents.management.commands.decrypt_documents.input") | ||||||
|     def test_decrypt(self, m): |     def test_decrypt(self, m): | ||||||
|         media_dir = tempfile.mkdtemp() |         media_dir = tempfile.mkdtemp() | ||||||
|         originals_dir = os.path.join(media_dir, "documents", "originals") |         originals_dir = Path(media_dir) / "documents" / "originals" | ||||||
|         thumb_dir = os.path.join(media_dir, "documents", "thumbnails") |         thumb_dir = Path(media_dir) / "documents" / "thumbnails" | ||||||
|         os.makedirs(originals_dir, exist_ok=True) |         originals_dir.mkdir(parents=True, exist_ok=True) | ||||||
|         os.makedirs(thumb_dir, exist_ok=True) |         thumb_dir.mkdir(parents=True, exist_ok=True) | ||||||
|  |  | ||||||
|         override_settings( |         override_settings( | ||||||
|             ORIGINALS_DIR=originals_dir, |             ORIGINALS_DIR=originals_dir, | ||||||
| @@ -143,7 +143,7 @@ class TestDecryptDocuments(FileSystemAssertsMixin, TestCase): | |||||||
|                 "originals", |                 "originals", | ||||||
|                 "0000004.pdf.gpg", |                 "0000004.pdf.gpg", | ||||||
|             ), |             ), | ||||||
|             os.path.join(originals_dir, "0000004.pdf.gpg"), |             originals_dir / "0000004.pdf.gpg", | ||||||
|         ) |         ) | ||||||
|         shutil.copy( |         shutil.copy( | ||||||
|             os.path.join( |             os.path.join( | ||||||
| @@ -153,7 +153,7 @@ class TestDecryptDocuments(FileSystemAssertsMixin, TestCase): | |||||||
|                 "thumbnails", |                 "thumbnails", | ||||||
|                 "0000004.webp.gpg", |                 "0000004.webp.gpg", | ||||||
|             ), |             ), | ||||||
|             os.path.join(thumb_dir, f"{doc.id:07}.webp.gpg"), |             thumb_dir / f"{doc.id:07}.webp.gpg", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|         call_command("decrypt_documents") |         call_command("decrypt_documents") | ||||||
|   | |||||||
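The decrypt test above is a compact catalogue of the os.path-to-pathlib substitutions made throughout this commit. The main correspondences, with hypothetical directory names:

import os
import tempfile
from pathlib import Path

media_dir = tempfile.mkdtemp()

# os.path.join(a, b, c)            ->  Path(a) / b / c
old_style = os.path.join(media_dir, "documents", "originals")
new_style = Path(media_dir) / "documents" / "originals"
assert str(new_style) == old_style

# os.makedirs(p, exist_ok=True)    ->  Path.mkdir(parents=True, exist_ok=True)
new_style.mkdir(parents=True, exist_ok=True)
assert new_style.is_dir()

# os.path.dirname(__file__)        ->  Path(__file__).parent
samples_old = os.path.join(os.path.dirname(__file__), "samples")
samples_new = Path(__file__).parent / "samples"

shutil.copy() accepts os.PathLike arguments, so the remaining os.path.join(...) sources and the new Path destinations in the hunks above can be mixed freely during the transition.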
| @@ -2139,7 +2139,7 @@ class SystemStatusView(PassUserMixin): | |||||||
|         classifier_error = None |         classifier_error = None | ||||||
|         classifier_status = None |         classifier_status = None | ||||||
|         try: |         try: | ||||||
|             classifier = load_classifier() |             classifier = load_classifier(raise_exception=True) | ||||||
|             if classifier is None: |             if classifier is None: | ||||||
|                 # Make sure classifier should exist |                 # Make sure classifier should exist | ||||||
|                 docs_queryset = Document.objects.exclude( |                 docs_queryset = Document.objects.exclude( | ||||||
| @@ -2159,7 +2159,7 @@ class SystemStatusView(PassUserMixin): | |||||||
|                             matching_algorithm=Tag.MATCH_AUTO, |                             matching_algorithm=Tag.MATCH_AUTO, | ||||||
|                         ).exists() |                         ).exists() | ||||||
|                     ) |                     ) | ||||||
|                     and not os.path.isfile(settings.MODEL_FILE) |                     and not settings.MODEL_FILE.exists() | ||||||
|                 ): |                 ): | ||||||
|                     # if classifier file doesn't exist just classify as a warning |                     # if classifier file doesn't exist just classify as a warning | ||||||
|                     classifier_error = "Classifier file does not exist (yet). Re-training may be pending." |                     classifier_error = "Classifier file does not exist (yet). Re-training may be pending." | ||||||
|   | |||||||
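The view change ties the threads together: load_classifier(raise_exception=True) lets a corrupt or version-incompatible model propagate to the surrounding try/except and be reported as ERROR, while a model file that merely does not exist yet stays a WARNING, now checked via Path.exists() on settings.MODEL_FILE instead of os.path.isfile(). A hedged sketch of that control flow; the helper names and the stub loader are stand-ins, not the real view code:

from __future__ import annotations

from pathlib import Path

def load_classifier_stub(model_file: Path, *, raise_exception: bool = False):
    # Stand-in loader: treat any existing model file as corrupt.
    if model_file.exists() and raise_exception:
        raise RuntimeError("Classifier model is corrupt")
    return None

def classifier_status(model_file: Path, needs_classifier: bool) -> tuple[str, str | None]:
    try:
        classifier = load_classifier_stub(model_file, raise_exception=True)
        if classifier is None and needs_classifier and not model_file.exists():
            # settings.MODEL_FILE is a Path now, so Path.exists()
            # replaces os.path.isfile(settings.MODEL_FILE).
            return "WARNING", "Classifier file does not exist (yet). Re-training may be pending."
        return "OK", None
    except Exception as err:
        # With raise_exception=True, corrupt or incompatible models
        # land here and surface as ERROR in the status payload.
        return "ERROR", str(err)

print(classifier_status(Path("/tmp/does_not_exist/model"), needs_classifier=True))
# -> ('WARNING', 'Classifier file does not exist (yet). Re-training may be pending.')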