Chore: Switch from os.path to pathlib.Path (#8325)

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
Author: Sebastian Steinbeißer · 2025-01-06 21:12:27 +01:00 · committed by GitHub
parent d06aac947d
commit 935d077836
11 changed files with 178 additions and 142 deletions
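
At a glance, the substitutions applied across these files follow a handful of recurring patterns. A minimal, self-contained sketch of those equivalences (the directory and file names below are illustrative only, not paths from the repository):

```python
import shutil
import tempfile
from pathlib import Path

base = Path(tempfile.mkdtemp())             # throwaway directory for the demo
model = base / "classifier.pickle"          # replaces os.path.join(base, "classifier.pickle")
model.touch()

print(model.is_file())                      # replaces os.path.isfile(model)
print(model.suffix, model.stem)             # replaces os.path.splitext(model)[1] / [0]

with model.open("wb") as f:                 # replaces open(model, "wb")
    f.write(b"\x00")

model.unlink()                              # replaces os.unlink(model)
shutil.rmtree(base)                         # shutil.rmtree accepts Path objects unchanged
```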

View File

@ -38,20 +38,14 @@ ignore = ["DJ001", "SIM105", "RUF012"]
[lint.per-file-ignores]
".github/scripts/*.py" = ["E501", "INP001", "SIM117"]
"docker/wait-for-redis.py" = ["INP001", "T201"]
"src/documents/barcodes.py" = ["PTH"] # TODO Enable & remove
"src/documents/classifier.py" = ["PTH"] # TODO Enable & remove
"src/documents/consumer.py" = ["PTH"] # TODO Enable & remove
"src/documents/file_handling.py" = ["PTH"] # TODO Enable & remove
"src/documents/index.py" = ["PTH"] # TODO Enable & remove
"src/documents/management/commands/decrypt_documents.py" = ["PTH"] # TODO Enable & remove
"src/documents/management/commands/document_consumer.py" = ["PTH"] # TODO Enable & remove
"src/documents/management/commands/document_exporter.py" = ["PTH"] # TODO Enable & remove
"src/documents/management/commands/document_importer.py" = ["PTH"] # TODO Enable & remove
"src/documents/migrations/0012_auto_20160305_0040.py" = ["PTH"] # TODO Enable & remove
"src/documents/migrations/0014_document_checksum.py" = ["PTH"] # TODO Enable & remove
"src/documents/migrations/1003_mime_types.py" = ["PTH"] # TODO Enable & remove
"src/documents/migrations/1012_fix_archive_files.py" = ["PTH"] # TODO Enable & remove
"src/documents/migrations/1037_webp_encrypted_thumbnail_conversion.py" = ["PTH"] # TODO Enable & remove
"src/documents/models.py" = ["SIM115", "PTH"] # TODO PTH Enable & remove
"src/documents/parsers.py" = ["PTH"] # TODO Enable & remove
"src/documents/signals/handlers.py" = ["PTH"] # TODO Enable & remove

View File

@ -3,6 +3,7 @@ import re
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING
from django.conf import settings
from pdf2image import convert_from_path
@ -21,6 +22,9 @@ from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import maybe_override_pixel_limit
if TYPE_CHECKING:
from collections.abc import Callable
logger = logging.getLogger("paperless.barcodes")
@ -61,7 +65,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
- Barcode support is enabled and the mime type is supported
"""
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
supported_mimes = {"application/pdf", "image/tiff"}
supported_mimes: set[str] = {"application/pdf", "image/tiff"}
else:
supported_mimes = {"application/pdf"}
@ -71,16 +75,16 @@ class BarcodePlugin(ConsumeTaskPlugin):
or settings.CONSUMER_ENABLE_TAG_BARCODE
) and self.input_doc.mime_type in supported_mimes
def setup(self):
def setup(self) -> None:
self.temp_dir = tempfile.TemporaryDirectory(
dir=self.base_tmp_dir,
prefix="barcode",
)
self.pdf_file = self.input_doc.original_file
self.pdf_file: Path = self.input_doc.original_file
self._tiff_conversion_done = False
self.barcodes: list[Barcode] = []
def run(self) -> str | None:
def run(self) -> None:
# Some operations may use PIL, override pixel setting if needed
maybe_override_pixel_limit()
@ -158,7 +162,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
def cleanup(self) -> None:
self.temp_dir.cleanup()
def convert_from_tiff_to_pdf(self):
def convert_from_tiff_to_pdf(self) -> None:
"""
May convert a TIFF image into a PDF, if the input is a TIFF and
the TIFF has not been made into a PDF
@ -223,7 +227,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
# Choose the library for reading
if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
reader = self.read_barcodes_pyzbar
reader: Callable[[Image.Image], list[str]] = self.read_barcodes_pyzbar
logger.debug("Scanning for barcodes using PYZBAR")
else:
reader = self.read_barcodes_zxing
@ -236,7 +240,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
logger.debug(f"PDF has {num_of_pages} pages")
# Get limit from configuration
barcode_max_pages = (
barcode_max_pages: int = (
num_of_pages
if settings.CONSUMER_BARCODE_MAX_PAGES == 0
else settings.CONSUMER_BARCODE_MAX_PAGES
@ -311,7 +315,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
self.detect()
# get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
asn_text = next(
asn_text: str | None = next(
(x.value for x in self.barcodes if x.is_asn),
None,
)
@ -333,36 +337,36 @@ class BarcodePlugin(ConsumeTaskPlugin):
return asn
@property
def tags(self) -> list[int] | None:
def tags(self) -> list[int]:
"""
Search the parsed barcodes for any tags.
Returns the detected tag ids (or empty list)
"""
tags = []
tags: list[int] = []
# Ensure the barcodes have been read
self.detect()
for x in self.barcodes:
tag_texts = x.value
tag_texts: str = x.value
for raw in tag_texts.split(","):
try:
tag = None
tag_str: str | None = None
for regex in settings.CONSUMER_TAG_BARCODE_MAPPING:
if re.match(regex, raw, flags=re.IGNORECASE):
sub = settings.CONSUMER_TAG_BARCODE_MAPPING[regex]
tag = (
tag_str = (
re.sub(regex, sub, raw, flags=re.IGNORECASE)
if sub
else raw
)
break
if tag:
if tag_str:
tag, _ = Tag.objects.get_or_create(
name__iexact=tag,
defaults={"name": tag},
name__iexact=tag_str,
defaults={"name": tag_str},
)
logger.debug(
@ -413,7 +417,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
"""
document_paths = []
fname = self.input_doc.original_file.stem
fname: str = self.input_doc.original_file.stem
with Pdf.open(self.pdf_file) as input_pdf:
# Start with an empty document
current_document: list[Page] = []
@ -432,7 +436,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
logger.debug(f"Starting new document at idx {idx}")
current_document = []
documents.append(current_document)
keep_page = pages_to_split_on[idx]
keep_page: bool = pages_to_split_on[idx]
if keep_page:
# Keep the page
# (new document is started by asn barcode)
@ -451,7 +455,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
savepath = Path(self.temp_dir.name) / output_filename
with open(savepath, "wb") as out:
with savepath.open("wb") as out:
dst.save(out)
copy_basic_file_stats(self.input_doc.original_file, savepath)
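
The split-and-save block above follows a small pattern: build the output path with the / operator and write it through Path.open(). A rough sketch of that pattern, assuming pikepdf is installed; the blank two-page input and the page choice are placeholders, not the plugin's real logic:

```python
import tempfile
from pathlib import Path

from pikepdf import Pdf

temp_dir = tempfile.TemporaryDirectory(prefix="barcode")

# Create a throwaway two-page PDF so the sketch is self-contained
input_pdf_path = Path(temp_dir.name) / "input.pdf"
with Pdf.new() as src:
    src.add_blank_page()
    src.add_blank_page()
    src.save(input_pdf_path)

with Pdf.open(input_pdf_path) as input_pdf:
    dst = Pdf.new()
    dst.pages.append(input_pdf.pages[0])    # pretend page 0 starts a new split document

    # Path arithmetic instead of os.path.join, Path.open() instead of open()
    savepath = Path(temp_dir.name) / f"{input_pdf_path.stem}_document_0.pdf"
    with savepath.open("wb") as out:
        dst.save(out)
```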

View File

@ -1,16 +1,17 @@
import logging
import os
import pickle
import re
import warnings
from collections.abc import Iterator
from hashlib import sha256
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Optional
if TYPE_CHECKING:
from datetime import datetime
from pathlib import Path
from numpy import ndarray
from django.conf import settings
from django.core.cache import cache
@ -28,7 +29,7 @@ logger = logging.getLogger("paperless.classifier")
class IncompatibleClassifierVersionError(Exception):
def __init__(self, message: str, *args: object) -> None:
self.message = message
self.message: str = message
super().__init__(*args)
@ -36,8 +37,8 @@ class ClassifierModelCorruptError(Exception):
pass
def load_classifier() -> Optional["DocumentClassifier"]:
if not os.path.isfile(settings.MODEL_FILE):
def load_classifier(*, raise_exception: bool = False) -> Optional["DocumentClassifier"]:
if not settings.MODEL_FILE.is_file():
logger.debug(
"Document classification model does not exist (yet), not "
"performing automatic matching.",
@ -50,22 +51,30 @@ def load_classifier() -> Optional["DocumentClassifier"]:
except IncompatibleClassifierVersionError as e:
logger.info(f"Classifier version incompatible: {e.message}, will re-train")
os.unlink(settings.MODEL_FILE)
Path(settings.MODEL_FILE).unlink()
classifier = None
except ClassifierModelCorruptError:
if raise_exception:
raise e
except ClassifierModelCorruptError as e:
# there's something wrong with the model file.
logger.exception(
"Unrecoverable error while loading document "
"classification model, deleting model file.",
)
os.unlink(settings.MODEL_FILE)
Path(settings.MODEL_FILE).unlink()
classifier = None
except OSError:
if raise_exception:
raise e
except OSError as e:
logger.exception("IO error while loading document classification model")
classifier = None
except Exception: # pragma: no cover
if raise_exception:
raise e
except Exception as e: # pragma: no cover
logger.exception("Unknown error while loading document classification model")
classifier = None
if raise_exception:
raise e
return classifier
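
The new keyword-only raise_exception flag keeps the default behaviour (log and return None) while letting callers such as the status view surface the underlying failure. A minimal sketch of that shape, using stand-in names rather than the real classifier API:

```python
import logging

logger = logging.getLogger("example")


class ModelLoadError(Exception):
    """Stand-in for the classifier's load-time exceptions."""


def load_model(*, raise_exception: bool = False):
    try:
        raise ModelLoadError("corrupt file")        # pretend loading failed
    except ModelLoadError as e:
        logger.exception("Unrecoverable error while loading the model")
        if raise_exception:
            raise e
        return None


load_model()                       # logs the error and returns None
# load_model(raise_exception=True) # would re-raise ModelLoadError to the caller
```
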
@ -76,7 +85,7 @@ class DocumentClassifier:
# v9 - Changed from hashing to time/ids for re-train check
FORMAT_VERSION = 9
def __init__(self):
def __init__(self) -> None:
# last time a document changed and therefore training might be required
self.last_doc_change_time: datetime | None = None
# Hash of primary keys of AUTO matching values last used in training
@ -95,7 +104,7 @@ class DocumentClassifier:
def load(self) -> None:
# Catch warnings for processing
with warnings.catch_warnings(record=True) as w:
with open(settings.MODEL_FILE, "rb") as f:
with Path(settings.MODEL_FILE).open("rb") as f:
schema_version = pickle.load(f)
if schema_version != self.FORMAT_VERSION:
@ -132,11 +141,11 @@ class DocumentClassifier:
):
raise IncompatibleClassifierVersionError("sklearn version update")
def save(self):
def save(self) -> None:
target_file: Path = settings.MODEL_FILE
target_file_temp = target_file.with_suffix(".pickle.part")
target_file_temp: Path = target_file.with_suffix(".pickle.part")
with open(target_file_temp, "wb") as f:
with target_file_temp.open("wb") as f:
pickle.dump(self.FORMAT_VERSION, f)
pickle.dump(self.last_doc_change_time, f)
@ -153,7 +162,7 @@ class DocumentClassifier:
target_file_temp.rename(target_file)
def train(self):
def train(self) -> bool:
# Get non-inbox documents
docs_queryset = (
Document.objects.exclude(
@ -190,7 +199,7 @@ class DocumentClassifier:
hasher.update(y.to_bytes(4, "little", signed=True))
labels_correspondent.append(y)
tags = sorted(
tags: list[int] = sorted(
tag.pk
for tag in doc.tags.filter(
matching_algorithm=MatchingModel.MATCH_AUTO,
@ -236,9 +245,9 @@ class DocumentClassifier:
# union with {-1} accounts for cases where all documents have
# correspondents and types assigned, so -1 isn't part of labels_x, which
# it usually is.
num_correspondents = len(set(labels_correspondent) | {-1}) - 1
num_document_types = len(set(labels_document_type) | {-1}) - 1
num_storage_paths = len(set(labels_storage_path) | {-1}) - 1
num_correspondents: int = len(set(labels_correspondent) | {-1}) - 1
num_document_types: int = len(set(labels_document_type) | {-1}) - 1
num_storage_paths: int = len(set(labels_storage_path) | {-1}) - 1
logger.debug(
f"{docs_queryset.count()} documents, {num_tags} tag(s), {num_correspondents} correspondent(s), "
@ -266,7 +275,9 @@ class DocumentClassifier:
min_df=0.01,
)
data_vectorized = self.data_vectorizer.fit_transform(content_generator())
data_vectorized: ndarray = self.data_vectorizer.fit_transform(
content_generator(),
)
# See the notes here:
# https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html
@ -284,7 +295,7 @@ class DocumentClassifier:
label[0] if len(label) == 1 else -1 for label in labels_tags
]
self.tags_binarizer = LabelBinarizer()
labels_tags_vectorized = self.tags_binarizer.fit_transform(
labels_tags_vectorized: ndarray = self.tags_binarizer.fit_transform(
labels_tags,
).ravel()
else:
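
The save() changes keep the existing write-to-a-sibling-file-then-rename pattern, just expressed with pathlib. A short self-contained sketch of that pattern with a placeholder payload:

```python
import pickle
import tempfile
from pathlib import Path

target_file = Path(tempfile.mkdtemp()) / "classifier.pickle"    # stands in for settings.MODEL_FILE
target_file_temp = target_file.with_suffix(".pickle.part")      # classifier.pickle.part

with target_file_temp.open("wb") as f:      # write the whole payload to the temp file first
    pickle.dump({"format_version": 9}, f)

target_file_temp.rename(target_file)        # then swap it into place; readers never see a partial file
```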

View File

@ -1,11 +1,11 @@
import logging
import math
import os
from collections import Counter
from contextlib import contextmanager
from datetime import datetime
from datetime import timezone
from shutil import rmtree
from typing import Literal
from django.conf import settings
from django.db.models import QuerySet
@ -47,7 +47,7 @@ from documents.models import User
logger = logging.getLogger("paperless.index")
def get_schema():
def get_schema() -> Schema:
return Schema(
id=NUMERIC(stored=True, unique=True),
title=TEXT(sortable=True),
@ -93,7 +93,7 @@ def open_index(recreate=False) -> FileIndex:
logger.exception("Error while opening the index, recreating.")
# create_in doesn't handle corrupted indexes very well, remove the directory entirely first
if os.path.isdir(settings.INDEX_DIR):
if settings.INDEX_DIR.is_dir():
rmtree(settings.INDEX_DIR)
settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
@ -123,7 +123,7 @@ def open_index_searcher() -> Searcher:
searcher.close()
def update_document(writer: AsyncWriter, doc: Document):
def update_document(writer: AsyncWriter, doc: Document) -> None:
tags = ",".join([t.name for t in doc.tags.all()])
tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
notes = ",".join([str(c.note) for c in Note.objects.filter(document=doc)])
@ -133,7 +133,7 @@ def update_document(writer: AsyncWriter, doc: Document):
custom_fields_ids = ",".join(
[str(f.field.id) for f in CustomFieldInstance.objects.filter(document=doc)],
)
asn = doc.archive_serial_number
asn: int | None = doc.archive_serial_number
if asn is not None and (
asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
@ -149,7 +149,7 @@ def update_document(writer: AsyncWriter, doc: Document):
doc,
only_with_perms_in=["view_document"],
)
viewer_ids = ",".join([str(u.id) for u in users_with_perms])
viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
writer.update_document(
id=doc.pk,
title=doc.title,
@ -187,20 +187,20 @@ def update_document(writer: AsyncWriter, doc: Document):
)
def remove_document(writer: AsyncWriter, doc: Document):
def remove_document(writer: AsyncWriter, doc: Document) -> None:
remove_document_by_id(writer, doc.pk)
def remove_document_by_id(writer: AsyncWriter, doc_id):
def remove_document_by_id(writer: AsyncWriter, doc_id) -> None:
writer.delete_by_term("id", doc_id)
def add_or_update_document(document: Document):
def add_or_update_document(document: Document) -> None:
with open_index_writer() as writer:
update_document(writer, document)
def remove_document_from_index(document: Document):
def remove_document_from_index(document: Document) -> None:
with open_index_writer() as writer:
remove_document(writer, document)
@ -218,11 +218,11 @@ class MappedDocIdSet(DocIdSet):
self.document_ids = BitSet(document_ids, size=max_id)
self.ixreader = ixreader
def __contains__(self, docnum):
def __contains__(self, docnum) -> bool:
document_id = self.ixreader.stored_fields(docnum)["id"]
return document_id in self.document_ids
def __bool__(self):
def __bool__(self) -> Literal[True]:
# searcher.search ignores a filter if it's "falsy".
# We use this hack so this DocIdSet, when used as a filter, is never ignored.
return True
@ -232,13 +232,13 @@ class DelayedQuery:
def _get_query(self):
raise NotImplementedError # pragma: no cover
def _get_query_sortedby(self):
def _get_query_sortedby(self) -> tuple[None, Literal[False]] | tuple[str, bool]:
if "ordering" not in self.query_params:
return None, False
field: str = self.query_params["ordering"]
sort_fields_map = {
sort_fields_map: dict[str, str] = {
"created": "created",
"modified": "modified",
"added": "added",
@ -268,7 +268,7 @@ class DelayedQuery:
query_params,
page_size,
filter_queryset: QuerySet,
):
) -> None:
self.searcher = searcher
self.query_params = query_params
self.page_size = page_size
@ -276,7 +276,7 @@ class DelayedQuery:
self.first_score = None
self.filter_queryset = filter_queryset
def __len__(self):
def __len__(self) -> int:
page = self[0:1]
return len(page)
@ -334,7 +334,7 @@ class LocalDateParser(English):
class DelayedFullTextQuery(DelayedQuery):
def _get_query(self):
def _get_query(self) -> tuple:
q_str = self.query_params["query"]
qp = MultifieldParser(
[
@ -364,7 +364,7 @@ class DelayedFullTextQuery(DelayedQuery):
class DelayedMoreLikeThisQuery(DelayedQuery):
def _get_query(self):
def _get_query(self) -> tuple:
more_like_doc_id = int(self.query_params["more_like_id"])
content = Document.objects.get(id=more_like_doc_id).content
@ -379,7 +379,7 @@ class DelayedMoreLikeThisQuery(DelayedQuery):
q = query.Or(
[query.Term("content", word, boost=weight) for word, weight in kts],
)
mask = {docnum}
mask: set = {docnum}
return q, mask
@ -389,7 +389,7 @@ def autocomplete(
term: str,
limit: int = 10,
user: User | None = None,
):
) -> list:
"""
Mimics whoosh.reading.IndexReader.most_distinctive_terms with permissions
and without scoring
@ -402,7 +402,7 @@ def autocomplete(
# content field query instead and return bogus, not text data
qp.remove_plugin_class(FieldsPlugin)
q = qp.parse(f"{term.lower()}*")
user_criterias = get_permissions_criterias(user)
user_criterias: list = get_permissions_criterias(user)
results = s.search(
q,
@ -417,14 +417,14 @@ def autocomplete(
termCounts[match] += 1
terms = [t for t, _ in termCounts.most_common(limit)]
term_encoded = term.encode("UTF-8")
term_encoded: bytes = term.encode("UTF-8")
if term_encoded in terms:
terms.insert(0, terms.pop(terms.index(term_encoded)))
return terms
def get_permissions_criterias(user: User | None = None):
def get_permissions_criterias(user: User | None = None) -> list:
user_criterias = [query.Term("has_owner", False)]
if user is not None:
if user.is_superuser: # superusers see all docs
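
The recreate path in open_index() now checks and removes the directory with pathlib before rebuilding it. Reduced to a standalone sketch with a throwaway directory standing in for settings.INDEX_DIR:

```python
import tempfile
from pathlib import Path
from shutil import rmtree

index_dir = Path(tempfile.mkdtemp()) / "index"   # stands in for settings.INDEX_DIR
index_dir.mkdir()

if index_dir.is_dir():                           # replaces os.path.isdir(settings.INDEX_DIR)
    rmtree(index_dir)                            # rmtree accepts a Path directly
index_dir.mkdir(parents=True, exist_ok=True)     # start over with a clean, empty index directory
```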

View File

@ -1,4 +1,4 @@
import os
from pathlib import Path
from django.conf import settings
from django.core.management.base import BaseCommand
@ -14,7 +14,7 @@ class Command(BaseCommand):
"state to an unencrypted one (or vice-versa)"
)
def add_arguments(self, parser):
def add_arguments(self, parser) -> None:
parser.add_argument(
"--passphrase",
help=(
@ -23,7 +23,7 @@ class Command(BaseCommand):
),
)
def handle(self, *args, **options):
def handle(self, *args, **options) -> None:
try:
self.stdout.write(
self.style.WARNING(
@ -52,7 +52,7 @@ class Command(BaseCommand):
self.__gpg_to_unencrypted(passphrase)
def __gpg_to_unencrypted(self, passphrase: str):
def __gpg_to_unencrypted(self, passphrase: str) -> None:
encrypted_files = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG,
)
@ -69,7 +69,7 @@ class Command(BaseCommand):
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
ext = os.path.splitext(document.filename)[1]
ext: str = Path(document.filename).suffix
if not ext == ".gpg":
raise CommandError(
@ -77,12 +77,12 @@ class Command(BaseCommand):
f"end with .gpg",
)
document.filename = os.path.splitext(document.filename)[0]
document.filename = Path(document.filename).stem
with open(document.source_path, "wb") as f:
with document.source_path.open("wb") as f:
f.write(raw_document)
with open(document.thumbnail_path, "wb") as f:
with document.thumbnail_path.open("wb") as f:
f.write(raw_thumb)
Document.objects.filter(id=document.id).update(
@ -91,4 +91,4 @@ class Command(BaseCommand):
)
for path in old_paths:
os.unlink(path)
path.unlink()
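
The splitext replacements above work because Path.suffix and Path.stem split on the last dot only, exactly like os.path.splitext, which matters for stacked extensions such as .pdf.gpg. A quick check:

```python
import os.path
from pathlib import Path

filename = "0000004.pdf.gpg"

# Old style: last extension, and the name with that extension stripped
assert os.path.splitext(filename)[1] == ".gpg"
assert os.path.splitext(filename)[0] == "0000004.pdf"

# pathlib equivalents now used by the command
assert Path(filename).suffix == ".gpg"
assert Path(filename).stem == "0000004.pdf"
```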

View File

@ -1,6 +1,7 @@
import json
import logging
import os
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
@ -44,7 +45,7 @@ if settings.AUDIT_LOG_ENABLED:
@contextmanager
def disable_signal(sig, receiver, sender):
def disable_signal(sig, receiver, sender) -> Generator:
try:
sig.disconnect(receiver=receiver, sender=sender)
yield
@ -58,7 +59,7 @@ class Command(CryptMixin, BaseCommand):
"documents it refers to."
)
def add_arguments(self, parser):
def add_arguments(self, parser) -> None:
parser.add_argument("source")
parser.add_argument(
@ -90,7 +91,7 @@ class Command(CryptMixin, BaseCommand):
- Are there existing users or documents in the database?
"""
def pre_check_maybe_not_empty():
def pre_check_maybe_not_empty() -> None:
# Skip this check if operating only on the database
# We can expect data to exist in that case
if not self.data_only:
@ -122,7 +123,7 @@ class Command(CryptMixin, BaseCommand):
),
)
def pre_check_manifest_exists():
def pre_check_manifest_exists() -> None:
if not (self.source / "manifest.json").exists():
raise CommandError(
"That directory doesn't appear to contain a manifest.json file.",
@ -141,7 +142,7 @@ class Command(CryptMixin, BaseCommand):
"""
Loads manifest data from the various JSON files for parsing and loading the database
"""
main_manifest_path = self.source / "manifest.json"
main_manifest_path: Path = self.source / "manifest.json"
with main_manifest_path.open() as infile:
self.manifest = json.load(infile)
@ -158,8 +159,8 @@ class Command(CryptMixin, BaseCommand):
Must account for the old style of export as well, with just version.json
"""
version_path = self.source / "version.json"
metadata_path = self.source / "metadata.json"
version_path: Path = self.source / "version.json"
metadata_path: Path = self.source / "metadata.json"
if not version_path.exists() and not metadata_path.exists():
self.stdout.write(
self.style.NOTICE("No version.json or metadata.json file located"),
@ -221,7 +222,7 @@ class Command(CryptMixin, BaseCommand):
)
raise e
def handle(self, *args, **options):
def handle(self, *args, **options) -> None:
logging.getLogger().handlers[0].level = logging.ERROR
self.source = Path(options["source"]).resolve()
@ -290,13 +291,13 @@ class Command(CryptMixin, BaseCommand):
no_progress_bar=self.no_progress_bar,
)
def check_manifest_validity(self):
def check_manifest_validity(self) -> None:
"""
Attempts to verify the manifest is valid. Namely checking the files
referred to exist and the files can be read from
"""
def check_document_validity(document_record: dict):
def check_document_validity(document_record: dict) -> None:
if EXPORTER_FILE_NAME not in document_record:
raise CommandError(
"The manifest file contains a record which does not "
@ -341,7 +342,7 @@ class Command(CryptMixin, BaseCommand):
if not self.data_only and record["model"] == "documents.document":
check_document_validity(record)
def _import_files_from_manifest(self):
def _import_files_from_manifest(self) -> None:
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
@ -356,24 +357,24 @@ class Command(CryptMixin, BaseCommand):
document = Document.objects.get(pk=record["pk"])
doc_file = record[EXPORTER_FILE_NAME]
document_path = os.path.join(self.source, doc_file)
document_path = self.source / doc_file
if EXPORTER_THUMBNAIL_NAME in record:
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()
thumbnail_path = (self.source / thumb_file).resolve()
else:
thumbnail_path = None
if EXPORTER_ARCHIVE_NAME in record:
archive_file = record[EXPORTER_ARCHIVE_NAME]
archive_path = os.path.join(self.source, archive_file)
archive_path = self.source / archive_file
else:
archive_path = None
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
with FileLock(settings.MEDIA_LOCK):
if os.path.isfile(document.source_path):
if Path(document.source_path).is_file():
raise FileExistsError(document.source_path)
create_source_path_directory(document.source_path)
@ -418,8 +419,8 @@ class Command(CryptMixin, BaseCommand):
had_at_least_one_record = False
for crypt_config in self.CRYPT_FIELDS:
importer_model = crypt_config["model_name"]
crypt_fields = crypt_config["fields"]
importer_model: str = crypt_config["model_name"]
crypt_fields: str = crypt_config["fields"]
for record in filter(
lambda x: x["model"] == importer_model,
self.manifest,
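
The importer's joins now use the / operator, which accepts plain strings on the right-hand side and composes naturally with resolve(). A small sketch with placeholder record keys (the real manifest uses the exporter's constants):

```python
from pathlib import Path

source = Path("/tmp/export")                     # placeholder for the export source directory
record = {
    "file": "originals/0000001.pdf",             # placeholder keys, not the exporter constants
    "thumbnail": "thumbnails/0000001.webp",
}

document_path = source / record["file"]          # replaces os.path.join(self.source, doc_file)
thumbnail_path = (source / record["thumbnail"]).resolve()   # join, then make absolute

print(document_path)
print(thumbnail_path)
```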

View File

@ -15,7 +15,7 @@ from documents.parsers import run_convert
logger = logging.getLogger("paperless.migrations")
def _do_convert(work_package):
def _do_convert(work_package) -> None:
(
existing_encrypted_thumbnail,
converted_encrypted_thumbnail,
@ -30,13 +30,13 @@ def _do_convert(work_package):
# Decrypt png
decrypted_thumbnail = existing_encrypted_thumbnail.with_suffix("").resolve()
with open(existing_encrypted_thumbnail, "rb") as existing_encrypted_file:
with existing_encrypted_thumbnail.open("rb") as existing_encrypted_file:
raw_thumb = gpg.decrypt_file(
existing_encrypted_file,
passphrase=passphrase,
always_trust=True,
).data
with open(decrypted_thumbnail, "wb") as decrypted_file:
with Path(decrypted_thumbnail).open("wb") as decrypted_file:
decrypted_file.write(raw_thumb)
converted_decrypted_thumbnail = Path(
@ -62,7 +62,7 @@ def _do_convert(work_package):
)
# Encrypt webp
with open(converted_decrypted_thumbnail, "rb") as converted_decrypted_file:
with Path(converted_decrypted_thumbnail).open("rb") as converted_decrypted_file:
encrypted = gpg.encrypt_file(
fileobj_or_path=converted_decrypted_file,
recipients=None,
@ -71,7 +71,9 @@ def _do_convert(work_package):
always_trust=True,
).data
with open(converted_encrypted_thumbnail, "wb") as converted_encrypted_file:
with Path(converted_encrypted_thumbnail).open(
"wb",
) as converted_encrypted_file:
converted_encrypted_file.write(encrypted)
# Copy newly created thumbnail to thumbnail directory
@ -95,8 +97,8 @@ def _do_convert(work_package):
logger.error(f"Error converting thumbnail (existing file unchanged): {e}")
def _convert_encrypted_thumbnails_to_webp(apps, schema_editor):
start = time.time()
def _convert_encrypted_thumbnails_to_webp(apps, schema_editor) -> None:
start: float = time.time()
with tempfile.TemporaryDirectory() as tempdir:
work_packages = []
@ -111,15 +113,15 @@ def _convert_encrypted_thumbnails_to_webp(apps, schema_editor):
)
for file in Path(settings.THUMBNAIL_DIR).glob("*.png.gpg"):
existing_thumbnail = file.resolve()
existing_thumbnail: Path = file.resolve()
# Change the existing filename suffix from png to webp
converted_thumbnail_name = Path(
converted_thumbnail_name: str = Path(
str(existing_thumbnail).replace(".png.gpg", ".webp.gpg"),
).name
# Create the expected output filename in the tempdir
converted_thumbnail = (
converted_thumbnail: Path = (
Path(tempdir) / Path(converted_thumbnail_name)
).resolve()
@ -143,8 +145,8 @@ def _convert_encrypted_thumbnails_to_webp(apps, schema_editor):
) as pool:
pool.map(_do_convert, work_packages)
end = time.time()
duration = end - start
end: float = time.time()
duration: float = end - start
logger.info(f"Conversion completed in {duration:.3f}s")
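
The filename rewrite in the migration keeps the string-replace approach for the stacked .png.gpg suffix and only switches the file I/O to Path.open(). The suffix swap on its own, as a sketch with a placeholder path:

```python
import tempfile
from pathlib import Path

existing_thumbnail = Path("/tmp/thumbnails/0000001.png.gpg").resolve()   # placeholder path

# Swap the stacked suffixes .png.gpg -> .webp.gpg, keeping only the file name
converted_thumbnail_name: str = Path(
    str(existing_thumbnail).replace(".png.gpg", ".webp.gpg"),
).name

with tempfile.TemporaryDirectory() as tempdir:
    converted_thumbnail = (Path(tempdir) / converted_thumbnail_name).resolve()
    print(converted_thumbnail)   # <tempdir>/0000001.webp.gpg
```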

View File

@ -173,7 +173,7 @@ class TestSystemStatus(APITestCase):
self.assertEqual(response.data["tasks"]["index_status"], "OK")
self.assertIsNotNone(response.data["tasks"]["index_last_modified"])
@override_settings(INDEX_DIR="/tmp/index/")
@override_settings(INDEX_DIR=Path("/tmp/index/"))
@mock.patch("documents.index.open_index", autospec=True)
def test_system_status_index_error(self, mock_open_index):
"""
@ -193,7 +193,7 @@ class TestSystemStatus(APITestCase):
self.assertEqual(response.data["tasks"]["index_status"], "ERROR")
self.assertIsNotNone(response.data["tasks"]["index_error"])
@override_settings(DATA_DIR="/tmp/does_not_exist/data/")
@override_settings(DATA_DIR=Path("/tmp/does_not_exist/data/"))
def test_system_status_classifier_ok(self):
"""
GIVEN:
@ -222,7 +222,7 @@ class TestSystemStatus(APITestCase):
THEN:
- The response contains a WARNING classifier status
"""
with override_settings(MODEL_FILE="does_not_exist"):
with override_settings(MODEL_FILE=Path("does_not_exist")):
Document.objects.create(
title="Test Document",
)
@ -233,7 +233,11 @@ class TestSystemStatus(APITestCase):
self.assertEqual(response.data["tasks"]["classifier_status"], "WARNING")
self.assertIsNotNone(response.data["tasks"]["classifier_error"])
def test_system_status_classifier_error(self):
@mock.patch(
"documents.classifier.load_classifier",
side_effect=ClassifierModelCorruptError(),
)
def test_system_status_classifier_error(self, mock_load_classifier):
"""
GIVEN:
- The classifier does exist but is corrupt
@ -248,25 +252,23 @@ class TestSystemStatus(APITestCase):
dir="/tmp",
delete=False,
) as does_exist,
override_settings(MODEL_FILE=does_exist),
override_settings(MODEL_FILE=Path(does_exist.name)),
):
with mock.patch("documents.classifier.load_classifier") as mock_load:
mock_load.side_effect = ClassifierModelCorruptError()
Document.objects.create(
title="Test Document",
)
Tag.objects.create(
name="Test Tag",
matching_algorithm=Tag.MATCH_AUTO,
)
self.client.force_login(self.user)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(
response.data["tasks"]["classifier_status"],
"ERROR",
)
self.assertIsNotNone(response.data["tasks"]["classifier_error"])
Document.objects.create(
title="Test Document",
)
Tag.objects.create(
name="Test Tag",
matching_algorithm=Tag.MATCH_AUTO,
)
self.client.force_login(self.user)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(
response.data["tasks"]["classifier_status"],
"ERROR",
)
self.assertIsNotNone(response.data["tasks"]["classifier_error"])
def test_system_status_classifier_ok_no_objects(self):
"""
@ -278,7 +280,7 @@ class TestSystemStatus(APITestCase):
THEN:
- The response contains an OK classifier status
"""
with override_settings(MODEL_FILE="does_not_exist"):
with override_settings(MODEL_FILE=Path("does_not_exist")):
self.client.force_login(self.user)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, status.HTTP_200_OK)
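
The test refactor above passes Path objects to override_settings and replaces the nested mock.patch context manager with a decorator that sets side_effect directly, which removes one level of indentation. A minimal non-Django sketch of the decorator pattern, with stand-in names:

```python
import unittest
from unittest import mock


def load_classifier(*, raise_exception: bool = False):
    """Stand-in for documents.classifier.load_classifier."""
    return None


class ClassifierStatusTest(unittest.TestCase):
    # side_effect on the decorator replaces the old
    # "with mock.patch(...) as m: m.side_effect = ..." block
    @mock.patch(f"{__name__}.load_classifier", side_effect=RuntimeError("corrupt model"))
    def test_corrupt_model_is_surfaced(self, mock_load):
        with self.assertRaises(RuntimeError):
            load_classifier(raise_exception=True)
        mock_load.assert_called_once()


if __name__ == "__main__":
    unittest.main()
```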

View File

@ -650,7 +650,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
Path(settings.MODEL_FILE).touch()
self.assertTrue(os.path.exists(settings.MODEL_FILE))
load.side_effect = IncompatibleClassifierVersionError("Dummey Error")
load.side_effect = IncompatibleClassifierVersionError("Dummy Error")
self.assertIsNone(load_classifier())
self.assertFalse(os.path.exists(settings.MODEL_FILE))
@ -673,3 +673,25 @@ class TestClassifier(DirectoriesMixin, TestCase):
):
classifier = load_classifier()
self.assertIsNone(classifier)
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_raise_exception(self, mock_load):
Path(settings.MODEL_FILE).touch()
mock_load.side_effect = IncompatibleClassifierVersionError("Dummy Error")
with self.assertRaises(IncompatibleClassifierVersionError):
load_classifier(raise_exception=True)
Path(settings.MODEL_FILE).touch()
mock_load.side_effect = ClassifierModelCorruptError()
with self.assertRaises(ClassifierModelCorruptError):
load_classifier(raise_exception=True)
Path(settings.MODEL_FILE).touch()
mock_load.side_effect = OSError()
with self.assertRaises(OSError):
load_classifier(raise_exception=True)
Path(settings.MODEL_FILE).touch()
mock_load.side_effect = Exception()
with self.assertRaises(Exception):
load_classifier(raise_exception=True)

View File

@ -108,18 +108,18 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
class TestDecryptDocuments(FileSystemAssertsMixin, TestCase):
@override_settings(
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
ORIGINALS_DIR=(Path(__file__).parent / "samples" / "originals"),
THUMBNAIL_DIR=(Path(__file__).parent / "samples" / "thumb"),
PASSPHRASE="test",
FILENAME_FORMAT=None,
)
@mock.patch("documents.management.commands.decrypt_documents.input")
def test_decrypt(self, m):
media_dir = tempfile.mkdtemp()
originals_dir = os.path.join(media_dir, "documents", "originals")
thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
os.makedirs(originals_dir, exist_ok=True)
os.makedirs(thumb_dir, exist_ok=True)
originals_dir = Path(media_dir) / "documents" / "originals"
thumb_dir = Path(media_dir) / "documents" / "thumbnails"
originals_dir.mkdir(parents=True, exist_ok=True)
thumb_dir.mkdir(parents=True, exist_ok=True)
override_settings(
ORIGINALS_DIR=originals_dir,
@ -143,7 +143,7 @@ class TestDecryptDocuments(FileSystemAssertsMixin, TestCase):
"originals",
"0000004.pdf.gpg",
),
os.path.join(originals_dir, "0000004.pdf.gpg"),
originals_dir / "0000004.pdf.gpg",
)
shutil.copy(
os.path.join(
@ -153,7 +153,7 @@ class TestDecryptDocuments(FileSystemAssertsMixin, TestCase):
"thumbnails",
"0000004.webp.gpg",
),
os.path.join(thumb_dir, f"{doc.id:07}.webp.gpg"),
thumb_dir / f"{doc.id:07}.webp.gpg",
)
call_command("decrypt_documents")

View File

@ -2139,7 +2139,7 @@ class SystemStatusView(PassUserMixin):
classifier_error = None
classifier_status = None
try:
classifier = load_classifier()
classifier = load_classifier(raise_exception=True)
if classifier is None:
# Make sure classifier should exist
docs_queryset = Document.objects.exclude(
@ -2159,7 +2159,7 @@ class SystemStatusView(PassUserMixin):
matching_algorithm=Tag.MATCH_AUTO,
).exists()
)
and not os.path.isfile(settings.MODEL_FILE)
and not settings.MODEL_FILE.exists()
):
# if classifier file doesn't exist just classify as a warning
classifier_error = "Classifier file does not exist (yet). Re-training may be pending."
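
Passing raise_exception=True here is what lets the status view tell a merely missing model file (a WARNING, since training may simply be pending) apart from a model that exists but cannot be loaded (an ERROR). A rough sketch of that decision flow with placeholder names, not the view's full logic:

```python
from pathlib import Path


def classifier_status(model_file: Path, load) -> str:
    """Return "OK", "WARNING" or "ERROR" for a hypothetical status check."""
    try:
        classifier = load(raise_exception=True)
        if classifier is None and not model_file.exists():
            return "WARNING"    # no trained model yet, nothing is broken
        return "OK"
    except Exception:
        return "ERROR"          # the model exists but could not be loaded


print(classifier_status(Path("/tmp/no-such-model"), lambda *, raise_exception: None))
```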