Mirror of https://github.com/paperless-ngx/paperless-ngx.git
Synced 2025-10-30 03:56:23 -05:00

Chore: Switch from os.path to pathlib.Path (#8325)

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>

Authored by Sebastian Steinbeißer; committed by GitHub.

parent d06aac947d
commit 935d077836
@@ -38,20 +38,14 @@ ignore = ["DJ001", "SIM105", "RUF012"]
 [lint.per-file-ignores]
 ".github/scripts/*.py" = ["E501", "INP001", "SIM117"]
 "docker/wait-for-redis.py" = ["INP001", "T201"]
-"src/documents/barcodes.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/classifier.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/consumer.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/file_handling.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/index.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/management/commands/decrypt_documents.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/management/commands/document_consumer.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/management/commands/document_exporter.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/management/commands/document_importer.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/migrations/0012_auto_20160305_0040.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/migrations/0014_document_checksum.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/migrations/1003_mime_types.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/migrations/1012_fix_archive_files.py" = ["PTH"]  # TODO Enable & remove
-"src/documents/migrations/1037_webp_encrypted_thumbnail_conversion.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/models.py" = ["SIM115", "PTH"]  # TODO PTH Enable & remove
 "src/documents/parsers.py" = ["PTH"]  # TODO Enable & remove
 "src/documents/signals/handlers.py" = ["PTH"]  # TODO Enable & remove
src/documents/barcodes.py
@@ -3,6 +3,7 @@ import re
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 from django.conf import settings
 from pdf2image import convert_from_path
@@ -21,6 +22,9 @@ from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import maybe_override_pixel_limit
 
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
 logger = logging.getLogger("paperless.barcodes")
 
 
@@ -61,7 +65,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
           - Barcode support is enabled and the mime type is supported
         """
         if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
-            supported_mimes = {"application/pdf", "image/tiff"}
+            supported_mimes: set[str] = {"application/pdf", "image/tiff"}
         else:
             supported_mimes = {"application/pdf"}
 
@@ -71,16 +75,16 @@ class BarcodePlugin(ConsumeTaskPlugin):
             or settings.CONSUMER_ENABLE_TAG_BARCODE
         ) and self.input_doc.mime_type in supported_mimes
 
-    def setup(self):
+    def setup(self) -> None:
         self.temp_dir = tempfile.TemporaryDirectory(
             dir=self.base_tmp_dir,
             prefix="barcode",
         )
-        self.pdf_file = self.input_doc.original_file
+        self.pdf_file: Path = self.input_doc.original_file
         self._tiff_conversion_done = False
         self.barcodes: list[Barcode] = []
 
-    def run(self) -> str | None:
+    def run(self) -> None:
         # Some operations may use PIL, override pixel setting if needed
         maybe_override_pixel_limit()
 
@@ -158,7 +162,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
     def cleanup(self) -> None:
         self.temp_dir.cleanup()
 
-    def convert_from_tiff_to_pdf(self):
+    def convert_from_tiff_to_pdf(self) -> None:
         """
         May convert a TIFF image into a PDF, if the input is a TIFF and
         the TIFF has not been made into a PDF
@@ -223,7 +227,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
 
         # Choose the library for reading
         if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
-            reader = self.read_barcodes_pyzbar
+            reader: Callable[[Image.Image], list[str]] = self.read_barcodes_pyzbar
             logger.debug("Scanning for barcodes using PYZBAR")
         else:
             reader = self.read_barcodes_zxing
@@ -236,7 +240,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
             logger.debug(f"PDF has {num_of_pages} pages")
 
             # Get limit from configuration
-            barcode_max_pages = (
+            barcode_max_pages: int = (
                 num_of_pages
                 if settings.CONSUMER_BARCODE_MAX_PAGES == 0
                 else settings.CONSUMER_BARCODE_MAX_PAGES
@@ -311,7 +315,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
         self.detect()
 
         # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
-        asn_text = next(
+        asn_text: str | None = next(
             (x.value for x in self.barcodes if x.is_asn),
             None,
         )
@@ -333,36 +337,36 @@ class BarcodePlugin(ConsumeTaskPlugin):
         return asn
 
     @property
-    def tags(self) -> list[int] | None:
+    def tags(self) -> list[int]:
         """
         Search the parsed barcodes for any tags.
         Returns the detected tag ids (or empty list)
         """
-        tags = []
+        tags: list[int] = []
 
         # Ensure the barcodes have been read
         self.detect()
 
         for x in self.barcodes:
-            tag_texts = x.value
+            tag_texts: str = x.value
 
             for raw in tag_texts.split(","):
                 try:
-                    tag = None
+                    tag_str: str | None = None
                     for regex in settings.CONSUMER_TAG_BARCODE_MAPPING:
                         if re.match(regex, raw, flags=re.IGNORECASE):
                             sub = settings.CONSUMER_TAG_BARCODE_MAPPING[regex]
-                            tag = (
+                            tag_str = (
                                 re.sub(regex, sub, raw, flags=re.IGNORECASE)
                                 if sub
                                 else raw
                             )
                             break
 
-                    if tag:
+                    if tag_str:
                         tag, _ = Tag.objects.get_or_create(
-                            name__iexact=tag,
-                            defaults={"name": tag},
+                            name__iexact=tag_str,
+                            defaults={"name": tag_str},
                         )
 
                         logger.debug(
@@ -413,7 +417,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
         """
 
         document_paths = []
-        fname = self.input_doc.original_file.stem
+        fname: str = self.input_doc.original_file.stem
         with Pdf.open(self.pdf_file) as input_pdf:
             # Start with an empty document
             current_document: list[Page] = []
@@ -432,7 +436,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
                 logger.debug(f"Starting new document at idx {idx}")
                 current_document = []
                 documents.append(current_document)
-                keep_page = pages_to_split_on[idx]
+                keep_page: bool = pages_to_split_on[idx]
                 if keep_page:
                     # Keep the page
                     # (new document is started by asn barcode)
@@ -451,7 +455,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
 
                 logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
                 savepath = Path(self.temp_dir.name) / output_filename
-                with open(savepath, "wb") as out:
+                with savepath.open("wb") as out:
                     dst.save(out)
 
                 copy_basic_file_stats(self.input_doc.original_file, savepath)
src/documents/classifier.py
@@ -1,16 +1,17 @@
 import logging
-import os
 import pickle
 import re
 import warnings
 from collections.abc import Iterator
 from hashlib import sha256
+from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Optional
 
 if TYPE_CHECKING:
     from datetime import datetime
-    from pathlib import Path
+
+    from numpy import ndarray
 
 from django.conf import settings
 from django.core.cache import cache
@@ -28,7 +29,7 @@ logger = logging.getLogger("paperless.classifier")
 
 class IncompatibleClassifierVersionError(Exception):
     def __init__(self, message: str, *args: object) -> None:
-        self.message = message
+        self.message: str = message
         super().__init__(*args)
 
 
@@ -36,8 +37,8 @@ class ClassifierModelCorruptError(Exception):
     pass
 
 
-def load_classifier() -> Optional["DocumentClassifier"]:
-    if not os.path.isfile(settings.MODEL_FILE):
+def load_classifier(*, raise_exception: bool = False) -> Optional["DocumentClassifier"]:
+    if not settings.MODEL_FILE.is_file():
         logger.debug(
             "Document classification model does not exist (yet), not "
             "performing automatic matching.",
@@ -50,22 +51,30 @@ def load_classifier() -> Optional["DocumentClassifier"]:
 
     except IncompatibleClassifierVersionError as e:
         logger.info(f"Classifier version incompatible: {e.message}, will re-train")
-        os.unlink(settings.MODEL_FILE)
+        Path(settings.MODEL_FILE).unlink()
         classifier = None
-    except ClassifierModelCorruptError:
+        if raise_exception:
+            raise e
+    except ClassifierModelCorruptError as e:
         # there's something wrong with the model file.
         logger.exception(
             "Unrecoverable error while loading document "
             "classification model, deleting model file.",
         )
-        os.unlink(settings.MODEL_FILE)
+        Path(settings.MODEL_FILE).unlink()
         classifier = None
-    except OSError:
+        if raise_exception:
+            raise e
+    except OSError as e:
         logger.exception("IO error while loading document classification model")
         classifier = None
-    except Exception:  # pragma: no cover
+        if raise_exception:
+            raise e
+    except Exception as e:  # pragma: no cover
         logger.exception("Unknown error while loading document classification model")
         classifier = None
+        if raise_exception:
+            raise e
+
     return classifier
 
@@ -76,7 +85,7 @@ class DocumentClassifier:
     # v9 - Changed from hashing to time/ids for re-train check
     FORMAT_VERSION = 9
 
-    def __init__(self):
+    def __init__(self) -> None:
         # last time a document changed and therefore training might be required
         self.last_doc_change_time: datetime | None = None
         # Hash of primary keys of AUTO matching values last used in training
@@ -95,7 +104,7 @@ class DocumentClassifier:
     def load(self) -> None:
         # Catch warnings for processing
         with warnings.catch_warnings(record=True) as w:
-            with open(settings.MODEL_FILE, "rb") as f:
+            with Path(settings.MODEL_FILE).open("rb") as f:
                 schema_version = pickle.load(f)
 
                 if schema_version != self.FORMAT_VERSION:
@@ -132,11 +141,11 @@ class DocumentClassifier:
                 ):
                     raise IncompatibleClassifierVersionError("sklearn version update")
 
-    def save(self):
+    def save(self) -> None:
         target_file: Path = settings.MODEL_FILE
-        target_file_temp = target_file.with_suffix(".pickle.part")
+        target_file_temp: Path = target_file.with_suffix(".pickle.part")
 
-        with open(target_file_temp, "wb") as f:
+        with target_file_temp.open("wb") as f:
             pickle.dump(self.FORMAT_VERSION, f)
 
             pickle.dump(self.last_doc_change_time, f)
@@ -153,7 +162,7 @@ class DocumentClassifier:
 
         target_file_temp.rename(target_file)
 
-    def train(self):
+    def train(self) -> bool:
         # Get non-inbox documents
         docs_queryset = (
             Document.objects.exclude(
@@ -190,7 +199,7 @@ class DocumentClassifier:
             hasher.update(y.to_bytes(4, "little", signed=True))
             labels_correspondent.append(y)
 
-            tags = sorted(
+            tags: list[int] = sorted(
                 tag.pk
                 for tag in doc.tags.filter(
                     matching_algorithm=MatchingModel.MATCH_AUTO,
@@ -236,9 +245,9 @@ class DocumentClassifier:
         # union with {-1} accounts for cases where all documents have
         # correspondents and types assigned, so -1 isn't part of labels_x, which
         # it usually is.
-        num_correspondents = len(set(labels_correspondent) | {-1}) - 1
-        num_document_types = len(set(labels_document_type) | {-1}) - 1
-        num_storage_paths = len(set(labels_storage_path) | {-1}) - 1
+        num_correspondents: int = len(set(labels_correspondent) | {-1}) - 1
+        num_document_types: int = len(set(labels_document_type) | {-1}) - 1
+        num_storage_paths: int = len(set(labels_storage_path) | {-1}) - 1
 
         logger.debug(
             f"{docs_queryset.count()} documents, {num_tags} tag(s), {num_correspondents} correspondent(s), "
@@ -266,7 +275,9 @@ class DocumentClassifier:
             min_df=0.01,
         )
 
-        data_vectorized = self.data_vectorizer.fit_transform(content_generator())
+        data_vectorized: ndarray = self.data_vectorizer.fit_transform(
+            content_generator(),
+        )
 
         # See the notes here:
         # https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html
@@ -284,7 +295,7 @@ class DocumentClassifier:
                     label[0] if len(label) == 1 else -1 for label in labels_tags
                 ]
                 self.tags_binarizer = LabelBinarizer()
-                labels_tags_vectorized = self.tags_binarizer.fit_transform(
+                labels_tags_vectorized: ndarray = self.tags_binarizer.fit_transform(
                     labels_tags,
                 ).ravel()
             else:
src/documents/index.py
@@ -1,11 +1,11 @@
 import logging
 import math
-import os
 from collections import Counter
 from contextlib import contextmanager
 from datetime import datetime
 from datetime import timezone
 from shutil import rmtree
+from typing import Literal
 
 from django.conf import settings
 from django.db.models import QuerySet
@@ -47,7 +47,7 @@ from documents.models import User
 logger = logging.getLogger("paperless.index")
 
 
-def get_schema():
+def get_schema() -> Schema:
     return Schema(
         id=NUMERIC(stored=True, unique=True),
         title=TEXT(sortable=True),
@@ -93,7 +93,7 @@ def open_index(recreate=False) -> FileIndex:
         logger.exception("Error while opening the index, recreating.")
 
     # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
-    if os.path.isdir(settings.INDEX_DIR):
+    if settings.INDEX_DIR.is_dir():
         rmtree(settings.INDEX_DIR)
     settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
 
@@ -123,7 +123,7 @@ def open_index_searcher() -> Searcher:
         searcher.close()
 
 
-def update_document(writer: AsyncWriter, doc: Document):
+def update_document(writer: AsyncWriter, doc: Document) -> None:
     tags = ",".join([t.name for t in doc.tags.all()])
     tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
     notes = ",".join([str(c.note) for c in Note.objects.filter(document=doc)])
@@ -133,7 +133,7 @@ def update_document(writer: AsyncWriter, doc: Document):
     custom_fields_ids = ",".join(
         [str(f.field.id) for f in CustomFieldInstance.objects.filter(document=doc)],
     )
-    asn = doc.archive_serial_number
+    asn: int | None = doc.archive_serial_number
     if asn is not None and (
         asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
         or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
@@ -149,7 +149,7 @@ def update_document(writer: AsyncWriter, doc: Document):
         doc,
         only_with_perms_in=["view_document"],
     )
-    viewer_ids = ",".join([str(u.id) for u in users_with_perms])
+    viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
     writer.update_document(
         id=doc.pk,
         title=doc.title,
@@ -187,20 +187,20 @@ def update_document(writer: AsyncWriter, doc: Document):
     )
 
 
-def remove_document(writer: AsyncWriter, doc: Document):
+def remove_document(writer: AsyncWriter, doc: Document) -> None:
     remove_document_by_id(writer, doc.pk)
 
 
-def remove_document_by_id(writer: AsyncWriter, doc_id):
+def remove_document_by_id(writer: AsyncWriter, doc_id) -> None:
     writer.delete_by_term("id", doc_id)
 
 
-def add_or_update_document(document: Document):
+def add_or_update_document(document: Document) -> None:
     with open_index_writer() as writer:
         update_document(writer, document)
 
 
-def remove_document_from_index(document: Document):
+def remove_document_from_index(document: Document) -> None:
     with open_index_writer() as writer:
         remove_document(writer, document)
 
@@ -218,11 +218,11 @@ class MappedDocIdSet(DocIdSet):
         self.document_ids = BitSet(document_ids, size=max_id)
         self.ixreader = ixreader
 
-    def __contains__(self, docnum):
+    def __contains__(self, docnum) -> bool:
         document_id = self.ixreader.stored_fields(docnum)["id"]
         return document_id in self.document_ids
 
-    def __bool__(self):
+    def __bool__(self) -> Literal[True]:
         # searcher.search ignores a filter if it's "falsy".
         # We use this hack so this DocIdSet, when used as a filter, is never ignored.
         return True
@@ -232,13 +232,13 @@ class DelayedQuery:
     def _get_query(self):
         raise NotImplementedError  # pragma: no cover
 
-    def _get_query_sortedby(self):
+    def _get_query_sortedby(self) -> tuple[None, Literal[False]] | tuple[str, bool]:
         if "ordering" not in self.query_params:
             return None, False
 
         field: str = self.query_params["ordering"]
 
-        sort_fields_map = {
+        sort_fields_map: dict[str, str] = {
             "created": "created",
             "modified": "modified",
             "added": "added",
@@ -268,7 +268,7 @@ class DelayedQuery:
         query_params,
         page_size,
         filter_queryset: QuerySet,
-    ):
+    ) -> None:
         self.searcher = searcher
         self.query_params = query_params
         self.page_size = page_size
@@ -276,7 +276,7 @@ class DelayedQuery:
         self.first_score = None
         self.filter_queryset = filter_queryset
 
-    def __len__(self):
+    def __len__(self) -> int:
         page = self[0:1]
         return len(page)
 
@@ -334,7 +334,7 @@ class LocalDateParser(English):
 
 
 class DelayedFullTextQuery(DelayedQuery):
-    def _get_query(self):
+    def _get_query(self) -> tuple:
         q_str = self.query_params["query"]
         qp = MultifieldParser(
             [
@@ -364,7 +364,7 @@ class DelayedFullTextQuery(DelayedQuery):
 
 
 class DelayedMoreLikeThisQuery(DelayedQuery):
-    def _get_query(self):
+    def _get_query(self) -> tuple:
         more_like_doc_id = int(self.query_params["more_like_id"])
         content = Document.objects.get(id=more_like_doc_id).content
 
@@ -379,7 +379,7 @@ class DelayedMoreLikeThisQuery(DelayedQuery):
         q = query.Or(
             [query.Term("content", word, boost=weight) for word, weight in kts],
         )
-        mask = {docnum}
+        mask: set = {docnum}
 
         return q, mask
 
@@ -389,7 +389,7 @@ def autocomplete(
     term: str,
     limit: int = 10,
     user: User | None = None,
-):
+) -> list:
     """
     Mimics whoosh.reading.IndexReader.most_distinctive_terms with permissions
    and without scoring
@@ -402,7 +402,7 @@ def autocomplete(
         # content field query instead and return bogus, not text data
         qp.remove_plugin_class(FieldsPlugin)
         q = qp.parse(f"{term.lower()}*")
-        user_criterias = get_permissions_criterias(user)
+        user_criterias: list = get_permissions_criterias(user)
 
         results = s.search(
             q,
@@ -417,14 +417,14 @@ def autocomplete(
                     termCounts[match] += 1
             terms = [t for t, _ in termCounts.most_common(limit)]
 
-        term_encoded = term.encode("UTF-8")
+        term_encoded: bytes = term.encode("UTF-8")
         if term_encoded in terms:
             terms.insert(0, terms.pop(terms.index(term_encoded)))
 
     return terms
 
 
-def get_permissions_criterias(user: User | None = None):
+def get_permissions_criterias(user: User | None = None) -> list:
     user_criterias = [query.Term("has_owner", False)]
     if user is not None:
         if user.is_superuser:  # superusers see all docs
src/documents/management/commands/decrypt_documents.py
@@ -1,4 +1,4 @@
-import os
+from pathlib import Path
 
 from django.conf import settings
 from django.core.management.base import BaseCommand
@@ -14,7 +14,7 @@ class Command(BaseCommand):
         "state to an unencrypted one (or vice-versa)"
     )
 
-    def add_arguments(self, parser):
+    def add_arguments(self, parser) -> None:
         parser.add_argument(
             "--passphrase",
             help=(
@@ -23,7 +23,7 @@ class Command(BaseCommand):
             ),
         )
 
-    def handle(self, *args, **options):
+    def handle(self, *args, **options) -> None:
         try:
             self.stdout.write(
                 self.style.WARNING(
@@ -52,7 +52,7 @@ class Command(BaseCommand):
 
         self.__gpg_to_unencrypted(passphrase)
 
-    def __gpg_to_unencrypted(self, passphrase: str):
+    def __gpg_to_unencrypted(self, passphrase: str) -> None:
         encrypted_files = Document.objects.filter(
             storage_type=Document.STORAGE_TYPE_GPG,
         )
@@ -69,7 +69,7 @@ class Command(BaseCommand):
 
             document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 
-            ext = os.path.splitext(document.filename)[1]
+            ext: str = Path(document.filename).suffix
 
             if not ext == ".gpg":
                 raise CommandError(
@@ -77,12 +77,12 @@ class Command(BaseCommand):
                     f"end with .gpg",
                 )
 
-            document.filename = os.path.splitext(document.filename)[0]
+            document.filename = Path(document.filename).stem
 
-            with open(document.source_path, "wb") as f:
+            with document.source_path.open("wb") as f:
                 f.write(raw_document)
 
-            with open(document.thumbnail_path, "wb") as f:
+            with document.thumbnail_path.open("wb") as f:
                 f.write(raw_thumb)
 
             Document.objects.filter(id=document.id).update(
@@ -91,4 +91,4 @@ class Command(BaseCommand):
             )
 
             for path in old_paths:
-                os.unlink(path)
+                path.unlink()
src/documents/management/commands/document_importer.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+from collections.abc import Generator
 from contextlib import contextmanager
 from pathlib import Path
 
@@ -44,7 +45,7 @@ if settings.AUDIT_LOG_ENABLED:
 
 
 @contextmanager
-def disable_signal(sig, receiver, sender):
+def disable_signal(sig, receiver, sender) -> Generator:
     try:
         sig.disconnect(receiver=receiver, sender=sender)
         yield
@@ -58,7 +59,7 @@ class Command(CryptMixin, BaseCommand):
         "documents it refers to."
     )
 
-    def add_arguments(self, parser):
+    def add_arguments(self, parser) -> None:
         parser.add_argument("source")
 
         parser.add_argument(
@@ -90,7 +91,7 @@ class Command(CryptMixin, BaseCommand):
         - Are there existing users or documents in the database?
         """
 
-        def pre_check_maybe_not_empty():
+        def pre_check_maybe_not_empty() -> None:
             # Skip this check if operating only on the database
             # We can expect data to exist in that case
             if not self.data_only:
@@ -122,7 +123,7 @@ class Command(CryptMixin, BaseCommand):
                     ),
                 )
 
-        def pre_check_manifest_exists():
+        def pre_check_manifest_exists() -> None:
             if not (self.source / "manifest.json").exists():
                 raise CommandError(
                     "That directory doesn't appear to contain a manifest.json file.",
@@ -141,7 +142,7 @@ class Command(CryptMixin, BaseCommand):
         """
         Loads manifest data from the various JSON files for parsing and loading the database
         """
-        main_manifest_path = self.source / "manifest.json"
+        main_manifest_path: Path = self.source / "manifest.json"
 
         with main_manifest_path.open() as infile:
             self.manifest = json.load(infile)
@@ -158,8 +159,8 @@ class Command(CryptMixin, BaseCommand):
 
         Must account for the old style of export as well, with just version.json
         """
-        version_path = self.source / "version.json"
-        metadata_path = self.source / "metadata.json"
+        version_path: Path = self.source / "version.json"
+        metadata_path: Path = self.source / "metadata.json"
         if not version_path.exists() and not metadata_path.exists():
             self.stdout.write(
                 self.style.NOTICE("No version.json or metadata.json file located"),
@@ -221,7 +222,7 @@ class Command(CryptMixin, BaseCommand):
                 )
                 raise e
 
-    def handle(self, *args, **options):
+    def handle(self, *args, **options) -> None:
         logging.getLogger().handlers[0].level = logging.ERROR
 
         self.source = Path(options["source"]).resolve()
@@ -290,13 +291,13 @@ class Command(CryptMixin, BaseCommand):
             no_progress_bar=self.no_progress_bar,
         )
 
-    def check_manifest_validity(self):
+    def check_manifest_validity(self) -> None:
         """
         Attempts to verify the manifest is valid.  Namely checking the files
         referred to exist and the files can be read from
         """
 
-        def check_document_validity(document_record: dict):
+        def check_document_validity(document_record: dict) -> None:
             if EXPORTER_FILE_NAME not in document_record:
                 raise CommandError(
                     "The manifest file contains a record which does not "
@@ -341,7 +342,7 @@ class Command(CryptMixin, BaseCommand):
             if not self.data_only and record["model"] == "documents.document":
                 check_document_validity(record)
 
-    def _import_files_from_manifest(self):
+    def _import_files_from_manifest(self) -> None:
         settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
         settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
         settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
@@ -356,24 +357,24 @@ class Command(CryptMixin, BaseCommand):
             document = Document.objects.get(pk=record["pk"])
 
             doc_file = record[EXPORTER_FILE_NAME]
-            document_path = os.path.join(self.source, doc_file)
+            document_path = self.source / doc_file
 
             if EXPORTER_THUMBNAIL_NAME in record:
                 thumb_file = record[EXPORTER_THUMBNAIL_NAME]
-                thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()
+                thumbnail_path = (self.source / thumb_file).resolve()
             else:
                 thumbnail_path = None
 
             if EXPORTER_ARCHIVE_NAME in record:
                 archive_file = record[EXPORTER_ARCHIVE_NAME]
-                archive_path = os.path.join(self.source, archive_file)
+                archive_path = self.source / archive_file
             else:
                 archive_path = None
 
             document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 
             with FileLock(settings.MEDIA_LOCK):
-                if os.path.isfile(document.source_path):
+                if Path(document.source_path).is_file():
                     raise FileExistsError(document.source_path)
 
                 create_source_path_directory(document.source_path)
@@ -418,8 +419,8 @@ class Command(CryptMixin, BaseCommand):
             had_at_least_one_record = False
 
             for crypt_config in self.CRYPT_FIELDS:
-                importer_model = crypt_config["model_name"]
-                crypt_fields = crypt_config["fields"]
+                importer_model: str = crypt_config["model_name"]
+                crypt_fields: str = crypt_config["fields"]
                 for record in filter(
                     lambda x: x["model"] == importer_model,
                     self.manifest,
| @@ -15,7 +15,7 @@ from documents.parsers import run_convert | |||||||
| logger = logging.getLogger("paperless.migrations") | logger = logging.getLogger("paperless.migrations") | ||||||
|  |  | ||||||
|  |  | ||||||
| def _do_convert(work_package): | def _do_convert(work_package) -> None: | ||||||
|     ( |     ( | ||||||
|         existing_encrypted_thumbnail, |         existing_encrypted_thumbnail, | ||||||
|         converted_encrypted_thumbnail, |         converted_encrypted_thumbnail, | ||||||
| @@ -30,13 +30,13 @@ def _do_convert(work_package): | |||||||
|         # Decrypt png |         # Decrypt png | ||||||
|         decrypted_thumbnail = existing_encrypted_thumbnail.with_suffix("").resolve() |         decrypted_thumbnail = existing_encrypted_thumbnail.with_suffix("").resolve() | ||||||
|  |  | ||||||
|         with open(existing_encrypted_thumbnail, "rb") as existing_encrypted_file: |         with existing_encrypted_thumbnail.open("rb") as existing_encrypted_file: | ||||||
|             raw_thumb = gpg.decrypt_file( |             raw_thumb = gpg.decrypt_file( | ||||||
|                 existing_encrypted_file, |                 existing_encrypted_file, | ||||||
|                 passphrase=passphrase, |                 passphrase=passphrase, | ||||||
|                 always_trust=True, |                 always_trust=True, | ||||||
|             ).data |             ).data | ||||||
|             with open(decrypted_thumbnail, "wb") as decrypted_file: |             with Path(decrypted_thumbnail).open("wb") as decrypted_file: | ||||||
|                 decrypted_file.write(raw_thumb) |                 decrypted_file.write(raw_thumb) | ||||||
|  |  | ||||||
|         converted_decrypted_thumbnail = Path( |         converted_decrypted_thumbnail = Path( | ||||||
| @@ -62,7 +62,7 @@ def _do_convert(work_package): | |||||||
|         ) |         ) | ||||||
|  |  | ||||||
|         # Encrypt webp |         # Encrypt webp | ||||||
|         with open(converted_decrypted_thumbnail, "rb") as converted_decrypted_file: |         with Path(converted_decrypted_thumbnail).open("rb") as converted_decrypted_file: | ||||||
|             encrypted = gpg.encrypt_file( |             encrypted = gpg.encrypt_file( | ||||||
|                 fileobj_or_path=converted_decrypted_file, |                 fileobj_or_path=converted_decrypted_file, | ||||||
|                 recipients=None, |                 recipients=None, | ||||||
| @@ -71,7 +71,9 @@ def _do_convert(work_package): | |||||||
|                 always_trust=True, |                 always_trust=True, | ||||||
|             ).data |             ).data | ||||||
|  |  | ||||||
|             with open(converted_encrypted_thumbnail, "wb") as converted_encrypted_file: |             with Path(converted_encrypted_thumbnail).open( | ||||||
|  |                 "wb", | ||||||
|  |             ) as converted_encrypted_file: | ||||||
|                 converted_encrypted_file.write(encrypted) |                 converted_encrypted_file.write(encrypted) | ||||||
|  |  | ||||||
|         # Copy newly created thumbnail to thumbnail directory |         # Copy newly created thumbnail to thumbnail directory | ||||||
| @@ -95,8 +97,8 @@ def _do_convert(work_package): | |||||||
|         logger.error(f"Error converting thumbnail (existing file unchanged): {e}") |         logger.error(f"Error converting thumbnail (existing file unchanged): {e}") | ||||||
|  |  | ||||||
|  |  | ||||||
| def _convert_encrypted_thumbnails_to_webp(apps, schema_editor): | def _convert_encrypted_thumbnails_to_webp(apps, schema_editor) -> None: | ||||||
|     start = time.time() |     start: float = time.time() | ||||||
|  |  | ||||||
|     with tempfile.TemporaryDirectory() as tempdir: |     with tempfile.TemporaryDirectory() as tempdir: | ||||||
|         work_packages = [] |         work_packages = [] | ||||||
| @@ -111,15 +113,15 @@ def _convert_encrypted_thumbnails_to_webp(apps, schema_editor): | |||||||
|                 ) |                 ) | ||||||
|  |  | ||||||
|             for file in Path(settings.THUMBNAIL_DIR).glob("*.png.gpg"): |             for file in Path(settings.THUMBNAIL_DIR).glob("*.png.gpg"): | ||||||
|                 existing_thumbnail = file.resolve() |                 existing_thumbnail: Path = file.resolve() | ||||||
|  |  | ||||||
|                 # Change the existing filename suffix from png to webp |                 # Change the existing filename suffix from png to webp | ||||||
|                 converted_thumbnail_name = Path( |                 converted_thumbnail_name: str = Path( | ||||||
|                     str(existing_thumbnail).replace(".png.gpg", ".webp.gpg"), |                     str(existing_thumbnail).replace(".png.gpg", ".webp.gpg"), | ||||||
|                 ).name |                 ).name | ||||||
|  |  | ||||||
|                 # Create the expected output filename in the tempdir |                 # Create the expected output filename in the tempdir | ||||||
|                 converted_thumbnail = ( |                 converted_thumbnail: Path = ( | ||||||
|                     Path(tempdir) / Path(converted_thumbnail_name) |                     Path(tempdir) / Path(converted_thumbnail_name) | ||||||
|                 ).resolve() |                 ).resolve() | ||||||
|  |  | ||||||
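The suffix swap in this hunk goes through str(...).replace(...) because the thumbnails carry a double suffix (.png.gpg), which Path.with_suffix() alone cannot rewrite. A small sketch of that detail, using a hypothetical path:

from pathlib import Path

existing = Path("/data/thumbnails/0000004.png.gpg")

# str(...).replace(...) followed by .name, as in the migration:
converted_name = Path(str(existing).replace(".png.gpg", ".webp.gpg")).name
assert converted_name == "0000004.webp.gpg"

# An equivalent formulation that only rewrites the final component,
# avoiding the round-trip through the full path string:
converted_name_alt = existing.name.replace(".png.gpg", ".webp.gpg")
assert converted_name_alt == converted_name

# .suffixes shows why .with_suffix() is not enough here: the file
# carries two suffixes and only the inner one changes.
assert existing.suffixes == [".png", ".gpg"]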
| @@ -143,8 +145,8 @@ def _convert_encrypted_thumbnails_to_webp(apps, schema_editor): | |||||||
|                 ) as pool: |                 ) as pool: | ||||||
|                     pool.map(_do_convert, work_packages) |                     pool.map(_do_convert, work_packages) | ||||||
|  |  | ||||||
|                     end = time.time() |                     end: float = time.time() | ||||||
|                     duration = end - start |                     duration: float = end - start | ||||||
|  |  | ||||||
|                 logger.info(f"Conversion completed in {duration:.3f}s") |                 logger.info(f"Conversion completed in {duration:.3f}s") | ||||||
|  |  | ||||||
|   | |||||||
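The final hunk of the migration keeps the existing fan-out: collect work packages from a glob, map a worker over them in a process pool, and log the elapsed time. A stripped-down sketch of that flow, with the thumbnail conversion replaced by a trivial stat() call and hypothetical file names:

import multiprocessing
import tempfile
import time
from pathlib import Path

def _do_work(package: Path) -> int:
    # Stand-in for _do_convert(); returns the file size in bytes.
    return package.stat().st_size

if __name__ == "__main__":
    start: float = time.time()
    with tempfile.TemporaryDirectory() as tempdir:
        for i in range(3):
            (Path(tempdir) / f"{i:07}.png.gpg").write_bytes(b"x" * i)

        # Path.glob() yields Path objects directly, so no os.path
        # juggling is needed when building the work list.
        work_packages = sorted(Path(tempdir).glob("*.png.gpg"))

        with multiprocessing.Pool(processes=2) as pool:
            sizes = pool.map(_do_work, work_packages)

    duration: float = time.time() - start
    print(f"Processed {len(sizes)} files in {duration:.3f}s")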
| @@ -173,7 +173,7 @@ class TestSystemStatus(APITestCase): | |||||||
|         self.assertEqual(response.data["tasks"]["index_status"], "OK") |         self.assertEqual(response.data["tasks"]["index_status"], "OK") | ||||||
|         self.assertIsNotNone(response.data["tasks"]["index_last_modified"]) |         self.assertIsNotNone(response.data["tasks"]["index_last_modified"]) | ||||||
|  |  | ||||||
|     @override_settings(INDEX_DIR="/tmp/index/") |     @override_settings(INDEX_DIR=Path("/tmp/index/")) | ||||||
|     @mock.patch("documents.index.open_index", autospec=True) |     @mock.patch("documents.index.open_index", autospec=True) | ||||||
|     def test_system_status_index_error(self, mock_open_index): |     def test_system_status_index_error(self, mock_open_index): | ||||||
|         """ |         """ | ||||||
| @@ -193,7 +193,7 @@ class TestSystemStatus(APITestCase): | |||||||
|         self.assertEqual(response.data["tasks"]["index_status"], "ERROR") |         self.assertEqual(response.data["tasks"]["index_status"], "ERROR") | ||||||
|         self.assertIsNotNone(response.data["tasks"]["index_error"]) |         self.assertIsNotNone(response.data["tasks"]["index_error"]) | ||||||
|  |  | ||||||
|     @override_settings(DATA_DIR="/tmp/does_not_exist/data/") |     @override_settings(DATA_DIR=Path("/tmp/does_not_exist/data/")) | ||||||
|     def test_system_status_classifier_ok(self): |     def test_system_status_classifier_ok(self): | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
| @@ -222,7 +222,7 @@ class TestSystemStatus(APITestCase): | |||||||
|         THEN: |         THEN: | ||||||
|             - The response contains a WARNING classifier status |             - The response contains a WARNING classifier status | ||||||
|         """ |         """ | ||||||
|         with override_settings(MODEL_FILE="does_not_exist"): |         with override_settings(MODEL_FILE=Path("does_not_exist")): | ||||||
|             Document.objects.create( |             Document.objects.create( | ||||||
|                 title="Test Document", |                 title="Test Document", | ||||||
|             ) |             ) | ||||||
| @@ -233,7 +233,11 @@ class TestSystemStatus(APITestCase): | |||||||
|             self.assertEqual(response.data["tasks"]["classifier_status"], "WARNING") |             self.assertEqual(response.data["tasks"]["classifier_status"], "WARNING") | ||||||
|             self.assertIsNotNone(response.data["tasks"]["classifier_error"]) |             self.assertIsNotNone(response.data["tasks"]["classifier_error"]) | ||||||
|  |  | ||||||
|     def test_system_status_classifier_error(self): |     @mock.patch( | ||||||
|  |         "documents.classifier.load_classifier", | ||||||
|  |         side_effect=ClassifierModelCorruptError(), | ||||||
|  |     ) | ||||||
|  |     def test_system_status_classifier_error(self, mock_load_classifier): | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
|             - The classifier does exist but is corrupt |             - The classifier does exist but is corrupt | ||||||
| @@ -248,25 +252,23 @@ class TestSystemStatus(APITestCase): | |||||||
|                 dir="/tmp", |                 dir="/tmp", | ||||||
|                 delete=False, |                 delete=False, | ||||||
|             ) as does_exist, |             ) as does_exist, | ||||||
|             override_settings(MODEL_FILE=does_exist), |             override_settings(MODEL_FILE=Path(does_exist.name)), | ||||||
|         ): |         ): | ||||||
|             with mock.patch("documents.classifier.load_classifier") as mock_load: |             Document.objects.create( | ||||||
|                 mock_load.side_effect = ClassifierModelCorruptError() |                 title="Test Document", | ||||||
|                 Document.objects.create( |             ) | ||||||
|                     title="Test Document", |             Tag.objects.create( | ||||||
|                 ) |                 name="Test Tag", | ||||||
|                 Tag.objects.create( |                 matching_algorithm=Tag.MATCH_AUTO, | ||||||
|                     name="Test Tag", |             ) | ||||||
|                     matching_algorithm=Tag.MATCH_AUTO, |             self.client.force_login(self.user) | ||||||
|                 ) |             response = self.client.get(self.ENDPOINT) | ||||||
|                 self.client.force_login(self.user) |             self.assertEqual(response.status_code, status.HTTP_200_OK) | ||||||
|                 response = self.client.get(self.ENDPOINT) |             self.assertEqual( | ||||||
|                 self.assertEqual(response.status_code, status.HTTP_200_OK) |                 response.data["tasks"]["classifier_status"], | ||||||
|                 self.assertEqual( |                 "ERROR", | ||||||
|                     response.data["tasks"]["classifier_status"], |             ) | ||||||
|                     "ERROR", |             self.assertIsNotNone(response.data["tasks"]["classifier_error"]) | ||||||
|                 ) |  | ||||||
|                 self.assertIsNotNone(response.data["tasks"]["classifier_error"]) |  | ||||||
|  |  | ||||||
|     def test_system_status_classifier_ok_no_objects(self): |     def test_system_status_classifier_ok_no_objects(self): | ||||||
|         """ |         """ | ||||||
| @@ -278,7 +280,7 @@ class TestSystemStatus(APITestCase): | |||||||
|         THEN: |         THEN: | ||||||
|             - The response contains an OK classifier status |             - The response contains an OK classifier status | ||||||
|         """ |         """ | ||||||
|         with override_settings(MODEL_FILE="does_not_exist"): |         with override_settings(MODEL_FILE=Path("does_not_exist")): | ||||||
|             self.client.force_login(self.user) |             self.client.force_login(self.user) | ||||||
|             response = self.client.get(self.ENDPOINT) |             response = self.client.get(self.ENDPOINT) | ||||||
|             self.assertEqual(response.status_code, status.HTTP_200_OK) |             self.assertEqual(response.status_code, status.HTTP_200_OK) | ||||||
|   | |||||||
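Two themes run through the test changes above: settings such as INDEX_DIR, DATA_DIR, and MODEL_FILE are now overridden with Path objects, matching production code that calls Path methods on them, and the corrupt-classifier test moves its mock.patch from a nested context manager to a decorator with side_effect, flattening the test body by one indentation level. A minimal sketch of the two equivalent patch styles, using os.getcwd as a stand-in target and a hypothetical exception type so the example runs anywhere:

import os
import unittest
from unittest import mock

class DummyCorruptError(Exception):
    pass

class PatchStyles(unittest.TestCase):
    # Decorator form: the mock is active for the whole test body and
    # arrives as an extra positional argument.
    @mock.patch("os.getcwd", side_effect=DummyCorruptError())
    def test_decorator_form(self, mock_getcwd):
        with self.assertRaises(DummyCorruptError):
            os.getcwd()
        # The call is recorded before the side effect is raised.
        mock_getcwd.assert_called_once()

    def test_context_manager_form(self):
        # Equivalent inline form, as the test read before the refactor.
        with mock.patch("os.getcwd") as mock_getcwd:
            mock_getcwd.side_effect = DummyCorruptError()
            with self.assertRaises(DummyCorruptError):
                os.getcwd()

if __name__ == "__main__":
    unittest.main()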
| @@ -650,7 +650,7 @@ class TestClassifier(DirectoriesMixin, TestCase): | |||||||
|         Path(settings.MODEL_FILE).touch() |         Path(settings.MODEL_FILE).touch() | ||||||
|         self.assertTrue(os.path.exists(settings.MODEL_FILE)) |         self.assertTrue(os.path.exists(settings.MODEL_FILE)) | ||||||
|  |  | ||||||
|         load.side_effect = IncompatibleClassifierVersionError("Dummey Error") |         load.side_effect = IncompatibleClassifierVersionError("Dummy Error") | ||||||
|         self.assertIsNone(load_classifier()) |         self.assertIsNone(load_classifier()) | ||||||
|         self.assertFalse(os.path.exists(settings.MODEL_FILE)) |         self.assertFalse(os.path.exists(settings.MODEL_FILE)) | ||||||
|  |  | ||||||
| @@ -673,3 +673,25 @@ class TestClassifier(DirectoriesMixin, TestCase): | |||||||
|         ): |         ): | ||||||
|             classifier = load_classifier() |             classifier = load_classifier() | ||||||
|             self.assertIsNone(classifier) |             self.assertIsNone(classifier) | ||||||
|  |  | ||||||
|  |     @mock.patch("documents.classifier.DocumentClassifier.load") | ||||||
|  |     def test_load_classifier_raise_exception(self, mock_load): | ||||||
|  |         Path(settings.MODEL_FILE).touch() | ||||||
|  |         mock_load.side_effect = IncompatibleClassifierVersionError("Dummy Error") | ||||||
|  |         with self.assertRaises(IncompatibleClassifierVersionError): | ||||||
|  |             load_classifier(raise_exception=True) | ||||||
|  |  | ||||||
|  |         Path(settings.MODEL_FILE).touch() | ||||||
|  |         mock_load.side_effect = ClassifierModelCorruptError() | ||||||
|  |         with self.assertRaises(ClassifierModelCorruptError): | ||||||
|  |             load_classifier(raise_exception=True) | ||||||
|  |  | ||||||
|  |         Path(settings.MODEL_FILE).touch() | ||||||
|  |         mock_load.side_effect = OSError() | ||||||
|  |         with self.assertRaises(OSError): | ||||||
|  |             load_classifier(raise_exception=True) | ||||||
|  |  | ||||||
|  |         Path(settings.MODEL_FILE).touch() | ||||||
|  |         mock_load.side_effect = Exception() | ||||||
|  |         with self.assertRaises(Exception): | ||||||
|  |             load_classifier(raise_exception=True) | ||||||
|   | |||||||
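The new test drives a raise_exception flag on load_classifier(). A hedged sketch of how such a flag might be threaded through a loader; the signature, logger name, and unlink-on-corruption behavior here are assumptions for illustration, not the actual paperless-ngx implementation:

import logging
from pathlib import Path

logger = logging.getLogger("example.classifier")

class IncompatibleClassifierVersionError(Exception):
    pass

class ClassifierModelCorruptError(Exception):
    pass

def _load_model(model_file: Path) -> object:
    # Stand-in for the real deserialization step.
    raise ClassifierModelCorruptError("corrupt model")

def load_classifier(model_file: Path, *, raise_exception: bool = False):
    if not model_file.exists():
        return None
    try:
        return _load_model(model_file)
    except (IncompatibleClassifierVersionError, ClassifierModelCorruptError):
        # A stale or corrupt model is normally discarded so it can be
        # retrained, which is why the test re-touches the file between
        # cases; callers can opt in to seeing the error instead.
        model_file.unlink(missing_ok=True)
        if raise_exception:
            raise
        return None
    except Exception:
        logger.exception("Unknown error while loading the classifier")
        if raise_exception:
            raise
        return None

Under these assumptions, the default call keeps its old contract (return None and clean up), while status-reporting callers can opt in to the exception.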
| @@ -108,18 +108,18 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|  |  | ||||||
| class TestDecryptDocuments(FileSystemAssertsMixin, TestCase): | class TestDecryptDocuments(FileSystemAssertsMixin, TestCase): | ||||||
|     @override_settings( |     @override_settings( | ||||||
|         ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"), |         ORIGINALS_DIR=(Path(__file__).parent / "samples" / "originals"), | ||||||
|         THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"), |         THUMBNAIL_DIR=(Path(__file__).parent / "samples" / "thumb"), | ||||||
|         PASSPHRASE="test", |         PASSPHRASE="test", | ||||||
|         FILENAME_FORMAT=None, |         FILENAME_FORMAT=None, | ||||||
|     ) |     ) | ||||||
|     @mock.patch("documents.management.commands.decrypt_documents.input") |     @mock.patch("documents.management.commands.decrypt_documents.input") | ||||||
|     def test_decrypt(self, m): |     def test_decrypt(self, m): | ||||||
|         media_dir = tempfile.mkdtemp() |         media_dir = tempfile.mkdtemp() | ||||||
|         originals_dir = os.path.join(media_dir, "documents", "originals") |         originals_dir = Path(media_dir) / "documents" / "originals" | ||||||
|         thumb_dir = os.path.join(media_dir, "documents", "thumbnails") |         thumb_dir = Path(media_dir) / "documents" / "thumbnails" | ||||||
|         os.makedirs(originals_dir, exist_ok=True) |         originals_dir.mkdir(parents=True, exist_ok=True) | ||||||
|         os.makedirs(thumb_dir, exist_ok=True) |         thumb_dir.mkdir(parents=True, exist_ok=True) | ||||||
|  |  | ||||||
|         override_settings( |         override_settings( | ||||||
|             ORIGINALS_DIR=originals_dir, |             ORIGINALS_DIR=originals_dir, | ||||||
| @@ -143,7 +143,7 @@ class TestDecryptDocuments(FileSystemAssertsMixin, TestCase): | |||||||
|                 "originals", |                 "originals", | ||||||
|                 "0000004.pdf.gpg", |                 "0000004.pdf.gpg", | ||||||
|             ), |             ), | ||||||
|             os.path.join(originals_dir, "0000004.pdf.gpg"), |             originals_dir / "0000004.pdf.gpg", | ||||||
|         ) |         ) | ||||||
|         shutil.copy( |         shutil.copy( | ||||||
|             os.path.join( |             os.path.join( | ||||||
| @@ -153,7 +153,7 @@ class TestDecryptDocuments(FileSystemAssertsMixin, TestCase): | |||||||
|                 "thumbnails", |                 "thumbnails", | ||||||
|                 "0000004.webp.gpg", |                 "0000004.webp.gpg", | ||||||
|             ), |             ), | ||||||
|             os.path.join(thumb_dir, f"{doc.id:07}.webp.gpg"), |             thumb_dir / f"{doc.id:07}.webp.gpg", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|         call_command("decrypt_documents") |         call_command("decrypt_documents") | ||||||
|   | |||||||
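The decrypt test above is a compact catalogue of the os.path-to-pathlib substitutions made throughout this commit. The main correspondences, with hypothetical directory names:

import os
import tempfile
from pathlib import Path

media_dir = tempfile.mkdtemp()

# os.path.join(a, b, c)            ->  Path(a) / b / c
old_style = os.path.join(media_dir, "documents", "originals")
new_style = Path(media_dir) / "documents" / "originals"
assert str(new_style) == old_style

# os.makedirs(p, exist_ok=True)    ->  Path.mkdir(parents=True, exist_ok=True)
new_style.mkdir(parents=True, exist_ok=True)
assert new_style.is_dir()

# os.path.dirname(__file__)        ->  Path(__file__).parent
samples_old = os.path.join(os.path.dirname(__file__), "samples")
samples_new = Path(__file__).parent / "samples"

shutil.copy() accepts os.PathLike arguments, so the remaining os.path.join(...) sources and the new Path destinations in the hunks above can be mixed freely during the transition.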
| @@ -2139,7 +2139,7 @@ class SystemStatusView(PassUserMixin): | |||||||
|         classifier_error = None |         classifier_error = None | ||||||
|         classifier_status = None |         classifier_status = None | ||||||
|         try: |         try: | ||||||
|             classifier = load_classifier() |             classifier = load_classifier(raise_exception=True) | ||||||
|             if classifier is None: |             if classifier is None: | ||||||
|                 # Make sure classifier should exist |                 # Make sure classifier should exist | ||||||
|                 docs_queryset = Document.objects.exclude( |                 docs_queryset = Document.objects.exclude( | ||||||
| @@ -2159,7 +2159,7 @@ class SystemStatusView(PassUserMixin): | |||||||
|                             matching_algorithm=Tag.MATCH_AUTO, |                             matching_algorithm=Tag.MATCH_AUTO, | ||||||
|                         ).exists() |                         ).exists() | ||||||
|                     ) |                     ) | ||||||
|                     and not os.path.isfile(settings.MODEL_FILE) |                     and not settings.MODEL_FILE.exists() | ||||||
|                 ): |                 ): | ||||||
|                     # if classifier file doesn't exist just classify as a warning |                     # if classifier file doesn't exist just classify as a warning | ||||||
|                     classifier_error = "Classifier file does not exist (yet). Re-training may be pending." |                     classifier_error = "Classifier file does not exist (yet). Re-training may be pending." | ||||||
|   | |||||||
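The view change ties the threads together: load_classifier(raise_exception=True) lets a corrupt or version-incompatible model propagate to the surrounding try/except and be reported as ERROR, while a model file that merely does not exist yet stays a WARNING, now checked via Path.exists() on settings.MODEL_FILE instead of os.path.isfile(). A hedged sketch of that control flow; the helper names and the stub loader are stand-ins, not the real view code:

from __future__ import annotations

from pathlib import Path

def load_classifier_stub(model_file: Path, *, raise_exception: bool = False):
    # Stand-in loader: treat any existing model file as corrupt.
    if model_file.exists() and raise_exception:
        raise RuntimeError("Classifier model is corrupt")
    return None

def classifier_status(model_file: Path, needs_classifier: bool) -> tuple[str, str | None]:
    try:
        classifier = load_classifier_stub(model_file, raise_exception=True)
        if classifier is None and needs_classifier and not model_file.exists():
            # settings.MODEL_FILE is a Path now, so Path.exists()
            # replaces os.path.isfile(settings.MODEL_FILE).
            return "WARNING", "Classifier file does not exist (yet). Re-training may be pending."
        return "OK", None
    except Exception as err:
        # With raise_exception=True, corrupt or incompatible models
        # land here and surface as ERROR in the status payload.
        return "ERROR", str(err)

print(classifier_status(Path("/tmp/does_not_exist/model"), needs_classifier=True))
# -> ('WARNING', 'Classifier file does not exist (yet). Re-training may be pending.')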