Mirror of https://github.com/paperless-ngx/paperless-ngx.git, synced 2025-04-02 13:45:10 -05:00
Chore: Switch from os.path to pathlib.Path (#8325)
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
parent d06aac947d
commit 935d077836
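The commit below mechanically converts os / os.path call sites to pathlib.Path methods. For orientation, here is an illustrative, runnable sketch of the equivalences the hunks rely on (not part of the commit; the file and directory names are invented):

import tempfile
from pathlib import Path

# A throwaway directory so every call below actually runs; the names are invented.
tmp_dir = Path(tempfile.mkdtemp())
model_file = tmp_dir / "classifier.pickle"
model_file.write_bytes(b"demo")

print(model_file.is_file())   # was: os.path.isfile(model_file)
print(model_file.exists())    # was: os.path.exists(model_file)
print(model_file.suffix)      # was: os.path.splitext(model_file.name)[1] -> ".pickle"
print(model_file.stem)        # was: os.path.splitext(model_file.name)[0] -> "classifier"
print(tmp_dir.is_dir())       # was: os.path.isdir(tmp_dir)

with model_file.open("rb") as f:                   # was: open(model_file, "rb")
    print(f.read())

originals = tmp_dir / "documents" / "originals"    # was: os.path.join(tmp_dir, "documents", "originals")
originals.mkdir(parents=True, exist_ok=True)       # was: os.makedirs(originals, exist_ok=True)

model_file.unlink()           # was: os.unlink(model_file)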
@@ -38,20 +38,14 @@ ignore = ["DJ001", "SIM105", "RUF012"]
 [lint.per-file-ignores]
 ".github/scripts/*.py" = ["E501", "INP001", "SIM117"]
 "docker/wait-for-redis.py" = ["INP001", "T201"]
-"src/documents/barcodes.py" = ["PTH"] # TODO Enable & remove
-"src/documents/classifier.py" = ["PTH"] # TODO Enable & remove
 "src/documents/consumer.py" = ["PTH"] # TODO Enable & remove
 "src/documents/file_handling.py" = ["PTH"] # TODO Enable & remove
-"src/documents/index.py" = ["PTH"] # TODO Enable & remove
-"src/documents/management/commands/decrypt_documents.py" = ["PTH"] # TODO Enable & remove
 "src/documents/management/commands/document_consumer.py" = ["PTH"] # TODO Enable & remove
 "src/documents/management/commands/document_exporter.py" = ["PTH"] # TODO Enable & remove
-"src/documents/management/commands/document_importer.py" = ["PTH"] # TODO Enable & remove
 "src/documents/migrations/0012_auto_20160305_0040.py" = ["PTH"] # TODO Enable & remove
 "src/documents/migrations/0014_document_checksum.py" = ["PTH"] # TODO Enable & remove
 "src/documents/migrations/1003_mime_types.py" = ["PTH"] # TODO Enable & remove
 "src/documents/migrations/1012_fix_archive_files.py" = ["PTH"] # TODO Enable & remove
-"src/documents/migrations/1037_webp_encrypted_thumbnail_conversion.py" = ["PTH"] # TODO Enable & remove
 "src/documents/models.py" = ["SIM115", "PTH"] # TODO PTH Enable & remove
 "src/documents/parsers.py" = ["PTH"] # TODO Enable & remove
 "src/documents/signals/handlers.py" = ["PTH"] # TODO Enable & remove
@@ -3,6 +3,7 @@ import re
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
+from typing import TYPE_CHECKING

 from django.conf import settings
 from pdf2image import convert_from_path
@@ -21,6 +22,9 @@ from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import maybe_override_pixel_limit

+if TYPE_CHECKING:
+    from collections.abc import Callable
+
 logger = logging.getLogger("paperless.barcodes")


@@ -61,7 +65,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
         - Barcode support is enabled and the mime type is supported
         """
         if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
-            supported_mimes = {"application/pdf", "image/tiff"}
+            supported_mimes: set[str] = {"application/pdf", "image/tiff"}
         else:
             supported_mimes = {"application/pdf"}

@@ -71,16 +75,16 @@ class BarcodePlugin(ConsumeTaskPlugin):
             or settings.CONSUMER_ENABLE_TAG_BARCODE
         ) and self.input_doc.mime_type in supported_mimes

-    def setup(self):
+    def setup(self) -> None:
         self.temp_dir = tempfile.TemporaryDirectory(
             dir=self.base_tmp_dir,
             prefix="barcode",
         )
-        self.pdf_file = self.input_doc.original_file
+        self.pdf_file: Path = self.input_doc.original_file
         self._tiff_conversion_done = False
         self.barcodes: list[Barcode] = []

-    def run(self) -> str | None:
+    def run(self) -> None:
         # Some operations may use PIL, override pixel setting if needed
         maybe_override_pixel_limit()

@@ -158,7 +162,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
     def cleanup(self) -> None:
         self.temp_dir.cleanup()

-    def convert_from_tiff_to_pdf(self):
+    def convert_from_tiff_to_pdf(self) -> None:
         """
         May convert a TIFF image into a PDF, if the input is a TIFF and
         the TIFF has not been made into a PDF
@@ -223,7 +227,7 @@ class BarcodePlugin(ConsumeTaskPlugin):

         # Choose the library for reading
         if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
-            reader = self.read_barcodes_pyzbar
+            reader: Callable[[Image.Image], list[str]] = self.read_barcodes_pyzbar
             logger.debug("Scanning for barcodes using PYZBAR")
         else:
             reader = self.read_barcodes_zxing
@@ -236,7 +240,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
         logger.debug(f"PDF has {num_of_pages} pages")

         # Get limit from configuration
-        barcode_max_pages = (
+        barcode_max_pages: int = (
             num_of_pages
             if settings.CONSUMER_BARCODE_MAX_PAGES == 0
             else settings.CONSUMER_BARCODE_MAX_PAGES
@@ -311,7 +315,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
         self.detect()

         # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
-        asn_text = next(
+        asn_text: str | None = next(
             (x.value for x in self.barcodes if x.is_asn),
             None,
         )
@@ -333,36 +337,36 @@ class BarcodePlugin(ConsumeTaskPlugin):
         return asn

     @property
-    def tags(self) -> list[int] | None:
+    def tags(self) -> list[int]:
         """
         Search the parsed barcodes for any tags.
         Returns the detected tag ids (or empty list)
         """
-        tags = []
+        tags: list[int] = []

         # Ensure the barcodes have been read
         self.detect()

         for x in self.barcodes:
-            tag_texts = x.value
+            tag_texts: str = x.value

             for raw in tag_texts.split(","):
                 try:
-                    tag = None
+                    tag_str: str | None = None
                     for regex in settings.CONSUMER_TAG_BARCODE_MAPPING:
                         if re.match(regex, raw, flags=re.IGNORECASE):
                             sub = settings.CONSUMER_TAG_BARCODE_MAPPING[regex]
-                            tag = (
+                            tag_str = (
                                 re.sub(regex, sub, raw, flags=re.IGNORECASE)
                                 if sub
                                 else raw
                             )
                             break

-                    if tag:
+                    if tag_str:
                         tag, _ = Tag.objects.get_or_create(
-                            name__iexact=tag,
-                            defaults={"name": tag},
+                            name__iexact=tag_str,
+                            defaults={"name": tag_str},
                         )

                         logger.debug(
@@ -413,7 +417,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
         """

         document_paths = []
-        fname = self.input_doc.original_file.stem
+        fname: str = self.input_doc.original_file.stem
         with Pdf.open(self.pdf_file) as input_pdf:
             # Start with an empty document
             current_document: list[Page] = []
@@ -432,7 +436,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
                 logger.debug(f"Starting new document at idx {idx}")
                 current_document = []
                 documents.append(current_document)
-                keep_page = pages_to_split_on[idx]
+                keep_page: bool = pages_to_split_on[idx]
                 if keep_page:
                     # Keep the page
                     # (new document is started by asn barcode)
@@ -451,7 +455,7 @@ class BarcodePlugin(ConsumeTaskPlugin):

                 logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
                 savepath = Path(self.temp_dir.name) / output_filename
-                with open(savepath, "wb") as out:
+                with savepath.open("wb") as out:
                     dst.save(out)

                 copy_basic_file_stats(self.input_doc.original_file, savepath)

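A note on the open() rewrites in the hunks above: Path.open() accepts the same mode strings and returns the same file object as the builtin open(), so `with savepath.open("wb") as out:` is a drop-in replacement. A minimal, self-contained sketch (the file name is made up):

import tempfile
from pathlib import Path

# Invented file name; only the open() call pattern matters here.
savepath = Path(tempfile.mkdtemp()) / "document_0.pdf"

# Before: with open(savepath, "wb") as out:
# After: the Path object opens itself; same mode string, same file-object API.
with savepath.open("wb") as out:
    out.write(b"%PDF-1.7\n")  # stand-in for dst.save(out) in the real code

print(savepath.stat().st_size)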
@@ -1,16 +1,17 @@
 import logging
-import os
 import pickle
 import re
 import warnings
 from collections.abc import Iterator
 from hashlib import sha256
+from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Optional

 if TYPE_CHECKING:
     from datetime import datetime
-    from pathlib import Path
+
+    from numpy import ndarray

 from django.conf import settings
 from django.core.cache import cache
@@ -28,7 +29,7 @@ logger = logging.getLogger("paperless.classifier")

 class IncompatibleClassifierVersionError(Exception):
     def __init__(self, message: str, *args: object) -> None:
-        self.message = message
+        self.message: str = message
         super().__init__(*args)


@@ -36,8 +37,8 @@ class ClassifierModelCorruptError(Exception):
     pass


-def load_classifier() -> Optional["DocumentClassifier"]:
-    if not os.path.isfile(settings.MODEL_FILE):
+def load_classifier(*, raise_exception: bool = False) -> Optional["DocumentClassifier"]:
+    if not settings.MODEL_FILE.is_file():
         logger.debug(
             "Document classification model does not exist (yet), not "
             "performing automatic matching.",
@@ -50,22 +51,30 @@ def load_classifier() -> Optional["DocumentClassifier"]:

     except IncompatibleClassifierVersionError as e:
         logger.info(f"Classifier version incompatible: {e.message}, will re-train")
-        os.unlink(settings.MODEL_FILE)
+        Path(settings.MODEL_FILE).unlink()
         classifier = None
-    except ClassifierModelCorruptError:
+        if raise_exception:
+            raise e
+    except ClassifierModelCorruptError as e:
         # there's something wrong with the model file.
         logger.exception(
             "Unrecoverable error while loading document "
             "classification model, deleting model file.",
         )
-        os.unlink(settings.MODEL_FILE)
+        Path(settings.MODEL_FILE).unlink()
         classifier = None
-    except OSError:
+        if raise_exception:
+            raise e
+    except OSError as e:
         logger.exception("IO error while loading document classification model")
         classifier = None
-    except Exception: # pragma: no cover
+        if raise_exception:
+            raise e
+    except Exception as e: # pragma: no cover
         logger.exception("Unknown error while loading document classification model")
         classifier = None
+        if raise_exception:
+            raise e

     return classifier

@@ -76,7 +85,7 @@ class DocumentClassifier:
     # v9 - Changed from hashing to time/ids for re-train check
     FORMAT_VERSION = 9

-    def __init__(self):
+    def __init__(self) -> None:
         # last time a document changed and therefore training might be required
         self.last_doc_change_time: datetime | None = None
         # Hash of primary keys of AUTO matching values last used in training
@@ -95,7 +104,7 @@ class DocumentClassifier:
     def load(self) -> None:
         # Catch warnings for processing
         with warnings.catch_warnings(record=True) as w:
-            with open(settings.MODEL_FILE, "rb") as f:
+            with Path(settings.MODEL_FILE).open("rb") as f:
                 schema_version = pickle.load(f)

                 if schema_version != self.FORMAT_VERSION:
@@ -132,11 +141,11 @@ class DocumentClassifier:
                 ):
                     raise IncompatibleClassifierVersionError("sklearn version update")

-    def save(self):
+    def save(self) -> None:
         target_file: Path = settings.MODEL_FILE
-        target_file_temp = target_file.with_suffix(".pickle.part")
+        target_file_temp: Path = target_file.with_suffix(".pickle.part")

-        with open(target_file_temp, "wb") as f:
+        with target_file_temp.open("wb") as f:
             pickle.dump(self.FORMAT_VERSION, f)

             pickle.dump(self.last_doc_change_time, f)
@@ -153,7 +162,7 @@ class DocumentClassifier:

         target_file_temp.rename(target_file)

-    def train(self):
+    def train(self) -> bool:
         # Get non-inbox documents
         docs_queryset = (
             Document.objects.exclude(
@@ -190,7 +199,7 @@ class DocumentClassifier:
             hasher.update(y.to_bytes(4, "little", signed=True))
             labels_correspondent.append(y)

-            tags = sorted(
+            tags: list[int] = sorted(
                 tag.pk
                 for tag in doc.tags.filter(
                     matching_algorithm=MatchingModel.MATCH_AUTO,
@@ -236,9 +245,9 @@ class DocumentClassifier:
         # union with {-1} accounts for cases where all documents have
         # correspondents and types assigned, so -1 isn't part of labels_x, which
         # it usually is.
-        num_correspondents = len(set(labels_correspondent) | {-1}) - 1
-        num_document_types = len(set(labels_document_type) | {-1}) - 1
-        num_storage_paths = len(set(labels_storage_path) | {-1}) - 1
+        num_correspondents: int = len(set(labels_correspondent) | {-1}) - 1
+        num_document_types: int = len(set(labels_document_type) | {-1}) - 1
+        num_storage_paths: int = len(set(labels_storage_path) | {-1}) - 1

         logger.debug(
             f"{docs_queryset.count()} documents, {num_tags} tag(s), {num_correspondents} correspondent(s), "
@@ -266,7 +275,9 @@ class DocumentClassifier:
             min_df=0.01,
         )

-        data_vectorized = self.data_vectorizer.fit_transform(content_generator())
+        data_vectorized: ndarray = self.data_vectorizer.fit_transform(
+            content_generator(),
+        )

         # See the notes here:
         # https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html
@@ -284,7 +295,7 @@ class DocumentClassifier:
                 label[0] if len(label) == 1 else -1 for label in labels_tags
             ]
             self.tags_binarizer = LabelBinarizer()
-            labels_tags_vectorized = self.tags_binarizer.fit_transform(
+            labels_tags_vectorized: ndarray = self.tags_binarizer.fit_transform(
                 labels_tags,
             ).ravel()
         else:

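The load_classifier() change above introduces a keyword-only raise_exception flag: ordinary callers still get None when the model cannot be loaded, while a caller such as the system-status view can opt into having the failure re-raised after cleanup. A reduced sketch of that pattern, assuming only the control flow matters (the names below are placeholders, not the real classifier API):

import tempfile
from pathlib import Path


class ModelCorruptError(Exception):
    """Stand-in for ClassifierModelCorruptError."""


def read_model(path: Path) -> bytes:
    data = path.read_bytes()           # may raise FileNotFoundError (an OSError)
    if not data.startswith(b"OK"):
        raise ModelCorruptError(path)  # simulated corruption check
    return data


def load_model(path: Path, *, raise_exception: bool = False):
    model = None
    try:
        model = read_model(path)
    except ModelCorruptError as e:
        path.unlink(missing_ok=True)   # delete the bad file, as the real code does
        if raise_exception:
            raise e
    except OSError as e:
        if raise_exception:
            raise e
    return model


bad = Path(tempfile.mkdtemp()) / "classifier.pickle"
bad.write_bytes(b"garbage")
print(load_model(bad))                  # -> None; the corrupt file has been removed
# load_model(bad, raise_exception=True) # would now raise FileNotFoundError instead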
@@ -1,11 +1,11 @@
 import logging
 import math
-import os
 from collections import Counter
 from contextlib import contextmanager
 from datetime import datetime
 from datetime import timezone
 from shutil import rmtree
+from typing import Literal

 from django.conf import settings
 from django.db.models import QuerySet
@@ -47,7 +47,7 @@ from documents.models import User
 logger = logging.getLogger("paperless.index")


-def get_schema():
+def get_schema() -> Schema:
     return Schema(
         id=NUMERIC(stored=True, unique=True),
         title=TEXT(sortable=True),
@@ -93,7 +93,7 @@ def open_index(recreate=False) -> FileIndex:
         logger.exception("Error while opening the index, recreating.")

     # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
-    if os.path.isdir(settings.INDEX_DIR):
+    if settings.INDEX_DIR.is_dir():
         rmtree(settings.INDEX_DIR)
     settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)

@@ -123,7 +123,7 @@ def open_index_searcher() -> Searcher:
         searcher.close()


-def update_document(writer: AsyncWriter, doc: Document):
+def update_document(writer: AsyncWriter, doc: Document) -> None:
     tags = ",".join([t.name for t in doc.tags.all()])
     tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
     notes = ",".join([str(c.note) for c in Note.objects.filter(document=doc)])
@@ -133,7 +133,7 @@ def update_document(writer: AsyncWriter, doc: Document):
     custom_fields_ids = ",".join(
         [str(f.field.id) for f in CustomFieldInstance.objects.filter(document=doc)],
     )
-    asn = doc.archive_serial_number
+    asn: int | None = doc.archive_serial_number
     if asn is not None and (
         asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
         or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
@@ -149,7 +149,7 @@ def update_document(writer: AsyncWriter, doc: Document):
         doc,
         only_with_perms_in=["view_document"],
     )
-    viewer_ids = ",".join([str(u.id) for u in users_with_perms])
+    viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
     writer.update_document(
         id=doc.pk,
         title=doc.title,
@@ -187,20 +187,20 @@ def update_document(writer: AsyncWriter, doc: Document):
     )


-def remove_document(writer: AsyncWriter, doc: Document):
+def remove_document(writer: AsyncWriter, doc: Document) -> None:
     remove_document_by_id(writer, doc.pk)


-def remove_document_by_id(writer: AsyncWriter, doc_id):
+def remove_document_by_id(writer: AsyncWriter, doc_id) -> None:
     writer.delete_by_term("id", doc_id)


-def add_or_update_document(document: Document):
+def add_or_update_document(document: Document) -> None:
     with open_index_writer() as writer:
         update_document(writer, document)


-def remove_document_from_index(document: Document):
+def remove_document_from_index(document: Document) -> None:
     with open_index_writer() as writer:
         remove_document(writer, document)

@@ -218,11 +218,11 @@ class MappedDocIdSet(DocIdSet):
         self.document_ids = BitSet(document_ids, size=max_id)
         self.ixreader = ixreader

-    def __contains__(self, docnum):
+    def __contains__(self, docnum) -> bool:
         document_id = self.ixreader.stored_fields(docnum)["id"]
         return document_id in self.document_ids

-    def __bool__(self):
+    def __bool__(self) -> Literal[True]:
         # searcher.search ignores a filter if it's "falsy".
         # We use this hack so this DocIdSet, when used as a filter, is never ignored.
         return True
@@ -232,13 +232,13 @@ class DelayedQuery:
     def _get_query(self):
         raise NotImplementedError # pragma: no cover

-    def _get_query_sortedby(self):
+    def _get_query_sortedby(self) -> tuple[None, Literal[False]] | tuple[str, bool]:
         if "ordering" not in self.query_params:
             return None, False

         field: str = self.query_params["ordering"]

-        sort_fields_map = {
+        sort_fields_map: dict[str, str] = {
             "created": "created",
             "modified": "modified",
             "added": "added",
@@ -268,7 +268,7 @@ class DelayedQuery:
         query_params,
         page_size,
         filter_queryset: QuerySet,
-    ):
+    ) -> None:
         self.searcher = searcher
         self.query_params = query_params
         self.page_size = page_size
@@ -276,7 +276,7 @@ class DelayedQuery:
         self.first_score = None
         self.filter_queryset = filter_queryset

-    def __len__(self):
+    def __len__(self) -> int:
         page = self[0:1]
         return len(page)

@@ -334,7 +334,7 @@ class LocalDateParser(English):


 class DelayedFullTextQuery(DelayedQuery):
-    def _get_query(self):
+    def _get_query(self) -> tuple:
         q_str = self.query_params["query"]
         qp = MultifieldParser(
             [
@@ -364,7 +364,7 @@ class DelayedFullTextQuery(DelayedQuery):


 class DelayedMoreLikeThisQuery(DelayedQuery):
-    def _get_query(self):
+    def _get_query(self) -> tuple:
         more_like_doc_id = int(self.query_params["more_like_id"])
         content = Document.objects.get(id=more_like_doc_id).content

@@ -379,7 +379,7 @@ class DelayedMoreLikeThisQuery(DelayedQuery):
         q = query.Or(
             [query.Term("content", word, boost=weight) for word, weight in kts],
         )
-        mask = {docnum}
+        mask: set = {docnum}

         return q, mask

@@ -389,7 +389,7 @@ def autocomplete(
     term: str,
     limit: int = 10,
     user: User | None = None,
-):
+) -> list:
     """
     Mimics whoosh.reading.IndexReader.most_distinctive_terms with permissions
     and without scoring
@@ -402,7 +402,7 @@ def autocomplete(
         # content field query instead and return bogus, not text data
         qp.remove_plugin_class(FieldsPlugin)
         q = qp.parse(f"{term.lower()}*")
-        user_criterias = get_permissions_criterias(user)
+        user_criterias: list = get_permissions_criterias(user)

         results = s.search(
             q,
@@ -417,14 +417,14 @@ def autocomplete(
                     termCounts[match] += 1
         terms = [t for t, _ in termCounts.most_common(limit)]

-        term_encoded = term.encode("UTF-8")
+        term_encoded: bytes = term.encode("UTF-8")
         if term_encoded in terms:
             terms.insert(0, terms.pop(terms.index(term_encoded)))

     return terms


-def get_permissions_criterias(user: User | None = None):
+def get_permissions_criterias(user: User | None = None) -> list:
     user_criterias = [query.Term("has_owner", False)]
     if user is not None:
         if user.is_superuser: # superusers see all docs

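Because settings.INDEX_DIR is used as a Path in this codebase (the diff calls is_dir() and mkdir() on it directly), the recovery branch of open_index() needs no os.path helpers at all; shutil.rmtree() also accepts Path objects. An illustrative, runnable sketch with a stand-in directory:

import shutil
import tempfile
from pathlib import Path

# Stand-in for settings.INDEX_DIR, which the diff treats as a Path.
INDEX_DIR = Path(tempfile.mkdtemp()) / "index"
INDEX_DIR.mkdir(parents=True, exist_ok=True)

# Same shape as the recovery branch of open_index() above:
if INDEX_DIR.is_dir():                     # was: os.path.isdir(settings.INDEX_DIR)
    shutil.rmtree(INDEX_DIR)               # rmtree accepts Path objects directly
INDEX_DIR.mkdir(parents=True, exist_ok=True)

print(INDEX_DIR.is_dir())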
@@ -1,4 +1,4 @@
-import os
+from pathlib import Path

 from django.conf import settings
 from django.core.management.base import BaseCommand
@@ -14,7 +14,7 @@ class Command(BaseCommand):
         "state to an unencrypted one (or vice-versa)"
     )

-    def add_arguments(self, parser):
+    def add_arguments(self, parser) -> None:
         parser.add_argument(
             "--passphrase",
             help=(
@@ -23,7 +23,7 @@ class Command(BaseCommand):
             ),
         )

-    def handle(self, *args, **options):
+    def handle(self, *args, **options) -> None:
         try:
             self.stdout.write(
                 self.style.WARNING(
@@ -52,7 +52,7 @@ class Command(BaseCommand):

         self.__gpg_to_unencrypted(passphrase)

-    def __gpg_to_unencrypted(self, passphrase: str):
+    def __gpg_to_unencrypted(self, passphrase: str) -> None:
         encrypted_files = Document.objects.filter(
             storage_type=Document.STORAGE_TYPE_GPG,
         )
@@ -69,7 +69,7 @@ class Command(BaseCommand):

             document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED

-            ext = os.path.splitext(document.filename)[1]
+            ext: str = Path(document.filename).suffix

             if not ext == ".gpg":
                 raise CommandError(
@@ -77,12 +77,12 @@ class Command(BaseCommand):
                     f"end with .gpg",
                 )

-            document.filename = os.path.splitext(document.filename)[0]
+            document.filename = Path(document.filename).stem

-            with open(document.source_path, "wb") as f:
+            with document.source_path.open("wb") as f:
                 f.write(raw_document)

-            with open(document.thumbnail_path, "wb") as f:
+            with document.thumbnail_path.open("wb") as f:
                 f.write(raw_thumb)

             Document.objects.filter(id=document.id).update(
@@ -91,4 +91,4 @@ class Command(BaseCommand):
             )

             for path in old_paths:
-                os.unlink(path)
+                path.unlink()

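One subtlety in the decrypt command above: os.path.splitext(name)[0] keeps any directory prefix, whereas Path(name).stem keeps only the final path component, so the two agree only for names without a directory part. The invented names below show the difference:

import os
from pathlib import Path

name = "0000004.pdf.gpg"                # invented bare filename
nested = "archive/0000004.pdf.gpg"      # invented name with a directory part

print(os.path.splitext(name)[1], Path(name).suffix)  # ".gpg" ".gpg" -- same
print(os.path.splitext(name)[0], Path(name).stem)    # "0000004.pdf" "0000004.pdf" -- same

print(os.path.splitext(nested)[0])      # "archive/0000004.pdf" -- keeps the directory
print(Path(nested).stem)                # "0000004.pdf" -- directory part is dropped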
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+from collections.abc import Generator
 from contextlib import contextmanager
 from pathlib import Path

@@ -44,7 +45,7 @@ if settings.AUDIT_LOG_ENABLED:


 @contextmanager
-def disable_signal(sig, receiver, sender):
+def disable_signal(sig, receiver, sender) -> Generator:
     try:
         sig.disconnect(receiver=receiver, sender=sender)
         yield
@@ -58,7 +59,7 @@ class Command(CryptMixin, BaseCommand):
         "documents it refers to."
     )

-    def add_arguments(self, parser):
+    def add_arguments(self, parser) -> None:
         parser.add_argument("source")

         parser.add_argument(
@@ -90,7 +91,7 @@ class Command(CryptMixin, BaseCommand):
           - Are there existing users or documents in the database?
         """

-        def pre_check_maybe_not_empty():
+        def pre_check_maybe_not_empty() -> None:
             # Skip this check if operating only on the database
             # We can expect data to exist in that case
             if not self.data_only:
@@ -122,7 +123,7 @@ class Command(CryptMixin, BaseCommand):
                     ),
                 )

-        def pre_check_manifest_exists():
+        def pre_check_manifest_exists() -> None:
             if not (self.source / "manifest.json").exists():
                 raise CommandError(
                     "That directory doesn't appear to contain a manifest.json file.",
@@ -141,7 +142,7 @@ class Command(CryptMixin, BaseCommand):
         """
         Loads manifest data from the various JSON files for parsing and loading the database
         """
-        main_manifest_path = self.source / "manifest.json"
+        main_manifest_path: Path = self.source / "manifest.json"

         with main_manifest_path.open() as infile:
             self.manifest = json.load(infile)
@@ -158,8 +159,8 @@ class Command(CryptMixin, BaseCommand):

         Must account for the old style of export as well, with just version.json
         """
-        version_path = self.source / "version.json"
-        metadata_path = self.source / "metadata.json"
+        version_path: Path = self.source / "version.json"
+        metadata_path: Path = self.source / "metadata.json"
         if not version_path.exists() and not metadata_path.exists():
             self.stdout.write(
                 self.style.NOTICE("No version.json or metadata.json file located"),
@@ -221,7 +222,7 @@ class Command(CryptMixin, BaseCommand):
             )
             raise e

-    def handle(self, *args, **options):
+    def handle(self, *args, **options) -> None:
         logging.getLogger().handlers[0].level = logging.ERROR

         self.source = Path(options["source"]).resolve()
@@ -290,13 +291,13 @@ class Command(CryptMixin, BaseCommand):
                 no_progress_bar=self.no_progress_bar,
             )

-    def check_manifest_validity(self):
+    def check_manifest_validity(self) -> None:
         """
         Attempts to verify the manifest is valid. Namely checking the files
         referred to exist and the files can be read from
         """

-        def check_document_validity(document_record: dict):
+        def check_document_validity(document_record: dict) -> None:
             if EXPORTER_FILE_NAME not in document_record:
                 raise CommandError(
                     "The manifest file contains a record which does not "
@@ -341,7 +342,7 @@ class Command(CryptMixin, BaseCommand):
             if not self.data_only and record["model"] == "documents.document":
                 check_document_validity(record)

-    def _import_files_from_manifest(self):
+    def _import_files_from_manifest(self) -> None:
         settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
         settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
         settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
@@ -356,24 +357,24 @@ class Command(CryptMixin, BaseCommand):
             document = Document.objects.get(pk=record["pk"])

             doc_file = record[EXPORTER_FILE_NAME]
-            document_path = os.path.join(self.source, doc_file)
+            document_path = self.source / doc_file

             if EXPORTER_THUMBNAIL_NAME in record:
                 thumb_file = record[EXPORTER_THUMBNAIL_NAME]
-                thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()
+                thumbnail_path = (self.source / thumb_file).resolve()
             else:
                 thumbnail_path = None

             if EXPORTER_ARCHIVE_NAME in record:
                 archive_file = record[EXPORTER_ARCHIVE_NAME]
-                archive_path = os.path.join(self.source, archive_file)
+                archive_path = self.source / archive_file
             else:
                 archive_path = None

             document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED

             with FileLock(settings.MEDIA_LOCK):
-                if os.path.isfile(document.source_path):
+                if Path(document.source_path).is_file():
                     raise FileExistsError(document.source_path)

                 create_source_path_directory(document.source_path)
@@ -418,8 +419,8 @@ class Command(CryptMixin, BaseCommand):
         had_at_least_one_record = False

         for crypt_config in self.CRYPT_FIELDS:
-            importer_model = crypt_config["model_name"]
-            crypt_fields = crypt_config["fields"]
+            importer_model: str = crypt_config["model_name"]
+            crypt_fields: str = crypt_config["fields"]
             for record in filter(
                 lambda x: x["model"] == importer_model,
                 self.manifest,

@@ -15,7 +15,7 @@ from documents.parsers import run_convert
 logger = logging.getLogger("paperless.migrations")


-def _do_convert(work_package):
+def _do_convert(work_package) -> None:
     (
         existing_encrypted_thumbnail,
         converted_encrypted_thumbnail,
@@ -30,13 +30,13 @@ def _do_convert(work_package):
         # Decrypt png
         decrypted_thumbnail = existing_encrypted_thumbnail.with_suffix("").resolve()

-        with open(existing_encrypted_thumbnail, "rb") as existing_encrypted_file:
+        with existing_encrypted_thumbnail.open("rb") as existing_encrypted_file:
             raw_thumb = gpg.decrypt_file(
                 existing_encrypted_file,
                 passphrase=passphrase,
                 always_trust=True,
             ).data
-        with open(decrypted_thumbnail, "wb") as decrypted_file:
+        with Path(decrypted_thumbnail).open("wb") as decrypted_file:
             decrypted_file.write(raw_thumb)

         converted_decrypted_thumbnail = Path(
@@ -62,7 +62,7 @@ def _do_convert(work_package):
         )

         # Encrypt webp
-        with open(converted_decrypted_thumbnail, "rb") as converted_decrypted_file:
+        with Path(converted_decrypted_thumbnail).open("rb") as converted_decrypted_file:
             encrypted = gpg.encrypt_file(
                 fileobj_or_path=converted_decrypted_file,
                 recipients=None,
@@ -71,7 +71,9 @@ def _do_convert(work_package):
                 always_trust=True,
             ).data

-        with open(converted_encrypted_thumbnail, "wb") as converted_encrypted_file:
+        with Path(converted_encrypted_thumbnail).open(
+            "wb",
+        ) as converted_encrypted_file:
             converted_encrypted_file.write(encrypted)

         # Copy newly created thumbnail to thumbnail directory
@@ -95,8 +97,8 @@ def _do_convert(work_package):
         logger.error(f"Error converting thumbnail (existing file unchanged): {e}")


-def _convert_encrypted_thumbnails_to_webp(apps, schema_editor):
-    start = time.time()
+def _convert_encrypted_thumbnails_to_webp(apps, schema_editor) -> None:
+    start: float = time.time()

     with tempfile.TemporaryDirectory() as tempdir:
         work_packages = []
@@ -111,15 +113,15 @@ def _convert_encrypted_thumbnails_to_webp(apps, schema_editor):
             )

         for file in Path(settings.THUMBNAIL_DIR).glob("*.png.gpg"):
-            existing_thumbnail = file.resolve()
+            existing_thumbnail: Path = file.resolve()

             # Change the existing filename suffix from png to webp
-            converted_thumbnail_name = Path(
+            converted_thumbnail_name: str = Path(
                 str(existing_thumbnail).replace(".png.gpg", ".webp.gpg"),
             ).name

             # Create the expected output filename in the tempdir
-            converted_thumbnail = (
+            converted_thumbnail: Path = (
                 Path(tempdir) / Path(converted_thumbnail_name)
             ).resolve()

@@ -143,8 +145,8 @@ def _convert_encrypted_thumbnails_to_webp(apps, schema_editor):
         ) as pool:
             pool.map(_do_convert, work_packages)

-    end = time.time()
-    duration = end - start
+    end: float = time.time()
+    duration: float = end - start

     logger.info(f"Conversion completed in {duration:.3f}s")

@@ -173,7 +173,7 @@ class TestSystemStatus(APITestCase):
         self.assertEqual(response.data["tasks"]["index_status"], "OK")
         self.assertIsNotNone(response.data["tasks"]["index_last_modified"])

-    @override_settings(INDEX_DIR="/tmp/index/")
+    @override_settings(INDEX_DIR=Path("/tmp/index/"))
     @mock.patch("documents.index.open_index", autospec=True)
     def test_system_status_index_error(self, mock_open_index):
         """
@@ -193,7 +193,7 @@ class TestSystemStatus(APITestCase):
         self.assertEqual(response.data["tasks"]["index_status"], "ERROR")
         self.assertIsNotNone(response.data["tasks"]["index_error"])

-    @override_settings(DATA_DIR="/tmp/does_not_exist/data/")
+    @override_settings(DATA_DIR=Path("/tmp/does_not_exist/data/"))
     def test_system_status_classifier_ok(self):
         """
         GIVEN:
@@ -222,7 +222,7 @@ class TestSystemStatus(APITestCase):
         THEN:
             - The response contains an WARNING classifier status
         """
-        with override_settings(MODEL_FILE="does_not_exist"):
+        with override_settings(MODEL_FILE=Path("does_not_exist")):
             Document.objects.create(
                 title="Test Document",
             )
@@ -233,7 +233,11 @@ class TestSystemStatus(APITestCase):
             self.assertEqual(response.data["tasks"]["classifier_status"], "WARNING")
             self.assertIsNotNone(response.data["tasks"]["classifier_error"])

-    def test_system_status_classifier_error(self):
+    @mock.patch(
+        "documents.classifier.load_classifier",
+        side_effect=ClassifierModelCorruptError(),
+    )
+    def test_system_status_classifier_error(self, mock_load_classifier):
         """
         GIVEN:
             - The classifier does exist but is corrupt
@@ -248,25 +252,23 @@ class TestSystemStatus(APITestCase):
             tempfile.NamedTemporaryFile(
                 dir="/tmp",
                 delete=False,
             ) as does_exist,
-            override_settings(MODEL_FILE=does_exist),
+            override_settings(MODEL_FILE=Path(does_exist.name)),
         ):
-            with mock.patch("documents.classifier.load_classifier") as mock_load:
-                mock_load.side_effect = ClassifierModelCorruptError()
-                Document.objects.create(
-                    title="Test Document",
-                )
-                Tag.objects.create(
-                    name="Test Tag",
-                    matching_algorithm=Tag.MATCH_AUTO,
-                )
-                self.client.force_login(self.user)
-                response = self.client.get(self.ENDPOINT)
-                self.assertEqual(response.status_code, status.HTTP_200_OK)
-                self.assertEqual(
-                    response.data["tasks"]["classifier_status"],
-                    "ERROR",
-                )
-                self.assertIsNotNone(response.data["tasks"]["classifier_error"])
+            Document.objects.create(
+                title="Test Document",
+            )
+            Tag.objects.create(
+                name="Test Tag",
+                matching_algorithm=Tag.MATCH_AUTO,
+            )
+            self.client.force_login(self.user)
+            response = self.client.get(self.ENDPOINT)
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
+            self.assertEqual(
+                response.data["tasks"]["classifier_status"],
+                "ERROR",
+            )
+            self.assertIsNotNone(response.data["tasks"]["classifier_error"])

     def test_system_status_classifier_ok_no_objects(self):
@@ -278,7 +280,7 @@ class TestSystemStatus(APITestCase):
         THEN:
             - The response contains an OK classifier status
         """
-        with override_settings(MODEL_FILE="does_not_exist"):
+        with override_settings(MODEL_FILE=Path("does_not_exist")):
             self.client.force_login(self.user)
             response = self.client.get(self.ENDPOINT)
             self.assertEqual(response.status_code, status.HTTP_200_OK)

@@ -650,7 +650,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
         Path(settings.MODEL_FILE).touch()
         self.assertTrue(os.path.exists(settings.MODEL_FILE))

-        load.side_effect = IncompatibleClassifierVersionError("Dummey Error")
+        load.side_effect = IncompatibleClassifierVersionError("Dummy Error")
         self.assertIsNone(load_classifier())
         self.assertFalse(os.path.exists(settings.MODEL_FILE))

@@ -673,3 +673,25 @@ class TestClassifier(DirectoriesMixin, TestCase):
         ):
             classifier = load_classifier()
             self.assertIsNone(classifier)
+
+    @mock.patch("documents.classifier.DocumentClassifier.load")
+    def test_load_classifier_raise_exception(self, mock_load):
+        Path(settings.MODEL_FILE).touch()
+        mock_load.side_effect = IncompatibleClassifierVersionError("Dummy Error")
+        with self.assertRaises(IncompatibleClassifierVersionError):
+            load_classifier(raise_exception=True)
+
+        Path(settings.MODEL_FILE).touch()
+        mock_load.side_effect = ClassifierModelCorruptError()
+        with self.assertRaises(ClassifierModelCorruptError):
+            load_classifier(raise_exception=True)
+
+        Path(settings.MODEL_FILE).touch()
+        mock_load.side_effect = OSError()
+        with self.assertRaises(OSError):
+            load_classifier(raise_exception=True)
+
+        Path(settings.MODEL_FILE).touch()
+        mock_load.side_effect = Exception()
+        with self.assertRaises(Exception):
+            load_classifier(raise_exception=True)

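The new raise_exception tests above combine a mocked side_effect with assertRaises. Outside of Django, the same pattern reduces to the following self-contained sketch (class and function names are placeholders, not the real paperless API):

from unittest import TestCase, main, mock


class Loader:
    """Placeholder for DocumentClassifier; load() is what the real test patches."""

    def load(self):
        return "ok"


def load_classifier(*, raise_exception: bool = False):
    """Reduced stand-in for documents.classifier.load_classifier."""
    classifier = Loader()
    try:
        classifier.load()
        return classifier
    except OSError:
        if raise_exception:
            raise
        return None


class RaiseExceptionTest(TestCase):
    @mock.patch.object(Loader, "load", side_effect=OSError("disk error"))
    def test_propagates_when_asked(self, mock_load):
        with self.assertRaises(OSError):
            load_classifier(raise_exception=True)

    @mock.patch.object(Loader, "load", side_effect=OSError("disk error"))
    def test_swallows_by_default(self, mock_load):
        self.assertIsNone(load_classifier())


if __name__ == "__main__":
    main()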
@@ -108,18 +108,18 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):

 class TestDecryptDocuments(FileSystemAssertsMixin, TestCase):
     @override_settings(
-        ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
-        THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
+        ORIGINALS_DIR=(Path(__file__).parent / "samples" / "originals"),
+        THUMBNAIL_DIR=(Path(__file__).parent / "samples" / "thumb"),
         PASSPHRASE="test",
         FILENAME_FORMAT=None,
     )
     @mock.patch("documents.management.commands.decrypt_documents.input")
     def test_decrypt(self, m):
         media_dir = tempfile.mkdtemp()
-        originals_dir = os.path.join(media_dir, "documents", "originals")
-        thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
-        os.makedirs(originals_dir, exist_ok=True)
-        os.makedirs(thumb_dir, exist_ok=True)
+        originals_dir = Path(media_dir) / "documents" / "originals"
+        thumb_dir = Path(media_dir) / "documents" / "thumbnails"
+        originals_dir.mkdir(parents=True, exist_ok=True)
+        thumb_dir.mkdir(parents=True, exist_ok=True)

         override_settings(
             ORIGINALS_DIR=originals_dir,
@@ -143,7 +143,7 @@ class TestDecryptDocuments(FileSystemAssertsMixin, TestCase):
                 "originals",
                 "0000004.pdf.gpg",
             ),
-            os.path.join(originals_dir, "0000004.pdf.gpg"),
+            originals_dir / "0000004.pdf.gpg",
         )
         shutil.copy(
             os.path.join(
@@ -153,7 +153,7 @@ class TestDecryptDocuments(FileSystemAssertsMixin, TestCase):
                 "thumbnails",
                 "0000004.webp.gpg",
             ),
-            os.path.join(thumb_dir, f"{doc.id:07}.webp.gpg"),
+            thumb_dir / f"{doc.id:07}.webp.gpg",
         )

         call_command("decrypt_documents")

@@ -2139,7 +2139,7 @@ class SystemStatusView(PassUserMixin):
         classifier_error = None
         classifier_status = None
         try:
-            classifier = load_classifier()
+            classifier = load_classifier(raise_exception=True)
             if classifier is None:
                 # Make sure classifier should exist
                 docs_queryset = Document.objects.exclude(
@@ -2159,7 +2159,7 @@ class SystemStatusView(PassUserMixin):
                             matching_algorithm=Tag.MATCH_AUTO,
                         ).exists()
                     )
-                    and not os.path.isfile(settings.MODEL_FILE)
+                    and not settings.MODEL_FILE.exists()
                 ):
                     # if classifier file doesn't exist just classify as a warning
                     classifier_error = "Classifier file does not exist (yet). Re-training may be pending."