From 1771d18a21684ee5914e599a475617de1fc08d2b Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Fri, 11 Mar 2022 10:55:51 -0800 Subject: [PATCH] Runs the pre-commit hooks over all the Python files --- src/documents/__init__.py | 5 +- src/documents/admin.py | 14 +- src/documents/apps.py | 1 - src/documents/bulk_download.py | 11 +- src/documents/bulk_edit.py | 9 +- src/documents/checks.py | 23 +- src/documents/classifier.py | 45 +-- src/documents/consumer.py | 70 ++-- src/documents/file_handling.py | 20 +- src/documents/filters.py | 15 +- src/documents/index.py | 45 ++- src/documents/loggers.py | 2 - .../management/commands/decrypt_documents.py | 17 +- .../management/commands/document_archiver.py | 31 +- .../management/commands/document_consumer.py | 19 +- .../commands/document_create_classifier.py | 3 +- .../management/commands/document_exporter.py | 72 ++-- .../management/commands/document_importer.py | 33 +- .../management/commands/document_index.py | 4 +- .../management/commands/document_renamer.py | 7 +- .../management/commands/document_retagger.py | 12 +- .../commands/document_sanity_checker.py | 3 +- .../commands/document_thumbnails.py | 11 +- .../management/commands/manage_superuser.py | 7 +- src/documents/matching.py | 21 +- src/documents/models.py | 48 ++- src/documents/parsers.py | 12 +- src/documents/sanity_checker.py | 13 +- src/documents/serialisers.py | 39 ++- src/documents/signals/handlers.py | 69 ++-- src/documents/tasks.py | 21 +- src/documents/tests/factories.py | 3 +- src/documents/tests/test_admin.py | 7 +- src/documents/tests/test_api.py | 317 +++++++++++------- src/documents/tests/test_checks.py | 11 +- src/documents/tests/test_classifier.py | 141 +++++--- src/documents/tests/test_consumer.py | 82 +++-- src/documents/tests/test_date_parsing.py | 11 +- src/documents/tests/test_document_model.py | 6 +- src/documents/tests/test_file_handling.py | 88 +++-- src/documents/tests/test_importer.py | 10 +- src/documents/tests/test_index.py | 11 +- src/documents/tests/test_management.py | 30 +- .../tests/test_management_consumer.py | 28 +- .../tests/test_management_exporter.py | 67 ++-- .../tests/test_management_retagger.py | 60 +++- .../tests/test_management_superuser.py | 6 +- .../tests/test_management_thumbnails.py | 6 +- src/documents/tests/test_matchables.py | 42 ++- .../tests/test_migration_archive_files.py | 139 ++++++-- .../tests/test_migration_mime_type.py | 16 +- .../test_migration_remove_null_characters.py | 3 +- .../tests/test_migration_tag_colors.py | 3 +- src/documents/tests/test_models.py | 6 +- src/documents/tests/test_parsers.py | 18 +- src/documents/tests/test_sanity_check.py | 7 +- src/documents/tests/test_settings.py | 6 +- src/documents/tests/test_tasks.py | 15 +- src/documents/tests/test_views.py | 5 +- src/documents/tests/utils.py | 5 +- src/documents/views.py | 138 ++++---- src/paperless/__init__.py | 5 +- src/paperless/asgi.py | 8 +- src/paperless/auth.py | 4 +- src/paperless/checks.py | 17 +- src/paperless/consumers.py | 9 +- src/paperless/db.py | 1 - src/paperless/middleware.py | 1 - src/paperless/settings.py | 34 +- src/paperless/tests/test_checks.py | 11 +- src/paperless/tests/test_websockets.py | 7 +- src/paperless/urls.py | 62 ++-- src/paperless/views.py | 6 +- src/paperless/workers.py | 3 +- src/paperless/wsgi.py | 1 - src/paperless_mail/admin.py | 12 +- src/paperless_mail/apps.py | 1 - src/paperless_mail/mail.py | 56 ++-- .../management/commands/mail_fetcher.py | 4 +- src/paperless_mail/models.py | 41 ++- src/paperless_mail/tasks.py | 3 +- src/paperless_mail/tests/test_mail.py | 111 ++++-- src/paperless_tesseract/__init__.py | 5 +- src/paperless_tesseract/apps.py | 1 - src/paperless_tesseract/checks.py | 12 +- src/paperless_tesseract/parsers.py | 33 +- src/paperless_tesseract/tests/test_checks.py | 8 +- src/paperless_tesseract/tests/test_parser.py | 109 ++++-- src/paperless_text/apps.py | 1 - src/paperless_text/parsers.py | 5 +- src/paperless_text/tests/test_parser.py | 7 +- src/paperless_tika/parsers.py | 46 +-- src/paperless_tika/signals.py | 8 +- src/paperless_tika/tests/test_tika_parser.py | 8 +- 94 files changed, 1638 insertions(+), 991 deletions(-) diff --git a/src/documents/__init__.py b/src/documents/__init__.py index 5c9f358c3..dc94f2bdd 100644 --- a/src/documents/__init__.py +++ b/src/documents/__init__.py @@ -1,2 +1,5 @@ # this is here so that django finds the checks. -from .checks import * +from .checks import changed_password_check +from .checks import parser_check + +__all__ = ["changed_password_check", "parser_check"] diff --git a/src/documents/admin.py b/src/documents/admin.py index 88e2da50e..0551278ef 100644 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -1,13 +1,11 @@ from django.contrib import admin -from .models import ( - Correspondent, - Document, - DocumentType, - Tag, - SavedView, - SavedViewFilterRule, -) +from .models import Correspondent +from .models import Document +from .models import DocumentType +from .models import SavedView +from .models import SavedViewFilterRule +from .models import Tag class CorrespondentAdmin(admin.ModelAdmin): diff --git a/src/documents/apps.py b/src/documents/apps.py index 0a59fef51..f4802532e 100644 --- a/src/documents/apps.py +++ b/src/documents/apps.py @@ -1,5 +1,4 @@ from django.apps import AppConfig - from django.utils.translation import gettext_lazy as _ diff --git a/src/documents/bulk_download.py b/src/documents/bulk_download.py index 11770968d..cf0b8949f 100644 --- a/src/documents/bulk_download.py +++ b/src/documents/bulk_download.py @@ -8,7 +8,10 @@ class BulkArchiveStrategy: self.zipf = zipf def make_unique_filename( - self, doc: Document, archive: bool = False, folder: str = "" + self, + doc: Document, + archive: bool = False, + folder: str = "", ): counter = 0 while True: @@ -34,7 +37,8 @@ class ArchiveOnlyStrategy(BulkArchiveStrategy): def add_document(self, doc: Document): if doc.has_archive_version: self.zipf.write( - doc.archive_path, self.make_unique_filename(doc, archive=True) + doc.archive_path, + self.make_unique_filename(doc, archive=True), ) else: self.zipf.write(doc.source_path, self.make_unique_filename(doc)) @@ -49,5 +53,6 @@ class OriginalAndArchiveStrategy(BulkArchiveStrategy): ) self.zipf.write( - doc.source_path, self.make_unique_filename(doc, folder="originals/") + doc.source_path, + self.make_unique_filename(doc, folder="originals/"), ) diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py index 18ad04f2d..650138f1e 100644 --- a/src/documents/bulk_edit.py +++ b/src/documents/bulk_edit.py @@ -2,8 +2,9 @@ import itertools from django.db.models import Q from django_q.tasks import async_task - -from documents.models import Document, Correspondent, DocumentType +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType def set_correspondent(doc_ids, correspondent): @@ -40,7 +41,7 @@ def add_tag(doc_ids, tag): DocumentTagRelationship = Document.tags.through DocumentTagRelationship.objects.bulk_create( - [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs] + [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs], ) async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs) @@ -56,7 +57,7 @@ def remove_tag(doc_ids, tag): DocumentTagRelationship = Document.tags.through DocumentTagRelationship.objects.filter( - Q(document_id__in=affected_docs) & Q(tag_id=tag) + Q(document_id__in=affected_docs) & Q(tag_id=tag), ).delete() async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs) diff --git a/src/documents/checks.py b/src/documents/checks.py index fe3d89b12..4ac49a2c2 100644 --- a/src/documents/checks.py +++ b/src/documents/checks.py @@ -1,10 +1,11 @@ import textwrap from django.conf import settings -from django.core.checks import Error, register +from django.core.checks import Error +from django.core.checks import register from django.core.exceptions import FieldError -from django.db.utils import OperationalError, ProgrammingError - +from django.db.utils import OperationalError +from django.db.utils import ProgrammingError from documents.signals import document_consumer_declaration @@ -16,7 +17,7 @@ def changed_password_check(app_configs, **kwargs): try: encrypted_doc = Document.objects.filter( - storage_type=Document.STORAGE_TYPE_GPG + storage_type=Document.STORAGE_TYPE_GPG, ).first() except (OperationalError, ProgrammingError, FieldError): return [] # No documents table yet @@ -27,8 +28,8 @@ def changed_password_check(app_configs, **kwargs): return [ Error( "The database contains encrypted documents but no password " - "is set." - ) + "is set.", + ), ] if not GnuPG.decrypted(encrypted_doc.source_file): @@ -42,9 +43,9 @@ def changed_password_check(app_configs, **kwargs): If you intend to change your password, you must first export all of the old documents, start fresh with the new password and then re-import them." - """ - ) - ) + """, + ), + ), ] return [] @@ -61,8 +62,8 @@ def parser_check(app_configs, **kwargs): return [ Error( "No parsers found. This is a bug. The consumer won't be " - "able to consume any documents without parsers." - ) + "able to consume any documents without parsers.", + ), ] else: return [] diff --git a/src/documents/classifier.py b/src/documents/classifier.py index 195c934a9..b6ee890e4 100644 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -6,8 +6,8 @@ import re import shutil from django.conf import settings - -from documents.models import Document, MatchingModel +from documents.models import Document +from documents.models import MatchingModel class IncompatibleClassifierVersionError(Exception): @@ -30,8 +30,8 @@ def preprocess_content(content): def load_classifier(): if not os.path.isfile(settings.MODEL_FILE): logger.debug( - f"Document classification model does not exist (yet), not " - f"performing automatic matching." + "Document classification model does not exist (yet), not " + "performing automatic matching.", ) return None @@ -42,16 +42,16 @@ def load_classifier(): except (ClassifierModelCorruptError, IncompatibleClassifierVersionError): # there's something wrong with the model file. logger.exception( - f"Unrecoverable error while loading document " - f"classification model, deleting model file." + "Unrecoverable error while loading document " + "classification model, deleting model file.", ) os.unlink(settings.MODEL_FILE) classifier = None except OSError: - logger.exception(f"IO error while loading document classification model") + logger.exception("IO error while loading document classification model") classifier = None except Exception: - logger.exception(f"Unknown error while loading document classification model") + logger.exception("Unknown error while loading document classification model") classifier = None return classifier @@ -78,7 +78,7 @@ class DocumentClassifier(object): if schema_version != self.FORMAT_VERSION: raise IncompatibleClassifierVersionError( - "Cannor load classifier, incompatible versions." + "Cannor load classifier, incompatible versions.", ) else: try: @@ -122,8 +122,8 @@ class DocumentClassifier(object): logger.debug("Gathering data from database...") m = hashlib.sha1() for doc in Document.objects.order_by("pk").exclude( - tags__is_inbox_tag=True - ): # NOQA: E501 + tags__is_inbox_tag=True, + ): preprocessed_content = preprocess_content(doc.content) m.update(preprocessed_content.encode("utf-8")) data.append(preprocessed_content) @@ -146,9 +146,9 @@ class DocumentClassifier(object): [ tag.pk for tag in doc.tags.filter( - matching_algorithm=MatchingModel.MATCH_AUTO + matching_algorithm=MatchingModel.MATCH_AUTO, ) - ] + ], ) for tag in tags: m.update(tag.to_bytes(4, "little", signed=True)) @@ -177,8 +177,11 @@ class DocumentClassifier(object): logger.debug( "{} documents, {} tag(s), {} correspondent(s), " "{} document type(s).".format( - len(data), num_tags, num_correspondents, num_document_types - ) + len(data), + num_tags, + num_correspondents, + num_document_types, + ), ) from sklearn.feature_extraction.text import CountVectorizer @@ -188,7 +191,9 @@ class DocumentClassifier(object): # Step 2: vectorize data logger.debug("Vectorizing data...") self.data_vectorizer = CountVectorizer( - analyzer="word", ngram_range=(1, 2), min_df=0.01 + analyzer="word", + ngram_range=(1, 2), + min_df=0.01, ) data_vectorized = self.data_vectorizer.fit_transform(data) @@ -204,7 +209,7 @@ class DocumentClassifier(object): ] self.tags_binarizer = LabelBinarizer() labels_tags_vectorized = self.tags_binarizer.fit_transform( - labels_tags + labels_tags, ).ravel() else: self.tags_binarizer = MultiLabelBinarizer() @@ -223,7 +228,8 @@ class DocumentClassifier(object): else: self.correspondent_classifier = None logger.debug( - "There are no correspondents. Not training correspondent " "classifier." + "There are no correspondents. Not training correspondent " + "classifier.", ) if num_document_types > 0: @@ -233,7 +239,8 @@ class DocumentClassifier(object): else: self.document_type_classifier = None logger.debug( - "There are no document types. Not training document type " "classifier." + "There are no document types. Not training document type " + "classifier.", ) self.data_hash = new_data_hash diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 0d246de26..4fe6b02ed 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -15,11 +15,19 @@ from filelock import FileLock from rest_framework.reverse import reverse from .classifier import load_classifier -from .file_handling import create_source_path_directory, generate_unique_filename +from .file_handling import create_source_path_directory +from .file_handling import generate_unique_filename from .loggers import LoggingMixin -from .models import Document, FileInfo, Correspondent, DocumentType, Tag -from .parsers import ParseError, get_parser_class_for_mime_type, parse_date -from .signals import document_consumption_finished, document_consumption_started +from .models import Correspondent +from .models import Document +from .models import DocumentType +from .models import FileInfo +from .models import Tag +from .parsers import get_parser_class_for_mime_type +from .parsers import parse_date +from .parsers import ParseError +from .signals import document_consumption_finished +from .signals import document_consumption_started class ConsumerError(Exception): @@ -46,12 +54,15 @@ class Consumer(LoggingMixin): logging_name = "paperless.consumer" def _send_progress( - self, current_progress, max_progress, status, message=None, document_id=None + self, + current_progress, + max_progress, + status, + message=None, + document_id=None, ): payload = { - "filename": os.path.basename(self.filename) - if self.filename - else None, # NOQA: E501 + "filename": os.path.basename(self.filename) if self.filename else None, "task_id": self.task_id, "current_progress": current_progress, "max_progress": max_progress, @@ -60,7 +71,8 @@ class Consumer(LoggingMixin): "document_id": document_id, } async_to_sync(self.channel_layer.group_send)( - "status_updates", {"type": "status_update", "data": payload} + "status_updates", + {"type": "status_update", "data": payload}, ) def _fail(self, message, log_message=None, exc_info=None): @@ -83,15 +95,16 @@ class Consumer(LoggingMixin): def pre_check_file_exists(self): if not os.path.isfile(self.path): self._fail( - MESSAGE_FILE_NOT_FOUND, f"Cannot consume {self.path}: File not found." + MESSAGE_FILE_NOT_FOUND, + f"Cannot consume {self.path}: File not found.", ) def pre_check_duplicate(self): with open(self.path, "rb") as f: checksum = hashlib.md5(f.read()).hexdigest() if Document.objects.filter( - Q(checksum=checksum) | Q(archive_checksum=checksum) - ).exists(): # NOQA: E501 + Q(checksum=checksum) | Q(archive_checksum=checksum), + ).exists(): if settings.CONSUMER_DELETE_DUPLICATES: os.unlink(self.path) self._fail( @@ -139,7 +152,8 @@ class Consumer(LoggingMixin): ) self.log( - "info", f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}" + "info", + f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}", ) try: @@ -154,7 +168,7 @@ class Consumer(LoggingMixin): reverse("document-thumb", kwargs={"pk": document.pk}), str(document.correspondent), str(",".join(document.tags.all().values_list("name", flat=True))), - ) + ), ).wait() except Exception as e: self._fail( @@ -213,7 +227,9 @@ class Consumer(LoggingMixin): # Notify all listeners that we're going to do some work. document_consumption_started.send( - sender=self.__class__, filename=self.path, logging_group=self.logging_group + sender=self.__class__, + filename=self.path, + logging_group=self.logging_group, ) self.run_pre_consume_script() @@ -247,7 +263,9 @@ class Consumer(LoggingMixin): self.log("debug", f"Generating thumbnail for {self.filename}...") self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL) thumbnail = document_parser.get_optimised_thumbnail( - self.path, mime_type, self.filename + self.path, + mime_type, + self.filename, ) text = document_parser.get_text() @@ -301,21 +319,26 @@ class Consumer(LoggingMixin): self._write(document.storage_type, self.path, document.source_path) self._write( - document.storage_type, thumbnail, document.thumbnail_path + document.storage_type, + thumbnail, + document.thumbnail_path, ) if archive_path and os.path.isfile(archive_path): document.archive_filename = generate_unique_filename( - document, archive_filename=True + document, + archive_filename=True, ) create_source_path_directory(document.archive_path) self._write( - document.storage_type, archive_path, document.archive_path + document.storage_type, + archive_path, + document.archive_path, ) with open(archive_path, "rb") as f: document.archive_checksum = hashlib.md5( - f.read() + f.read(), ).hexdigest() # Don't save with the lock active. Saving will cause the file @@ -328,7 +351,8 @@ class Consumer(LoggingMixin): # https://github.com/jonaswinkler/paperless-ng/discussions/1037 shadow_file = os.path.join( - os.path.dirname(self.path), "._" + os.path.basename(self.path) + os.path.dirname(self.path), + "._" + os.path.basename(self.path), ) if os.path.isfile(shadow_file): @@ -390,12 +414,12 @@ class Consumer(LoggingMixin): def apply_overrides(self, document): if self.override_correspondent_id: document.correspondent = Correspondent.objects.get( - pk=self.override_correspondent_id + pk=self.override_correspondent_id, ) if self.override_document_type_id: document.document_type = DocumentType.objects.get( - pk=self.override_document_type_id + pk=self.override_document_type_id, ) if self.override_tag_ids: diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index 390643357..132825e0e 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -103,15 +103,17 @@ def generate_unique_filename(doc, archive_filename=False): if archive_filename and doc.filename: new_filename = os.path.splitext(doc.filename)[0] + ".pdf" if new_filename == old_filename or not os.path.exists( - os.path.join(root, new_filename) - ): # NOQA: E501 + os.path.join(root, new_filename), + ): return new_filename counter = 0 while True: new_filename = generate_filename( - doc, counter, archive_filename=archive_filename + doc, + counter, + archive_filename=archive_filename, ) if new_filename == old_filename: # still the same as before. @@ -137,14 +139,16 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False): if doc.correspondent: correspondent = pathvalidate.sanitize_filename( - doc.correspondent.name, replacement_text="-" + doc.correspondent.name, + replacement_text="-", ) else: correspondent = "none" if doc.document_type: document_type = pathvalidate.sanitize_filename( - doc.document_type.name, replacement_text="-" + doc.document_type.name, + replacement_text="-", ) else: document_type = "none" @@ -160,9 +164,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False): document_type=document_type, created=datetime.date.isoformat(doc.created), created_year=doc.created.year if doc.created else "none", - created_month=f"{doc.created.month:02}" - if doc.created - else "none", # NOQA: E501 + created_month=f"{doc.created.month:02}" if doc.created else "none", created_day=f"{doc.created.day:02}" if doc.created else "none", added=datetime.date.isoformat(doc.added), added_year=doc.added.year if doc.added else "none", @@ -178,7 +180,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False): except (ValueError, KeyError, IndexError): logger.warning( f"Invalid PAPERLESS_FILENAME_FORMAT: " - f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default" + f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default", ) counter_str = f"_{counter:02}" if counter else "" diff --git a/src/documents/filters.py b/src/documents/filters.py index 7075eb3d0..6451bea35 100644 --- a/src/documents/filters.py +++ b/src/documents/filters.py @@ -1,7 +1,13 @@ from django.db.models import Q -from django_filters.rest_framework import BooleanFilter, FilterSet, Filter +from django_filters.rest_framework import BooleanFilter +from django_filters.rest_framework import Filter +from django_filters.rest_framework import FilterSet -from .models import Correspondent, Document, Tag, DocumentType, Log +from .models import Correspondent +from .models import Document +from .models import DocumentType +from .models import Log +from .models import Tag CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"] ID_KWARGS = ["in", "exact"] @@ -75,7 +81,10 @@ class TitleContentFilter(Filter): class DocumentFilterSet(FilterSet): is_tagged = BooleanFilter( - label="Is tagged", field_name="tags", lookup_expr="isnull", exclude=True + label="Is tagged", + field_name="tags", + lookup_expr="isnull", + exclude=True, ) tags__id__all = TagsFilter() diff --git a/src/documents/index.py b/src/documents/index.py index 2c708105c..cc95f2a96 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -1,21 +1,30 @@ import logging +import math import os from contextlib import contextmanager -import math from dateutil.parser import isoparse from django.conf import settings -from whoosh import highlight, classify, query -from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN +from documents.models import Document +from whoosh import classify +from whoosh import highlight +from whoosh import query +from whoosh.fields import BOOLEAN +from whoosh.fields import DATETIME +from whoosh.fields import KEYWORD +from whoosh.fields import NUMERIC +from whoosh.fields import Schema +from whoosh.fields import TEXT from whoosh.highlight import HtmlFormatter -from whoosh.index import create_in, exists_in, open_dir +from whoosh.index import create_in +from whoosh.index import exists_in +from whoosh.index import open_dir from whoosh.qparser import MultifieldParser from whoosh.qparser.dateparse import DateParserPlugin -from whoosh.searching import ResultsPage, Searcher +from whoosh.searching import ResultsPage +from whoosh.searching import Searcher from whoosh.writing import AsyncWriter -from documents.models import Document - logger = logging.getLogger("paperless.index") @@ -45,7 +54,7 @@ def open_index(recreate=False): if exists_in(settings.INDEX_DIR) and not recreate: return open_dir(settings.INDEX_DIR, schema=get_schema()) except Exception: - logger.exception(f"Error while opening the index, recreating.") + logger.exception("Error while opening the index, recreating.") if not os.path.isdir(settings.INDEX_DIR): os.makedirs(settings.INDEX_DIR, exist_ok=True) @@ -138,11 +147,11 @@ class DelayedQuery: criterias.append(query.Term("has_type", v == "false")) elif k == "created__date__lt": criterias.append( - query.DateRange("created", start=None, end=isoparse(v)) + query.DateRange("created", start=None, end=isoparse(v)), ) elif k == "created__date__gt": criterias.append( - query.DateRange("created", start=isoparse(v), end=None) + query.DateRange("created", start=isoparse(v), end=None), ) elif k == "added__date__gt": criterias.append(query.DateRange("added", start=isoparse(v), end=None)) @@ -220,7 +229,7 @@ class DelayedQuery: hit[1], ), page.results.top_n, - ) + ), ) self.saved_results[item.start] = page @@ -240,7 +249,7 @@ class DelayedFullTextQuery(DelayedQuery): corrected = self.searcher.correct_query(q, q_str) if corrected.query != q: - corrected_query = corrected.string + corrected.query = corrected.string return q, None @@ -252,10 +261,14 @@ class DelayedMoreLikeThisQuery(DelayedQuery): docnum = self.searcher.document_number(id=more_like_doc_id) kts = self.searcher.key_terms_from_text( - "content", content, numterms=20, model=classify.Bo1Model, normalize=False + "content", + content, + numterms=20, + model=classify.Bo1Model, + normalize=False, ) q = query.Or( - [query.Term("content", word, boost=weight) for word, weight in kts] + [query.Term("content", word, boost=weight) for word, weight in kts], ) mask = {docnum} @@ -266,7 +279,9 @@ def autocomplete(ix, term, limit=10): with ix.reader() as reader: terms = [] for (score, t) in reader.most_distinctive_terms( - "content", number=limit, prefix=term.lower() + "content", + number=limit, + prefix=term.lower(), ): terms.append(t) return terms diff --git a/src/documents/loggers.py b/src/documents/loggers.py index 78a2f3692..0dd109277 100644 --- a/src/documents/loggers.py +++ b/src/documents/loggers.py @@ -1,8 +1,6 @@ import logging import uuid -from django.conf import settings - class LoggingMixin: diff --git a/src/documents/management/commands/decrypt_documents.py b/src/documents/management/commands/decrypt_documents.py index 24b78474a..861bfa2cc 100644 --- a/src/documents/management/commands/decrypt_documents.py +++ b/src/documents/management/commands/decrypt_documents.py @@ -1,8 +1,8 @@ import os from django.conf import settings -from django.core.management.base import BaseCommand, CommandError - +from django.core.management.base import BaseCommand +from django.core.management.base import CommandError from documents.models import Document from paperless.db import GnuPG @@ -31,9 +31,9 @@ class Command(BaseCommand): "this unless you've got a recent backup\nWARNING: handy. It " "*should* work without a hitch, but be safe and backup your\n" "WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to " - "continue.\n\n" + "continue.\n\n", ) - __ = input() + _ = input() except KeyboardInterrupt: return @@ -41,7 +41,7 @@ class Command(BaseCommand): if not passphrase: raise CommandError( "Passphrase not defined. Please set it with --passphrase or " - "by declaring it in your environment or your config." + "by declaring it in your environment or your config.", ) self.__gpg_to_unencrypted(passphrase) @@ -50,7 +50,7 @@ class Command(BaseCommand): def __gpg_to_unencrypted(passphrase): encrypted_files = Document.objects.filter( - storage_type=Document.STORAGE_TYPE_GPG + storage_type=Document.STORAGE_TYPE_GPG, ) for document in encrypted_files: @@ -71,7 +71,7 @@ class Command(BaseCommand): if not ext == ".gpg": raise CommandError( f"Abort: encrypted file {document.source_path} does not " - f"end with .gpg" + f"end with .gpg", ) document.filename = os.path.splitext(document.filename)[0] @@ -83,7 +83,8 @@ class Command(BaseCommand): f.write(raw_thumb) Document.objects.filter(id=document.id).update( - storage_type=document.storage_type, filename=document.filename + storage_type=document.storage_type, + filename=document.filename, ) for path in old_paths: diff --git a/src/documents/management/commands/document_archiver.py b/src/documents/management/commands/document_archiver.py index 7b1f989f8..f33ccd7ce 100644 --- a/src/documents/management/commands/document_archiver.py +++ b/src/documents/management/commands/document_archiver.py @@ -1,7 +1,6 @@ import hashlib -import multiprocessing - import logging +import multiprocessing import os import shutil import uuid @@ -11,12 +10,12 @@ from django import db from django.conf import settings from django.core.management.base import BaseCommand from django.db import transaction -from filelock import FileLock -from whoosh.writing import AsyncWriter - from documents.models import Document +from filelock import FileLock + from ... import index -from ...file_handling import create_source_path_directory, generate_unique_filename +from ...file_handling import create_source_path_directory +from ...file_handling import generate_unique_filename from ...parsers import get_parser_class_for_mime_type @@ -33,7 +32,7 @@ def handle_document(document_id): if not parser_class: logger.error( f"No parser found for mime type {mime_type}, cannot " - f"archive document {document} (ID: {document_id})" + f"archive document {document} (ID: {document_id})", ) return @@ -43,7 +42,9 @@ def handle_document(document_id): parser.parse(document.source_path, mime_type, document.get_public_filename()) thumbnail = parser.get_optimised_thumbnail( - document.source_path, mime_type, document.get_public_filename() + document.source_path, + mime_type, + document.get_public_filename(), ) if parser.get_archive_path(): @@ -55,7 +56,8 @@ def handle_document(document_id): # We also don't use save() since that triggers the filehandling # logic, and we don't want that yet (file not yet in place) document.archive_filename = generate_unique_filename( - document, archive_filename=True + document, + archive_filename=True, ) Document.objects.filter(pk=document.pk).update( archive_checksum=checksum, @@ -70,9 +72,9 @@ def handle_document(document_id): with index.open_index_writer() as writer: index.update_document(writer, document) - except Exception as e: + except Exception: logger.exception( - f"Error while parsing document {document} " f"(ID: {document_id})" + f"Error while parsing document {document} " f"(ID: {document_id})", ) finally: parser.cleanup() @@ -86,7 +88,8 @@ class Command(BaseCommand): back-tag all previously indexed documents with metadata created (or modified) after their initial import. """.replace( - " ", "" + " ", + "", ) def add_arguments(self, parser): @@ -129,7 +132,7 @@ class Command(BaseCommand): map( lambda doc: doc.id, filter(lambda d: overwrite or not d.has_archive_version, documents), - ) + ), ) # Note to future self: this prevents django from reusing database @@ -146,7 +149,7 @@ class Command(BaseCommand): pool.imap_unordered(handle_document, document_ids), total=len(document_ids), disable=options["no_progress_bar"], - ) + ), ) except KeyboardInterrupt: print("Aborting...") diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index c35594b8c..89467c94a 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -1,17 +1,18 @@ import logging import os -from pathlib import Path, PurePath +from pathlib import Path +from pathlib import PurePath from threading import Thread from time import sleep from django.conf import settings -from django.core.management.base import BaseCommand, CommandError +from django.core.management.base import BaseCommand +from django.core.management.base import CommandError from django_q.tasks import async_task -from watchdog.events import FileSystemEventHandler -from watchdog.observers.polling import PollingObserver - from documents.models import Tag from documents.parsers import is_file_ext_supported +from watchdog.events import FileSystemEventHandler +from watchdog.observers.polling import PollingObserver try: from inotifyrecursive import INotify, flags @@ -29,7 +30,7 @@ def _tags_from_path(filepath): path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts for part in path_parts: tag_ids.add( - Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk + Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk, ) return tag_ids @@ -56,7 +57,7 @@ def _consume(filepath): try: if settings.CONSUMER_SUBDIRS_AS_TAGS: tag_ids = _tags_from_path(filepath) - except Exception as e: + except Exception: logger.exception("Error creating tags from path") try: @@ -67,7 +68,7 @@ def _consume(filepath): override_tag_ids=tag_ids if tag_ids else None, task_name=os.path.basename(filepath)[:100], ) - except Exception as e: + except Exception: # Catch all so that the consumer won't crash. # This is also what the test case is listening for to check for # errors. @@ -86,7 +87,7 @@ def _consume_wait_unmodified(file): new_mtime = os.stat(file).st_mtime except FileNotFoundError: logger.debug( - f"File {file} moved while waiting for it to remain " f"unmodified." + f"File {file} moved while waiting for it to remain " f"unmodified.", ) return if new_mtime == mtime: diff --git a/src/documents/management/commands/document_create_classifier.py b/src/documents/management/commands/document_create_classifier.py index 6ad3ee9f5..9610d50a0 100644 --- a/src/documents/management/commands/document_create_classifier.py +++ b/src/documents/management/commands/document_create_classifier.py @@ -9,7 +9,8 @@ class Command(BaseCommand): Trains the classifier on your data and saves the resulting models to a file. The document consumer will then automatically use this new model. """.replace( - " ", "" + " ", + "", ) def __init__(self, *args, **kwargs): diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index ec978fcf1..b110475a5 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -6,28 +6,28 @@ import time import tqdm from django.conf import settings -from django.contrib.auth.models import User, Group +from django.contrib.auth.models import Group +from django.contrib.auth.models import User from django.core import serializers -from django.core.management.base import BaseCommand, CommandError +from django.core.management.base import BaseCommand +from django.core.management.base import CommandError from django.db import transaction +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.models import SavedView +from documents.models import SavedViewFilterRule +from documents.models import Tag +from documents.settings import EXPORTER_ARCHIVE_NAME +from documents.settings import EXPORTER_FILE_NAME +from documents.settings import EXPORTER_THUMBNAIL_NAME from filelock import FileLock - -from documents.models import ( - Document, - Correspondent, - Tag, - DocumentType, - SavedView, - SavedViewFilterRule, -) -from documents.settings import ( - EXPORTER_FILE_NAME, - EXPORTER_THUMBNAIL_NAME, - EXPORTER_ARCHIVE_NAME, -) from paperless.db import GnuPG -from paperless_mail.models import MailAccount, MailRule -from ...file_handling import generate_filename, delete_empty_directories +from paperless_mail.models import MailAccount +from paperless_mail.models import MailRule + +from ...file_handling import delete_empty_directories +from ...file_handling import generate_filename class Command(BaseCommand): @@ -37,7 +37,8 @@ class Command(BaseCommand): directory. And include a manifest file containing document data for easy import. """.replace( - " ", "" + " ", + "", ) def add_arguments(self, parser): @@ -107,20 +108,20 @@ class Command(BaseCommand): # 1. Take a snapshot of what files exist in the current export folder for root, dirs, files in os.walk(self.target): self.files_in_export_dir.extend( - map(lambda f: os.path.abspath(os.path.join(root, f)), files) + map(lambda f: os.path.abspath(os.path.join(root, f)), files), ) # 2. Create manifest, containing all correspondents, types, tags and # documents with transaction.atomic(): manifest = json.loads( - serializers.serialize("json", Correspondent.objects.all()) + serializers.serialize("json", Correspondent.objects.all()), ) manifest += json.loads(serializers.serialize("json", Tag.objects.all())) manifest += json.loads( - serializers.serialize("json", DocumentType.objects.all()) + serializers.serialize("json", DocumentType.objects.all()), ) documents = Document.objects.order_by("id") @@ -129,19 +130,19 @@ class Command(BaseCommand): manifest += document_manifest manifest += json.loads( - serializers.serialize("json", MailAccount.objects.all()) + serializers.serialize("json", MailAccount.objects.all()), ) manifest += json.loads( - serializers.serialize("json", MailRule.objects.all()) + serializers.serialize("json", MailRule.objects.all()), ) manifest += json.loads( - serializers.serialize("json", SavedView.objects.all()) + serializers.serialize("json", SavedView.objects.all()), ) manifest += json.loads( - serializers.serialize("json", SavedViewFilterRule.objects.all()) + serializers.serialize("json", SavedViewFilterRule.objects.all()), ) manifest += json.loads(serializers.serialize("json", Group.objects.all())) @@ -155,9 +156,7 @@ class Command(BaseCommand): disable=progress_bar_disable, ): # 3.1. store files unencrypted - document_dict["fields"][ - "storage_type" - ] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501 + document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED document = document_map[document_dict["pk"]] @@ -166,7 +165,9 @@ class Command(BaseCommand): while True: if self.use_filename_format: base_name = generate_filename( - document, counter=filename_counter, append_gpg=False + document, + counter=filename_counter, + append_gpg=False, ) else: base_name = document.get_public_filename(counter=filename_counter) @@ -217,14 +218,18 @@ class Command(BaseCommand): os.utime(archive_target, times=(t, t)) else: self.check_and_copy( - document.source_path, document.checksum, original_target + document.source_path, + document.checksum, + original_target, ) self.check_and_copy(document.thumbnail_path, None, thumbnail_target) if archive_target: self.check_and_copy( - document.archive_path, document.archive_checksum, archive_target + document.archive_path, + document.archive_checksum, + archive_target, ) # 4. write manifest to target forlder @@ -243,7 +248,8 @@ class Command(BaseCommand): os.remove(f) delete_empty_directories( - os.path.abspath(os.path.dirname(f)), os.path.abspath(self.target) + os.path.abspath(os.path.dirname(f)), + os.path.abspath(self.target), ) def check_and_copy(self, source, source_checksum, target): diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index 9d77c1033..d1ae33afb 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -7,16 +7,16 @@ from contextlib import contextmanager import tqdm from django.conf import settings from django.core.management import call_command -from django.core.management.base import BaseCommand, CommandError -from django.db.models.signals import post_save, m2m_changed +from django.core.management.base import BaseCommand +from django.core.management.base import CommandError +from django.db.models.signals import m2m_changed +from django.db.models.signals import post_save +from documents.models import Document +from documents.settings import EXPORTER_ARCHIVE_NAME +from documents.settings import EXPORTER_FILE_NAME +from documents.settings import EXPORTER_THUMBNAIL_NAME from filelock import FileLock -from documents.models import Document -from documents.settings import ( - EXPORTER_FILE_NAME, - EXPORTER_THUMBNAIL_NAME, - EXPORTER_ARCHIVE_NAME, -) from ...file_handling import create_source_path_directory from ...signals.handlers import update_filename_and_move_files @@ -36,7 +36,8 @@ class Command(BaseCommand): Using a manifest.json file, load the data from there, and import the documents it refers to. """.replace( - " ", "" + " ", + "", ) def add_arguments(self, parser): @@ -73,7 +74,9 @@ class Command(BaseCommand): self._check_manifest() with disable_signal( - post_save, receiver=update_filename_and_move_files, sender=Document + post_save, + receiver=update_filename_and_move_files, + sender=Document, ): with disable_signal( m2m_changed, @@ -92,7 +95,7 @@ class Command(BaseCommand): def _check_manifest_exists(path): if not os.path.exists(path): raise CommandError( - "That directory doesn't appear to contain a manifest.json " "file." + "That directory doesn't appear to contain a manifest.json " "file.", ) def _check_manifest(self): @@ -105,14 +108,14 @@ class Command(BaseCommand): if EXPORTER_FILE_NAME not in record: raise CommandError( "The manifest file contains a record which does not " - "refer to an actual document file." + "refer to an actual document file.", ) doc_file = record[EXPORTER_FILE_NAME] if not os.path.exists(os.path.join(self.source, doc_file)): raise CommandError( 'The manifest file refers to "{}" which does not ' - "appear to be in the source directory.".format(doc_file) + "appear to be in the source directory.".format(doc_file), ) if EXPORTER_ARCHIVE_NAME in record: @@ -120,7 +123,7 @@ class Command(BaseCommand): if not os.path.exists(os.path.join(self.source, archive_file)): raise CommandError( f"The manifest file refers to {archive_file} which " - f"does not appear to be in the source directory." + f"does not appear to be in the source directory.", ) def _import_files_from_manifest(self, progress_bar_disable): @@ -132,7 +135,7 @@ class Command(BaseCommand): print("Copy files into paperless...") manifest_documents = list( - filter(lambda r: r["model"] == "documents.document", self.manifest) + filter(lambda r: r["model"] == "documents.document", self.manifest), ) for record in tqdm.tqdm(manifest_documents, disable=progress_bar_disable): diff --git a/src/documents/management/commands/document_index.py b/src/documents/management/commands/document_index.py index 3dd4d84ff..cf7eb65e5 100644 --- a/src/documents/management/commands/document_index.py +++ b/src/documents/management/commands/document_index.py @@ -1,7 +1,7 @@ from django.core.management import BaseCommand from django.db import transaction - -from documents.tasks import index_reindex, index_optimize +from documents.tasks import index_optimize +from documents.tasks import index_reindex class Command(BaseCommand): diff --git a/src/documents/management/commands/document_renamer.py b/src/documents/management/commands/document_renamer.py index 221fb4208..79c46f905 100644 --- a/src/documents/management/commands/document_renamer.py +++ b/src/documents/management/commands/document_renamer.py @@ -3,7 +3,6 @@ import logging import tqdm from django.core.management.base import BaseCommand from django.db.models.signals import post_save - from documents.models import Document @@ -12,7 +11,8 @@ class Command(BaseCommand): help = """ This will rename all documents to match the latest filename format. """.replace( - " ", "" + " ", + "", ) def add_arguments(self, parser): @@ -28,6 +28,7 @@ class Command(BaseCommand): logging.getLogger().handlers[0].level = logging.ERROR for document in tqdm.tqdm( - Document.objects.all(), disable=options["no_progress_bar"] + Document.objects.all(), + disable=options["no_progress_bar"], ): post_save.send(Document, instance=document) diff --git a/src/documents/management/commands/document_retagger.py b/src/documents/management/commands/document_retagger.py index fcf9e3478..5ecf7f8ce 100644 --- a/src/documents/management/commands/document_retagger.py +++ b/src/documents/management/commands/document_retagger.py @@ -2,10 +2,12 @@ import logging import tqdm from django.core.management.base import BaseCommand - from documents.classifier import load_classifier from documents.models import Document -from ...signals.handlers import set_correspondent, set_document_type, set_tags + +from ...signals.handlers import set_correspondent +from ...signals.handlers import set_document_type +from ...signals.handlers import set_tags logger = logging.getLogger("paperless.management.retagger") @@ -19,7 +21,8 @@ class Command(BaseCommand): back-tag all previously indexed documents with metadata created (or modified) after their initial import. """.replace( - " ", "" + " ", + "", ) def add_arguments(self, parser): @@ -57,7 +60,8 @@ class Command(BaseCommand): help="Return the suggestion, don't change anything.", ) parser.add_argument( - "--base-url", help="The base URL to use to build the link to the documents." + "--base-url", + help="The base URL to use to build the link to the documents.", ) def handle(self, *args, **options): diff --git a/src/documents/management/commands/document_sanity_checker.py b/src/documents/management/commands/document_sanity_checker.py index 54691fefe..27c119863 100644 --- a/src/documents/management/commands/document_sanity_checker.py +++ b/src/documents/management/commands/document_sanity_checker.py @@ -7,7 +7,8 @@ class Command(BaseCommand): help = """ This command checks your document archive for issues. """.replace( - " ", "" + " ", + "", ) def add_arguments(self, parser): diff --git a/src/documents/management/commands/document_thumbnails.py b/src/documents/management/commands/document_thumbnails.py index 9e2893b5f..c9928c7cc 100644 --- a/src/documents/management/commands/document_thumbnails.py +++ b/src/documents/management/commands/document_thumbnails.py @@ -5,8 +5,8 @@ import shutil import tqdm from django import db from django.core.management.base import BaseCommand - from documents.models import Document + from ...parsers import get_parser_class_for_mime_type @@ -22,7 +22,9 @@ def _process_document(doc_in): try: thumb = parser.get_optimised_thumbnail( - document.source_path, document.mime_type, document.get_public_filename() + document.source_path, + document.mime_type, + document.get_public_filename(), ) shutil.move(thumb, document.thumbnail_path) @@ -35,7 +37,8 @@ class Command(BaseCommand): help = """ This will regenerate the thumbnails for all documents. """.replace( - " ", "" + " ", + "", ) def add_arguments(self, parser): @@ -76,5 +79,5 @@ class Command(BaseCommand): pool.imap_unordered(_process_document, ids), total=len(ids), disable=options["no_progress_bar"], - ) + ), ) diff --git a/src/documents/management/commands/manage_superuser.py b/src/documents/management/commands/manage_superuser.py index f8cefb0d9..e892f7d22 100644 --- a/src/documents/management/commands/manage_superuser.py +++ b/src/documents/management/commands/manage_superuser.py @@ -2,7 +2,7 @@ import logging import os from django.contrib.auth.models import User -from django.core.management.base import BaseCommand, CommandError +from django.core.management.base import BaseCommand logger = logging.getLogger("paperless.management.superuser") @@ -13,7 +13,8 @@ class Command(BaseCommand): help = """ Creates a Django superuser based on env variables. """.replace( - " ", "" + " ", + "", ) def handle(self, *args, **options): @@ -39,5 +40,5 @@ class Command(BaseCommand): self.stdout.write(f'Did not create superuser "{username}".') self.stdout.write( 'Make sure you specified "PAPERLESS_ADMIN_PASSWORD" in your ' - '"docker-compose.env" file.' + '"docker-compose.env" file.', ) diff --git a/src/documents/matching.py b/src/documents/matching.py index 2acd5b7f6..425674d8d 100644 --- a/src/documents/matching.py +++ b/src/documents/matching.py @@ -1,8 +1,10 @@ import logging import re - -from documents.models import MatchingModel, Correspondent, DocumentType, Tag +from documents.models import Correspondent +from documents.models import DocumentType +from documents.models import MatchingModel +from documents.models import Tag logger = logging.getLogger("paperless.matching") @@ -12,7 +14,7 @@ def log_reason(matching_model, document, reason): class_name = type(matching_model).__name__ logger.debug( f"{class_name} {matching_model.name} matched on document " - f"{document} because {reason}" + f"{document} because {reason}", ) @@ -25,7 +27,7 @@ def match_correspondents(document, classifier): correspondents = Correspondent.objects.all() return list( - filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents) + filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents), ) @@ -38,7 +40,7 @@ def match_document_types(document, classifier): document_types = DocumentType.objects.all() return list( - filter(lambda o: matches(o, document) or o.pk == pred_id, document_types) + filter(lambda o: matches(o, document) or o.pk == pred_id, document_types), ) @@ -51,7 +53,7 @@ def match_tags(document, classifier): tags = Tag.objects.all() return list( - filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags) + filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags), ) @@ -92,7 +94,7 @@ def matches(matching_model, document): rf"\b{re.escape(matching_model.match)}\b", document_content, **search_kwargs, - ) + ), ) if result: log_reason( @@ -105,11 +107,12 @@ def matches(matching_model, document): elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX: try: match = re.search( - re.compile(matching_model.match, **search_kwargs), document_content + re.compile(matching_model.match, **search_kwargs), + document_content, ) except re.error: logger.error( - f"Error while processing regular expression " f"{matching_model.match}" + f"Error while processing regular expression " f"{matching_model.match}", ) return False if match: diff --git a/src/documents/models.py b/src/documents/models.py index 02a6b56dc..0ea5a394f 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -5,17 +5,14 @@ import os import re from collections import OrderedDict -import pathvalidate - import dateutil.parser +import pathvalidate from django.conf import settings from django.contrib.auth.models import User from django.db import models from django.utils import timezone from django.utils.timezone import is_aware - from django.utils.translation import gettext_lazy as _ - from documents.parsers import get_default_file_extension @@ -42,7 +39,9 @@ class MatchingModel(models.Model): match = models.CharField(_("match"), max_length=256, blank=True) matching_algorithm = models.PositiveIntegerField( - _("matching algorithm"), choices=MATCHING_ALGORITHMS, default=MATCH_ANY + _("matching algorithm"), + choices=MATCHING_ALGORITHMS, + default=MATCH_ANY, ) is_insensitive = models.BooleanField(_("is insensitive"), default=True) @@ -71,7 +70,7 @@ class Tag(MatchingModel): default=False, help_text=_( "Marks this tag as an inbox tag: All newly consumed " - "documents will be tagged with inbox tags." + "documents will be tagged with inbox tags.", ), ) @@ -120,14 +119,17 @@ class Document(models.Model): blank=True, help_text=_( "The raw, text-only data of the document. This field is " - "primarily used for searching." + "primarily used for searching.", ), ) mime_type = models.CharField(_("mime type"), max_length=256, editable=False) tags = models.ManyToManyField( - Tag, related_name="documents", blank=True, verbose_name=_("tags") + Tag, + related_name="documents", + blank=True, + verbose_name=_("tags"), ) checksum = models.CharField( @@ -150,7 +152,10 @@ class Document(models.Model): created = models.DateTimeField(_("created"), default=timezone.now, db_index=True) modified = models.DateTimeField( - _("modified"), auto_now=True, editable=False, db_index=True + _("modified"), + auto_now=True, + editable=False, + db_index=True, ) storage_type = models.CharField( @@ -162,7 +167,10 @@ class Document(models.Model): ) added = models.DateTimeField( - _("added"), default=timezone.now, editable=False, db_index=True + _("added"), + default=timezone.now, + editable=False, + db_index=True, ) filename = models.FilePathField( @@ -192,7 +200,7 @@ class Document(models.Model): unique=True, db_index=True, help_text=_( - "The position of this document in your physical document " "archive." + "The position of this document in your physical document " "archive.", ), ) @@ -289,7 +297,9 @@ class Log(models.Model): message = models.TextField(_("message")) level = models.PositiveIntegerField( - _("level"), choices=LEVELS, default=logging.INFO + _("level"), + choices=LEVELS, + default=logging.INFO, ) created = models.DateTimeField(_("created"), auto_now_add=True) @@ -321,7 +331,10 @@ class SavedView(models.Model): ) sort_field = models.CharField( - _("sort field"), max_length=128, null=True, blank=True + _("sort field"), + max_length=128, + null=True, + blank=True, ) sort_reverse = models.BooleanField(_("sort reverse"), default=False) @@ -383,11 +396,16 @@ class FileInfo: ), ), ("title", re.compile(r"(?P.*)$", flags=re.IGNORECASE)), - ] + ], ) def __init__( - self, created=None, correspondent=None, title=None, tags=(), extension=None + self, + created=None, + correspondent=None, + title=None, + tags=(), + extension=None, ): self.created = created diff --git a/src/documents/parsers.py b/src/documents/parsers.py index f179337a4..80553cf7e 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -9,6 +9,8 @@ import tempfile import magic from django.conf import settings from django.utils import timezone +from documents.loggers import LoggingMixin +from documents.signals import document_consumer_declaration # This regular expression will try to find dates in the document at # hand and will match the following formats: @@ -21,17 +23,15 @@ from django.utils import timezone # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits # - MONTH ZZZZ, with ZZZZ being 4 digits # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits -from documents.loggers import LoggingMixin -from documents.signals import document_consumer_declaration # TODO: isnt there a date parsing library for this? DATE_REGEX = re.compile( - r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" # NOQA: E501 - r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" # NOQA: E501 - r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # NOQA: E501 + r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" # noqa: E501 + r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" # noqa: E501 + r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # noqa: E501 r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|" - r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))" + r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))", ) diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py index 5dee84258..de3995eb7 100644 --- a/src/documents/sanity_checker.py +++ b/src/documents/sanity_checker.py @@ -3,9 +3,8 @@ import logging import os from django.conf import settings -from tqdm import tqdm - from documents.models import Document +from tqdm import tqdm class SanityCheckMessages: @@ -88,19 +87,19 @@ def check_sanity(progress=False): if not checksum == doc.checksum: messages.error( f"Checksum mismatch of document {doc.pk}. " - f"Stored: {doc.checksum}, actual: {checksum}." + f"Stored: {doc.checksum}, actual: {checksum}.", ) # Check sanity of the archive file. if doc.archive_checksum and not doc.archive_filename: messages.error( f"Document {doc.pk} has an archive file checksum, but no " - f"archive filename." + f"archive filename.", ) elif not doc.archive_checksum and doc.archive_filename: messages.error( f"Document {doc.pk} has an archive file, but its checksum is " - f"missing." + f"missing.", ) elif doc.has_archive_version: if not os.path.isfile(doc.archive_path): @@ -113,7 +112,7 @@ def check_sanity(progress=False): checksum = hashlib.md5(f.read()).hexdigest() except OSError as e: messages.error( - f"Cannot read archive file of document {doc.pk}: {e}" + f"Cannot read archive file of document {doc.pk}: {e}", ) else: if not checksum == doc.archive_checksum: @@ -121,7 +120,7 @@ def check_sanity(progress=False): f"Checksum mismatch of archived document " f"{doc.pk}. " f"Stored: {doc.archive_checksum}, " - f"actual: {checksum}." + f"actual: {checksum}.", ) # other document checks diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index 0206eb2ae..9ea3caa28 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -1,25 +1,22 @@ +import math import re import magic -import math from django.utils.text import slugify +from django.utils.translation import gettext as _ from rest_framework import serializers from rest_framework.fields import SerializerMethodField from . import bulk_edit -from .models import ( - Correspondent, - Tag, - Document, - DocumentType, - SavedView, - SavedViewFilterRule, - MatchingModel, -) +from .models import Correspondent +from .models import Document +from .models import DocumentType +from .models import MatchingModel +from .models import SavedView +from .models import SavedViewFilterRule +from .models import Tag from .parsers import is_mime_type_supported -from django.utils.translation import gettext as _ - # https://www.django-rest-framework.org/api-guide/serializers/#example class DynamicFieldsModelSerializer(serializers.ModelSerializer): @@ -56,12 +53,12 @@ class MatchingModelSerializer(serializers.ModelSerializer): if ( "matching_algorithm" in self.initial_data and self.initial_data["matching_algorithm"] == MatchingModel.MATCH_REGEX - ): # NOQA: E501 + ): try: re.compile(match) except Exception as e: raise serializers.ValidationError( - _("Invalid regular expression: %(error)s") % {"error": str(e)} + _("Invalid regular expression: %(error)s") % {"error": str(e)}, ) return match @@ -156,7 +153,7 @@ class TagSerializer(MatchingModelSerializer): luminance = math.sqrt( 0.299 * math.pow(rgb[0], 2) + 0.587 * math.pow(rgb[1], 2) - + 0.114 * math.pow(rgb[2], 2) + + 0.114 * math.pow(rgb[2], 2), ) return "#ffffff" if luminance < 0.53 else "#000000" except ValueError: @@ -298,7 +295,7 @@ class DocumentListSerializer(serializers.Serializer): count = Document.objects.filter(id__in=documents).count() if not count == len(documents): raise serializers.ValidationError( - f"Some documents in {name} don't exist or were " f"specified twice." + f"Some documents in {name} don't exist or were " f"specified twice.", ) def validate_documents(self, documents): @@ -331,7 +328,7 @@ class BulkEditSerializer(DocumentListSerializer): count = Tag.objects.filter(id__in=tags).count() if not count == len(tags): raise serializers.ValidationError( - f"Some tags in {name} don't exist or were specified twice." + f"Some tags in {name} don't exist or were specified twice.", ) def validate_method(self, method): @@ -456,7 +453,7 @@ class PostDocumentSerializer(serializers.Serializer): if not is_mime_type_supported(mime_type): raise serializers.ValidationError( - _("File type %(type)s not supported") % {"type": mime_type} + _("File type %(type)s not supported") % {"type": mime_type}, ) return document.name, document_data @@ -483,11 +480,13 @@ class PostDocumentSerializer(serializers.Serializer): class BulkDownloadSerializer(DocumentListSerializer): content = serializers.ChoiceField( - choices=["archive", "originals", "both"], default="archive" + choices=["archive", "originals", "both"], + default="archive", ) compression = serializers.ChoiceField( - choices=["none", "deflated", "bzip2", "lzma"], default="none" + choices=["none", "deflated", "bzip2", "lzma"], + default="none", ) def validate_compression(self, compression): diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index d0d28a2bc..1b3f3b577 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -1,24 +1,26 @@ import logging import os -from django.utils import termcolors from django.conf import settings -from django.contrib.admin.models import ADDITION, LogEntry +from django.contrib.admin.models import ADDITION +from django.contrib.admin.models import LogEntry from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType -from django.db import models, DatabaseError +from django.db import DatabaseError +from django.db import models from django.db.models import Q from django.dispatch import receiver -from django.utils import termcolors, timezone +from django.utils import termcolors +from django.utils import timezone from filelock import FileLock from .. import matching -from ..file_handling import ( - delete_empty_directories, - create_source_path_directory, - generate_unique_filename, -) -from ..models import Document, Tag, MatchingModel +from ..file_handling import create_source_path_directory +from ..file_handling import delete_empty_directories +from ..file_handling import generate_unique_filename +from ..models import Document +from ..models import MatchingModel +from ..models import Tag logger = logging.getLogger("paperless.handlers") @@ -72,7 +74,7 @@ def set_correspondent( print( termcolors.colorize(str(document), fg="green") if color - else str(document) + else str(document), ) print(f"{base_url}/documents/{document.pk}") else: @@ -82,7 +84,7 @@ def set_correspondent( if color else str(document) ) - + f" [{document.pk}]" + + f" [{document.pk}]", ) print(f"Suggest correspondent {selected}") else: @@ -139,7 +141,7 @@ def set_document_type( print( termcolors.colorize(str(document), fg="green") if color - else str(document) + else str(document), ) print(f"{base_url}/documents/{document.pk}") else: @@ -149,7 +151,7 @@ def set_document_type( if color else str(document) ) - + f" [{document.pk}]" + + f" [{document.pk}]", ) print(f"Suggest document type {selected}") else: @@ -176,9 +178,9 @@ def set_tags( if replace: Document.tags.through.objects.filter(document=document).exclude( - Q(tag__is_inbox_tag=True) + Q(tag__is_inbox_tag=True), ).exclude( - Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO) + Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO), ).delete() current_tags = set(document.tags.all()) @@ -198,7 +200,7 @@ def set_tags( print( termcolors.colorize(str(document), fg="green") if color - else str(document) + else str(document), ) print(f"{base_url}/documents/{document.pk}") else: @@ -208,7 +210,7 @@ def set_tags( if color else str(document) ) - + f" [{document.pk}]" + + f" [{document.pk}]", ) if relevant_tags: print("Suggest tags: " + ", ".join([t.name for t in relevant_tags])) @@ -254,7 +256,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs): except OSError as e: logger.error( f"Failed to move {instance.source_path} to trash at " - f"{new_file_path}: {e}. Skipping cleanup!" + f"{new_file_path}: {e}. Skipping cleanup!", ) return @@ -270,16 +272,18 @@ def cleanup_document_deletion(sender, instance, using, **kwargs): except OSError as e: logger.warning( f"While deleting document {str(instance)}, the file " - f"{filename} could not be deleted: {e}" + f"{filename} could not be deleted: {e}", ) delete_empty_directories( - os.path.dirname(instance.source_path), root=settings.ORIGINALS_DIR + os.path.dirname(instance.source_path), + root=settings.ORIGINALS_DIR, ) if instance.has_archive_version: delete_empty_directories( - os.path.dirname(instance.archive_path), root=settings.ARCHIVE_DIR + os.path.dirname(instance.archive_path), + root=settings.ARCHIVE_DIR, ) @@ -297,7 +301,7 @@ def validate_move(instance, old_path, new_path): # Can't do anything if the new file already exists. Skip updating file. logger.warning( f"Document {str(instance)}: Cannot rename file " - f"since target path {new_path} already exists." + f"since target path {new_path} already exists.", ) raise CannotMoveFilesException() @@ -331,12 +335,11 @@ def update_filename_and_move_files(sender, instance, **kwargs): if instance.has_archive_version: instance.archive_filename = generate_unique_filename( - instance, archive_filename=True + instance, + archive_filename=True, ) - move_archive = ( - old_archive_filename != instance.archive_filename - ) # NOQA: E501 + move_archive = old_archive_filename != instance.archive_filename else: move_archive = False @@ -374,7 +377,7 @@ def update_filename_and_move_files(sender, instance, **kwargs): if move_archive and os.path.isfile(instance.archive_path): os.rename(instance.archive_path, old_archive_path) - except Exception as e: + except Exception: # This is fine, since: # A: if we managed to move source from A to B, we will also # manage to move it from B to A. If not, we have a serious @@ -393,14 +396,16 @@ def update_filename_and_move_files(sender, instance, **kwargs): # something has failed above. if not os.path.isfile(old_source_path): delete_empty_directories( - os.path.dirname(old_source_path), root=settings.ORIGINALS_DIR + os.path.dirname(old_source_path), + root=settings.ORIGINALS_DIR, ) if instance.has_archive_version and not os.path.isfile( - old_archive_path - ): # NOQA: E501 + old_archive_path, + ): delete_empty_directories( - os.path.dirname(old_archive_path), root=settings.ARCHIVE_DIR + os.path.dirname(old_archive_path), + root=settings.ARCHIVE_DIR, ) diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 569ebf0a7..b43f211de 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -3,13 +3,18 @@ import logging import tqdm from django.conf import settings from django.db.models.signals import post_save -from whoosh.writing import AsyncWriter - -from documents import index, sanity_checker -from documents.classifier import DocumentClassifier, load_classifier -from documents.consumer import Consumer, ConsumerError -from documents.models import Document, Tag, DocumentType, Correspondent +from documents import index +from documents import sanity_checker +from documents.classifier import DocumentClassifier +from documents.classifier import load_classifier +from documents.consumer import Consumer +from documents.consumer import ConsumerError +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.models import Tag from documents.sanity_checker import SanityCheckFailedException +from whoosh.writing import AsyncWriter logger = logging.getLogger("paperless.tasks") @@ -47,7 +52,7 @@ def train_classifier(): try: if classifier.train(): logger.info( - "Saving updated classifier model to {}...".format(settings.MODEL_FILE) + "Saving updated classifier model to {}...".format(settings.MODEL_FILE), ) classifier.save() else: @@ -82,7 +87,7 @@ def consume_file( else: raise ConsumerError( "Unknown error: Returned document was null, but " - "no error message was given." + "no error message was given.", ) diff --git a/src/documents/tests/factories.py b/src/documents/tests/factories.py index c2907d932..83644c411 100644 --- a/src/documents/tests/factories.py +++ b/src/documents/tests/factories.py @@ -1,7 +1,8 @@ from factory import Faker from factory.django import DjangoModelFactory -from ..models import Document, Correspondent +from ..models import Correspondent +from ..models import Document class CorrespondentFactory(DjangoModelFactory): diff --git a/src/documents/tests/test_admin.py b/src/documents/tests/test_admin.py index 42e616e29..92e2d1f95 100644 --- a/src/documents/tests/test_admin.py +++ b/src/documents/tests/test_admin.py @@ -3,7 +3,6 @@ from unittest import mock from django.contrib.admin.sites import AdminSite from django.test import TestCase from django.utils import timezone - from documents import index from documents.admin import DocumentAdmin from documents.models import Document @@ -42,7 +41,8 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase): docs = [] for i in range(42): doc = Document.objects.create( - title="Many documents with the same title", checksum=f"{i:02}" + title="Many documents with the same title", + checksum=f"{i:02}", ) docs.append(doc) index.add_or_update_document(doc) @@ -61,6 +61,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase): def test_created(self): doc = Document.objects.create( - title="test", created=timezone.make_aware(timezone.datetime(2020, 4, 12)) + title="test", + created=timezone.make_aware(timezone.datetime(2020, 4, 12)), ) self.assertEqual(self.doc_admin.created_(doc), "2020-04-12") diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index e384f8b39..ecfa84046 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -10,22 +10,20 @@ from unittest import mock import pytest from django.conf import settings from django.contrib.auth.models import User -from django.utils import timezone from django.test import override_settings +from django.utils import timezone +from documents import bulk_edit +from documents import index +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.models import MatchingModel +from documents.models import SavedView +from documents.models import Tag +from documents.tests.utils import DirectoriesMixin from rest_framework.test import APITestCase from whoosh.writing import AsyncWriter -from documents import index, bulk_edit -from documents.models import ( - Document, - Correspondent, - DocumentType, - Tag, - SavedView, - MatchingModel, -) -from documents.tests.utils import DirectoriesMixin - class TestDocumentApi(DirectoriesMixin, APITestCase): def setUp(self): @@ -72,7 +70,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): returned_doc["title"] = "the new title" response = self.client.put( - "/api/documents/{}/".format(doc.pk), returned_doc, format="json" + "/api/documents/{}/".format(doc.pk), + returned_doc, + format="json", ) self.assertEqual(response.status_code, 200) @@ -127,7 +127,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(len(results[0]), 2) response = self.client.get( - "/api/documents/?fields=id,conteasdnt", format="json" + "/api/documents/?fields=id,conteasdnt", + format="json", ) self.assertEqual(response.status_code, 200) results = response.data["results"] @@ -162,7 +163,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): ) with open( - os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb" + os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), + "wb", ) as f: f.write(content_thumbnail) @@ -206,7 +208,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(response.content, content_archive) response = self.client.get( - "/api/documents/{}/download/?original=true".format(doc.pk) + "/api/documents/{}/download/?original=true".format(doc.pk), ) self.assertEqual(response.status_code, 200) @@ -218,7 +220,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(response.content, content_archive) response = self.client.get( - "/api/documents/{}/preview/?original=true".format(doc.pk) + "/api/documents/{}/preview/?original=true".format(doc.pk), ) self.assertEqual(response.status_code, 200) @@ -227,7 +229,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_document_actions_not_existing_file(self): doc = Document.objects.create( - title="none", filename=os.path.basename("asd"), mime_type="application/pdf" + title="none", + filename=os.path.basename("asd"), + mime_type="application/pdf", ) response = self.client.get("/api/documents/{}/download/".format(doc.pk)) @@ -242,13 +246,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_document_filters(self): doc1 = Document.objects.create( - title="none1", checksum="A", mime_type="application/pdf" + title="none1", + checksum="A", + mime_type="application/pdf", ) doc2 = Document.objects.create( - title="none2", checksum="B", mime_type="application/pdf" + title="none2", + checksum="B", + mime_type="application/pdf", ) doc3 = Document.objects.create( - title="none3", checksum="C", mime_type="application/pdf" + title="none3", + checksum="C", + mime_type="application/pdf", ) tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True) @@ -273,7 +283,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id]) response = self.client.get( - "/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id) + "/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id), ) self.assertEqual(response.status_code, 200) results = response.data["results"] @@ -281,7 +291,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc3.id]) response = self.client.get( - "/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id) + "/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id), ) self.assertEqual(response.status_code, 200) results = response.data["results"] @@ -289,7 +299,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id]) response = self.client.get( - "/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id) + "/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id), ) self.assertEqual(response.status_code, 200) results = response.data["results"] @@ -297,14 +307,14 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(results[0]["id"], doc3.id) response = self.client.get( - "/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id) + "/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id), ) self.assertEqual(response.status_code, 200) results = response.data["results"] self.assertEqual(len(results), 0) response = self.client.get( - "/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id) + "/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id), ) self.assertEqual(response.status_code, 200) results = response.data["results"] @@ -317,7 +327,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc2.id]) response = self.client.get( - "/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id) + "/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id), ) self.assertEqual(response.status_code, 200) results = response.data["results"] @@ -325,7 +335,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(results[0]["id"], doc1.id) response = self.client.get( - "/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id) + "/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id), ) self.assertEqual(response.status_code, 200) results = response.data["results"] @@ -443,7 +453,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): for i in range(1, 6): response = self.client.get( - f"/api/documents/?query=content&page={i}&page_size=10" + f"/api/documents/?query=content&page={i}&page_size=10", ) results = response.data["results"] self.assertEqual(response.data["count"], 55) @@ -595,31 +605,35 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id]) self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id]) self.assertCountEqual( - search_query("&correspondent__isnull"), [d2.id, d3.id, d4.id, d5.id] + search_query("&correspondent__isnull"), + [d2.id, d3.id, d4.id, d5.id], ) self.assertCountEqual( - search_query("&document_type__isnull"), [d1.id, d3.id, d4.id, d5.id] + search_query("&document_type__isnull"), + [d1.id, d3.id, d4.id, d5.id], ) self.assertCountEqual( - search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)), [d3.id] + search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)), + [d3.id], ) self.assertCountEqual(search_query("&tags__id__all=" + str(t.id)), [d3.id]) self.assertCountEqual( - search_query("&tags__id__all=" + str(t2.id)), [d3.id, d4.id] + search_query("&tags__id__all=" + str(t2.id)), + [d3.id, d4.id], ) self.assertIn( d4.id, search_query( "&created__date__lt=" - + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") + + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"), ), ) self.assertNotIn( d4.id, search_query( "&created__date__gt=" - + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") + + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"), ), ) @@ -627,40 +641,44 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): d4.id, search_query( "&created__date__lt=" - + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") + + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"), ), ) self.assertIn( d4.id, search_query( "&created__date__gt=" - + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") + + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"), ), ) self.assertIn( d5.id, search_query( - "&added__date__lt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") + "&added__date__lt=" + + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"), ), ) self.assertNotIn( d5.id, search_query( - "&added__date__gt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") + "&added__date__gt=" + + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"), ), ) self.assertNotIn( d5.id, search_query( - "&added__date__lt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") + "&added__date__lt=" + + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"), ), ) self.assertIn( d5.id, search_query( - "&added__date__gt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") + "&added__date__gt=" + + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"), ), ) @@ -700,18 +718,22 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): return [hit["id"] for hit in r.data["results"]] self.assertListEqual( - search_query("&ordering=archive_serial_number"), [d3.id, d1.id, d2.id] + search_query("&ordering=archive_serial_number"), + [d3.id, d1.id, d2.id], ) self.assertListEqual( - search_query("&ordering=-archive_serial_number"), [d2.id, d1.id, d3.id] + search_query("&ordering=-archive_serial_number"), + [d2.id, d1.id, d3.id], ) self.assertListEqual(search_query("&ordering=title"), [d3.id, d2.id, d1.id]) self.assertListEqual(search_query("&ordering=-title"), [d1.id, d2.id, d3.id]) self.assertListEqual( - search_query("&ordering=correspondent__name"), [d1.id, d3.id, d2.id] + search_query("&ordering=correspondent__name"), + [d1.id, d3.id, d2.id], ) self.assertListEqual( - search_query("&ordering=-correspondent__name"), [d2.id, d3.id, d1.id] + search_query("&ordering=-correspondent__name"), + [d2.id, d3.id, d1.id], ) def test_statistics(self): @@ -740,10 +762,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_upload(self, m): with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( - "/api/documents/post_document/", {"document": f} + "/api/documents/post_document/", + {"document": f}, ) self.assertEqual(response.status_code, 200) @@ -761,7 +785,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_upload_empty_metadata(self, m): with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( "/api/documents/post_document/", @@ -783,10 +808,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_upload_invalid_form(self, m): with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( - "/api/documents/post_document/", {"documenst": f} + "/api/documents/post_document/", + {"documenst": f}, ) self.assertEqual(response.status_code, 400) m.assert_not_called() @@ -795,10 +822,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_upload_invalid_file(self, m): with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), + "rb", ) as f: response = self.client.post( - "/api/documents/post_document/", {"document": f} + "/api/documents/post_document/", + {"document": f}, ) self.assertEqual(response.status_code, 400) m.assert_not_called() @@ -806,7 +835,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.async_task") def test_upload_with_title(self, async_task): with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( "/api/documents/post_document/", @@ -824,10 +854,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_upload_with_correspondent(self, async_task): c = Correspondent.objects.create(name="test-corres") with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( - "/api/documents/post_document/", {"document": f, "correspondent": c.id} + "/api/documents/post_document/", + {"document": f, "correspondent": c.id}, ) self.assertEqual(response.status_code, 200) @@ -840,10 +872,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.async_task") def test_upload_with_invalid_correspondent(self, async_task): with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( - "/api/documents/post_document/", {"document": f, "correspondent": 3456} + "/api/documents/post_document/", + {"document": f, "correspondent": 3456}, ) self.assertEqual(response.status_code, 400) @@ -853,10 +887,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_upload_with_document_type(self, async_task): dt = DocumentType.objects.create(name="invoice") with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( - "/api/documents/post_document/", {"document": f, "document_type": dt.id} + "/api/documents/post_document/", + {"document": f, "document_type": dt.id}, ) self.assertEqual(response.status_code, 200) @@ -869,10 +905,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.async_task") def test_upload_with_invalid_document_type(self, async_task): with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( - "/api/documents/post_document/", {"document": f, "document_type": 34578} + "/api/documents/post_document/", + {"document": f, "document_type": 34578}, ) self.assertEqual(response.status_code, 400) @@ -883,10 +921,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): t1 = Tag.objects.create(name="tag1") t2 = Tag.objects.create(name="tag2") with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( - "/api/documents/post_document/", {"document": f, "tags": [t2.id, t1.id]} + "/api/documents/post_document/", + {"document": f, "tags": [t2.id, t1.id]}, ) self.assertEqual(response.status_code, 200) @@ -901,7 +941,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): t1 = Tag.objects.create(name="tag1") t2 = Tag.objects.create(name="tag2") with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + "rb", ) as f: response = self.client.post( "/api/documents/post_document/", @@ -952,7 +993,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_get_metadata_no_archive(self): doc = Document.objects.create( - title="test", filename="file.pdf", mime_type="application/pdf" + title="test", + filename="file.pdf", + mime_type="application/pdf", ) shutil.copy( @@ -999,7 +1042,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(response.status_code, 200) self.assertEqual( - response.data, {"correspondents": [], "tags": [], "document_types": []} + response.data, + {"correspondents": [], "tags": [], "document_types": []}, ) def test_get_suggestions_invalid_doc(self): @@ -1010,10 +1054,15 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.match_tags") @mock.patch("documents.views.match_document_types") def test_get_suggestions( - self, match_document_types, match_tags, match_correspondents + self, + match_document_types, + match_tags, + match_correspondents, ): doc = Document.objects.create( - title="test", mime_type="application/pdf", content="this is an invoice!" + title="test", + mime_type="application/pdf", + content="this is an invoice!", ) match_tags.return_value = [Tag(id=56), Tag(id=123)] match_document_types.return_value = [DocumentType(id=23)] @@ -1094,7 +1143,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(v1.user, self.user) response = self.client.patch( - f"/api/saved_views/{v1.id}/", {"show_in_sidebar": False}, format="json" + f"/api/saved_views/{v1.id}/", + {"show_in_sidebar": False}, + format="json", ) v1 = SavedView.objects.get(id=v1.id) @@ -1183,7 +1234,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_regex_no_algorithm(self): for endpoint in ["correspondents", "tags", "document_types"]: response = self.client.post( - f"/api/{endpoint}/", {"name": "test", "match": "[0-9]"}, format="json" + f"/api/{endpoint}/", + {"name": "test", "match": "[0-9]"}, + format="json", ) self.assertEqual(response.status_code, 201, endpoint) @@ -1200,7 +1253,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_tag_color(self): response = self.client.post( - "/api/tags/", {"name": "tag", "colour": 3}, format="json" + "/api/tags/", + {"name": "tag", "colour": 3}, + format="json", ) self.assertEqual(response.status_code, 201) self.assertEqual(Tag.objects.get(id=response.data["id"]).color, "#b2df8a") @@ -1213,14 +1268,17 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): def test_tag_color_invalid(self): response = self.client.post( - "/api/tags/", {"name": "tag", "colour": 34}, format="json" + "/api/tags/", + {"name": "tag", "colour": 34}, + format="json", ) self.assertEqual(response.status_code, 400) def test_tag_color_custom(self): tag = Tag.objects.create(name="test", color="#abcdef") self.assertEqual( - self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"], 1 + self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"], + 1, ) @@ -1236,32 +1294,42 @@ class TestDocumentApiV2(DirectoriesMixin, APITestCase): def test_tag_validate_color(self): self.assertEqual( self.client.post( - "/api/tags/", {"name": "test", "color": "#12fFaA"}, format="json" + "/api/tags/", + {"name": "test", "color": "#12fFaA"}, + format="json", ).status_code, 201, ) self.assertEqual( self.client.post( - "/api/tags/", {"name": "test1", "color": "abcdef"}, format="json" + "/api/tags/", + {"name": "test1", "color": "abcdef"}, + format="json", ).status_code, 400, ) self.assertEqual( self.client.post( - "/api/tags/", {"name": "test2", "color": "#abcdfg"}, format="json" + "/api/tags/", + {"name": "test2", "color": "#abcdfg"}, + format="json", ).status_code, 400, ) self.assertEqual( self.client.post( - "/api/tags/", {"name": "test3", "color": "#asd"}, format="json" + "/api/tags/", + {"name": "test3", "color": "#asd"}, + format="json", ).status_code, 400, ) self.assertEqual( self.client.post( - "/api/tags/", {"name": "test4", "color": "#12121212"}, format="json" + "/api/tags/", + {"name": "test4", "color": "#12121212"}, + format="json", ).status_code, 400, ) @@ -1313,10 +1381,16 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): self.t2 = Tag.objects.create(name="t2") self.doc1 = Document.objects.create(checksum="A", title="A") self.doc2 = Document.objects.create( - checksum="B", title="B", correspondent=self.c1, document_type=self.dt1 + checksum="B", + title="B", + correspondent=self.c1, + document_type=self.dt1, ) self.doc3 = Document.objects.create( - checksum="C", title="C", correspondent=self.c2, document_type=self.dt2 + checksum="C", + title="C", + correspondent=self.c2, + document_type=self.dt2, ) self.doc4 = Document.objects.create(checksum="D", title="D") self.doc5 = Document.objects.create(checksum="E", title="E") @@ -1327,7 +1401,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): def test_set_correspondent(self): self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1) bulk_edit.set_correspondent( - [self.doc1.id, self.doc2.id, self.doc3.id], self.c2.id + [self.doc1.id, self.doc2.id, self.doc3.id], + self.c2.id, ) self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 3) self.async_task.assert_called_once() @@ -1345,7 +1420,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): def test_set_document_type(self): self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 1) bulk_edit.set_document_type( - [self.doc1.id, self.doc2.id, self.doc3.id], self.dt2.id + [self.doc1.id, self.doc2.id, self.doc3.id], + self.dt2.id, ) self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 3) self.async_task.assert_called_once() @@ -1363,7 +1439,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): def test_add_tag(self): self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2) bulk_edit.add_tag( - [self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id], self.t1.id + [self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id], + self.t1.id, ) self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 4) self.async_task.assert_called_once() @@ -1415,7 +1492,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc1.id], "method": "set_correspondent", "parameters": {"correspondent": self.c1.id}, - } + }, ), content_type="application/json", ) @@ -1435,7 +1512,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc1.id], "method": "set_correspondent", "parameters": {"correspondent": None}, - } + }, ), content_type="application/json", ) @@ -1455,7 +1532,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc1.id], "method": "set_document_type", "parameters": {"document_type": self.dt1.id}, - } + }, ), content_type="application/json", ) @@ -1475,7 +1552,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc1.id], "method": "set_document_type", "parameters": {"document_type": None}, - } + }, ), content_type="application/json", ) @@ -1495,7 +1572,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc1.id], "method": "add_tag", "parameters": {"tag": self.t1.id}, - } + }, ), content_type="application/json", ) @@ -1515,7 +1592,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc1.id], "method": "remove_tag", "parameters": {"tag": self.t1.id}, - } + }, ), content_type="application/json", ) @@ -1538,7 +1615,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "add_tags": [self.t1.id], "remove_tags": [self.t2.id], }, - } + }, ), content_type="application/json", ) @@ -1555,7 +1632,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): response = self.client.post( "/api/documents/bulk_edit/", json.dumps( - {"documents": [self.doc1.id], "method": "delete", "parameters": {}} + {"documents": [self.doc1.id], "method": "delete", "parameters": {}}, ), content_type="application/json", ) @@ -1580,7 +1657,11 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): response = self.client.post( "/api/documents/bulk_edit/", json.dumps( - {"documents": [self.doc2.id], "method": "exterminate", "parameters": {}} + { + "documents": [self.doc2.id], + "method": "exterminate", + "parameters": {}, + }, ), content_type="application/json", ) @@ -1596,7 +1677,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc2.id], "method": "set_correspondent", "parameters": {"correspondent": 345657}, - } + }, ), content_type="application/json", ) @@ -1613,7 +1694,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc2.id], "method": "set_correspondent", "parameters": {}, - } + }, ), content_type="application/json", ) @@ -1628,7 +1709,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc2.id], "method": "set_document_type", "parameters": {"document_type": 345657}, - } + }, ), content_type="application/json", ) @@ -1645,7 +1726,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc2.id], "method": "set_document_type", "parameters": {}, - } + }, ), content_type="application/json", ) @@ -1660,7 +1741,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc2.id], "method": "add_tag", "parameters": {"tag": 345657}, - } + }, ), content_type="application/json", ) @@ -1672,7 +1753,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): response = self.client.post( "/api/documents/bulk_edit/", json.dumps( - {"documents": [self.doc2.id], "method": "add_tag", "parameters": {}} + {"documents": [self.doc2.id], "method": "add_tag", "parameters": {}}, ), content_type="application/json", ) @@ -1687,7 +1768,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc2.id], "method": "remove_tag", "parameters": {"tag": 345657}, - } + }, ), content_type="application/json", ) @@ -1699,7 +1780,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): response = self.client.post( "/api/documents/bulk_edit/", json.dumps( - {"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}} + {"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}}, ), content_type="application/json", ) @@ -1717,7 +1798,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "add_tags": [self.t2.id, 1657], "remove_tags": [1123123], }, - } + }, ), content_type="application/json", ) @@ -1731,7 +1812,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc2.id], "method": "modify_tags", "parameters": {"remove_tags": [1123123]}, - } + }, ), content_type="application/json", ) @@ -1744,7 +1825,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): "documents": [self.doc2.id], "method": "modify_tags", "parameters": {"add_tags": [self.t2.id, 1657]}, - } + }, ), content_type="application/json", ) @@ -1774,7 +1855,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): response = self.client.post( "/api/documents/selection_data/", json.dumps( - {"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]} + {"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]}, ), content_type="application/json", ) @@ -1856,7 +1937,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase): response = self.client.post( "/api/documents/bulk_download/", json.dumps( - {"documents": [self.doc2.id, self.doc3.id], "content": "originals"} + {"documents": [self.doc2.id, self.doc3.id], "content": "originals"}, ), content_type="application/json", ) @@ -1914,17 +1995,20 @@ class TestBulkDownload(DirectoriesMixin, APITestCase): with self.doc2.source_file as f: self.assertEqual( - f.read(), zipf.read("originals/2021-01-01 document A.pdf") + f.read(), + zipf.read("originals/2021-01-01 document A.pdf"), ) with self.doc3.archive_file as f: self.assertEqual( - f.read(), zipf.read("archive/2020-03-21 document B.pdf") + f.read(), + zipf.read("archive/2020-03-21 document B.pdf"), ) with self.doc3.source_file as f: self.assertEqual( - f.read(), zipf.read("originals/2020-03-21 document B.jpg") + f.read(), + zipf.read("originals/2020-03-21 document B.jpg"), ) def test_filename_clashes(self): @@ -1953,7 +2037,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase): response = self.client.post( "/api/documents/bulk_download/", json.dumps( - {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"} + {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"}, ), content_type="application/json", ) @@ -1968,13 +2052,16 @@ class TestApiAuth(APITestCase): self.assertEqual(self.client.get(f"/api/documents/{d.id}/").status_code, 401) self.assertEqual( - self.client.get(f"/api/documents/{d.id}/download/").status_code, 401 + self.client.get(f"/api/documents/{d.id}/download/").status_code, + 401, ) self.assertEqual( - self.client.get(f"/api/documents/{d.id}/preview/").status_code, 401 + self.client.get(f"/api/documents/{d.id}/preview/").status_code, + 401, ) self.assertEqual( - self.client.get(f"/api/documents/{d.id}/thumb/").status_code, 401 + self.client.get(f"/api/documents/{d.id}/thumb/").status_code, + 401, ) self.assertEqual(self.client.get("/api/tags/").status_code, 401) @@ -1987,10 +2074,12 @@ class TestApiAuth(APITestCase): self.assertEqual(self.client.get("/api/search/autocomplete/").status_code, 401) self.assertEqual(self.client.get("/api/documents/bulk_edit/").status_code, 401) self.assertEqual( - self.client.get("/api/documents/bulk_download/").status_code, 401 + self.client.get("/api/documents/bulk_download/").status_code, + 401, ) self.assertEqual( - self.client.get("/api/documents/selection_data/").status_code, 401 + self.client.get("/api/documents/selection_data/").status_code, + 401, ) def test_api_version_no_auth(self): diff --git a/src/documents/tests/test_checks.py b/src/documents/tests/test_checks.py index 7a1a81ec1..b7136a3dc 100644 --- a/src/documents/tests/test_checks.py +++ b/src/documents/tests/test_checks.py @@ -4,10 +4,11 @@ from unittest import mock from django.core.checks import Error from django.test import TestCase -from .factories import DocumentFactory -from .. import document_consumer_declaration -from ..checks import changed_password_check, parser_check +from ..checks import changed_password_check +from ..checks import parser_check from ..models import Document +from ..signals import document_consumer_declaration +from .factories import DocumentFactory class ChecksTestCase(TestCase): @@ -30,7 +31,7 @@ class ChecksTestCase(TestCase): [ Error( "No parsers found. This is a bug. The consumer won't be " - "able to consume any documents without parsers." - ) + "able to consume any documents without parsers.", + ), ], ) diff --git a/src/documents/tests/test_classifier.py b/src/documents/tests/test_classifier.py index dad8231a7..b03ecf7d3 100644 --- a/src/documents/tests/test_classifier.py +++ b/src/documents/tests/test_classifier.py @@ -5,14 +5,15 @@ from unittest import mock import pytest from django.conf import settings -from django.test import TestCase, override_settings - -from documents.classifier import ( - DocumentClassifier, - IncompatibleClassifierVersionError, - load_classifier, -) -from documents.models import Correspondent, Document, Tag, DocumentType +from django.test import override_settings +from django.test import TestCase +from documents.classifier import DocumentClassifier +from documents.classifier import IncompatibleClassifierVersionError +from documents.classifier import load_classifier +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.models import Tag from documents.tests.utils import DirectoriesMixin @@ -23,26 +24,37 @@ class TestClassifier(DirectoriesMixin, TestCase): def generate_test_data(self): self.c1 = Correspondent.objects.create( - name="c1", matching_algorithm=Correspondent.MATCH_AUTO + name="c1", + matching_algorithm=Correspondent.MATCH_AUTO, ) self.c2 = Correspondent.objects.create(name="c2") self.c3 = Correspondent.objects.create( - name="c3", matching_algorithm=Correspondent.MATCH_AUTO + name="c3", + matching_algorithm=Correspondent.MATCH_AUTO, ) self.t1 = Tag.objects.create( - name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12 + name="t1", + matching_algorithm=Tag.MATCH_AUTO, + pk=12, ) self.t2 = Tag.objects.create( - name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True + name="t2", + matching_algorithm=Tag.MATCH_ANY, + pk=34, + is_inbox_tag=True, ) self.t3 = Tag.objects.create( - name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45 + name="t3", + matching_algorithm=Tag.MATCH_AUTO, + pk=45, ) self.dt = DocumentType.objects.create( - name="dt", matching_algorithm=DocumentType.MATCH_AUTO + name="dt", + matching_algorithm=DocumentType.MATCH_AUTO, ) self.dt2 = DocumentType.objects.create( - name="dt2", matching_algorithm=DocumentType.MATCH_AUTO + name="dt2", + matching_algorithm=DocumentType.MATCH_AUTO, ) self.doc1 = Document.objects.create( @@ -59,7 +71,9 @@ class TestClassifier(DirectoriesMixin, TestCase): checksum="B", ) self.doc_inbox = Document.objects.create( - title="doc235", content="aa", checksum="C" + title="doc235", + content="aa", + checksum="C", ) self.doc1.tags.add(self.t1) @@ -90,27 +104,33 @@ class TestClassifier(DirectoriesMixin, TestCase): self.generate_test_data() self.classifier.train() self.assertListEqual( - list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk] + list(self.classifier.correspondent_classifier.classes_), + [-1, self.c1.pk], ) self.assertListEqual( - list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk] + list(self.classifier.tags_binarizer.classes_), + [self.t1.pk, self.t3.pk], ) def testPredict(self): self.generate_test_data() self.classifier.train() self.assertEqual( - self.classifier.predict_correspondent(self.doc1.content), self.c1.pk + self.classifier.predict_correspondent(self.doc1.content), + self.c1.pk, ) self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None) self.assertListEqual( - self.classifier.predict_tags(self.doc1.content), [self.t1.pk] + self.classifier.predict_tags(self.doc1.content), + [self.t1.pk], ) self.assertListEqual( - self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk] + self.classifier.predict_tags(self.doc2.content), + [self.t1.pk, self.t3.pk], ) self.assertEqual( - self.classifier.predict_document_type(self.doc1.content), self.dt.pk + self.classifier.predict_document_type(self.doc1.content), + self.dt.pk, ) self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None) @@ -133,7 +153,8 @@ class TestClassifier(DirectoriesMixin, TestCase): current_ver = DocumentClassifier.FORMAT_VERSION with mock.patch( - "documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver + 1 + "documents.classifier.DocumentClassifier.FORMAT_VERSION", + current_ver + 1, ): # assure that we won't load old classifiers. self.assertRaises(IncompatibleClassifierVersionError, classifier2.load) @@ -157,7 +178,7 @@ class TestClassifier(DirectoriesMixin, TestCase): self.assertFalse(new_classifier.train()) @override_settings( - MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle") + MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"), ) def test_load_and_classify(self): self.generate_test_data() @@ -169,7 +190,8 @@ class TestClassifier(DirectoriesMixin, TestCase): def test_one_correspondent_predict(self): c1 = Correspondent.objects.create( - name="c1", matching_algorithm=Correspondent.MATCH_AUTO + name="c1", + matching_algorithm=Correspondent.MATCH_AUTO, ) doc1 = Document.objects.create( title="doc1", @@ -183,7 +205,8 @@ class TestClassifier(DirectoriesMixin, TestCase): def test_one_correspondent_predict_manydocs(self): c1 = Correspondent.objects.create( - name="c1", matching_algorithm=Correspondent.MATCH_AUTO + name="c1", + matching_algorithm=Correspondent.MATCH_AUTO, ) doc1 = Document.objects.create( title="doc1", @@ -192,7 +215,9 @@ class TestClassifier(DirectoriesMixin, TestCase): checksum="A", ) doc2 = Document.objects.create( - title="doc2", content="this is a document from noone", checksum="B" + title="doc2", + content="this is a document from noone", + checksum="B", ) self.classifier.train() @@ -201,7 +226,8 @@ class TestClassifier(DirectoriesMixin, TestCase): def test_one_type_predict(self): dt = DocumentType.objects.create( - name="dt", matching_algorithm=DocumentType.MATCH_AUTO + name="dt", + matching_algorithm=DocumentType.MATCH_AUTO, ) doc1 = Document.objects.create( @@ -216,7 +242,8 @@ class TestClassifier(DirectoriesMixin, TestCase): def test_one_type_predict_manydocs(self): dt = DocumentType.objects.create( - name="dt", matching_algorithm=DocumentType.MATCH_AUTO + name="dt", + matching_algorithm=DocumentType.MATCH_AUTO, ) doc1 = Document.objects.create( @@ -227,7 +254,9 @@ class TestClassifier(DirectoriesMixin, TestCase): ) doc2 = Document.objects.create( - title="doc1", content="this is a document from c2", checksum="B" + title="doc1", + content="this is a document from c2", + checksum="B", ) self.classifier.train() @@ -238,7 +267,9 @@ class TestClassifier(DirectoriesMixin, TestCase): t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12) doc1 = Document.objects.create( - title="doc1", content="this is a document from c1", checksum="A" + title="doc1", + content="this is a document from c1", + checksum="A", ) doc1.tags.add(t1) @@ -249,7 +280,9 @@ class TestClassifier(DirectoriesMixin, TestCase): t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12) doc1 = Document.objects.create( - title="doc1", content="this is a document from c1", checksum="A" + title="doc1", + content="this is a document from c1", + checksum="A", ) self.classifier.train() @@ -260,7 +293,9 @@ class TestClassifier(DirectoriesMixin, TestCase): t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121) doc4 = Document.objects.create( - title="doc1", content="this is a document from c4", checksum="D" + title="doc1", + content="this is a document from c4", + checksum="D", ) doc4.tags.add(t1) @@ -273,16 +308,24 @@ class TestClassifier(DirectoriesMixin, TestCase): t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121) doc1 = Document.objects.create( - title="doc1", content="this is a document from c1", checksum="A" + title="doc1", + content="this is a document from c1", + checksum="A", ) doc2 = Document.objects.create( - title="doc1", content="this is a document from c2", checksum="B" + title="doc1", + content="this is a document from c2", + checksum="B", ) doc3 = Document.objects.create( - title="doc1", content="this is a document from c3", checksum="C" + title="doc1", + content="this is a document from c3", + checksum="C", ) doc4 = Document.objects.create( - title="doc1", content="this is a document from c4", checksum="D" + title="doc1", + content="this is a document from c4", + checksum="D", ) doc1.tags.add(t1) @@ -300,10 +343,14 @@ class TestClassifier(DirectoriesMixin, TestCase): t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12) doc1 = Document.objects.create( - title="doc1", content="this is a document from c1", checksum="A" + title="doc1", + content="this is a document from c1", + checksum="A", ) doc2 = Document.objects.create( - title="doc2", content="this is a document from c2", checksum="B" + title="doc2", + content="this is a document from c2", + checksum="B", ) doc1.tags.add(t1) @@ -316,10 +363,14 @@ class TestClassifier(DirectoriesMixin, TestCase): t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12) doc1 = Document.objects.create( - title="doc1", content="this is a document from c1", checksum="A" + title="doc1", + content="this is a document from c1", + checksum="A", ) doc2 = Document.objects.create( - title="doc2", content="this is a document from c2", checksum="B" + title="doc2", + content="this is a document from c2", + checksum="B", ) doc1.tags.add(t1) @@ -338,13 +389,15 @@ class TestClassifier(DirectoriesMixin, TestCase): load.assert_called_once() @override_settings( - CACHES={"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}} + CACHES={ + "default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}, + }, ) @override_settings( - MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle") + MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"), ) @pytest.mark.skip( - reason="Disabled caching due to high memory usage - need to investigate." + reason="Disabled caching due to high memory usage - need to investigate.", ) def test_load_classifier_cached(self): classifier = load_classifier() diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 6c79c7713..7225a7eef 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -6,13 +6,20 @@ from unittest import mock from unittest.mock import MagicMock from django.conf import settings -from django.test import TestCase, override_settings +from django.test import override_settings +from django.test import TestCase -from .utils import DirectoriesMixin -from ..consumer import Consumer, ConsumerError -from ..models import FileInfo, Tag, Correspondent, DocumentType, Document -from ..parsers import DocumentParser, ParseError +from ..consumer import Consumer +from ..consumer import ConsumerError +from ..models import Correspondent +from ..models import Document +from ..models import DocumentType +from ..models import FileInfo +from ..models import Tag +from ..parsers import DocumentParser +from ..parsers import ParseError from ..tasks import sanity_check +from .utils import DirectoriesMixin class TestAttributes(TestCase): @@ -33,12 +40,18 @@ class TestAttributes(TestCase): def test_guess_attributes_from_name_when_title_starts_with_dash(self): self._test_guess_attributes_from_name( - "- weird but should not break.pdf", None, "- weird but should not break", () + "- weird but should not break.pdf", + None, + "- weird but should not break", + (), ) def test_guess_attributes_from_name_when_title_ends_with_dash(self): self._test_guess_attributes_from_name( - "weird but should not break -.pdf", None, "weird but should not break -", () + "weird but should not break -.pdf", + None, + "weird but should not break -", + (), ) @@ -53,7 +66,12 @@ class TestFieldPermutations(TestCase): valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"] def _test_guessed_attributes( - self, filename, created=None, correspondent=None, title=None, tags=None + self, + filename, + created=None, + correspondent=None, + title=None, + tags=None, ): info = FileInfo.from_filename(filename) @@ -131,7 +149,7 @@ class TestFieldPermutations(TestCase): FILENAME_PARSE_TRANSFORMS=[ (all_patt, "all.gif"), (all_patt, "anotherall.gif"), - ] + ], ): info = FileInfo.from_filename(filename) self.assertEqual(info.title, "all") @@ -141,7 +159,7 @@ class TestFieldPermutations(TestCase): FILENAME_PARSE_TRANSFORMS=[ (none_patt, "none.gif"), (all_patt, "anotherall.gif"), - ] + ], ): info = FileInfo.from_filename(filename) self.assertEqual(info.title, "anotherall") @@ -238,7 +256,9 @@ class TestConsumer(DirectoriesMixin, TestCase): def make_dummy_parser(self, logging_group, progress_callback=None): return DummyParser( - logging_group, self.dirs.scratch_dir, self.get_test_archive_file() + logging_group, + self.dirs.scratch_dir, + self.get_test_archive_file(), ) def make_faulty_parser(self, logging_group, progress_callback=None): @@ -257,7 +277,7 @@ class TestConsumer(DirectoriesMixin, TestCase): "mime_types": {"application/pdf": ".pdf"}, "weight": 0, }, - ) + ), ] self.addCleanup(patcher.stop) @@ -282,7 +302,11 @@ class TestConsumer(DirectoriesMixin, TestCase): def get_test_archive_file(self): src = os.path.join( - os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf" + os.path.dirname(__file__), + "samples", + "documents", + "archive", + "0000001.pdf", ) dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf") shutil.copy(src, dst) @@ -296,7 +320,8 @@ class TestConsumer(DirectoriesMixin, TestCase): self.assertEqual(document.content, "The Text") self.assertEqual( - document.title, os.path.splitext(os.path.basename(filename))[0] + document.title, + os.path.splitext(os.path.basename(filename))[0], ) self.assertIsNone(document.correspondent) self.assertIsNone(document.document_type) @@ -339,7 +364,8 @@ class TestConsumer(DirectoriesMixin, TestCase): override_filename = "Statement for November.pdf" document = self.consumer.try_consume_file( - filename, override_filename=override_filename + filename, + override_filename=override_filename, ) self.assertEqual(document.title, "Statement for November") @@ -348,7 +374,8 @@ class TestConsumer(DirectoriesMixin, TestCase): def testOverrideTitle(self): document = self.consumer.try_consume_file( - self.get_test_file(), override_title="Override Title" + self.get_test_file(), + override_title="Override Title", ) self.assertEqual(document.title, "Override Title") self._assert_first_last_send_progress() @@ -357,7 +384,8 @@ class TestConsumer(DirectoriesMixin, TestCase): c = Correspondent.objects.create(name="test") document = self.consumer.try_consume_file( - self.get_test_file(), override_correspondent_id=c.pk + self.get_test_file(), + override_correspondent_id=c.pk, ) self.assertEqual(document.correspondent.id, c.id) self._assert_first_last_send_progress() @@ -366,7 +394,8 @@ class TestConsumer(DirectoriesMixin, TestCase): dt = DocumentType.objects.create(name="test") document = self.consumer.try_consume_file( - self.get_test_file(), override_document_type_id=dt.pk + self.get_test_file(), + override_document_type_id=dt.pk, ) self.assertEqual(document.document_type.id, dt.id) self._assert_first_last_send_progress() @@ -376,7 +405,8 @@ class TestConsumer(DirectoriesMixin, TestCase): t2 = Tag.objects.create(name="t2") t3 = Tag.objects.create(name="t3") document = self.consumer.try_consume_file( - self.get_test_file(), override_tag_ids=[t1.id, t3.id] + self.get_test_file(), + override_tag_ids=[t1.id, t3.id], ) self.assertIn(t1, document.tags.all()) @@ -446,7 +476,7 @@ class TestConsumer(DirectoriesMixin, TestCase): "mime_types": {"application/pdf": ".pdf"}, "weight": 0, }, - ) + ), ] self.assertRaisesMessage( @@ -595,16 +625,16 @@ class TestConsumer(DirectoriesMixin, TestCase): "mime_types": {"application/pdf": ".pdf", "image/png": ".png"}, "weight": 0, }, - ) + ), ] doc1 = self.consumer.try_consume_file( - os.path.join(settings.CONSUMPTION_DIR, "simple.png") + os.path.join(settings.CONSUMPTION_DIR, "simple.png"), ) doc2 = self.consumer.try_consume_file( - os.path.join(settings.CONSUMPTION_DIR, "simple.pdf") + os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"), ) doc3 = self.consumer.try_consume_file( - os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf") + os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"), ) self.assertEqual(doc1.filename, "simple.png") @@ -691,7 +721,9 @@ class PostConsumeTestCase(TestCase): with override_settings(POST_CONSUME_SCRIPT=script.name): c = Correspondent.objects.create(name="my_bank") doc = Document.objects.create( - title="Test", mime_type="application/pdf", correspondent=c + title="Test", + mime_type="application/pdf", + correspondent=c, ) tag1 = Tag.objects.create(name="a") tag2 = Tag.objects.create(name="b") diff --git a/src/documents/tests/test_date_parsing.py b/src/documents/tests/test_date_parsing.py index d5dbaf60b..f5987633f 100644 --- a/src/documents/tests/test_date_parsing.py +++ b/src/documents/tests/test_date_parsing.py @@ -5,15 +5,16 @@ from uuid import uuid4 from dateutil import tz from django.conf import settings -from django.test import TestCase, override_settings - +from django.test import override_settings +from django.test import TestCase from documents.parsers import parse_date class TestDate(TestCase): SAMPLE_FILES = os.path.join( - os.path.dirname(__file__), "../../paperless_tesseract/tests/samples" + os.path.dirname(__file__), + "../../paperless_tesseract/tests/samples", ) SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) @@ -111,11 +112,11 @@ class TestDate(TestCase): @override_settings(FILENAME_DATE_ORDER="YMD") def test_filename_date_parse_invalid(self, *args): self.assertIsNone( - parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here") + parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"), ) @override_settings( - IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)) + IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)), ) def test_ignored_dates(self, *args): text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum" diff --git a/src/documents/tests/test_document_model.py b/src/documents/tests/test_document_model.py index 3a7a88b87..a99d6dd18 100644 --- a/src/documents/tests/test_document_model.py +++ b/src/documents/tests/test_document_model.py @@ -3,10 +3,12 @@ import tempfile from pathlib import Path from unittest import mock -from django.test import TestCase, override_settings +from django.test import override_settings +from django.test import TestCase from django.utils import timezone -from ..models import Document, Correspondent +from ..models import Correspondent +from ..models import Document class TestDocument(TestCase): diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 6ffa4481d..7743458b4 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -9,17 +9,19 @@ from unittest import mock from django.conf import settings from django.db import DatabaseError -from django.test import TestCase, override_settings +from django.test import override_settings +from django.test import TestCase from django.utils import timezone +from ..file_handling import create_source_path_directory +from ..file_handling import delete_empty_directories +from ..file_handling import generate_filename +from ..file_handling import generate_unique_filename +from ..models import Correspondent +from ..models import Document +from ..models import DocumentType +from ..models import Tag from .utils import DirectoriesMixin -from ..file_handling import ( - generate_filename, - create_source_path_directory, - delete_empty_directories, - generate_unique_filename, -) -from ..models import Document, Correspondent, Tag, DocumentType class TestFileHandling(DirectoriesMixin, TestCase): @@ -34,7 +36,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): document.storage_type = Document.STORAGE_TYPE_GPG self.assertEqual( - generate_filename(document), "{:07d}.pdf.gpg".format(document.pk) + generate_filename(document), + "{:07d}.pdf.gpg".format(document.pk), ) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @@ -75,7 +78,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual( - os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True + os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), + True, ) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @@ -93,7 +97,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Test source_path self.assertEqual( - document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf" + document.source_path, + settings.ORIGINALS_DIR + "/none/none.pdf", ) # Make the folder read- and execute-only (no writing and no renaming) @@ -105,7 +110,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Check proper handling of files self.assertEqual( - os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True + os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), + True, ) self.assertEqual(document.filename, "none/none.pdf") @@ -145,7 +151,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Check proper handling of files self.assertTrue(os.path.isfile(document.source_path)) self.assertEqual( - os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True + os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), + True, ) self.assertEqual(document.filename, "none/none.pdf") @@ -167,7 +174,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): pk = document.pk document.delete() self.assertEqual( - os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False + os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), + False, ) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) @@ -192,7 +200,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False) document.delete() self.assertEqual( - os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False + os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), + False, ) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True) @@ -363,7 +372,9 @@ class TestFileHandling(DirectoriesMixin, TestCase): self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf") doc = Document.objects.create( - title="doc2", checksum="B", mime_type="application/pdf" + title="doc2", + checksum="B", + mime_type="application/pdf", ) self.assertEqual(generate_filename(doc), "doc2.pdf") @@ -380,12 +391,14 @@ class TestFileHandling(DirectoriesMixin, TestCase): ) @override_settings( - PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}" + PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}", ) def test_created_year_month_day(self): d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1)) doc1 = Document.objects.create( - title="doc1", mime_type="application/pdf", created=d1 + title="doc1", + mime_type="application/pdf", + created=d1, ) self.assertEqual(generate_filename(doc1), "2020-03-06.pdf") @@ -395,12 +408,14 @@ class TestFileHandling(DirectoriesMixin, TestCase): self.assertEqual(generate_filename(doc1), "2020-11-16.pdf") @override_settings( - PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}" + PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}", ) def test_added_year_month_day(self): d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1)) doc1 = Document.objects.create( - title="doc1", mime_type="application/pdf", added=d1 + title="doc1", + mime_type="application/pdf", + added=d1, ) self.assertEqual(generate_filename(doc1), "232-01-09.pdf") @@ -410,7 +425,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): self.assertEqual(generate_filename(doc1), "2020-11-16.pdf") @override_settings( - PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}" + PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}", ) def test_nested_directory_cleanup(self): document = Document() @@ -431,7 +446,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): document.delete() self.assertEqual( - os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False + os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), + False, ) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) @@ -456,7 +472,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): os.makedirs(os.path.join(tmp, "notempty", "empty")) delete_empty_directories( - os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR + os.path.join(tmp, "notempty", "empty"), + root=settings.ORIGINALS_DIR, ) self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True) @@ -483,10 +500,16 @@ class TestFileHandling(DirectoriesMixin, TestCase): @override_settings(PAPERLESS_FILENAME_FORMAT="{title}") def test_duplicates(self): document = Document.objects.create( - mime_type="application/pdf", title="qwe", checksum="A", pk=1 + mime_type="application/pdf", + title="qwe", + checksum="A", + pk=1, ) document2 = Document.objects.create( - mime_type="application/pdf", title="qwe", checksum="B", pk=2 + mime_type="application/pdf", + title="qwe", + checksum="B", + pk=2, ) Path(document.source_path).touch() Path(document2.source_path).touch() @@ -584,10 +607,12 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): self.assertTrue(os.path.isfile(doc.source_path)) self.assertTrue(os.path.isfile(doc.archive_path)) self.assertEqual( - doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf") + doc.source_path, + os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"), ) self.assertEqual( - doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf") + doc.archive_path, + os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"), ) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") @@ -851,7 +876,10 @@ class TestFilenameGeneration(TestCase): def test_invalid_characters(self): doc = Document.objects.create( - title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1" + title="This. is the title.", + mime_type="application/pdf", + pk=1, + checksum="1", ) self.assertEqual(generate_filename(doc), "This. is the title.pdf") @@ -877,7 +905,9 @@ class TestFilenameGeneration(TestCase): def run(): doc = Document.objects.create( - checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow" + checksum=str(uuid.uuid4()), + title=str(uuid.uuid4()), + content="wow", ) doc.filename = generate_unique_filename(doc) Path(doc.thumbnail_path).touch() diff --git a/src/documents/tests/test_importer.py b/src/documents/tests/test_importer.py index 73215173a..5101a269f 100644 --- a/src/documents/tests/test_importer.py +++ b/src/documents/tests/test_importer.py @@ -1,7 +1,7 @@ from django.core.management.base import CommandError from django.test import TestCase - from documents.settings import EXPORTER_FILE_NAME + from ..management.commands.document_importer import Command @@ -12,7 +12,9 @@ class TestImporter(TestCase): def test_check_manifest_exists(self): cmd = Command() self.assertRaises( - CommandError, cmd._check_manifest_exists, "/tmp/manifest.json" + CommandError, + cmd._check_manifest_exists, + "/tmp/manifest.json", ) def test_check_manifest(self): @@ -26,11 +28,11 @@ class TestImporter(TestCase): self.assertTrue("The manifest file contains a record" in str(cm.exception)) cmd.manifest = [ - {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"} + {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}, ] # self.assertRaises(CommandError, cmd._check_manifest) with self.assertRaises(CommandError) as cm: cmd._check_manifest() self.assertTrue( - 'The manifest file refers to "noexist.pdf"' in str(cm.exception) + 'The manifest file refers to "noexist.pdf"' in str(cm.exception), ) diff --git a/src/documents/tests/test_index.py b/src/documents/tests/test_index.py index 31ad2aebf..696648427 100644 --- a/src/documents/tests/test_index.py +++ b/src/documents/tests/test_index.py @@ -1,5 +1,4 @@ from django.test import TestCase - from documents import index from documents.models import Document from documents.tests.utils import DirectoriesMixin @@ -9,7 +8,9 @@ class TestAutoComplete(DirectoriesMixin, TestCase): def test_auto_complete(self): doc1 = Document.objects.create( - title="doc1", checksum="A", content="test test2 test3" + title="doc1", + checksum="A", + content="test test2 test3", ) doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2") doc3 = Document.objects.create(title="doc3", checksum="C", content="test2") @@ -21,10 +22,12 @@ class TestAutoComplete(DirectoriesMixin, TestCase): ix = index.open_index() self.assertListEqual( - index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"] + index.autocomplete(ix, "tes"), + [b"test3", b"test", b"test2"], ) self.assertListEqual( - index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"] + index.autocomplete(ix, "tes", limit=3), + [b"test3", b"test", b"test2"], ) self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"]) self.assertListEqual(index.autocomplete(ix, "tes", limit=0), []) diff --git a/src/documents/tests/test_management.py b/src/documents/tests/test_management.py index f3c3a3fae..7cb2e22d7 100644 --- a/src/documents/tests/test_management.py +++ b/src/documents/tests/test_management.py @@ -1,16 +1,14 @@ -import hashlib -import tempfile import filecmp +import hashlib import os import shutil +import tempfile from pathlib import Path from unittest import mock -from django.test import TestCase, override_settings - - from django.core.management import call_command - +from django.test import override_settings +from django.test import TestCase from documents.file_handling import generate_filename from documents.management.commands.document_archiver import handle_document from documents.models import Document @@ -34,7 +32,8 @@ class TestArchiver(DirectoriesMixin, TestCase): doc = self.make_models() shutil.copy( - sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf") + sample_file, + os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"), ) call_command("document_archiver") @@ -43,7 +42,8 @@ class TestArchiver(DirectoriesMixin, TestCase): doc = self.make_models() shutil.copy( - sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf") + sample_file, + os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"), ) handle_document(doc.pk) @@ -90,7 +90,8 @@ class TestArchiver(DirectoriesMixin, TestCase): ) shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf")) shutil.copy( - sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf") + sample_file, + os.path.join(self.dirs.originals_dir, f"document_01.pdf"), ) handle_document(doc2.pk) @@ -120,7 +121,9 @@ class TestDecryptDocuments(TestCase): os.makedirs(thumb_dir, exist_ok=True) override_settings( - ORIGINALS_DIR=originals_dir, THUMBNAIL_DIR=thumb_dir, PASSPHRASE="test" + ORIGINALS_DIR=originals_dir, + THUMBNAIL_DIR=thumb_dir, + PASSPHRASE="test", ).enable() doc = Document.objects.create( @@ -206,7 +209,7 @@ class TestRenamer(DirectoriesMixin, TestCase): class TestCreateClassifier(TestCase): @mock.patch( - "documents.management.commands.document_create_classifier.train_classifier" + "documents.management.commands.document_create_classifier.train_classifier", ) def test_create_classifier(self, m): call_command("document_create_classifier") @@ -224,7 +227,10 @@ class TestSanityChecker(DirectoriesMixin, TestCase): def test_errors(self): doc = Document.objects.create( - title="test", content="test", filename="test.pdf", checksum="abc" + title="test", + content="test", + filename="test.pdf", + checksum="abc", ) Path(doc.source_path).touch() Path(doc.thumbnail_path).touch() diff --git a/src/documents/tests/test_management_consumer.py b/src/documents/tests/test_management_consumer.py index 31ab69339..e4d772730 100644 --- a/src/documents/tests/test_management_consumer.py +++ b/src/documents/tests/test_management_consumer.py @@ -6,12 +6,13 @@ from time import sleep from unittest import mock from django.conf import settings -from django.core.management import call_command, CommandError -from django.test import override_settings, TransactionTestCase - -from documents.models import Tag +from django.core.management import call_command +from django.core.management import CommandError +from django.test import override_settings +from django.test import TransactionTestCase from documents.consumer import ConsumerError from documents.management.commands import document_consumer +from documents.models import Tag from documents.tests.utils import DirectoriesMixin @@ -41,7 +42,7 @@ class ConsumerMixin: super(ConsumerMixin, self).setUp() self.t = None patcher = mock.patch( - "documents.management.commands.document_consumer.async_task" + "documents.management.commands.document_consumer.async_task", ) self.task_mock = patcher.start() self.addCleanup(patcher.stop) @@ -208,13 +209,16 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase): self.t_start() shutil.copy( - self.sample_file, os.path.join(self.dirs.consumption_dir, ".DS_STORE") + self.sample_file, + os.path.join(self.dirs.consumption_dir, ".DS_STORE"), ) shutil.copy( - self.sample_file, os.path.join(self.dirs.consumption_dir, "my_file.pdf") + self.sample_file, + os.path.join(self.dirs.consumption_dir, "my_file.pdf"), ) shutil.copy( - self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_file.pdf") + self.sample_file, + os.path.join(self.dirs.consumption_dir, "._my_file.pdf"), ) shutil.copy( self.sample_file, @@ -258,7 +262,9 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase): @override_settings( - CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20 + CONSUMER_POLLING=1, + CONSUMER_POLLING_DELAY=3, + CONSUMER_POLLING_RETRY_COUNT=20, ) class TestConsumerPolling(TestConsumer): # just do all the tests with polling @@ -319,7 +325,9 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase): self.assertCountEqual(kwargs["override_tag_ids"], tag_ids) @override_settings( - CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20 + CONSUMER_POLLING=1, + CONSUMER_POLLING_DELAY=1, + CONSUMER_POLLING_RETRY_COUNT=20, ) def test_consume_file_with_path_tags_polling(self): self.test_consume_file_with_path_tags() diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index e833b0eef..23cf1f225 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -7,13 +7,17 @@ from pathlib import Path from unittest import mock from django.core.management import call_command -from django.test import TestCase, override_settings - +from django.test import override_settings +from django.test import TestCase from documents.management.commands import document_exporter -from documents.models import Document, Tag, DocumentType, Correspondent +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.models import Tag from documents.sanity_checker import check_sanity from documents.settings import EXPORTER_FILE_NAME -from documents.tests.utils import DirectoriesMixin, paperless_environment +from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import paperless_environment class TestExportImport(DirectoriesMixin, TestCase): @@ -66,8 +70,9 @@ class TestExportImport(DirectoriesMixin, TestCase): def _get_document_from_manifest(self, manifest, id): f = list( filter( - lambda d: d["model"] == "documents.document" and d["pk"] == id, manifest - ) + lambda d: d["model"] == "documents.document" and d["pk"] == id, + manifest, + ), ) if len(f) == 1: return f[0] @@ -76,7 +81,10 @@ class TestExportImport(DirectoriesMixin, TestCase): @override_settings(PASSPHRASE="test") def _do_export( - self, use_filename_format=False, compare_checksums=False, delete=False + self, + use_filename_format=False, + compare_checksums=False, + delete=False, ): args = ["document_exporter", self.target] if use_filename_format: @@ -104,7 +112,8 @@ class TestExportImport(DirectoriesMixin, TestCase): self.assertEqual(len(manifest), 8) self.assertEqual( - len(list(filter(lambda e: e["model"] == "documents.document", manifest))), 4 + len(list(filter(lambda e: e["model"] == "documents.document", manifest))), + 4, ) self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json"))) @@ -129,7 +138,8 @@ class TestExportImport(DirectoriesMixin, TestCase): for element in manifest: if element["model"] == "documents.document": fname = os.path.join( - self.target, element[document_exporter.EXPORTER_FILE_NAME] + self.target, + element[document_exporter.EXPORTER_FILE_NAME], ) self.assertTrue(os.path.exists(fname)) self.assertTrue( @@ -137,8 +147,8 @@ class TestExportImport(DirectoriesMixin, TestCase): os.path.join( self.target, element[document_exporter.EXPORTER_THUMBNAIL_NAME], - ) - ) + ), + ), ) with open(fname, "rb") as f: @@ -146,12 +156,14 @@ class TestExportImport(DirectoriesMixin, TestCase): self.assertEqual(checksum, element["fields"]["checksum"]) self.assertEqual( - element["fields"]["storage_type"], Document.STORAGE_TYPE_UNENCRYPTED + element["fields"]["storage_type"], + Document.STORAGE_TYPE_UNENCRYPTED, ) if document_exporter.EXPORTER_ARCHIVE_NAME in element: fname = os.path.join( - self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME] + self.target, + element[document_exporter.EXPORTER_ARCHIVE_NAME], ) self.assertTrue(os.path.exists(fname)) @@ -188,7 +200,7 @@ class TestExportImport(DirectoriesMixin, TestCase): ) with override_settings( - PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}" + PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}", ): self.test_exporter(use_filename_format=True) @@ -205,7 +217,7 @@ class TestExportImport(DirectoriesMixin, TestCase): st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime with mock.patch( - "documents.management.commands.document_exporter.shutil.copy2" + "documents.management.commands.document_exporter.shutil.copy2", ) as m: self._do_export() m.assert_not_called() @@ -216,7 +228,7 @@ class TestExportImport(DirectoriesMixin, TestCase): Path(self.d1.source_path).touch() with mock.patch( - "documents.management.commands.document_exporter.shutil.copy2" + "documents.management.commands.document_exporter.shutil.copy2", ) as m: self._do_export() self.assertEqual(m.call_count, 1) @@ -239,7 +251,7 @@ class TestExportImport(DirectoriesMixin, TestCase): self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json"))) with mock.patch( - "documents.management.commands.document_exporter.shutil.copy2" + "documents.management.commands.document_exporter.shutil.copy2", ) as m: self._do_export() m.assert_not_called() @@ -250,7 +262,7 @@ class TestExportImport(DirectoriesMixin, TestCase): self.d2.save() with mock.patch( - "documents.management.commands.document_exporter.shutil.copy2" + "documents.management.commands.document_exporter.shutil.copy2", ) as m: self._do_export(compare_checksums=True) self.assertEqual(m.call_count, 1) @@ -270,26 +282,29 @@ class TestExportImport(DirectoriesMixin, TestCase): doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id) self.assertTrue( os.path.isfile( - os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]) - ) + os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]), + ), ) self.d3.delete() manifest = self._do_export() self.assertRaises( - ValueError, self._get_document_from_manifest, manifest, self.d3.id + ValueError, + self._get_document_from_manifest, + manifest, + self.d3.id, ) self.assertTrue( os.path.isfile( - os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]) - ) + os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]), + ), ) manifest = self._do_export(delete=True) self.assertFalse( os.path.isfile( - os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]) - ) + os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]), + ), ) self.assertTrue(len(manifest), 6) @@ -316,7 +331,7 @@ class TestExportImport(DirectoriesMixin, TestCase): self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json"))) self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf"))) self.assertTrue( - os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")) + os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")), ) def test_export_missing_files(self): diff --git a/src/documents/tests/test_management_retagger.py b/src/documents/tests/test_management_retagger.py index 77fc9d2ad..18b55626f 100644 --- a/src/documents/tests/test_management_retagger.py +++ b/src/documents/tests/test_management_retagger.py @@ -1,35 +1,50 @@ from django.core.management import call_command from django.test import TestCase - -from documents.models import Document, Tag, Correspondent, DocumentType +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.models import Tag from documents.tests.utils import DirectoriesMixin class TestRetagger(DirectoriesMixin, TestCase): def make_models(self): self.d1 = Document.objects.create( - checksum="A", title="A", content="first document" + checksum="A", + title="A", + content="first document", ) self.d2 = Document.objects.create( - checksum="B", title="B", content="second document" + checksum="B", + title="B", + content="second document", ) self.d3 = Document.objects.create( - checksum="C", title="C", content="unrelated document" + checksum="C", + title="C", + content="unrelated document", ) self.d4 = Document.objects.create( - checksum="D", title="D", content="auto document" + checksum="D", + title="D", + content="auto document", ) self.tag_first = Tag.objects.create( - name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY + name="tag1", + match="first", + matching_algorithm=Tag.MATCH_ANY, ) self.tag_second = Tag.objects.create( - name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY + name="tag2", + match="second", + matching_algorithm=Tag.MATCH_ANY, ) self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True) self.tag_no_match = Tag.objects.create(name="test2") self.tag_auto = Tag.objects.create( - name="tagauto", matching_algorithm=Tag.MATCH_AUTO + name="tagauto", + matching_algorithm=Tag.MATCH_AUTO, ) self.d3.tags.add(self.tag_inbox) @@ -37,17 +52,25 @@ class TestRetagger(DirectoriesMixin, TestCase): self.d4.tags.add(self.tag_auto) self.correspondent_first = Correspondent.objects.create( - name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY + name="c1", + match="first", + matching_algorithm=Correspondent.MATCH_ANY, ) self.correspondent_second = Correspondent.objects.create( - name="c2", match="second", matching_algorithm=Correspondent.MATCH_ANY + name="c2", + match="second", + matching_algorithm=Correspondent.MATCH_ANY, ) self.doctype_first = DocumentType.objects.create( - name="dt1", match="first", matching_algorithm=DocumentType.MATCH_ANY + name="dt1", + match="first", + matching_algorithm=DocumentType.MATCH_ANY, ) self.doctype_second = DocumentType.objects.create( - name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY + name="dt2", + match="second", + matching_algorithm=DocumentType.MATCH_ANY, ) def get_updated_docs(self): @@ -98,10 +121,12 @@ class TestRetagger(DirectoriesMixin, TestCase): self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id)) self.assertCountEqual( - [tag.id for tag in d_first.tags.all()], [self.tag_first.id] + [tag.id for tag in d_first.tags.all()], + [self.tag_first.id], ) self.assertCountEqual( - [tag.id for tag in d_second.tags.all()], [self.tag_second.id] + [tag.id for tag in d_second.tags.all()], + [self.tag_second.id], ) self.assertCountEqual( [tag.id for tag in d_unrelated.tags.all()], @@ -133,7 +158,10 @@ class TestRetagger(DirectoriesMixin, TestCase): def test_add_tags_suggest_url(self): call_command( - "document_retagger", "--tags", "--suggest", "--base-url=http://localhost" + "document_retagger", + "--tags", + "--suggest", + "--base-url=http://localhost", ) d_first, d_second, d_unrelated, d_auto = self.get_updated_docs() diff --git a/src/documents/tests/test_management_superuser.py b/src/documents/tests/test_management_superuser.py index fa62a9f14..b4e91fd66 100644 --- a/src/documents/tests/test_management_superuser.py +++ b/src/documents/tests/test_management_superuser.py @@ -5,9 +5,11 @@ from unittest import mock from django.contrib.auth.models import User from django.core.management import call_command from django.test import TestCase - from documents.management.commands.document_thumbnails import _process_document -from documents.models import Document, Tag, Correspondent, DocumentType +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.models import Tag from documents.tests.utils import DirectoriesMixin diff --git a/src/documents/tests/test_management_thumbnails.py b/src/documents/tests/test_management_thumbnails.py index 6af94ce99..daf56c586 100644 --- a/src/documents/tests/test_management_thumbnails.py +++ b/src/documents/tests/test_management_thumbnails.py @@ -4,9 +4,11 @@ from unittest import mock from django.core.management import call_command from django.test import TestCase - from documents.management.commands.document_thumbnails import _process_document -from documents.models import Document, Tag, Correspondent, DocumentType +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.models import Tag from documents.tests.utils import DirectoriesMixin diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py index 913dde637..4ba4fc788 100644 --- a/src/documents/tests/test_matchables.py +++ b/src/documents/tests/test_matchables.py @@ -4,10 +4,14 @@ from random import randint from django.contrib.admin.models import LogEntry from django.contrib.auth.models import User -from django.test import TestCase, override_settings +from django.test import override_settings +from django.test import TestCase from .. import matching -from ..models import Correspondent, Document, Tag, DocumentType +from ..models import Correspondent +from ..models import Document +from ..models import DocumentType +from ..models import Tag from ..signals import document_consumption_finished @@ -209,7 +213,8 @@ class TestDocumentConsumptionFinishedSignal(TestCase): TestCase.setUp(self) User.objects.create_user(username="test_consumer", password="12345") self.doc_contains = Document.objects.create( - content="I contain the keyword.", mime_type="application/pdf" + content="I contain the keyword.", + mime_type="application/pdf", ) self.index_dir = tempfile.mkdtemp() @@ -221,43 +226,56 @@ class TestDocumentConsumptionFinishedSignal(TestCase): def test_tag_applied_any(self): t1 = Tag.objects.create( - name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY + name="test", + match="keyword", + matching_algorithm=Tag.MATCH_ANY, ) document_consumption_finished.send( - sender=self.__class__, document=self.doc_contains + sender=self.__class__, + document=self.doc_contains, ) self.assertTrue(list(self.doc_contains.tags.all()) == [t1]) def test_tag_not_applied(self): Tag.objects.create( - name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY + name="test", + match="no-match", + matching_algorithm=Tag.MATCH_ANY, ) document_consumption_finished.send( - sender=self.__class__, document=self.doc_contains + sender=self.__class__, + document=self.doc_contains, ) self.assertTrue(list(self.doc_contains.tags.all()) == []) def test_correspondent_applied(self): correspondent = Correspondent.objects.create( - name="test", match="keyword", matching_algorithm=Correspondent.MATCH_ANY + name="test", + match="keyword", + matching_algorithm=Correspondent.MATCH_ANY, ) document_consumption_finished.send( - sender=self.__class__, document=self.doc_contains + sender=self.__class__, + document=self.doc_contains, ) self.assertTrue(self.doc_contains.correspondent == correspondent) def test_correspondent_not_applied(self): Tag.objects.create( - name="test", match="no-match", matching_algorithm=Correspondent.MATCH_ANY + name="test", + match="no-match", + matching_algorithm=Correspondent.MATCH_ANY, ) document_consumption_finished.send( - sender=self.__class__, document=self.doc_contains + sender=self.__class__, + document=self.doc_contains, ) self.assertEqual(self.doc_contains.correspondent, None) def test_logentry_created(self): document_consumption_finished.send( - sender=self.__class__, document=self.doc_contains + sender=self.__class__, + document=self.doc_contains, ) self.assertEqual(LogEntry.objects.count(), 1) diff --git a/src/documents/tests/test_migration_archive_files.py b/src/documents/tests/test_migration_archive_files.py index 97f8899bc..1cb088185 100644 --- a/src/documents/tests/test_migration_archive_files.py +++ b/src/documents/tests/test_migration_archive_files.py @@ -6,9 +6,9 @@ from unittest import mock from django.conf import settings from django.test import override_settings - from documents.parsers import ParseError -from documents.tests.utils import DirectoriesMixin, TestMigrations +from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import TestMigrations STORAGE_TYPE_GPG = "gpg" @@ -93,10 +93,18 @@ def make_test_document( simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg") simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") simple_pdf2 = os.path.join( - os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf" + os.path.dirname(__file__), + "samples", + "documents", + "originals", + "0000002.pdf", ) simple_pdf3 = os.path.join( - os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf" + os.path.dirname(__file__), + "samples", + "documents", + "originals", + "0000003.pdf", ) simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt") simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png") @@ -121,19 +129,43 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations): simple_pdf, ) self.no_text = make_test_document( - Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf + Document, + "no-text", + "image/png", + simple_png2, + "no-text.png", + simple_pdf, ) self.doc_no_archive = make_test_document( - Document, "no_archive", "text/plain", simple_txt, "no_archive.txt" + Document, + "no_archive", + "text/plain", + simple_txt, + "no_archive.txt", ) self.clash1 = make_test_document( - Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf + Document, + "clash", + "application/pdf", + simple_pdf, + "clash.pdf", + simple_pdf, ) self.clash2 = make_test_document( - Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf + Document, + "clash", + "image/jpeg", + simple_jpg, + "clash.jpg", + simple_pdf, ) self.clash3 = make_test_document( - Document, "clash", "image/png", simple_png, "clash.png", simple_pdf + Document, + "clash", + "image/png", + simple_png, + "clash.png", + simple_pdf, ) self.clash4 = make_test_document( Document, @@ -147,7 +179,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations): self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2)) self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3)) self.assertNotEqual( - archive_path_old(self.clash1), archive_path_old(self.clash4) + archive_path_old(self.clash1), + archive_path_old(self.clash4), ) def testArchiveFilesMigrated(self): @@ -171,19 +204,23 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations): self.assertEqual(archive_checksum, doc.archive_checksum) self.assertEqual( - Document.objects.filter(archive_checksum__isnull=False).count(), 6 + Document.objects.filter(archive_checksum__isnull=False).count(), + 6, ) def test_filenames(self): Document = self.apps.get_model("documents", "Document") self.assertEqual( - Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf" + Document.objects.get(id=self.unrelated.id).archive_filename, + "unrelated.pdf", ) self.assertEqual( - Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf" + Document.objects.get(id=self.no_text.id).archive_filename, + "no-text.pdf", ) self.assertEqual( - Document.objects.get(id=self.doc_no_archive.id).archive_filename, None + Document.objects.get(id=self.doc_no_archive.id).archive_filename, + None, ) self.assertEqual( Document.objects.get(id=self.clash1.id).archive_filename, @@ -198,7 +235,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations): f"{self.clash3.id:07}.pdf", ) self.assertEqual( - Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf" + Document.objects.get(id=self.clash4.id).archive_filename, + "clash.png.pdf", ) @@ -207,16 +245,20 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles): def test_filenames(self): Document = self.apps.get_model("documents", "Document") self.assertEqual( - Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf" + Document.objects.get(id=self.unrelated.id).archive_filename, + "unrelated.pdf", ) self.assertEqual( - Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf" + Document.objects.get(id=self.no_text.id).archive_filename, + "no-text.pdf", ) self.assertEqual( - Document.objects.get(id=self.doc_no_archive.id).archive_filename, None + Document.objects.get(id=self.doc_no_archive.id).archive_filename, + None, ) self.assertEqual( - Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf" + Document.objects.get(id=self.clash1.id).archive_filename, + "none/clash.pdf", ) self.assertEqual( Document.objects.get(id=self.clash2.id).archive_filename, @@ -227,7 +269,8 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles): "none/clash_02.pdf", ) self.assertEqual( - Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf" + Document.objects.get(id=self.clash4.id).archive_filename, + "clash.png.pdf", ) @@ -248,12 +291,19 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): Document = self.apps.get_model("documents", "Document") doc = make_test_document( - Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf + Document, + "clash", + "application/pdf", + simple_pdf, + "clash.pdf", + simple_pdf, ) os.unlink(archive_path_old(doc)) self.assertRaisesMessage( - ValueError, "does not exist at: ", self.performMigration + ValueError, + "does not exist at: ", + self.performMigration, ) def test_parser_missing(self): @@ -277,7 +327,9 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): ) self.assertRaisesMessage( - ValueError, "no parsers are available", self.performMigration + ValueError, + "no parsers are available", + self.performMigration, ) @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper") @@ -286,7 +338,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): Document = self.apps.get_model("documents", "Document") doc1 = make_test_document( - Document, "document", "image/png", simple_png, "document.png", simple_pdf + Document, + "document", + "image/png", + simple_png, + "document.png", + simple_pdf, ) doc2 = make_test_document( Document, @@ -311,8 +368,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): filter( lambda log: "Parse error, will try again in 5 seconds" in log, capture.output, - ) - ) + ), + ), ), 4, ) @@ -324,8 +381,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): lambda log: "Unable to regenerate archive document for ID:" in log, capture.output, - ) - ) + ), + ), ), 2, ) @@ -347,7 +404,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): Document = self.apps.get_model("documents", "Document") doc1 = make_test_document( - Document, "document", "image/png", simple_png, "document.png", simple_pdf + Document, + "document", + "image/png", + simple_png, + "document.png", + simple_pdf, ) doc2 = make_test_document( Document, @@ -368,8 +430,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): lambda log: "Parser did not return an archive document for document" in log, capture.output, - ) - ) + ), + ), ), 2, ) @@ -405,7 +467,11 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations): "unrelated.pdf", ) doc_no_archive = make_test_document( - Document, "no_archive", "text/plain", simple_txt, "no_archive.txt" + Document, + "no_archive", + "text/plain", + simple_txt, + "no_archive.txt", ) clashB = make_test_document( Document, @@ -434,13 +500,14 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations): self.assertEqual(archive_checksum, doc.archive_checksum) self.assertEqual( - Document.objects.filter(archive_checksum__isnull=False).count(), 2 + Document.objects.filter(archive_checksum__isnull=False).count(), + 2, ) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") class TestMigrateArchiveFilesBackwardsWithFilenameFormat( - TestMigrateArchiveFilesBackwards + TestMigrateArchiveFilesBackwards, ): pass @@ -505,5 +572,7 @@ class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations): ) self.assertRaisesMessage( - ValueError, "file already exists.", self.performMigration + ValueError, + "file already exists.", + self.performMigration, ) diff --git a/src/documents/tests/test_migration_mime_type.py b/src/documents/tests/test_migration_mime_type.py index 57cb84ad4..a08c3d74d 100644 --- a/src/documents/tests/test_migration_mime_type.py +++ b/src/documents/tests/test_migration_mime_type.py @@ -3,9 +3,9 @@ import shutil from django.conf import settings from django.test import override_settings - from documents.parsers import get_default_file_extension -from documents.tests.utils import DirectoriesMixin, TestMigrations +from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import TestMigrations STORAGE_TYPE_UNENCRYPTED = "unencrypted" STORAGE_TYPE_GPG = "gpg" @@ -46,7 +46,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations): def setUpBeforeMigration(self, apps): Document = apps.get_model("documents", "Document") doc = Document.objects.create( - title="test", file_type="pdf", filename="file1.pdf" + title="test", + file_type="pdf", + filename="file1.pdf", ) self.doc_id = doc.id shutil.copy( @@ -55,7 +57,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations): ) doc2 = Document.objects.create( - checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG + checksum="B", + file_type="pdf", + storage_type=STORAGE_TYPE_GPG, ) self.doc2_id = doc2.id shutil.copy( @@ -88,7 +92,9 @@ class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations): def setUpBeforeMigration(self, apps): Document = apps.get_model("documents", "Document") doc = Document.objects.create( - title="test", mime_type="application/pdf", filename="file1.pdf" + title="test", + mime_type="application/pdf", + filename="file1.pdf", ) self.doc_id = doc.id shutil.copy( diff --git a/src/documents/tests/test_migration_remove_null_characters.py b/src/documents/tests/test_migration_remove_null_characters.py index 9c8000550..09fd80883 100644 --- a/src/documents/tests/test_migration_remove_null_characters.py +++ b/src/documents/tests/test_migration_remove_null_characters.py @@ -1,4 +1,5 @@ -from documents.tests.utils import DirectoriesMixin, TestMigrations +from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import TestMigrations class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations): diff --git a/src/documents/tests/test_migration_tag_colors.py b/src/documents/tests/test_migration_tag_colors.py index e209ce5e6..6cc2fa3f7 100644 --- a/src/documents/tests/test_migration_tag_colors.py +++ b/src/documents/tests/test_migration_tag_colors.py @@ -1,4 +1,5 @@ -from documents.tests.utils import DirectoriesMixin, TestMigrations +from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import TestMigrations class TestMigrateTagColor(DirectoriesMixin, TestMigrations): diff --git a/src/documents/tests/test_models.py b/src/documents/tests/test_models.py index 77bb507f5..d230511ff 100644 --- a/src/documents/tests/test_models.py +++ b/src/documents/tests/test_models.py @@ -1,7 +1,9 @@ from django.test import TestCase -from .factories import DocumentFactory, CorrespondentFactory -from ..models import Document, Correspondent +from ..models import Correspondent +from ..models import Document +from .factories import CorrespondentFactory +from .factories import DocumentFactory class CorrespondentTestCase(TestCase): diff --git a/src/documents/tests/test_parsers.py b/src/documents/tests/test_parsers.py index a914bbf93..ab0311783 100644 --- a/src/documents/tests/test_parsers.py +++ b/src/documents/tests/test_parsers.py @@ -4,16 +4,14 @@ import tempfile from tempfile import TemporaryDirectory from unittest import mock -from django.test import TestCase, override_settings - -from documents.parsers import ( - get_parser_class, - get_supported_file_extensions, - get_default_file_extension, - get_parser_class_for_mime_type, - DocumentParser, - is_file_ext_supported, -) +from django.test import override_settings +from django.test import TestCase +from documents.parsers import DocumentParser +from documents.parsers import get_default_file_extension +from documents.parsers import get_parser_class +from documents.parsers import get_parser_class_for_mime_type +from documents.parsers import get_supported_file_extensions +from documents.parsers import is_file_ext_supported from paperless_tesseract.parsers import RasterisedDocumentParser from paperless_text.parsers import TextDocumentParser diff --git a/src/documents/tests/test_sanity_check.py b/src/documents/tests/test_sanity_check.py index f3953bab9..7a1b64ce4 100644 --- a/src/documents/tests/test_sanity_check.py +++ b/src/documents/tests/test_sanity_check.py @@ -6,9 +6,9 @@ from pathlib import Path import filelock from django.conf import settings from django.test import TestCase - from documents.models import Document -from documents.sanity_checker import check_sanity, SanityCheckMessages +from documents.sanity_checker import check_sanity +from documents.sanity_checker import SanityCheckMessages from documents.tests.utils import DirectoriesMixin @@ -23,7 +23,8 @@ class TestSanityCheckMessages(TestCase): self.assertEqual(len(capture.output), 1) self.assertEqual(capture.records[0].levelno, logging.INFO) self.assertEqual( - capture.records[0].message, "Sanity checker detected no issues." + capture.records[0].message, + "Sanity checker detected no issues.", ) def test_info(self): diff --git a/src/documents/tests/test_settings.py b/src/documents/tests/test_settings.py index 25fe0e317..9b8edab27 100644 --- a/src/documents/tests/test_settings.py +++ b/src/documents/tests/test_settings.py @@ -2,8 +2,8 @@ import logging from unittest import mock from django.test import TestCase - -from paperless.settings import default_task_workers, default_threads_per_worker +from paperless.settings import default_task_workers +from paperless.settings import default_threads_per_worker class TestSettings(TestCase): @@ -21,7 +21,7 @@ class TestSettings(TestCase): def test_workers_threads(self): for i in range(1, 64): with mock.patch( - "paperless.settings.multiprocessing.cpu_count" + "paperless.settings.multiprocessing.cpu_count", ) as cpu_count: cpu_count.return_value = i diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index a0cae7307..952d3d920 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -4,10 +4,13 @@ from unittest import mock from django.conf import settings from django.test import TestCase from django.utils import timezone - from documents import tasks -from documents.models import Document, Tag, Correspondent, DocumentType -from documents.sanity_checker import SanityCheckMessages, SanityCheckFailedException +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.models import Tag +from documents.sanity_checker import SanityCheckFailedException +from documents.sanity_checker import SanityCheckMessages from documents.tests.utils import DirectoriesMixin @@ -106,7 +109,8 @@ class TestTasks(DirectoriesMixin, TestCase): messages.warning("Some warning") m.return_value = messages self.assertEqual( - tasks.sanity_check(), "Sanity check exited with warnings. See log." + tasks.sanity_check(), + "Sanity check exited with warnings. See log.", ) m.assert_called_once() @@ -116,7 +120,8 @@ class TestTasks(DirectoriesMixin, TestCase): messages.info("Some info") m.return_value = messages self.assertEqual( - tasks.sanity_check(), "Sanity check exited with infos. See log." + tasks.sanity_check(), + "Sanity check exited with infos. See log.", ) m.assert_called_once() diff --git a/src/documents/tests/test_views.py b/src/documents/tests/test_views.py index dcae72797..ce457a7f3 100644 --- a/src/documents/tests/test_views.py +++ b/src/documents/tests/test_views.py @@ -25,7 +25,7 @@ class TestViews(TestCase): ]: if language_given: self.client.cookies.load( - {settings.LANGUAGE_COOKIE_NAME: language_given} + {settings.LANGUAGE_COOKIE_NAME: language_given}, ) elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys(): self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME) @@ -51,5 +51,6 @@ class TestViews(TestCase): f"frontend/{language_actual}/polyfills.js", ) self.assertEqual( - response.context_data["main_js"], f"frontend/{language_actual}/main.js" + response.context_data["main_js"], + f"frontend/{language_actual}/main.js", ) diff --git a/src/documents/tests/utils.py b/src/documents/tests/utils.py index 3aa9cf880..f4d3bee87 100644 --- a/src/documents/tests/utils.py +++ b/src/documents/tests/utils.py @@ -7,7 +7,8 @@ from contextlib import contextmanager from django.apps import apps from django.db import connection from django.db.migrations.executor import MigrationExecutor -from django.test import override_settings, TransactionTestCase +from django.test import override_settings +from django.test import TransactionTestCase def setup_directories(): @@ -97,7 +98,7 @@ class TestMigrations(TransactionTestCase): assert ( self.migrate_from and self.migrate_to ), "TestCase '{}' must define migrate_from and migrate_to properties".format( - type(self).__name__ + type(self).__name__, ) self.migrate_from = [(self.app, self.migrate_from)] self.migrate_to = [(self.app, self.migrate_to)] diff --git a/src/documents/views.py b/src/documents/views.py index 9e4d960ab..b8814ab81 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -5,63 +5,70 @@ import uuid import zipfile from datetime import datetime from time import mktime -from urllib.parse import quote_plus from unicodedata import normalize +from urllib.parse import quote_plus from django.conf import settings -from django.db.models import Count, Max, Case, When, IntegerField +from django.db.models import Case +from django.db.models import Count +from django.db.models import IntegerField +from django.db.models import Max +from django.db.models import When from django.db.models.functions import Lower -from django.http import HttpResponse, HttpResponseBadRequest, Http404 +from django.http import Http404 +from django.http import HttpResponse +from django.http import HttpResponseBadRequest from django.utils.translation import get_language from django.views.decorators.cache import cache_control from django.views.generic import TemplateView from django_filters.rest_framework import DjangoFilterBackend from django_q.tasks import async_task +from paperless.db import GnuPG +from paperless.views import StandardPagination from rest_framework import parsers from rest_framework.decorators import action from rest_framework.exceptions import NotFound -from rest_framework.filters import OrderingFilter, SearchFilter +from rest_framework.filters import OrderingFilter +from rest_framework.filters import SearchFilter from rest_framework.generics import GenericAPIView -from rest_framework.mixins import ( - DestroyModelMixin, - ListModelMixin, - RetrieveModelMixin, - UpdateModelMixin, -) +from rest_framework.mixins import DestroyModelMixin +from rest_framework.mixins import ListModelMixin +from rest_framework.mixins import RetrieveModelMixin +from rest_framework.mixins import UpdateModelMixin from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework.views import APIView -from rest_framework.viewsets import GenericViewSet, ModelViewSet, ViewSet +from rest_framework.viewsets import GenericViewSet +from rest_framework.viewsets import ModelViewSet +from rest_framework.viewsets import ViewSet -from paperless.db import GnuPG -from paperless.views import StandardPagination -from .bulk_download import ( - OriginalAndArchiveStrategy, - OriginalsOnlyStrategy, - ArchiveOnlyStrategy, -) +from .bulk_download import ArchiveOnlyStrategy +from .bulk_download import OriginalAndArchiveStrategy +from .bulk_download import OriginalsOnlyStrategy from .classifier import load_classifier -from .filters import ( - CorrespondentFilterSet, - DocumentFilterSet, - TagFilterSet, - DocumentTypeFilterSet, -) -from .matching import match_correspondents, match_tags, match_document_types -from .models import Correspondent, Document, Tag, DocumentType, SavedView +from .filters import CorrespondentFilterSet +from .filters import DocumentFilterSet +from .filters import DocumentTypeFilterSet +from .filters import TagFilterSet +from .matching import match_correspondents +from .matching import match_document_types +from .matching import match_tags +from .models import Correspondent +from .models import Document +from .models import DocumentType +from .models import SavedView +from .models import Tag from .parsers import get_parser_class_for_mime_type -from .serialisers import ( - CorrespondentSerializer, - DocumentSerializer, - TagSerializerVersion1, - TagSerializer, - DocumentTypeSerializer, - PostDocumentSerializer, - SavedViewSerializer, - BulkEditSerializer, - DocumentListSerializer, - BulkDownloadSerializer, -) +from .serialisers import BulkDownloadSerializer +from .serialisers import BulkEditSerializer +from .serialisers import CorrespondentSerializer +from .serialisers import DocumentListSerializer +from .serialisers import DocumentSerializer +from .serialisers import DocumentTypeSerializer +from .serialisers import PostDocumentSerializer +from .serialisers import SavedViewSerializer +from .serialisers import TagSerializer +from .serialisers import TagSerializerVersion1 logger = logging.getLogger("paperless.api") @@ -89,16 +96,14 @@ class IndexView(TemplateView): context["full_name"] = self.request.user.get_full_name() context["styles_css"] = f"frontend/{self.get_language()}/styles.css" context["runtime_js"] = f"frontend/{self.get_language()}/runtime.js" - context[ - "polyfills_js" - ] = f"frontend/{self.get_language()}/polyfills.js" # NOQA: E501 + context["polyfills_js"] = f"frontend/{self.get_language()}/polyfills.js" context["main_js"] = f"frontend/{self.get_language()}/main.js" context[ "webmanifest" - ] = f"frontend/{self.get_language()}/manifest.webmanifest" # NOQA: E501 + ] = f"frontend/{self.get_language()}/manifest.webmanifest" # noqa: E501 context[ "apple_touch_icon" - ] = f"frontend/{self.get_language()}/apple-touch-icon.png" # NOQA: E501 + ] = f"frontend/{self.get_language()}/apple-touch-icon.png" # noqa: E501 return context @@ -106,7 +111,8 @@ class CorrespondentViewSet(ModelViewSet): model = Correspondent queryset = Correspondent.objects.annotate( - document_count=Count("documents"), last_correspondence=Max("documents__created") + document_count=Count("documents"), + last_correspondence=Max("documents__created"), ).order_by(Lower("name")) serializer_class = CorrespondentSerializer @@ -127,7 +133,7 @@ class TagViewSet(ModelViewSet): model = Tag queryset = Tag.objects.annotate(document_count=Count("documents")).order_by( - Lower("name") + Lower("name"), ) def get_serializer_class(self): @@ -147,7 +153,7 @@ class DocumentTypeViewSet(ModelViewSet): model = DocumentType queryset = DocumentType.objects.annotate( - document_count=Count("documents") + document_count=Count("documents"), ).order_by(Lower("name")) serializer_class = DocumentTypeSerializer @@ -220,9 +226,7 @@ class DocumentViewSet( def file_response(self, pk, request, disposition): doc = Document.objects.get(id=pk) - if ( - not self.original_requested(request) and doc.has_archive_version - ): # NOQA: E501 + if not self.original_requested(request) and doc.has_archive_version: file_handle = doc.archive_file filename = doc.get_public_filename(archive=True) mime_type = "application/pdf" @@ -258,7 +262,7 @@ class DocumentViewSet( try: return parser.extract_metadata(file, mime_type) - except Exception as e: + except Exception: # TODO: cover GPG errors, remove later. return [] else: @@ -291,7 +295,8 @@ class DocumentViewSet( if doc.has_archive_version: meta["archive_size"] = self.get_filesize(doc.archive_path) meta["archive_metadata"] = self.get_metadata( - doc.archive_path, "application/pdf" + doc.archive_path, + "application/pdf", ) else: meta["archive_size"] = None @@ -315,7 +320,7 @@ class DocumentViewSet( "document_types": [ dt.id for dt in match_document_types(doc, classifier) ], - } + }, ) @action(methods=["get"], detail=True) @@ -357,7 +362,7 @@ class SearchResultSerializer(DocumentSerializer): "score": instance.score, "highlights": instance.highlights("content", text=doc.content) if doc - else None, # NOQA: E501 + else None, "rank": instance.rank, } @@ -500,7 +505,9 @@ class PostDocumentView(GenericAPIView): os.makedirs(settings.SCRATCH_DIR, exist_ok=True) with tempfile.NamedTemporaryFile( - prefix="paperless-upload-", dir=settings.SCRATCH_DIR, delete=False + prefix="paperless-upload-", + dir=settings.SCRATCH_DIR, + delete=False, ) as f: f.write(doc_data) os.utime(f.name, times=(t, t)) @@ -537,20 +544,20 @@ class SelectionDataView(GenericAPIView): correspondents = Correspondent.objects.annotate( document_count=Count( - Case(When(documents__id__in=ids, then=1), output_field=IntegerField()) - ) + Case(When(documents__id__in=ids, then=1), output_field=IntegerField()), + ), ) tags = Tag.objects.annotate( document_count=Count( - Case(When(documents__id__in=ids, then=1), output_field=IntegerField()) - ) + Case(When(documents__id__in=ids, then=1), output_field=IntegerField()), + ), ) types = DocumentType.objects.annotate( document_count=Count( - Case(When(documents__id__in=ids, then=1), output_field=IntegerField()) - ) + Case(When(documents__id__in=ids, then=1), output_field=IntegerField()), + ), ) r = Response( @@ -565,7 +572,7 @@ class SelectionDataView(GenericAPIView): "selected_document_types": [ {"id": t.id, "document_count": t.document_count} for t in types ], - } + }, ) return r @@ -612,7 +619,7 @@ class StatisticsView(APIView): { "documents_total": documents_total, "documents_inbox": documents_inbox, - } + }, ) @@ -632,7 +639,9 @@ class BulkDownloadView(GenericAPIView): os.makedirs(settings.SCRATCH_DIR, exist_ok=True) temp = tempfile.NamedTemporaryFile( - dir=settings.SCRATCH_DIR, suffix="-compressed-archive", delete=False + dir=settings.SCRATCH_DIR, + suffix="-compressed-archive", + delete=False, ) if content == "both": @@ -651,7 +660,8 @@ class BulkDownloadView(GenericAPIView): with open(temp.name, "rb") as f: response = HttpResponse(f, content_type="application/zip") response["Content-Disposition"] = '{}; filename="{}"'.format( - "attachment", "documents.zip" + "attachment", + "documents.zip", ) return response diff --git a/src/paperless/__init__.py b/src/paperless/__init__.py index 0789cc963..8cdd600b3 100644 --- a/src/paperless/__init__.py +++ b/src/paperless/__init__.py @@ -1 +1,4 @@ -from .checks import paths_check, binaries_check +from .checks import binaries_check +from .checks import paths_check + +__all__ = ["binaries_check", "paths_check"] diff --git a/src/paperless/asgi.py b/src/paperless/asgi.py index a3bc386ce..633c75ce0 100644 --- a/src/paperless/asgi.py +++ b/src/paperless/asgi.py @@ -9,14 +9,14 @@ from django.core.asgi import get_asgi_application os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings") django_asgi_app = get_asgi_application() -from channels.auth import AuthMiddlewareStack # NOQA: E402 -from channels.routing import ProtocolTypeRouter, URLRouter # NOQA: E402 +from channels.auth import AuthMiddlewareStack # noqa: E402 +from channels.routing import ProtocolTypeRouter, URLRouter # noqa: E402 -from paperless.urls import websocket_urlpatterns # NOQA: E402 +from paperless.urls import websocket_urlpatterns # noqa: E402 application = ProtocolTypeRouter( { "http": get_asgi_application(), "websocket": AuthMiddlewareStack(URLRouter(websocket_urlpatterns)), - } + }, ) diff --git a/src/paperless/auth.py b/src/paperless/auth.py index 7af4f3590..86f428518 100644 --- a/src/paperless/auth.py +++ b/src/paperless/auth.py @@ -1,9 +1,9 @@ from django.conf import settings from django.contrib import auth +from django.contrib.auth.middleware import RemoteUserMiddleware from django.contrib.auth.models import User from django.utils.deprecation import MiddlewareMixin from rest_framework import authentication -from django.contrib.auth.middleware import RemoteUserMiddleware class AutoLoginMiddleware(MiddlewareMixin): @@ -25,7 +25,7 @@ class AngularApiAuthenticationOverride(authentication.BaseAuthentication): settings.DEBUG and "Referer" in request.headers and request.headers["Referer"].startswith("http://localhost:4200/") - ): # NOQA: E501 + ): user = User.objects.filter(is_staff=True).first() print("Auto-Login with user {}".format(user)) return (user, None) diff --git a/src/paperless/checks.py b/src/paperless/checks.py index 1adc8b149..ee9b95e09 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -3,7 +3,9 @@ import shutil import stat from django.conf import settings -from django.core.checks import Error, Warning, register +from django.core.checks import Error +from django.core.checks import register +from django.core.checks import Warning exists_message = "{} is set but doesn't exist." exists_hint = "Create a directory at {}" @@ -19,11 +21,12 @@ def path_check(var, directory): if directory: if not os.path.isdir(directory): messages.append( - Error(exists_message.format(var), exists_hint.format(directory)) + Error(exists_message.format(var), exists_hint.format(directory)), ) else: test_file = os.path.join( - directory, f"__paperless_write_test_{os.getpid()}__" + directory, + f"__paperless_write_test_{os.getpid()}__", ) try: with open(test_file, "w"): @@ -34,9 +37,9 @@ def path_check(var, directory): writeable_message.format(var), writeable_hint.format( f"\n{stat.filemode(os.stat(directory).st_mode)} " - f"{directory}\n" + f"{directory}\n", ), - ) + ), ) finally: if os.path.isfile(test_file): @@ -88,8 +91,8 @@ def debug_mode_check(app_configs, **kwargs): "security issue, since it puts security overides in place which " "are meant to be only used during development. This " "also means that paperless will tell anyone various " - "debugging information when something goes wrong." - ) + "debugging information when something goes wrong.", + ), ] else: return [] diff --git a/src/paperless/consumers.py b/src/paperless/consumers.py index 8b8e8c6dc..7013a8e79 100644 --- a/src/paperless/consumers.py +++ b/src/paperless/consumers.py @@ -1,7 +1,8 @@ import json from asgiref.sync import async_to_sync -from channels.exceptions import DenyConnection, AcceptConnection +from channels.exceptions import AcceptConnection +from channels.exceptions import DenyConnection from channels.generic.websocket import WebsocketConsumer @@ -14,13 +15,15 @@ class StatusConsumer(WebsocketConsumer): raise DenyConnection() else: async_to_sync(self.channel_layer.group_add)( - "status_updates", self.channel_name + "status_updates", + self.channel_name, ) raise AcceptConnection() def disconnect(self, close_code): async_to_sync(self.channel_layer.group_discard)( - "status_updates", self.channel_name + "status_updates", + self.channel_name, ) def status_update(self, event): diff --git a/src/paperless/db.py b/src/paperless/db.py index 5f476b5e7..51184750d 100644 --- a/src/paperless/db.py +++ b/src/paperless/db.py @@ -1,5 +1,4 @@ import gnupg - from django.conf import settings diff --git a/src/paperless/middleware.py b/src/paperless/middleware.py index bb634adf8..f82ba2435 100644 --- a/src/paperless/middleware.py +++ b/src/paperless/middleware.py @@ -1,5 +1,4 @@ from django.conf import settings - from paperless import version diff --git a/src/paperless/settings.py b/src/paperless/settings.py index e3fb5d155..314b49012 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -5,9 +5,8 @@ import os import re from concurrent_log_handler.queue import setup_logging_queues -from dotenv import load_dotenv - from django.utils.translation import gettext_lazy as _ +from dotenv import load_dotenv # Tap paperless.conf if it's available if os.path.exists("../paperless.conf"): @@ -68,7 +67,8 @@ MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle") LOGGING_DIR = os.getenv("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log")) CONSUMPTION_DIR = os.getenv( - "PAPERLESS_CONSUMPTION_DIR", os.path.join(BASE_DIR, "..", "consume") + "PAPERLESS_CONSUMPTION_DIR", + os.path.join(BASE_DIR, "..", "consume"), ) # This will be created if it doesn't exist @@ -119,7 +119,7 @@ REST_FRAMEWORK = { if DEBUG: REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append( - "paperless.auth.AngularApiAuthenticationOverride" + "paperless.auth.AngularApiAuthenticationOverride", ) MIDDLEWARE = [ @@ -191,7 +191,8 @@ if AUTO_LOGIN_USERNAME: ENABLE_HTTP_REMOTE_USER = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER") HTTP_REMOTE_USER_HEADER_NAME = os.getenv( - "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME", "HTTP_REMOTE_USER" + "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME", + "HTTP_REMOTE_USER", ) if ENABLE_HTTP_REMOTE_USER: @@ -201,7 +202,7 @@ if ENABLE_HTTP_REMOTE_USER: "django.contrib.auth.backends.ModelBackend", ] REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append( - "rest_framework.authentication.RemoteUserAuthentication" + "rest_framework.authentication.RemoteUserAuthentication", ) # X-Frame options for embedded PDF display: @@ -212,7 +213,7 @@ else: # We allow CORS from localhost:8080 CORS_ALLOWED_ORIGINS = tuple( - os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(",") + os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","), ) if DEBUG: @@ -223,7 +224,8 @@ if DEBUG: # Paperless on a closed network. However, if you're putting this anywhere # public, you should change the key to something unique and verbose. SECRET_KEY = os.getenv( - "PAPERLESS_SECRET_KEY", "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee" + "PAPERLESS_SECRET_KEY", + "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee", ) _allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS") @@ -268,7 +270,7 @@ DATABASES = { "default": { "ENGINE": "django.db.backends.sqlite3", "NAME": os.path.join(DATA_DIR, "db.sqlite3"), - } + }, } if os.getenv("PAPERLESS_DBHOST"): @@ -423,7 +425,8 @@ def default_threads_per_worker(task_workers): THREADS_PER_WORKER = os.getenv( - "PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker(TASK_WORKERS) + "PAPERLESS_THREADS_PER_WORKER", + default_threads_per_worker(TASK_WORKERS), ) ############################################################################### @@ -435,7 +438,7 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0)) CONSUMER_POLLING_DELAY = int(os.getenv("PAPERLESS_CONSUMER_POLLING_DELAY", 5)) CONSUMER_POLLING_RETRY_COUNT = int( - os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5) + os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5), ) CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES") @@ -448,8 +451,8 @@ CONSUMER_IGNORE_PATTERNS = list( os.getenv( "PAPERLESS_CONSUMER_IGNORE_PATTERNS", '[".DS_STORE/*", "._*", ".stfolder/*"]', - ) - ) + ), + ), ) CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS") @@ -479,7 +482,7 @@ OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true") OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true") OCR_ROTATE_PAGES_THRESHOLD = float( - os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0) + os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0), ) OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}") @@ -536,7 +539,8 @@ THUMBNAIL_FONT_NAME = os.getenv( PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO") PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998") PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv( - "PAPERLESS_TIKA_GOTENBERG_ENDPOINT", "http://localhost:3000" + "PAPERLESS_TIKA_GOTENBERG_ENDPOINT", + "http://localhost:3000", ) if PAPERLESS_TIKA_ENABLED: diff --git a/src/paperless/tests/test_checks.py b/src/paperless/tests/test_checks.py index b0301be0e..df0cb0afd 100644 --- a/src/paperless/tests/test_checks.py +++ b/src/paperless/tests/test_checks.py @@ -1,10 +1,11 @@ import os import shutil -from django.test import TestCase, override_settings - +from django.test import override_settings +from django.test import TestCase from documents.tests.utils import DirectoriesMixin -from paperless import binaries_check, paths_check +from paperless import binaries_check +from paperless import paths_check from paperless.checks import debug_mode_check @@ -20,7 +21,9 @@ class TestChecks(DirectoriesMixin, TestCase): self.assertEqual(paths_check(None), []) @override_settings( - MEDIA_ROOT="uuh", DATA_DIR="whatever", CONSUMPTION_DIR="idontcare" + MEDIA_ROOT="uuh", + DATA_DIR="whatever", + CONSUMPTION_DIR="idontcare", ) def test_paths_check_dont_exist(self): msgs = paths_check(None) diff --git a/src/paperless/tests/test_websockets.py b/src/paperless/tests/test_websockets.py index c8cc269fe..069bb644a 100644 --- a/src/paperless/tests/test_websockets.py +++ b/src/paperless/tests/test_websockets.py @@ -2,8 +2,8 @@ from unittest import mock from channels.layers import get_channel_layer from channels.testing import WebsocketCommunicator -from django.test import TestCase, override_settings - +from django.test import override_settings +from django.test import TestCase from paperless.asgi import application @@ -46,7 +46,8 @@ class TestWebSockets(TestCase): channel_layer = get_channel_layer() await channel_layer.group_send( - "status_updates", {"type": "status_update", "data": message} + "status_updates", + {"type": "status_update", "data": message}, ) response = await communicator.receive_json_from() diff --git a/src/paperless/urls.py b/src/paperless/urls.py index 510e624d1..9dbe39e8a 100644 --- a/src/paperless/urls.py +++ b/src/paperless/urls.py @@ -1,34 +1,30 @@ +from django.conf import settings from django.conf.urls import include from django.contrib import admin from django.contrib.auth.decorators import login_required -from django.urls import path, re_path +from django.urls import path +from django.urls import re_path +from django.utils.translation import gettext_lazy as _ from django.views.decorators.csrf import csrf_exempt from django.views.generic import RedirectView +from documents.views import BulkDownloadView +from documents.views import BulkEditView +from documents.views import CorrespondentViewSet +from documents.views import DocumentTypeViewSet +from documents.views import IndexView +from documents.views import LogViewSet +from documents.views import PostDocumentView +from documents.views import SavedViewViewSet +from documents.views import SearchAutoCompleteView +from documents.views import SelectionDataView +from documents.views import StatisticsView +from documents.views import TagViewSet +from documents.views import UnifiedSearchViewSet +from paperless.consumers import StatusConsumer +from paperless.views import FaviconView from rest_framework.authtoken import views from rest_framework.routers import DefaultRouter -from django.utils.translation import gettext_lazy as _ - -from django.conf import settings - -from paperless.consumers import StatusConsumer -from documents.views import ( - CorrespondentViewSet, - UnifiedSearchViewSet, - LogViewSet, - TagViewSet, - DocumentTypeViewSet, - IndexView, - SearchAutoCompleteView, - StatisticsView, - PostDocumentView, - SavedViewViewSet, - BulkEditView, - SelectionDataView, - BulkDownloadView, -) -from paperless.views import FaviconView - api_router = DefaultRouter() api_router.register(r"correspondents", CorrespondentViewSet) api_router.register(r"document_types", DocumentTypeViewSet) @@ -62,7 +58,9 @@ urlpatterns = [ name="post_document", ), re_path( - r"^documents/bulk_edit/", BulkEditView.as_view(), name="bulk_edit" + r"^documents/bulk_edit/", + BulkEditView.as_view(), + name="bulk_edit", ), re_path( r"^documents/selection_data/", @@ -76,7 +74,7 @@ urlpatterns = [ ), path("token/", views.obtain_auth_token), ] - + api_router.urls + + api_router.urls, ), ), re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"), @@ -88,35 +86,37 @@ urlpatterns = [ re_path( r"^doc/(?P<pk>\d+)$", RedirectView.as_view( - url=settings.BASE_URL + "api/documents/%(pk)s/download/" + url=settings.BASE_URL + "api/documents/%(pk)s/download/", ), ), re_path( r"^thumb/(?P<pk>\d+)$", RedirectView.as_view( - url=settings.BASE_URL + "api/documents/%(pk)s/thumb/" + url=settings.BASE_URL + "api/documents/%(pk)s/thumb/", ), ), re_path( r"^preview/(?P<pk>\d+)$", RedirectView.as_view( - url=settings.BASE_URL + "api/documents/%(pk)s/preview/" + url=settings.BASE_URL + "api/documents/%(pk)s/preview/", ), ), - ] + ], ), ), re_path( r"^push$", csrf_exempt( - RedirectView.as_view(url=settings.BASE_URL + "api/documents/post_document/") + RedirectView.as_view( + url=settings.BASE_URL + "api/documents/post_document/", + ), ), ), # Frontend assets TODO: this is pretty bad, but it works. path( "assets/<path:path>", RedirectView.as_view( - url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s" + url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s", ), ), # TODO: with localization, this is even worse! :/ diff --git a/src/paperless/views.py b/src/paperless/views.py index a6a37a679..9f3d017a6 100644 --- a/src/paperless/views.py +++ b/src/paperless/views.py @@ -14,7 +14,11 @@ class StandardPagination(PageNumberPagination): class FaviconView(View): def get(self, request, *args, **kwargs): favicon = os.path.join( - os.path.dirname(__file__), "static", "paperless", "img", "favicon.ico" + os.path.dirname(__file__), + "static", + "paperless", + "img", + "favicon.ico", ) with open(favicon, "rb") as f: return HttpResponse(f, content_type="image/x-icon") diff --git a/src/paperless/workers.py b/src/paperless/workers.py index 4f2f0802a..0f4a66149 100644 --- a/src/paperless/workers.py +++ b/src/paperless/workers.py @@ -1,6 +1,7 @@ import os -from uvicorn.workers import UvicornWorker + from django.conf import settings +from uvicorn.workers import UvicornWorker os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings") diff --git a/src/paperless/wsgi.py b/src/paperless/wsgi.py index 6aab72299..82e0744f7 100644 --- a/src/paperless/wsgi.py +++ b/src/paperless/wsgi.py @@ -6,7 +6,6 @@ It exposes the WSGI callable as a module-level variable named ``application``. For more information on this file, see https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/ """ - import os from django.core.wsgi import get_wsgi_application diff --git a/src/paperless_mail/admin.py b/src/paperless_mail/admin.py index 3c9ae0f56..b56bc0727 100644 --- a/src/paperless_mail/admin.py +++ b/src/paperless_mail/admin.py @@ -1,8 +1,8 @@ -from django.contrib import admin from django import forms -from paperless_mail.models import MailAccount, MailRule - +from django.contrib import admin from django.utils.translation import gettext_lazy as _ +from paperless_mail.models import MailAccount +from paperless_mail.models import MailRule class MailAccountAdminForm(forms.ModelForm): @@ -48,7 +48,7 @@ class MailRuleAdmin(admin.ModelAdmin): { "description": _( "Paperless will only process mails that match ALL of the " - "filters given below." + "filters given below.", ), "fields": ( "filter_from", @@ -66,7 +66,7 @@ class MailRuleAdmin(admin.ModelAdmin): "description": _( "The action applied to the mail. This action is only " "performed when documents were consumed from the mail. " - "Mails without attachments will remain entirely untouched." + "Mails without attachments will remain entirely untouched.", ), "fields": ("action", "action_parameter"), }, @@ -78,7 +78,7 @@ class MailRuleAdmin(admin.ModelAdmin): "Assign metadata to documents consumed from this rule " "automatically. If you do not assign tags, types or " "correspondents here, paperless will still process all " - "matching rules that you have defined." + "matching rules that you have defined.", ), "fields": ( "assign_title_from", diff --git a/src/paperless_mail/apps.py b/src/paperless_mail/apps.py index f55240852..1c5d656e0 100644 --- a/src/paperless_mail/apps.py +++ b/src/paperless_mail/apps.py @@ -1,5 +1,4 @@ from django.apps import AppConfig - from django.utils.translation import gettext_lazy as _ diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index 0e9294927..ac1568829 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -1,6 +1,7 @@ import os import tempfile -from datetime import timedelta, date +from datetime import date +from datetime import timedelta from fnmatch import fnmatch import magic @@ -8,18 +9,16 @@ import pathvalidate from django.conf import settings from django.db import DatabaseError from django_q.tasks import async_task -from imap_tools import ( - MailBox, - MailBoxUnencrypted, - AND, - MailMessageFlags, - MailboxFolderSelectError, -) - from documents.loggers import LoggingMixin from documents.models import Correspondent from documents.parsers import is_mime_type_supported -from paperless_mail.models import MailAccount, MailRule +from imap_tools import AND +from imap_tools import MailBox +from imap_tools import MailboxFolderSelectError +from imap_tools import MailBoxUnencrypted +from imap_tools import MailMessageFlags +from paperless_mail.models import MailAccount +from paperless_mail.models import MailRule class MailError(Exception): @@ -120,8 +119,8 @@ class MailAccountHandler(LoggingMixin): else: raise NotImplementedError( - "Unknown title selector." - ) # pragma: nocover # NOQA: E501 + "Unknown title selector.", + ) # pragma: nocover def get_correspondent(self, message, rule): c_from = rule.assign_correspondent_from @@ -137,7 +136,7 @@ class MailAccountHandler(LoggingMixin): message.from_values and "name" in message.from_values and message.from_values["name"] - ): # NOQA: E501 + ): return self._correspondent_from_name(message.from_values["name"]) else: return self._correspondent_from_name(message.from_) @@ -147,8 +146,8 @@ class MailAccountHandler(LoggingMixin): else: raise NotImplementedError( - "Unknwown correspondent selector" - ) # pragma: nocover # NOQA: E501 + "Unknwown correspondent selector", + ) # pragma: nocover def handle_mail_account(self, account): @@ -159,7 +158,9 @@ class MailAccountHandler(LoggingMixin): total_processed_files = 0 with get_mailbox( - account.imap_server, account.imap_port, account.imap_security + account.imap_server, + account.imap_port, + account.imap_security, ) as M: try: @@ -193,7 +194,7 @@ class MailAccountHandler(LoggingMixin): except MailboxFolderSelectError: raise MailError( f"Rule {rule}: Folder {rule.folder} " - f"does not exist in account {rule.account}" + f"does not exist in account {rule.account}", ) criterias = make_criterias(rule) @@ -242,12 +243,14 @@ class MailAccountHandler(LoggingMixin): try: get_rule_action(rule).post_consume( - M, post_consume_messages, rule.action_parameter + M, + post_consume_messages, + rule.action_parameter, ) except Exception as e: raise MailError( - f"Rule {rule}: Error while processing post-consume actions: " f"{e}" + f"Rule {rule}: Error while processing post-consume actions: " f"{e}", ) return total_processed_files @@ -274,7 +277,7 @@ class MailAccountHandler(LoggingMixin): if ( not att.content_disposition == "attachment" and rule.attachment_type == MailRule.ATTACHMENT_TYPE_ATTACHMENTS_ONLY - ): # NOQA: E501 + ): self.log( "debug", f"Rule {rule}: " @@ -297,7 +300,8 @@ class MailAccountHandler(LoggingMixin): os.makedirs(settings.SCRATCH_DIR, exist_ok=True) _, temp_filename = tempfile.mkstemp( - prefix="paperless-mail-", dir=settings.SCRATCH_DIR + prefix="paperless-mail-", + dir=settings.SCRATCH_DIR, ) with open(temp_filename, "wb") as f: f.write(att.payload) @@ -313,15 +317,13 @@ class MailAccountHandler(LoggingMixin): "documents.tasks.consume_file", path=temp_filename, override_filename=pathvalidate.sanitize_filename( - att.filename - ), # NOQA: E501 + att.filename, + ), override_title=title, override_correspondent_id=correspondent.id if correspondent - else None, # NOQA: E501 - override_document_type_id=doc_type.id - if doc_type - else None, # NOQA: E501 + else None, + override_document_type_id=doc_type.id if doc_type else None, override_tag_ids=[tag.id] if tag else None, task_name=att.filename[:100], ) diff --git a/src/paperless_mail/management/commands/mail_fetcher.py b/src/paperless_mail/management/commands/mail_fetcher.py index 642633660..e2bc8262c 100644 --- a/src/paperless_mail/management/commands/mail_fetcher.py +++ b/src/paperless_mail/management/commands/mail_fetcher.py @@ -1,5 +1,4 @@ from django.core.management.base import BaseCommand - from paperless_mail import tasks @@ -7,7 +6,8 @@ class Command(BaseCommand): help = """ """.replace( - " ", "" + " ", + "", ) def handle(self, *args, **options): diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py index 08048d352..6d58de4e9 100644 --- a/src/paperless_mail/models.py +++ b/src/paperless_mail/models.py @@ -1,7 +1,5 @@ -from django.db import models - import documents.models as document_models - +from django.db import models from django.utils.translation import gettext_lazy as _ @@ -30,12 +28,14 @@ class MailAccount(models.Model): null=True, help_text=_( "This is usually 143 for unencrypted and STARTTLS " - "connections, and 993 for SSL connections." + "connections, and 993 for SSL connections.", ), ) imap_security = models.PositiveIntegerField( - _("IMAP security"), choices=IMAP_SECURITY_OPTIONS, default=IMAP_SECURITY_SSL + _("IMAP security"), + choices=IMAP_SECURITY_OPTIONS, + default=IMAP_SECURITY_SSL, ) username = models.CharField(_("username"), max_length=256) @@ -48,7 +48,7 @@ class MailAccount(models.Model): default="UTF-8", help_text=_( "The character set to use when communicating with the " - "mail server, such as 'UTF-8' or 'US-ASCII'." + "mail server, such as 'UTF-8' or 'US-ASCII'.", ), ) @@ -123,13 +123,22 @@ class MailRule(models.Model): ) filter_from = models.CharField( - _("filter from"), max_length=256, null=True, blank=True + _("filter from"), + max_length=256, + null=True, + blank=True, ) filter_subject = models.CharField( - _("filter subject"), max_length=256, null=True, blank=True + _("filter subject"), + max_length=256, + null=True, + blank=True, ) filter_body = models.CharField( - _("filter body"), max_length=256, null=True, blank=True + _("filter body"), + max_length=256, + null=True, + blank=True, ) filter_attachment_filename = models.CharField( @@ -140,12 +149,14 @@ class MailRule(models.Model): help_text=_( "Only consume documents which entirely match this " "filename if specified. Wildcards such as *.pdf or " - "*invoice* are allowed. Case insensitive." + "*invoice* are allowed. Case insensitive.", ), ) maximum_age = models.PositiveIntegerField( - _("maximum age"), default=30, help_text=_("Specified in days.") + _("maximum age"), + default=30, + help_text=_("Specified in days."), ) attachment_type = models.PositiveIntegerField( @@ -154,7 +165,7 @@ class MailRule(models.Model): default=ATTACHMENT_TYPE_ATTACHMENTS_ONLY, help_text=_( "Inline attachments include embedded images, so it's best " - "to combine this option with a filename filter." + "to combine this option with a filename filter.", ), ) @@ -173,12 +184,14 @@ class MailRule(models.Model): "Additional parameter for the action selected above, " "i.e., " "the target folder of the move to folder action. " - "Subfolders must be separated by dots." + "Subfolders must be separated by dots.", ), ) assign_title_from = models.PositiveIntegerField( - _("assign title from"), choices=TITLE_SELECTOR, default=TITLE_FROM_SUBJECT + _("assign title from"), + choices=TITLE_SELECTOR, + default=TITLE_FROM_SUBJECT, ) assign_tag = models.ForeignKey( diff --git a/src/paperless_mail/tasks.py b/src/paperless_mail/tasks.py index bbb163ff1..faa0300e8 100644 --- a/src/paperless_mail/tasks.py +++ b/src/paperless_mail/tasks.py @@ -1,6 +1,7 @@ import logging -from paperless_mail.mail import MailAccountHandler, MailError +from paperless_mail.mail import MailAccountHandler +from paperless_mail.mail import MailError from paperless_mail.models import MailAccount diff --git a/src/paperless_mail/tests/test_mail.py b/src/paperless_mail/tests/test_mail.py index 10aa431db..bc56ed647 100644 --- a/src/paperless_mail/tests/test_mail.py +++ b/src/paperless_mail/tests/test_mail.py @@ -7,13 +7,15 @@ from unittest import mock from django.core.management import call_command from django.db import DatabaseError from django.test import TestCase -from imap_tools import MailMessageFlags, MailboxFolderSelectError - from documents.models import Correspondent from documents.tests.utils import DirectoriesMixin +from imap_tools import MailboxFolderSelectError +from imap_tools import MailMessageFlags from paperless_mail import tasks -from paperless_mail.mail import MailError, MailAccountHandler -from paperless_mail.models import MailRule, MailAccount +from paperless_mail.mail import MailAccountHandler +from paperless_mail.mail import MailError +from paperless_mail.models import MailAccount +from paperless_mail.models import MailRule class BogusFolderManager: @@ -83,7 +85,7 @@ class BogusMailBox(ContextManager): def move(self, uid_list, folder): if folder == "spam": self.messages_spam.append( - filter(lambda m: m.uid in uid_list, self.messages) + filter(lambda m: m.uid in uid_list, self.messages), ) self.messages = list(filter(lambda m: m.uid not in uid_list, self.messages)) else: @@ -115,7 +117,9 @@ def create_message( def create_attachment( - filename="the_file.pdf", content_disposition="attachment", payload=b"a PDF document" + filename="the_file.pdf", + content_disposition="attachment", + payload=b"a PDF document", ): attachment = namedtuple("Attachment", []) attachment.filename = filename @@ -163,7 +167,7 @@ class TestMail(DirectoriesMixin, TestCase): body="cables", seen=True, flagged=False, - ) + ), ) self.bogus_mailbox.messages.append( create_message( @@ -171,14 +175,14 @@ class TestMail(DirectoriesMixin, TestCase): body="from my favorite electronic store", seen=False, flagged=True, - ) + ), ) self.bogus_mailbox.messages.append( create_message( subject="Claim your $10M price now!", from_="amazon@amazon-some-indian-site.org", seen=False, - ) + ), ) def test_get_correspondent(self): @@ -196,12 +200,14 @@ class TestMail(DirectoriesMixin, TestCase): handler = MailAccountHandler() rule = MailRule( - name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING + name="a", + assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING, ) self.assertIsNone(handler.get_correspondent(message, rule)) rule = MailRule( - name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL + name="b", + assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL, ) c = handler.get_correspondent(message, rule) self.assertIsNotNone(c) @@ -212,7 +218,8 @@ class TestMail(DirectoriesMixin, TestCase): self.assertEqual(c.id, me_localhost.id) rule = MailRule( - name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME + name="c", + assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME, ) c = handler.get_correspondent(message, rule) self.assertIsNotNone(c) @@ -244,7 +251,9 @@ class TestMail(DirectoriesMixin, TestCase): def test_handle_message(self): message = create_message( - subject="the message title", from_="Myself", num_attachments=2 + subject="the message title", + from_="Myself", + num_attachments=2, ) account = MailAccount() @@ -376,11 +385,16 @@ class TestMail(DirectoriesMixin, TestCase): def test_handle_mail_account_mark_read(self): account = MailAccount.objects.create( - name="test", imap_server="", username="admin", password="secret" + name="test", + imap_server="", + username="admin", + password="secret", ) rule = MailRule.objects.create( - name="testrule", account=account, action=MailRule.ACTION_MARK_READ + name="testrule", + account=account, + action=MailRule.ACTION_MARK_READ, ) self.assertEqual(len(self.bogus_mailbox.messages), 3) @@ -394,7 +408,10 @@ class TestMail(DirectoriesMixin, TestCase): def test_handle_mail_account_delete(self): account = MailAccount.objects.create( - name="test", imap_server="", username="admin", password="secret" + name="test", + imap_server="", + username="admin", + password="secret", ) rule = MailRule.objects.create( @@ -412,7 +429,10 @@ class TestMail(DirectoriesMixin, TestCase): def test_handle_mail_account_flag(self): account = MailAccount.objects.create( - name="test", imap_server="", username="admin", password="secret" + name="test", + imap_server="", + username="admin", + password="secret", ) rule = MailRule.objects.create( @@ -432,7 +452,10 @@ class TestMail(DirectoriesMixin, TestCase): def test_handle_mail_account_move(self): account = MailAccount.objects.create( - name="test", imap_server="", username="admin", password="secret" + name="test", + imap_server="", + username="admin", + password="secret", ) rule = MailRule.objects.create( @@ -453,7 +476,10 @@ class TestMail(DirectoriesMixin, TestCase): def test_error_login(self): account = MailAccount.objects.create( - name="test", imap_server="", username="admin", password="wrong" + name="test", + imap_server="", + username="admin", + password="wrong", ) try: @@ -465,11 +491,17 @@ class TestMail(DirectoriesMixin, TestCase): def test_error_skip_account(self): account_faulty = MailAccount.objects.create( - name="test", imap_server="", username="admin", password="wroasdng" + name="test", + imap_server="", + username="admin", + password="wroasdng", ) account = MailAccount.objects.create( - name="test2", imap_server="", username="admin", password="secret" + name="test2", + imap_server="", + username="admin", + password="secret", ) rule = MailRule.objects.create( name="testrule", @@ -487,7 +519,10 @@ class TestMail(DirectoriesMixin, TestCase): def test_error_skip_rule(self): account = MailAccount.objects.create( - name="test2", imap_server="", username="admin", password="secret" + name="test2", + imap_server="", + username="admin", + password="secret", ) rule = MailRule.objects.create( name="testrule", @@ -523,7 +558,10 @@ class TestMail(DirectoriesMixin, TestCase): m.side_effect = get_correspondent_fake account = MailAccount.objects.create( - name="test2", imap_server="", username="admin", password="secret" + name="test2", + imap_server="", + username="admin", + password="secret", ) rule = MailRule.objects.create( name="testrule", @@ -544,7 +582,10 @@ class TestMail(DirectoriesMixin, TestCase): def test_error_create_correspondent(self): account = MailAccount.objects.create( - name="test2", imap_server="", username="admin", password="secret" + name="test2", + imap_server="", + username="admin", + password="secret", ) rule = MailRule.objects.create( name="testrule", @@ -579,7 +620,10 @@ class TestMail(DirectoriesMixin, TestCase): def test_filters(self): account = MailAccount.objects.create( - name="test3", imap_server="", username="admin", password="secret" + name="test3", + imap_server="", + username="admin", + password="secret", ) rule = MailRule.objects.create( name="testrule3", @@ -629,7 +673,7 @@ class TestMail(DirectoriesMixin, TestCase): class TestManagementCommand(TestCase): @mock.patch( - "paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts" + "paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts", ) def test_mail_fetcher(self, m): @@ -644,10 +688,16 @@ class TestTasks(TestCase): m.side_effect = lambda account: 6 MailAccount.objects.create( - name="A", imap_server="A", username="A", password="A" + name="A", + imap_server="A", + username="A", + password="A", ) MailAccount.objects.create( - name="B", imap_server="A", username="A", password="A" + name="B", + imap_server="A", + username="A", + password="A", ) result = tasks.process_mail_accounts() @@ -663,7 +713,10 @@ class TestTasks(TestCase): def test_single_accounts(self, m): MailAccount.objects.create( - name="A", imap_server="A", username="A", password="A" + name="A", + imap_server="A", + username="A", + password="A", ) tasks.process_mail_account("A") diff --git a/src/paperless_tesseract/__init__.py b/src/paperless_tesseract/__init__.py index 5c9f358c3..1e857428c 100644 --- a/src/paperless_tesseract/__init__.py +++ b/src/paperless_tesseract/__init__.py @@ -1,2 +1,5 @@ # this is here so that django finds the checks. -from .checks import * +from .checks import check_default_language_available +from .checks import get_tesseract_langs + +__all__ = ["get_tesseract_langs", "check_default_language_available"] diff --git a/src/paperless_tesseract/apps.py b/src/paperless_tesseract/apps.py index 67b90f006..02045758b 100644 --- a/src/paperless_tesseract/apps.py +++ b/src/paperless_tesseract/apps.py @@ -1,5 +1,4 @@ from django.apps import AppConfig - from paperless_tesseract.signals import tesseract_consumer_declaration diff --git a/src/paperless_tesseract/checks.py b/src/paperless_tesseract/checks.py index e627aa0ac..99780cad4 100644 --- a/src/paperless_tesseract/checks.py +++ b/src/paperless_tesseract/checks.py @@ -1,7 +1,9 @@ import subprocess from django.conf import settings -from django.core.checks import Error, Warning, register +from django.core.checks import Error +from django.core.checks import register +from django.core.checks import Warning def get_tesseract_langs(): @@ -19,8 +21,8 @@ def check_default_language_available(app_configs, **kwargs): return [ Warning( "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. " - "This means that tesseract will fallback to english." - ) + "This means that tesseract will fallback to english.", + ), ] specified_langs = settings.OCR_LANGUAGE.split("+") @@ -31,8 +33,8 @@ def check_default_language_available(app_configs, **kwargs): Error( f"The selected ocr language {lang} is " f"not installed. Paperless cannot OCR your documents " - f"without it. Please fix PAPERLESS_OCR_LANGUAGE." - ) + f"without it. Please fix PAPERLESS_OCR_LANGUAGE.", + ), ] return [] diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index f79e086da..ad167ecf0 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -2,10 +2,11 @@ import json import os import re -from PIL import Image from django.conf import settings - -from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf +from documents.parsers import DocumentParser +from documents.parsers import make_thumbnail_from_pdf +from documents.parsers import ParseError +from PIL import Image class NoTextFoundException(Exception): @@ -42,7 +43,7 @@ class RasterisedDocumentParser(DocumentParser): "prefix": meta.REVERSE_NS[m.group(1)], "key": m.group(2), "value": value, - } + }, ) except Exception as e: self.log( @@ -53,7 +54,9 @@ class RasterisedDocumentParser(DocumentParser): def get_thumbnail(self, document_path, mime_type, file_name=None): return make_thumbnail_from_pdf( - self.archive_path or document_path, self.tempdir, self.logging_group + self.archive_path or document_path, + self.tempdir, + self.logging_group, ) def is_image(self, mime_type): @@ -110,7 +113,6 @@ class RasterisedDocumentParser(DocumentParser): return None from pdfminer.high_level import extract_text as pdfminer_extract_text - from pdfminer.pdftypes import PDFException try: stripped = post_process_text(pdfminer_extract_text(pdf_file)) @@ -129,7 +131,12 @@ class RasterisedDocumentParser(DocumentParser): return None def construct_ocrmypdf_parameters( - self, input_file, mime_type, output_file, sidecar_file, safe_fallback=False + self, + input_file, + mime_type, + output_file, + sidecar_file, + safe_fallback=False, ): ocrmypdf_args = { "input_file": input_file, @@ -167,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser): ocrmypdf_args["rotate_pages"] = True ocrmypdf_args[ "rotate_pages_threshold" - ] = settings.OCR_ROTATE_PAGES_THRESHOLD # NOQA: E501 + ] = settings.OCR_ROTATE_PAGES_THRESHOLD if settings.OCR_PAGES > 0: ocrmypdf_args["pages"] = f"1-{settings.OCR_PAGES}" @@ -202,7 +209,7 @@ class RasterisedDocumentParser(DocumentParser): raise ParseError( f"Cannot produce archive PDF for image {input_file}, " f"no DPI information is present in this image and " - f"OCR_IMAGE_DPI is not set." + f"OCR_IMAGE_DPI is not set.", ) if settings.OCR_USER_ARGS and not safe_fallback: @@ -241,7 +248,10 @@ class RasterisedDocumentParser(DocumentParser): sidecar_file = os.path.join(self.tempdir, "sidecar.txt") args = self.construct_ocrmypdf_parameters( - document_path, mime_type, archive_path, sidecar_file + document_path, + mime_type, + archive_path, + sidecar_file, ) try: @@ -289,7 +299,8 @@ class RasterisedDocumentParser(DocumentParser): # is bigger and blurry due to --force-ocr. self.text = self.extract_text( - sidecar_file_fallback, archive_path_fallback + sidecar_file_fallback, + archive_path_fallback, ) except Exception as e: diff --git a/src/paperless_tesseract/tests/test_checks.py b/src/paperless_tesseract/tests/test_checks.py index 31d60f4ee..cfac11d3c 100644 --- a/src/paperless_tesseract/tests/test_checks.py +++ b/src/paperless_tesseract/tests/test_checks.py @@ -1,8 +1,8 @@ from unittest import mock from django.core.checks import ERROR -from django.test import TestCase, override_settings - +from django.test import override_settings +from django.test import TestCase from paperless_tesseract import check_default_language_available @@ -16,8 +16,8 @@ class TestChecks(TestCase): self.assertEqual(len(msgs), 1) self.assertTrue( msgs[0].msg.startswith( - "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE" - ) + "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE", + ), ) @override_settings(OCR_LANGUAGE="ita") diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py index 9b59c324d..97ec26ca9 100644 --- a/src/paperless_tesseract/tests/test_parser.py +++ b/src/paperless_tesseract/tests/test_parser.py @@ -3,11 +3,13 @@ import uuid from typing import ContextManager from unittest import mock -from django.test import TestCase, override_settings - -from documents.parsers import ParseError, run_convert +from django.test import override_settings +from django.test import TestCase +from documents.parsers import ParseError +from documents.parsers import run_convert from documents.tests.utils import DirectoriesMixin -from paperless_tesseract.parsers import RasterisedDocumentParser, post_process_text +from paperless_tesseract.parsers import post_process_text +from paperless_tesseract.parsers import RasterisedDocumentParser image_to_string_calls = [] @@ -56,7 +58,9 @@ class TestParser(DirectoriesMixin, TestCase): result, actual_result, "strip_exceess_whitespace({}) != '{}', but '{}'".format( - source, result, actual_result + source, + result, + actual_result, ), ) @@ -65,7 +69,8 @@ class TestParser(DirectoriesMixin, TestCase): def test_get_text_from_pdf(self): parser = RasterisedDocumentParser(uuid.uuid4()) text = parser.extract_text( - None, os.path.join(self.SAMPLE_FILES, "simple-digital.pdf") + None, + os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), ) self.assertContainsStrings(text.strip(), ["This is a test document."]) @@ -73,7 +78,8 @@ class TestParser(DirectoriesMixin, TestCase): def test_thumbnail(self): parser = RasterisedDocumentParser(uuid.uuid4()) thumb = parser.get_thumbnail( - os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(thumb)) @@ -89,14 +95,16 @@ class TestParser(DirectoriesMixin, TestCase): parser = RasterisedDocumentParser(uuid.uuid4()) thumb = parser.get_thumbnail( - os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(thumb)) def test_thumbnail_encrypted(self): parser = RasterisedDocumentParser(uuid.uuid4()) thumb = parser.get_thumbnail( - os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(thumb)) @@ -113,7 +121,8 @@ class TestParser(DirectoriesMixin, TestCase): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) @@ -124,7 +133,8 @@ class TestParser(DirectoriesMixin, TestCase): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "with-form.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) @@ -139,7 +149,8 @@ class TestParser(DirectoriesMixin, TestCase): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "with-form.pdf"), + "application/pdf", ) self.assertIsNone(parser.archive_path) @@ -168,7 +179,8 @@ class TestParser(DirectoriesMixin, TestCase): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), + "application/pdf", ) self.assertIsNone(parser.archive_path) @@ -178,7 +190,8 @@ class TestParser(DirectoriesMixin, TestCase): def test_with_form_error_notext(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "with-form.pdf"), + "application/pdf", ) self.assertContainsStrings( @@ -191,7 +204,8 @@ class TestParser(DirectoriesMixin, TestCase): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "with-form.pdf"), + "application/pdf", ) self.assertContainsStrings( @@ -221,7 +235,7 @@ class TestParser(DirectoriesMixin, TestCase): parser = RasterisedDocumentParser(None) dpi = parser.calculate_a4_dpi( - os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png") + os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), ) self.assertEqual(dpi, 62) @@ -233,7 +247,8 @@ class TestParser(DirectoriesMixin, TestCase): def f(): parser.parse( - os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png" + os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), + "image/png", ) self.assertRaises(ParseError, f) @@ -247,68 +262,80 @@ class TestParser(DirectoriesMixin, TestCase): self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings( - parser.get_text().lower(), ["this is a test document."] + parser.get_text().lower(), + ["this is a test document."], ) def test_multi_page(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings( - parser.get_text().lower(), ["page 1", "page 2", "page 3"] + parser.get_text().lower(), + ["page 1", "page 2", "page 3"], ) @override_settings(OCR_PAGES=2, OCR_MODE="skip") def test_multi_page_pages_skip(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings( - parser.get_text().lower(), ["page 1", "page 2", "page 3"] + parser.get_text().lower(), + ["page 1", "page 2", "page 3"], ) @override_settings(OCR_PAGES=2, OCR_MODE="redo") def test_multi_page_pages_redo(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings( - parser.get_text().lower(), ["page 1", "page 2", "page 3"] + parser.get_text().lower(), + ["page 1", "page 2", "page 3"], ) @override_settings(OCR_PAGES=2, OCR_MODE="force") def test_multi_page_pages_force(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings( - parser.get_text().lower(), ["page 1", "page 2", "page 3"] + parser.get_text().lower(), + ["page 1", "page 2", "page 3"], ) @override_settings(OOCR_MODE="skip") def test_multi_page_analog_pages_skip(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings( - parser.get_text().lower(), ["page 1", "page 2", "page 3"] + parser.get_text().lower(), + ["page 1", "page 2", "page 3"], ) @override_settings(OCR_PAGES=2, OCR_MODE="redo") def test_multi_page_analog_pages_redo(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"]) @@ -318,7 +345,8 @@ class TestParser(DirectoriesMixin, TestCase): def test_multi_page_analog_pages_force(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings(parser.get_text().lower(), ["page 1"]) @@ -329,29 +357,34 @@ class TestParser(DirectoriesMixin, TestCase): def test_skip_noarchive_withtext(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), + "application/pdf", ) self.assertIsNone(parser.archive_path) self.assertContainsStrings( - parser.get_text().lower(), ["page 1", "page 2", "page 3"] + parser.get_text().lower(), + ["page 1", "page 2", "page 3"], ) @override_settings(OCR_MODE="skip_noarchive") def test_skip_noarchive_notext(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings( - parser.get_text().lower(), ["page 1", "page 2", "page 3"] + parser.get_text().lower(), + ["page 1", "page 2", "page 3"], ) @override_settings(OCR_MODE="skip") def test_multi_page_mixed(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), + "application/pdf", ) self.assertTrue(os.path.isfile(parser.archive_path)) self.assertContainsStrings( @@ -368,11 +401,13 @@ class TestParser(DirectoriesMixin, TestCase): def test_multi_page_mixed_no_archive(self): parser = RasterisedDocumentParser(None) parser.parse( - os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf" + os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), + "application/pdf", ) self.assertIsNone(parser.archive_path) self.assertContainsStrings( - parser.get_text().lower(), ["page 4", "page 5", "page 6"] + parser.get_text().lower(), + ["page 4", "page 5", "page 6"], ) @override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True) diff --git a/src/paperless_text/apps.py b/src/paperless_text/apps.py index 1acc361aa..61f5eb7ef 100644 --- a/src/paperless_text/apps.py +++ b/src/paperless_text/apps.py @@ -1,5 +1,4 @@ from django.apps import AppConfig - from paperless_text.signals import text_consumer_declaration diff --git a/src/paperless_text/parsers.py b/src/paperless_text/parsers.py index 86d4e8d43..a0f19c020 100644 --- a/src/paperless_text/parsers.py +++ b/src/paperless_text/parsers.py @@ -1,9 +1,10 @@ import os -from PIL import ImageDraw, ImageFont, Image from django.conf import settings - from documents.parsers import DocumentParser +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFont class TextDocumentParser(DocumentParser): diff --git a/src/paperless_text/tests/test_parser.py b/src/paperless_text/tests/test_parser.py index f63c327cb..d952ceb9a 100644 --- a/src/paperless_text/tests/test_parser.py +++ b/src/paperless_text/tests/test_parser.py @@ -1,7 +1,6 @@ import os from django.test import TestCase - from documents.tests.utils import DirectoriesMixin from paperless_text.parsers import TextDocumentParser @@ -13,7 +12,8 @@ class TestTextParser(DirectoriesMixin, TestCase): # just make sure that it does not crash f = parser.get_thumbnail( - os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain" + os.path.join(os.path.dirname(__file__), "samples", "test.txt"), + "text/plain", ) self.assertTrue(os.path.isfile(f)) @@ -22,7 +22,8 @@ class TestTextParser(DirectoriesMixin, TestCase): parser = TextDocumentParser(None) parser.parse( - os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain" + os.path.join(os.path.dirname(__file__), "samples", "test.txt"), + "text/plain", ) self.assertEqual(parser.get_text(), "This is a test file.\n") diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index 5dff20098..22218dfe7 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -1,10 +1,11 @@ import os -import requests + import dateutil.parser - +import requests from django.conf import settings - -from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf +from documents.parsers import DocumentParser +from documents.parsers import make_thumbnail_from_pdf +from documents.parsers import ParseError from tika import parser @@ -20,7 +21,9 @@ class TikaDocumentParser(DocumentParser): self.archive_path = self.convert_to_pdf(document_path, file_name) return make_thumbnail_from_pdf( - self.archive_path, self.tempdir, self.logging_group + self.archive_path, + self.tempdir, + self.logging_group, ) def extract_metadata(self, document_path, mime_type): @@ -53,7 +56,7 @@ class TikaDocumentParser(DocumentParser): except Exception as err: raise ParseError( f"Could not parse {document_path} with tika server at " - f"{tika_server}: {err}" + f"{tika_server}: {err}", ) self.text = parsed["content"].strip() @@ -74,22 +77,23 @@ class TikaDocumentParser(DocumentParser): url = gotenberg_server + "/forms/libreoffice/convert" self.log("info", f"Converting {document_path} to PDF as {pdf_path}") - files = { - "files": ( - file_name or os.path.basename(document_path), - open(document_path, "rb"), - ) - } - headers = {} + with open(document_path, "rb") as document_handle: + files = { + "files": ( + file_name or os.path.basename(document_path), + document_handle, + ), + } + headers = {} - try: - response = requests.post(url, files=files, headers=headers) - response.raise_for_status() # ensure we notice bad responses - except Exception as err: - raise ParseError(f"Error while converting document to PDF: {err}") + try: + response = requests.post(url, files=files, headers=headers) + response.raise_for_status() # ensure we notice bad responses + except Exception as err: + raise ParseError(f"Error while converting document to PDF: {err}") - file = open(pdf_path, "wb") - file.write(response.content) - file.close() + with open(pdf_path, "wb") as file: + file.write(response.content) + file.close() return pdf_path diff --git a/src/paperless_tika/signals.py b/src/paperless_tika/signals.py index 6b1698f2d..39838f076 100644 --- a/src/paperless_tika/signals.py +++ b/src/paperless_tika/signals.py @@ -10,12 +10,12 @@ def tika_consumer_declaration(sender, **kwargs): "weight": 10, "mime_types": { "application/msword": ".doc", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", # NOQA: E501 + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", # noqa: E501 "application/vnd.ms-excel": ".xls", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", # NOQA: E501 + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", # noqa: E501 "application/vnd.ms-powerpoint": ".ppt", - "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx", # NOQA: E501 - "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx", # NOQA: E501 + "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx", # noqa: E501 + "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx", # noqa: E501 "application/vnd.oasis.opendocument.presentation": ".odp", "application/vnd.oasis.opendocument.spreadsheet": ".ods", "application/vnd.oasis.opendocument.text": ".odt", diff --git a/src/paperless_tika/tests/test_tika_parser.py b/src/paperless_tika/tests/test_tika_parser.py index 7eaaab25e..bf6b4e7c8 100644 --- a/src/paperless_tika/tests/test_tika_parser.py +++ b/src/paperless_tika/tests/test_tika_parser.py @@ -4,9 +4,8 @@ from pathlib import Path from unittest import mock from django.test import TestCase -from requests import Response - from paperless_tika.parsers import TikaDocumentParser +from requests import Response class TestTikaParser(TestCase): @@ -42,14 +41,15 @@ class TestTikaParser(TestCase): @mock.patch("paperless_tika.parsers.parser.from_file") def test_metadata(self, from_file): from_file.return_value = { - "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"} + "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"}, } file = os.path.join(self.parser.tempdir, "input.odt") Path(file).touch() metadata = self.parser.extract_metadata( - file, "application/vnd.oasis.opendocument.text" + file, + "application/vnd.oasis.opendocument.text", ) self.assertTrue("Creation-Date" in [m["key"] for m in metadata])