Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-10-30 03:56:23 -05:00)
			
		
		
		
	Runs the pre-commit hooks over all the Python files
This commit is contained in:
		| @@ -1,2 +1,5 @@ | ||||
| # this is here so that django finds the checks. | ||||
| from .checks import * | ||||
| from .checks import changed_password_check | ||||
| from .checks import parser_check | ||||
|  | ||||
| __all__ = ["changed_password_check", "parser_check"] | ||||
|   | ||||
| @@ -1,13 +1,11 @@ | ||||
| from django.contrib import admin | ||||
|  | ||||
| from .models import ( | ||||
|     Correspondent, | ||||
|     Document, | ||||
|     DocumentType, | ||||
|     Tag, | ||||
|     SavedView, | ||||
|     SavedViewFilterRule, | ||||
| ) | ||||
| from .models import Correspondent | ||||
| from .models import Document | ||||
| from .models import DocumentType | ||||
| from .models import SavedView | ||||
| from .models import SavedViewFilterRule | ||||
| from .models import Tag | ||||
|  | ||||
|  | ||||
| class CorrespondentAdmin(admin.ModelAdmin): | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| from django.apps import AppConfig | ||||
|  | ||||
| from django.utils.translation import gettext_lazy as _ | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -8,7 +8,10 @@ class BulkArchiveStrategy: | ||||
|         self.zipf = zipf | ||||
|  | ||||
|     def make_unique_filename( | ||||
|         self, doc: Document, archive: bool = False, folder: str = "" | ||||
|         self, | ||||
|         doc: Document, | ||||
|         archive: bool = False, | ||||
|         folder: str = "", | ||||
|     ): | ||||
|         counter = 0 | ||||
|         while True: | ||||
| @@ -34,7 +37,8 @@ class ArchiveOnlyStrategy(BulkArchiveStrategy): | ||||
|     def add_document(self, doc: Document): | ||||
|         if doc.has_archive_version: | ||||
|             self.zipf.write( | ||||
|                 doc.archive_path, self.make_unique_filename(doc, archive=True) | ||||
|                 doc.archive_path, | ||||
|                 self.make_unique_filename(doc, archive=True), | ||||
|             ) | ||||
|         else: | ||||
|             self.zipf.write(doc.source_path, self.make_unique_filename(doc)) | ||||
| @@ -49,5 +53,6 @@ class OriginalAndArchiveStrategy(BulkArchiveStrategy): | ||||
|             ) | ||||
|  | ||||
|         self.zipf.write( | ||||
|             doc.source_path, self.make_unique_filename(doc, folder="originals/") | ||||
|             doc.source_path, | ||||
|             self.make_unique_filename(doc, folder="originals/"), | ||||
|         ) | ||||
|   | ||||
| @@ -2,8 +2,9 @@ import itertools | ||||
|  | ||||
| from django.db.models import Q | ||||
| from django_q.tasks import async_task | ||||
|  | ||||
| from documents.models import Document, Correspondent, DocumentType | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
|  | ||||
|  | ||||
| def set_correspondent(doc_ids, correspondent): | ||||
| @@ -40,7 +41,7 @@ def add_tag(doc_ids, tag): | ||||
|     DocumentTagRelationship = Document.tags.through | ||||
|  | ||||
|     DocumentTagRelationship.objects.bulk_create( | ||||
|         [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs] | ||||
|         [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs], | ||||
|     ) | ||||
|  | ||||
|     async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs) | ||||
| @@ -56,7 +57,7 @@ def remove_tag(doc_ids, tag): | ||||
|     DocumentTagRelationship = Document.tags.through | ||||
|  | ||||
|     DocumentTagRelationship.objects.filter( | ||||
|         Q(document_id__in=affected_docs) & Q(tag_id=tag) | ||||
|         Q(document_id__in=affected_docs) & Q(tag_id=tag), | ||||
|     ).delete() | ||||
|  | ||||
|     async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs) | ||||
|   | ||||
| @@ -1,10 +1,11 @@ | ||||
| import textwrap | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.checks import Error, register | ||||
| from django.core.checks import Error | ||||
| from django.core.checks import register | ||||
| from django.core.exceptions import FieldError | ||||
| from django.db.utils import OperationalError, ProgrammingError | ||||
|  | ||||
| from django.db.utils import OperationalError | ||||
| from django.db.utils import ProgrammingError | ||||
| from documents.signals import document_consumer_declaration | ||||
|  | ||||
|  | ||||
| @@ -16,7 +17,7 @@ def changed_password_check(app_configs, **kwargs): | ||||
|  | ||||
|     try: | ||||
|         encrypted_doc = Document.objects.filter( | ||||
|             storage_type=Document.STORAGE_TYPE_GPG | ||||
|             storage_type=Document.STORAGE_TYPE_GPG, | ||||
|         ).first() | ||||
|     except (OperationalError, ProgrammingError, FieldError): | ||||
|         return []  # No documents table yet | ||||
| @@ -27,8 +28,8 @@ def changed_password_check(app_configs, **kwargs): | ||||
|             return [ | ||||
|                 Error( | ||||
|                     "The database contains encrypted documents but no password " | ||||
|                     "is set." | ||||
|                 ) | ||||
|                     "is set.", | ||||
|                 ), | ||||
|             ] | ||||
|  | ||||
|         if not GnuPG.decrypted(encrypted_doc.source_file): | ||||
| @@ -42,9 +43,9 @@ def changed_password_check(app_configs, **kwargs): | ||||
|                 If you intend to change your password, you must first export | ||||
|                 all of the old documents, start fresh with the new password | ||||
|                 and then re-import them." | ||||
|                 """ | ||||
|                     ) | ||||
|                 ) | ||||
|                 """, | ||||
|                     ), | ||||
|                 ), | ||||
|             ] | ||||
|  | ||||
|     return [] | ||||
| @@ -61,8 +62,8 @@ def parser_check(app_configs, **kwargs): | ||||
|         return [ | ||||
|             Error( | ||||
|                 "No parsers found. This is a bug. The consumer won't be " | ||||
|                 "able to consume any documents without parsers." | ||||
|             ) | ||||
|                 "able to consume any documents without parsers.", | ||||
|             ), | ||||
|         ] | ||||
|     else: | ||||
|         return [] | ||||
|   | ||||
| @@ -6,8 +6,8 @@ import re | ||||
| import shutil | ||||
|  | ||||
| from django.conf import settings | ||||
|  | ||||
| from documents.models import Document, MatchingModel | ||||
| from documents.models import Document | ||||
| from documents.models import MatchingModel | ||||
|  | ||||
|  | ||||
| class IncompatibleClassifierVersionError(Exception): | ||||
| @@ -30,8 +30,8 @@ def preprocess_content(content): | ||||
| def load_classifier(): | ||||
|     if not os.path.isfile(settings.MODEL_FILE): | ||||
|         logger.debug( | ||||
|             f"Document classification model does not exist (yet), not " | ||||
|             f"performing automatic matching." | ||||
|             "Document classification model does not exist (yet), not " | ||||
|             "performing automatic matching.", | ||||
|         ) | ||||
|         return None | ||||
|  | ||||
| @@ -42,16 +42,16 @@ def load_classifier(): | ||||
|     except (ClassifierModelCorruptError, IncompatibleClassifierVersionError): | ||||
|         # there's something wrong with the model file. | ||||
|         logger.exception( | ||||
|             f"Unrecoverable error while loading document " | ||||
|             f"classification model, deleting model file." | ||||
|             "Unrecoverable error while loading document " | ||||
|             "classification model, deleting model file.", | ||||
|         ) | ||||
|         os.unlink(settings.MODEL_FILE) | ||||
|         classifier = None | ||||
|     except OSError: | ||||
|         logger.exception(f"IO error while loading document classification model") | ||||
|         logger.exception("IO error while loading document classification model") | ||||
|         classifier = None | ||||
|     except Exception: | ||||
|         logger.exception(f"Unknown error while loading document classification model") | ||||
|         logger.exception("Unknown error while loading document classification model") | ||||
|         classifier = None | ||||
|  | ||||
|     return classifier | ||||
| @@ -78,7 +78,7 @@ class DocumentClassifier(object): | ||||
|  | ||||
|             if schema_version != self.FORMAT_VERSION: | ||||
|                 raise IncompatibleClassifierVersionError( | ||||
|                     "Cannor load classifier, incompatible versions." | ||||
|                     "Cannor load classifier, incompatible versions.", | ||||
|                 ) | ||||
|             else: | ||||
|                 try: | ||||
| @@ -122,8 +122,8 @@ class DocumentClassifier(object): | ||||
|         logger.debug("Gathering data from database...") | ||||
|         m = hashlib.sha1() | ||||
|         for doc in Document.objects.order_by("pk").exclude( | ||||
|             tags__is_inbox_tag=True | ||||
|         ):  # NOQA: E501 | ||||
|             tags__is_inbox_tag=True, | ||||
|         ): | ||||
|             preprocessed_content = preprocess_content(doc.content) | ||||
|             m.update(preprocessed_content.encode("utf-8")) | ||||
|             data.append(preprocessed_content) | ||||
| @@ -146,9 +146,9 @@ class DocumentClassifier(object): | ||||
|                 [ | ||||
|                     tag.pk | ||||
|                     for tag in doc.tags.filter( | ||||
|                         matching_algorithm=MatchingModel.MATCH_AUTO | ||||
|                         matching_algorithm=MatchingModel.MATCH_AUTO, | ||||
|                     ) | ||||
|                 ] | ||||
|                 ], | ||||
|             ) | ||||
|             for tag in tags: | ||||
|                 m.update(tag.to_bytes(4, "little", signed=True)) | ||||
| @@ -177,8 +177,11 @@ class DocumentClassifier(object): | ||||
|         logger.debug( | ||||
|             "{} documents, {} tag(s), {} correspondent(s), " | ||||
|             "{} document type(s).".format( | ||||
|                 len(data), num_tags, num_correspondents, num_document_types | ||||
|             ) | ||||
|                 len(data), | ||||
|                 num_tags, | ||||
|                 num_correspondents, | ||||
|                 num_document_types, | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         from sklearn.feature_extraction.text import CountVectorizer | ||||
| @@ -188,7 +191,9 @@ class DocumentClassifier(object): | ||||
|         # Step 2: vectorize data | ||||
|         logger.debug("Vectorizing data...") | ||||
|         self.data_vectorizer = CountVectorizer( | ||||
|             analyzer="word", ngram_range=(1, 2), min_df=0.01 | ||||
|             analyzer="word", | ||||
|             ngram_range=(1, 2), | ||||
|             min_df=0.01, | ||||
|         ) | ||||
|         data_vectorized = self.data_vectorizer.fit_transform(data) | ||||
|  | ||||
| @@ -204,7 +209,7 @@ class DocumentClassifier(object): | ||||
|                 ] | ||||
|                 self.tags_binarizer = LabelBinarizer() | ||||
|                 labels_tags_vectorized = self.tags_binarizer.fit_transform( | ||||
|                     labels_tags | ||||
|                     labels_tags, | ||||
|                 ).ravel() | ||||
|             else: | ||||
|                 self.tags_binarizer = MultiLabelBinarizer() | ||||
| @@ -223,7 +228,8 @@ class DocumentClassifier(object): | ||||
|         else: | ||||
|             self.correspondent_classifier = None | ||||
|             logger.debug( | ||||
|                 "There are no correspondents. Not training correspondent " "classifier." | ||||
|                 "There are no correspondents. Not training correspondent " | ||||
|                 "classifier.", | ||||
|             ) | ||||
|  | ||||
|         if num_document_types > 0: | ||||
| @@ -233,7 +239,8 @@ class DocumentClassifier(object): | ||||
|         else: | ||||
|             self.document_type_classifier = None | ||||
|             logger.debug( | ||||
|                 "There are no document types. Not training document type " "classifier." | ||||
|                 "There are no document types. Not training document type " | ||||
|                 "classifier.", | ||||
|             ) | ||||
|  | ||||
|         self.data_hash = new_data_hash | ||||
|   | ||||
| @@ -15,11 +15,19 @@ from filelock import FileLock | ||||
| from rest_framework.reverse import reverse | ||||
|  | ||||
| from .classifier import load_classifier | ||||
| from .file_handling import create_source_path_directory, generate_unique_filename | ||||
| from .file_handling import create_source_path_directory | ||||
| from .file_handling import generate_unique_filename | ||||
| from .loggers import LoggingMixin | ||||
| from .models import Document, FileInfo, Correspondent, DocumentType, Tag | ||||
| from .parsers import ParseError, get_parser_class_for_mime_type, parse_date | ||||
| from .signals import document_consumption_finished, document_consumption_started | ||||
| from .models import Correspondent | ||||
| from .models import Document | ||||
| from .models import DocumentType | ||||
| from .models import FileInfo | ||||
| from .models import Tag | ||||
| from .parsers import get_parser_class_for_mime_type | ||||
| from .parsers import parse_date | ||||
| from .parsers import ParseError | ||||
| from .signals import document_consumption_finished | ||||
| from .signals import document_consumption_started | ||||
|  | ||||
|  | ||||
| class ConsumerError(Exception): | ||||
| @@ -46,12 +54,15 @@ class Consumer(LoggingMixin): | ||||
|     logging_name = "paperless.consumer" | ||||
|  | ||||
|     def _send_progress( | ||||
|         self, current_progress, max_progress, status, message=None, document_id=None | ||||
|         self, | ||||
|         current_progress, | ||||
|         max_progress, | ||||
|         status, | ||||
|         message=None, | ||||
|         document_id=None, | ||||
|     ): | ||||
|         payload = { | ||||
|             "filename": os.path.basename(self.filename) | ||||
|             if self.filename | ||||
|             else None,  # NOQA: E501 | ||||
|             "filename": os.path.basename(self.filename) if self.filename else None, | ||||
|             "task_id": self.task_id, | ||||
|             "current_progress": current_progress, | ||||
|             "max_progress": max_progress, | ||||
| @@ -60,7 +71,8 @@ class Consumer(LoggingMixin): | ||||
|             "document_id": document_id, | ||||
|         } | ||||
|         async_to_sync(self.channel_layer.group_send)( | ||||
|             "status_updates", {"type": "status_update", "data": payload} | ||||
|             "status_updates", | ||||
|             {"type": "status_update", "data": payload}, | ||||
|         ) | ||||
|  | ||||
|     def _fail(self, message, log_message=None, exc_info=None): | ||||
| @@ -83,15 +95,16 @@ class Consumer(LoggingMixin): | ||||
|     def pre_check_file_exists(self): | ||||
|         if not os.path.isfile(self.path): | ||||
|             self._fail( | ||||
|                 MESSAGE_FILE_NOT_FOUND, f"Cannot consume {self.path}: File not found." | ||||
|                 MESSAGE_FILE_NOT_FOUND, | ||||
|                 f"Cannot consume {self.path}: File not found.", | ||||
|             ) | ||||
|  | ||||
|     def pre_check_duplicate(self): | ||||
|         with open(self.path, "rb") as f: | ||||
|             checksum = hashlib.md5(f.read()).hexdigest() | ||||
|         if Document.objects.filter( | ||||
|             Q(checksum=checksum) | Q(archive_checksum=checksum) | ||||
|         ).exists():  # NOQA: E501 | ||||
|             Q(checksum=checksum) | Q(archive_checksum=checksum), | ||||
|         ).exists(): | ||||
|             if settings.CONSUMER_DELETE_DUPLICATES: | ||||
|                 os.unlink(self.path) | ||||
|             self._fail( | ||||
| @@ -139,7 +152,8 @@ class Consumer(LoggingMixin): | ||||
|             ) | ||||
|  | ||||
|         self.log( | ||||
|             "info", f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}" | ||||
|             "info", | ||||
|             f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}", | ||||
|         ) | ||||
|  | ||||
|         try: | ||||
| @@ -154,7 +168,7 @@ class Consumer(LoggingMixin): | ||||
|                     reverse("document-thumb", kwargs={"pk": document.pk}), | ||||
|                     str(document.correspondent), | ||||
|                     str(",".join(document.tags.all().values_list("name", flat=True))), | ||||
|                 ) | ||||
|                 ), | ||||
|             ).wait() | ||||
|         except Exception as e: | ||||
|             self._fail( | ||||
| @@ -213,7 +227,9 @@ class Consumer(LoggingMixin): | ||||
|         # Notify all listeners that we're going to do some work. | ||||
|  | ||||
|         document_consumption_started.send( | ||||
|             sender=self.__class__, filename=self.path, logging_group=self.logging_group | ||||
|             sender=self.__class__, | ||||
|             filename=self.path, | ||||
|             logging_group=self.logging_group, | ||||
|         ) | ||||
|  | ||||
|         self.run_pre_consume_script() | ||||
| @@ -247,7 +263,9 @@ class Consumer(LoggingMixin): | ||||
|             self.log("debug", f"Generating thumbnail for {self.filename}...") | ||||
|             self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL) | ||||
|             thumbnail = document_parser.get_optimised_thumbnail( | ||||
|                 self.path, mime_type, self.filename | ||||
|                 self.path, | ||||
|                 mime_type, | ||||
|                 self.filename, | ||||
|             ) | ||||
|  | ||||
|             text = document_parser.get_text() | ||||
| @@ -301,21 +319,26 @@ class Consumer(LoggingMixin): | ||||
|                     self._write(document.storage_type, self.path, document.source_path) | ||||
|  | ||||
|                     self._write( | ||||
|                         document.storage_type, thumbnail, document.thumbnail_path | ||||
|                         document.storage_type, | ||||
|                         thumbnail, | ||||
|                         document.thumbnail_path, | ||||
|                     ) | ||||
|  | ||||
|                     if archive_path and os.path.isfile(archive_path): | ||||
|                         document.archive_filename = generate_unique_filename( | ||||
|                             document, archive_filename=True | ||||
|                             document, | ||||
|                             archive_filename=True, | ||||
|                         ) | ||||
|                         create_source_path_directory(document.archive_path) | ||||
|                         self._write( | ||||
|                             document.storage_type, archive_path, document.archive_path | ||||
|                             document.storage_type, | ||||
|                             archive_path, | ||||
|                             document.archive_path, | ||||
|                         ) | ||||
|  | ||||
|                         with open(archive_path, "rb") as f: | ||||
|                             document.archive_checksum = hashlib.md5( | ||||
|                                 f.read() | ||||
|                                 f.read(), | ||||
|                             ).hexdigest() | ||||
|  | ||||
|                 # Don't save with the lock active. Saving will cause the file | ||||
| @@ -328,7 +351,8 @@ class Consumer(LoggingMixin): | ||||
|  | ||||
|                 # https://github.com/jonaswinkler/paperless-ng/discussions/1037 | ||||
|                 shadow_file = os.path.join( | ||||
|                     os.path.dirname(self.path), "._" + os.path.basename(self.path) | ||||
|                     os.path.dirname(self.path), | ||||
|                     "._" + os.path.basename(self.path), | ||||
|                 ) | ||||
|  | ||||
|                 if os.path.isfile(shadow_file): | ||||
| @@ -390,12 +414,12 @@ class Consumer(LoggingMixin): | ||||
|     def apply_overrides(self, document): | ||||
|         if self.override_correspondent_id: | ||||
|             document.correspondent = Correspondent.objects.get( | ||||
|                 pk=self.override_correspondent_id | ||||
|                 pk=self.override_correspondent_id, | ||||
|             ) | ||||
|  | ||||
|         if self.override_document_type_id: | ||||
|             document.document_type = DocumentType.objects.get( | ||||
|                 pk=self.override_document_type_id | ||||
|                 pk=self.override_document_type_id, | ||||
|             ) | ||||
|  | ||||
|         if self.override_tag_ids: | ||||
|   | ||||
| @@ -103,15 +103,17 @@ def generate_unique_filename(doc, archive_filename=False): | ||||
|     if archive_filename and doc.filename: | ||||
|         new_filename = os.path.splitext(doc.filename)[0] + ".pdf" | ||||
|         if new_filename == old_filename or not os.path.exists( | ||||
|             os.path.join(root, new_filename) | ||||
|         ):  # NOQA: E501 | ||||
|             os.path.join(root, new_filename), | ||||
|         ): | ||||
|             return new_filename | ||||
|  | ||||
|     counter = 0 | ||||
|  | ||||
|     while True: | ||||
|         new_filename = generate_filename( | ||||
|             doc, counter, archive_filename=archive_filename | ||||
|             doc, | ||||
|             counter, | ||||
|             archive_filename=archive_filename, | ||||
|         ) | ||||
|         if new_filename == old_filename: | ||||
|             # still the same as before. | ||||
| @@ -137,14 +139,16 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False): | ||||
|  | ||||
|             if doc.correspondent: | ||||
|                 correspondent = pathvalidate.sanitize_filename( | ||||
|                     doc.correspondent.name, replacement_text="-" | ||||
|                     doc.correspondent.name, | ||||
|                     replacement_text="-", | ||||
|                 ) | ||||
|             else: | ||||
|                 correspondent = "none" | ||||
|  | ||||
|             if doc.document_type: | ||||
|                 document_type = pathvalidate.sanitize_filename( | ||||
|                     doc.document_type.name, replacement_text="-" | ||||
|                     doc.document_type.name, | ||||
|                     replacement_text="-", | ||||
|                 ) | ||||
|             else: | ||||
|                 document_type = "none" | ||||
| @@ -160,9 +164,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False): | ||||
|                 document_type=document_type, | ||||
|                 created=datetime.date.isoformat(doc.created), | ||||
|                 created_year=doc.created.year if doc.created else "none", | ||||
|                 created_month=f"{doc.created.month:02}" | ||||
|                 if doc.created | ||||
|                 else "none",  # NOQA: E501 | ||||
|                 created_month=f"{doc.created.month:02}" if doc.created else "none", | ||||
|                 created_day=f"{doc.created.day:02}" if doc.created else "none", | ||||
|                 added=datetime.date.isoformat(doc.added), | ||||
|                 added_year=doc.added.year if doc.added else "none", | ||||
| @@ -178,7 +180,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False): | ||||
|     except (ValueError, KeyError, IndexError): | ||||
|         logger.warning( | ||||
|             f"Invalid PAPERLESS_FILENAME_FORMAT: " | ||||
|             f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default" | ||||
|             f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default", | ||||
|         ) | ||||
|  | ||||
|     counter_str = f"_{counter:02}" if counter else "" | ||||
|   | ||||
| @@ -1,7 +1,13 @@ | ||||
| from django.db.models import Q | ||||
| from django_filters.rest_framework import BooleanFilter, FilterSet, Filter | ||||
| from django_filters.rest_framework import BooleanFilter | ||||
| from django_filters.rest_framework import Filter | ||||
| from django_filters.rest_framework import FilterSet | ||||
|  | ||||
| from .models import Correspondent, Document, Tag, DocumentType, Log | ||||
| from .models import Correspondent | ||||
| from .models import Document | ||||
| from .models import DocumentType | ||||
| from .models import Log | ||||
| from .models import Tag | ||||
|  | ||||
| CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"] | ||||
| ID_KWARGS = ["in", "exact"] | ||||
| @@ -75,7 +81,10 @@ class TitleContentFilter(Filter): | ||||
| class DocumentFilterSet(FilterSet): | ||||
|  | ||||
|     is_tagged = BooleanFilter( | ||||
|         label="Is tagged", field_name="tags", lookup_expr="isnull", exclude=True | ||||
|         label="Is tagged", | ||||
|         field_name="tags", | ||||
|         lookup_expr="isnull", | ||||
|         exclude=True, | ||||
|     ) | ||||
|  | ||||
|     tags__id__all = TagsFilter() | ||||
|   | ||||
| @@ -1,21 +1,30 @@ | ||||
| import logging | ||||
| import math | ||||
| import os | ||||
| from contextlib import contextmanager | ||||
|  | ||||
| import math | ||||
| from dateutil.parser import isoparse | ||||
| from django.conf import settings | ||||
| from whoosh import highlight, classify, query | ||||
| from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN | ||||
| from documents.models import Document | ||||
| from whoosh import classify | ||||
| from whoosh import highlight | ||||
| from whoosh import query | ||||
| from whoosh.fields import BOOLEAN | ||||
| from whoosh.fields import DATETIME | ||||
| from whoosh.fields import KEYWORD | ||||
| from whoosh.fields import NUMERIC | ||||
| from whoosh.fields import Schema | ||||
| from whoosh.fields import TEXT | ||||
| from whoosh.highlight import HtmlFormatter | ||||
| from whoosh.index import create_in, exists_in, open_dir | ||||
| from whoosh.index import create_in | ||||
| from whoosh.index import exists_in | ||||
| from whoosh.index import open_dir | ||||
| from whoosh.qparser import MultifieldParser | ||||
| from whoosh.qparser.dateparse import DateParserPlugin | ||||
| from whoosh.searching import ResultsPage, Searcher | ||||
| from whoosh.searching import ResultsPage | ||||
| from whoosh.searching import Searcher | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| from documents.models import Document | ||||
|  | ||||
| logger = logging.getLogger("paperless.index") | ||||
|  | ||||
|  | ||||
| @@ -45,7 +54,7 @@ def open_index(recreate=False): | ||||
|         if exists_in(settings.INDEX_DIR) and not recreate: | ||||
|             return open_dir(settings.INDEX_DIR, schema=get_schema()) | ||||
|     except Exception: | ||||
|         logger.exception(f"Error while opening the index, recreating.") | ||||
|         logger.exception("Error while opening the index, recreating.") | ||||
|  | ||||
|     if not os.path.isdir(settings.INDEX_DIR): | ||||
|         os.makedirs(settings.INDEX_DIR, exist_ok=True) | ||||
| @@ -138,11 +147,11 @@ class DelayedQuery: | ||||
|                 criterias.append(query.Term("has_type", v == "false")) | ||||
|             elif k == "created__date__lt": | ||||
|                 criterias.append( | ||||
|                     query.DateRange("created", start=None, end=isoparse(v)) | ||||
|                     query.DateRange("created", start=None, end=isoparse(v)), | ||||
|                 ) | ||||
|             elif k == "created__date__gt": | ||||
|                 criterias.append( | ||||
|                     query.DateRange("created", start=isoparse(v), end=None) | ||||
|                     query.DateRange("created", start=isoparse(v), end=None), | ||||
|                 ) | ||||
|             elif k == "added__date__gt": | ||||
|                 criterias.append(query.DateRange("added", start=isoparse(v), end=None)) | ||||
| @@ -220,7 +229,7 @@ class DelayedQuery: | ||||
|                     hit[1], | ||||
|                 ), | ||||
|                 page.results.top_n, | ||||
|             ) | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         self.saved_results[item.start] = page | ||||
| @@ -240,7 +249,7 @@ class DelayedFullTextQuery(DelayedQuery): | ||||
|  | ||||
|         corrected = self.searcher.correct_query(q, q_str) | ||||
|         if corrected.query != q: | ||||
|             corrected_query = corrected.string | ||||
|             corrected.query = corrected.string | ||||
|  | ||||
|         return q, None | ||||
|  | ||||
| @@ -252,10 +261,14 @@ class DelayedMoreLikeThisQuery(DelayedQuery): | ||||
|  | ||||
|         docnum = self.searcher.document_number(id=more_like_doc_id) | ||||
|         kts = self.searcher.key_terms_from_text( | ||||
|             "content", content, numterms=20, model=classify.Bo1Model, normalize=False | ||||
|             "content", | ||||
|             content, | ||||
|             numterms=20, | ||||
|             model=classify.Bo1Model, | ||||
|             normalize=False, | ||||
|         ) | ||||
|         q = query.Or( | ||||
|             [query.Term("content", word, boost=weight) for word, weight in kts] | ||||
|             [query.Term("content", word, boost=weight) for word, weight in kts], | ||||
|         ) | ||||
|         mask = {docnum} | ||||
|  | ||||
| @@ -266,7 +279,9 @@ def autocomplete(ix, term, limit=10): | ||||
|     with ix.reader() as reader: | ||||
|         terms = [] | ||||
|         for (score, t) in reader.most_distinctive_terms( | ||||
|             "content", number=limit, prefix=term.lower() | ||||
|             "content", | ||||
|             number=limit, | ||||
|             prefix=term.lower(), | ||||
|         ): | ||||
|             terms.append(t) | ||||
|         return terms | ||||
|   | ||||
| @@ -1,8 +1,6 @@ | ||||
| import logging | ||||
| import uuid | ||||
|  | ||||
| from django.conf import settings | ||||
|  | ||||
|  | ||||
| class LoggingMixin: | ||||
|  | ||||
|   | ||||
| @@ -1,8 +1,8 @@ | ||||
| import os | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
|  | ||||
| from django.core.management.base import BaseCommand | ||||
| from django.core.management.base import CommandError | ||||
| from documents.models import Document | ||||
| from paperless.db import GnuPG | ||||
|  | ||||
| @@ -31,9 +31,9 @@ class Command(BaseCommand): | ||||
|                 "this unless you've got a recent backup\nWARNING: handy.  It " | ||||
|                 "*should* work without a hitch, but be safe and backup your\n" | ||||
|                 "WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to " | ||||
|                 "continue.\n\n" | ||||
|                 "continue.\n\n", | ||||
|             ) | ||||
|             __ = input() | ||||
|             _ = input() | ||||
|         except KeyboardInterrupt: | ||||
|             return | ||||
|  | ||||
| @@ -41,7 +41,7 @@ class Command(BaseCommand): | ||||
|         if not passphrase: | ||||
|             raise CommandError( | ||||
|                 "Passphrase not defined.  Please set it with --passphrase or " | ||||
|                 "by declaring it in your environment or your config." | ||||
|                 "by declaring it in your environment or your config.", | ||||
|             ) | ||||
|  | ||||
|         self.__gpg_to_unencrypted(passphrase) | ||||
| @@ -50,7 +50,7 @@ class Command(BaseCommand): | ||||
|     def __gpg_to_unencrypted(passphrase): | ||||
|  | ||||
|         encrypted_files = Document.objects.filter( | ||||
|             storage_type=Document.STORAGE_TYPE_GPG | ||||
|             storage_type=Document.STORAGE_TYPE_GPG, | ||||
|         ) | ||||
|  | ||||
|         for document in encrypted_files: | ||||
| @@ -71,7 +71,7 @@ class Command(BaseCommand): | ||||
|             if not ext == ".gpg": | ||||
|                 raise CommandError( | ||||
|                     f"Abort: encrypted file {document.source_path} does not " | ||||
|                     f"end with .gpg" | ||||
|                     f"end with .gpg", | ||||
|                 ) | ||||
|  | ||||
|             document.filename = os.path.splitext(document.filename)[0] | ||||
| @@ -83,7 +83,8 @@ class Command(BaseCommand): | ||||
|                 f.write(raw_thumb) | ||||
|  | ||||
|             Document.objects.filter(id=document.id).update( | ||||
|                 storage_type=document.storage_type, filename=document.filename | ||||
|                 storage_type=document.storage_type, | ||||
|                 filename=document.filename, | ||||
|             ) | ||||
|  | ||||
|             for path in old_paths: | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| import hashlib | ||||
| import multiprocessing | ||||
|  | ||||
| import logging | ||||
| import multiprocessing | ||||
| import os | ||||
| import shutil | ||||
| import uuid | ||||
| @@ -11,12 +10,12 @@ from django import db | ||||
| from django.conf import settings | ||||
| from django.core.management.base import BaseCommand | ||||
| from django.db import transaction | ||||
| from filelock import FileLock | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| from documents.models import Document | ||||
| from filelock import FileLock | ||||
|  | ||||
| from ... import index | ||||
| from ...file_handling import create_source_path_directory, generate_unique_filename | ||||
| from ...file_handling import create_source_path_directory | ||||
| from ...file_handling import generate_unique_filename | ||||
| from ...parsers import get_parser_class_for_mime_type | ||||
|  | ||||
|  | ||||
| @@ -33,7 +32,7 @@ def handle_document(document_id): | ||||
|     if not parser_class: | ||||
|         logger.error( | ||||
|             f"No parser found for mime type {mime_type}, cannot " | ||||
|             f"archive document {document} (ID: {document_id})" | ||||
|             f"archive document {document} (ID: {document_id})", | ||||
|         ) | ||||
|         return | ||||
|  | ||||
| @@ -43,7 +42,9 @@ def handle_document(document_id): | ||||
|         parser.parse(document.source_path, mime_type, document.get_public_filename()) | ||||
|  | ||||
|         thumbnail = parser.get_optimised_thumbnail( | ||||
|             document.source_path, mime_type, document.get_public_filename() | ||||
|             document.source_path, | ||||
|             mime_type, | ||||
|             document.get_public_filename(), | ||||
|         ) | ||||
|  | ||||
|         if parser.get_archive_path(): | ||||
| @@ -55,7 +56,8 @@ def handle_document(document_id): | ||||
|                 # We also don't use save() since that triggers the filehandling | ||||
|                 # logic, and we don't want that yet (file not yet in place) | ||||
|                 document.archive_filename = generate_unique_filename( | ||||
|                     document, archive_filename=True | ||||
|                     document, | ||||
|                     archive_filename=True, | ||||
|                 ) | ||||
|                 Document.objects.filter(pk=document.pk).update( | ||||
|                     archive_checksum=checksum, | ||||
| @@ -70,9 +72,9 @@ def handle_document(document_id): | ||||
|             with index.open_index_writer() as writer: | ||||
|                 index.update_document(writer, document) | ||||
|  | ||||
|     except Exception as e: | ||||
|     except Exception: | ||||
|         logger.exception( | ||||
|             f"Error while parsing document {document} " f"(ID: {document_id})" | ||||
|             f"Error while parsing document {document} " f"(ID: {document_id})", | ||||
|         ) | ||||
|     finally: | ||||
|         parser.cleanup() | ||||
| @@ -86,7 +88,8 @@ class Command(BaseCommand): | ||||
|         back-tag all previously indexed documents with metadata created (or | ||||
|         modified) after their initial import. | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def add_arguments(self, parser): | ||||
| @@ -129,7 +132,7 @@ class Command(BaseCommand): | ||||
|             map( | ||||
|                 lambda doc: doc.id, | ||||
|                 filter(lambda d: overwrite or not d.has_archive_version, documents), | ||||
|             ) | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         # Note to future self: this prevents django from reusing database | ||||
| @@ -146,7 +149,7 @@ class Command(BaseCommand): | ||||
|                         pool.imap_unordered(handle_document, document_ids), | ||||
|                         total=len(document_ids), | ||||
|                         disable=options["no_progress_bar"], | ||||
|                     ) | ||||
|                     ), | ||||
|                 ) | ||||
|         except KeyboardInterrupt: | ||||
|             print("Aborting...") | ||||
|   | ||||
| @@ -1,17 +1,18 @@ | ||||
| import logging | ||||
| import os | ||||
| from pathlib import Path, PurePath | ||||
| from pathlib import Path | ||||
| from pathlib import PurePath | ||||
| from threading import Thread | ||||
| from time import sleep | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
| from django.core.management.base import BaseCommand | ||||
| from django.core.management.base import CommandError | ||||
| from django_q.tasks import async_task | ||||
| from watchdog.events import FileSystemEventHandler | ||||
| from watchdog.observers.polling import PollingObserver | ||||
|  | ||||
| from documents.models import Tag | ||||
| from documents.parsers import is_file_ext_supported | ||||
| from watchdog.events import FileSystemEventHandler | ||||
| from watchdog.observers.polling import PollingObserver | ||||
|  | ||||
| try: | ||||
|     from inotifyrecursive import INotify, flags | ||||
| @@ -29,7 +30,7 @@ def _tags_from_path(filepath): | ||||
|     path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts | ||||
|     for part in path_parts: | ||||
|         tag_ids.add( | ||||
|             Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk | ||||
|             Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk, | ||||
|         ) | ||||
|  | ||||
|     return tag_ids | ||||
| @@ -56,7 +57,7 @@ def _consume(filepath): | ||||
|     try: | ||||
|         if settings.CONSUMER_SUBDIRS_AS_TAGS: | ||||
|             tag_ids = _tags_from_path(filepath) | ||||
|     except Exception as e: | ||||
|     except Exception: | ||||
|         logger.exception("Error creating tags from path") | ||||
|  | ||||
|     try: | ||||
| @@ -67,7 +68,7 @@ def _consume(filepath): | ||||
|             override_tag_ids=tag_ids if tag_ids else None, | ||||
|             task_name=os.path.basename(filepath)[:100], | ||||
|         ) | ||||
|     except Exception as e: | ||||
|     except Exception: | ||||
|         # Catch all so that the consumer won't crash. | ||||
|         # This is also what the test case is listening for to check for | ||||
|         # errors. | ||||
| @@ -86,7 +87,7 @@ def _consume_wait_unmodified(file): | ||||
|             new_mtime = os.stat(file).st_mtime | ||||
|         except FileNotFoundError: | ||||
|             logger.debug( | ||||
|                 f"File {file} moved while waiting for it to remain " f"unmodified." | ||||
|                 f"File {file} moved while waiting for it to remain " f"unmodified.", | ||||
|             ) | ||||
|             return | ||||
|         if new_mtime == mtime: | ||||
|   | ||||
| @@ -9,7 +9,8 @@ class Command(BaseCommand): | ||||
|         Trains the classifier on your data and saves the resulting models to a | ||||
|         file. The document consumer will then automatically use this new model. | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|   | ||||
| @@ -6,28 +6,28 @@ import time | ||||
|  | ||||
| import tqdm | ||||
| from django.conf import settings | ||||
| from django.contrib.auth.models import User, Group | ||||
| from django.contrib.auth.models import Group | ||||
| from django.contrib.auth.models import User | ||||
| from django.core import serializers | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
| from django.core.management.base import BaseCommand | ||||
| from django.core.management.base import CommandError | ||||
| from django.db import transaction | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
| from documents.models import SavedView | ||||
| from documents.models import SavedViewFilterRule | ||||
| from documents.models import Tag | ||||
| from documents.settings import EXPORTER_ARCHIVE_NAME | ||||
| from documents.settings import EXPORTER_FILE_NAME | ||||
| from documents.settings import EXPORTER_THUMBNAIL_NAME | ||||
| from filelock import FileLock | ||||
|  | ||||
| from documents.models import ( | ||||
|     Document, | ||||
|     Correspondent, | ||||
|     Tag, | ||||
|     DocumentType, | ||||
|     SavedView, | ||||
|     SavedViewFilterRule, | ||||
| ) | ||||
| from documents.settings import ( | ||||
|     EXPORTER_FILE_NAME, | ||||
|     EXPORTER_THUMBNAIL_NAME, | ||||
|     EXPORTER_ARCHIVE_NAME, | ||||
| ) | ||||
| from paperless.db import GnuPG | ||||
| from paperless_mail.models import MailAccount, MailRule | ||||
| from ...file_handling import generate_filename, delete_empty_directories | ||||
| from paperless_mail.models import MailAccount | ||||
| from paperless_mail.models import MailRule | ||||
|  | ||||
| from ...file_handling import delete_empty_directories | ||||
| from ...file_handling import generate_filename | ||||
|  | ||||
|  | ||||
| class Command(BaseCommand): | ||||
| @@ -37,7 +37,8 @@ class Command(BaseCommand): | ||||
|         directory.  And include a manifest file containing document data for | ||||
|         easy import. | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def add_arguments(self, parser): | ||||
| @@ -107,20 +108,20 @@ class Command(BaseCommand): | ||||
|         # 1. Take a snapshot of what files exist in the current export folder | ||||
|         for root, dirs, files in os.walk(self.target): | ||||
|             self.files_in_export_dir.extend( | ||||
|                 map(lambda f: os.path.abspath(os.path.join(root, f)), files) | ||||
|                 map(lambda f: os.path.abspath(os.path.join(root, f)), files), | ||||
|             ) | ||||
|  | ||||
|         # 2. Create manifest, containing all correspondents, types, tags and | ||||
|         # documents | ||||
|         with transaction.atomic(): | ||||
|             manifest = json.loads( | ||||
|                 serializers.serialize("json", Correspondent.objects.all()) | ||||
|                 serializers.serialize("json", Correspondent.objects.all()), | ||||
|             ) | ||||
|  | ||||
|             manifest += json.loads(serializers.serialize("json", Tag.objects.all())) | ||||
|  | ||||
|             manifest += json.loads( | ||||
|                 serializers.serialize("json", DocumentType.objects.all()) | ||||
|                 serializers.serialize("json", DocumentType.objects.all()), | ||||
|             ) | ||||
|  | ||||
|             documents = Document.objects.order_by("id") | ||||
| @@ -129,19 +130,19 @@ class Command(BaseCommand): | ||||
|             manifest += document_manifest | ||||
|  | ||||
|             manifest += json.loads( | ||||
|                 serializers.serialize("json", MailAccount.objects.all()) | ||||
|                 serializers.serialize("json", MailAccount.objects.all()), | ||||
|             ) | ||||
|  | ||||
|             manifest += json.loads( | ||||
|                 serializers.serialize("json", MailRule.objects.all()) | ||||
|                 serializers.serialize("json", MailRule.objects.all()), | ||||
|             ) | ||||
|  | ||||
|             manifest += json.loads( | ||||
|                 serializers.serialize("json", SavedView.objects.all()) | ||||
|                 serializers.serialize("json", SavedView.objects.all()), | ||||
|             ) | ||||
|  | ||||
|             manifest += json.loads( | ||||
|                 serializers.serialize("json", SavedViewFilterRule.objects.all()) | ||||
|                 serializers.serialize("json", SavedViewFilterRule.objects.all()), | ||||
|             ) | ||||
|  | ||||
|             manifest += json.loads(serializers.serialize("json", Group.objects.all())) | ||||
| @@ -155,9 +156,7 @@ class Command(BaseCommand): | ||||
|             disable=progress_bar_disable, | ||||
|         ): | ||||
|             # 3.1. store files unencrypted | ||||
|             document_dict["fields"][ | ||||
|                 "storage_type" | ||||
|             ] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501 | ||||
|             document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|  | ||||
|             document = document_map[document_dict["pk"]] | ||||
|  | ||||
| @@ -166,7 +165,9 @@ class Command(BaseCommand): | ||||
|             while True: | ||||
|                 if self.use_filename_format: | ||||
|                     base_name = generate_filename( | ||||
|                         document, counter=filename_counter, append_gpg=False | ||||
|                         document, | ||||
|                         counter=filename_counter, | ||||
|                         append_gpg=False, | ||||
|                     ) | ||||
|                 else: | ||||
|                     base_name = document.get_public_filename(counter=filename_counter) | ||||
| @@ -217,14 +218,18 @@ class Command(BaseCommand): | ||||
|                             os.utime(archive_target, times=(t, t)) | ||||
|             else: | ||||
|                 self.check_and_copy( | ||||
|                     document.source_path, document.checksum, original_target | ||||
|                     document.source_path, | ||||
|                     document.checksum, | ||||
|                     original_target, | ||||
|                 ) | ||||
|  | ||||
|                 self.check_and_copy(document.thumbnail_path, None, thumbnail_target) | ||||
|  | ||||
|                 if archive_target: | ||||
|                     self.check_and_copy( | ||||
|                         document.archive_path, document.archive_checksum, archive_target | ||||
|                         document.archive_path, | ||||
|                         document.archive_checksum, | ||||
|                         archive_target, | ||||
|                     ) | ||||
|  | ||||
|         # 4. write manifest to target folder | ||||
| @@ -243,7 +248,8 @@ class Command(BaseCommand): | ||||
|                 os.remove(f) | ||||
|  | ||||
|                 delete_empty_directories( | ||||
|                     os.path.abspath(os.path.dirname(f)), os.path.abspath(self.target) | ||||
|                     os.path.abspath(os.path.dirname(f)), | ||||
|                     os.path.abspath(self.target), | ||||
|                 ) | ||||
|  | ||||
|     def check_and_copy(self, source, source_checksum, target): | ||||
|   | ||||
| @@ -7,16 +7,16 @@ from contextlib import contextmanager | ||||
| import tqdm | ||||
| from django.conf import settings | ||||
| from django.core.management import call_command | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
| from django.db.models.signals import post_save, m2m_changed | ||||
| from django.core.management.base import BaseCommand | ||||
| from django.core.management.base import CommandError | ||||
| from django.db.models.signals import m2m_changed | ||||
| from django.db.models.signals import post_save | ||||
| from documents.models import Document | ||||
| from documents.settings import EXPORTER_ARCHIVE_NAME | ||||
| from documents.settings import EXPORTER_FILE_NAME | ||||
| from documents.settings import EXPORTER_THUMBNAIL_NAME | ||||
| from filelock import FileLock | ||||
|  | ||||
| from documents.models import Document | ||||
| from documents.settings import ( | ||||
|     EXPORTER_FILE_NAME, | ||||
|     EXPORTER_THUMBNAIL_NAME, | ||||
|     EXPORTER_ARCHIVE_NAME, | ||||
| ) | ||||
| from ...file_handling import create_source_path_directory | ||||
| from ...signals.handlers import update_filename_and_move_files | ||||
|  | ||||
| @@ -36,7 +36,8 @@ class Command(BaseCommand): | ||||
|         Using a manifest.json file, load the data from there, and import the | ||||
|         documents it refers to. | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def add_arguments(self, parser): | ||||
| @@ -73,7 +74,9 @@ class Command(BaseCommand): | ||||
|  | ||||
|         self._check_manifest() | ||||
|         with disable_signal( | ||||
|             post_save, receiver=update_filename_and_move_files, sender=Document | ||||
|             post_save, | ||||
|             receiver=update_filename_and_move_files, | ||||
|             sender=Document, | ||||
|         ): | ||||
|             with disable_signal( | ||||
|                 m2m_changed, | ||||
| @@ -92,7 +95,7 @@ class Command(BaseCommand): | ||||
|     def _check_manifest_exists(path): | ||||
|         if not os.path.exists(path): | ||||
|             raise CommandError( | ||||
|                 "That directory doesn't appear to contain a manifest.json " "file." | ||||
|                 "That directory doesn't appear to contain a manifest.json " "file.", | ||||
|             ) | ||||
|  | ||||
|     def _check_manifest(self): | ||||
| @@ -105,14 +108,14 @@ class Command(BaseCommand): | ||||
|             if EXPORTER_FILE_NAME not in record: | ||||
|                 raise CommandError( | ||||
|                     "The manifest file contains a record which does not " | ||||
|                     "refer to an actual document file." | ||||
|                     "refer to an actual document file.", | ||||
|                 ) | ||||
|  | ||||
|             doc_file = record[EXPORTER_FILE_NAME] | ||||
|             if not os.path.exists(os.path.join(self.source, doc_file)): | ||||
|                 raise CommandError( | ||||
|                     'The manifest file refers to "{}" which does not ' | ||||
|                     "appear to be in the source directory.".format(doc_file) | ||||
|                     "appear to be in the source directory.".format(doc_file), | ||||
|                 ) | ||||
|  | ||||
|             if EXPORTER_ARCHIVE_NAME in record: | ||||
| @@ -120,7 +123,7 @@ class Command(BaseCommand): | ||||
|                 if not os.path.exists(os.path.join(self.source, archive_file)): | ||||
|                     raise CommandError( | ||||
|                         f"The manifest file refers to {archive_file} which " | ||||
|                         f"does not appear to be in the source directory." | ||||
|                         f"does not appear to be in the source directory.", | ||||
|                     ) | ||||
|  | ||||
|     def _import_files_from_manifest(self, progress_bar_disable): | ||||
| @@ -132,7 +135,7 @@ class Command(BaseCommand): | ||||
|         print("Copy files into paperless...") | ||||
|  | ||||
|         manifest_documents = list( | ||||
|             filter(lambda r: r["model"] == "documents.document", self.manifest) | ||||
|             filter(lambda r: r["model"] == "documents.document", self.manifest), | ||||
|         ) | ||||
|  | ||||
|         for record in tqdm.tqdm(manifest_documents, disable=progress_bar_disable): | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| from django.core.management import BaseCommand | ||||
| from django.db import transaction | ||||
|  | ||||
| from documents.tasks import index_reindex, index_optimize | ||||
| from documents.tasks import index_optimize | ||||
| from documents.tasks import index_reindex | ||||
|  | ||||
|  | ||||
| class Command(BaseCommand): | ||||
|   | ||||
| @@ -3,7 +3,6 @@ import logging | ||||
| import tqdm | ||||
| from django.core.management.base import BaseCommand | ||||
| from django.db.models.signals import post_save | ||||
|  | ||||
| from documents.models import Document | ||||
|  | ||||
|  | ||||
| @@ -12,7 +11,8 @@ class Command(BaseCommand): | ||||
|     help = """ | ||||
|         This will rename all documents to match the latest filename format. | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def add_arguments(self, parser): | ||||
| @@ -28,6 +28,7 @@ class Command(BaseCommand): | ||||
|         logging.getLogger().handlers[0].level = logging.ERROR | ||||
|  | ||||
|         for document in tqdm.tqdm( | ||||
|             Document.objects.all(), disable=options["no_progress_bar"] | ||||
|             Document.objects.all(), | ||||
|             disable=options["no_progress_bar"], | ||||
|         ): | ||||
|             post_save.send(Document, instance=document) | ||||
|   | ||||
| @@ -2,10 +2,12 @@ import logging | ||||
|  | ||||
| import tqdm | ||||
| from django.core.management.base import BaseCommand | ||||
|  | ||||
| from documents.classifier import load_classifier | ||||
| from documents.models import Document | ||||
| from ...signals.handlers import set_correspondent, set_document_type, set_tags | ||||
|  | ||||
| from ...signals.handlers import set_correspondent | ||||
| from ...signals.handlers import set_document_type | ||||
| from ...signals.handlers import set_tags | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger("paperless.management.retagger") | ||||
| @@ -19,7 +21,8 @@ class Command(BaseCommand): | ||||
|         back-tag all previously indexed documents with metadata created (or | ||||
|         modified) after their initial import. | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def add_arguments(self, parser): | ||||
| @@ -57,7 +60,8 @@ class Command(BaseCommand): | ||||
|             help="Return the suggestion, don't change anything.", | ||||
|         ) | ||||
|         parser.add_argument( | ||||
|             "--base-url", help="The base URL to use to build the link to the documents." | ||||
|             "--base-url", | ||||
|             help="The base URL to use to build the link to the documents.", | ||||
|         ) | ||||
|  | ||||
|     def handle(self, *args, **options): | ||||
|   | ||||
| @@ -7,7 +7,8 @@ class Command(BaseCommand): | ||||
|     help = """ | ||||
|         This command checks your document archive for issues. | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def add_arguments(self, parser): | ||||
|   | ||||
| @@ -5,8 +5,8 @@ import shutil | ||||
| import tqdm | ||||
| from django import db | ||||
| from django.core.management.base import BaseCommand | ||||
|  | ||||
| from documents.models import Document | ||||
|  | ||||
| from ...parsers import get_parser_class_for_mime_type | ||||
|  | ||||
|  | ||||
| @@ -22,7 +22,9 @@ def _process_document(doc_in): | ||||
|  | ||||
|     try: | ||||
|         thumb = parser.get_optimised_thumbnail( | ||||
|             document.source_path, document.mime_type, document.get_public_filename() | ||||
|             document.source_path, | ||||
|             document.mime_type, | ||||
|             document.get_public_filename(), | ||||
|         ) | ||||
|  | ||||
|         shutil.move(thumb, document.thumbnail_path) | ||||
| @@ -35,7 +37,8 @@ class Command(BaseCommand): | ||||
|     help = """ | ||||
|         This will regenerate the thumbnails for all documents. | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def add_arguments(self, parser): | ||||
| @@ -76,5 +79,5 @@ class Command(BaseCommand): | ||||
|                     pool.imap_unordered(_process_document, ids), | ||||
|                     total=len(ids), | ||||
|                     disable=options["no_progress_bar"], | ||||
|                 ) | ||||
|                 ), | ||||
|             ) | ||||
|   | ||||
| @@ -2,7 +2,7 @@ import logging | ||||
| import os | ||||
|  | ||||
| from django.contrib.auth.models import User | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
| from django.core.management.base import BaseCommand | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger("paperless.management.superuser") | ||||
| @@ -13,7 +13,8 @@ class Command(BaseCommand): | ||||
|     help = """ | ||||
|         Creates a Django superuser based on env variables. | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def handle(self, *args, **options): | ||||
| @@ -39,5 +40,5 @@ class Command(BaseCommand): | ||||
|             self.stdout.write(f'Did not create superuser "{username}".') | ||||
|             self.stdout.write( | ||||
|                 'Make sure you specified "PAPERLESS_ADMIN_PASSWORD" in your ' | ||||
|                 '"docker-compose.env" file.' | ||||
|                 '"docker-compose.env" file.', | ||||
|             ) | ||||
|   | ||||
| @@ -1,8 +1,10 @@ | ||||
| import logging | ||||
| import re | ||||
|  | ||||
|  | ||||
| from documents.models import MatchingModel, Correspondent, DocumentType, Tag | ||||
| from documents.models import Correspondent | ||||
| from documents.models import DocumentType | ||||
| from documents.models import MatchingModel | ||||
| from documents.models import Tag | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger("paperless.matching") | ||||
| @@ -12,7 +14,7 @@ def log_reason(matching_model, document, reason): | ||||
|     class_name = type(matching_model).__name__ | ||||
|     logger.debug( | ||||
|         f"{class_name} {matching_model.name} matched on document " | ||||
|         f"{document} because {reason}" | ||||
|         f"{document} because {reason}", | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @@ -25,7 +27,7 @@ def match_correspondents(document, classifier): | ||||
|     correspondents = Correspondent.objects.all() | ||||
|  | ||||
|     return list( | ||||
|         filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents) | ||||
|         filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents), | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @@ -38,7 +40,7 @@ def match_document_types(document, classifier): | ||||
|     document_types = DocumentType.objects.all() | ||||
|  | ||||
|     return list( | ||||
|         filter(lambda o: matches(o, document) or o.pk == pred_id, document_types) | ||||
|         filter(lambda o: matches(o, document) or o.pk == pred_id, document_types), | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @@ -51,7 +53,7 @@ def match_tags(document, classifier): | ||||
|     tags = Tag.objects.all() | ||||
|  | ||||
|     return list( | ||||
|         filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags) | ||||
|         filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags), | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @@ -92,7 +94,7 @@ def matches(matching_model, document): | ||||
|                 rf"\b{re.escape(matching_model.match)}\b", | ||||
|                 document_content, | ||||
|                 **search_kwargs, | ||||
|             ) | ||||
|             ), | ||||
|         ) | ||||
|         if result: | ||||
|             log_reason( | ||||
| @@ -105,11 +107,12 @@ def matches(matching_model, document): | ||||
|     elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX: | ||||
|         try: | ||||
|             match = re.search( | ||||
|                 re.compile(matching_model.match, **search_kwargs), document_content | ||||
|                 re.compile(matching_model.match, **search_kwargs), | ||||
|                 document_content, | ||||
|             ) | ||||
|         except re.error: | ||||
|             logger.error( | ||||
|                 f"Error while processing regular expression " f"{matching_model.match}" | ||||
|                 f"Error while processing regular expression " f"{matching_model.match}", | ||||
|             ) | ||||
|             return False | ||||
|         if match: | ||||
|   | ||||
| @@ -5,17 +5,14 @@ import os | ||||
| import re | ||||
| from collections import OrderedDict | ||||
|  | ||||
| import pathvalidate | ||||
|  | ||||
| import dateutil.parser | ||||
| import pathvalidate | ||||
| from django.conf import settings | ||||
| from django.contrib.auth.models import User | ||||
| from django.db import models | ||||
| from django.utils import timezone | ||||
| from django.utils.timezone import is_aware | ||||
|  | ||||
| from django.utils.translation import gettext_lazy as _ | ||||
|  | ||||
| from documents.parsers import get_default_file_extension | ||||
|  | ||||
|  | ||||
| @@ -42,7 +39,9 @@ class MatchingModel(models.Model): | ||||
|     match = models.CharField(_("match"), max_length=256, blank=True) | ||||
|  | ||||
|     matching_algorithm = models.PositiveIntegerField( | ||||
|         _("matching algorithm"), choices=MATCHING_ALGORITHMS, default=MATCH_ANY | ||||
|         _("matching algorithm"), | ||||
|         choices=MATCHING_ALGORITHMS, | ||||
|         default=MATCH_ANY, | ||||
|     ) | ||||
|  | ||||
|     is_insensitive = models.BooleanField(_("is insensitive"), default=True) | ||||
| @@ -71,7 +70,7 @@ class Tag(MatchingModel): | ||||
|         default=False, | ||||
|         help_text=_( | ||||
|             "Marks this tag as an inbox tag: All newly consumed " | ||||
|             "documents will be tagged with inbox tags." | ||||
|             "documents will be tagged with inbox tags.", | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
| @@ -120,14 +119,17 @@ class Document(models.Model): | ||||
|         blank=True, | ||||
|         help_text=_( | ||||
|             "The raw, text-only data of the document. This field is " | ||||
|             "primarily used for searching." | ||||
|             "primarily used for searching.", | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|     mime_type = models.CharField(_("mime type"), max_length=256, editable=False) | ||||
|  | ||||
|     tags = models.ManyToManyField( | ||||
|         Tag, related_name="documents", blank=True, verbose_name=_("tags") | ||||
|         Tag, | ||||
|         related_name="documents", | ||||
|         blank=True, | ||||
|         verbose_name=_("tags"), | ||||
|     ) | ||||
|  | ||||
|     checksum = models.CharField( | ||||
| @@ -150,7 +152,10 @@ class Document(models.Model): | ||||
|     created = models.DateTimeField(_("created"), default=timezone.now, db_index=True) | ||||
|  | ||||
|     modified = models.DateTimeField( | ||||
|         _("modified"), auto_now=True, editable=False, db_index=True | ||||
|         _("modified"), | ||||
|         auto_now=True, | ||||
|         editable=False, | ||||
|         db_index=True, | ||||
|     ) | ||||
|  | ||||
|     storage_type = models.CharField( | ||||
| @@ -162,7 +167,10 @@ class Document(models.Model): | ||||
|     ) | ||||
|  | ||||
|     added = models.DateTimeField( | ||||
|         _("added"), default=timezone.now, editable=False, db_index=True | ||||
|         _("added"), | ||||
|         default=timezone.now, | ||||
|         editable=False, | ||||
|         db_index=True, | ||||
|     ) | ||||
|  | ||||
|     filename = models.FilePathField( | ||||
| @@ -192,7 +200,7 @@ class Document(models.Model): | ||||
|         unique=True, | ||||
|         db_index=True, | ||||
|         help_text=_( | ||||
|             "The position of this document in your physical document " "archive." | ||||
|             "The position of this document in your physical document " "archive.", | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
| @@ -289,7 +297,9 @@ class Log(models.Model): | ||||
|     message = models.TextField(_("message")) | ||||
|  | ||||
|     level = models.PositiveIntegerField( | ||||
|         _("level"), choices=LEVELS, default=logging.INFO | ||||
|         _("level"), | ||||
|         choices=LEVELS, | ||||
|         default=logging.INFO, | ||||
|     ) | ||||
|  | ||||
|     created = models.DateTimeField(_("created"), auto_now_add=True) | ||||
| @@ -321,7 +331,10 @@ class SavedView(models.Model): | ||||
|     ) | ||||
|  | ||||
|     sort_field = models.CharField( | ||||
|         _("sort field"), max_length=128, null=True, blank=True | ||||
|         _("sort field"), | ||||
|         max_length=128, | ||||
|         null=True, | ||||
|         blank=True, | ||||
|     ) | ||||
|     sort_reverse = models.BooleanField(_("sort reverse"), default=False) | ||||
|  | ||||
| @@ -383,11 +396,16 @@ class FileInfo: | ||||
|                 ), | ||||
|             ), | ||||
|             ("title", re.compile(r"(?P<title>.*)$", flags=re.IGNORECASE)), | ||||
|         ] | ||||
|         ], | ||||
|     ) | ||||
|  | ||||
|     def __init__( | ||||
|         self, created=None, correspondent=None, title=None, tags=(), extension=None | ||||
|         self, | ||||
|         created=None, | ||||
|         correspondent=None, | ||||
|         title=None, | ||||
|         tags=(), | ||||
|         extension=None, | ||||
|     ): | ||||
|  | ||||
|         self.created = created | ||||
|   | ||||
| @@ -9,6 +9,8 @@ import tempfile | ||||
| import magic | ||||
| from django.conf import settings | ||||
| from django.utils import timezone | ||||
| from documents.loggers import LoggingMixin | ||||
| from documents.signals import document_consumer_declaration | ||||
|  | ||||
| # This regular expression will try to find dates in the document at | ||||
| # hand and will match the following formats: | ||||
| @@ -21,17 +23,15 @@ from django.utils import timezone | ||||
| # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits | ||||
| # - MONTH ZZZZ, with ZZZZ being 4 digits | ||||
| # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits | ||||
| from documents.loggers import LoggingMixin | ||||
| from documents.signals import document_consumer_declaration | ||||
|  | ||||
| # TODO: isn't there a date parsing library for this? | ||||
|  | ||||
| DATE_REGEX = re.compile( | ||||
|     r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|"  # NOQA: E501 | ||||
|     r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"  # NOQA: E501 | ||||
|     r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"  # NOQA: E501 | ||||
|     r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|"  # noqa: E501 | ||||
|     r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"  # noqa: E501 | ||||
|     r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"  # noqa: E501 | ||||
|     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|" | ||||
|     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))" | ||||
|     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))", | ||||
| ) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -3,9 +3,8 @@ import logging | ||||
| import os | ||||
|  | ||||
| from django.conf import settings | ||||
| from tqdm import tqdm | ||||
|  | ||||
| from documents.models import Document | ||||
| from tqdm import tqdm | ||||
|  | ||||
|  | ||||
| class SanityCheckMessages: | ||||
| @@ -88,19 +87,19 @@ def check_sanity(progress=False): | ||||
|                 if not checksum == doc.checksum: | ||||
|                     messages.error( | ||||
|                         f"Checksum mismatch of document {doc.pk}. " | ||||
|                         f"Stored: {doc.checksum}, actual: {checksum}." | ||||
|                         f"Stored: {doc.checksum}, actual: {checksum}.", | ||||
|                     ) | ||||
|  | ||||
|         # Check sanity of the archive file. | ||||
|         if doc.archive_checksum and not doc.archive_filename: | ||||
|             messages.error( | ||||
|                 f"Document {doc.pk} has an archive file checksum, but no " | ||||
|                 f"archive filename." | ||||
|                 f"archive filename.", | ||||
|             ) | ||||
|         elif not doc.archive_checksum and doc.archive_filename: | ||||
|             messages.error( | ||||
|                 f"Document {doc.pk} has an archive file, but its checksum is " | ||||
|                 f"missing." | ||||
|                 f"missing.", | ||||
|             ) | ||||
|         elif doc.has_archive_version: | ||||
|             if not os.path.isfile(doc.archive_path): | ||||
| @@ -113,7 +112,7 @@ def check_sanity(progress=False): | ||||
|                         checksum = hashlib.md5(f.read()).hexdigest() | ||||
|                 except OSError as e: | ||||
|                     messages.error( | ||||
|                         f"Cannot read archive file of document {doc.pk}: {e}" | ||||
|                         f"Cannot read archive file of document {doc.pk}: {e}", | ||||
|                     ) | ||||
|                 else: | ||||
|                     if not checksum == doc.archive_checksum: | ||||
| @@ -121,7 +120,7 @@ def check_sanity(progress=False): | ||||
|                             f"Checksum mismatch of archived document " | ||||
|                             f"{doc.pk}. " | ||||
|                             f"Stored: {doc.archive_checksum}, " | ||||
|                             f"actual: {checksum}." | ||||
|                             f"actual: {checksum}.", | ||||
|                         ) | ||||
|  | ||||
|         # other document checks | ||||
|   | ||||
| @@ -1,25 +1,22 @@ | ||||
| import math | ||||
| import re | ||||
|  | ||||
| import magic | ||||
| import math | ||||
| from django.utils.text import slugify | ||||
| from django.utils.translation import gettext as _ | ||||
| from rest_framework import serializers | ||||
| from rest_framework.fields import SerializerMethodField | ||||
|  | ||||
| from . import bulk_edit | ||||
| from .models import ( | ||||
|     Correspondent, | ||||
|     Tag, | ||||
|     Document, | ||||
|     DocumentType, | ||||
|     SavedView, | ||||
|     SavedViewFilterRule, | ||||
|     MatchingModel, | ||||
| ) | ||||
| from .models import Correspondent | ||||
| from .models import Document | ||||
| from .models import DocumentType | ||||
| from .models import MatchingModel | ||||
| from .models import SavedView | ||||
| from .models import SavedViewFilterRule | ||||
| from .models import Tag | ||||
| from .parsers import is_mime_type_supported | ||||
|  | ||||
| from django.utils.translation import gettext as _ | ||||
|  | ||||
|  | ||||
| # https://www.django-rest-framework.org/api-guide/serializers/#example | ||||
| class DynamicFieldsModelSerializer(serializers.ModelSerializer): | ||||
| @@ -56,12 +53,12 @@ class MatchingModelSerializer(serializers.ModelSerializer): | ||||
|         if ( | ||||
|             "matching_algorithm" in self.initial_data | ||||
|             and self.initial_data["matching_algorithm"] == MatchingModel.MATCH_REGEX | ||||
|         ):  # NOQA: E501 | ||||
|         ): | ||||
|             try: | ||||
|                 re.compile(match) | ||||
|             except Exception as e: | ||||
|                 raise serializers.ValidationError( | ||||
|                     _("Invalid regular expression: %(error)s") % {"error": str(e)} | ||||
|                     _("Invalid regular expression: %(error)s") % {"error": str(e)}, | ||||
|                 ) | ||||
|         return match | ||||
|  | ||||
| @@ -156,7 +153,7 @@ class TagSerializer(MatchingModelSerializer): | ||||
|             luminance = math.sqrt( | ||||
|                 0.299 * math.pow(rgb[0], 2) | ||||
|                 + 0.587 * math.pow(rgb[1], 2) | ||||
|                 + 0.114 * math.pow(rgb[2], 2) | ||||
|                 + 0.114 * math.pow(rgb[2], 2), | ||||
|             ) | ||||
|             return "#ffffff" if luminance < 0.53 else "#000000" | ||||
|         except ValueError: | ||||
| @@ -298,7 +295,7 @@ class DocumentListSerializer(serializers.Serializer): | ||||
|         count = Document.objects.filter(id__in=documents).count() | ||||
|         if not count == len(documents): | ||||
|             raise serializers.ValidationError( | ||||
|                 f"Some documents in {name} don't exist or were " f"specified twice." | ||||
|                 f"Some documents in {name} don't exist or were " f"specified twice.", | ||||
|             ) | ||||
|  | ||||
|     def validate_documents(self, documents): | ||||
| @@ -331,7 +328,7 @@ class BulkEditSerializer(DocumentListSerializer): | ||||
|         count = Tag.objects.filter(id__in=tags).count() | ||||
|         if not count == len(tags): | ||||
|             raise serializers.ValidationError( | ||||
|                 f"Some tags in {name} don't exist or were specified twice." | ||||
|                 f"Some tags in {name} don't exist or were specified twice.", | ||||
|             ) | ||||
|  | ||||
|     def validate_method(self, method): | ||||
| @@ -456,7 +453,7 @@ class PostDocumentSerializer(serializers.Serializer): | ||||
|  | ||||
|         if not is_mime_type_supported(mime_type): | ||||
|             raise serializers.ValidationError( | ||||
|                 _("File type %(type)s not supported") % {"type": mime_type} | ||||
|                 _("File type %(type)s not supported") % {"type": mime_type}, | ||||
|             ) | ||||
|  | ||||
|         return document.name, document_data | ||||
| @@ -483,11 +480,13 @@ class PostDocumentSerializer(serializers.Serializer): | ||||
| class BulkDownloadSerializer(DocumentListSerializer): | ||||
|  | ||||
|     content = serializers.ChoiceField( | ||||
|         choices=["archive", "originals", "both"], default="archive" | ||||
|         choices=["archive", "originals", "both"], | ||||
|         default="archive", | ||||
|     ) | ||||
|  | ||||
|     compression = serializers.ChoiceField( | ||||
|         choices=["none", "deflated", "bzip2", "lzma"], default="none" | ||||
|         choices=["none", "deflated", "bzip2", "lzma"], | ||||
|         default="none", | ||||
|     ) | ||||
|  | ||||
|     def validate_compression(self, compression): | ||||
|   | ||||
| @@ -1,24 +1,26 @@ | ||||
| import logging | ||||
| import os | ||||
|  | ||||
| from django.utils import termcolors | ||||
| from django.conf import settings | ||||
| from django.contrib.admin.models import ADDITION, LogEntry | ||||
| from django.contrib.admin.models import ADDITION | ||||
| from django.contrib.admin.models import LogEntry | ||||
| from django.contrib.auth.models import User | ||||
| from django.contrib.contenttypes.models import ContentType | ||||
| from django.db import models, DatabaseError | ||||
| from django.db import DatabaseError | ||||
| from django.db import models | ||||
| from django.db.models import Q | ||||
| from django.dispatch import receiver | ||||
| from django.utils import termcolors, timezone | ||||
| from django.utils import termcolors | ||||
| from django.utils import timezone | ||||
| from filelock import FileLock | ||||
|  | ||||
| from .. import matching | ||||
| from ..file_handling import ( | ||||
|     delete_empty_directories, | ||||
|     create_source_path_directory, | ||||
|     generate_unique_filename, | ||||
| ) | ||||
| from ..models import Document, Tag, MatchingModel | ||||
| from ..file_handling import create_source_path_directory | ||||
| from ..file_handling import delete_empty_directories | ||||
| from ..file_handling import generate_unique_filename | ||||
| from ..models import Document | ||||
| from ..models import MatchingModel | ||||
| from ..models import Tag | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger("paperless.handlers") | ||||
| @@ -72,7 +74,7 @@ def set_correspondent( | ||||
|                 print( | ||||
|                     termcolors.colorize(str(document), fg="green") | ||||
|                     if color | ||||
|                     else str(document) | ||||
|                     else str(document), | ||||
|                 ) | ||||
|                 print(f"{base_url}/documents/{document.pk}") | ||||
|             else: | ||||
| @@ -82,7 +84,7 @@ def set_correspondent( | ||||
|                         if color | ||||
|                         else str(document) | ||||
|                     ) | ||||
|                     + f" [{document.pk}]" | ||||
|                     + f" [{document.pk}]", | ||||
|                 ) | ||||
|             print(f"Suggest correspondent {selected}") | ||||
|         else: | ||||
| @@ -139,7 +141,7 @@ def set_document_type( | ||||
|                 print( | ||||
|                     termcolors.colorize(str(document), fg="green") | ||||
|                     if color | ||||
|                     else str(document) | ||||
|                     else str(document), | ||||
|                 ) | ||||
|                 print(f"{base_url}/documents/{document.pk}") | ||||
|             else: | ||||
| @@ -149,7 +151,7 @@ def set_document_type( | ||||
|                         if color | ||||
|                         else str(document) | ||||
|                     ) | ||||
|                     + f" [{document.pk}]" | ||||
|                     + f" [{document.pk}]", | ||||
|                 ) | ||||
|             print(f"Suggest document type {selected}") | ||||
|         else: | ||||
| @@ -176,9 +178,9 @@ def set_tags( | ||||
|  | ||||
|     if replace: | ||||
|         Document.tags.through.objects.filter(document=document).exclude( | ||||
|             Q(tag__is_inbox_tag=True) | ||||
|             Q(tag__is_inbox_tag=True), | ||||
|         ).exclude( | ||||
|             Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO) | ||||
|             Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO), | ||||
|         ).delete() | ||||
|  | ||||
|     current_tags = set(document.tags.all()) | ||||
| @@ -198,7 +200,7 @@ def set_tags( | ||||
|             print( | ||||
|                 termcolors.colorize(str(document), fg="green") | ||||
|                 if color | ||||
|                 else str(document) | ||||
|                 else str(document), | ||||
|             ) | ||||
|             print(f"{base_url}/documents/{document.pk}") | ||||
|         else: | ||||
| @@ -208,7 +210,7 @@ def set_tags( | ||||
|                     if color | ||||
|                     else str(document) | ||||
|                 ) | ||||
|                 + f" [{document.pk}]" | ||||
|                 + f" [{document.pk}]", | ||||
|             ) | ||||
|         if relevant_tags: | ||||
|             print("Suggest tags: " + ", ".join([t.name for t in relevant_tags])) | ||||
| @@ -254,7 +256,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs): | ||||
|             except OSError as e: | ||||
|                 logger.error( | ||||
|                     f"Failed to move {instance.source_path} to trash at " | ||||
|                     f"{new_file_path}: {e}. Skipping cleanup!" | ||||
|                     f"{new_file_path}: {e}. Skipping cleanup!", | ||||
|                 ) | ||||
|                 return | ||||
|  | ||||
| @@ -270,16 +272,18 @@ def cleanup_document_deletion(sender, instance, using, **kwargs): | ||||
|                 except OSError as e: | ||||
|                     logger.warning( | ||||
|                         f"While deleting document {str(instance)}, the file " | ||||
|                         f"{filename} could not be deleted: {e}" | ||||
|                         f"{filename} could not be deleted: {e}", | ||||
|                     ) | ||||
|  | ||||
|         delete_empty_directories( | ||||
|             os.path.dirname(instance.source_path), root=settings.ORIGINALS_DIR | ||||
|             os.path.dirname(instance.source_path), | ||||
|             root=settings.ORIGINALS_DIR, | ||||
|         ) | ||||
|  | ||||
|         if instance.has_archive_version: | ||||
|             delete_empty_directories( | ||||
|                 os.path.dirname(instance.archive_path), root=settings.ARCHIVE_DIR | ||||
|                 os.path.dirname(instance.archive_path), | ||||
|                 root=settings.ARCHIVE_DIR, | ||||
|             ) | ||||
|  | ||||
|  | ||||
| @@ -297,7 +301,7 @@ def validate_move(instance, old_path, new_path): | ||||
|         # Can't do anything if the new file already exists. Skip updating file. | ||||
|         logger.warning( | ||||
|             f"Document {str(instance)}: Cannot rename file " | ||||
|             f"since target path {new_path} already exists." | ||||
|             f"since target path {new_path} already exists.", | ||||
|         ) | ||||
|         raise CannotMoveFilesException() | ||||
|  | ||||
| @@ -331,12 +335,11 @@ def update_filename_and_move_files(sender, instance, **kwargs): | ||||
|             if instance.has_archive_version: | ||||
|  | ||||
|                 instance.archive_filename = generate_unique_filename( | ||||
|                     instance, archive_filename=True | ||||
|                     instance, | ||||
|                     archive_filename=True, | ||||
|                 ) | ||||
|  | ||||
|                 move_archive = ( | ||||
|                     old_archive_filename != instance.archive_filename | ||||
|                 )  # NOQA: E501 | ||||
|                 move_archive = old_archive_filename != instance.archive_filename | ||||
|             else: | ||||
|                 move_archive = False | ||||
|  | ||||
| @@ -374,7 +377,7 @@ def update_filename_and_move_files(sender, instance, **kwargs): | ||||
|                 if move_archive and os.path.isfile(instance.archive_path): | ||||
|                     os.rename(instance.archive_path, old_archive_path) | ||||
|  | ||||
|             except Exception as e: | ||||
|             except Exception: | ||||
|                 # This is fine, since: | ||||
|                 # A: if we managed to move source from A to B, we will also | ||||
|                 #  manage to move it from B to A. If not, we have a serious | ||||
| @@ -393,14 +396,16 @@ def update_filename_and_move_files(sender, instance, **kwargs): | ||||
|         # something has failed above. | ||||
|         if not os.path.isfile(old_source_path): | ||||
|             delete_empty_directories( | ||||
|                 os.path.dirname(old_source_path), root=settings.ORIGINALS_DIR | ||||
|                 os.path.dirname(old_source_path), | ||||
|                 root=settings.ORIGINALS_DIR, | ||||
|             ) | ||||
|  | ||||
|         if instance.has_archive_version and not os.path.isfile( | ||||
|             old_archive_path | ||||
|         ):  # NOQA: E501 | ||||
|             old_archive_path, | ||||
|         ): | ||||
|             delete_empty_directories( | ||||
|                 os.path.dirname(old_archive_path), root=settings.ARCHIVE_DIR | ||||
|                 os.path.dirname(old_archive_path), | ||||
|                 root=settings.ARCHIVE_DIR, | ||||
|             ) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -3,13 +3,18 @@ import logging | ||||
| import tqdm | ||||
| from django.conf import settings | ||||
| from django.db.models.signals import post_save | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| from documents import index, sanity_checker | ||||
| from documents.classifier import DocumentClassifier, load_classifier | ||||
| from documents.consumer import Consumer, ConsumerError | ||||
| from documents.models import Document, Tag, DocumentType, Correspondent | ||||
| from documents import index | ||||
| from documents import sanity_checker | ||||
| from documents.classifier import DocumentClassifier | ||||
| from documents.classifier import load_classifier | ||||
| from documents.consumer import Consumer | ||||
| from documents.consumer import ConsumerError | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
| from documents.models import Tag | ||||
| from documents.sanity_checker import SanityCheckFailedException | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| logger = logging.getLogger("paperless.tasks") | ||||
|  | ||||
| @@ -47,7 +52,7 @@ def train_classifier(): | ||||
|     try: | ||||
|         if classifier.train(): | ||||
|             logger.info( | ||||
|                 "Saving updated classifier model to {}...".format(settings.MODEL_FILE) | ||||
|                 "Saving updated classifier model to {}...".format(settings.MODEL_FILE), | ||||
|             ) | ||||
|             classifier.save() | ||||
|         else: | ||||
| @@ -82,7 +87,7 @@ def consume_file( | ||||
|     else: | ||||
|         raise ConsumerError( | ||||
|             "Unknown error: Returned document was null, but " | ||||
|             "no error message was given." | ||||
|             "no error message was given.", | ||||
|         ) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,8 @@ | ||||
| from factory import Faker | ||||
| from factory.django import DjangoModelFactory | ||||
|  | ||||
| from ..models import Document, Correspondent | ||||
| from ..models import Correspondent | ||||
| from ..models import Document | ||||
|  | ||||
|  | ||||
| class CorrespondentFactory(DjangoModelFactory): | ||||
|   | ||||
| @@ -3,7 +3,6 @@ from unittest import mock | ||||
| from django.contrib.admin.sites import AdminSite | ||||
| from django.test import TestCase | ||||
| from django.utils import timezone | ||||
|  | ||||
| from documents import index | ||||
| from documents.admin import DocumentAdmin | ||||
| from documents.models import Document | ||||
| @@ -42,7 +41,8 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase): | ||||
|         docs = [] | ||||
|         for i in range(42): | ||||
|             doc = Document.objects.create( | ||||
|                 title="Many documents with the same title", checksum=f"{i:02}" | ||||
|                 title="Many documents with the same title", | ||||
|                 checksum=f"{i:02}", | ||||
|             ) | ||||
|             docs.append(doc) | ||||
|             index.add_or_update_document(doc) | ||||
| @@ -61,6 +61,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_created(self): | ||||
|         doc = Document.objects.create( | ||||
|             title="test", created=timezone.make_aware(timezone.datetime(2020, 4, 12)) | ||||
|             title="test", | ||||
|             created=timezone.make_aware(timezone.datetime(2020, 4, 12)), | ||||
|         ) | ||||
|         self.assertEqual(self.doc_admin.created_(doc), "2020-04-12") | ||||
|   | ||||
| @@ -10,22 +10,20 @@ from unittest import mock | ||||
| import pytest | ||||
| from django.conf import settings | ||||
| from django.contrib.auth.models import User | ||||
| from django.utils import timezone | ||||
| from django.test import override_settings | ||||
| from django.utils import timezone | ||||
| from documents import bulk_edit | ||||
| from documents import index | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
| from documents.models import MatchingModel | ||||
| from documents.models import SavedView | ||||
| from documents.models import Tag | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from rest_framework.test import APITestCase | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| from documents import index, bulk_edit | ||||
| from documents.models import ( | ||||
|     Document, | ||||
|     Correspondent, | ||||
|     DocumentType, | ||||
|     Tag, | ||||
|     SavedView, | ||||
|     MatchingModel, | ||||
| ) | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def setUp(self): | ||||
| @@ -72,7 +70,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         returned_doc["title"] = "the new title" | ||||
|  | ||||
|         response = self.client.put( | ||||
|             "/api/documents/{}/".format(doc.pk), returned_doc, format="json" | ||||
|             "/api/documents/{}/".format(doc.pk), | ||||
|             returned_doc, | ||||
|             format="json", | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(response.status_code, 200) | ||||
| @@ -127,7 +127,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertEqual(len(results[0]), 2) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/?fields=id,conteasdnt", format="json" | ||||
|             "/api/documents/?fields=id,conteasdnt", | ||||
|             format="json", | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         results = response.data["results"] | ||||
| @@ -162,7 +163,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         ) | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb" | ||||
|             os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), | ||||
|             "wb", | ||||
|         ) as f: | ||||
|             f.write(content_thumbnail) | ||||
|  | ||||
| @@ -206,7 +208,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertEqual(response.content, content_archive) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/{}/download/?original=true".format(doc.pk) | ||||
|             "/api/documents/{}/download/?original=true".format(doc.pk), | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(response.status_code, 200) | ||||
| @@ -218,7 +220,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertEqual(response.content, content_archive) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/{}/preview/?original=true".format(doc.pk) | ||||
|             "/api/documents/{}/preview/?original=true".format(doc.pk), | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(response.status_code, 200) | ||||
| @@ -227,7 +229,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def test_document_actions_not_existing_file(self): | ||||
|  | ||||
|         doc = Document.objects.create( | ||||
|             title="none", filename=os.path.basename("asd"), mime_type="application/pdf" | ||||
|             title="none", | ||||
|             filename=os.path.basename("asd"), | ||||
|             mime_type="application/pdf", | ||||
|         ) | ||||
|  | ||||
|         response = self.client.get("/api/documents/{}/download/".format(doc.pk)) | ||||
| @@ -242,13 +246,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def test_document_filters(self): | ||||
|  | ||||
|         doc1 = Document.objects.create( | ||||
|             title="none1", checksum="A", mime_type="application/pdf" | ||||
|             title="none1", | ||||
|             checksum="A", | ||||
|             mime_type="application/pdf", | ||||
|         ) | ||||
|         doc2 = Document.objects.create( | ||||
|             title="none2", checksum="B", mime_type="application/pdf" | ||||
|             title="none2", | ||||
|             checksum="B", | ||||
|             mime_type="application/pdf", | ||||
|         ) | ||||
|         doc3 = Document.objects.create( | ||||
|             title="none3", checksum="C", mime_type="application/pdf" | ||||
|             title="none3", | ||||
|             checksum="C", | ||||
|             mime_type="application/pdf", | ||||
|         ) | ||||
|  | ||||
|         tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True) | ||||
| @@ -273,7 +283,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id]) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id) | ||||
|             "/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id), | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         results = response.data["results"] | ||||
| @@ -281,7 +291,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc3.id]) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id) | ||||
|             "/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id), | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         results = response.data["results"] | ||||
| @@ -289,7 +299,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id]) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id) | ||||
|             "/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id), | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         results = response.data["results"] | ||||
| @@ -297,14 +307,14 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertEqual(results[0]["id"], doc3.id) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id) | ||||
|             "/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id), | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         results = response.data["results"] | ||||
|         self.assertEqual(len(results), 0) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id) | ||||
|             "/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id), | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         results = response.data["results"] | ||||
| @@ -317,7 +327,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc2.id]) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id) | ||||
|             "/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id), | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         results = response.data["results"] | ||||
| @@ -325,7 +335,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertEqual(results[0]["id"], doc1.id) | ||||
|  | ||||
|         response = self.client.get( | ||||
|             "/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id) | ||||
|             "/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id), | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         results = response.data["results"] | ||||
| @@ -443,7 +453,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|         for i in range(1, 6): | ||||
|             response = self.client.get( | ||||
|                 f"/api/documents/?query=content&page={i}&page_size=10" | ||||
|                 f"/api/documents/?query=content&page={i}&page_size=10", | ||||
|             ) | ||||
|             results = response.data["results"] | ||||
|             self.assertEqual(response.data["count"], 55) | ||||
| @@ -595,31 +605,35 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id]) | ||||
|         self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id]) | ||||
|         self.assertCountEqual( | ||||
|             search_query("&correspondent__isnull"), [d2.id, d3.id, d4.id, d5.id] | ||||
|             search_query("&correspondent__isnull"), | ||||
|             [d2.id, d3.id, d4.id, d5.id], | ||||
|         ) | ||||
|         self.assertCountEqual( | ||||
|             search_query("&document_type__isnull"), [d1.id, d3.id, d4.id, d5.id] | ||||
|             search_query("&document_type__isnull"), | ||||
|             [d1.id, d3.id, d4.id, d5.id], | ||||
|         ) | ||||
|         self.assertCountEqual( | ||||
|             search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)), [d3.id] | ||||
|             search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)), | ||||
|             [d3.id], | ||||
|         ) | ||||
|         self.assertCountEqual(search_query("&tags__id__all=" + str(t.id)), [d3.id]) | ||||
|         self.assertCountEqual( | ||||
|             search_query("&tags__id__all=" + str(t2.id)), [d3.id, d4.id] | ||||
|             search_query("&tags__id__all=" + str(t2.id)), | ||||
|             [d3.id, d4.id], | ||||
|         ) | ||||
|  | ||||
|         self.assertIn( | ||||
|             d4.id, | ||||
|             search_query( | ||||
|                 "&created__date__lt=" | ||||
|                 + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") | ||||
|                 + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"), | ||||
|             ), | ||||
|         ) | ||||
|         self.assertNotIn( | ||||
|             d4.id, | ||||
|             search_query( | ||||
|                 "&created__date__gt=" | ||||
|                 + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") | ||||
|                 + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
| @@ -627,40 +641,44 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|             d4.id, | ||||
|             search_query( | ||||
|                 "&created__date__lt=" | ||||
|                 + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") | ||||
|                 + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"), | ||||
|             ), | ||||
|         ) | ||||
|         self.assertIn( | ||||
|             d4.id, | ||||
|             search_query( | ||||
|                 "&created__date__gt=" | ||||
|                 + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") | ||||
|                 + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         self.assertIn( | ||||
|             d5.id, | ||||
|             search_query( | ||||
|                 "&added__date__lt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") | ||||
|                 "&added__date__lt=" | ||||
|                 + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"), | ||||
|             ), | ||||
|         ) | ||||
|         self.assertNotIn( | ||||
|             d5.id, | ||||
|             search_query( | ||||
|                 "&added__date__gt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") | ||||
|                 "&added__date__gt=" | ||||
|                 + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         self.assertNotIn( | ||||
|             d5.id, | ||||
|             search_query( | ||||
|                 "&added__date__lt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") | ||||
|                 "&added__date__lt=" | ||||
|                 + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"), | ||||
|             ), | ||||
|         ) | ||||
|         self.assertIn( | ||||
|             d5.id, | ||||
|             search_query( | ||||
|                 "&added__date__gt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") | ||||
|                 "&added__date__gt=" | ||||
|                 + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
| @@ -700,18 +718,22 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|             return [hit["id"] for hit in r.data["results"]] | ||||
|  | ||||
|         self.assertListEqual( | ||||
|             search_query("&ordering=archive_serial_number"), [d3.id, d1.id, d2.id] | ||||
|             search_query("&ordering=archive_serial_number"), | ||||
|             [d3.id, d1.id, d2.id], | ||||
|         ) | ||||
|         self.assertListEqual( | ||||
|             search_query("&ordering=-archive_serial_number"), [d2.id, d1.id, d3.id] | ||||
|             search_query("&ordering=-archive_serial_number"), | ||||
|             [d2.id, d1.id, d3.id], | ||||
|         ) | ||||
|         self.assertListEqual(search_query("&ordering=title"), [d3.id, d2.id, d1.id]) | ||||
|         self.assertListEqual(search_query("&ordering=-title"), [d1.id, d2.id, d3.id]) | ||||
|         self.assertListEqual( | ||||
|             search_query("&ordering=correspondent__name"), [d1.id, d3.id, d2.id] | ||||
|             search_query("&ordering=correspondent__name"), | ||||
|             [d1.id, d3.id, d2.id], | ||||
|         ) | ||||
|         self.assertListEqual( | ||||
|             search_query("&ordering=-correspondent__name"), [d2.id, d3.id, d1.id] | ||||
|             search_query("&ordering=-correspondent__name"), | ||||
|             [d2.id, d3.id, d1.id], | ||||
|         ) | ||||
|  | ||||
|     def test_statistics(self): | ||||
| @@ -740,10 +762,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def test_upload(self, m): | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", {"document": f} | ||||
|                 "/api/documents/post_document/", | ||||
|                 {"document": f}, | ||||
|             ) | ||||
|  | ||||
|         self.assertEqual(response.status_code, 200) | ||||
| @@ -761,7 +785,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def test_upload_empty_metadata(self, m): | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", | ||||
| @@ -783,10 +808,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def test_upload_invalid_form(self, m): | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", {"documenst": f} | ||||
|                 "/api/documents/post_document/", | ||||
|                 {"documenst": f}, | ||||
|             ) | ||||
|         self.assertEqual(response.status_code, 400) | ||||
|         m.assert_not_called() | ||||
| @@ -795,10 +822,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def test_upload_invalid_file(self, m): | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", {"document": f} | ||||
|                 "/api/documents/post_document/", | ||||
|                 {"document": f}, | ||||
|             ) | ||||
|         self.assertEqual(response.status_code, 400) | ||||
|         m.assert_not_called() | ||||
| @@ -806,7 +835,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     @mock.patch("documents.views.async_task") | ||||
|     def test_upload_with_title(self, async_task): | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", | ||||
| @@ -824,10 +854,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def test_upload_with_correspondent(self, async_task): | ||||
|         c = Correspondent.objects.create(name="test-corres") | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", {"document": f, "correspondent": c.id} | ||||
|                 "/api/documents/post_document/", | ||||
|                 {"document": f, "correspondent": c.id}, | ||||
|             ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|  | ||||
| @@ -840,10 +872,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     @mock.patch("documents.views.async_task") | ||||
|     def test_upload_with_invalid_correspondent(self, async_task): | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", {"document": f, "correspondent": 3456} | ||||
|                 "/api/documents/post_document/", | ||||
|                 {"document": f, "correspondent": 3456}, | ||||
|             ) | ||||
|         self.assertEqual(response.status_code, 400) | ||||
|  | ||||
| @@ -853,10 +887,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def test_upload_with_document_type(self, async_task): | ||||
|         dt = DocumentType.objects.create(name="invoice") | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", {"document": f, "document_type": dt.id} | ||||
|                 "/api/documents/post_document/", | ||||
|                 {"document": f, "document_type": dt.id}, | ||||
|             ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|  | ||||
| @@ -869,10 +905,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     @mock.patch("documents.views.async_task") | ||||
|     def test_upload_with_invalid_document_type(self, async_task): | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", {"document": f, "document_type": 34578} | ||||
|                 "/api/documents/post_document/", | ||||
|                 {"document": f, "document_type": 34578}, | ||||
|             ) | ||||
|         self.assertEqual(response.status_code, 400) | ||||
|  | ||||
| @@ -883,10 +921,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         t1 = Tag.objects.create(name="tag1") | ||||
|         t2 = Tag.objects.create(name="tag2") | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", {"document": f, "tags": [t2.id, t1.id]} | ||||
|                 "/api/documents/post_document/", | ||||
|                 {"document": f, "tags": [t2.id, t1.id]}, | ||||
|             ) | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|  | ||||
| @@ -901,7 +941,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         t1 = Tag.objects.create(name="tag1") | ||||
|         t2 = Tag.objects.create(name="tag2") | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
|         ) as f: | ||||
|             response = self.client.post( | ||||
|                 "/api/documents/post_document/", | ||||
| @@ -952,7 +993,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     def test_get_metadata_no_archive(self): | ||||
|         doc = Document.objects.create( | ||||
|             title="test", filename="file.pdf", mime_type="application/pdf" | ||||
|             title="test", | ||||
|             filename="file.pdf", | ||||
|             mime_type="application/pdf", | ||||
|         ) | ||||
|  | ||||
|         shutil.copy( | ||||
| @@ -999,7 +1042,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         self.assertEqual( | ||||
|             response.data, {"correspondents": [], "tags": [], "document_types": []} | ||||
|             response.data, | ||||
|             {"correspondents": [], "tags": [], "document_types": []}, | ||||
|         ) | ||||
|  | ||||
|     def test_get_suggestions_invalid_doc(self): | ||||
| @@ -1010,10 +1054,15 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     @mock.patch("documents.views.match_tags") | ||||
|     @mock.patch("documents.views.match_document_types") | ||||
|     def test_get_suggestions( | ||||
|         self, match_document_types, match_tags, match_correspondents | ||||
|         self, | ||||
|         match_document_types, | ||||
|         match_tags, | ||||
|         match_correspondents, | ||||
|     ): | ||||
|         doc = Document.objects.create( | ||||
|             title="test", mime_type="application/pdf", content="this is an invoice!" | ||||
|             title="test", | ||||
|             mime_type="application/pdf", | ||||
|             content="this is an invoice!", | ||||
|         ) | ||||
|         match_tags.return_value = [Tag(id=56), Tag(id=123)] | ||||
|         match_document_types.return_value = [DocumentType(id=23)] | ||||
| @@ -1094,7 +1143,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|         self.assertEqual(v1.user, self.user) | ||||
|  | ||||
|         response = self.client.patch( | ||||
|             f"/api/saved_views/{v1.id}/", {"show_in_sidebar": False}, format="json" | ||||
|             f"/api/saved_views/{v1.id}/", | ||||
|             {"show_in_sidebar": False}, | ||||
|             format="json", | ||||
|         ) | ||||
|  | ||||
|         v1 = SavedView.objects.get(id=v1.id) | ||||
| @@ -1183,7 +1234,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     def test_regex_no_algorithm(self): | ||||
|         for endpoint in ["correspondents", "tags", "document_types"]: | ||||
|             response = self.client.post( | ||||
|                 f"/api/{endpoint}/", {"name": "test", "match": "[0-9]"}, format="json" | ||||
|                 f"/api/{endpoint}/", | ||||
|                 {"name": "test", "match": "[0-9]"}, | ||||
|                 format="json", | ||||
|             ) | ||||
|             self.assertEqual(response.status_code, 201, endpoint) | ||||
|  | ||||
| @@ -1200,7 +1253,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     def test_tag_color(self): | ||||
|         response = self.client.post( | ||||
|             "/api/tags/", {"name": "tag", "colour": 3}, format="json" | ||||
|             "/api/tags/", | ||||
|             {"name": "tag", "colour": 3}, | ||||
|             format="json", | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 201) | ||||
|         self.assertEqual(Tag.objects.get(id=response.data["id"]).color, "#b2df8a") | ||||
| @@ -1213,14 +1268,17 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     def test_tag_color_invalid(self): | ||||
|         response = self.client.post( | ||||
|             "/api/tags/", {"name": "tag", "colour": 34}, format="json" | ||||
|             "/api/tags/", | ||||
|             {"name": "tag", "colour": 34}, | ||||
|             format="json", | ||||
|         ) | ||||
|         self.assertEqual(response.status_code, 400) | ||||
|  | ||||
|     def test_tag_color_custom(self): | ||||
|         tag = Tag.objects.create(name="test", color="#abcdef") | ||||
|         self.assertEqual( | ||||
|             self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"], 1 | ||||
|             self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"], | ||||
|             1, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @@ -1236,32 +1294,42 @@ class TestDocumentApiV2(DirectoriesMixin, APITestCase): | ||||
|     def test_tag_validate_color(self): | ||||
|         self.assertEqual( | ||||
|             self.client.post( | ||||
|                 "/api/tags/", {"name": "test", "color": "#12fFaA"}, format="json" | ||||
|                 "/api/tags/", | ||||
|                 {"name": "test", "color": "#12fFaA"}, | ||||
|                 format="json", | ||||
|             ).status_code, | ||||
|             201, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual( | ||||
|             self.client.post( | ||||
|                 "/api/tags/", {"name": "test1", "color": "abcdef"}, format="json" | ||||
|                 "/api/tags/", | ||||
|                 {"name": "test1", "color": "abcdef"}, | ||||
|                 format="json", | ||||
|             ).status_code, | ||||
|             400, | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             self.client.post( | ||||
|                 "/api/tags/", {"name": "test2", "color": "#abcdfg"}, format="json" | ||||
|                 "/api/tags/", | ||||
|                 {"name": "test2", "color": "#abcdfg"}, | ||||
|                 format="json", | ||||
|             ).status_code, | ||||
|             400, | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             self.client.post( | ||||
|                 "/api/tags/", {"name": "test3", "color": "#asd"}, format="json" | ||||
|                 "/api/tags/", | ||||
|                 {"name": "test3", "color": "#asd"}, | ||||
|                 format="json", | ||||
|             ).status_code, | ||||
|             400, | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             self.client.post( | ||||
|                 "/api/tags/", {"name": "test4", "color": "#12121212"}, format="json" | ||||
|                 "/api/tags/", | ||||
|                 {"name": "test4", "color": "#12121212"}, | ||||
|                 format="json", | ||||
|             ).status_code, | ||||
|             400, | ||||
|         ) | ||||
| @@ -1313,10 +1381,16 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|         self.t2 = Tag.objects.create(name="t2") | ||||
|         self.doc1 = Document.objects.create(checksum="A", title="A") | ||||
|         self.doc2 = Document.objects.create( | ||||
|             checksum="B", title="B", correspondent=self.c1, document_type=self.dt1 | ||||
|             checksum="B", | ||||
|             title="B", | ||||
|             correspondent=self.c1, | ||||
|             document_type=self.dt1, | ||||
|         ) | ||||
|         self.doc3 = Document.objects.create( | ||||
|             checksum="C", title="C", correspondent=self.c2, document_type=self.dt2 | ||||
|             checksum="C", | ||||
|             title="C", | ||||
|             correspondent=self.c2, | ||||
|             document_type=self.dt2, | ||||
|         ) | ||||
|         self.doc4 = Document.objects.create(checksum="D", title="D") | ||||
|         self.doc5 = Document.objects.create(checksum="E", title="E") | ||||
| @@ -1327,7 +1401,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|     def test_set_correspondent(self): | ||||
|         self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1) | ||||
|         bulk_edit.set_correspondent( | ||||
|             [self.doc1.id, self.doc2.id, self.doc3.id], self.c2.id | ||||
|             [self.doc1.id, self.doc2.id, self.doc3.id], | ||||
|             self.c2.id, | ||||
|         ) | ||||
|         self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 3) | ||||
|         self.async_task.assert_called_once() | ||||
| @@ -1345,7 +1420,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|     def test_set_document_type(self): | ||||
|         self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 1) | ||||
|         bulk_edit.set_document_type( | ||||
|             [self.doc1.id, self.doc2.id, self.doc3.id], self.dt2.id | ||||
|             [self.doc1.id, self.doc2.id, self.doc3.id], | ||||
|             self.dt2.id, | ||||
|         ) | ||||
|         self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 3) | ||||
|         self.async_task.assert_called_once() | ||||
| @@ -1363,7 +1439,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|     def test_add_tag(self): | ||||
|         self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2) | ||||
|         bulk_edit.add_tag( | ||||
|             [self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id], self.t1.id | ||||
|             [self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id], | ||||
|             self.t1.id, | ||||
|         ) | ||||
|         self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 4) | ||||
|         self.async_task.assert_called_once() | ||||
| @@ -1415,7 +1492,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc1.id], | ||||
|                     "method": "set_correspondent", | ||||
|                     "parameters": {"correspondent": self.c1.id}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1435,7 +1512,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc1.id], | ||||
|                     "method": "set_correspondent", | ||||
|                     "parameters": {"correspondent": None}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1455,7 +1532,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc1.id], | ||||
|                     "method": "set_document_type", | ||||
|                     "parameters": {"document_type": self.dt1.id}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1475,7 +1552,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc1.id], | ||||
|                     "method": "set_document_type", | ||||
|                     "parameters": {"document_type": None}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1495,7 +1572,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc1.id], | ||||
|                     "method": "add_tag", | ||||
|                     "parameters": {"tag": self.t1.id}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1515,7 +1592,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc1.id], | ||||
|                     "method": "remove_tag", | ||||
|                     "parameters": {"tag": self.t1.id}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1538,7 +1615,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                         "add_tags": [self.t1.id], | ||||
|                         "remove_tags": [self.t2.id], | ||||
|                     }, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1555,7 +1632,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|         response = self.client.post( | ||||
|             "/api/documents/bulk_edit/", | ||||
|             json.dumps( | ||||
|                 {"documents": [self.doc1.id], "method": "delete", "parameters": {}} | ||||
|                 {"documents": [self.doc1.id], "method": "delete", "parameters": {}}, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1580,7 +1657,11 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|         response = self.client.post( | ||||
|             "/api/documents/bulk_edit/", | ||||
|             json.dumps( | ||||
|                 {"documents": [self.doc2.id], "method": "exterminate", "parameters": {}} | ||||
|                 { | ||||
|                     "documents": [self.doc2.id], | ||||
|                     "method": "exterminate", | ||||
|                     "parameters": {}, | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1596,7 +1677,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc2.id], | ||||
|                     "method": "set_correspondent", | ||||
|                     "parameters": {"correspondent": 345657}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1613,7 +1694,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc2.id], | ||||
|                     "method": "set_correspondent", | ||||
|                     "parameters": {}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1628,7 +1709,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc2.id], | ||||
|                     "method": "set_document_type", | ||||
|                     "parameters": {"document_type": 345657}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1645,7 +1726,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc2.id], | ||||
|                     "method": "set_document_type", | ||||
|                     "parameters": {}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1660,7 +1741,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc2.id], | ||||
|                     "method": "add_tag", | ||||
|                     "parameters": {"tag": 345657}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1672,7 +1753,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|         response = self.client.post( | ||||
|             "/api/documents/bulk_edit/", | ||||
|             json.dumps( | ||||
|                 {"documents": [self.doc2.id], "method": "add_tag", "parameters": {}} | ||||
|                 {"documents": [self.doc2.id], "method": "add_tag", "parameters": {}}, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1687,7 +1768,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc2.id], | ||||
|                     "method": "remove_tag", | ||||
|                     "parameters": {"tag": 345657}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1699,7 +1780,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|         response = self.client.post( | ||||
|             "/api/documents/bulk_edit/", | ||||
|             json.dumps( | ||||
|                 {"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}} | ||||
|                 {"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}}, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1717,7 +1798,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                         "add_tags": [self.t2.id, 1657], | ||||
|                         "remove_tags": [1123123], | ||||
|                     }, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1731,7 +1812,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc2.id], | ||||
|                     "method": "modify_tags", | ||||
|                     "parameters": {"remove_tags": [1123123]}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1744,7 +1825,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|                     "documents": [self.doc2.id], | ||||
|                     "method": "modify_tags", | ||||
|                     "parameters": {"add_tags": [self.t2.id, 1657]}, | ||||
|                 } | ||||
|                 }, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1774,7 +1855,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase): | ||||
|         response = self.client.post( | ||||
|             "/api/documents/selection_data/", | ||||
|             json.dumps( | ||||
|                 {"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]} | ||||
|                 {"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]}, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1856,7 +1937,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase): | ||||
|         response = self.client.post( | ||||
|             "/api/documents/bulk_download/", | ||||
|             json.dumps( | ||||
|                 {"documents": [self.doc2.id, self.doc3.id], "content": "originals"} | ||||
|                 {"documents": [self.doc2.id, self.doc3.id], "content": "originals"}, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1914,17 +1995,20 @@ class TestBulkDownload(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|             with self.doc2.source_file as f: | ||||
|                 self.assertEqual( | ||||
|                     f.read(), zipf.read("originals/2021-01-01 document A.pdf") | ||||
|                     f.read(), | ||||
|                     zipf.read("originals/2021-01-01 document A.pdf"), | ||||
|                 ) | ||||
|  | ||||
|             with self.doc3.archive_file as f: | ||||
|                 self.assertEqual( | ||||
|                     f.read(), zipf.read("archive/2020-03-21 document B.pdf") | ||||
|                     f.read(), | ||||
|                     zipf.read("archive/2020-03-21 document B.pdf"), | ||||
|                 ) | ||||
|  | ||||
|             with self.doc3.source_file as f: | ||||
|                 self.assertEqual( | ||||
|                     f.read(), zipf.read("originals/2020-03-21 document B.jpg") | ||||
|                     f.read(), | ||||
|                     zipf.read("originals/2020-03-21 document B.jpg"), | ||||
|                 ) | ||||
|  | ||||
|     def test_filename_clashes(self): | ||||
| @@ -1953,7 +2037,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase): | ||||
|         response = self.client.post( | ||||
|             "/api/documents/bulk_download/", | ||||
|             json.dumps( | ||||
|                 {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"} | ||||
|                 {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"}, | ||||
|             ), | ||||
|             content_type="application/json", | ||||
|         ) | ||||
| @@ -1968,13 +2052,16 @@ class TestApiAuth(APITestCase): | ||||
|  | ||||
|         self.assertEqual(self.client.get(f"/api/documents/{d.id}/").status_code, 401) | ||||
|         self.assertEqual( | ||||
|             self.client.get(f"/api/documents/{d.id}/download/").status_code, 401 | ||||
|             self.client.get(f"/api/documents/{d.id}/download/").status_code, | ||||
|             401, | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             self.client.get(f"/api/documents/{d.id}/preview/").status_code, 401 | ||||
|             self.client.get(f"/api/documents/{d.id}/preview/").status_code, | ||||
|             401, | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             self.client.get(f"/api/documents/{d.id}/thumb/").status_code, 401 | ||||
|             self.client.get(f"/api/documents/{d.id}/thumb/").status_code, | ||||
|             401, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(self.client.get("/api/tags/").status_code, 401) | ||||
| @@ -1987,10 +2074,12 @@ class TestApiAuth(APITestCase): | ||||
|         self.assertEqual(self.client.get("/api/search/autocomplete/").status_code, 401) | ||||
|         self.assertEqual(self.client.get("/api/documents/bulk_edit/").status_code, 401) | ||||
|         self.assertEqual( | ||||
|             self.client.get("/api/documents/bulk_download/").status_code, 401 | ||||
|             self.client.get("/api/documents/bulk_download/").status_code, | ||||
|             401, | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             self.client.get("/api/documents/selection_data/").status_code, 401 | ||||
|             self.client.get("/api/documents/selection_data/").status_code, | ||||
|             401, | ||||
|         ) | ||||
|  | ||||
|     def test_api_version_no_auth(self): | ||||
|   | ||||
| @@ -4,10 +4,11 @@ from unittest import mock | ||||
| from django.core.checks import Error | ||||
| from django.test import TestCase | ||||
|  | ||||
| from .factories import DocumentFactory | ||||
| from .. import document_consumer_declaration | ||||
| from ..checks import changed_password_check, parser_check | ||||
| from ..checks import changed_password_check | ||||
| from ..checks import parser_check | ||||
| from ..models import Document | ||||
| from ..signals import document_consumer_declaration | ||||
| from .factories import DocumentFactory | ||||
|  | ||||
|  | ||||
| class ChecksTestCase(TestCase): | ||||
| @@ -30,7 +31,7 @@ class ChecksTestCase(TestCase): | ||||
|                 [ | ||||
|                     Error( | ||||
|                         "No parsers found. This is a bug. The consumer won't be " | ||||
|                         "able to consume any documents without parsers." | ||||
|                     ) | ||||
|                         "able to consume any documents without parsers.", | ||||
|                     ), | ||||
|                 ], | ||||
|             ) | ||||
|   | ||||
| @@ -5,14 +5,15 @@ from unittest import mock | ||||
|  | ||||
| import pytest | ||||
| from django.conf import settings | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from documents.classifier import ( | ||||
|     DocumentClassifier, | ||||
|     IncompatibleClassifierVersionError, | ||||
|     load_classifier, | ||||
| ) | ||||
| from documents.models import Correspondent, Document, Tag, DocumentType | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from documents.classifier import DocumentClassifier | ||||
| from documents.classifier import IncompatibleClassifierVersionError | ||||
| from documents.classifier import load_classifier | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
| from documents.models import Tag | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| @@ -23,26 +24,37 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def generate_test_data(self): | ||||
|         self.c1 = Correspondent.objects.create( | ||||
|             name="c1", matching_algorithm=Correspondent.MATCH_AUTO | ||||
|             name="c1", | ||||
|             matching_algorithm=Correspondent.MATCH_AUTO, | ||||
|         ) | ||||
|         self.c2 = Correspondent.objects.create(name="c2") | ||||
|         self.c3 = Correspondent.objects.create( | ||||
|             name="c3", matching_algorithm=Correspondent.MATCH_AUTO | ||||
|             name="c3", | ||||
|             matching_algorithm=Correspondent.MATCH_AUTO, | ||||
|         ) | ||||
|         self.t1 = Tag.objects.create( | ||||
|             name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12 | ||||
|             name="t1", | ||||
|             matching_algorithm=Tag.MATCH_AUTO, | ||||
|             pk=12, | ||||
|         ) | ||||
|         self.t2 = Tag.objects.create( | ||||
|             name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True | ||||
|             name="t2", | ||||
|             matching_algorithm=Tag.MATCH_ANY, | ||||
|             pk=34, | ||||
|             is_inbox_tag=True, | ||||
|         ) | ||||
|         self.t3 = Tag.objects.create( | ||||
|             name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45 | ||||
|             name="t3", | ||||
|             matching_algorithm=Tag.MATCH_AUTO, | ||||
|             pk=45, | ||||
|         ) | ||||
|         self.dt = DocumentType.objects.create( | ||||
|             name="dt", matching_algorithm=DocumentType.MATCH_AUTO | ||||
|             name="dt", | ||||
|             matching_algorithm=DocumentType.MATCH_AUTO, | ||||
|         ) | ||||
|         self.dt2 = DocumentType.objects.create( | ||||
|             name="dt2", matching_algorithm=DocumentType.MATCH_AUTO | ||||
|             name="dt2", | ||||
|             matching_algorithm=DocumentType.MATCH_AUTO, | ||||
|         ) | ||||
|  | ||||
|         self.doc1 = Document.objects.create( | ||||
| @@ -59,7 +71,9 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|             checksum="B", | ||||
|         ) | ||||
|         self.doc_inbox = Document.objects.create( | ||||
|             title="doc235", content="aa", checksum="C" | ||||
|             title="doc235", | ||||
|             content="aa", | ||||
|             checksum="C", | ||||
|         ) | ||||
|  | ||||
|         self.doc1.tags.add(self.t1) | ||||
| @@ -90,27 +104,33 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         self.generate_test_data() | ||||
|         self.classifier.train() | ||||
|         self.assertListEqual( | ||||
|             list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk] | ||||
|             list(self.classifier.correspondent_classifier.classes_), | ||||
|             [-1, self.c1.pk], | ||||
|         ) | ||||
|         self.assertListEqual( | ||||
|             list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk] | ||||
|             list(self.classifier.tags_binarizer.classes_), | ||||
|             [self.t1.pk, self.t3.pk], | ||||
|         ) | ||||
|  | ||||
|     def testPredict(self): | ||||
|         self.generate_test_data() | ||||
|         self.classifier.train() | ||||
|         self.assertEqual( | ||||
|             self.classifier.predict_correspondent(self.doc1.content), self.c1.pk | ||||
|             self.classifier.predict_correspondent(self.doc1.content), | ||||
|             self.c1.pk, | ||||
|         ) | ||||
|         self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None) | ||||
|         self.assertListEqual( | ||||
|             self.classifier.predict_tags(self.doc1.content), [self.t1.pk] | ||||
|             self.classifier.predict_tags(self.doc1.content), | ||||
|             [self.t1.pk], | ||||
|         ) | ||||
|         self.assertListEqual( | ||||
|             self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk] | ||||
|             self.classifier.predict_tags(self.doc2.content), | ||||
|             [self.t1.pk, self.t3.pk], | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             self.classifier.predict_document_type(self.doc1.content), self.dt.pk | ||||
|             self.classifier.predict_document_type(self.doc1.content), | ||||
|             self.dt.pk, | ||||
|         ) | ||||
|         self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None) | ||||
|  | ||||
| @@ -133,7 +153,8 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         current_ver = DocumentClassifier.FORMAT_VERSION | ||||
|         with mock.patch( | ||||
|             "documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver + 1 | ||||
|             "documents.classifier.DocumentClassifier.FORMAT_VERSION", | ||||
|             current_ver + 1, | ||||
|         ): | ||||
|             # assure that we won't load old classifiers. | ||||
|             self.assertRaises(IncompatibleClassifierVersionError, classifier2.load) | ||||
| @@ -157,7 +178,7 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         self.assertFalse(new_classifier.train()) | ||||
|  | ||||
|     @override_settings( | ||||
|         MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle") | ||||
|         MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"), | ||||
|     ) | ||||
|     def test_load_and_classify(self): | ||||
|         self.generate_test_data() | ||||
| @@ -169,7 +190,8 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_one_correspondent_predict(self): | ||||
|         c1 = Correspondent.objects.create( | ||||
|             name="c1", matching_algorithm=Correspondent.MATCH_AUTO | ||||
|             name="c1", | ||||
|             matching_algorithm=Correspondent.MATCH_AUTO, | ||||
|         ) | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", | ||||
| @@ -183,7 +205,8 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_one_correspondent_predict_manydocs(self): | ||||
|         c1 = Correspondent.objects.create( | ||||
|             name="c1", matching_algorithm=Correspondent.MATCH_AUTO | ||||
|             name="c1", | ||||
|             matching_algorithm=Correspondent.MATCH_AUTO, | ||||
|         ) | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", | ||||
| @@ -192,7 +215,9 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|             checksum="A", | ||||
|         ) | ||||
|         doc2 = Document.objects.create( | ||||
|             title="doc2", content="this is a document from noone", checksum="B" | ||||
|             title="doc2", | ||||
|             content="this is a document from noone", | ||||
|             checksum="B", | ||||
|         ) | ||||
|  | ||||
|         self.classifier.train() | ||||
| @@ -201,7 +226,8 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_one_type_predict(self): | ||||
|         dt = DocumentType.objects.create( | ||||
|             name="dt", matching_algorithm=DocumentType.MATCH_AUTO | ||||
|             name="dt", | ||||
|             matching_algorithm=DocumentType.MATCH_AUTO, | ||||
|         ) | ||||
|  | ||||
|         doc1 = Document.objects.create( | ||||
| @@ -216,7 +242,8 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_one_type_predict_manydocs(self): | ||||
|         dt = DocumentType.objects.create( | ||||
|             name="dt", matching_algorithm=DocumentType.MATCH_AUTO | ||||
|             name="dt", | ||||
|             matching_algorithm=DocumentType.MATCH_AUTO, | ||||
|         ) | ||||
|  | ||||
|         doc1 = Document.objects.create( | ||||
| @@ -227,7 +254,9 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         ) | ||||
|  | ||||
|         doc2 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c2", checksum="B" | ||||
|             title="doc1", | ||||
|             content="this is a document from c2", | ||||
|             checksum="B", | ||||
|         ) | ||||
|  | ||||
|         self.classifier.train() | ||||
| @@ -238,7 +267,9 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12) | ||||
|  | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c1", checksum="A" | ||||
|             title="doc1", | ||||
|             content="this is a document from c1", | ||||
|             checksum="A", | ||||
|         ) | ||||
|  | ||||
|         doc1.tags.add(t1) | ||||
| @@ -249,7 +280,9 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12) | ||||
|  | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c1", checksum="A" | ||||
|             title="doc1", | ||||
|             content="this is a document from c1", | ||||
|             checksum="A", | ||||
|         ) | ||||
|  | ||||
|         self.classifier.train() | ||||
| @@ -260,7 +293,9 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121) | ||||
|  | ||||
|         doc4 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c4", checksum="D" | ||||
|             title="doc1", | ||||
|             content="this is a document from c4", | ||||
|             checksum="D", | ||||
|         ) | ||||
|  | ||||
|         doc4.tags.add(t1) | ||||
| @@ -273,16 +308,24 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121) | ||||
|  | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c1", checksum="A" | ||||
|             title="doc1", | ||||
|             content="this is a document from c1", | ||||
|             checksum="A", | ||||
|         ) | ||||
|         doc2 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c2", checksum="B" | ||||
|             title="doc1", | ||||
|             content="this is a document from c2", | ||||
|             checksum="B", | ||||
|         ) | ||||
|         doc3 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c3", checksum="C" | ||||
|             title="doc1", | ||||
|             content="this is a document from c3", | ||||
|             checksum="C", | ||||
|         ) | ||||
|         doc4 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c4", checksum="D" | ||||
|             title="doc1", | ||||
|             content="this is a document from c4", | ||||
|             checksum="D", | ||||
|         ) | ||||
|  | ||||
|         doc1.tags.add(t1) | ||||
| @@ -300,10 +343,14 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12) | ||||
|  | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c1", checksum="A" | ||||
|             title="doc1", | ||||
|             content="this is a document from c1", | ||||
|             checksum="A", | ||||
|         ) | ||||
|         doc2 = Document.objects.create( | ||||
|             title="doc2", content="this is a document from c2", checksum="B" | ||||
|             title="doc2", | ||||
|             content="this is a document from c2", | ||||
|             checksum="B", | ||||
|         ) | ||||
|  | ||||
|         doc1.tags.add(t1) | ||||
| @@ -316,10 +363,14 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12) | ||||
|  | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", content="this is a document from c1", checksum="A" | ||||
|             title="doc1", | ||||
|             content="this is a document from c1", | ||||
|             checksum="A", | ||||
|         ) | ||||
|         doc2 = Document.objects.create( | ||||
|             title="doc2", content="this is a document from c2", checksum="B" | ||||
|             title="doc2", | ||||
|             content="this is a document from c2", | ||||
|             checksum="B", | ||||
|         ) | ||||
|  | ||||
|         doc1.tags.add(t1) | ||||
| @@ -338,13 +389,15 @@ class TestClassifier(DirectoriesMixin, TestCase): | ||||
|         load.assert_called_once() | ||||
|  | ||||
|     @override_settings( | ||||
|         CACHES={"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}} | ||||
|         CACHES={ | ||||
|             "default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}, | ||||
|         }, | ||||
|     ) | ||||
|     @override_settings( | ||||
|         MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle") | ||||
|         MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"), | ||||
|     ) | ||||
|     @pytest.mark.skip( | ||||
|         reason="Disabled caching due to high memory usage - need to investigate." | ||||
|         reason="Disabled caching due to high memory usage - need to investigate.", | ||||
|     ) | ||||
|     def test_load_classifier_cached(self): | ||||
|         classifier = load_classifier() | ||||
|   | ||||
| @@ -6,13 +6,20 @@ from unittest import mock | ||||
| from unittest.mock import MagicMock | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.test import TestCase, override_settings | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
|  | ||||
| from .utils import DirectoriesMixin | ||||
| from ..consumer import Consumer, ConsumerError | ||||
| from ..models import FileInfo, Tag, Correspondent, DocumentType, Document | ||||
| from ..parsers import DocumentParser, ParseError | ||||
| from ..consumer import Consumer | ||||
| from ..consumer import ConsumerError | ||||
| from ..models import Correspondent | ||||
| from ..models import Document | ||||
| from ..models import DocumentType | ||||
| from ..models import FileInfo | ||||
| from ..models import Tag | ||||
| from ..parsers import DocumentParser | ||||
| from ..parsers import ParseError | ||||
| from ..tasks import sanity_check | ||||
| from .utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| class TestAttributes(TestCase): | ||||
| @@ -33,12 +40,18 @@ class TestAttributes(TestCase): | ||||
|  | ||||
|     def test_guess_attributes_from_name_when_title_starts_with_dash(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "- weird but should not break.pdf", None, "- weird but should not break", () | ||||
|             "- weird but should not break.pdf", | ||||
|             None, | ||||
|             "- weird but should not break", | ||||
|             (), | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name_when_title_ends_with_dash(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "weird but should not break -.pdf", None, "weird but should not break -", () | ||||
|             "weird but should not break -.pdf", | ||||
|             None, | ||||
|             "weird but should not break -", | ||||
|             (), | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @@ -53,7 +66,12 @@ class TestFieldPermutations(TestCase): | ||||
|     valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"] | ||||
|  | ||||
|     def _test_guessed_attributes( | ||||
|         self, filename, created=None, correspondent=None, title=None, tags=None | ||||
|         self, | ||||
|         filename, | ||||
|         created=None, | ||||
|         correspondent=None, | ||||
|         title=None, | ||||
|         tags=None, | ||||
|     ): | ||||
|  | ||||
|         info = FileInfo.from_filename(filename) | ||||
| @@ -131,7 +149,7 @@ class TestFieldPermutations(TestCase): | ||||
|             FILENAME_PARSE_TRANSFORMS=[ | ||||
|                 (all_patt, "all.gif"), | ||||
|                 (all_patt, "anotherall.gif"), | ||||
|             ] | ||||
|             ], | ||||
|         ): | ||||
|             info = FileInfo.from_filename(filename) | ||||
|             self.assertEqual(info.title, "all") | ||||
| @@ -141,7 +159,7 @@ class TestFieldPermutations(TestCase): | ||||
|             FILENAME_PARSE_TRANSFORMS=[ | ||||
|                 (none_patt, "none.gif"), | ||||
|                 (all_patt, "anotherall.gif"), | ||||
|             ] | ||||
|             ], | ||||
|         ): | ||||
|             info = FileInfo.from_filename(filename) | ||||
|             self.assertEqual(info.title, "anotherall") | ||||
| @@ -238,7 +256,9 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def make_dummy_parser(self, logging_group, progress_callback=None): | ||||
|         return DummyParser( | ||||
|             logging_group, self.dirs.scratch_dir, self.get_test_archive_file() | ||||
|             logging_group, | ||||
|             self.dirs.scratch_dir, | ||||
|             self.get_test_archive_file(), | ||||
|         ) | ||||
|  | ||||
|     def make_faulty_parser(self, logging_group, progress_callback=None): | ||||
| @@ -257,7 +277,7 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|                     "mime_types": {"application/pdf": ".pdf"}, | ||||
|                     "weight": 0, | ||||
|                 }, | ||||
|             ) | ||||
|             ), | ||||
|         ] | ||||
|         self.addCleanup(patcher.stop) | ||||
|  | ||||
| @@ -282,7 +302,11 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def get_test_archive_file(self): | ||||
|         src = os.path.join( | ||||
|             os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf" | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "documents", | ||||
|             "archive", | ||||
|             "0000001.pdf", | ||||
|         ) | ||||
|         dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf") | ||||
|         shutil.copy(src, dst) | ||||
| @@ -296,7 +320,8 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         self.assertEqual(document.content, "The Text") | ||||
|         self.assertEqual( | ||||
|             document.title, os.path.splitext(os.path.basename(filename))[0] | ||||
|             document.title, | ||||
|             os.path.splitext(os.path.basename(filename))[0], | ||||
|         ) | ||||
|         self.assertIsNone(document.correspondent) | ||||
|         self.assertIsNone(document.document_type) | ||||
| @@ -339,7 +364,8 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|         override_filename = "Statement for November.pdf" | ||||
|  | ||||
|         document = self.consumer.try_consume_file( | ||||
|             filename, override_filename=override_filename | ||||
|             filename, | ||||
|             override_filename=override_filename, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(document.title, "Statement for November") | ||||
| @@ -348,7 +374,8 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def testOverrideTitle(self): | ||||
|         document = self.consumer.try_consume_file( | ||||
|             self.get_test_file(), override_title="Override Title" | ||||
|             self.get_test_file(), | ||||
|             override_title="Override Title", | ||||
|         ) | ||||
|         self.assertEqual(document.title, "Override Title") | ||||
|         self._assert_first_last_send_progress() | ||||
| @@ -357,7 +384,8 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|         c = Correspondent.objects.create(name="test") | ||||
|  | ||||
|         document = self.consumer.try_consume_file( | ||||
|             self.get_test_file(), override_correspondent_id=c.pk | ||||
|             self.get_test_file(), | ||||
|             override_correspondent_id=c.pk, | ||||
|         ) | ||||
|         self.assertEqual(document.correspondent.id, c.id) | ||||
|         self._assert_first_last_send_progress() | ||||
| @@ -366,7 +394,8 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|         dt = DocumentType.objects.create(name="test") | ||||
|  | ||||
|         document = self.consumer.try_consume_file( | ||||
|             self.get_test_file(), override_document_type_id=dt.pk | ||||
|             self.get_test_file(), | ||||
|             override_document_type_id=dt.pk, | ||||
|         ) | ||||
|         self.assertEqual(document.document_type.id, dt.id) | ||||
|         self._assert_first_last_send_progress() | ||||
| @@ -376,7 +405,8 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|         t2 = Tag.objects.create(name="t2") | ||||
|         t3 = Tag.objects.create(name="t3") | ||||
|         document = self.consumer.try_consume_file( | ||||
|             self.get_test_file(), override_tag_ids=[t1.id, t3.id] | ||||
|             self.get_test_file(), | ||||
|             override_tag_ids=[t1.id, t3.id], | ||||
|         ) | ||||
|  | ||||
|         self.assertIn(t1, document.tags.all()) | ||||
| @@ -446,7 +476,7 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|                     "mime_types": {"application/pdf": ".pdf"}, | ||||
|                     "weight": 0, | ||||
|                 }, | ||||
|             ) | ||||
|             ), | ||||
|         ] | ||||
|  | ||||
|         self.assertRaisesMessage( | ||||
| @@ -595,16 +625,16 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|                     "mime_types": {"application/pdf": ".pdf", "image/png": ".png"}, | ||||
|                     "weight": 0, | ||||
|                 }, | ||||
|             ) | ||||
|             ), | ||||
|         ] | ||||
|         doc1 = self.consumer.try_consume_file( | ||||
|             os.path.join(settings.CONSUMPTION_DIR, "simple.png") | ||||
|             os.path.join(settings.CONSUMPTION_DIR, "simple.png"), | ||||
|         ) | ||||
|         doc2 = self.consumer.try_consume_file( | ||||
|             os.path.join(settings.CONSUMPTION_DIR, "simple.pdf") | ||||
|             os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"), | ||||
|         ) | ||||
|         doc3 = self.consumer.try_consume_file( | ||||
|             os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf") | ||||
|             os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"), | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(doc1.filename, "simple.png") | ||||
| @@ -691,7 +721,9 @@ class PostConsumeTestCase(TestCase): | ||||
|             with override_settings(POST_CONSUME_SCRIPT=script.name): | ||||
|                 c = Correspondent.objects.create(name="my_bank") | ||||
|                 doc = Document.objects.create( | ||||
|                     title="Test", mime_type="application/pdf", correspondent=c | ||||
|                     title="Test", | ||||
|                     mime_type="application/pdf", | ||||
|                     correspondent=c, | ||||
|                 ) | ||||
|                 tag1 = Tag.objects.create(name="a") | ||||
|                 tag2 = Tag.objects.create(name="b") | ||||
|   | ||||
| @@ -5,15 +5,16 @@ from uuid import uuid4 | ||||
|  | ||||
| from dateutil import tz | ||||
| from django.conf import settings | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from documents.parsers import parse_date | ||||
|  | ||||
|  | ||||
| class TestDate(TestCase): | ||||
|  | ||||
|     SAMPLE_FILES = os.path.join( | ||||
|         os.path.dirname(__file__), "../../paperless_tesseract/tests/samples" | ||||
|         os.path.dirname(__file__), | ||||
|         "../../paperless_tesseract/tests/samples", | ||||
|     ) | ||||
|     SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) | ||||
|  | ||||
| @@ -111,11 +112,11 @@ class TestDate(TestCase): | ||||
|     @override_settings(FILENAME_DATE_ORDER="YMD") | ||||
|     def test_filename_date_parse_invalid(self, *args): | ||||
|         self.assertIsNone( | ||||
|             parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here") | ||||
|             parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"), | ||||
|         ) | ||||
|  | ||||
|     @override_settings( | ||||
|         IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)) | ||||
|         IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)), | ||||
|     ) | ||||
|     def test_ignored_dates(self, *args): | ||||
|         text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum" | ||||
|   | ||||
| @@ -3,10 +3,12 @@ import tempfile | ||||
| from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from django.utils import timezone | ||||
|  | ||||
| from ..models import Document, Correspondent | ||||
| from ..models import Correspondent | ||||
| from ..models import Document | ||||
|  | ||||
|  | ||||
| class TestDocument(TestCase): | ||||
|   | ||||
| @@ -9,17 +9,19 @@ from unittest import mock | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.db import DatabaseError | ||||
| from django.test import TestCase, override_settings | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from django.utils import timezone | ||||
|  | ||||
| from ..file_handling import create_source_path_directory | ||||
| from ..file_handling import delete_empty_directories | ||||
| from ..file_handling import generate_filename | ||||
| from ..file_handling import generate_unique_filename | ||||
| from ..models import Correspondent | ||||
| from ..models import Document | ||||
| from ..models import DocumentType | ||||
| from ..models import Tag | ||||
| from .utils import DirectoriesMixin | ||||
| from ..file_handling import ( | ||||
|     generate_filename, | ||||
|     create_source_path_directory, | ||||
|     delete_empty_directories, | ||||
|     generate_unique_filename, | ||||
| ) | ||||
| from ..models import Document, Correspondent, Tag, DocumentType | ||||
|  | ||||
|  | ||||
| class TestFileHandling(DirectoriesMixin, TestCase): | ||||
| @@ -34,7 +36,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         document.storage_type = Document.STORAGE_TYPE_GPG | ||||
|         self.assertEqual( | ||||
|             generate_filename(document), "{:07d}.pdf.gpg".format(document.pk) | ||||
|             generate_filename(document), | ||||
|             "{:07d}.pdf.gpg".format(document.pk), | ||||
|         ) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
| @@ -75,7 +78,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||
|         self.assertEqual( | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), | ||||
|             True, | ||||
|         ) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
| @@ -93,7 +97,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         # Test source_path | ||||
|         self.assertEqual( | ||||
|             document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf" | ||||
|             document.source_path, | ||||
|             settings.ORIGINALS_DIR + "/none/none.pdf", | ||||
|         ) | ||||
|  | ||||
|         # Make the folder read- and execute-only (no writing and no renaming) | ||||
| @@ -105,7 +110,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         # Check proper handling of files | ||||
|         self.assertEqual( | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), | ||||
|             True, | ||||
|         ) | ||||
|         self.assertEqual(document.filename, "none/none.pdf") | ||||
|  | ||||
| @@ -145,7 +151,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|             # Check proper handling of files | ||||
|             self.assertTrue(os.path.isfile(document.source_path)) | ||||
|             self.assertEqual( | ||||
|                 os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True | ||||
|                 os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), | ||||
|                 True, | ||||
|             ) | ||||
|             self.assertEqual(document.filename, "none/none.pdf") | ||||
|  | ||||
| @@ -167,7 +174,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         pk = document.pk | ||||
|         document.delete() | ||||
|         self.assertEqual( | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), | ||||
|             False, | ||||
|         ) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||
|  | ||||
| @@ -192,7 +200,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False) | ||||
|         document.delete() | ||||
|         self.assertEqual( | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), | ||||
|             False, | ||||
|         ) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||
|         self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True) | ||||
| @@ -363,7 +372,9 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf") | ||||
|  | ||||
|         doc = Document.objects.create( | ||||
|             title="doc2", checksum="B", mime_type="application/pdf" | ||||
|             title="doc2", | ||||
|             checksum="B", | ||||
|             mime_type="application/pdf", | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(generate_filename(doc), "doc2.pdf") | ||||
| @@ -380,12 +391,14 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         ) | ||||
|  | ||||
|     @override_settings( | ||||
|         PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}" | ||||
|         PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}", | ||||
|     ) | ||||
|     def test_created_year_month_day(self): | ||||
|         d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1)) | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", mime_type="application/pdf", created=d1 | ||||
|             title="doc1", | ||||
|             mime_type="application/pdf", | ||||
|             created=d1, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(generate_filename(doc1), "2020-03-06.pdf") | ||||
| @@ -395,12 +408,14 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(generate_filename(doc1), "2020-11-16.pdf") | ||||
|  | ||||
|     @override_settings( | ||||
|         PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}" | ||||
|         PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}", | ||||
|     ) | ||||
|     def test_added_year_month_day(self): | ||||
|         d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1)) | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", mime_type="application/pdf", added=d1 | ||||
|             title="doc1", | ||||
|             mime_type="application/pdf", | ||||
|             added=d1, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(generate_filename(doc1), "232-01-09.pdf") | ||||
| @@ -410,7 +425,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(generate_filename(doc1), "2020-11-16.pdf") | ||||
|  | ||||
|     @override_settings( | ||||
|         PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}" | ||||
|         PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}", | ||||
|     ) | ||||
|     def test_nested_directory_cleanup(self): | ||||
|         document = Document() | ||||
| @@ -431,7 +446,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         document.delete() | ||||
|  | ||||
|         self.assertEqual( | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False | ||||
|             os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), | ||||
|             False, | ||||
|         ) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||
| @@ -456,7 +472,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         os.makedirs(os.path.join(tmp, "notempty", "empty")) | ||||
|  | ||||
|         delete_empty_directories( | ||||
|             os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR | ||||
|             os.path.join(tmp, "notempty", "empty"), | ||||
|             root=settings.ORIGINALS_DIR, | ||||
|         ) | ||||
|         self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) | ||||
|         self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True) | ||||
| @@ -483,10 +500,16 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{title}") | ||||
|     def test_duplicates(self): | ||||
|         document = Document.objects.create( | ||||
|             mime_type="application/pdf", title="qwe", checksum="A", pk=1 | ||||
|             mime_type="application/pdf", | ||||
|             title="qwe", | ||||
|             checksum="A", | ||||
|             pk=1, | ||||
|         ) | ||||
|         document2 = Document.objects.create( | ||||
|             mime_type="application/pdf", title="qwe", checksum="B", pk=2 | ||||
|             mime_type="application/pdf", | ||||
|             title="qwe", | ||||
|             checksum="B", | ||||
|             pk=2, | ||||
|         ) | ||||
|         Path(document.source_path).touch() | ||||
|         Path(document2.source_path).touch() | ||||
| @@ -584,10 +607,12 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): | ||||
|         self.assertTrue(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(os.path.isfile(doc.archive_path)) | ||||
|         self.assertEqual( | ||||
|             doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf") | ||||
|             doc.source_path, | ||||
|             os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"), | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf") | ||||
|             doc.archive_path, | ||||
|             os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"), | ||||
|         ) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") | ||||
| @@ -851,7 +876,10 @@ class TestFilenameGeneration(TestCase): | ||||
|     def test_invalid_characters(self): | ||||
|  | ||||
|         doc = Document.objects.create( | ||||
|             title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1" | ||||
|             title="This. is the title.", | ||||
|             mime_type="application/pdf", | ||||
|             pk=1, | ||||
|             checksum="1", | ||||
|         ) | ||||
|         self.assertEqual(generate_filename(doc), "This. is the title.pdf") | ||||
|  | ||||
| @@ -877,7 +905,9 @@ class TestFilenameGeneration(TestCase): | ||||
|  | ||||
| def run(): | ||||
|     doc = Document.objects.create( | ||||
|         checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow" | ||||
|         checksum=str(uuid.uuid4()), | ||||
|         title=str(uuid.uuid4()), | ||||
|         content="wow", | ||||
|     ) | ||||
|     doc.filename = generate_unique_filename(doc) | ||||
|     Path(doc.thumbnail_path).touch() | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| from django.core.management.base import CommandError | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents.settings import EXPORTER_FILE_NAME | ||||
|  | ||||
| from ..management.commands.document_importer import Command | ||||
|  | ||||
|  | ||||
| @@ -12,7 +12,9 @@ class TestImporter(TestCase): | ||||
|     def test_check_manifest_exists(self): | ||||
|         cmd = Command() | ||||
|         self.assertRaises( | ||||
|             CommandError, cmd._check_manifest_exists, "/tmp/manifest.json" | ||||
|             CommandError, | ||||
|             cmd._check_manifest_exists, | ||||
|             "/tmp/manifest.json", | ||||
|         ) | ||||
|  | ||||
|     def test_check_manifest(self): | ||||
| @@ -26,11 +28,11 @@ class TestImporter(TestCase): | ||||
|         self.assertTrue("The manifest file contains a record" in str(cm.exception)) | ||||
|  | ||||
|         cmd.manifest = [ | ||||
|             {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"} | ||||
|             {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}, | ||||
|         ] | ||||
|         # self.assertRaises(CommandError, cmd._check_manifest) | ||||
|         with self.assertRaises(CommandError) as cm: | ||||
|             cmd._check_manifest() | ||||
|         self.assertTrue( | ||||
|             'The manifest file refers to "noexist.pdf"' in str(cm.exception) | ||||
|             'The manifest file refers to "noexist.pdf"' in str(cm.exception), | ||||
|         ) | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents import index | ||||
| from documents.models import Document | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| @@ -9,7 +8,9 @@ class TestAutoComplete(DirectoriesMixin, TestCase): | ||||
|     def test_auto_complete(self): | ||||
|  | ||||
|         doc1 = Document.objects.create( | ||||
|             title="doc1", checksum="A", content="test test2 test3" | ||||
|             title="doc1", | ||||
|             checksum="A", | ||||
|             content="test test2 test3", | ||||
|         ) | ||||
|         doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2") | ||||
|         doc3 = Document.objects.create(title="doc3", checksum="C", content="test2") | ||||
| @@ -21,10 +22,12 @@ class TestAutoComplete(DirectoriesMixin, TestCase): | ||||
|         ix = index.open_index() | ||||
|  | ||||
|         self.assertListEqual( | ||||
|             index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"] | ||||
|             index.autocomplete(ix, "tes"), | ||||
|             [b"test3", b"test", b"test2"], | ||||
|         ) | ||||
|         self.assertListEqual( | ||||
|             index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"] | ||||
|             index.autocomplete(ix, "tes", limit=3), | ||||
|             [b"test3", b"test", b"test2"], | ||||
|         ) | ||||
|         self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"]) | ||||
|         self.assertListEqual(index.autocomplete(ix, "tes", limit=0), []) | ||||
|   | ||||
| @@ -1,16 +1,14 @@ | ||||
| import hashlib | ||||
| import tempfile | ||||
| import filecmp | ||||
| import hashlib | ||||
| import os | ||||
| import shutil | ||||
| import tempfile | ||||
| from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
|  | ||||
| from django.core.management import call_command | ||||
|  | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from documents.file_handling import generate_filename | ||||
| from documents.management.commands.document_archiver import handle_document | ||||
| from documents.models import Document | ||||
| @@ -34,7 +32,8 @@ class TestArchiver(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy( | ||||
|             sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf") | ||||
|             sample_file, | ||||
|             os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"), | ||||
|         ) | ||||
|  | ||||
|         call_command("document_archiver") | ||||
| @@ -43,7 +42,8 @@ class TestArchiver(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy( | ||||
|             sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf") | ||||
|             sample_file, | ||||
|             os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"), | ||||
|         ) | ||||
|  | ||||
|         handle_document(doc.pk) | ||||
| @@ -90,7 +90,8 @@ class TestArchiver(DirectoriesMixin, TestCase): | ||||
|         ) | ||||
|         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf")) | ||||
|         shutil.copy( | ||||
|             sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf") | ||||
|             sample_file, | ||||
|             os.path.join(self.dirs.originals_dir, f"document_01.pdf"), | ||||
|         ) | ||||
|  | ||||
|         handle_document(doc2.pk) | ||||
| @@ -120,7 +121,9 @@ class TestDecryptDocuments(TestCase): | ||||
|         os.makedirs(thumb_dir, exist_ok=True) | ||||
|  | ||||
|         override_settings( | ||||
|             ORIGINALS_DIR=originals_dir, THUMBNAIL_DIR=thumb_dir, PASSPHRASE="test" | ||||
|             ORIGINALS_DIR=originals_dir, | ||||
|             THUMBNAIL_DIR=thumb_dir, | ||||
|             PASSPHRASE="test", | ||||
|         ).enable() | ||||
|  | ||||
|         doc = Document.objects.create( | ||||
| @@ -206,7 +209,7 @@ class TestRenamer(DirectoriesMixin, TestCase): | ||||
|  | ||||
| class TestCreateClassifier(TestCase): | ||||
|     @mock.patch( | ||||
|         "documents.management.commands.document_create_classifier.train_classifier" | ||||
|         "documents.management.commands.document_create_classifier.train_classifier", | ||||
|     ) | ||||
|     def test_create_classifier(self, m): | ||||
|         call_command("document_create_classifier") | ||||
| @@ -224,7 +227,10 @@ class TestSanityChecker(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_errors(self): | ||||
|         doc = Document.objects.create( | ||||
|             title="test", content="test", filename="test.pdf", checksum="abc" | ||||
|             title="test", | ||||
|             content="test", | ||||
|             filename="test.pdf", | ||||
|             checksum="abc", | ||||
|         ) | ||||
|         Path(doc.source_path).touch() | ||||
|         Path(doc.thumbnail_path).touch() | ||||
|   | ||||
| @@ -6,12 +6,13 @@ from time import sleep | ||||
| from unittest import mock | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.management import call_command, CommandError | ||||
| from django.test import override_settings, TransactionTestCase | ||||
|  | ||||
| from documents.models import Tag | ||||
| from django.core.management import call_command | ||||
| from django.core.management import CommandError | ||||
| from django.test import override_settings | ||||
| from django.test import TransactionTestCase | ||||
| from documents.consumer import ConsumerError | ||||
| from documents.management.commands import document_consumer | ||||
| from documents.models import Tag | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| @@ -41,7 +42,7 @@ class ConsumerMixin: | ||||
|         super(ConsumerMixin, self).setUp() | ||||
|         self.t = None | ||||
|         patcher = mock.patch( | ||||
|             "documents.management.commands.document_consumer.async_task" | ||||
|             "documents.management.commands.document_consumer.async_task", | ||||
|         ) | ||||
|         self.task_mock = patcher.start() | ||||
|         self.addCleanup(patcher.stop) | ||||
| @@ -208,13 +209,16 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase): | ||||
|         self.t_start() | ||||
|  | ||||
|         shutil.copy( | ||||
|             self.sample_file, os.path.join(self.dirs.consumption_dir, ".DS_STORE") | ||||
|             self.sample_file, | ||||
|             os.path.join(self.dirs.consumption_dir, ".DS_STORE"), | ||||
|         ) | ||||
|         shutil.copy( | ||||
|             self.sample_file, os.path.join(self.dirs.consumption_dir, "my_file.pdf") | ||||
|             self.sample_file, | ||||
|             os.path.join(self.dirs.consumption_dir, "my_file.pdf"), | ||||
|         ) | ||||
|         shutil.copy( | ||||
|             self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_file.pdf") | ||||
|             self.sample_file, | ||||
|             os.path.join(self.dirs.consumption_dir, "._my_file.pdf"), | ||||
|         ) | ||||
|         shutil.copy( | ||||
|             self.sample_file, | ||||
| @@ -258,7 +262,9 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase): | ||||
|  | ||||
|  | ||||
| @override_settings( | ||||
|     CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20 | ||||
|     CONSUMER_POLLING=1, | ||||
|     CONSUMER_POLLING_DELAY=3, | ||||
|     CONSUMER_POLLING_RETRY_COUNT=20, | ||||
| ) | ||||
| class TestConsumerPolling(TestConsumer): | ||||
|     # just do all the tests with polling | ||||
| @@ -319,7 +325,9 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase): | ||||
|         self.assertCountEqual(kwargs["override_tag_ids"], tag_ids) | ||||
|  | ||||
|     @override_settings( | ||||
|         CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20 | ||||
|         CONSUMER_POLLING=1, | ||||
|         CONSUMER_POLLING_DELAY=1, | ||||
|         CONSUMER_POLLING_RETRY_COUNT=20, | ||||
|     ) | ||||
|     def test_consume_file_with_path_tags_polling(self): | ||||
|         self.test_consume_file_with_path_tags() | ||||
|   | ||||
| @@ -7,13 +7,17 @@ from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| from django.core.management import call_command | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from documents.management.commands import document_exporter | ||||
| from documents.models import Document, Tag, DocumentType, Correspondent | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
| from documents.models import Tag | ||||
| from documents.sanity_checker import check_sanity | ||||
| from documents.settings import EXPORTER_FILE_NAME | ||||
| from documents.tests.utils import DirectoriesMixin, paperless_environment | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from documents.tests.utils import paperless_environment | ||||
|  | ||||
|  | ||||
| class TestExportImport(DirectoriesMixin, TestCase): | ||||
| @@ -66,8 +70,9 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|     def _get_document_from_manifest(self, manifest, id): | ||||
|         f = list( | ||||
|             filter( | ||||
|                 lambda d: d["model"] == "documents.document" and d["pk"] == id, manifest | ||||
|             ) | ||||
|                 lambda d: d["model"] == "documents.document" and d["pk"] == id, | ||||
|                 manifest, | ||||
|             ), | ||||
|         ) | ||||
|         if len(f) == 1: | ||||
|             return f[0] | ||||
| @@ -76,7 +81,10 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     @override_settings(PASSPHRASE="test") | ||||
|     def _do_export( | ||||
|         self, use_filename_format=False, compare_checksums=False, delete=False | ||||
|         self, | ||||
|         use_filename_format=False, | ||||
|         compare_checksums=False, | ||||
|         delete=False, | ||||
|     ): | ||||
|         args = ["document_exporter", self.target] | ||||
|         if use_filename_format: | ||||
| @@ -104,7 +112,8 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         self.assertEqual(len(manifest), 8) | ||||
|         self.assertEqual( | ||||
|             len(list(filter(lambda e: e["model"] == "documents.document", manifest))), 4 | ||||
|             len(list(filter(lambda e: e["model"] == "documents.document", manifest))), | ||||
|             4, | ||||
|         ) | ||||
|  | ||||
|         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json"))) | ||||
| @@ -129,7 +138,8 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         for element in manifest: | ||||
|             if element["model"] == "documents.document": | ||||
|                 fname = os.path.join( | ||||
|                     self.target, element[document_exporter.EXPORTER_FILE_NAME] | ||||
|                     self.target, | ||||
|                     element[document_exporter.EXPORTER_FILE_NAME], | ||||
|                 ) | ||||
|                 self.assertTrue(os.path.exists(fname)) | ||||
|                 self.assertTrue( | ||||
| @@ -137,8 +147,8 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|                         os.path.join( | ||||
|                             self.target, | ||||
|                             element[document_exporter.EXPORTER_THUMBNAIL_NAME], | ||||
|                         ) | ||||
|                     ) | ||||
|                         ), | ||||
|                     ), | ||||
|                 ) | ||||
|  | ||||
|                 with open(fname, "rb") as f: | ||||
| @@ -146,12 +156,14 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|                 self.assertEqual(checksum, element["fields"]["checksum"]) | ||||
|  | ||||
|                 self.assertEqual( | ||||
|                     element["fields"]["storage_type"], Document.STORAGE_TYPE_UNENCRYPTED | ||||
|                     element["fields"]["storage_type"], | ||||
|                     Document.STORAGE_TYPE_UNENCRYPTED, | ||||
|                 ) | ||||
|  | ||||
|                 if document_exporter.EXPORTER_ARCHIVE_NAME in element: | ||||
|                     fname = os.path.join( | ||||
|                         self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME] | ||||
|                         self.target, | ||||
|                         element[document_exporter.EXPORTER_ARCHIVE_NAME], | ||||
|                     ) | ||||
|                     self.assertTrue(os.path.exists(fname)) | ||||
|  | ||||
| @@ -188,7 +200,7 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         ) | ||||
|  | ||||
|         with override_settings( | ||||
|             PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}" | ||||
|             PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}", | ||||
|         ): | ||||
|             self.test_exporter(use_filename_format=True) | ||||
|  | ||||
| @@ -205,7 +217,7 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime | ||||
|  | ||||
|         with mock.patch( | ||||
|             "documents.management.commands.document_exporter.shutil.copy2" | ||||
|             "documents.management.commands.document_exporter.shutil.copy2", | ||||
|         ) as m: | ||||
|             self._do_export() | ||||
|             m.assert_not_called() | ||||
| @@ -216,7 +228,7 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         Path(self.d1.source_path).touch() | ||||
|  | ||||
|         with mock.patch( | ||||
|             "documents.management.commands.document_exporter.shutil.copy2" | ||||
|             "documents.management.commands.document_exporter.shutil.copy2", | ||||
|         ) as m: | ||||
|             self._do_export() | ||||
|             self.assertEqual(m.call_count, 1) | ||||
| @@ -239,7 +251,7 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json"))) | ||||
|  | ||||
|         with mock.patch( | ||||
|             "documents.management.commands.document_exporter.shutil.copy2" | ||||
|             "documents.management.commands.document_exporter.shutil.copy2", | ||||
|         ) as m: | ||||
|             self._do_export() | ||||
|             m.assert_not_called() | ||||
| @@ -250,7 +262,7 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         self.d2.save() | ||||
|  | ||||
|         with mock.patch( | ||||
|             "documents.management.commands.document_exporter.shutil.copy2" | ||||
|             "documents.management.commands.document_exporter.shutil.copy2", | ||||
|         ) as m: | ||||
|             self._do_export(compare_checksums=True) | ||||
|             self.assertEqual(m.call_count, 1) | ||||
| @@ -270,26 +282,29 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id) | ||||
|         self.assertTrue( | ||||
|             os.path.isfile( | ||||
|                 os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]) | ||||
|             ) | ||||
|                 os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]), | ||||
|             ), | ||||
|         ) | ||||
|         self.d3.delete() | ||||
|  | ||||
|         manifest = self._do_export() | ||||
|         self.assertRaises( | ||||
|             ValueError, self._get_document_from_manifest, manifest, self.d3.id | ||||
|             ValueError, | ||||
|             self._get_document_from_manifest, | ||||
|             manifest, | ||||
|             self.d3.id, | ||||
|         ) | ||||
|         self.assertTrue( | ||||
|             os.path.isfile( | ||||
|                 os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]) | ||||
|             ) | ||||
|                 os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         manifest = self._do_export(delete=True) | ||||
|         self.assertFalse( | ||||
|             os.path.isfile( | ||||
|                 os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]) | ||||
|             ) | ||||
|                 os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         self.assertTrue(len(manifest), 6) | ||||
| @@ -316,7 +331,7 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json"))) | ||||
|         self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf"))) | ||||
|         self.assertTrue( | ||||
|             os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")) | ||||
|             os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")), | ||||
|         ) | ||||
|  | ||||
|     def test_export_missing_files(self): | ||||
|   | ||||
| @@ -1,35 +1,50 @@ | ||||
| from django.core.management import call_command | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents.models import Document, Tag, Correspondent, DocumentType | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
| from documents.models import Tag | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| class TestRetagger(DirectoriesMixin, TestCase): | ||||
|     def make_models(self): | ||||
|         self.d1 = Document.objects.create( | ||||
|             checksum="A", title="A", content="first document" | ||||
|             checksum="A", | ||||
|             title="A", | ||||
|             content="first document", | ||||
|         ) | ||||
|         self.d2 = Document.objects.create( | ||||
|             checksum="B", title="B", content="second document" | ||||
|             checksum="B", | ||||
|             title="B", | ||||
|             content="second document", | ||||
|         ) | ||||
|         self.d3 = Document.objects.create( | ||||
|             checksum="C", title="C", content="unrelated document" | ||||
|             checksum="C", | ||||
|             title="C", | ||||
|             content="unrelated document", | ||||
|         ) | ||||
|         self.d4 = Document.objects.create( | ||||
|             checksum="D", title="D", content="auto document" | ||||
|             checksum="D", | ||||
|             title="D", | ||||
|             content="auto document", | ||||
|         ) | ||||
|  | ||||
|         self.tag_first = Tag.objects.create( | ||||
|             name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY | ||||
|             name="tag1", | ||||
|             match="first", | ||||
|             matching_algorithm=Tag.MATCH_ANY, | ||||
|         ) | ||||
|         self.tag_second = Tag.objects.create( | ||||
|             name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY | ||||
|             name="tag2", | ||||
|             match="second", | ||||
|             matching_algorithm=Tag.MATCH_ANY, | ||||
|         ) | ||||
|         self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True) | ||||
|         self.tag_no_match = Tag.objects.create(name="test2") | ||||
|         self.tag_auto = Tag.objects.create( | ||||
|             name="tagauto", matching_algorithm=Tag.MATCH_AUTO | ||||
|             name="tagauto", | ||||
|             matching_algorithm=Tag.MATCH_AUTO, | ||||
|         ) | ||||
|  | ||||
|         self.d3.tags.add(self.tag_inbox) | ||||
| @@ -37,17 +52,25 @@ class TestRetagger(DirectoriesMixin, TestCase): | ||||
|         self.d4.tags.add(self.tag_auto) | ||||
|  | ||||
|         self.correspondent_first = Correspondent.objects.create( | ||||
|             name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY | ||||
|             name="c1", | ||||
|             match="first", | ||||
|             matching_algorithm=Correspondent.MATCH_ANY, | ||||
|         ) | ||||
|         self.correspondent_second = Correspondent.objects.create( | ||||
|             name="c2", match="second", matching_algorithm=Correspondent.MATCH_ANY | ||||
|             name="c2", | ||||
|             match="second", | ||||
|             matching_algorithm=Correspondent.MATCH_ANY, | ||||
|         ) | ||||
|  | ||||
|         self.doctype_first = DocumentType.objects.create( | ||||
|             name="dt1", match="first", matching_algorithm=DocumentType.MATCH_ANY | ||||
|             name="dt1", | ||||
|             match="first", | ||||
|             matching_algorithm=DocumentType.MATCH_ANY, | ||||
|         ) | ||||
|         self.doctype_second = DocumentType.objects.create( | ||||
|             name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY | ||||
|             name="dt2", | ||||
|             match="second", | ||||
|             matching_algorithm=DocumentType.MATCH_ANY, | ||||
|         ) | ||||
|  | ||||
|     def get_updated_docs(self): | ||||
| @@ -98,10 +121,12 @@ class TestRetagger(DirectoriesMixin, TestCase): | ||||
|         self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id)) | ||||
|  | ||||
|         self.assertCountEqual( | ||||
|             [tag.id for tag in d_first.tags.all()], [self.tag_first.id] | ||||
|             [tag.id for tag in d_first.tags.all()], | ||||
|             [self.tag_first.id], | ||||
|         ) | ||||
|         self.assertCountEqual( | ||||
|             [tag.id for tag in d_second.tags.all()], [self.tag_second.id] | ||||
|             [tag.id for tag in d_second.tags.all()], | ||||
|             [self.tag_second.id], | ||||
|         ) | ||||
|         self.assertCountEqual( | ||||
|             [tag.id for tag in d_unrelated.tags.all()], | ||||
| @@ -133,7 +158,10 @@ class TestRetagger(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_add_tags_suggest_url(self): | ||||
|         call_command( | ||||
|             "document_retagger", "--tags", "--suggest", "--base-url=http://localhost" | ||||
|             "document_retagger", | ||||
|             "--tags", | ||||
|             "--suggest", | ||||
|             "--base-url=http://localhost", | ||||
|         ) | ||||
|         d_first, d_second, d_unrelated, d_auto = self.get_updated_docs() | ||||
|  | ||||
|   | ||||
| @@ -5,9 +5,11 @@ from unittest import mock | ||||
| from django.contrib.auth.models import User | ||||
| from django.core.management import call_command | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents.management.commands.document_thumbnails import _process_document | ||||
| from documents.models import Document, Tag, Correspondent, DocumentType | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
| from documents.models import Tag | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -4,9 +4,11 @@ from unittest import mock | ||||
|  | ||||
| from django.core.management import call_command | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents.management.commands.document_thumbnails import _process_document | ||||
| from documents.models import Document, Tag, Correspondent, DocumentType | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
| from documents.models import Tag | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -4,10 +4,14 @@ from random import randint | ||||
|  | ||||
| from django.contrib.admin.models import LogEntry | ||||
| from django.contrib.auth.models import User | ||||
| from django.test import TestCase, override_settings | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
|  | ||||
| from .. import matching | ||||
| from ..models import Correspondent, Document, Tag, DocumentType | ||||
| from ..models import Correspondent | ||||
| from ..models import Document | ||||
| from ..models import DocumentType | ||||
| from ..models import Tag | ||||
| from ..signals import document_consumption_finished | ||||
|  | ||||
|  | ||||
| @@ -209,7 +213,8 @@ class TestDocumentConsumptionFinishedSignal(TestCase): | ||||
|         TestCase.setUp(self) | ||||
|         User.objects.create_user(username="test_consumer", password="12345") | ||||
|         self.doc_contains = Document.objects.create( | ||||
|             content="I contain the keyword.", mime_type="application/pdf" | ||||
|             content="I contain the keyword.", | ||||
|             mime_type="application/pdf", | ||||
|         ) | ||||
|  | ||||
|         self.index_dir = tempfile.mkdtemp() | ||||
| @@ -221,43 +226,56 @@ class TestDocumentConsumptionFinishedSignal(TestCase): | ||||
|  | ||||
|     def test_tag_applied_any(self): | ||||
|         t1 = Tag.objects.create( | ||||
|             name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY | ||||
|             name="test", | ||||
|             match="keyword", | ||||
|             matching_algorithm=Tag.MATCH_ANY, | ||||
|         ) | ||||
|         document_consumption_finished.send( | ||||
|             sender=self.__class__, document=self.doc_contains | ||||
|             sender=self.__class__, | ||||
|             document=self.doc_contains, | ||||
|         ) | ||||
|         self.assertTrue(list(self.doc_contains.tags.all()) == [t1]) | ||||
|  | ||||
|     def test_tag_not_applied(self): | ||||
|         Tag.objects.create( | ||||
|             name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY | ||||
|             name="test", | ||||
|             match="no-match", | ||||
|             matching_algorithm=Tag.MATCH_ANY, | ||||
|         ) | ||||
|         document_consumption_finished.send( | ||||
|             sender=self.__class__, document=self.doc_contains | ||||
|             sender=self.__class__, | ||||
|             document=self.doc_contains, | ||||
|         ) | ||||
|         self.assertTrue(list(self.doc_contains.tags.all()) == []) | ||||
|  | ||||
|     def test_correspondent_applied(self): | ||||
|         correspondent = Correspondent.objects.create( | ||||
|             name="test", match="keyword", matching_algorithm=Correspondent.MATCH_ANY | ||||
|             name="test", | ||||
|             match="keyword", | ||||
|             matching_algorithm=Correspondent.MATCH_ANY, | ||||
|         ) | ||||
|         document_consumption_finished.send( | ||||
|             sender=self.__class__, document=self.doc_contains | ||||
|             sender=self.__class__, | ||||
|             document=self.doc_contains, | ||||
|         ) | ||||
|         self.assertTrue(self.doc_contains.correspondent == correspondent) | ||||
|  | ||||
|     def test_correspondent_not_applied(self): | ||||
|         Tag.objects.create( | ||||
|             name="test", match="no-match", matching_algorithm=Correspondent.MATCH_ANY | ||||
|             name="test", | ||||
|             match="no-match", | ||||
|             matching_algorithm=Correspondent.MATCH_ANY, | ||||
|         ) | ||||
|         document_consumption_finished.send( | ||||
|             sender=self.__class__, document=self.doc_contains | ||||
|             sender=self.__class__, | ||||
|             document=self.doc_contains, | ||||
|         ) | ||||
|         self.assertEqual(self.doc_contains.correspondent, None) | ||||
|  | ||||
|     def test_logentry_created(self): | ||||
|         document_consumption_finished.send( | ||||
|             sender=self.__class__, document=self.doc_contains | ||||
|             sender=self.__class__, | ||||
|             document=self.doc_contains, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(LogEntry.objects.count(), 1) | ||||
|   | ||||
| @@ -6,9 +6,9 @@ from unittest import mock | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.test import override_settings | ||||
|  | ||||
| from documents.parsers import ParseError | ||||
| from documents.tests.utils import DirectoriesMixin, TestMigrations | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from documents.tests.utils import TestMigrations | ||||
|  | ||||
|  | ||||
| STORAGE_TYPE_GPG = "gpg" | ||||
| @@ -93,10 +93,18 @@ def make_test_document( | ||||
| simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg") | ||||
| simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") | ||||
| simple_pdf2 = os.path.join( | ||||
|     os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf" | ||||
|     os.path.dirname(__file__), | ||||
|     "samples", | ||||
|     "documents", | ||||
|     "originals", | ||||
|     "0000002.pdf", | ||||
| ) | ||||
| simple_pdf3 = os.path.join( | ||||
|     os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf" | ||||
|     os.path.dirname(__file__), | ||||
|     "samples", | ||||
|     "documents", | ||||
|     "originals", | ||||
|     "0000003.pdf", | ||||
| ) | ||||
| simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt") | ||||
| simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png") | ||||
| @@ -121,19 +129,43 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations): | ||||
|             simple_pdf, | ||||
|         ) | ||||
|         self.no_text = make_test_document( | ||||
|             Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf | ||||
|             Document, | ||||
|             "no-text", | ||||
|             "image/png", | ||||
|             simple_png2, | ||||
|             "no-text.png", | ||||
|             simple_pdf, | ||||
|         ) | ||||
|         self.doc_no_archive = make_test_document( | ||||
|             Document, "no_archive", "text/plain", simple_txt, "no_archive.txt" | ||||
|             Document, | ||||
|             "no_archive", | ||||
|             "text/plain", | ||||
|             simple_txt, | ||||
|             "no_archive.txt", | ||||
|         ) | ||||
|         self.clash1 = make_test_document( | ||||
|             Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf | ||||
|             Document, | ||||
|             "clash", | ||||
|             "application/pdf", | ||||
|             simple_pdf, | ||||
|             "clash.pdf", | ||||
|             simple_pdf, | ||||
|         ) | ||||
|         self.clash2 = make_test_document( | ||||
|             Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf | ||||
|             Document, | ||||
|             "clash", | ||||
|             "image/jpeg", | ||||
|             simple_jpg, | ||||
|             "clash.jpg", | ||||
|             simple_pdf, | ||||
|         ) | ||||
|         self.clash3 = make_test_document( | ||||
|             Document, "clash", "image/png", simple_png, "clash.png", simple_pdf | ||||
|             Document, | ||||
|             "clash", | ||||
|             "image/png", | ||||
|             simple_png, | ||||
|             "clash.png", | ||||
|             simple_pdf, | ||||
|         ) | ||||
|         self.clash4 = make_test_document( | ||||
|             Document, | ||||
| @@ -147,7 +179,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations): | ||||
|         self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2)) | ||||
|         self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3)) | ||||
|         self.assertNotEqual( | ||||
|             archive_path_old(self.clash1), archive_path_old(self.clash4) | ||||
|             archive_path_old(self.clash1), | ||||
|             archive_path_old(self.clash4), | ||||
|         ) | ||||
|  | ||||
|     def testArchiveFilesMigrated(self): | ||||
| @@ -171,19 +204,23 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations): | ||||
|                 self.assertEqual(archive_checksum, doc.archive_checksum) | ||||
|  | ||||
|         self.assertEqual( | ||||
|             Document.objects.filter(archive_checksum__isnull=False).count(), 6 | ||||
|             Document.objects.filter(archive_checksum__isnull=False).count(), | ||||
|             6, | ||||
|         ) | ||||
|  | ||||
|     def test_filenames(self): | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf" | ||||
|             Document.objects.get(id=self.unrelated.id).archive_filename, | ||||
|             "unrelated.pdf", | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf" | ||||
|             Document.objects.get(id=self.no_text.id).archive_filename, | ||||
|             "no-text.pdf", | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.doc_no_archive.id).archive_filename, None | ||||
|             Document.objects.get(id=self.doc_no_archive.id).archive_filename, | ||||
|             None, | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.clash1.id).archive_filename, | ||||
| @@ -198,7 +235,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations): | ||||
|             f"{self.clash3.id:07}.pdf", | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf" | ||||
|             Document.objects.get(id=self.clash4.id).archive_filename, | ||||
|             "clash.png.pdf", | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @@ -207,16 +245,20 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles): | ||||
|     def test_filenames(self): | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf" | ||||
|             Document.objects.get(id=self.unrelated.id).archive_filename, | ||||
|             "unrelated.pdf", | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf" | ||||
|             Document.objects.get(id=self.no_text.id).archive_filename, | ||||
|             "no-text.pdf", | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.doc_no_archive.id).archive_filename, None | ||||
|             Document.objects.get(id=self.doc_no_archive.id).archive_filename, | ||||
|             None, | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf" | ||||
|             Document.objects.get(id=self.clash1.id).archive_filename, | ||||
|             "none/clash.pdf", | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.clash2.id).archive_filename, | ||||
| @@ -227,7 +269,8 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles): | ||||
|             "none/clash_02.pdf", | ||||
|         ) | ||||
|         self.assertEqual( | ||||
|             Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf" | ||||
|             Document.objects.get(id=self.clash4.id).archive_filename, | ||||
|             "clash.png.pdf", | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @@ -248,12 +291,19 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|  | ||||
|         doc = make_test_document( | ||||
|             Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf | ||||
|             Document, | ||||
|             "clash", | ||||
|             "application/pdf", | ||||
|             simple_pdf, | ||||
|             "clash.pdf", | ||||
|             simple_pdf, | ||||
|         ) | ||||
|         os.unlink(archive_path_old(doc)) | ||||
|  | ||||
|         self.assertRaisesMessage( | ||||
|             ValueError, "does not exist at: ", self.performMigration | ||||
|             ValueError, | ||||
|             "does not exist at: ", | ||||
|             self.performMigration, | ||||
|         ) | ||||
|  | ||||
|     def test_parser_missing(self): | ||||
| @@ -277,7 +327,9 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||
|         ) | ||||
|  | ||||
|         self.assertRaisesMessage( | ||||
|             ValueError, "no parsers are available", self.performMigration | ||||
|             ValueError, | ||||
|             "no parsers are available", | ||||
|             self.performMigration, | ||||
|         ) | ||||
|  | ||||
|     @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper") | ||||
| @@ -286,7 +338,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|  | ||||
|         doc1 = make_test_document( | ||||
|             Document, "document", "image/png", simple_png, "document.png", simple_pdf | ||||
|             Document, | ||||
|             "document", | ||||
|             "image/png", | ||||
|             simple_png, | ||||
|             "document.png", | ||||
|             simple_pdf, | ||||
|         ) | ||||
|         doc2 = make_test_document( | ||||
|             Document, | ||||
| @@ -311,8 +368,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||
|                     filter( | ||||
|                         lambda log: "Parse error, will try again in 5 seconds" in log, | ||||
|                         capture.output, | ||||
|                     ) | ||||
|                 ) | ||||
|                     ), | ||||
|                 ), | ||||
|             ), | ||||
|             4, | ||||
|         ) | ||||
| @@ -324,8 +381,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||
|                         lambda log: "Unable to regenerate archive document for ID:" | ||||
|                         in log, | ||||
|                         capture.output, | ||||
|                     ) | ||||
|                 ) | ||||
|                     ), | ||||
|                 ), | ||||
|             ), | ||||
|             2, | ||||
|         ) | ||||
| @@ -347,7 +404,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|  | ||||
|         doc1 = make_test_document( | ||||
|             Document, "document", "image/png", simple_png, "document.png", simple_pdf | ||||
|             Document, | ||||
|             "document", | ||||
|             "image/png", | ||||
|             simple_png, | ||||
|             "document.png", | ||||
|             simple_pdf, | ||||
|         ) | ||||
|         doc2 = make_test_document( | ||||
|             Document, | ||||
| @@ -368,8 +430,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||
|                         lambda log: "Parser did not return an archive document for document" | ||||
|                         in log, | ||||
|                         capture.output, | ||||
|                     ) | ||||
|                 ) | ||||
|                     ), | ||||
|                 ), | ||||
|             ), | ||||
|             2, | ||||
|         ) | ||||
| @@ -405,7 +467,11 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations): | ||||
|             "unrelated.pdf", | ||||
|         ) | ||||
|         doc_no_archive = make_test_document( | ||||
|             Document, "no_archive", "text/plain", simple_txt, "no_archive.txt" | ||||
|             Document, | ||||
|             "no_archive", | ||||
|             "text/plain", | ||||
|             simple_txt, | ||||
|             "no_archive.txt", | ||||
|         ) | ||||
|         clashB = make_test_document( | ||||
|             Document, | ||||
| @@ -434,13 +500,14 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations): | ||||
|                 self.assertEqual(archive_checksum, doc.archive_checksum) | ||||
|  | ||||
|         self.assertEqual( | ||||
|             Document.objects.filter(archive_checksum__isnull=False).count(), 2 | ||||
|             Document.objects.filter(archive_checksum__isnull=False).count(), | ||||
|             2, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") | ||||
| class TestMigrateArchiveFilesBackwardsWithFilenameFormat( | ||||
|     TestMigrateArchiveFilesBackwards | ||||
|     TestMigrateArchiveFilesBackwards, | ||||
| ): | ||||
|     pass | ||||
|  | ||||
| @@ -505,5 +572,7 @@ class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations): | ||||
|         ) | ||||
|  | ||||
|         self.assertRaisesMessage( | ||||
|             ValueError, "file already exists.", self.performMigration | ||||
|             ValueError, | ||||
|             "file already exists.", | ||||
|             self.performMigration, | ||||
|         ) | ||||
|   | ||||
| @@ -3,9 +3,9 @@ import shutil | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.test import override_settings | ||||
|  | ||||
| from documents.parsers import get_default_file_extension | ||||
| from documents.tests.utils import DirectoriesMixin, TestMigrations | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from documents.tests.utils import TestMigrations | ||||
|  | ||||
| STORAGE_TYPE_UNENCRYPTED = "unencrypted" | ||||
| STORAGE_TYPE_GPG = "gpg" | ||||
| @@ -46,7 +46,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations): | ||||
|     def setUpBeforeMigration(self, apps): | ||||
|         Document = apps.get_model("documents", "Document") | ||||
|         doc = Document.objects.create( | ||||
|             title="test", file_type="pdf", filename="file1.pdf" | ||||
|             title="test", | ||||
|             file_type="pdf", | ||||
|             filename="file1.pdf", | ||||
|         ) | ||||
|         self.doc_id = doc.id | ||||
|         shutil.copy( | ||||
| @@ -55,7 +57,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations): | ||||
|         ) | ||||
|  | ||||
|         doc2 = Document.objects.create( | ||||
|             checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG | ||||
|             checksum="B", | ||||
|             file_type="pdf", | ||||
|             storage_type=STORAGE_TYPE_GPG, | ||||
|         ) | ||||
|         self.doc2_id = doc2.id | ||||
|         shutil.copy( | ||||
| @@ -88,7 +92,9 @@ class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations): | ||||
|     def setUpBeforeMigration(self, apps): | ||||
|         Document = apps.get_model("documents", "Document") | ||||
|         doc = Document.objects.create( | ||||
|             title="test", mime_type="application/pdf", filename="file1.pdf" | ||||
|             title="test", | ||||
|             mime_type="application/pdf", | ||||
|             filename="file1.pdf", | ||||
|         ) | ||||
|         self.doc_id = doc.id | ||||
|         shutil.copy( | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| from documents.tests.utils import DirectoriesMixin, TestMigrations | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from documents.tests.utils import TestMigrations | ||||
|  | ||||
|  | ||||
| class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations): | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| from documents.tests.utils import DirectoriesMixin, TestMigrations | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from documents.tests.utils import TestMigrations | ||||
|  | ||||
|  | ||||
| class TestMigrateTagColor(DirectoriesMixin, TestMigrations): | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| from django.test import TestCase | ||||
|  | ||||
| from .factories import DocumentFactory, CorrespondentFactory | ||||
| from ..models import Document, Correspondent | ||||
| from ..models import Correspondent | ||||
| from ..models import Document | ||||
| from .factories import CorrespondentFactory | ||||
| from .factories import DocumentFactory | ||||
|  | ||||
|  | ||||
| class CorrespondentTestCase(TestCase): | ||||
|   | ||||
| @@ -4,16 +4,14 @@ import tempfile | ||||
| from tempfile import TemporaryDirectory | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from documents.parsers import ( | ||||
|     get_parser_class, | ||||
|     get_supported_file_extensions, | ||||
|     get_default_file_extension, | ||||
|     get_parser_class_for_mime_type, | ||||
|     DocumentParser, | ||||
|     is_file_ext_supported, | ||||
| ) | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from documents.parsers import DocumentParser | ||||
| from documents.parsers import get_default_file_extension | ||||
| from documents.parsers import get_parser_class | ||||
| from documents.parsers import get_parser_class_for_mime_type | ||||
| from documents.parsers import get_supported_file_extensions | ||||
| from documents.parsers import is_file_ext_supported | ||||
| from paperless_tesseract.parsers import RasterisedDocumentParser | ||||
| from paperless_text.parsers import TextDocumentParser | ||||
|  | ||||
|   | ||||
| @@ -6,9 +6,9 @@ from pathlib import Path | ||||
| import filelock | ||||
| from django.conf import settings | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents.models import Document | ||||
| from documents.sanity_checker import check_sanity, SanityCheckMessages | ||||
| from documents.sanity_checker import check_sanity | ||||
| from documents.sanity_checker import SanityCheckMessages | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| @@ -23,7 +23,8 @@ class TestSanityCheckMessages(TestCase): | ||||
|             self.assertEqual(len(capture.output), 1) | ||||
|             self.assertEqual(capture.records[0].levelno, logging.INFO) | ||||
|             self.assertEqual( | ||||
|                 capture.records[0].message, "Sanity checker detected no issues." | ||||
|                 capture.records[0].message, | ||||
|                 "Sanity checker detected no issues.", | ||||
|             ) | ||||
|  | ||||
|     def test_info(self): | ||||
|   | ||||
| @@ -2,8 +2,8 @@ import logging | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase | ||||
|  | ||||
| from paperless.settings import default_task_workers, default_threads_per_worker | ||||
| from paperless.settings import default_task_workers | ||||
| from paperless.settings import default_threads_per_worker | ||||
|  | ||||
|  | ||||
| class TestSettings(TestCase): | ||||
| @@ -21,7 +21,7 @@ class TestSettings(TestCase): | ||||
|     def test_workers_threads(self): | ||||
|         for i in range(1, 64): | ||||
|             with mock.patch( | ||||
|                 "paperless.settings.multiprocessing.cpu_count" | ||||
|                 "paperless.settings.multiprocessing.cpu_count", | ||||
|             ) as cpu_count: | ||||
|                 cpu_count.return_value = i | ||||
|  | ||||
|   | ||||
| @@ -4,10 +4,13 @@ from unittest import mock | ||||
| from django.conf import settings | ||||
| from django.test import TestCase | ||||
| from django.utils import timezone | ||||
|  | ||||
| from documents import tasks | ||||
| from documents.models import Document, Tag, Correspondent, DocumentType | ||||
| from documents.sanity_checker import SanityCheckMessages, SanityCheckFailedException | ||||
| from documents.models import Correspondent | ||||
| from documents.models import Document | ||||
| from documents.models import DocumentType | ||||
| from documents.models import Tag | ||||
| from documents.sanity_checker import SanityCheckFailedException | ||||
| from documents.sanity_checker import SanityCheckMessages | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| @@ -106,7 +109,8 @@ class TestTasks(DirectoriesMixin, TestCase): | ||||
|         messages.warning("Some warning") | ||||
|         m.return_value = messages | ||||
|         self.assertEqual( | ||||
|             tasks.sanity_check(), "Sanity check exited with warnings. See log." | ||||
|             tasks.sanity_check(), | ||||
|             "Sanity check exited with warnings. See log.", | ||||
|         ) | ||||
|         m.assert_called_once() | ||||
|  | ||||
| @@ -116,7 +120,8 @@ class TestTasks(DirectoriesMixin, TestCase): | ||||
|         messages.info("Some info") | ||||
|         m.return_value = messages | ||||
|         self.assertEqual( | ||||
|             tasks.sanity_check(), "Sanity check exited with infos. See log." | ||||
|             tasks.sanity_check(), | ||||
|             "Sanity check exited with infos. See log.", | ||||
|         ) | ||||
|         m.assert_called_once() | ||||
|  | ||||
|   | ||||
| @@ -25,7 +25,7 @@ class TestViews(TestCase): | ||||
|         ]: | ||||
|             if language_given: | ||||
|                 self.client.cookies.load( | ||||
|                     {settings.LANGUAGE_COOKIE_NAME: language_given} | ||||
|                     {settings.LANGUAGE_COOKIE_NAME: language_given}, | ||||
|                 ) | ||||
|             elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys(): | ||||
|                 self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME) | ||||
| @@ -51,5 +51,6 @@ class TestViews(TestCase): | ||||
|                 f"frontend/{language_actual}/polyfills.js", | ||||
|             ) | ||||
|             self.assertEqual( | ||||
|                 response.context_data["main_js"], f"frontend/{language_actual}/main.js" | ||||
|                 response.context_data["main_js"], | ||||
|                 f"frontend/{language_actual}/main.js", | ||||
|             ) | ||||
|   | ||||
| @@ -7,7 +7,8 @@ from contextlib import contextmanager | ||||
| from django.apps import apps | ||||
| from django.db import connection | ||||
| from django.db.migrations.executor import MigrationExecutor | ||||
| from django.test import override_settings, TransactionTestCase | ||||
| from django.test import override_settings | ||||
| from django.test import TransactionTestCase | ||||
|  | ||||
|  | ||||
| def setup_directories(): | ||||
| @@ -97,7 +98,7 @@ class TestMigrations(TransactionTestCase): | ||||
|         assert ( | ||||
|             self.migrate_from and self.migrate_to | ||||
|         ), "TestCase '{}' must define migrate_from and migrate_to     properties".format( | ||||
|             type(self).__name__ | ||||
|             type(self).__name__, | ||||
|         ) | ||||
|         self.migrate_from = [(self.app, self.migrate_from)] | ||||
|         self.migrate_to = [(self.app, self.migrate_to)] | ||||
|   | ||||
| @@ -5,63 +5,70 @@ import uuid | ||||
| import zipfile | ||||
| from datetime import datetime | ||||
| from time import mktime | ||||
| from urllib.parse import quote_plus | ||||
| from unicodedata import normalize | ||||
| from urllib.parse import quote_plus | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.db.models import Count, Max, Case, When, IntegerField | ||||
| from django.db.models import Case | ||||
| from django.db.models import Count | ||||
| from django.db.models import IntegerField | ||||
| from django.db.models import Max | ||||
| from django.db.models import When | ||||
| from django.db.models.functions import Lower | ||||
| from django.http import HttpResponse, HttpResponseBadRequest, Http404 | ||||
| from django.http import Http404 | ||||
| from django.http import HttpResponse | ||||
| from django.http import HttpResponseBadRequest | ||||
| from django.utils.translation import get_language | ||||
| from django.views.decorators.cache import cache_control | ||||
| from django.views.generic import TemplateView | ||||
| from django_filters.rest_framework import DjangoFilterBackend | ||||
| from django_q.tasks import async_task | ||||
| from paperless.db import GnuPG | ||||
| from paperless.views import StandardPagination | ||||
| from rest_framework import parsers | ||||
| from rest_framework.decorators import action | ||||
| from rest_framework.exceptions import NotFound | ||||
| from rest_framework.filters import OrderingFilter, SearchFilter | ||||
| from rest_framework.filters import OrderingFilter | ||||
| from rest_framework.filters import SearchFilter | ||||
| from rest_framework.generics import GenericAPIView | ||||
| from rest_framework.mixins import ( | ||||
|     DestroyModelMixin, | ||||
|     ListModelMixin, | ||||
|     RetrieveModelMixin, | ||||
|     UpdateModelMixin, | ||||
| ) | ||||
| from rest_framework.mixins import DestroyModelMixin | ||||
| from rest_framework.mixins import ListModelMixin | ||||
| from rest_framework.mixins import RetrieveModelMixin | ||||
| from rest_framework.mixins import UpdateModelMixin | ||||
| from rest_framework.permissions import IsAuthenticated | ||||
| from rest_framework.response import Response | ||||
| from rest_framework.views import APIView | ||||
| from rest_framework.viewsets import GenericViewSet, ModelViewSet, ViewSet | ||||
| from rest_framework.viewsets import GenericViewSet | ||||
| from rest_framework.viewsets import ModelViewSet | ||||
| from rest_framework.viewsets import ViewSet | ||||
|  | ||||
| from paperless.db import GnuPG | ||||
| from paperless.views import StandardPagination | ||||
| from .bulk_download import ( | ||||
|     OriginalAndArchiveStrategy, | ||||
|     OriginalsOnlyStrategy, | ||||
|     ArchiveOnlyStrategy, | ||||
| ) | ||||
| from .bulk_download import ArchiveOnlyStrategy | ||||
| from .bulk_download import OriginalAndArchiveStrategy | ||||
| from .bulk_download import OriginalsOnlyStrategy | ||||
| from .classifier import load_classifier | ||||
| from .filters import ( | ||||
|     CorrespondentFilterSet, | ||||
|     DocumentFilterSet, | ||||
|     TagFilterSet, | ||||
|     DocumentTypeFilterSet, | ||||
| ) | ||||
| from .matching import match_correspondents, match_tags, match_document_types | ||||
| from .models import Correspondent, Document, Tag, DocumentType, SavedView | ||||
| from .filters import CorrespondentFilterSet | ||||
| from .filters import DocumentFilterSet | ||||
| from .filters import DocumentTypeFilterSet | ||||
| from .filters import TagFilterSet | ||||
| from .matching import match_correspondents | ||||
| from .matching import match_document_types | ||||
| from .matching import match_tags | ||||
| from .models import Correspondent | ||||
| from .models import Document | ||||
| from .models import DocumentType | ||||
| from .models import SavedView | ||||
| from .models import Tag | ||||
| from .parsers import get_parser_class_for_mime_type | ||||
| from .serialisers import ( | ||||
|     CorrespondentSerializer, | ||||
|     DocumentSerializer, | ||||
|     TagSerializerVersion1, | ||||
|     TagSerializer, | ||||
|     DocumentTypeSerializer, | ||||
|     PostDocumentSerializer, | ||||
|     SavedViewSerializer, | ||||
|     BulkEditSerializer, | ||||
|     DocumentListSerializer, | ||||
|     BulkDownloadSerializer, | ||||
| ) | ||||
| from .serialisers import BulkDownloadSerializer | ||||
| from .serialisers import BulkEditSerializer | ||||
| from .serialisers import CorrespondentSerializer | ||||
| from .serialisers import DocumentListSerializer | ||||
| from .serialisers import DocumentSerializer | ||||
| from .serialisers import DocumentTypeSerializer | ||||
| from .serialisers import PostDocumentSerializer | ||||
| from .serialisers import SavedViewSerializer | ||||
| from .serialisers import TagSerializer | ||||
| from .serialisers import TagSerializerVersion1 | ||||
|  | ||||
| logger = logging.getLogger("paperless.api") | ||||
|  | ||||
| @@ -89,16 +96,14 @@ class IndexView(TemplateView): | ||||
|         context["full_name"] = self.request.user.get_full_name() | ||||
|         context["styles_css"] = f"frontend/{self.get_language()}/styles.css" | ||||
|         context["runtime_js"] = f"frontend/{self.get_language()}/runtime.js" | ||||
|         context[ | ||||
|             "polyfills_js" | ||||
|         ] = f"frontend/{self.get_language()}/polyfills.js"  # NOQA: E501 | ||||
|         context["polyfills_js"] = f"frontend/{self.get_language()}/polyfills.js" | ||||
|         context["main_js"] = f"frontend/{self.get_language()}/main.js" | ||||
|         context[ | ||||
|             "webmanifest" | ||||
|         ] = f"frontend/{self.get_language()}/manifest.webmanifest"  # NOQA: E501 | ||||
|         ] = f"frontend/{self.get_language()}/manifest.webmanifest"  # noqa: E501 | ||||
|         context[ | ||||
|             "apple_touch_icon" | ||||
|         ] = f"frontend/{self.get_language()}/apple-touch-icon.png"  # NOQA: E501 | ||||
|         ] = f"frontend/{self.get_language()}/apple-touch-icon.png"  # noqa: E501 | ||||
|         return context | ||||
|  | ||||
|  | ||||
| @@ -106,7 +111,8 @@ class CorrespondentViewSet(ModelViewSet): | ||||
|     model = Correspondent | ||||
|  | ||||
|     queryset = Correspondent.objects.annotate( | ||||
|         document_count=Count("documents"), last_correspondence=Max("documents__created") | ||||
|         document_count=Count("documents"), | ||||
|         last_correspondence=Max("documents__created"), | ||||
|     ).order_by(Lower("name")) | ||||
|  | ||||
|     serializer_class = CorrespondentSerializer | ||||
| @@ -127,7 +133,7 @@ class TagViewSet(ModelViewSet): | ||||
|     model = Tag | ||||
|  | ||||
|     queryset = Tag.objects.annotate(document_count=Count("documents")).order_by( | ||||
|         Lower("name") | ||||
|         Lower("name"), | ||||
|     ) | ||||
|  | ||||
|     def get_serializer_class(self): | ||||
| @@ -147,7 +153,7 @@ class DocumentTypeViewSet(ModelViewSet): | ||||
|     model = DocumentType | ||||
|  | ||||
|     queryset = DocumentType.objects.annotate( | ||||
|         document_count=Count("documents") | ||||
|         document_count=Count("documents"), | ||||
|     ).order_by(Lower("name")) | ||||
|  | ||||
|     serializer_class = DocumentTypeSerializer | ||||
| @@ -220,9 +226,7 @@ class DocumentViewSet( | ||||
|  | ||||
|     def file_response(self, pk, request, disposition): | ||||
|         doc = Document.objects.get(id=pk) | ||||
|         if ( | ||||
|             not self.original_requested(request) and doc.has_archive_version | ||||
|         ):  # NOQA: E501 | ||||
|         if not self.original_requested(request) and doc.has_archive_version: | ||||
|             file_handle = doc.archive_file | ||||
|             filename = doc.get_public_filename(archive=True) | ||||
|             mime_type = "application/pdf" | ||||
| @@ -258,7 +262,7 @@ class DocumentViewSet( | ||||
|  | ||||
|             try: | ||||
|                 return parser.extract_metadata(file, mime_type) | ||||
|             except Exception as e: | ||||
|             except Exception: | ||||
|                 # TODO: cover GPG errors, remove later. | ||||
|                 return [] | ||||
|         else: | ||||
| @@ -291,7 +295,8 @@ class DocumentViewSet( | ||||
|         if doc.has_archive_version: | ||||
|             meta["archive_size"] = self.get_filesize(doc.archive_path) | ||||
|             meta["archive_metadata"] = self.get_metadata( | ||||
|                 doc.archive_path, "application/pdf" | ||||
|                 doc.archive_path, | ||||
|                 "application/pdf", | ||||
|             ) | ||||
|         else: | ||||
|             meta["archive_size"] = None | ||||
| @@ -315,7 +320,7 @@ class DocumentViewSet( | ||||
|                 "document_types": [ | ||||
|                     dt.id for dt in match_document_types(doc, classifier) | ||||
|                 ], | ||||
|             } | ||||
|             }, | ||||
|         ) | ||||
|  | ||||
|     @action(methods=["get"], detail=True) | ||||
| @@ -357,7 +362,7 @@ class SearchResultSerializer(DocumentSerializer): | ||||
|             "score": instance.score, | ||||
|             "highlights": instance.highlights("content", text=doc.content) | ||||
|             if doc | ||||
|             else None,  # NOQA: E501 | ||||
|             else None, | ||||
|             "rank": instance.rank, | ||||
|         } | ||||
|  | ||||
| @@ -500,7 +505,9 @@ class PostDocumentView(GenericAPIView): | ||||
|         os.makedirs(settings.SCRATCH_DIR, exist_ok=True) | ||||
|  | ||||
|         with tempfile.NamedTemporaryFile( | ||||
|             prefix="paperless-upload-", dir=settings.SCRATCH_DIR, delete=False | ||||
|             prefix="paperless-upload-", | ||||
|             dir=settings.SCRATCH_DIR, | ||||
|             delete=False, | ||||
|         ) as f: | ||||
|             f.write(doc_data) | ||||
|             os.utime(f.name, times=(t, t)) | ||||
| @@ -537,20 +544,20 @@ class SelectionDataView(GenericAPIView): | ||||
|  | ||||
|         correspondents = Correspondent.objects.annotate( | ||||
|             document_count=Count( | ||||
|                 Case(When(documents__id__in=ids, then=1), output_field=IntegerField()) | ||||
|             ) | ||||
|                 Case(When(documents__id__in=ids, then=1), output_field=IntegerField()), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         tags = Tag.objects.annotate( | ||||
|             document_count=Count( | ||||
|                 Case(When(documents__id__in=ids, then=1), output_field=IntegerField()) | ||||
|             ) | ||||
|                 Case(When(documents__id__in=ids, then=1), output_field=IntegerField()), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         types = DocumentType.objects.annotate( | ||||
|             document_count=Count( | ||||
|                 Case(When(documents__id__in=ids, then=1), output_field=IntegerField()) | ||||
|             ) | ||||
|                 Case(When(documents__id__in=ids, then=1), output_field=IntegerField()), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         r = Response( | ||||
| @@ -565,7 +572,7 @@ class SelectionDataView(GenericAPIView): | ||||
|                 "selected_document_types": [ | ||||
|                     {"id": t.id, "document_count": t.document_count} for t in types | ||||
|                 ], | ||||
|             } | ||||
|             }, | ||||
|         ) | ||||
|  | ||||
|         return r | ||||
| @@ -612,7 +619,7 @@ class StatisticsView(APIView): | ||||
|             { | ||||
|                 "documents_total": documents_total, | ||||
|                 "documents_inbox": documents_inbox, | ||||
|             } | ||||
|             }, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @@ -632,7 +639,9 @@ class BulkDownloadView(GenericAPIView): | ||||
|  | ||||
|         os.makedirs(settings.SCRATCH_DIR, exist_ok=True) | ||||
|         temp = tempfile.NamedTemporaryFile( | ||||
|             dir=settings.SCRATCH_DIR, suffix="-compressed-archive", delete=False | ||||
|             dir=settings.SCRATCH_DIR, | ||||
|             suffix="-compressed-archive", | ||||
|             delete=False, | ||||
|         ) | ||||
|  | ||||
|         if content == "both": | ||||
| @@ -651,7 +660,8 @@ class BulkDownloadView(GenericAPIView): | ||||
|         with open(temp.name, "rb") as f: | ||||
|             response = HttpResponse(f, content_type="application/zip") | ||||
|             response["Content-Disposition"] = '{}; filename="{}"'.format( | ||||
|                 "attachment", "documents.zip" | ||||
|                 "attachment", | ||||
|                 "documents.zip", | ||||
|             ) | ||||
|  | ||||
|             return response | ||||
|   | ||||
| @@ -1 +1,4 @@ | ||||
| from .checks import paths_check, binaries_check | ||||
| from .checks import binaries_check | ||||
| from .checks import paths_check | ||||
|  | ||||
| __all__ = ["binaries_check", "paths_check"] | ||||
|   | ||||
| @@ -9,14 +9,14 @@ from django.core.asgi import get_asgi_application | ||||
| os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings") | ||||
| django_asgi_app = get_asgi_application() | ||||
|  | ||||
| from channels.auth import AuthMiddlewareStack  # NOQA: E402 | ||||
| from channels.routing import ProtocolTypeRouter, URLRouter  # NOQA: E402 | ||||
| from channels.auth import AuthMiddlewareStack  # noqa: E402 | ||||
| from channels.routing import ProtocolTypeRouter, URLRouter  # noqa: E402 | ||||
|  | ||||
| from paperless.urls import websocket_urlpatterns  # NOQA: E402 | ||||
| from paperless.urls import websocket_urlpatterns  # noqa: E402 | ||||
|  | ||||
| application = ProtocolTypeRouter( | ||||
|     { | ||||
|         "http": get_asgi_application(), | ||||
|         "websocket": AuthMiddlewareStack(URLRouter(websocket_urlpatterns)), | ||||
|     } | ||||
|     }, | ||||
| ) | ||||
|   | ||||
| @@ -1,9 +1,9 @@ | ||||
| from django.conf import settings | ||||
| from django.contrib import auth | ||||
| from django.contrib.auth.middleware import RemoteUserMiddleware | ||||
| from django.contrib.auth.models import User | ||||
| from django.utils.deprecation import MiddlewareMixin | ||||
| from rest_framework import authentication | ||||
| from django.contrib.auth.middleware import RemoteUserMiddleware | ||||
|  | ||||
|  | ||||
| class AutoLoginMiddleware(MiddlewareMixin): | ||||
| @@ -25,7 +25,7 @@ class AngularApiAuthenticationOverride(authentication.BaseAuthentication): | ||||
|             settings.DEBUG | ||||
|             and "Referer" in request.headers | ||||
|             and request.headers["Referer"].startswith("http://localhost:4200/") | ||||
|         ):  # NOQA: E501 | ||||
|         ): | ||||
|             user = User.objects.filter(is_staff=True).first() | ||||
|             print("Auto-Login with user {}".format(user)) | ||||
|             return (user, None) | ||||
|   | ||||
| @@ -3,7 +3,9 @@ import shutil | ||||
| import stat | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.checks import Error, Warning, register | ||||
| from django.core.checks import Error | ||||
| from django.core.checks import register | ||||
| from django.core.checks import Warning | ||||
|  | ||||
| exists_message = "{} is set but doesn't exist." | ||||
| exists_hint = "Create a directory at {}" | ||||
| @@ -19,11 +21,12 @@ def path_check(var, directory): | ||||
|     if directory: | ||||
|         if not os.path.isdir(directory): | ||||
|             messages.append( | ||||
|                 Error(exists_message.format(var), exists_hint.format(directory)) | ||||
|                 Error(exists_message.format(var), exists_hint.format(directory)), | ||||
|             ) | ||||
|         else: | ||||
|             test_file = os.path.join( | ||||
|                 directory, f"__paperless_write_test_{os.getpid()}__" | ||||
|                 directory, | ||||
|                 f"__paperless_write_test_{os.getpid()}__", | ||||
|             ) | ||||
|             try: | ||||
|                 with open(test_file, "w"): | ||||
| @@ -34,9 +37,9 @@ def path_check(var, directory): | ||||
|                         writeable_message.format(var), | ||||
|                         writeable_hint.format( | ||||
|                             f"\n{stat.filemode(os.stat(directory).st_mode)} " | ||||
|                             f"{directory}\n" | ||||
|                             f"{directory}\n", | ||||
|                         ), | ||||
|                     ), | ||||
|                     ) | ||||
|                 ) | ||||
|             finally: | ||||
|                 if os.path.isfile(test_file): | ||||
| @@ -88,8 +91,8 @@ def debug_mode_check(app_configs, **kwargs): | ||||
|                 "security issue, since it puts security overides in place which " | ||||
|                 "are meant to be only used during development. This " | ||||
|                 "also means that paperless will tell anyone various " | ||||
|                 "debugging information when something goes wrong." | ||||
|             ) | ||||
|                 "debugging information when something goes wrong.", | ||||
|             ), | ||||
|         ] | ||||
|     else: | ||||
|         return [] | ||||
|   | ||||
| @@ -1,7 +1,8 @@ | ||||
| import json | ||||
|  | ||||
| from asgiref.sync import async_to_sync | ||||
| from channels.exceptions import DenyConnection, AcceptConnection | ||||
| from channels.exceptions import AcceptConnection | ||||
| from channels.exceptions import DenyConnection | ||||
| from channels.generic.websocket import WebsocketConsumer | ||||
|  | ||||
|  | ||||
| @@ -14,13 +15,15 @@ class StatusConsumer(WebsocketConsumer): | ||||
|             raise DenyConnection() | ||||
|         else: | ||||
|             async_to_sync(self.channel_layer.group_add)( | ||||
|                 "status_updates", self.channel_name | ||||
|                 "status_updates", | ||||
|                 self.channel_name, | ||||
|             ) | ||||
|             raise AcceptConnection() | ||||
|  | ||||
|     def disconnect(self, close_code): | ||||
|         async_to_sync(self.channel_layer.group_discard)( | ||||
|             "status_updates", self.channel_name | ||||
|             "status_updates", | ||||
|             self.channel_name, | ||||
|         ) | ||||
|  | ||||
|     def status_update(self, event): | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| import gnupg | ||||
|  | ||||
| from django.conf import settings | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| from django.conf import settings | ||||
|  | ||||
| from paperless import version | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -5,9 +5,8 @@ import os | ||||
| import re | ||||
|  | ||||
| from concurrent_log_handler.queue import setup_logging_queues | ||||
| from dotenv import load_dotenv | ||||
|  | ||||
| from django.utils.translation import gettext_lazy as _ | ||||
| from dotenv import load_dotenv | ||||
|  | ||||
| # Tap paperless.conf if it's available | ||||
| if os.path.exists("../paperless.conf"): | ||||
| @@ -68,7 +67,8 @@ MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle") | ||||
| LOGGING_DIR = os.getenv("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log")) | ||||
|  | ||||
| CONSUMPTION_DIR = os.getenv( | ||||
|     "PAPERLESS_CONSUMPTION_DIR", os.path.join(BASE_DIR, "..", "consume") | ||||
|     "PAPERLESS_CONSUMPTION_DIR", | ||||
|     os.path.join(BASE_DIR, "..", "consume"), | ||||
| ) | ||||
|  | ||||
| # This will be created if it doesn't exist | ||||
| @@ -119,7 +119,7 @@ REST_FRAMEWORK = { | ||||
|  | ||||
| if DEBUG: | ||||
|     REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append( | ||||
|         "paperless.auth.AngularApiAuthenticationOverride" | ||||
|         "paperless.auth.AngularApiAuthenticationOverride", | ||||
|     ) | ||||
|  | ||||
| MIDDLEWARE = [ | ||||
| @@ -191,7 +191,8 @@ if AUTO_LOGIN_USERNAME: | ||||
|  | ||||
| ENABLE_HTTP_REMOTE_USER = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER") | ||||
| HTTP_REMOTE_USER_HEADER_NAME = os.getenv( | ||||
|     "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME", "HTTP_REMOTE_USER" | ||||
|     "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME", | ||||
|     "HTTP_REMOTE_USER", | ||||
| ) | ||||
|  | ||||
| if ENABLE_HTTP_REMOTE_USER: | ||||
| @@ -201,7 +202,7 @@ if ENABLE_HTTP_REMOTE_USER: | ||||
|         "django.contrib.auth.backends.ModelBackend", | ||||
|     ] | ||||
|     REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append( | ||||
|         "rest_framework.authentication.RemoteUserAuthentication" | ||||
|         "rest_framework.authentication.RemoteUserAuthentication", | ||||
|     ) | ||||
|  | ||||
| # X-Frame options for embedded PDF display: | ||||
| @@ -212,7 +213,7 @@ else: | ||||
|  | ||||
| # We allow CORS from localhost:8080 | ||||
| CORS_ALLOWED_ORIGINS = tuple( | ||||
|     os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(",") | ||||
|     os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","), | ||||
| ) | ||||
|  | ||||
| if DEBUG: | ||||
| @@ -223,7 +224,8 @@ if DEBUG: | ||||
| # Paperless on a closed network.  However, if you're putting this anywhere | ||||
| # public, you should change the key to something unique and verbose. | ||||
| SECRET_KEY = os.getenv( | ||||
|     "PAPERLESS_SECRET_KEY", "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee" | ||||
|     "PAPERLESS_SECRET_KEY", | ||||
|     "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee", | ||||
| ) | ||||
|  | ||||
| _allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS") | ||||
| @@ -268,7 +270,7 @@ DATABASES = { | ||||
|     "default": { | ||||
|         "ENGINE": "django.db.backends.sqlite3", | ||||
|         "NAME": os.path.join(DATA_DIR, "db.sqlite3"), | ||||
|     } | ||||
|     }, | ||||
| } | ||||
|  | ||||
| if os.getenv("PAPERLESS_DBHOST"): | ||||
| @@ -423,7 +425,8 @@ def default_threads_per_worker(task_workers): | ||||
|  | ||||
|  | ||||
| THREADS_PER_WORKER = os.getenv( | ||||
|     "PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker(TASK_WORKERS) | ||||
|     "PAPERLESS_THREADS_PER_WORKER", | ||||
|     default_threads_per_worker(TASK_WORKERS), | ||||
| ) | ||||
|  | ||||
| ############################################################################### | ||||
| @@ -435,7 +438,7 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0)) | ||||
| CONSUMER_POLLING_DELAY = int(os.getenv("PAPERLESS_CONSUMER_POLLING_DELAY", 5)) | ||||
|  | ||||
| CONSUMER_POLLING_RETRY_COUNT = int( | ||||
|     os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5) | ||||
|     os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5), | ||||
| ) | ||||
|  | ||||
| CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES") | ||||
| @@ -448,8 +451,8 @@ CONSUMER_IGNORE_PATTERNS = list( | ||||
|         os.getenv( | ||||
|             "PAPERLESS_CONSUMER_IGNORE_PATTERNS", | ||||
|             '[".DS_STORE/*", "._*", ".stfolder/*"]', | ||||
|         ) | ||||
|     ) | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
|  | ||||
| CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS") | ||||
| @@ -479,7 +482,7 @@ OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true") | ||||
| OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true") | ||||
|  | ||||
| OCR_ROTATE_PAGES_THRESHOLD = float( | ||||
|     os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0) | ||||
|     os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0), | ||||
| ) | ||||
|  | ||||
| OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}") | ||||
| @@ -536,7 +539,8 @@ THUMBNAIL_FONT_NAME = os.getenv( | ||||
| PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO") | ||||
| PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998") | ||||
| PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv( | ||||
|     "PAPERLESS_TIKA_GOTENBERG_ENDPOINT", "http://localhost:3000" | ||||
|     "PAPERLESS_TIKA_GOTENBERG_ENDPOINT", | ||||
|     "http://localhost:3000", | ||||
| ) | ||||
|  | ||||
| if PAPERLESS_TIKA_ENABLED: | ||||
|   | ||||
| @@ -1,10 +1,11 @@ | ||||
| import os | ||||
| import shutil | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from paperless import binaries_check, paths_check | ||||
| from paperless import binaries_check | ||||
| from paperless import paths_check | ||||
| from paperless.checks import debug_mode_check | ||||
|  | ||||
|  | ||||
| @@ -20,7 +21,9 @@ class TestChecks(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(paths_check(None), []) | ||||
|  | ||||
|     @override_settings( | ||||
|         MEDIA_ROOT="uuh", DATA_DIR="whatever", CONSUMPTION_DIR="idontcare" | ||||
|         MEDIA_ROOT="uuh", | ||||
|         DATA_DIR="whatever", | ||||
|         CONSUMPTION_DIR="idontcare", | ||||
|     ) | ||||
|     def test_paths_check_dont_exist(self): | ||||
|         msgs = paths_check(None) | ||||
|   | ||||
| @@ -2,8 +2,8 @@ from unittest import mock | ||||
|  | ||||
| from channels.layers import get_channel_layer | ||||
| from channels.testing import WebsocketCommunicator | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from paperless.asgi import application | ||||
|  | ||||
|  | ||||
| @@ -46,7 +46,8 @@ class TestWebSockets(TestCase): | ||||
|  | ||||
|         channel_layer = get_channel_layer() | ||||
|         await channel_layer.group_send( | ||||
|             "status_updates", {"type": "status_update", "data": message} | ||||
|             "status_updates", | ||||
|             {"type": "status_update", "data": message}, | ||||
|         ) | ||||
|  | ||||
|         response = await communicator.receive_json_from() | ||||
|   | ||||
| @@ -1,34 +1,30 @@ | ||||
| from django.conf import settings | ||||
| from django.conf.urls import include | ||||
| from django.contrib import admin | ||||
| from django.contrib.auth.decorators import login_required | ||||
| from django.urls import path, re_path | ||||
| from django.urls import path | ||||
| from django.urls import re_path | ||||
| from django.utils.translation import gettext_lazy as _ | ||||
| from django.views.decorators.csrf import csrf_exempt | ||||
| from django.views.generic import RedirectView | ||||
| from documents.views import BulkDownloadView | ||||
| from documents.views import BulkEditView | ||||
| from documents.views import CorrespondentViewSet | ||||
| from documents.views import DocumentTypeViewSet | ||||
| from documents.views import IndexView | ||||
| from documents.views import LogViewSet | ||||
| from documents.views import PostDocumentView | ||||
| from documents.views import SavedViewViewSet | ||||
| from documents.views import SearchAutoCompleteView | ||||
| from documents.views import SelectionDataView | ||||
| from documents.views import StatisticsView | ||||
| from documents.views import TagViewSet | ||||
| from documents.views import UnifiedSearchViewSet | ||||
| from paperless.consumers import StatusConsumer | ||||
| from paperless.views import FaviconView | ||||
| from rest_framework.authtoken import views | ||||
| from rest_framework.routers import DefaultRouter | ||||
|  | ||||
| from django.utils.translation import gettext_lazy as _ | ||||
|  | ||||
| from django.conf import settings | ||||
|  | ||||
| from paperless.consumers import StatusConsumer | ||||
| from documents.views import ( | ||||
|     CorrespondentViewSet, | ||||
|     UnifiedSearchViewSet, | ||||
|     LogViewSet, | ||||
|     TagViewSet, | ||||
|     DocumentTypeViewSet, | ||||
|     IndexView, | ||||
|     SearchAutoCompleteView, | ||||
|     StatisticsView, | ||||
|     PostDocumentView, | ||||
|     SavedViewViewSet, | ||||
|     BulkEditView, | ||||
|     SelectionDataView, | ||||
|     BulkDownloadView, | ||||
| ) | ||||
| from paperless.views import FaviconView | ||||
|  | ||||
| api_router = DefaultRouter() | ||||
| api_router.register(r"correspondents", CorrespondentViewSet) | ||||
| api_router.register(r"document_types", DocumentTypeViewSet) | ||||
| @@ -62,7 +58,9 @@ urlpatterns = [ | ||||
|                     name="post_document", | ||||
|                 ), | ||||
|                 re_path( | ||||
|                     r"^documents/bulk_edit/", BulkEditView.as_view(), name="bulk_edit" | ||||
|                     r"^documents/bulk_edit/", | ||||
|                     BulkEditView.as_view(), | ||||
|                     name="bulk_edit", | ||||
|                 ), | ||||
|                 re_path( | ||||
|                     r"^documents/selection_data/", | ||||
| @@ -76,7 +74,7 @@ urlpatterns = [ | ||||
|                 ), | ||||
|                 path("token/", views.obtain_auth_token), | ||||
|             ] | ||||
|             + api_router.urls | ||||
|             + api_router.urls, | ||||
|         ), | ||||
|     ), | ||||
|     re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"), | ||||
| @@ -88,35 +86,37 @@ urlpatterns = [ | ||||
|                 re_path( | ||||
|                     r"^doc/(?P<pk>\d+)$", | ||||
|                     RedirectView.as_view( | ||||
|                         url=settings.BASE_URL + "api/documents/%(pk)s/download/" | ||||
|                         url=settings.BASE_URL + "api/documents/%(pk)s/download/", | ||||
|                     ), | ||||
|                 ), | ||||
|                 re_path( | ||||
|                     r"^thumb/(?P<pk>\d+)$", | ||||
|                     RedirectView.as_view( | ||||
|                         url=settings.BASE_URL + "api/documents/%(pk)s/thumb/" | ||||
|                         url=settings.BASE_URL + "api/documents/%(pk)s/thumb/", | ||||
|                     ), | ||||
|                 ), | ||||
|                 re_path( | ||||
|                     r"^preview/(?P<pk>\d+)$", | ||||
|                     RedirectView.as_view( | ||||
|                         url=settings.BASE_URL + "api/documents/%(pk)s/preview/" | ||||
|                         url=settings.BASE_URL + "api/documents/%(pk)s/preview/", | ||||
|                     ), | ||||
|                 ), | ||||
|             ] | ||||
|             ], | ||||
|         ), | ||||
|     ), | ||||
|     re_path( | ||||
|         r"^push$", | ||||
|         csrf_exempt( | ||||
|             RedirectView.as_view(url=settings.BASE_URL + "api/documents/post_document/") | ||||
|             RedirectView.as_view( | ||||
|                 url=settings.BASE_URL + "api/documents/post_document/", | ||||
|             ), | ||||
|         ), | ||||
|     ), | ||||
|     # Frontend assets TODO: this is pretty bad, but it works. | ||||
|     path( | ||||
|         "assets/<path:path>", | ||||
|         RedirectView.as_view( | ||||
|             url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s" | ||||
|             url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s", | ||||
|         ), | ||||
|     ), | ||||
|     # TODO: with localization, this is even worse! :/ | ||||
|   | ||||
| @@ -14,7 +14,11 @@ class StandardPagination(PageNumberPagination): | ||||
| class FaviconView(View): | ||||
|     def get(self, request, *args, **kwargs): | ||||
|         favicon = os.path.join( | ||||
|             os.path.dirname(__file__), "static", "paperless", "img", "favicon.ico" | ||||
|             os.path.dirname(__file__), | ||||
|             "static", | ||||
|             "paperless", | ||||
|             "img", | ||||
|             "favicon.ico", | ||||
|         ) | ||||
|         with open(favicon, "rb") as f: | ||||
|             return HttpResponse(f, content_type="image/x-icon") | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| import os | ||||
| from uvicorn.workers import UvicornWorker | ||||
|  | ||||
| from django.conf import settings | ||||
| from uvicorn.workers import UvicornWorker | ||||
|  | ||||
| os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings") | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,6 @@ It exposes the WSGI callable as a module-level variable named ``application``. | ||||
| For more information on this file, see | ||||
| https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/ | ||||
| """ | ||||
|  | ||||
| import os | ||||
|  | ||||
| from django.core.wsgi import get_wsgi_application | ||||
|   | ||||
| @@ -1,8 +1,8 @@ | ||||
| from django.contrib import admin | ||||
| from django import forms | ||||
| from paperless_mail.models import MailAccount, MailRule | ||||
|  | ||||
| from django.contrib import admin | ||||
| from django.utils.translation import gettext_lazy as _ | ||||
| from paperless_mail.models import MailAccount | ||||
| from paperless_mail.models import MailRule | ||||
|  | ||||
|  | ||||
| class MailAccountAdminForm(forms.ModelForm): | ||||
| @@ -48,7 +48,7 @@ class MailRuleAdmin(admin.ModelAdmin): | ||||
|             { | ||||
|                 "description": _( | ||||
|                     "Paperless will only process mails that match ALL of the " | ||||
|                     "filters given below." | ||||
|                     "filters given below.", | ||||
|                 ), | ||||
|                 "fields": ( | ||||
|                     "filter_from", | ||||
| @@ -66,7 +66,7 @@ class MailRuleAdmin(admin.ModelAdmin): | ||||
|                 "description": _( | ||||
|                     "The action applied to the mail. This action is only " | ||||
|                     "performed when documents were consumed from the mail. " | ||||
|                     "Mails without attachments will remain entirely untouched." | ||||
|                     "Mails without attachments will remain entirely untouched.", | ||||
|                 ), | ||||
|                 "fields": ("action", "action_parameter"), | ||||
|             }, | ||||
| @@ -78,7 +78,7 @@ class MailRuleAdmin(admin.ModelAdmin): | ||||
|                     "Assign metadata to documents consumed from this rule " | ||||
|                     "automatically. If you do not assign tags, types or " | ||||
|                     "correspondents here, paperless will still process all " | ||||
|                     "matching rules that you have defined." | ||||
|                     "matching rules that you have defined.", | ||||
|                 ), | ||||
|                 "fields": ( | ||||
|                     "assign_title_from", | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| from django.apps import AppConfig | ||||
|  | ||||
| from django.utils.translation import gettext_lazy as _ | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| import os | ||||
| import tempfile | ||||
| from datetime import timedelta, date | ||||
| from datetime import date | ||||
| from datetime import timedelta | ||||
| from fnmatch import fnmatch | ||||
|  | ||||
| import magic | ||||
| @@ -8,18 +9,16 @@ import pathvalidate | ||||
| from django.conf import settings | ||||
| from django.db import DatabaseError | ||||
| from django_q.tasks import async_task | ||||
| from imap_tools import ( | ||||
|     MailBox, | ||||
|     MailBoxUnencrypted, | ||||
|     AND, | ||||
|     MailMessageFlags, | ||||
|     MailboxFolderSelectError, | ||||
| ) | ||||
|  | ||||
| from documents.loggers import LoggingMixin | ||||
| from documents.models import Correspondent | ||||
| from documents.parsers import is_mime_type_supported | ||||
| from paperless_mail.models import MailAccount, MailRule | ||||
| from imap_tools import AND | ||||
| from imap_tools import MailBox | ||||
| from imap_tools import MailboxFolderSelectError | ||||
| from imap_tools import MailBoxUnencrypted | ||||
| from imap_tools import MailMessageFlags | ||||
| from paperless_mail.models import MailAccount | ||||
| from paperless_mail.models import MailRule | ||||
|  | ||||
|  | ||||
| class MailError(Exception): | ||||
| @@ -120,8 +119,8 @@ class MailAccountHandler(LoggingMixin): | ||||
|  | ||||
|         else: | ||||
|             raise NotImplementedError( | ||||
|                 "Unknown title selector." | ||||
|             )  # pragma: nocover  # NOQA: E501 | ||||
|                 "Unknown title selector.", | ||||
|             )  # pragma: nocover | ||||
|  | ||||
|     def get_correspondent(self, message, rule): | ||||
|         c_from = rule.assign_correspondent_from | ||||
| @@ -137,7 +136,7 @@ class MailAccountHandler(LoggingMixin): | ||||
|                 message.from_values | ||||
|                 and "name" in message.from_values | ||||
|                 and message.from_values["name"] | ||||
|             ):  # NOQA: E501 | ||||
|             ): | ||||
|                 return self._correspondent_from_name(message.from_values["name"]) | ||||
|             else: | ||||
|                 return self._correspondent_from_name(message.from_) | ||||
| @@ -147,8 +146,8 @@ class MailAccountHandler(LoggingMixin): | ||||
|  | ||||
|         else: | ||||
|             raise NotImplementedError( | ||||
|                 "Unknwown correspondent selector" | ||||
|             )  # pragma: nocover  # NOQA: E501 | ||||
|                 "Unknwown correspondent selector", | ||||
|             )  # pragma: nocover | ||||
|  | ||||
|     def handle_mail_account(self, account): | ||||
|  | ||||
| @@ -159,7 +158,9 @@ class MailAccountHandler(LoggingMixin): | ||||
|         total_processed_files = 0 | ||||
|  | ||||
|         with get_mailbox( | ||||
|             account.imap_server, account.imap_port, account.imap_security | ||||
|             account.imap_server, | ||||
|             account.imap_port, | ||||
|             account.imap_security, | ||||
|         ) as M: | ||||
|  | ||||
|             try: | ||||
| @@ -193,7 +194,7 @@ class MailAccountHandler(LoggingMixin): | ||||
|         except MailboxFolderSelectError: | ||||
|             raise MailError( | ||||
|                 f"Rule {rule}: Folder {rule.folder} " | ||||
|                 f"does not exist in account {rule.account}" | ||||
|                 f"does not exist in account {rule.account}", | ||||
|             ) | ||||
|  | ||||
|         criterias = make_criterias(rule) | ||||
| @@ -242,12 +243,14 @@ class MailAccountHandler(LoggingMixin): | ||||
|  | ||||
|         try: | ||||
|             get_rule_action(rule).post_consume( | ||||
|                 M, post_consume_messages, rule.action_parameter | ||||
|                 M, | ||||
|                 post_consume_messages, | ||||
|                 rule.action_parameter, | ||||
|             ) | ||||
|  | ||||
|         except Exception as e: | ||||
|             raise MailError( | ||||
|                 f"Rule {rule}: Error while processing post-consume actions: " f"{e}" | ||||
|                 f"Rule {rule}: Error while processing post-consume actions: " f"{e}", | ||||
|             ) | ||||
|  | ||||
|         return total_processed_files | ||||
| @@ -274,7 +277,7 @@ class MailAccountHandler(LoggingMixin): | ||||
|             if ( | ||||
|                 not att.content_disposition == "attachment" | ||||
|                 and rule.attachment_type == MailRule.ATTACHMENT_TYPE_ATTACHMENTS_ONLY | ||||
|             ):  # NOQA: E501 | ||||
|             ): | ||||
|                 self.log( | ||||
|                     "debug", | ||||
|                     f"Rule {rule}: " | ||||
| @@ -297,7 +300,8 @@ class MailAccountHandler(LoggingMixin): | ||||
|  | ||||
|                 os.makedirs(settings.SCRATCH_DIR, exist_ok=True) | ||||
|                 _, temp_filename = tempfile.mkstemp( | ||||
|                     prefix="paperless-mail-", dir=settings.SCRATCH_DIR | ||||
|                     prefix="paperless-mail-", | ||||
|                     dir=settings.SCRATCH_DIR, | ||||
|                 ) | ||||
|                 with open(temp_filename, "wb") as f: | ||||
|                     f.write(att.payload) | ||||
| @@ -313,15 +317,13 @@ class MailAccountHandler(LoggingMixin): | ||||
|                     "documents.tasks.consume_file", | ||||
|                     path=temp_filename, | ||||
|                     override_filename=pathvalidate.sanitize_filename( | ||||
|                         att.filename | ||||
|                     ),  # NOQA: E501 | ||||
|                         att.filename, | ||||
|                     ), | ||||
|                     override_title=title, | ||||
|                     override_correspondent_id=correspondent.id | ||||
|                     if correspondent | ||||
|                     else None,  # NOQA: E501 | ||||
|                     override_document_type_id=doc_type.id | ||||
|                     if doc_type | ||||
|                     else None,  # NOQA: E501 | ||||
|                     else None, | ||||
|                     override_document_type_id=doc_type.id if doc_type else None, | ||||
|                     override_tag_ids=[tag.id] if tag else None, | ||||
|                     task_name=att.filename[:100], | ||||
|                 ) | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| from django.core.management.base import BaseCommand | ||||
|  | ||||
| from paperless_mail import tasks | ||||
|  | ||||
|  | ||||
| @@ -7,7 +6,8 @@ class Command(BaseCommand): | ||||
|  | ||||
|     help = """ | ||||
|     """.replace( | ||||
|         "    ", "" | ||||
|         "    ", | ||||
|         "", | ||||
|     ) | ||||
|  | ||||
|     def handle(self, *args, **options): | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from django.db import models | ||||
|  | ||||
| import documents.models as document_models | ||||
|  | ||||
| from django.db import models | ||||
| from django.utils.translation import gettext_lazy as _ | ||||
|  | ||||
|  | ||||
| @@ -30,12 +28,14 @@ class MailAccount(models.Model): | ||||
|         null=True, | ||||
|         help_text=_( | ||||
|             "This is usually 143 for unencrypted and STARTTLS " | ||||
|             "connections, and 993 for SSL connections." | ||||
|             "connections, and 993 for SSL connections.", | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|     imap_security = models.PositiveIntegerField( | ||||
|         _("IMAP security"), choices=IMAP_SECURITY_OPTIONS, default=IMAP_SECURITY_SSL | ||||
|         _("IMAP security"), | ||||
|         choices=IMAP_SECURITY_OPTIONS, | ||||
|         default=IMAP_SECURITY_SSL, | ||||
|     ) | ||||
|  | ||||
|     username = models.CharField(_("username"), max_length=256) | ||||
| @@ -48,7 +48,7 @@ class MailAccount(models.Model): | ||||
|         default="UTF-8", | ||||
|         help_text=_( | ||||
|             "The character set to use when communicating with the " | ||||
|             "mail server, such as 'UTF-8' or 'US-ASCII'." | ||||
|             "mail server, such as 'UTF-8' or 'US-ASCII'.", | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
| @@ -123,13 +123,22 @@ class MailRule(models.Model): | ||||
|     ) | ||||
|  | ||||
|     filter_from = models.CharField( | ||||
|         _("filter from"), max_length=256, null=True, blank=True | ||||
|         _("filter from"), | ||||
|         max_length=256, | ||||
|         null=True, | ||||
|         blank=True, | ||||
|     ) | ||||
|     filter_subject = models.CharField( | ||||
|         _("filter subject"), max_length=256, null=True, blank=True | ||||
|         _("filter subject"), | ||||
|         max_length=256, | ||||
|         null=True, | ||||
|         blank=True, | ||||
|     ) | ||||
|     filter_body = models.CharField( | ||||
|         _("filter body"), max_length=256, null=True, blank=True | ||||
|         _("filter body"), | ||||
|         max_length=256, | ||||
|         null=True, | ||||
|         blank=True, | ||||
|     ) | ||||
|  | ||||
|     filter_attachment_filename = models.CharField( | ||||
| @@ -140,12 +149,14 @@ class MailRule(models.Model): | ||||
|         help_text=_( | ||||
|             "Only consume documents which entirely match this " | ||||
|             "filename if specified. Wildcards such as *.pdf or " | ||||
|             "*invoice* are allowed. Case insensitive." | ||||
|             "*invoice* are allowed. Case insensitive.", | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|     maximum_age = models.PositiveIntegerField( | ||||
|         _("maximum age"), default=30, help_text=_("Specified in days.") | ||||
|         _("maximum age"), | ||||
|         default=30, | ||||
|         help_text=_("Specified in days."), | ||||
|     ) | ||||
|  | ||||
|     attachment_type = models.PositiveIntegerField( | ||||
| @@ -154,7 +165,7 @@ class MailRule(models.Model): | ||||
|         default=ATTACHMENT_TYPE_ATTACHMENTS_ONLY, | ||||
|         help_text=_( | ||||
|             "Inline attachments include embedded images, so it's best " | ||||
|             "to combine this option with a filename filter." | ||||
|             "to combine this option with a filename filter.", | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
| @@ -173,12 +184,14 @@ class MailRule(models.Model): | ||||
|             "Additional parameter for the action selected above, " | ||||
|             "i.e., " | ||||
|             "the target folder of the move to folder action. " | ||||
|             "Subfolders must be separated by dots." | ||||
|             "Subfolders must be separated by dots.", | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|     assign_title_from = models.PositiveIntegerField( | ||||
|         _("assign title from"), choices=TITLE_SELECTOR, default=TITLE_FROM_SUBJECT | ||||
|         _("assign title from"), | ||||
|         choices=TITLE_SELECTOR, | ||||
|         default=TITLE_FROM_SUBJECT, | ||||
|     ) | ||||
|  | ||||
|     assign_tag = models.ForeignKey( | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| import logging | ||||
|  | ||||
| from paperless_mail.mail import MailAccountHandler, MailError | ||||
| from paperless_mail.mail import MailAccountHandler | ||||
| from paperless_mail.mail import MailError | ||||
| from paperless_mail.models import MailAccount | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -7,13 +7,15 @@ from unittest import mock | ||||
| from django.core.management import call_command | ||||
| from django.db import DatabaseError | ||||
| from django.test import TestCase | ||||
| from imap_tools import MailMessageFlags, MailboxFolderSelectError | ||||
|  | ||||
| from documents.models import Correspondent | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from imap_tools import MailboxFolderSelectError | ||||
| from imap_tools import MailMessageFlags | ||||
| from paperless_mail import tasks | ||||
| from paperless_mail.mail import MailError, MailAccountHandler | ||||
| from paperless_mail.models import MailRule, MailAccount | ||||
| from paperless_mail.mail import MailAccountHandler | ||||
| from paperless_mail.mail import MailError | ||||
| from paperless_mail.models import MailAccount | ||||
| from paperless_mail.models import MailRule | ||||
|  | ||||
|  | ||||
| class BogusFolderManager: | ||||
| @@ -83,7 +85,7 @@ class BogusMailBox(ContextManager): | ||||
|     def move(self, uid_list, folder): | ||||
|         if folder == "spam": | ||||
|             self.messages_spam.append( | ||||
|                 filter(lambda m: m.uid in uid_list, self.messages) | ||||
|                 filter(lambda m: m.uid in uid_list, self.messages), | ||||
|             ) | ||||
|             self.messages = list(filter(lambda m: m.uid not in uid_list, self.messages)) | ||||
|         else: | ||||
| @@ -115,7 +117,9 @@ def create_message( | ||||
|  | ||||
|  | ||||
| def create_attachment( | ||||
|     filename="the_file.pdf", content_disposition="attachment", payload=b"a PDF document" | ||||
|     filename="the_file.pdf", | ||||
|     content_disposition="attachment", | ||||
|     payload=b"a PDF document", | ||||
| ): | ||||
|     attachment = namedtuple("Attachment", []) | ||||
|     attachment.filename = filename | ||||
| @@ -163,7 +167,7 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|                 body="cables", | ||||
|                 seen=True, | ||||
|                 flagged=False, | ||||
|             ) | ||||
|             ), | ||||
|         ) | ||||
|         self.bogus_mailbox.messages.append( | ||||
|             create_message( | ||||
| @@ -171,14 +175,14 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|                 body="from my favorite electronic store", | ||||
|                 seen=False, | ||||
|                 flagged=True, | ||||
|             ) | ||||
|             ), | ||||
|         ) | ||||
|         self.bogus_mailbox.messages.append( | ||||
|             create_message( | ||||
|                 subject="Claim your $10M price now!", | ||||
|                 from_="amazon@amazon-some-indian-site.org", | ||||
|                 seen=False, | ||||
|             ) | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|     def test_get_correspondent(self): | ||||
| @@ -196,12 +200,14 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|         handler = MailAccountHandler() | ||||
|  | ||||
|         rule = MailRule( | ||||
|             name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING | ||||
|             name="a", | ||||
|             assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING, | ||||
|         ) | ||||
|         self.assertIsNone(handler.get_correspondent(message, rule)) | ||||
|  | ||||
|         rule = MailRule( | ||||
|             name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL | ||||
|             name="b", | ||||
|             assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL, | ||||
|         ) | ||||
|         c = handler.get_correspondent(message, rule) | ||||
|         self.assertIsNotNone(c) | ||||
| @@ -212,7 +218,8 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(c.id, me_localhost.id) | ||||
|  | ||||
|         rule = MailRule( | ||||
|             name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME | ||||
|             name="c", | ||||
|             assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME, | ||||
|         ) | ||||
|         c = handler.get_correspondent(message, rule) | ||||
|         self.assertIsNotNone(c) | ||||
| @@ -244,7 +251,9 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_handle_message(self): | ||||
|         message = create_message( | ||||
|             subject="the message title", from_="Myself", num_attachments=2 | ||||
|             subject="the message title", | ||||
|             from_="Myself", | ||||
|             num_attachments=2, | ||||
|         ) | ||||
|  | ||||
|         account = MailAccount() | ||||
| @@ -376,11 +385,16 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|     def test_handle_mail_account_mark_read(self): | ||||
|  | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test", imap_server="", username="admin", password="secret" | ||||
|             name="test", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="secret", | ||||
|         ) | ||||
|  | ||||
|         rule = MailRule.objects.create( | ||||
|             name="testrule", account=account, action=MailRule.ACTION_MARK_READ | ||||
|             name="testrule", | ||||
|             account=account, | ||||
|             action=MailRule.ACTION_MARK_READ, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||
| @@ -394,7 +408,10 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|     def test_handle_mail_account_delete(self): | ||||
|  | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test", imap_server="", username="admin", password="secret" | ||||
|             name="test", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="secret", | ||||
|         ) | ||||
|  | ||||
|         rule = MailRule.objects.create( | ||||
| @@ -412,7 +429,10 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_handle_mail_account_flag(self): | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test", imap_server="", username="admin", password="secret" | ||||
|             name="test", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="secret", | ||||
|         ) | ||||
|  | ||||
|         rule = MailRule.objects.create( | ||||
| @@ -432,7 +452,10 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_handle_mail_account_move(self): | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test", imap_server="", username="admin", password="secret" | ||||
|             name="test", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="secret", | ||||
|         ) | ||||
|  | ||||
|         rule = MailRule.objects.create( | ||||
| @@ -453,7 +476,10 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_error_login(self): | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test", imap_server="", username="admin", password="wrong" | ||||
|             name="test", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="wrong", | ||||
|         ) | ||||
|  | ||||
|         try: | ||||
| @@ -465,11 +491,17 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_error_skip_account(self): | ||||
|         account_faulty = MailAccount.objects.create( | ||||
|             name="test", imap_server="", username="admin", password="wroasdng" | ||||
|             name="test", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="wroasdng", | ||||
|         ) | ||||
|  | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test2", imap_server="", username="admin", password="secret" | ||||
|             name="test2", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="secret", | ||||
|         ) | ||||
|         rule = MailRule.objects.create( | ||||
|             name="testrule", | ||||
| @@ -487,7 +519,10 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|     def test_error_skip_rule(self): | ||||
|  | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test2", imap_server="", username="admin", password="secret" | ||||
|             name="test2", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="secret", | ||||
|         ) | ||||
|         rule = MailRule.objects.create( | ||||
|             name="testrule", | ||||
| @@ -523,7 +558,10 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|         m.side_effect = get_correspondent_fake | ||||
|  | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test2", imap_server="", username="admin", password="secret" | ||||
|             name="test2", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="secret", | ||||
|         ) | ||||
|         rule = MailRule.objects.create( | ||||
|             name="testrule", | ||||
| @@ -544,7 +582,10 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|     def test_error_create_correspondent(self): | ||||
|  | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test2", imap_server="", username="admin", password="secret" | ||||
|             name="test2", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="secret", | ||||
|         ) | ||||
|         rule = MailRule.objects.create( | ||||
|             name="testrule", | ||||
| @@ -579,7 +620,10 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|     def test_filters(self): | ||||
|  | ||||
|         account = MailAccount.objects.create( | ||||
|             name="test3", imap_server="", username="admin", password="secret" | ||||
|             name="test3", | ||||
|             imap_server="", | ||||
|             username="admin", | ||||
|             password="secret", | ||||
|         ) | ||||
|         rule = MailRule.objects.create( | ||||
|             name="testrule3", | ||||
| @@ -629,7 +673,7 @@ class TestMail(DirectoriesMixin, TestCase): | ||||
|  | ||||
| class TestManagementCommand(TestCase): | ||||
|     @mock.patch( | ||||
|         "paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts" | ||||
|         "paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts", | ||||
|     ) | ||||
|     def test_mail_fetcher(self, m): | ||||
|  | ||||
| @@ -644,10 +688,16 @@ class TestTasks(TestCase): | ||||
|         m.side_effect = lambda account: 6 | ||||
|  | ||||
|         MailAccount.objects.create( | ||||
|             name="A", imap_server="A", username="A", password="A" | ||||
|             name="A", | ||||
|             imap_server="A", | ||||
|             username="A", | ||||
|             password="A", | ||||
|         ) | ||||
|         MailAccount.objects.create( | ||||
|             name="B", imap_server="A", username="A", password="A" | ||||
|             name="B", | ||||
|             imap_server="A", | ||||
|             username="A", | ||||
|             password="A", | ||||
|         ) | ||||
|  | ||||
|         result = tasks.process_mail_accounts() | ||||
| @@ -663,7 +713,10 @@ class TestTasks(TestCase): | ||||
|     def test_single_accounts(self, m): | ||||
|  | ||||
|         MailAccount.objects.create( | ||||
|             name="A", imap_server="A", username="A", password="A" | ||||
|             name="A", | ||||
|             imap_server="A", | ||||
|             username="A", | ||||
|             password="A", | ||||
|         ) | ||||
|  | ||||
|         tasks.process_mail_account("A") | ||||
|   | ||||
| @@ -1,2 +1,5 @@ | ||||
| # this is here so that django finds the checks. | ||||
| from .checks import * | ||||
| from .checks import check_default_language_available | ||||
| from .checks import get_tesseract_langs | ||||
|  | ||||
| __all__ = ["get_tesseract_langs", "check_default_language_available"] | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| from django.apps import AppConfig | ||||
|  | ||||
| from paperless_tesseract.signals import tesseract_consumer_declaration | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| import subprocess | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.checks import Error, Warning, register | ||||
| from django.core.checks import Error | ||||
| from django.core.checks import register | ||||
| from django.core.checks import Warning | ||||
|  | ||||
|  | ||||
| def get_tesseract_langs(): | ||||
| @@ -19,8 +21,8 @@ def check_default_language_available(app_configs, **kwargs): | ||||
|         return [ | ||||
|             Warning( | ||||
|                 "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. " | ||||
|                 "This means that tesseract will fallback to english." | ||||
|             ) | ||||
|                 "This means that tesseract will fallback to english.", | ||||
|             ), | ||||
|         ] | ||||
|  | ||||
|     specified_langs = settings.OCR_LANGUAGE.split("+") | ||||
| @@ -31,8 +33,8 @@ def check_default_language_available(app_configs, **kwargs): | ||||
|                 Error( | ||||
|                     f"The selected ocr language {lang} is " | ||||
|                     f"not installed. Paperless cannot OCR your documents " | ||||
|                     f"without it. Please fix PAPERLESS_OCR_LANGUAGE." | ||||
|                 ) | ||||
|                     f"without it. Please fix PAPERLESS_OCR_LANGUAGE.", | ||||
|                 ), | ||||
|             ] | ||||
|  | ||||
|     return [] | ||||
|   | ||||
| @@ -2,10 +2,11 @@ import json | ||||
| import os | ||||
| import re | ||||
|  | ||||
| from PIL import Image | ||||
| from django.conf import settings | ||||
|  | ||||
| from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf | ||||
| from documents.parsers import DocumentParser | ||||
| from documents.parsers import make_thumbnail_from_pdf | ||||
| from documents.parsers import ParseError | ||||
| from PIL import Image | ||||
|  | ||||
|  | ||||
| class NoTextFoundException(Exception): | ||||
| @@ -42,7 +43,7 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|                             "prefix": meta.REVERSE_NS[m.group(1)], | ||||
|                             "key": m.group(2), | ||||
|                             "value": value, | ||||
|                         } | ||||
|                         }, | ||||
|                     ) | ||||
|                 except Exception as e: | ||||
|                     self.log( | ||||
| @@ -53,7 +54,9 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|  | ||||
|     def get_thumbnail(self, document_path, mime_type, file_name=None): | ||||
|         return make_thumbnail_from_pdf( | ||||
|             self.archive_path or document_path, self.tempdir, self.logging_group | ||||
|             self.archive_path or document_path, | ||||
|             self.tempdir, | ||||
|             self.logging_group, | ||||
|         ) | ||||
|  | ||||
|     def is_image(self, mime_type): | ||||
| @@ -110,7 +113,6 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|             return None | ||||
|  | ||||
|         from pdfminer.high_level import extract_text as pdfminer_extract_text | ||||
|         from pdfminer.pdftypes import PDFException | ||||
|  | ||||
|         try: | ||||
|             stripped = post_process_text(pdfminer_extract_text(pdf_file)) | ||||
| @@ -129,7 +131,12 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|             return None | ||||
|  | ||||
|     def construct_ocrmypdf_parameters( | ||||
|         self, input_file, mime_type, output_file, sidecar_file, safe_fallback=False | ||||
|         self, | ||||
|         input_file, | ||||
|         mime_type, | ||||
|         output_file, | ||||
|         sidecar_file, | ||||
|         safe_fallback=False, | ||||
|     ): | ||||
|         ocrmypdf_args = { | ||||
|             "input_file": input_file, | ||||
| @@ -167,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|             ocrmypdf_args["rotate_pages"] = True | ||||
|             ocrmypdf_args[ | ||||
|                 "rotate_pages_threshold" | ||||
|             ] = settings.OCR_ROTATE_PAGES_THRESHOLD  # NOQA: E501 | ||||
|             ] = settings.OCR_ROTATE_PAGES_THRESHOLD | ||||
|  | ||||
|         if settings.OCR_PAGES > 0: | ||||
|             ocrmypdf_args["pages"] = f"1-{settings.OCR_PAGES}" | ||||
| @@ -202,7 +209,7 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|                 raise ParseError( | ||||
|                     f"Cannot produce archive PDF for image {input_file}, " | ||||
|                     f"no DPI information is present in this image and " | ||||
|                     f"OCR_IMAGE_DPI is not set." | ||||
|                     f"OCR_IMAGE_DPI is not set.", | ||||
|                 ) | ||||
|  | ||||
|         if settings.OCR_USER_ARGS and not safe_fallback: | ||||
| @@ -241,7 +248,10 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|         sidecar_file = os.path.join(self.tempdir, "sidecar.txt") | ||||
|  | ||||
|         args = self.construct_ocrmypdf_parameters( | ||||
|             document_path, mime_type, archive_path, sidecar_file | ||||
|             document_path, | ||||
|             mime_type, | ||||
|             archive_path, | ||||
|             sidecar_file, | ||||
|         ) | ||||
|  | ||||
|         try: | ||||
| @@ -289,7 +299,8 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|                 # is bigger and blurry due to --force-ocr. | ||||
|  | ||||
|                 self.text = self.extract_text( | ||||
|                     sidecar_file_fallback, archive_path_fallback | ||||
|                     sidecar_file_fallback, | ||||
|                     archive_path_fallback, | ||||
|                 ) | ||||
|  | ||||
|             except Exception as e: | ||||
|   | ||||
| @@ -1,8 +1,8 @@ | ||||
| from unittest import mock | ||||
|  | ||||
| from django.core.checks import ERROR | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from paperless_tesseract import check_default_language_available | ||||
|  | ||||
|  | ||||
| @@ -16,8 +16,8 @@ class TestChecks(TestCase): | ||||
|         self.assertEqual(len(msgs), 1) | ||||
|         self.assertTrue( | ||||
|             msgs[0].msg.startswith( | ||||
|                 "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE" | ||||
|             ) | ||||
|                 "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE", | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|     @override_settings(OCR_LANGUAGE="ita") | ||||
|   | ||||
| @@ -3,11 +3,13 @@ import uuid | ||||
| from typing import ContextManager | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from documents.parsers import ParseError, run_convert | ||||
| from django.test import override_settings | ||||
| from django.test import TestCase | ||||
| from documents.parsers import ParseError | ||||
| from documents.parsers import run_convert | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from paperless_tesseract.parsers import RasterisedDocumentParser, post_process_text | ||||
| from paperless_tesseract.parsers import post_process_text | ||||
| from paperless_tesseract.parsers import RasterisedDocumentParser | ||||
|  | ||||
| image_to_string_calls = [] | ||||
|  | ||||
| @@ -56,7 +58,9 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|                 result, | ||||
|                 actual_result, | ||||
|                 "strip_exceess_whitespace({}) != '{}', but '{}'".format( | ||||
|                     source, result, actual_result | ||||
|                     source, | ||||
|                     result, | ||||
|                     actual_result, | ||||
|                 ), | ||||
|             ) | ||||
|  | ||||
| @@ -65,7 +69,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|     def test_get_text_from_pdf(self): | ||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||
|         text = parser.extract_text( | ||||
|             None, os.path.join(self.SAMPLE_FILES, "simple-digital.pdf") | ||||
|             None, | ||||
|             os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), | ||||
|         ) | ||||
|  | ||||
|         self.assertContainsStrings(text.strip(), ["This is a test document."]) | ||||
| @@ -73,7 +78,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|     def test_thumbnail(self): | ||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||
|         thumb = parser.get_thumbnail( | ||||
|             os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(thumb)) | ||||
|  | ||||
| @@ -89,14 +95,16 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||
|         thumb = parser.get_thumbnail( | ||||
|             os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(thumb)) | ||||
|  | ||||
|     def test_thumbnail_encrypted(self): | ||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||
|         thumb = parser.get_thumbnail( | ||||
|             os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(thumb)) | ||||
|  | ||||
| @@ -113,7 +121,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|  | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|  | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
| @@ -124,7 +133,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|  | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "with-form.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|  | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
| @@ -139,7 +149,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|  | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "with-form.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|  | ||||
|         self.assertIsNone(parser.archive_path) | ||||
| @@ -168,7 +179,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|  | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|  | ||||
|         self.assertIsNone(parser.archive_path) | ||||
| @@ -178,7 +190,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|     def test_with_form_error_notext(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "with-form.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|  | ||||
|         self.assertContainsStrings( | ||||
| @@ -191,7 +204,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|  | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "with-form.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|  | ||||
|         self.assertContainsStrings( | ||||
| @@ -221,7 +235,7 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|  | ||||
|         dpi = parser.calculate_a4_dpi( | ||||
|             os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png") | ||||
|             os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(dpi, 62) | ||||
| @@ -233,7 +247,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         def f(): | ||||
|             parser.parse( | ||||
|                 os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png" | ||||
|                 os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), | ||||
|                 "image/png", | ||||
|             ) | ||||
|  | ||||
|         self.assertRaises(ParseError, f) | ||||
| @@ -247,68 +262,80 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|  | ||||
|         self.assertContainsStrings( | ||||
|             parser.get_text().lower(), ["this is a test document."] | ||||
|             parser.get_text().lower(), | ||||
|             ["this is a test document."], | ||||
|         ) | ||||
|  | ||||
|     def test_multi_page(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|         self.assertContainsStrings( | ||||
|             parser.get_text().lower(), ["page 1", "page 2", "page 3"] | ||||
|             parser.get_text().lower(), | ||||
|             ["page 1", "page 2", "page 3"], | ||||
|         ) | ||||
|  | ||||
|     @override_settings(OCR_PAGES=2, OCR_MODE="skip") | ||||
|     def test_multi_page_pages_skip(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|         self.assertContainsStrings( | ||||
|             parser.get_text().lower(), ["page 1", "page 2", "page 3"] | ||||
|             parser.get_text().lower(), | ||||
|             ["page 1", "page 2", "page 3"], | ||||
|         ) | ||||
|  | ||||
|     @override_settings(OCR_PAGES=2, OCR_MODE="redo") | ||||
|     def test_multi_page_pages_redo(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|         self.assertContainsStrings( | ||||
|             parser.get_text().lower(), ["page 1", "page 2", "page 3"] | ||||
|             parser.get_text().lower(), | ||||
|             ["page 1", "page 2", "page 3"], | ||||
|         ) | ||||
|  | ||||
|     @override_settings(OCR_PAGES=2, OCR_MODE="force") | ||||
|     def test_multi_page_pages_force(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|         self.assertContainsStrings( | ||||
|             parser.get_text().lower(), ["page 1", "page 2", "page 3"] | ||||
|             parser.get_text().lower(), | ||||
|             ["page 1", "page 2", "page 3"], | ||||
|         ) | ||||
|  | ||||
|     @override_settings(OOCR_MODE="skip") | ||||
|     def test_multi_page_analog_pages_skip(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|         self.assertContainsStrings( | ||||
|             parser.get_text().lower(), ["page 1", "page 2", "page 3"] | ||||
|             parser.get_text().lower(), | ||||
|             ["page 1", "page 2", "page 3"], | ||||
|         ) | ||||
|  | ||||
|     @override_settings(OCR_PAGES=2, OCR_MODE="redo") | ||||
|     def test_multi_page_analog_pages_redo(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|         self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"]) | ||||
| @@ -318,7 +345,8 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|     def test_multi_page_analog_pages_force(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|         self.assertContainsStrings(parser.get_text().lower(), ["page 1"]) | ||||
| @@ -329,29 +357,34 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|     def test_skip_noarchive_withtext(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertIsNone(parser.archive_path) | ||||
|         self.assertContainsStrings( | ||||
|             parser.get_text().lower(), ["page 1", "page 2", "page 3"] | ||||
|             parser.get_text().lower(), | ||||
|             ["page 1", "page 2", "page 3"], | ||||
|         ) | ||||
|  | ||||
|     @override_settings(OCR_MODE="skip_noarchive") | ||||
|     def test_skip_noarchive_notext(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|         self.assertContainsStrings( | ||||
|             parser.get_text().lower(), ["page 1", "page 2", "page 3"] | ||||
|             parser.get_text().lower(), | ||||
|             ["page 1", "page 2", "page 3"], | ||||
|         ) | ||||
|  | ||||
|     @override_settings(OCR_MODE="skip") | ||||
|     def test_multi_page_mixed(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(parser.archive_path)) | ||||
|         self.assertContainsStrings( | ||||
| @@ -368,11 +401,13 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|     def test_multi_page_mixed_no_archive(self): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|         parser.parse( | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf" | ||||
|             os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), | ||||
|             "application/pdf", | ||||
|         ) | ||||
|         self.assertIsNone(parser.archive_path) | ||||
|         self.assertContainsStrings( | ||||
|             parser.get_text().lower(), ["page 4", "page 5", "page 6"] | ||||
|             parser.get_text().lower(), | ||||
|             ["page 4", "page 5", "page 6"], | ||||
|         ) | ||||
|  | ||||
|     @override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True) | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| from django.apps import AppConfig | ||||
|  | ||||
| from paperless_text.signals import text_consumer_declaration | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,9 +1,10 @@ | ||||
| import os | ||||
|  | ||||
| from PIL import ImageDraw, ImageFont, Image | ||||
| from django.conf import settings | ||||
|  | ||||
| from documents.parsers import DocumentParser | ||||
| from PIL import Image | ||||
| from PIL import ImageDraw | ||||
| from PIL import ImageFont | ||||
|  | ||||
|  | ||||
| class TextDocumentParser(DocumentParser): | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| import os | ||||
|  | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from paperless_text.parsers import TextDocumentParser | ||||
|  | ||||
| @@ -13,7 +12,8 @@ class TestTextParser(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         # just make sure that it does not crash | ||||
|         f = parser.get_thumbnail( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "test.txt"), | ||||
|             "text/plain", | ||||
|         ) | ||||
|         self.assertTrue(os.path.isfile(f)) | ||||
|  | ||||
| @@ -22,7 +22,8 @@ class TestTextParser(DirectoriesMixin, TestCase): | ||||
|         parser = TextDocumentParser(None) | ||||
|  | ||||
|         parser.parse( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain" | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "test.txt"), | ||||
|             "text/plain", | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(parser.get_text(), "This is a test file.\n") | ||||
|   | ||||
| @@ -1,10 +1,11 @@ | ||||
| import os | ||||
| import requests | ||||
|  | ||||
| import dateutil.parser | ||||
|  | ||||
| import requests | ||||
| from django.conf import settings | ||||
|  | ||||
| from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf | ||||
| from documents.parsers import DocumentParser | ||||
| from documents.parsers import make_thumbnail_from_pdf | ||||
| from documents.parsers import ParseError | ||||
| from tika import parser | ||||
|  | ||||
|  | ||||
| @@ -20,7 +21,9 @@ class TikaDocumentParser(DocumentParser): | ||||
|             self.archive_path = self.convert_to_pdf(document_path, file_name) | ||||
|  | ||||
|         return make_thumbnail_from_pdf( | ||||
|             self.archive_path, self.tempdir, self.logging_group | ||||
|             self.archive_path, | ||||
|             self.tempdir, | ||||
|             self.logging_group, | ||||
|         ) | ||||
|  | ||||
|     def extract_metadata(self, document_path, mime_type): | ||||
| @@ -53,7 +56,7 @@ class TikaDocumentParser(DocumentParser): | ||||
|         except Exception as err: | ||||
|             raise ParseError( | ||||
|                 f"Could not parse {document_path} with tika server at " | ||||
|                 f"{tika_server}: {err}" | ||||
|                 f"{tika_server}: {err}", | ||||
|             ) | ||||
|  | ||||
|         self.text = parsed["content"].strip() | ||||
| @@ -74,11 +77,12 @@ class TikaDocumentParser(DocumentParser): | ||||
|         url = gotenberg_server + "/forms/libreoffice/convert" | ||||
|  | ||||
|         self.log("info", f"Converting {document_path} to PDF as {pdf_path}") | ||||
|         with open(document_path, "rb") as document_handle: | ||||
|             files = { | ||||
|                 "files": ( | ||||
|                     file_name or os.path.basename(document_path), | ||||
|                 open(document_path, "rb"), | ||||
|             ) | ||||
|                     document_handle, | ||||
|                 ), | ||||
|             } | ||||
|             headers = {} | ||||
|  | ||||
| @@ -88,7 +92,7 @@ class TikaDocumentParser(DocumentParser): | ||||
|             except Exception as err: | ||||
|                 raise ParseError(f"Error while converting document to PDF: {err}") | ||||
|  | ||||
|         file = open(pdf_path, "wb") | ||||
|         with open(pdf_path, "wb") as file: | ||||
|             file.write(response.content) | ||||
|             file.close() | ||||
|  | ||||
|   | ||||
| @@ -10,12 +10,12 @@ def tika_consumer_declaration(sender, **kwargs): | ||||
|         "weight": 10, | ||||
|         "mime_types": { | ||||
|             "application/msword": ".doc", | ||||
|             "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # NOQA: E501 | ||||
|             "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # noqa: E501 | ||||
|             "application/vnd.ms-excel": ".xls", | ||||
|             "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",  # NOQA: E501 | ||||
|             "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",  # noqa: E501 | ||||
|             "application/vnd.ms-powerpoint": ".ppt", | ||||
|             "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",  # NOQA: E501 | ||||
|             "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",  # NOQA: E501 | ||||
|             "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",  # noqa: E501 | ||||
|             "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",  # noqa: E501 | ||||
|             "application/vnd.oasis.opendocument.presentation": ".odp", | ||||
|             "application/vnd.oasis.opendocument.spreadsheet": ".ods", | ||||
|             "application/vnd.oasis.opendocument.text": ".odt", | ||||
|   | ||||
| @@ -4,9 +4,8 @@ from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase | ||||
| from requests import Response | ||||
|  | ||||
| from paperless_tika.parsers import TikaDocumentParser | ||||
| from requests import Response | ||||
|  | ||||
|  | ||||
| class TestTikaParser(TestCase): | ||||
| @@ -42,14 +41,15 @@ class TestTikaParser(TestCase): | ||||
|     @mock.patch("paperless_tika.parsers.parser.from_file") | ||||
|     def test_metadata(self, from_file): | ||||
|         from_file.return_value = { | ||||
|             "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"} | ||||
|             "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"}, | ||||
|         } | ||||
|  | ||||
|         file = os.path.join(self.parser.tempdir, "input.odt") | ||||
|         Path(file).touch() | ||||
|  | ||||
|         metadata = self.parser.extract_metadata( | ||||
|             file, "application/vnd.oasis.opendocument.text" | ||||
|             file, | ||||
|             "application/vnd.oasis.opendocument.text", | ||||
|         ) | ||||
|  | ||||
|         self.assertTrue("Creation-Date" in [m["key"] for m in metadata]) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Trenton Holmes
					Trenton Holmes