Merge pull request #278 from stumpylog/pre-commit-python-changes

Python Cleanup from pre-commit
Quinn Casey 2022-03-12 08:09:13 -08:00 committed by GitHub
commit 168ce2111d
95 changed files with 1640 additions and 992 deletions


@@ -62,6 +62,7 @@ repos:
         exclude: "(migrations)|(paperless/settings.py)|(.*\\.tox)|(.*/tests/.*)"
         args:
           - "--max-line-length=88"
+          - "--ignore=E203,W503"
   - repo: https://github.com/psf/black
     rev: 22.1.0
     hooks:


@@ -1,2 +1,5 @@
 # this is here so that django finds the checks.
-from .checks import *
+from .checks import changed_password_check
+from .checks import parser_check
+
+__all__ = ["changed_password_check", "parser_check"]


@@ -1,13 +1,11 @@
 from django.contrib import admin
-from .models import (
-    Correspondent,
-    Document,
-    DocumentType,
-    Tag,
-    SavedView,
-    SavedViewFilterRule,
-)
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import SavedView
+from .models import SavedViewFilterRule
+from .models import Tag
 
 class CorrespondentAdmin(admin.ModelAdmin):


@@ -1,5 +1,4 @@
 from django.apps import AppConfig
-
 from django.utils.translation import gettext_lazy as _


@@ -8,7 +8,10 @@ class BulkArchiveStrategy:
         self.zipf = zipf
 
     def make_unique_filename(
-        self, doc: Document, archive: bool = False, folder: str = ""
+        self,
+        doc: Document,
+        archive: bool = False,
+        folder: str = "",
     ):
         counter = 0
         while True:
@@ -34,7 +37,8 @@ class ArchiveOnlyStrategy(BulkArchiveStrategy):
     def add_document(self, doc: Document):
         if doc.has_archive_version:
             self.zipf.write(
-                doc.archive_path, self.make_unique_filename(doc, archive=True)
+                doc.archive_path,
+                self.make_unique_filename(doc, archive=True),
             )
         else:
             self.zipf.write(doc.source_path, self.make_unique_filename(doc))
@@ -49,5 +53,6 @@ class OriginalAndArchiveStrategy(BulkArchiveStrategy):
             )
 
         self.zipf.write(
-            doc.source_path, self.make_unique_filename(doc, folder="originals/")
+            doc.source_path,
+            self.make_unique_filename(doc, folder="originals/"),
         )


@@ -2,8 +2,9 @@ import itertools
 
 from django.db.models import Q
 from django_q.tasks import async_task
-
-from documents.models import Document, Correspondent, DocumentType
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
 
 
 def set_correspondent(doc_ids, correspondent):
@@ -40,7 +41,7 @@ def add_tag(doc_ids, tag):
     DocumentTagRelationship = Document.tags.through
 
     DocumentTagRelationship.objects.bulk_create(
-        [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs]
+        [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs],
     )
 
     async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
@@ -56,7 +57,7 @@ def remove_tag(doc_ids, tag):
     DocumentTagRelationship = Document.tags.through
 
     DocumentTagRelationship.objects.filter(
-        Q(document_id__in=affected_docs) & Q(tag_id=tag)
+        Q(document_id__in=affected_docs) & Q(tag_id=tag),
     ).delete()
 
     async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)


@@ -1,10 +1,11 @@
 import textwrap
 
 from django.conf import settings
-from django.core.checks import Error, register
+from django.core.checks import Error
+from django.core.checks import register
 from django.core.exceptions import FieldError
-from django.db.utils import OperationalError, ProgrammingError
+from django.db.utils import OperationalError
+from django.db.utils import ProgrammingError
 
 from documents.signals import document_consumer_declaration
@@ -16,7 +17,7 @@ def changed_password_check(app_configs, **kwargs):
 
     try:
         encrypted_doc = Document.objects.filter(
-            storage_type=Document.STORAGE_TYPE_GPG
+            storage_type=Document.STORAGE_TYPE_GPG,
         ).first()
     except (OperationalError, ProgrammingError, FieldError):
         return []  # No documents table yet
@@ -27,8 +28,8 @@ def changed_password_check(app_configs, **kwargs):
         return [
             Error(
                 "The database contains encrypted documents but no password "
-                "is set."
-            )
+                "is set.",
+            ),
         ]
 
     if not GnuPG.decrypted(encrypted_doc.source_file):
@@ -42,9 +43,9 @@ def changed_password_check(app_configs, **kwargs):
                     If you intend to change your password, you must first export
                     all of the old documents, start fresh with the new password
                     and then re-import them."
-                """
-            )
-        )
+                """,
+            ),
+        ),
     ]
 
     return []
@@ -61,8 +62,8 @@ def parser_check(app_configs, **kwargs):
         return [
             Error(
                 "No parsers found. This is a bug. The consumer won't be "
-                "able to consume any documents without parsers."
-            )
+                "able to consume any documents without parsers.",
+            ),
         ]
     else:
         return []


@@ -6,8 +6,8 @@ import re
 import shutil
 
 from django.conf import settings
-
-from documents.models import Document, MatchingModel
+from documents.models import Document
+from documents.models import MatchingModel
 
 
 class IncompatibleClassifierVersionError(Exception):
@@ -30,8 +30,8 @@ def preprocess_content(content):
 def load_classifier():
     if not os.path.isfile(settings.MODEL_FILE):
         logger.debug(
-            f"Document classification model does not exist (yet), not "
-            f"performing automatic matching."
+            "Document classification model does not exist (yet), not "
+            "performing automatic matching.",
         )
         return None
@@ -42,16 +42,16 @@ def load_classifier():
     except (ClassifierModelCorruptError, IncompatibleClassifierVersionError):
         # there's something wrong with the model file.
         logger.exception(
-            f"Unrecoverable error while loading document "
-            f"classification model, deleting model file."
+            "Unrecoverable error while loading document "
+            "classification model, deleting model file.",
         )
         os.unlink(settings.MODEL_FILE)
         classifier = None
     except OSError:
-        logger.exception(f"IO error while loading document classification model")
+        logger.exception("IO error while loading document classification model")
         classifier = None
     except Exception:
-        logger.exception(f"Unknown error while loading document classification model")
+        logger.exception("Unknown error while loading document classification model")
         classifier = None
 
     return classifier
@@ -78,7 +78,7 @@ class DocumentClassifier(object):
 
             if schema_version != self.FORMAT_VERSION:
                 raise IncompatibleClassifierVersionError(
-                    "Cannor load classifier, incompatible versions."
+                    "Cannor load classifier, incompatible versions.",
                 )
             else:
                 try:
@@ -122,8 +122,8 @@ class DocumentClassifier(object):
         logger.debug("Gathering data from database...")
         m = hashlib.sha1()
         for doc in Document.objects.order_by("pk").exclude(
-            tags__is_inbox_tag=True
-        ):  # NOQA: E501
+            tags__is_inbox_tag=True,
+        ):
             preprocessed_content = preprocess_content(doc.content)
             m.update(preprocessed_content.encode("utf-8"))
             data.append(preprocessed_content)
@@ -146,9 +146,9 @@ class DocumentClassifier(object):
                 [
                     tag.pk
                     for tag in doc.tags.filter(
-                        matching_algorithm=MatchingModel.MATCH_AUTO
+                        matching_algorithm=MatchingModel.MATCH_AUTO,
                     )
-                ]
+                ],
             )
             for tag in tags:
                 m.update(tag.to_bytes(4, "little", signed=True))
@@ -177,8 +177,11 @@ class DocumentClassifier(object):
         logger.debug(
             "{} documents, {} tag(s), {} correspondent(s), "
            "{} document type(s).".format(
-                len(data), num_tags, num_correspondents, num_document_types
-            )
+                len(data),
+                num_tags,
+                num_correspondents,
+                num_document_types,
+            ),
         )
 
         from sklearn.feature_extraction.text import CountVectorizer
@@ -188,7 +191,9 @@ class DocumentClassifier(object):
         # Step 2: vectorize data
         logger.debug("Vectorizing data...")
         self.data_vectorizer = CountVectorizer(
-            analyzer="word", ngram_range=(1, 2), min_df=0.01
+            analyzer="word",
+            ngram_range=(1, 2),
+            min_df=0.01,
         )
         data_vectorized = self.data_vectorizer.fit_transform(data)
@@ -204,7 +209,7 @@ class DocumentClassifier(object):
             ]
             self.tags_binarizer = LabelBinarizer()
             labels_tags_vectorized = self.tags_binarizer.fit_transform(
-                labels_tags
+                labels_tags,
             ).ravel()
         else:
             self.tags_binarizer = MultiLabelBinarizer()
@@ -223,7 +228,8 @@ class DocumentClassifier(object):
         else:
             self.correspondent_classifier = None
             logger.debug(
-                "There are no correspondents. Not training correspondent " "classifier."
+                "There are no correspondents. Not training correspondent "
+                "classifier.",
             )
 
         if num_document_types > 0:
@@ -233,7 +239,8 @@ class DocumentClassifier(object):
         else:
             self.document_type_classifier = None
             logger.debug(
-                "There are no document types. Not training document type " "classifier."
+                "There are no document types. Not training document type "
+                "classifier.",
             )
 
         self.data_hash = new_data_hash


@@ -15,11 +15,19 @@ from filelock import FileLock
 from rest_framework.reverse import reverse
 
 from .classifier import load_classifier
-from .file_handling import create_source_path_directory, generate_unique_filename
+from .file_handling import create_source_path_directory
+from .file_handling import generate_unique_filename
 from .loggers import LoggingMixin
-from .models import Document, FileInfo, Correspondent, DocumentType, Tag
-from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
-from .signals import document_consumption_finished, document_consumption_started
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import FileInfo
+from .models import Tag
+from .parsers import get_parser_class_for_mime_type
+from .parsers import parse_date
+from .parsers import ParseError
+from .signals import document_consumption_finished
+from .signals import document_consumption_started
 
 
 class ConsumerError(Exception):
@@ -46,12 +54,15 @@ class Consumer(LoggingMixin):
     logging_name = "paperless.consumer"
 
     def _send_progress(
-        self, current_progress, max_progress, status, message=None, document_id=None
+        self,
+        current_progress,
+        max_progress,
+        status,
+        message=None,
+        document_id=None,
     ):
         payload = {
-            "filename": os.path.basename(self.filename)
-            if self.filename
-            else None,  # NOQA: E501
+            "filename": os.path.basename(self.filename) if self.filename else None,
             "task_id": self.task_id,
             "current_progress": current_progress,
             "max_progress": max_progress,
@@ -60,7 +71,8 @@ class Consumer(LoggingMixin):
             "document_id": document_id,
         }
         async_to_sync(self.channel_layer.group_send)(
-            "status_updates", {"type": "status_update", "data": payload}
+            "status_updates",
+            {"type": "status_update", "data": payload},
         )
 
     def _fail(self, message, log_message=None, exc_info=None):
@@ -83,15 +95,16 @@ class Consumer(LoggingMixin):
     def pre_check_file_exists(self):
         if not os.path.isfile(self.path):
             self._fail(
-                MESSAGE_FILE_NOT_FOUND, f"Cannot consume {self.path}: File not found."
+                MESSAGE_FILE_NOT_FOUND,
+                f"Cannot consume {self.path}: File not found.",
             )
 
     def pre_check_duplicate(self):
         with open(self.path, "rb") as f:
             checksum = hashlib.md5(f.read()).hexdigest()
         if Document.objects.filter(
-            Q(checksum=checksum) | Q(archive_checksum=checksum)
-        ).exists():  # NOQA: E501
+            Q(checksum=checksum) | Q(archive_checksum=checksum),
+        ).exists():
             if settings.CONSUMER_DELETE_DUPLICATES:
                 os.unlink(self.path)
             self._fail(
@@ -139,7 +152,8 @@ class Consumer(LoggingMixin):
             )
 
         self.log(
-            "info", f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
+            "info",
+            f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}",
         )
 
         try:
@@ -154,7 +168,7 @@ class Consumer(LoggingMixin):
                     reverse("document-thumb", kwargs={"pk": document.pk}),
                     str(document.correspondent),
                     str(",".join(document.tags.all().values_list("name", flat=True))),
-                )
+                ),
             ).wait()
         except Exception as e:
             self._fail(
@@ -213,7 +227,9 @@ class Consumer(LoggingMixin):
 
         # Notify all listeners that we're going to do some work.
 
         document_consumption_started.send(
-            sender=self.__class__, filename=self.path, logging_group=self.logging_group
+            sender=self.__class__,
+            filename=self.path,
+            logging_group=self.logging_group,
         )
 
         self.run_pre_consume_script()
@@ -247,7 +263,9 @@ class Consumer(LoggingMixin):
             self.log("debug", f"Generating thumbnail for {self.filename}...")
             self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
             thumbnail = document_parser.get_optimised_thumbnail(
-                self.path, mime_type, self.filename
+                self.path,
+                mime_type,
+                self.filename,
             )
 
             text = document_parser.get_text()
@@ -301,21 +319,26 @@ class Consumer(LoggingMixin):
                 self._write(document.storage_type, self.path, document.source_path)
 
                 self._write(
-                    document.storage_type, thumbnail, document.thumbnail_path
+                    document.storage_type,
+                    thumbnail,
+                    document.thumbnail_path,
                 )
 
                 if archive_path and os.path.isfile(archive_path):
                     document.archive_filename = generate_unique_filename(
-                        document, archive_filename=True
+                        document,
+                        archive_filename=True,
                     )
                     create_source_path_directory(document.archive_path)
                     self._write(
-                        document.storage_type, archive_path, document.archive_path
+                        document.storage_type,
+                        archive_path,
+                        document.archive_path,
                    )
 
                     with open(archive_path, "rb") as f:
                         document.archive_checksum = hashlib.md5(
-                            f.read()
+                            f.read(),
                         ).hexdigest()
 
             # Don't save with the lock active. Saving will cause the file
@@ -328,7 +351,8 @@ class Consumer(LoggingMixin):
         # https://github.com/jonaswinkler/paperless-ng/discussions/1037
         shadow_file = os.path.join(
-            os.path.dirname(self.path), "._" + os.path.basename(self.path)
+            os.path.dirname(self.path),
+            "._" + os.path.basename(self.path),
         )
 
         if os.path.isfile(shadow_file):
@@ -390,12 +414,12 @@ class Consumer(LoggingMixin):
     def apply_overrides(self, document):
         if self.override_correspondent_id:
             document.correspondent = Correspondent.objects.get(
-                pk=self.override_correspondent_id
+                pk=self.override_correspondent_id,
             )
 
         if self.override_document_type_id:
             document.document_type = DocumentType.objects.get(
-                pk=self.override_document_type_id
+                pk=self.override_document_type_id,
             )
 
         if self.override_tag_ids:


@@ -103,15 +103,17 @@ def generate_unique_filename(doc, archive_filename=False):
     if archive_filename and doc.filename:
         new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
 
         if new_filename == old_filename or not os.path.exists(
-            os.path.join(root, new_filename)
-        ):  # NOQA: E501
+            os.path.join(root, new_filename),
+        ):
             return new_filename
 
     counter = 0
 
     while True:
         new_filename = generate_filename(
-            doc, counter, archive_filename=archive_filename
+            doc,
+            counter,
+            archive_filename=archive_filename,
         )
         if new_filename == old_filename:
             # still the same as before.
@@ -137,14 +139,16 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
 
     if doc.correspondent:
         correspondent = pathvalidate.sanitize_filename(
-            doc.correspondent.name, replacement_text="-"
+            doc.correspondent.name,
+            replacement_text="-",
         )
     else:
         correspondent = "none"
 
     if doc.document_type:
         document_type = pathvalidate.sanitize_filename(
-            doc.document_type.name, replacement_text="-"
+            doc.document_type.name,
+            replacement_text="-",
         )
     else:
         document_type = "none"
@@ -160,9 +164,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
             document_type=document_type,
             created=datetime.date.isoformat(doc.created),
             created_year=doc.created.year if doc.created else "none",
-            created_month=f"{doc.created.month:02}"
-            if doc.created
-            else "none",  # NOQA: E501
+            created_month=f"{doc.created.month:02}" if doc.created else "none",
             created_day=f"{doc.created.day:02}" if doc.created else "none",
             added=datetime.date.isoformat(doc.added),
             added_year=doc.added.year if doc.added else "none",
@@ -178,7 +180,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
     except (ValueError, KeyError, IndexError):
         logger.warning(
             f"Invalid PAPERLESS_FILENAME_FORMAT: "
-            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
+            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default",
         )
 
     counter_str = f"_{counter:02}" if counter else ""


@@ -1,7 +1,13 @@
 from django.db.models import Q
-from django_filters.rest_framework import BooleanFilter, FilterSet, Filter
+from django_filters.rest_framework import BooleanFilter
+from django_filters.rest_framework import Filter
+from django_filters.rest_framework import FilterSet
 
-from .models import Correspondent, Document, Tag, DocumentType, Log
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import Log
+from .models import Tag
 
 CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
 ID_KWARGS = ["in", "exact"]
@@ -75,7 +81,10 @@ class TitleContentFilter(Filter):
 class DocumentFilterSet(FilterSet):
 
     is_tagged = BooleanFilter(
-        label="Is tagged", field_name="tags", lookup_expr="isnull", exclude=True
+        label="Is tagged",
+        field_name="tags",
+        lookup_expr="isnull",
+        exclude=True,
     )
 
     tags__id__all = TagsFilter()


@@ -1,21 +1,30 @@
 import logging
+import math
 import os
 from contextlib import contextmanager
 
-import math
 from dateutil.parser import isoparse
 from django.conf import settings
-from whoosh import highlight, classify, query
-from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN
+from documents.models import Document
+from whoosh import classify
+from whoosh import highlight
+from whoosh import query
+from whoosh.fields import BOOLEAN
+from whoosh.fields import DATETIME
+from whoosh.fields import KEYWORD
+from whoosh.fields import NUMERIC
+from whoosh.fields import Schema
+from whoosh.fields import TEXT
 from whoosh.highlight import HtmlFormatter
-from whoosh.index import create_in, exists_in, open_dir
+from whoosh.index import create_in
+from whoosh.index import exists_in
+from whoosh.index import open_dir
 from whoosh.qparser import MultifieldParser
 from whoosh.qparser.dateparse import DateParserPlugin
-from whoosh.searching import ResultsPage, Searcher
+from whoosh.searching import ResultsPage
+from whoosh.searching import Searcher
 from whoosh.writing import AsyncWriter
 
-from documents.models import Document
-
 logger = logging.getLogger("paperless.index")
@@ -45,7 +54,7 @@ def open_index(recreate=False):
         if exists_in(settings.INDEX_DIR) and not recreate:
             return open_dir(settings.INDEX_DIR, schema=get_schema())
     except Exception:
-        logger.exception(f"Error while opening the index, recreating.")
+        logger.exception("Error while opening the index, recreating.")
 
     if not os.path.isdir(settings.INDEX_DIR):
         os.makedirs(settings.INDEX_DIR, exist_ok=True)
@@ -138,11 +147,11 @@ class DelayedQuery:
                 criterias.append(query.Term("has_type", v == "false"))
             elif k == "created__date__lt":
                 criterias.append(
-                    query.DateRange("created", start=None, end=isoparse(v))
+                    query.DateRange("created", start=None, end=isoparse(v)),
                 )
             elif k == "created__date__gt":
                 criterias.append(
-                    query.DateRange("created", start=isoparse(v), end=None)
+                    query.DateRange("created", start=isoparse(v), end=None),
                )
             elif k == "added__date__gt":
                 criterias.append(query.DateRange("added", start=isoparse(v), end=None))
@@ -220,7 +229,7 @@ class DelayedQuery:
                     hit[1],
                 ),
                 page.results.top_n,
-            )
+            ),
         )
 
         self.saved_results[item.start] = page
@@ -240,7 +249,7 @@ class DelayedFullTextQuery(DelayedQuery):
 
         corrected = self.searcher.correct_query(q, q_str)
         if corrected.query != q:
-            corrected_query = corrected.string
+            corrected.query = corrected.string
 
         return q, None
@@ -252,10 +261,14 @@ class DelayedMoreLikeThisQuery(DelayedQuery):
         docnum = self.searcher.document_number(id=more_like_doc_id)
 
         kts = self.searcher.key_terms_from_text(
-            "content", content, numterms=20, model=classify.Bo1Model, normalize=False
+            "content",
+            content,
+            numterms=20,
+            model=classify.Bo1Model,
+            normalize=False,
         )
         q = query.Or(
-            [query.Term("content", word, boost=weight) for word, weight in kts]
+            [query.Term("content", word, boost=weight) for word, weight in kts],
        )
         mask = {docnum}
@@ -266,7 +279,9 @@ def autocomplete(ix, term, limit=10):
     with ix.reader() as reader:
         terms = []
         for (score, t) in reader.most_distinctive_terms(
-            "content", number=limit, prefix=term.lower()
+            "content",
+            number=limit,
+            prefix=term.lower(),
        ):
             terms.append(t)
     return terms


@@ -1,8 +1,6 @@
 import logging
 import uuid
 
-from django.conf import settings
-
 
 class LoggingMixin:


@@ -1,8 +1,8 @@
 import os
 
 from django.conf import settings
-from django.core.management.base import BaseCommand, CommandError
-
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
 from documents.models import Document
 from paperless.db import GnuPG
@@ -31,9 +31,9 @@ class Command(BaseCommand):
                 "this unless you've got a recent backup\nWARNING: handy. It "
                 "*should* work without a hitch, but be safe and backup your\n"
                 "WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to "
-                "continue.\n\n"
+                "continue.\n\n",
             )
-            __ = input()
+            _ = input()
         except KeyboardInterrupt:
             return
@@ -41,7 +41,7 @@ class Command(BaseCommand):
         if not passphrase:
             raise CommandError(
                 "Passphrase not defined. Please set it with --passphrase or "
-                "by declaring it in your environment or your config."
+                "by declaring it in your environment or your config.",
             )
 
         self.__gpg_to_unencrypted(passphrase)
@@ -50,7 +50,7 @@ class Command(BaseCommand):
     def __gpg_to_unencrypted(passphrase):
 
         encrypted_files = Document.objects.filter(
-            storage_type=Document.STORAGE_TYPE_GPG
+            storage_type=Document.STORAGE_TYPE_GPG,
        )
 
         for document in encrypted_files:
@@ -71,7 +71,7 @@ class Command(BaseCommand):
             if not ext == ".gpg":
                 raise CommandError(
                     f"Abort: encrypted file {document.source_path} does not "
-                    f"end with .gpg"
+                    f"end with .gpg",
                 )
 
             document.filename = os.path.splitext(document.filename)[0]
@@ -83,7 +83,8 @@ class Command(BaseCommand):
                 f.write(raw_thumb)
 
             Document.objects.filter(id=document.id).update(
-                storage_type=document.storage_type, filename=document.filename
+                storage_type=document.storage_type,
+                filename=document.filename,
             )
 
             for path in old_paths:


@@ -1,7 +1,6 @@
 import hashlib
-import multiprocessing
 import logging
+import multiprocessing
 import os
 import shutil
 import uuid
@@ -11,12 +10,12 @@ from django import db
 from django.conf import settings
 from django.core.management.base import BaseCommand
 from django.db import transaction
-from filelock import FileLock
-from whoosh.writing import AsyncWriter
-
 from documents.models import Document
+from filelock import FileLock
+from whoosh.writing import AsyncWriter
 
 from ... import index
-from ...file_handling import create_source_path_directory, generate_unique_filename
+from ...file_handling import create_source_path_directory
+from ...file_handling import generate_unique_filename
 from ...parsers import get_parser_class_for_mime_type
@@ -33,7 +32,7 @@ def handle_document(document_id):
     if not parser_class:
         logger.error(
             f"No parser found for mime type {mime_type}, cannot "
-            f"archive document {document} (ID: {document_id})"
+            f"archive document {document} (ID: {document_id})",
         )
         return
@@ -43,7 +42,9 @@ def handle_document(document_id):
     try:
         parser.parse(document.source_path, mime_type, document.get_public_filename())
 
         thumbnail = parser.get_optimised_thumbnail(
-            document.source_path, mime_type, document.get_public_filename()
+            document.source_path,
+            mime_type,
+            document.get_public_filename(),
        )
 
         if parser.get_archive_path():
@@ -55,7 +56,8 @@ def handle_document(document_id):
                 # We also don't use save() since that triggers the filehandling
                 # logic, and we don't want that yet (file not yet in place)
                 document.archive_filename = generate_unique_filename(
-                    document, archive_filename=True
+                    document,
+                    archive_filename=True,
                 )
                 Document.objects.filter(pk=document.pk).update(
                     archive_checksum=checksum,
@@ -70,9 +72,9 @@ def handle_document(document_id):
             with index.open_index_writer() as writer:
                 index.update_document(writer, document)
 
-    except Exception as e:
+    except Exception:
         logger.exception(
-            f"Error while parsing document {document} " f"(ID: {document_id})"
+            f"Error while parsing document {document} " f"(ID: {document_id})",
        )
     finally:
         parser.cleanup()
@@ -86,7 +88,8 @@ class Command(BaseCommand):
         back-tag all previously indexed documents with metadata created (or
         modified) after their initial import.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )
 
     def add_arguments(self, parser):
@@ -129,7 +132,7 @@ class Command(BaseCommand):
             map(
                 lambda doc: doc.id,
                filter(lambda d: overwrite or not d.has_archive_version, documents),
-            )
+            ),
        )
 
         # Note to future self: this prevents django from reusing database
@@ -146,7 +149,7 @@ class Command(BaseCommand):
                     pool.imap_unordered(handle_document, document_ids),
                     total=len(document_ids),
                     disable=options["no_progress_bar"],
-                )
+                ),
             )
         except KeyboardInterrupt:
             print("Aborting...")


@@ -1,17 +1,18 @@
 import logging
 import os
-from pathlib import Path, PurePath
+from pathlib import Path
+from pathlib import PurePath
 from threading import Thread
 from time import sleep
 
 from django.conf import settings
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
 from django_q.tasks import async_task
-from watchdog.events import FileSystemEventHandler
-from watchdog.observers.polling import PollingObserver
-
 from documents.models import Tag
 from documents.parsers import is_file_ext_supported
+from watchdog.events import FileSystemEventHandler
+from watchdog.observers.polling import PollingObserver
 
 try:
     from inotifyrecursive import INotify, flags
@@ -29,7 +30,7 @@ def _tags_from_path(filepath):
     path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
     for part in path_parts:
         tag_ids.add(
-            Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk
+            Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
         )
 
     return tag_ids
@@ -56,7 +57,7 @@ def _consume(filepath):
     try:
         if settings.CONSUMER_SUBDIRS_AS_TAGS:
             tag_ids = _tags_from_path(filepath)
-    except Exception as e:
+    except Exception:
         logger.exception("Error creating tags from path")
 
     try:
@@ -67,7 +68,7 @@ def _consume(filepath):
             override_tag_ids=tag_ids if tag_ids else None,
             task_name=os.path.basename(filepath)[:100],
         )
-    except Exception as e:
+    except Exception:
         # Catch all so that the consumer won't crash.
         # This is also what the test case is listening for to check for
         # errors.
@@ -86,7 +87,7 @@ def _consume_wait_unmodified(file):
             new_mtime = os.stat(file).st_mtime
         except FileNotFoundError:
             logger.debug(
-                f"File {file} moved while waiting for it to remain " f"unmodified."
+                f"File {file} moved while waiting for it to remain " f"unmodified.",
             )
             return
         if new_mtime == mtime:


@@ -9,7 +9,8 @@ class Command(BaseCommand):
         Trains the classifier on your data and saves the resulting models to a
         file. The document consumer will then automatically use this new model.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )
 
     def __init__(self, *args, **kwargs):


@@ -6,28 +6,28 @@ import time
 
 import tqdm
 from django.conf import settings
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group
+from django.contrib.auth.models import User
 from django.core import serializers
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
 from django.db import transaction
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import SavedView
+from documents.models import SavedViewFilterRule
+from documents.models import Tag
+from documents.settings import EXPORTER_ARCHIVE_NAME
+from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_THUMBNAIL_NAME
 from filelock import FileLock
 
-from documents.models import (
-    Document,
-    Correspondent,
-    Tag,
-    DocumentType,
-    SavedView,
-    SavedViewFilterRule,
-)
-from documents.settings import (
-    EXPORTER_FILE_NAME,
-    EXPORTER_THUMBNAIL_NAME,
-    EXPORTER_ARCHIVE_NAME,
-)
 from paperless.db import GnuPG
-from paperless_mail.models import MailAccount, MailRule
-from ...file_handling import generate_filename, delete_empty_directories
+from paperless_mail.models import MailAccount
+from paperless_mail.models import MailRule
+
+from ...file_handling import delete_empty_directories
+from ...file_handling import generate_filename
 
 
 class Command(BaseCommand):
@@ -37,7 +37,8 @@ class Command(BaseCommand):
         directory. And include a manifest file containing document data for
         easy import.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )
 
     def add_arguments(self, parser):
@@ -107,20 +108,20 @@ class Command(BaseCommand):
         # 1. Take a snapshot of what files exist in the current export folder
         for root, dirs, files in os.walk(self.target):
             self.files_in_export_dir.extend(
-                map(lambda f: os.path.abspath(os.path.join(root, f)), files)
+                map(lambda f: os.path.abspath(os.path.join(root, f)), files),
            )
 
         # 2. Create manifest, containing all correspondents, types, tags and
         # documents
         with transaction.atomic():
             manifest = json.loads(
-                serializers.serialize("json", Correspondent.objects.all())
+                serializers.serialize("json", Correspondent.objects.all()),
             )
 
             manifest += json.loads(serializers.serialize("json", Tag.objects.all()))
 
             manifest += json.loads(
-                serializers.serialize("json", DocumentType.objects.all())
+                serializers.serialize("json", DocumentType.objects.all()),
             )
 
             documents = Document.objects.order_by("id")
@@ -129,19 +130,19 @@ class Command(BaseCommand):
             manifest += document_manifest
 
             manifest += json.loads(
-                serializers.serialize("json", MailAccount.objects.all())
+                serializers.serialize("json", MailAccount.objects.all()),
             )
 
             manifest += json.loads(
-                serializers.serialize("json", MailRule.objects.all())
+                serializers.serialize("json", MailRule.objects.all()),
             )
 
             manifest += json.loads(
-                serializers.serialize("json", SavedView.objects.all())
+                serializers.serialize("json", SavedView.objects.all()),
             )
 
             manifest += json.loads(
-                serializers.serialize("json", SavedViewFilterRule.objects.all())
+                serializers.serialize("json", SavedViewFilterRule.objects.all()),
             )
 
             manifest += json.loads(serializers.serialize("json", Group.objects.all()))
@@ -155,9 +156,7 @@ class Command(BaseCommand):
             disable=progress_bar_disable,
         ):
             # 3.1. store files unencrypted
-            document_dict["fields"][
-                "storage_type"
-            ] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501
+            document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED
 
             document = document_map[document_dict["pk"]]
@@ -166,7 +165,9 @@ class Command(BaseCommand):
             while True:
                 if self.use_filename_format:
                     base_name = generate_filename(
-                        document, counter=filename_counter, append_gpg=False
+                        document,
+                        counter=filename_counter,
+                        append_gpg=False,
                     )
                 else:
                     base_name = document.get_public_filename(counter=filename_counter)
@@ -217,14 +218,18 @@ class Command(BaseCommand):
                 os.utime(archive_target, times=(t, t))
             else:
                 self.check_and_copy(
-                    document.source_path, document.checksum, original_target
+                    document.source_path,
+                    document.checksum,
+                    original_target,
                )
 
                 self.check_and_copy(document.thumbnail_path, None, thumbnail_target)
 
                 if archive_target:
                     self.check_and_copy(
-                        document.archive_path, document.archive_checksum, archive_target
+                        document.archive_path,
+                        document.archive_checksum,
+                        archive_target,
                     )
 
         # 4. write manifest to target forlder
@@ -243,7 +248,8 @@ class Command(BaseCommand):
                 os.remove(f)
                 delete_empty_directories(
-                    os.path.abspath(os.path.dirname(f)), os.path.abspath(self.target)
+                    os.path.abspath(os.path.dirname(f)),
+                    os.path.abspath(self.target),
                 )
 
     def check_and_copy(self, source, source_checksum, target):


@@ -7,16 +7,16 @@ from contextlib import contextmanager
 
 import tqdm
 from django.conf import settings
 from django.core.management import call_command
-from django.core.management.base import BaseCommand, CommandError
-from django.db.models.signals import post_save, m2m_changed
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
+from django.db.models.signals import m2m_changed
+from django.db.models.signals import post_save
+from documents.models import Document
+from documents.settings import EXPORTER_ARCHIVE_NAME
+from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_THUMBNAIL_NAME
 from filelock import FileLock
 
-from documents.models import Document
-from documents.settings import (
-    EXPORTER_FILE_NAME,
-    EXPORTER_THUMBNAIL_NAME,
-    EXPORTER_ARCHIVE_NAME,
-)
 from ...file_handling import create_source_path_directory
 from ...signals.handlers import update_filename_and_move_files
@@ -36,7 +36,8 @@ class Command(BaseCommand):
         Using a manifest.json file, load the data from there, and import the
         documents it refers to.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )
 
     def add_arguments(self, parser):
@@ -73,7 +74,9 @@ class Command(BaseCommand):
         self._check_manifest()
         with disable_signal(
-            post_save, receiver=update_filename_and_move_files, sender=Document
+            post_save,
+            receiver=update_filename_and_move_files,
+            sender=Document,
         ):
             with disable_signal(
                 m2m_changed,
@@ -92,7 +95,7 @@ class Command(BaseCommand):
     def _check_manifest_exists(path):
         if not os.path.exists(path):
             raise CommandError(
-                "That directory doesn't appear to contain a manifest.json " "file."
+                "That directory doesn't appear to contain a manifest.json " "file.",
             )
 
     def _check_manifest(self):
@@ -105,14 +108,14 @@ class Command(BaseCommand):
             if EXPORTER_FILE_NAME not in record:
                 raise CommandError(
                     "The manifest file contains a record which does not "
-                    "refer to an actual document file."
+                    "refer to an actual document file.",
                )
 
             doc_file = record[EXPORTER_FILE_NAME]
             if not os.path.exists(os.path.join(self.source, doc_file)):
                 raise CommandError(
                     'The manifest file refers to "{}" which does not '
-                    "appear to be in the source directory.".format(doc_file)
+                    "appear to be in the source directory.".format(doc_file),
                 )
 
             if EXPORTER_ARCHIVE_NAME in record:
@@ -120,7 +123,7 @@ class Command(BaseCommand):
                 if not os.path.exists(os.path.join(self.source, archive_file)):
                     raise CommandError(
                         f"The manifest file refers to {archive_file} which "
-                        f"does not appear to be in the source directory."
+                        f"does not appear to be in the source directory.",
                     )
 
     def _import_files_from_manifest(self, progress_bar_disable):
@@ -132,7 +135,7 @@ class Command(BaseCommand):
         print("Copy files into paperless...")
 
         manifest_documents = list(
-            filter(lambda r: r["model"] == "documents.document", self.manifest)
+            filter(lambda r: r["model"] == "documents.document", self.manifest),
        )
 
         for record in tqdm.tqdm(manifest_documents, disable=progress_bar_disable):


@@ -1,7 +1,7 @@
 from django.core.management import BaseCommand
 from django.db import transaction
-
-from documents.tasks import index_reindex, index_optimize
+from documents.tasks import index_optimize
+from documents.tasks import index_reindex
 
 
 class Command(BaseCommand):


@@ -3,7 +3,6 @@ import logging
 
 import tqdm
 from django.core.management.base import BaseCommand
 from django.db.models.signals import post_save
-
 from documents.models import Document
@@ -12,7 +11,8 @@ class Command(BaseCommand):
     help = """
         This will rename all documents to match the latest filename format.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )
 
     def add_arguments(self, parser):
@@ -28,6 +28,7 @@ class Command(BaseCommand):
         logging.getLogger().handlers[0].level = logging.ERROR
 
         for document in tqdm.tqdm(
-            Document.objects.all(), disable=options["no_progress_bar"]
+            Document.objects.all(),
+            disable=options["no_progress_bar"],
         ):
             post_save.send(Document, instance=document)


@@ -2,10 +2,12 @@ import logging
 
 import tqdm
 from django.core.management.base import BaseCommand
-
 from documents.classifier import load_classifier
 from documents.models import Document
-from ...signals.handlers import set_correspondent, set_document_type, set_tags
+
+from ...signals.handlers import set_correspondent
+from ...signals.handlers import set_document_type
+from ...signals.handlers import set_tags
 
 logger = logging.getLogger("paperless.management.retagger")
@@ -19,7 +21,8 @@ class Command(BaseCommand):
         back-tag all previously indexed documents with metadata created (or
         modified) after their initial import.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )
 
     def add_arguments(self, parser):
@@ -57,7 +60,8 @@ class Command(BaseCommand):
             help="Return the suggestion, don't change anything.",
         )
         parser.add_argument(
-            "--base-url", help="The base URL to use to build the link to the documents."
+            "--base-url",
+            help="The base URL to use to build the link to the documents.",
        )
 
     def handle(self, *args, **options):


@@ -7,7 +7,8 @@ class Command(BaseCommand):
     help = """
         This command checks your document archive for issues.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )
 
     def add_arguments(self, parser):


@@ -5,8 +5,8 @@ import shutil
 
 import tqdm
 from django import db
 from django.core.management.base import BaseCommand
-
 from documents.models import Document
+
 from ...parsers import get_parser_class_for_mime_type
@@ -22,7 +22,9 @@ def _process_document(doc_in):
     try:
         thumb = parser.get_optimised_thumbnail(
-            document.source_path, document.mime_type, document.get_public_filename()
+            document.source_path,
+            document.mime_type,
+            document.get_public_filename(),
        )
 
         shutil.move(thumb, document.thumbnail_path)
@@ -35,7 +37,8 @@ class Command(BaseCommand):
     help = """
         This will regenerate the thumbnails for all documents.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )
 
     def add_arguments(self, parser):
@@ -76,5 +79,5 @@ class Command(BaseCommand):
                 pool.imap_unordered(_process_document, ids),
                 total=len(ids),
                 disable=options["no_progress_bar"],
-            )
+            ),
         )


@@ -2,7 +2,7 @@ import logging
 import os
 
 from django.contrib.auth.models import User
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
 
 logger = logging.getLogger("paperless.management.superuser")
@@ -13,7 +13,8 @@ class Command(BaseCommand):
     help = """
         Creates a Django superuser based on env variables.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )
 
     def handle(self, *args, **options):
@@ -39,5 +40,5 @@ class Command(BaseCommand):
             self.stdout.write(f'Did not create superuser "{username}".')
             self.stdout.write(
                 'Make sure you specified "PAPERLESS_ADMIN_PASSWORD" in your '
-                '"docker-compose.env" file.'
+                '"docker-compose.env" file.',
             )


@@ -1,8 +1,10 @@
 import logging
 import re
 
-from documents.models import MatchingModel, Correspondent, DocumentType, Tag
+from documents.models import Correspondent
+from documents.models import DocumentType
+from documents.models import MatchingModel
+from documents.models import Tag
 
 
 logger = logging.getLogger("paperless.matching")
@@ -12,7 +14,7 @@ def log_reason(matching_model, document, reason):
     class_name = type(matching_model).__name__
     logger.debug(
         f"{class_name} {matching_model.name} matched on document "
-        f"{document} because {reason}"
+        f"{document} because {reason}",
     )
@@ -25,7 +27,7 @@ def match_correspondents(document, classifier):
 
     correspondents = Correspondent.objects.all()
 
     return list(
-        filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents)
+        filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents),
    )
@@ -38,7 +40,7 @@ def match_document_types(document, classifier):
 
     document_types = DocumentType.objects.all()
 
     return list(
-        filter(lambda o: matches(o, document) or o.pk == pred_id, document_types)
+        filter(lambda o: matches(o, document) or o.pk == pred_id, document_types),
    )
@@ -51,7 +53,7 @@ def match_tags(document, classifier):
 
     tags = Tag.objects.all()
 
     return list(
-        filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags)
+        filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags),
    )
@@ -92,7 +94,7 @@ def matches(matching_model, document):
                 rf"\b{re.escape(matching_model.match)}\b",
                 document_content,
                 **search_kwargs,
-            )
+            ),
         )
         if result:
             log_reason(
@@ -105,11 +107,12 @@ def matches(matching_model, document):
     elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
         try:
             match = re.search(
-                re.compile(matching_model.match, **search_kwargs), document_content
+                re.compile(matching_model.match, **search_kwargs),
+                document_content,
            )
         except re.error:
             logger.error(
-                f"Error while processing regular expression " f"{matching_model.match}"
+                f"Error while processing regular expression " f"{matching_model.match}",
            )
             return False
         if match:


@@ -5,17 +5,14 @@ import os
 import re
 from collections import OrderedDict
 
-import pathvalidate
-
 import dateutil.parser
+import pathvalidate
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.db import models
 from django.utils import timezone
 from django.utils.timezone import is_aware
 from django.utils.translation import gettext_lazy as _
-
 from documents.parsers import get_default_file_extension
@@ -42,7 +39,9 @@ class MatchingModel(models.Model):
     match = models.CharField(_("match"), max_length=256, blank=True)
 
     matching_algorithm = models.PositiveIntegerField(
-        _("matching algorithm"), choices=MATCHING_ALGORITHMS, default=MATCH_ANY
+        _("matching algorithm"),
+        choices=MATCHING_ALGORITHMS,
+        default=MATCH_ANY,
    )
 
     is_insensitive = models.BooleanField(_("is insensitive"), default=True)
@@ -71,7 +70,7 @@ class Tag(MatchingModel):
         default=False,
         help_text=_(
             "Marks this tag as an inbox tag: All newly consumed "
-            "documents will be tagged with inbox tags."
+            "documents will be tagged with inbox tags.",
         ),
     )
@@ -120,14 +119,17 @@ class Document(models.Model):
         blank=True,
         help_text=_(
             "The raw, text-only data of the document. This field is "
-            "primarily used for searching."
+            "primarily used for searching.",
         ),
     )
 
     mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
 
     tags = models.ManyToManyField(
-        Tag, related_name="documents", blank=True, verbose_name=_("tags")
+        Tag,
+        related_name="documents",
+        blank=True,
+        verbose_name=_("tags"),
    )
 
     checksum = models.CharField(
@@ -150,7 +152,10 @@ class Document(models.Model):
     created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)
 
     modified = models.DateTimeField(
-        _("modified"), auto_now=True, editable=False, db_index=True
+        _("modified"),
+        auto_now=True,
+        editable=False,
+        db_index=True,
    )
 
     storage_type = models.CharField(
@@ -162,7 +167,10 @@ class Document(models.Model):
     )
 
     added = models.DateTimeField(
-        _("added"), default=timezone.now, editable=False, db_index=True
+        _("added"),
+        default=timezone.now,
+        editable=False,
+        db_index=True,
    )
 
     filename = models.FilePathField(
@@ -192,7 +200,7 @@ class Document(models.Model):
         unique=True,
         db_index=True,
         help_text=_(
-            "The position of this document in your physical document " "archive."
+            "The position of this document in your physical document " "archive.",
         ),
     )
@@ -289,7 +297,9 @@ class Log(models.Model):
     message = models.TextField(_("message"))
 
     level = models.PositiveIntegerField(
-        _("level"), choices=LEVELS, default=logging.INFO
+        _("level"),
+        choices=LEVELS,
+        default=logging.INFO,
    )
 
     created = models.DateTimeField(_("created"), auto_now_add=True)
@@ -321,7 +331,10 @@ class SavedView(models.Model):
     )
 
     sort_field = models.CharField(
-        _("sort field"), max_length=128, null=True, blank=True
+        _("sort field"),
+        max_length=128,
+        null=True,
+        blank=True,
    )
 
     sort_reverse = models.BooleanField(_("sort reverse"), default=False)
@@ -383,11 +396,16 @@ class FileInfo:
                 ),
             ),
             ("title", re.compile(r"(?P<title>.*)$", flags=re.IGNORECASE)),
] ],
) )
def __init__( def __init__(
self, created=None, correspondent=None, title=None, tags=(), extension=None self,
created=None,
correspondent=None,
title=None,
tags=(),
extension=None,
): ):
self.created = created self.created = created
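
Most of the models.py churn above follows one mechanical pattern: a call wrapped across lines gets one argument per line plus a trailing comma (the add-trailing-comma hook), so later changes show up as single-line diffs. A runnable comparison, with a hypothetical date_field() helper standing in for the Django field:

    def date_field(label, auto_now=False, editable=True, db_index=False):
        # Stand-in for models.DateTimeField, just to compare call layouts.
        return dict(label=label, auto_now=auto_now,
                    editable=editable, db_index=db_index)

    packed = date_field("modified", auto_now=True, editable=False, db_index=True)

    exploded = date_field(
        "modified",
        auto_now=True,
        editable=False,
        db_index=True,  # magic trailing comma keeps black's exploded layout stable
    )

    assert packed == exploded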

View File

@ -9,6 +9,8 @@ import tempfile
import magic import magic
from django.conf import settings from django.conf import settings
from django.utils import timezone from django.utils import timezone
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration
# This regular expression will try to find dates in the document at # This regular expression will try to find dates in the document at
# hand and will match the following formats: # hand and will match the following formats:
@ -21,17 +23,15 @@ from django.utils import timezone
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits # - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration
# TODO: isn't there a date parsing library for this? # TODO: isn't there a date parsing library for this?
DATE_REGEX = re.compile( DATE_REGEX = re.compile(
r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" # NOQA: E501 r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" # NOQA: E501 r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # NOQA: E501 r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|" r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))" r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))",
) )
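
DATE_REGEX above only gains lowercase noqa markers and a trailing comma; the pattern itself scans document text for the date layouts listed in the comments. A cut-down, runnable approximation covering just two of those shapes (the real expression has more alternatives and stricter boundary handling):

    import re

    # Simplified pattern: numeric DD.MM.YYYY-style dates and "MONTH DD, YYYY".
    SIMPLE_DATE = re.compile(
        r"\b[0-9]{1,2}[./-][0-9]{1,2}[./-](?:[0-9]{4}|[0-9]{2})\b"
        r"|\b[^\W\d_]{3,9} [0-9]{1,2}, [0-9]{4}\b",
    )

    text = "Received 03.02.2021, due by March 15, 2021."
    print(SIMPLE_DATE.findall(text))  # ['03.02.2021', 'March 15, 2021']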

View File

@ -3,9 +3,8 @@ import logging
import os import os
from django.conf import settings from django.conf import settings
from tqdm import tqdm
from documents.models import Document from documents.models import Document
from tqdm import tqdm
class SanityCheckMessages: class SanityCheckMessages:
@ -88,19 +87,19 @@ def check_sanity(progress=False):
if not checksum == doc.checksum: if not checksum == doc.checksum:
messages.error( messages.error(
f"Checksum mismatch of document {doc.pk}. " f"Checksum mismatch of document {doc.pk}. "
f"Stored: {doc.checksum}, actual: {checksum}." f"Stored: {doc.checksum}, actual: {checksum}.",
) )
# Check sanity of the archive file. # Check sanity of the archive file.
if doc.archive_checksum and not doc.archive_filename: if doc.archive_checksum and not doc.archive_filename:
messages.error( messages.error(
f"Document {doc.pk} has an archive file checksum, but no " f"Document {doc.pk} has an archive file checksum, but no "
f"archive filename." f"archive filename.",
) )
elif not doc.archive_checksum and doc.archive_filename: elif not doc.archive_checksum and doc.archive_filename:
messages.error( messages.error(
f"Document {doc.pk} has an archive file, but its checksum is " f"Document {doc.pk} has an archive file, but its checksum is "
f"missing." f"missing.",
) )
elif doc.has_archive_version: elif doc.has_archive_version:
if not os.path.isfile(doc.archive_path): if not os.path.isfile(doc.archive_path):
@ -113,7 +112,7 @@ def check_sanity(progress=False):
checksum = hashlib.md5(f.read()).hexdigest() checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e: except OSError as e:
messages.error( messages.error(
f"Cannot read archive file of document {doc.pk}: {e}" f"Cannot read archive file of document {doc.pk}: {e}",
) )
else: else:
if not checksum == doc.archive_checksum: if not checksum == doc.archive_checksum:
@ -121,7 +120,7 @@ def check_sanity(progress=False):
f"Checksum mismatch of archived document " f"Checksum mismatch of archived document "
f"{doc.pk}. " f"{doc.pk}. "
f"Stored: {doc.archive_checksum}, " f"Stored: {doc.archive_checksum}, "
f"actual: {checksum}." f"actual: {checksum}.",
) )
# other document checks # other document checks
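
The sanity-checker hunks are trailing-comma-only, but the surrounding check is worth a sketch: each file is read back and its MD5 digest compared to the stored checksum. A self-contained approximation, with a hypothetical checksum_matches() helper:

    import hashlib
    import tempfile

    def checksum_matches(path: str, stored: str) -> bool:
        # Read the file back and compare its MD5 hex digest with the stored
        # value, as check_sanity() does for originals and archive versions.
        with open(path, "rb") as f:
            return hashlib.md5(f.read()).hexdigest() == stored

    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(b"document body")

    expected = hashlib.md5(b"document body").hexdigest()
    print(checksum_matches(tmp.name, expected))  # True
    print(checksum_matches(tmp.name, "0" * 32))  # False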

View File

@ -1,25 +1,22 @@
import math
import re import re
import magic import magic
import math
from django.utils.text import slugify from django.utils.text import slugify
from django.utils.translation import gettext as _
from rest_framework import serializers from rest_framework import serializers
from rest_framework.fields import SerializerMethodField from rest_framework.fields import SerializerMethodField
from . import bulk_edit from . import bulk_edit
from .models import ( from .models import Correspondent
Correspondent, from .models import Document
Tag, from .models import DocumentType
Document, from .models import MatchingModel
DocumentType, from .models import SavedView
SavedView, from .models import SavedViewFilterRule
SavedViewFilterRule, from .models import Tag
MatchingModel,
)
from .parsers import is_mime_type_supported from .parsers import is_mime_type_supported
from django.utils.translation import gettext as _
# https://www.django-rest-framework.org/api-guide/serializers/#example # https://www.django-rest-framework.org/api-guide/serializers/#example
class DynamicFieldsModelSerializer(serializers.ModelSerializer): class DynamicFieldsModelSerializer(serializers.ModelSerializer):
@ -56,12 +53,12 @@ class MatchingModelSerializer(serializers.ModelSerializer):
if ( if (
"matching_algorithm" in self.initial_data "matching_algorithm" in self.initial_data
and self.initial_data["matching_algorithm"] == MatchingModel.MATCH_REGEX and self.initial_data["matching_algorithm"] == MatchingModel.MATCH_REGEX
): # NOQA: E501 ):
try: try:
re.compile(match) re.compile(match)
except Exception as e: except re.error as e:
raise serializers.ValidationError( raise serializers.ValidationError(
_("Invalid regular expression: %(error)s") % {"error": str(e)} _("Invalid regular expression: %(error)s") % {"error": str(e.msg)},
) )
return match return match
@ -156,7 +153,7 @@ class TagSerializer(MatchingModelSerializer):
luminance = math.sqrt( luminance = math.sqrt(
0.299 * math.pow(rgb[0], 2) 0.299 * math.pow(rgb[0], 2)
+ 0.587 * math.pow(rgb[1], 2) + 0.587 * math.pow(rgb[1], 2)
+ 0.114 * math.pow(rgb[2], 2) + 0.114 * math.pow(rgb[2], 2),
) )
return "#ffffff" if luminance < 0.53 else "#000000" return "#ffffff" if luminance < 0.53 else "#000000"
except ValueError: except ValueError:
@ -298,7 +295,7 @@ class DocumentListSerializer(serializers.Serializer):
count = Document.objects.filter(id__in=documents).count() count = Document.objects.filter(id__in=documents).count()
if not count == len(documents): if not count == len(documents):
raise serializers.ValidationError( raise serializers.ValidationError(
f"Some documents in {name} don't exist or were " f"specified twice." f"Some documents in {name} don't exist or were " f"specified twice.",
) )
def validate_documents(self, documents): def validate_documents(self, documents):
@ -331,7 +328,7 @@ class BulkEditSerializer(DocumentListSerializer):
count = Tag.objects.filter(id__in=tags).count() count = Tag.objects.filter(id__in=tags).count()
if not count == len(tags): if not count == len(tags):
raise serializers.ValidationError( raise serializers.ValidationError(
f"Some tags in {name} don't exist or were specified twice." f"Some tags in {name} don't exist or were specified twice.",
) )
def validate_method(self, method): def validate_method(self, method):
@ -456,7 +453,7 @@ class PostDocumentSerializer(serializers.Serializer):
if not is_mime_type_supported(mime_type): if not is_mime_type_supported(mime_type):
raise serializers.ValidationError( raise serializers.ValidationError(
_("File type %(type)s not supported") % {"type": mime_type} _("File type %(type)s not supported") % {"type": mime_type},
) )
return document.name, document_data return document.name, document_data
@ -483,11 +480,13 @@ class PostDocumentSerializer(serializers.Serializer):
class BulkDownloadSerializer(DocumentListSerializer): class BulkDownloadSerializer(DocumentListSerializer):
content = serializers.ChoiceField( content = serializers.ChoiceField(
choices=["archive", "originals", "both"], default="archive" choices=["archive", "originals", "both"],
default="archive",
) )
compression = serializers.ChoiceField( compression = serializers.ChoiceField(
choices=["none", "deflated", "bzip2", "lzma"], default="none" choices=["none", "deflated", "bzip2", "lzma"],
default="none",
) )
def validate_compression(self, compression): def validate_compression(self, compression):
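
TagSerializer above derives a readable label colour from the tag colour via a weighted RMS luminance. A standalone sketch using the same formula and threshold; the "#rrggbb" parsing here is an assumption, not the serializer's actual code path:

    import math

    def label_colour(hex_colour: str) -> str:
        # Convert "#rrggbb" to r, g, b in 0..1, then apply the weighted
        # RMS luminance used above to choose a readable text colour.
        rgb = [int(hex_colour[i:i + 2], 16) / 255 for i in (1, 3, 5)]
        luminance = math.sqrt(
            0.299 * math.pow(rgb[0], 2)
            + 0.587 * math.pow(rgb[1], 2)
            + 0.114 * math.pow(rgb[2], 2),
        )
        return "#ffffff" if luminance < 0.53 else "#000000"

    print(label_colour("#b2df8a"))  # light green tag -> "#000000"
    print(label_colour("#17541f"))  # dark green tag  -> "#ffffff"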

View File

@ -1,24 +1,26 @@
import logging import logging
import os import os
from django.utils import termcolors
from django.conf import settings from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry from django.contrib.admin.models import ADDITION
from django.contrib.admin.models import LogEntry
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError from django.db import DatabaseError
from django.db import models
from django.db.models import Q from django.db.models import Q
from django.dispatch import receiver from django.dispatch import receiver
from django.utils import termcolors, timezone from django.utils import termcolors
from django.utils import timezone
from filelock import FileLock from filelock import FileLock
from .. import matching from .. import matching
from ..file_handling import ( from ..file_handling import create_source_path_directory
delete_empty_directories, from ..file_handling import delete_empty_directories
create_source_path_directory, from ..file_handling import generate_unique_filename
generate_unique_filename, from ..models import Document
) from ..models import MatchingModel
from ..models import Document, Tag, MatchingModel from ..models import Tag
logger = logging.getLogger("paperless.handlers") logger = logging.getLogger("paperless.handlers")
@ -72,7 +74,7 @@ def set_correspondent(
print( print(
termcolors.colorize(str(document), fg="green") termcolors.colorize(str(document), fg="green")
if color if color
else str(document) else str(document),
) )
print(f"{base_url}/documents/{document.pk}") print(f"{base_url}/documents/{document.pk}")
else: else:
@ -82,7 +84,7 @@ def set_correspondent(
if color if color
else str(document) else str(document)
) )
+ f" [{document.pk}]" + f" [{document.pk}]",
) )
print(f"Suggest correspondent {selected}") print(f"Suggest correspondent {selected}")
else: else:
@ -139,7 +141,7 @@ def set_document_type(
print( print(
termcolors.colorize(str(document), fg="green") termcolors.colorize(str(document), fg="green")
if color if color
else str(document) else str(document),
) )
print(f"{base_url}/documents/{document.pk}") print(f"{base_url}/documents/{document.pk}")
else: else:
@ -149,7 +151,7 @@ def set_document_type(
if color if color
else str(document) else str(document)
) )
+ f" [{document.pk}]" + f" [{document.pk}]",
) )
print(f"Suggest document type {selected}") print(f"Suggest document type {selected}")
else: else:
@ -176,9 +178,9 @@ def set_tags(
if replace: if replace:
Document.tags.through.objects.filter(document=document).exclude( Document.tags.through.objects.filter(document=document).exclude(
Q(tag__is_inbox_tag=True) Q(tag__is_inbox_tag=True),
).exclude( ).exclude(
Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO) Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO),
).delete() ).delete()
current_tags = set(document.tags.all()) current_tags = set(document.tags.all())
@ -198,7 +200,7 @@ def set_tags(
print( print(
termcolors.colorize(str(document), fg="green") termcolors.colorize(str(document), fg="green")
if color if color
else str(document) else str(document),
) )
print(f"{base_url}/documents/{document.pk}") print(f"{base_url}/documents/{document.pk}")
else: else:
@ -208,7 +210,7 @@ def set_tags(
if color if color
else str(document) else str(document)
) )
+ f" [{document.pk}]" + f" [{document.pk}]",
) )
if relevant_tags: if relevant_tags:
print("Suggest tags: " + ", ".join([t.name for t in relevant_tags])) print("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
@ -254,7 +256,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
except OSError as e: except OSError as e:
logger.error( logger.error(
f"Failed to move {instance.source_path} to trash at " f"Failed to move {instance.source_path} to trash at "
f"{new_file_path}: {e}. Skipping cleanup!" f"{new_file_path}: {e}. Skipping cleanup!",
) )
return return
@ -270,16 +272,18 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
except OSError as e: except OSError as e:
logger.warning( logger.warning(
f"While deleting document {str(instance)}, the file " f"While deleting document {str(instance)}, the file "
f"{filename} could not be deleted: {e}" f"{filename} could not be deleted: {e}",
) )
delete_empty_directories( delete_empty_directories(
os.path.dirname(instance.source_path), root=settings.ORIGINALS_DIR os.path.dirname(instance.source_path),
root=settings.ORIGINALS_DIR,
) )
if instance.has_archive_version: if instance.has_archive_version:
delete_empty_directories( delete_empty_directories(
os.path.dirname(instance.archive_path), root=settings.ARCHIVE_DIR os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR,
) )
@ -297,7 +301,7 @@ def validate_move(instance, old_path, new_path):
# Can't do anything if the new file already exists. Skip updating file. # Can't do anything if the new file already exists. Skip updating file.
logger.warning( logger.warning(
f"Document {str(instance)}: Cannot rename file " f"Document {str(instance)}: Cannot rename file "
f"since target path {new_path} already exists." f"since target path {new_path} already exists.",
) )
raise CannotMoveFilesException() raise CannotMoveFilesException()
@ -331,12 +335,11 @@ def update_filename_and_move_files(sender, instance, **kwargs):
if instance.has_archive_version: if instance.has_archive_version:
instance.archive_filename = generate_unique_filename( instance.archive_filename = generate_unique_filename(
instance, archive_filename=True instance,
archive_filename=True,
) )
move_archive = ( move_archive = old_archive_filename != instance.archive_filename
old_archive_filename != instance.archive_filename
) # NOQA: E501
else: else:
move_archive = False move_archive = False
@ -374,7 +377,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
if move_archive and os.path.isfile(instance.archive_path): if move_archive and os.path.isfile(instance.archive_path):
os.rename(instance.archive_path, old_archive_path) os.rename(instance.archive_path, old_archive_path)
except Exception as e: except Exception:
# This is fine, since: # This is fine, since:
# A: if we managed to move source from A to B, we will also # A: if we managed to move source from A to B, we will also
# manage to move it from B to A. If not, we have a serious # manage to move it from B to A. If not, we have a serious
@ -393,14 +396,16 @@ def update_filename_and_move_files(sender, instance, **kwargs):
# something has failed above. # something has failed above.
if not os.path.isfile(old_source_path): if not os.path.isfile(old_source_path):
delete_empty_directories( delete_empty_directories(
os.path.dirname(old_source_path), root=settings.ORIGINALS_DIR os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR,
) )
if instance.has_archive_version and not os.path.isfile( if instance.has_archive_version and not os.path.isfile(
old_archive_path old_archive_path,
): # NOQA: E501 ):
delete_empty_directories( delete_empty_directories(
os.path.dirname(old_archive_path), root=settings.ARCHIVE_DIR os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR,
) )
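
update_filename_and_move_files() above renames the original and archive files as a pair and, per the in-code comment, undoes the first rename if the second fails. A simplified, runnable sketch of that rollback with a hypothetical move_pair() helper:

    import os
    import tempfile

    def move_pair(src_a: str, dst_a: str, src_b: str, dst_b: str) -> None:
        # Move two related files together; if the second move fails, undo
        # the first so the pair never ends up half-renamed.
        os.rename(src_a, dst_a)
        try:
            os.rename(src_b, dst_b)
        except OSError:
            os.rename(dst_a, src_a)
            raise

    d = tempfile.mkdtemp()
    for name in ("orig.pdf", "arch.pdf"):
        open(os.path.join(d, name), "wb").close()

    move_pair(
        os.path.join(d, "orig.pdf"), os.path.join(d, "orig-new.pdf"),
        os.path.join(d, "arch.pdf"), os.path.join(d, "arch-new.pdf"),
    )
    print(sorted(os.listdir(d)))  # ['arch-new.pdf', 'orig-new.pdf']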

View File

@ -3,13 +3,18 @@ import logging
import tqdm import tqdm
from django.conf import settings from django.conf import settings
from django.db.models.signals import post_save from django.db.models.signals import post_save
from whoosh.writing import AsyncWriter from documents import index
from documents import sanity_checker
from documents import index, sanity_checker from documents.classifier import DocumentClassifier
from documents.classifier import DocumentClassifier, load_classifier from documents.classifier import load_classifier
from documents.consumer import Consumer, ConsumerError from documents.consumer import Consumer
from documents.models import Document, Tag, DocumentType, Correspondent from documents.consumer import ConsumerError
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException from documents.sanity_checker import SanityCheckFailedException
from whoosh.writing import AsyncWriter
logger = logging.getLogger("paperless.tasks") logger = logging.getLogger("paperless.tasks")
@ -47,7 +52,7 @@ def train_classifier():
try: try:
if classifier.train(): if classifier.train():
logger.info( logger.info(
"Saving updated classifier model to {}...".format(settings.MODEL_FILE) "Saving updated classifier model to {}...".format(settings.MODEL_FILE),
) )
classifier.save() classifier.save()
else: else:
@ -82,7 +87,7 @@ def consume_file(
else: else:
raise ConsumerError( raise ConsumerError(
"Unknown error: Returned document was null, but " "Unknown error: Returned document was null, but "
"no error message was given." "no error message was given.",
) )
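
The tasks.py imports illustrate the reorder-python-imports convention applied throughout this commit: one binding per line, sorted, so each addition or removal is a single-line diff. A stdlib before/after (module names are stand-ins, not Paperless code):

    # Before: several names on one line; removing one rewrites the line.
    #     from collections import OrderedDict, defaultdict, namedtuple
    # After, in the one-import-per-line style enforced here:
    from collections import OrderedDict
    from collections import defaultdict
    from collections import namedtuple

    Point = namedtuple("Point", "x y")
    print(OrderedDict(a=1), defaultdict(int)["missing"], Point(1, 2))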

View File

@ -1,7 +1,8 @@
from factory import Faker from factory import Faker
from factory.django import DjangoModelFactory from factory.django import DjangoModelFactory
from ..models import Document, Correspondent from ..models import Correspondent
from ..models import Document
class CorrespondentFactory(DjangoModelFactory): class CorrespondentFactory(DjangoModelFactory):
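
The factories above use factory_boy's DjangoModelFactory with Faker-generated fields. A sketch of the same idea that runs without a database, assuming factory_boy is installed and substituting a plain class for the Django model:

    import factory  # pip install factory_boy

    class Correspondent:
        # Plain stand-in for the Django model, so no database is needed.
        def __init__(self, name):
            self.name = name

    class CorrespondentFactory(factory.Factory):
        class Meta:
            model = Correspondent

        name = factory.Faker("name")

    print(CorrespondentFactory().name)  # a random person name from Faker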

View File

@ -3,7 +3,6 @@ from unittest import mock
from django.contrib.admin.sites import AdminSite from django.contrib.admin.sites import AdminSite
from django.test import TestCase from django.test import TestCase
from django.utils import timezone from django.utils import timezone
from documents import index from documents import index
from documents.admin import DocumentAdmin from documents.admin import DocumentAdmin
from documents.models import Document from documents.models import Document
@ -42,7 +41,8 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
docs = [] docs = []
for i in range(42): for i in range(42):
doc = Document.objects.create( doc = Document.objects.create(
title="Many documents with the same title", checksum=f"{i:02}" title="Many documents with the same title",
checksum=f"{i:02}",
) )
docs.append(doc) docs.append(doc)
index.add_or_update_document(doc) index.add_or_update_document(doc)
@ -61,6 +61,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
def test_created(self): def test_created(self):
doc = Document.objects.create( doc = Document.objects.create(
title="test", created=timezone.make_aware(timezone.datetime(2020, 4, 12)) title="test",
created=timezone.make_aware(timezone.datetime(2020, 4, 12)),
) )
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12") self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")
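
The created_() assertion above expects an aware datetime rendered as YYYY-MM-DD. A stdlib sketch of that formatting (using datetime.timezone in place of Django's timezone helpers):

    from datetime import datetime, timezone

    created = datetime(2020, 4, 12, tzinfo=timezone.utc)
    print(created.strftime("%Y-%m-%d"))  # "2020-04-12"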

View File

@ -10,22 +10,20 @@ from unittest import mock
import pytest import pytest
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.utils import timezone
from django.test import override_settings from django.test import override_settings
from django.utils import timezone
from documents import bulk_edit
from documents import index
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import SavedView
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
from rest_framework.test import APITestCase from rest_framework.test import APITestCase
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
from documents import index, bulk_edit
from documents.models import (
Document,
Correspondent,
DocumentType,
Tag,
SavedView,
MatchingModel,
)
from documents.tests.utils import DirectoriesMixin
class TestDocumentApi(DirectoriesMixin, APITestCase): class TestDocumentApi(DirectoriesMixin, APITestCase):
def setUp(self): def setUp(self):
@ -72,7 +70,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
returned_doc["title"] = "the new title" returned_doc["title"] = "the new title"
response = self.client.put( response = self.client.put(
"/api/documents/{}/".format(doc.pk), returned_doc, format="json" "/api/documents/{}/".format(doc.pk),
returned_doc,
format="json",
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
@ -127,7 +127,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(len(results[0]), 2) self.assertEqual(len(results[0]), 2)
response = self.client.get( response = self.client.get(
"/api/documents/?fields=id,conteasdnt", format="json" "/api/documents/?fields=id,conteasdnt",
format="json",
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
results = response.data["results"] results = response.data["results"]
@ -162,7 +163,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
) )
with open( with open(
os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb" os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)),
"wb",
) as f: ) as f:
f.write(content_thumbnail) f.write(content_thumbnail)
@ -206,7 +208,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.content, content_archive) self.assertEqual(response.content, content_archive)
response = self.client.get( response = self.client.get(
"/api/documents/{}/download/?original=true".format(doc.pk) "/api/documents/{}/download/?original=true".format(doc.pk),
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
@ -218,7 +220,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.content, content_archive) self.assertEqual(response.content, content_archive)
response = self.client.get( response = self.client.get(
"/api/documents/{}/preview/?original=true".format(doc.pk) "/api/documents/{}/preview/?original=true".format(doc.pk),
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
@ -227,7 +229,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_document_actions_not_existing_file(self): def test_document_actions_not_existing_file(self):
doc = Document.objects.create( doc = Document.objects.create(
title="none", filename=os.path.basename("asd"), mime_type="application/pdf" title="none",
filename=os.path.basename("asd"),
mime_type="application/pdf",
) )
response = self.client.get("/api/documents/{}/download/".format(doc.pk)) response = self.client.get("/api/documents/{}/download/".format(doc.pk))
@ -242,13 +246,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_document_filters(self): def test_document_filters(self):
doc1 = Document.objects.create( doc1 = Document.objects.create(
title="none1", checksum="A", mime_type="application/pdf" title="none1",
checksum="A",
mime_type="application/pdf",
) )
doc2 = Document.objects.create( doc2 = Document.objects.create(
title="none2", checksum="B", mime_type="application/pdf" title="none2",
checksum="B",
mime_type="application/pdf",
) )
doc3 = Document.objects.create( doc3 = Document.objects.create(
title="none3", checksum="C", mime_type="application/pdf" title="none3",
checksum="C",
mime_type="application/pdf",
) )
tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True) tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)
@ -273,7 +283,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id]) self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])
response = self.client.get( response = self.client.get(
"/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id) "/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id),
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
results = response.data["results"] results = response.data["results"]
@ -281,7 +291,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc3.id]) self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc3.id])
response = self.client.get( response = self.client.get(
"/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id) "/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id),
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
results = response.data["results"] results = response.data["results"]
@ -289,7 +299,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id]) self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])
response = self.client.get( response = self.client.get(
"/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id) "/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id),
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
results = response.data["results"] results = response.data["results"]
@ -297,14 +307,14 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(results[0]["id"], doc3.id) self.assertEqual(results[0]["id"], doc3.id)
response = self.client.get( response = self.client.get(
"/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id) "/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id),
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
results = response.data["results"] results = response.data["results"]
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
response = self.client.get( response = self.client.get(
"/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id) "/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id),
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
results = response.data["results"] results = response.data["results"]
@ -317,7 +327,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc2.id]) self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc2.id])
response = self.client.get( response = self.client.get(
"/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id) "/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id),
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
results = response.data["results"] results = response.data["results"]
@ -325,7 +335,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(results[0]["id"], doc1.id) self.assertEqual(results[0]["id"], doc1.id)
response = self.client.get( response = self.client.get(
"/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id) "/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id),
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
results = response.data["results"] results = response.data["results"]
@ -443,7 +453,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
for i in range(1, 6): for i in range(1, 6):
response = self.client.get( response = self.client.get(
f"/api/documents/?query=content&page={i}&page_size=10" f"/api/documents/?query=content&page={i}&page_size=10",
) )
results = response.data["results"] results = response.data["results"]
self.assertEqual(response.data["count"], 55) self.assertEqual(response.data["count"], 55)
@ -595,31 +605,35 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id]) self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id])
self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id]) self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id])
self.assertCountEqual( self.assertCountEqual(
search_query("&correspondent__isnull"), [d2.id, d3.id, d4.id, d5.id] search_query("&correspondent__isnull"),
[d2.id, d3.id, d4.id, d5.id],
) )
self.assertCountEqual( self.assertCountEqual(
search_query("&document_type__isnull"), [d1.id, d3.id, d4.id, d5.id] search_query("&document_type__isnull"),
[d1.id, d3.id, d4.id, d5.id],
) )
self.assertCountEqual( self.assertCountEqual(
search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)), [d3.id] search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)),
[d3.id],
) )
self.assertCountEqual(search_query("&tags__id__all=" + str(t.id)), [d3.id]) self.assertCountEqual(search_query("&tags__id__all=" + str(t.id)), [d3.id])
self.assertCountEqual( self.assertCountEqual(
search_query("&tags__id__all=" + str(t2.id)), [d3.id, d4.id] search_query("&tags__id__all=" + str(t2.id)),
[d3.id, d4.id],
) )
self.assertIn( self.assertIn(
d4.id, d4.id,
search_query( search_query(
"&created__date__lt=" "&created__date__lt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
), ),
) )
self.assertNotIn( self.assertNotIn(
d4.id, d4.id,
search_query( search_query(
"&created__date__gt=" "&created__date__gt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
), ),
) )
@ -627,40 +641,44 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
d4.id, d4.id,
search_query( search_query(
"&created__date__lt=" "&created__date__lt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
), ),
) )
self.assertIn( self.assertIn(
d4.id, d4.id,
search_query( search_query(
"&created__date__gt=" "&created__date__gt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
), ),
) )
self.assertIn( self.assertIn(
d5.id, d5.id,
search_query( search_query(
"&added__date__lt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") "&added__date__lt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
), ),
) )
self.assertNotIn( self.assertNotIn(
d5.id, d5.id,
search_query( search_query(
"&added__date__gt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d") "&added__date__gt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
), ),
) )
self.assertNotIn( self.assertNotIn(
d5.id, d5.id,
search_query( search_query(
"&added__date__lt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") "&added__date__lt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
), ),
) )
self.assertIn( self.assertIn(
d5.id, d5.id,
search_query( search_query(
"&added__date__gt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d") "&added__date__gt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
), ),
) )
@ -700,18 +718,22 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
return [hit["id"] for hit in r.data["results"]] return [hit["id"] for hit in r.data["results"]]
self.assertListEqual( self.assertListEqual(
search_query("&ordering=archive_serial_number"), [d3.id, d1.id, d2.id] search_query("&ordering=archive_serial_number"),
[d3.id, d1.id, d2.id],
) )
self.assertListEqual( self.assertListEqual(
search_query("&ordering=-archive_serial_number"), [d2.id, d1.id, d3.id] search_query("&ordering=-archive_serial_number"),
[d2.id, d1.id, d3.id],
) )
self.assertListEqual(search_query("&ordering=title"), [d3.id, d2.id, d1.id]) self.assertListEqual(search_query("&ordering=title"), [d3.id, d2.id, d1.id])
self.assertListEqual(search_query("&ordering=-title"), [d1.id, d2.id, d3.id]) self.assertListEqual(search_query("&ordering=-title"), [d1.id, d2.id, d3.id])
self.assertListEqual( self.assertListEqual(
search_query("&ordering=correspondent__name"), [d1.id, d3.id, d2.id] search_query("&ordering=correspondent__name"),
[d1.id, d3.id, d2.id],
) )
self.assertListEqual( self.assertListEqual(
search_query("&ordering=-correspondent__name"), [d2.id, d3.id, d1.id] search_query("&ordering=-correspondent__name"),
[d2.id, d3.id, d1.id],
) )
def test_statistics(self): def test_statistics(self):
@ -740,10 +762,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload(self, m): def test_upload(self, m):
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", {"document": f} "/api/documents/post_document/",
{"document": f},
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
@ -761,7 +785,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_empty_metadata(self, m): def test_upload_empty_metadata(self, m):
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", "/api/documents/post_document/",
@ -783,10 +808,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_invalid_form(self, m): def test_upload_invalid_form(self, m):
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", {"documenst": f} "/api/documents/post_document/",
{"documenst": f},
) )
self.assertEqual(response.status_code, 400) self.assertEqual(response.status_code, 400)
m.assert_not_called() m.assert_not_called()
@ -795,10 +822,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_invalid_file(self, m): def test_upload_invalid_file(self, m):
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", {"document": f} "/api/documents/post_document/",
{"document": f},
) )
self.assertEqual(response.status_code, 400) self.assertEqual(response.status_code, 400)
m.assert_not_called() m.assert_not_called()
@ -806,7 +835,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.async_task") @mock.patch("documents.views.async_task")
def test_upload_with_title(self, async_task): def test_upload_with_title(self, async_task):
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", "/api/documents/post_document/",
@ -824,10 +854,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_with_correspondent(self, async_task): def test_upload_with_correspondent(self, async_task):
c = Correspondent.objects.create(name="test-corres") c = Correspondent.objects.create(name="test-corres")
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", {"document": f, "correspondent": c.id} "/api/documents/post_document/",
{"document": f, "correspondent": c.id},
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
@ -840,10 +872,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.async_task") @mock.patch("documents.views.async_task")
def test_upload_with_invalid_correspondent(self, async_task): def test_upload_with_invalid_correspondent(self, async_task):
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", {"document": f, "correspondent": 3456} "/api/documents/post_document/",
{"document": f, "correspondent": 3456},
) )
self.assertEqual(response.status_code, 400) self.assertEqual(response.status_code, 400)
@ -853,10 +887,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_with_document_type(self, async_task): def test_upload_with_document_type(self, async_task):
dt = DocumentType.objects.create(name="invoice") dt = DocumentType.objects.create(name="invoice")
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", {"document": f, "document_type": dt.id} "/api/documents/post_document/",
{"document": f, "document_type": dt.id},
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
@ -869,10 +905,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.async_task") @mock.patch("documents.views.async_task")
def test_upload_with_invalid_document_type(self, async_task): def test_upload_with_invalid_document_type(self, async_task):
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", {"document": f, "document_type": 34578} "/api/documents/post_document/",
{"document": f, "document_type": 34578},
) )
self.assertEqual(response.status_code, 400) self.assertEqual(response.status_code, 400)
@ -883,10 +921,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
t1 = Tag.objects.create(name="tag1") t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2") t2 = Tag.objects.create(name="tag2")
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", {"document": f, "tags": [t2.id, t1.id]} "/api/documents/post_document/",
{"document": f, "tags": [t2.id, t1.id]},
) )
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
@ -901,7 +941,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
t1 = Tag.objects.create(name="tag1") t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2") t2 = Tag.objects.create(name="tag2")
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb" os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f: ) as f:
response = self.client.post( response = self.client.post(
"/api/documents/post_document/", "/api/documents/post_document/",
@ -952,7 +993,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_get_metadata_no_archive(self): def test_get_metadata_no_archive(self):
doc = Document.objects.create( doc = Document.objects.create(
title="test", filename="file.pdf", mime_type="application/pdf" title="test",
filename="file.pdf",
mime_type="application/pdf",
) )
shutil.copy( shutil.copy(
@ -999,7 +1042,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertEqual( self.assertEqual(
response.data, {"correspondents": [], "tags": [], "document_types": []} response.data,
{"correspondents": [], "tags": [], "document_types": []},
) )
def test_get_suggestions_invalid_doc(self): def test_get_suggestions_invalid_doc(self):
@ -1010,10 +1054,15 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.match_tags") @mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_document_types") @mock.patch("documents.views.match_document_types")
def test_get_suggestions( def test_get_suggestions(
self, match_document_types, match_tags, match_correspondents self,
match_document_types,
match_tags,
match_correspondents,
): ):
doc = Document.objects.create( doc = Document.objects.create(
title="test", mime_type="application/pdf", content="this is an invoice!" title="test",
mime_type="application/pdf",
content="this is an invoice!",
) )
match_tags.return_value = [Tag(id=56), Tag(id=123)] match_tags.return_value = [Tag(id=56), Tag(id=123)]
match_document_types.return_value = [DocumentType(id=23)] match_document_types.return_value = [DocumentType(id=23)]
@ -1094,7 +1143,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(v1.user, self.user) self.assertEqual(v1.user, self.user)
response = self.client.patch( response = self.client.patch(
f"/api/saved_views/{v1.id}/", {"show_in_sidebar": False}, format="json" f"/api/saved_views/{v1.id}/",
{"show_in_sidebar": False},
format="json",
) )
v1 = SavedView.objects.get(id=v1.id) v1 = SavedView.objects.get(id=v1.id)
@ -1183,7 +1234,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_regex_no_algorithm(self): def test_regex_no_algorithm(self):
for endpoint in ["correspondents", "tags", "document_types"]: for endpoint in ["correspondents", "tags", "document_types"]:
response = self.client.post( response = self.client.post(
f"/api/{endpoint}/", {"name": "test", "match": "[0-9]"}, format="json" f"/api/{endpoint}/",
{"name": "test", "match": "[0-9]"},
format="json",
) )
self.assertEqual(response.status_code, 201, endpoint) self.assertEqual(response.status_code, 201, endpoint)
@ -1200,7 +1253,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_tag_color(self): def test_tag_color(self):
response = self.client.post( response = self.client.post(
"/api/tags/", {"name": "tag", "colour": 3}, format="json" "/api/tags/",
{"name": "tag", "colour": 3},
format="json",
) )
self.assertEqual(response.status_code, 201) self.assertEqual(response.status_code, 201)
self.assertEqual(Tag.objects.get(id=response.data["id"]).color, "#b2df8a") self.assertEqual(Tag.objects.get(id=response.data["id"]).color, "#b2df8a")
@ -1213,14 +1268,17 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_tag_color_invalid(self): def test_tag_color_invalid(self):
response = self.client.post( response = self.client.post(
"/api/tags/", {"name": "tag", "colour": 34}, format="json" "/api/tags/",
{"name": "tag", "colour": 34},
format="json",
) )
self.assertEqual(response.status_code, 400) self.assertEqual(response.status_code, 400)
def test_tag_color_custom(self): def test_tag_color_custom(self):
tag = Tag.objects.create(name="test", color="#abcdef") tag = Tag.objects.create(name="test", color="#abcdef")
self.assertEqual( self.assertEqual(
self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"], 1 self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"],
1,
) )
@ -1236,32 +1294,42 @@ class TestDocumentApiV2(DirectoriesMixin, APITestCase):
def test_tag_validate_color(self): def test_tag_validate_color(self):
self.assertEqual( self.assertEqual(
self.client.post( self.client.post(
"/api/tags/", {"name": "test", "color": "#12fFaA"}, format="json" "/api/tags/",
{"name": "test", "color": "#12fFaA"},
format="json",
).status_code, ).status_code,
201, 201,
) )
self.assertEqual( self.assertEqual(
self.client.post( self.client.post(
"/api/tags/", {"name": "test1", "color": "abcdef"}, format="json" "/api/tags/",
{"name": "test1", "color": "abcdef"},
format="json",
).status_code, ).status_code,
400, 400,
) )
self.assertEqual( self.assertEqual(
self.client.post( self.client.post(
"/api/tags/", {"name": "test2", "color": "#abcdfg"}, format="json" "/api/tags/",
{"name": "test2", "color": "#abcdfg"},
format="json",
).status_code, ).status_code,
400, 400,
) )
self.assertEqual( self.assertEqual(
self.client.post( self.client.post(
"/api/tags/", {"name": "test3", "color": "#asd"}, format="json" "/api/tags/",
{"name": "test3", "color": "#asd"},
format="json",
).status_code, ).status_code,
400, 400,
) )
self.assertEqual( self.assertEqual(
self.client.post( self.client.post(
"/api/tags/", {"name": "test4", "color": "#12121212"}, format="json" "/api/tags/",
{"name": "test4", "color": "#12121212"},
format="json",
).status_code, ).status_code,
400, 400,
) )
@ -1313,10 +1381,16 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
self.t2 = Tag.objects.create(name="t2") self.t2 = Tag.objects.create(name="t2")
self.doc1 = Document.objects.create(checksum="A", title="A") self.doc1 = Document.objects.create(checksum="A", title="A")
self.doc2 = Document.objects.create( self.doc2 = Document.objects.create(
checksum="B", title="B", correspondent=self.c1, document_type=self.dt1 checksum="B",
title="B",
correspondent=self.c1,
document_type=self.dt1,
) )
self.doc3 = Document.objects.create( self.doc3 = Document.objects.create(
checksum="C", title="C", correspondent=self.c2, document_type=self.dt2 checksum="C",
title="C",
correspondent=self.c2,
document_type=self.dt2,
) )
self.doc4 = Document.objects.create(checksum="D", title="D") self.doc4 = Document.objects.create(checksum="D", title="D")
self.doc5 = Document.objects.create(checksum="E", title="E") self.doc5 = Document.objects.create(checksum="E", title="E")
@ -1327,7 +1401,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
def test_set_correspondent(self): def test_set_correspondent(self):
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1) self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1)
bulk_edit.set_correspondent( bulk_edit.set_correspondent(
[self.doc1.id, self.doc2.id, self.doc3.id], self.c2.id [self.doc1.id, self.doc2.id, self.doc3.id],
self.c2.id,
) )
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 3) self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 3)
self.async_task.assert_called_once() self.async_task.assert_called_once()
@ -1345,7 +1420,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
def test_set_document_type(self): def test_set_document_type(self):
self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 1) self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 1)
bulk_edit.set_document_type( bulk_edit.set_document_type(
[self.doc1.id, self.doc2.id, self.doc3.id], self.dt2.id [self.doc1.id, self.doc2.id, self.doc3.id],
self.dt2.id,
) )
self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 3) self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 3)
self.async_task.assert_called_once() self.async_task.assert_called_once()
@ -1363,7 +1439,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
def test_add_tag(self): def test_add_tag(self):
self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2) self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2)
bulk_edit.add_tag( bulk_edit.add_tag(
[self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id], self.t1.id [self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id],
self.t1.id,
) )
self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 4) self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 4)
self.async_task.assert_called_once() self.async_task.assert_called_once()
@ -1415,7 +1492,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id], "documents": [self.doc1.id],
"method": "set_correspondent", "method": "set_correspondent",
"parameters": {"correspondent": self.c1.id}, "parameters": {"correspondent": self.c1.id},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1435,7 +1512,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id], "documents": [self.doc1.id],
"method": "set_correspondent", "method": "set_correspondent",
"parameters": {"correspondent": None}, "parameters": {"correspondent": None},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1455,7 +1532,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id], "documents": [self.doc1.id],
"method": "set_document_type", "method": "set_document_type",
"parameters": {"document_type": self.dt1.id}, "parameters": {"document_type": self.dt1.id},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1475,7 +1552,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id], "documents": [self.doc1.id],
"method": "set_document_type", "method": "set_document_type",
"parameters": {"document_type": None}, "parameters": {"document_type": None},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1495,7 +1572,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id], "documents": [self.doc1.id],
"method": "add_tag", "method": "add_tag",
"parameters": {"tag": self.t1.id}, "parameters": {"tag": self.t1.id},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1515,7 +1592,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id], "documents": [self.doc1.id],
"method": "remove_tag", "method": "remove_tag",
"parameters": {"tag": self.t1.id}, "parameters": {"tag": self.t1.id},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1538,7 +1615,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"add_tags": [self.t1.id], "add_tags": [self.t1.id],
"remove_tags": [self.t2.id], "remove_tags": [self.t2.id],
}, },
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1555,7 +1632,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post( response = self.client.post(
"/api/documents/bulk_edit/", "/api/documents/bulk_edit/",
json.dumps( json.dumps(
{"documents": [self.doc1.id], "method": "delete", "parameters": {}} {"documents": [self.doc1.id], "method": "delete", "parameters": {}},
), ),
content_type="application/json", content_type="application/json",
) )
@ -1580,7 +1657,11 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post( response = self.client.post(
"/api/documents/bulk_edit/", "/api/documents/bulk_edit/",
json.dumps( json.dumps(
{"documents": [self.doc2.id], "method": "exterminate", "parameters": {}} {
"documents": [self.doc2.id],
"method": "exterminate",
"parameters": {},
},
), ),
content_type="application/json", content_type="application/json",
) )
@ -1596,7 +1677,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id], "documents": [self.doc2.id],
"method": "set_correspondent", "method": "set_correspondent",
"parameters": {"correspondent": 345657}, "parameters": {"correspondent": 345657},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1613,7 +1694,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id], "documents": [self.doc2.id],
"method": "set_correspondent", "method": "set_correspondent",
"parameters": {}, "parameters": {},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1628,7 +1709,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id], "documents": [self.doc2.id],
"method": "set_document_type", "method": "set_document_type",
"parameters": {"document_type": 345657}, "parameters": {"document_type": 345657},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1645,7 +1726,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id], "documents": [self.doc2.id],
"method": "set_document_type", "method": "set_document_type",
"parameters": {}, "parameters": {},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1660,7 +1741,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id], "documents": [self.doc2.id],
"method": "add_tag", "method": "add_tag",
"parameters": {"tag": 345657}, "parameters": {"tag": 345657},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1672,7 +1753,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post( response = self.client.post(
"/api/documents/bulk_edit/", "/api/documents/bulk_edit/",
json.dumps( json.dumps(
{"documents": [self.doc2.id], "method": "add_tag", "parameters": {}} {"documents": [self.doc2.id], "method": "add_tag", "parameters": {}},
), ),
content_type="application/json", content_type="application/json",
) )
@ -1687,7 +1768,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id], "documents": [self.doc2.id],
"method": "remove_tag", "method": "remove_tag",
"parameters": {"tag": 345657}, "parameters": {"tag": 345657},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1699,7 +1780,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post( response = self.client.post(
"/api/documents/bulk_edit/", "/api/documents/bulk_edit/",
json.dumps( json.dumps(
{"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}} {"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}},
), ),
content_type="application/json", content_type="application/json",
) )
@ -1717,7 +1798,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"add_tags": [self.t2.id, 1657], "add_tags": [self.t2.id, 1657],
"remove_tags": [1123123], "remove_tags": [1123123],
}, },
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1731,7 +1812,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id], "documents": [self.doc2.id],
"method": "modify_tags", "method": "modify_tags",
"parameters": {"remove_tags": [1123123]}, "parameters": {"remove_tags": [1123123]},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1744,7 +1825,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id], "documents": [self.doc2.id],
"method": "modify_tags", "method": "modify_tags",
"parameters": {"add_tags": [self.t2.id, 1657]}, "parameters": {"add_tags": [self.t2.id, 1657]},
} },
), ),
content_type="application/json", content_type="application/json",
) )
@ -1774,7 +1855,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post( response = self.client.post(
"/api/documents/selection_data/", "/api/documents/selection_data/",
json.dumps( json.dumps(
{"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]} {"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]},
), ),
content_type="application/json", content_type="application/json",
) )
@ -1856,7 +1937,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
response = self.client.post( response = self.client.post(
"/api/documents/bulk_download/", "/api/documents/bulk_download/",
json.dumps( json.dumps(
{"documents": [self.doc2.id, self.doc3.id], "content": "originals"} {"documents": [self.doc2.id, self.doc3.id], "content": "originals"},
), ),
content_type="application/json", content_type="application/json",
) )
@ -1914,17 +1995,20 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
with self.doc2.source_file as f: with self.doc2.source_file as f:
self.assertEqual( self.assertEqual(
f.read(), zipf.read("originals/2021-01-01 document A.pdf") f.read(),
zipf.read("originals/2021-01-01 document A.pdf"),
) )
with self.doc3.archive_file as f: with self.doc3.archive_file as f:
self.assertEqual( self.assertEqual(
f.read(), zipf.read("archive/2020-03-21 document B.pdf") f.read(),
zipf.read("archive/2020-03-21 document B.pdf"),
) )
with self.doc3.source_file as f: with self.doc3.source_file as f:
self.assertEqual( self.assertEqual(
f.read(), zipf.read("originals/2020-03-21 document B.jpg") f.read(),
zipf.read("originals/2020-03-21 document B.jpg"),
) )
def test_filename_clashes(self): def test_filename_clashes(self):
@ -1953,7 +2037,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
response = self.client.post( response = self.client.post(
"/api/documents/bulk_download/", "/api/documents/bulk_download/",
json.dumps( json.dumps(
{"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"} {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"},
), ),
content_type="application/json", content_type="application/json",
) )
@ -1968,13 +2052,16 @@ class TestApiAuth(APITestCase):
self.assertEqual(self.client.get(f"/api/documents/{d.id}/").status_code, 401) self.assertEqual(self.client.get(f"/api/documents/{d.id}/").status_code, 401)
self.assertEqual( self.assertEqual(
self.client.get(f"/api/documents/{d.id}/download/").status_code, 401 self.client.get(f"/api/documents/{d.id}/download/").status_code,
401,
) )
self.assertEqual( self.assertEqual(
self.client.get(f"/api/documents/{d.id}/preview/").status_code, 401 self.client.get(f"/api/documents/{d.id}/preview/").status_code,
401,
) )
self.assertEqual( self.assertEqual(
self.client.get(f"/api/documents/{d.id}/thumb/").status_code, 401 self.client.get(f"/api/documents/{d.id}/thumb/").status_code,
401,
) )
self.assertEqual(self.client.get("/api/tags/").status_code, 401) self.assertEqual(self.client.get("/api/tags/").status_code, 401)
@ -1987,10 +2074,12 @@ class TestApiAuth(APITestCase):
self.assertEqual(self.client.get("/api/search/autocomplete/").status_code, 401) self.assertEqual(self.client.get("/api/search/autocomplete/").status_code, 401)
self.assertEqual(self.client.get("/api/documents/bulk_edit/").status_code, 401) self.assertEqual(self.client.get("/api/documents/bulk_edit/").status_code, 401)
self.assertEqual( self.assertEqual(
self.client.get("/api/documents/bulk_download/").status_code, 401 self.client.get("/api/documents/bulk_download/").status_code,
401,
) )
self.assertEqual( self.assertEqual(
self.client.get("/api/documents/selection_data/").status_code, 401 self.client.get("/api/documents/selection_data/").status_code,
401,
) )
def test_api_version_no_auth(self): def test_api_version_no_auth(self):

View File

@@ -4,10 +4,11 @@ from unittest import mock
 from django.core.checks import Error
 from django.test import TestCase

-from .factories import DocumentFactory
-from .. import document_consumer_declaration
-from ..checks import changed_password_check, parser_check
+from ..checks import changed_password_check
+from ..checks import parser_check
 from ..models import Document
+from ..signals import document_consumer_declaration
+from .factories import DocumentFactory


 class ChecksTestCase(TestCase):
@@ -30,7 +31,7 @@ class ChecksTestCase(TestCase):
             [
                 Error(
                     "No parsers found. This is a bug. The consumer won't be "
-                    "able to consume any documents without parsers."
-                )
+                    "able to consume any documents without parsers.",
+                ),
             ],
         )
View File

@@ -5,14 +5,15 @@ from unittest import mock
 import pytest
 from django.conf import settings
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase

-from documents.classifier import (
-    DocumentClassifier,
-    IncompatibleClassifierVersionError,
-    load_classifier,
-)
-from documents.models import Correspondent, Document, Tag, DocumentType
+from documents.classifier import DocumentClassifier
+from documents.classifier import IncompatibleClassifierVersionError
+from documents.classifier import load_classifier
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin
@@ -23,26 +24,37 @@ class TestClassifier(DirectoriesMixin, TestCase):
     def generate_test_data(self):
         self.c1 = Correspondent.objects.create(
-            name="c1", matching_algorithm=Correspondent.MATCH_AUTO
+            name="c1",
+            matching_algorithm=Correspondent.MATCH_AUTO,
         )
         self.c2 = Correspondent.objects.create(name="c2")
         self.c3 = Correspondent.objects.create(
-            name="c3", matching_algorithm=Correspondent.MATCH_AUTO
+            name="c3",
+            matching_algorithm=Correspondent.MATCH_AUTO,
         )
         self.t1 = Tag.objects.create(
-            name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12
+            name="t1",
+            matching_algorithm=Tag.MATCH_AUTO,
+            pk=12,
         )
         self.t2 = Tag.objects.create(
-            name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True
+            name="t2",
+            matching_algorithm=Tag.MATCH_ANY,
+            pk=34,
+            is_inbox_tag=True,
         )
         self.t3 = Tag.objects.create(
-            name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45
+            name="t3",
+            matching_algorithm=Tag.MATCH_AUTO,
+            pk=45,
         )
         self.dt = DocumentType.objects.create(
-            name="dt", matching_algorithm=DocumentType.MATCH_AUTO
+            name="dt",
+            matching_algorithm=DocumentType.MATCH_AUTO,
        )
         self.dt2 = DocumentType.objects.create(
-            name="dt2", matching_algorithm=DocumentType.MATCH_AUTO
+            name="dt2",
+            matching_algorithm=DocumentType.MATCH_AUTO,
         )

         self.doc1 = Document.objects.create(
@@ -59,7 +71,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
             checksum="B",
         )
         self.doc_inbox = Document.objects.create(
-            title="doc235", content="aa", checksum="C"
+            title="doc235",
+            content="aa",
+            checksum="C",
         )

         self.doc1.tags.add(self.t1)
@@ -90,27 +104,33 @@ class TestClassifier(DirectoriesMixin, TestCase):
         self.generate_test_data()
         self.classifier.train()
         self.assertListEqual(
-            list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk]
+            list(self.classifier.correspondent_classifier.classes_),
+            [-1, self.c1.pk],
         )
         self.assertListEqual(
-            list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk]
+            list(self.classifier.tags_binarizer.classes_),
+            [self.t1.pk, self.t3.pk],
         )

     def testPredict(self):
         self.generate_test_data()
         self.classifier.train()
         self.assertEqual(
-            self.classifier.predict_correspondent(self.doc1.content), self.c1.pk
+            self.classifier.predict_correspondent(self.doc1.content),
+            self.c1.pk,
         )
         self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
         self.assertListEqual(
-            self.classifier.predict_tags(self.doc1.content), [self.t1.pk]
+            self.classifier.predict_tags(self.doc1.content),
+            [self.t1.pk],
         )
         self.assertListEqual(
-            self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk]
+            self.classifier.predict_tags(self.doc2.content),
+            [self.t1.pk, self.t3.pk],
         )
         self.assertEqual(
-            self.classifier.predict_document_type(self.doc1.content), self.dt.pk
+            self.classifier.predict_document_type(self.doc1.content),
+            self.dt.pk,
         )
         self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
@@ -133,7 +153,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
         current_ver = DocumentClassifier.FORMAT_VERSION
         with mock.patch(
-            "documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver + 1
+            "documents.classifier.DocumentClassifier.FORMAT_VERSION",
+            current_ver + 1,
         ):
             # assure that we won't load old classifiers.
             self.assertRaises(IncompatibleClassifierVersionError, classifier2.load)
@@ -157,7 +178,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
         self.assertFalse(new_classifier.train())

     @override_settings(
-        MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
+        MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
     )
     def test_load_and_classify(self):
         self.generate_test_data()
@@ -169,7 +190,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
     def test_one_correspondent_predict(self):
         c1 = Correspondent.objects.create(
-            name="c1", matching_algorithm=Correspondent.MATCH_AUTO
+            name="c1",
+            matching_algorithm=Correspondent.MATCH_AUTO,
         )
         doc1 = Document.objects.create(
             title="doc1",
@@ -183,7 +205,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
     def test_one_correspondent_predict_manydocs(self):
         c1 = Correspondent.objects.create(
-            name="c1", matching_algorithm=Correspondent.MATCH_AUTO
+            name="c1",
+            matching_algorithm=Correspondent.MATCH_AUTO,
         )
         doc1 = Document.objects.create(
             title="doc1",
@@ -192,7 +215,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
             checksum="A",
         )
         doc2 = Document.objects.create(
-            title="doc2", content="this is a document from noone", checksum="B"
+            title="doc2",
+            content="this is a document from noone",
+            checksum="B",
         )

         self.classifier.train()
@@ -201,7 +226,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
     def test_one_type_predict(self):
         dt = DocumentType.objects.create(
-            name="dt", matching_algorithm=DocumentType.MATCH_AUTO
+            name="dt",
+            matching_algorithm=DocumentType.MATCH_AUTO,
         )

         doc1 = Document.objects.create(
@@ -216,7 +242,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
     def test_one_type_predict_manydocs(self):
         dt = DocumentType.objects.create(
-            name="dt", matching_algorithm=DocumentType.MATCH_AUTO
+            name="dt",
+            matching_algorithm=DocumentType.MATCH_AUTO,
         )

         doc1 = Document.objects.create(
@@ -227,7 +254,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
         )

         doc2 = Document.objects.create(
-            title="doc1", content="this is a document from c2", checksum="B"
+            title="doc1",
+            content="this is a document from c2",
+            checksum="B",
         )

         self.classifier.train()
@@ -238,7 +267,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
         t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

         doc1 = Document.objects.create(
-            title="doc1", content="this is a document from c1", checksum="A"
+            title="doc1",
+            content="this is a document from c1",
+            checksum="A",
         )

         doc1.tags.add(t1)
@@ -249,7 +280,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
         t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

         doc1 = Document.objects.create(
-            title="doc1", content="this is a document from c1", checksum="A"
+            title="doc1",
+            content="this is a document from c1",
+            checksum="A",
         )

         self.classifier.train()
@@ -260,7 +293,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
         t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)

         doc4 = Document.objects.create(
-            title="doc1", content="this is a document from c4", checksum="D"
+            title="doc1",
+            content="this is a document from c4",
+            checksum="D",
         )

         doc4.tags.add(t1)
@@ -273,16 +308,24 @@ class TestClassifier(DirectoriesMixin, TestCase):
         t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)

         doc1 = Document.objects.create(
-            title="doc1", content="this is a document from c1", checksum="A"
+            title="doc1",
+            content="this is a document from c1",
+            checksum="A",
         )
         doc2 = Document.objects.create(
-            title="doc1", content="this is a document from c2", checksum="B"
+            title="doc1",
+            content="this is a document from c2",
+            checksum="B",
         )
         doc3 = Document.objects.create(
-            title="doc1", content="this is a document from c3", checksum="C"
+            title="doc1",
+            content="this is a document from c3",
+            checksum="C",
         )
         doc4 = Document.objects.create(
-            title="doc1", content="this is a document from c4", checksum="D"
+            title="doc1",
+            content="this is a document from c4",
+            checksum="D",
         )

         doc1.tags.add(t1)
@@ -300,10 +343,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
         t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

         doc1 = Document.objects.create(
-            title="doc1", content="this is a document from c1", checksum="A"
+            title="doc1",
+            content="this is a document from c1",
+            checksum="A",
         )
         doc2 = Document.objects.create(
-            title="doc2", content="this is a document from c2", checksum="B"
+            title="doc2",
+            content="this is a document from c2",
+            checksum="B",
         )

         doc1.tags.add(t1)
@@ -316,10 +363,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
         t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

         doc1 = Document.objects.create(
-            title="doc1", content="this is a document from c1", checksum="A"
+            title="doc1",
+            content="this is a document from c1",
+            checksum="A",
         )
         doc2 = Document.objects.create(
-            title="doc2", content="this is a document from c2", checksum="B"
+            title="doc2",
+            content="this is a document from c2",
+            checksum="B",
         )

         doc1.tags.add(t1)
@@ -338,13 +389,15 @@ class TestClassifier(DirectoriesMixin, TestCase):
             load.assert_called_once()

     @override_settings(
-        CACHES={"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}}
+        CACHES={
+            "default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"},
+        },
     )
     @override_settings(
-        MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
+        MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
     )
     @pytest.mark.skip(
-        reason="Disabled caching due to high memory usage - need to investigate."
+        reason="Disabled caching due to high memory usage - need to investigate.",
     )
     def test_load_classifier_cached(self):
         classifier = load_classifier()

View File

@@ -6,13 +6,20 @@ from unittest import mock
 from unittest.mock import MagicMock

 from django.conf import settings
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase

-from .utils import DirectoriesMixin
-from ..consumer import Consumer, ConsumerError
-from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
-from ..parsers import DocumentParser, ParseError
+from ..consumer import Consumer
+from ..consumer import ConsumerError
+from ..models import Correspondent
+from ..models import Document
+from ..models import DocumentType
+from ..models import FileInfo
+from ..models import Tag
+from ..parsers import DocumentParser
+from ..parsers import ParseError
 from ..tasks import sanity_check
+from .utils import DirectoriesMixin


 class TestAttributes(TestCase):
@@ -33,12 +40,18 @@ class TestAttributes(TestCase):
     def test_guess_attributes_from_name_when_title_starts_with_dash(self):
         self._test_guess_attributes_from_name(
-            "- weird but should not break.pdf", None, "- weird but should not break", ()
+            "- weird but should not break.pdf",
+            None,
+            "- weird but should not break",
+            (),
         )

     def test_guess_attributes_from_name_when_title_ends_with_dash(self):
         self._test_guess_attributes_from_name(
-            "weird but should not break -.pdf", None, "weird but should not break -", ()
+            "weird but should not break -.pdf",
+            None,
+            "weird but should not break -",
+            (),
         )
@@ -53,7 +66,12 @@ class TestFieldPermutations(TestCase):
     valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]

     def _test_guessed_attributes(
-        self, filename, created=None, correspondent=None, title=None, tags=None
+        self,
+        filename,
+        created=None,
+        correspondent=None,
+        title=None,
+        tags=None,
     ):
         info = FileInfo.from_filename(filename)
@@ -131,7 +149,7 @@ class TestFieldPermutations(TestCase):
             FILENAME_PARSE_TRANSFORMS=[
                 (all_patt, "all.gif"),
                 (all_patt, "anotherall.gif"),
-            ]
+            ],
         ):
             info = FileInfo.from_filename(filename)
             self.assertEqual(info.title, "all")
@@ -141,7 +159,7 @@ class TestFieldPermutations(TestCase):
             FILENAME_PARSE_TRANSFORMS=[
                 (none_patt, "none.gif"),
                 (all_patt, "anotherall.gif"),
-            ]
+            ],
         ):
             info = FileInfo.from_filename(filename)
             self.assertEqual(info.title, "anotherall")
@@ -238,7 +256,9 @@ class TestConsumer(DirectoriesMixin, TestCase):
     def make_dummy_parser(self, logging_group, progress_callback=None):
         return DummyParser(
-            logging_group, self.dirs.scratch_dir, self.get_test_archive_file()
+            logging_group,
+            self.dirs.scratch_dir,
+            self.get_test_archive_file(),
         )

     def make_faulty_parser(self, logging_group, progress_callback=None):
@@ -257,7 +277,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
                     "mime_types": {"application/pdf": ".pdf"},
                     "weight": 0,
                 },
-            )
+            ),
         ]

         self.addCleanup(patcher.stop)
@@ -282,7 +302,11 @@ class TestConsumer(DirectoriesMixin, TestCase):
     def get_test_archive_file(self):
         src = os.path.join(
-            os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"
+            os.path.dirname(__file__),
+            "samples",
+            "documents",
+            "archive",
+            "0000001.pdf",
         )
         dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf")
         shutil.copy(src, dst)
@@ -296,7 +320,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
         self.assertEqual(document.content, "The Text")
         self.assertEqual(
-            document.title, os.path.splitext(os.path.basename(filename))[0]
+            document.title,
+            os.path.splitext(os.path.basename(filename))[0],
         )
         self.assertIsNone(document.correspondent)
         self.assertIsNone(document.document_type)
@@ -339,7 +364,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
         override_filename = "Statement for November.pdf"

         document = self.consumer.try_consume_file(
-            filename, override_filename=override_filename
+            filename,
+            override_filename=override_filename,
         )

         self.assertEqual(document.title, "Statement for November")
@@ -348,7 +374,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
     def testOverrideTitle(self):
         document = self.consumer.try_consume_file(
-            self.get_test_file(), override_title="Override Title"
+            self.get_test_file(),
+            override_title="Override Title",
         )
         self.assertEqual(document.title, "Override Title")
         self._assert_first_last_send_progress()
@@ -357,7 +384,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
         c = Correspondent.objects.create(name="test")

         document = self.consumer.try_consume_file(
-            self.get_test_file(), override_correspondent_id=c.pk
+            self.get_test_file(),
+            override_correspondent_id=c.pk,
         )
         self.assertEqual(document.correspondent.id, c.id)
         self._assert_first_last_send_progress()
@@ -366,7 +394,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
         dt = DocumentType.objects.create(name="test")

         document = self.consumer.try_consume_file(
-            self.get_test_file(), override_document_type_id=dt.pk
+            self.get_test_file(),
+            override_document_type_id=dt.pk,
         )
         self.assertEqual(document.document_type.id, dt.id)
         self._assert_first_last_send_progress()
@@ -376,7 +405,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
         t2 = Tag.objects.create(name="t2")
         t3 = Tag.objects.create(name="t3")
         document = self.consumer.try_consume_file(
-            self.get_test_file(), override_tag_ids=[t1.id, t3.id]
+            self.get_test_file(),
+            override_tag_ids=[t1.id, t3.id],
         )

         self.assertIn(t1, document.tags.all())
@@ -446,7 +476,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
                     "mime_types": {"application/pdf": ".pdf"},
                     "weight": 0,
                 },
-            )
+            ),
         ]

         self.assertRaisesMessage(
@@ -595,16 +625,16 @@ class TestConsumer(DirectoriesMixin, TestCase):
                     "mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
                     "weight": 0,
                 },
-            )
+            ),
         ]

         doc1 = self.consumer.try_consume_file(
-            os.path.join(settings.CONSUMPTION_DIR, "simple.png")
+            os.path.join(settings.CONSUMPTION_DIR, "simple.png"),
         )
         doc2 = self.consumer.try_consume_file(
-            os.path.join(settings.CONSUMPTION_DIR, "simple.pdf")
+            os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"),
         )
         doc3 = self.consumer.try_consume_file(
-            os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf")
+            os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"),
         )
         self.assertEqual(doc1.filename, "simple.png")
@@ -691,7 +721,9 @@ class PostConsumeTestCase(TestCase):
         with override_settings(POST_CONSUME_SCRIPT=script.name):
             c = Correspondent.objects.create(name="my_bank")
             doc = Document.objects.create(
-                title="Test", mime_type="application/pdf", correspondent=c
+                title="Test",
+                mime_type="application/pdf",
+                correspondent=c,
             )
             tag1 = Tag.objects.create(name="a")
             tag2 = Tag.objects.create(name="b")

View File

@@ -5,15 +5,16 @@ from uuid import uuid4
 from dateutil import tz
 from django.conf import settings
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase

 from documents.parsers import parse_date


 class TestDate(TestCase):
     SAMPLE_FILES = os.path.join(
-        os.path.dirname(__file__), "../../paperless_tesseract/tests/samples"
+        os.path.dirname(__file__),
+        "../../paperless_tesseract/tests/samples",
     )
     SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
@@ -111,11 +112,11 @@ class TestDate(TestCase):
     @override_settings(FILENAME_DATE_ORDER="YMD")
     def test_filename_date_parse_invalid(self, *args):
         self.assertIsNone(
-            parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here")
+            parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"),
         )

     @override_settings(
-        IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
+        IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
     )
     def test_ignored_dates(self, *args):
         text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum"

View File

@@ -3,10 +3,12 @@ import tempfile
 from pathlib import Path
 from unittest import mock

-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase
 from django.utils import timezone

-from ..models import Document, Correspondent
+from ..models import Correspondent
+from ..models import Document


 class TestDocument(TestCase):

View File

@@ -9,17 +9,19 @@ from unittest import mock
 from django.conf import settings
 from django.db import DatabaseError
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase
 from django.utils import timezone

+from ..file_handling import create_source_path_directory
+from ..file_handling import delete_empty_directories
+from ..file_handling import generate_filename
+from ..file_handling import generate_unique_filename
+from ..models import Correspondent
+from ..models import Document
+from ..models import DocumentType
+from ..models import Tag
 from .utils import DirectoriesMixin
-from ..file_handling import (
-    generate_filename,
-    create_source_path_directory,
-    delete_empty_directories,
-    generate_unique_filename,
-)
-from ..models import Document, Correspondent, Tag, DocumentType


 class TestFileHandling(DirectoriesMixin, TestCase):
@@ -34,7 +36,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         document.storage_type = Document.STORAGE_TYPE_GPG

         self.assertEqual(
-            generate_filename(document), "{:07d}.pdf.gpg".format(document.pk)
+            generate_filename(document),
+            "{:07d}.pdf.gpg".format(document.pk),
         )

     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@@ -75,7 +78,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True
+            os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"),
+            True,
         )

     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@@ -93,7 +97,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):

         # Test source_path
         self.assertEqual(
-            document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf"
+            document.source_path,
+            settings.ORIGINALS_DIR + "/none/none.pdf",
         )

         # Make the folder read- and execute-only (no writing and no renaming)
@@ -105,7 +110,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):

         # Check proper handling of files
         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
+            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
+            True,
         )
         self.assertEqual(document.filename, "none/none.pdf")
@@ -145,7 +151,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         # Check proper handling of files
         self.assertTrue(os.path.isfile(document.source_path))
         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
+            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
+            True,
         )
         self.assertEqual(document.filename, "none/none.pdf")
@@ -167,7 +174,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         pk = document.pk
         document.delete()
         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
+            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
+            False,
         )
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@@ -192,7 +200,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False)
         document.delete()
         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
+            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
+            False,
         )
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
         self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True)
@@ -363,7 +372,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")

         doc = Document.objects.create(
-            title="doc2", checksum="B", mime_type="application/pdf"
+            title="doc2",
+            checksum="B",
+            mime_type="application/pdf",
         )

         self.assertEqual(generate_filename(doc), "doc2.pdf")
@@ -380,12 +391,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         )

     @override_settings(
-        PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}"
+        PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
     )
     def test_created_year_month_day(self):
         d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
         doc1 = Document.objects.create(
-            title="doc1", mime_type="application/pdf", created=d1
+            title="doc1",
+            mime_type="application/pdf",
+            created=d1,
         )

         self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
@@ -395,12 +408,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")

     @override_settings(
-        PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}"
+        PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
     )
     def test_added_year_month_day(self):
         d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
         doc1 = Document.objects.create(
-            title="doc1", mime_type="application/pdf", added=d1
+            title="doc1",
+            mime_type="application/pdf",
+            added=d1,
         )

         self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
@@ -410,7 +425,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")

     @override_settings(
-        PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}"
+        PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
     )
     def test_nested_directory_cleanup(self):
         document = Document()
@@ -431,7 +446,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         document.delete()

         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False
+            os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"),
+            False,
         )
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@@ -456,7 +472,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         os.makedirs(os.path.join(tmp, "notempty", "empty"))

         delete_empty_directories(
-            os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR
+            os.path.join(tmp, "notempty", "empty"),
+            root=settings.ORIGINALS_DIR,
         )
         self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
         self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True)
@@ -483,10 +500,16 @@ class TestFileHandling(DirectoriesMixin, TestCase):
     @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
     def test_duplicates(self):
         document = Document.objects.create(
-            mime_type="application/pdf", title="qwe", checksum="A", pk=1
+            mime_type="application/pdf",
+            title="qwe",
+            checksum="A",
+            pk=1,
         )
         document2 = Document.objects.create(
-            mime_type="application/pdf", title="qwe", checksum="B", pk=2
+            mime_type="application/pdf",
+            title="qwe",
+            checksum="B",
+            pk=2,
         )
         Path(document.source_path).touch()
         Path(document2.source_path).touch()
@@ -584,10 +607,12 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.isfile(doc.source_path))
         self.assertTrue(os.path.isfile(doc.archive_path))
         self.assertEqual(
-            doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf")
+            doc.source_path,
+            os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"),
         )
         self.assertEqual(
-            doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
+            doc.archive_path,
+            os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"),
         )

     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@@ -851,7 +876,10 @@ class TestFilenameGeneration(TestCase):
     def test_invalid_characters(self):
         doc = Document.objects.create(
-            title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1"
+            title="This. is the title.",
+            mime_type="application/pdf",
+            pk=1,
+            checksum="1",
         )
         self.assertEqual(generate_filename(doc), "This. is the title.pdf")
@@ -877,7 +905,9 @@ class TestFilenameGeneration(TestCase):
         def run():
             doc = Document.objects.create(
-                checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow"
+                checksum=str(uuid.uuid4()),
+                title=str(uuid.uuid4()),
+                content="wow",
             )
             doc.filename = generate_unique_filename(doc)
             Path(doc.thumbnail_path).touch()

View File

@@ -1,7 +1,7 @@
 from django.core.management.base import CommandError
 from django.test import TestCase

 from documents.settings import EXPORTER_FILE_NAME

 from ..management.commands.document_importer import Command
@@ -12,7 +12,9 @@ class TestImporter(TestCase):
     def test_check_manifest_exists(self):
         cmd = Command()
         self.assertRaises(
-            CommandError, cmd._check_manifest_exists, "/tmp/manifest.json"
+            CommandError,
+            cmd._check_manifest_exists,
+            "/tmp/manifest.json",
         )

     def test_check_manifest(self):
@@ -26,11 +28,11 @@ class TestImporter(TestCase):
         self.assertTrue("The manifest file contains a record" in str(cm.exception))

         cmd.manifest = [
-            {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}
+            {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
         ]
         # self.assertRaises(CommandError, cmd._check_manifest)
         with self.assertRaises(CommandError) as cm:
             cmd._check_manifest()
         self.assertTrue(
-            'The manifest file refers to "noexist.pdf"' in str(cm.exception)
+            'The manifest file refers to "noexist.pdf"' in str(cm.exception),
         )

View File

@@ -1,5 +1,4 @@
 from django.test import TestCase
-
 from documents import index
 from documents.models import Document
 from documents.tests.utils import DirectoriesMixin
@@ -9,7 +8,9 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
     def test_auto_complete(self):
         doc1 = Document.objects.create(
-            title="doc1", checksum="A", content="test test2 test3"
+            title="doc1",
+            checksum="A",
+            content="test test2 test3",
         )
         doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
         doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")
@@ -21,10 +22,12 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
         ix = index.open_index()

         self.assertListEqual(
-            index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"]
+            index.autocomplete(ix, "tes"),
+            [b"test3", b"test", b"test2"],
         )
         self.assertListEqual(
-            index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"]
+            index.autocomplete(ix, "tes", limit=3),
+            [b"test3", b"test", b"test2"],
         )
         self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
         self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])

View File

@ -1,16 +1,14 @@
import hashlib
import tempfile
import filecmp import filecmp
import hashlib
import os import os
import shutil import shutil
import tempfile
from pathlib import Path from pathlib import Path
from unittest import mock from unittest import mock
from django.test import TestCase, override_settings
from django.core.management import call_command from django.core.management import call_command
from django.test import override_settings
from django.test import TestCase
from documents.file_handling import generate_filename from documents.file_handling import generate_filename
from documents.management.commands.document_archiver import handle_document from documents.management.commands.document_archiver import handle_document
from documents.models import Document from documents.models import Document
@@ -34,7 +32,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
         doc = self.make_models()
         shutil.copy(
-            sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
+            sample_file,
+            os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
         )

         call_command("document_archiver")
@@ -43,7 +42,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
         doc = self.make_models()
         shutil.copy(
-            sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
+            sample_file,
+            os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
         )

         handle_document(doc.pk)
@@ -90,7 +90,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
         )

         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
         shutil.copy(
-            sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf")
+            sample_file,
+            os.path.join(self.dirs.originals_dir, f"document_01.pdf"),
         )

         handle_document(doc2.pk)
@@ -120,7 +121,9 @@ class TestDecryptDocuments(TestCase):
             os.makedirs(thumb_dir, exist_ok=True)

             override_settings(
-                ORIGINALS_DIR=originals_dir, THUMBNAIL_DIR=thumb_dir, PASSPHRASE="test"
+                ORIGINALS_DIR=originals_dir,
+                THUMBNAIL_DIR=thumb_dir,
+                PASSPHRASE="test",
             ).enable()

             doc = Document.objects.create(
@@ -206,7 +209,7 @@ class TestRenamer(DirectoriesMixin, TestCase):
 class TestCreateClassifier(TestCase):
     @mock.patch(
-        "documents.management.commands.document_create_classifier.train_classifier"
+        "documents.management.commands.document_create_classifier.train_classifier",
     )
     def test_create_classifier(self, m):
         call_command("document_create_classifier")
@@ -224,7 +227,10 @@ class TestSanityChecker(DirectoriesMixin, TestCase):
     def test_errors(self):
         doc = Document.objects.create(
-            title="test", content="test", filename="test.pdf", checksum="abc"
+            title="test",
+            content="test",
+            filename="test.pdf",
+            checksum="abc",
         )
         Path(doc.source_path).touch()
         Path(doc.thumbnail_path).touch()

View File

@@ -6,12 +6,13 @@ from time import sleep
 from unittest import mock

 from django.conf import settings
-from django.core.management import call_command, CommandError
-from django.test import override_settings, TransactionTestCase
+from django.core.management import call_command
+from django.core.management import CommandError
+from django.test import override_settings
+from django.test import TransactionTestCase

-from documents.models import Tag
 from documents.consumer import ConsumerError
 from documents.management.commands import document_consumer
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin
@@ -41,7 +42,7 @@ class ConsumerMixin:
         super(ConsumerMixin, self).setUp()
         self.t = None
         patcher = mock.patch(
-            "documents.management.commands.document_consumer.async_task"
+            "documents.management.commands.document_consumer.async_task",
         )
         self.task_mock = patcher.start()
         self.addCleanup(patcher.stop)
@@ -208,13 +209,16 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
         self.t_start()

         shutil.copy(
-            self.sample_file, os.path.join(self.dirs.consumption_dir, ".DS_STORE")
+            self.sample_file,
+            os.path.join(self.dirs.consumption_dir, ".DS_STORE"),
         )
         shutil.copy(
-            self.sample_file, os.path.join(self.dirs.consumption_dir, "my_file.pdf")
+            self.sample_file,
+            os.path.join(self.dirs.consumption_dir, "my_file.pdf"),
         )
         shutil.copy(
-            self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_file.pdf")
+            self.sample_file,
+            os.path.join(self.dirs.consumption_dir, "._my_file.pdf"),
         )
         shutil.copy(
             self.sample_file,
@@ -258,7 +262,9 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

 @override_settings(
-    CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20
+    CONSUMER_POLLING=1,
+    CONSUMER_POLLING_DELAY=3,
+    CONSUMER_POLLING_RETRY_COUNT=20,
 )
 class TestConsumerPolling(TestConsumer):
     # just do all the tests with polling
@@ -319,7 +325,9 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
         self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)

     @override_settings(
-        CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20
+        CONSUMER_POLLING=1,
+        CONSUMER_POLLING_DELAY=1,
+        CONSUMER_POLLING_RETRY_COUNT=20,
     )
     def test_consume_file_with_path_tags_polling(self):
         self.test_consume_file_with_path_tags()

View File

@@ -7,13 +7,17 @@ from pathlib import Path
 from unittest import mock

 from django.core.management import call_command
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase

 from documents.management.commands import document_exporter
-from documents.models import Document, Tag, DocumentType, Correspondent
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.sanity_checker import check_sanity
 from documents.settings import EXPORTER_FILE_NAME
-from documents.tests.utils import DirectoriesMixin, paperless_environment
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import paperless_environment


 class TestExportImport(DirectoriesMixin, TestCase):
@@ -66,8 +70,9 @@ class TestExportImport(DirectoriesMixin, TestCase):
     def _get_document_from_manifest(self, manifest, id):
         f = list(
             filter(
-                lambda d: d["model"] == "documents.document" and d["pk"] == id, manifest
-            )
+                lambda d: d["model"] == "documents.document" and d["pk"] == id,
+                manifest,
+            ),
         )
         if len(f) == 1:
             return f[0]
@@ -76,7 +81,10 @@ class TestExportImport(DirectoriesMixin, TestCase):
     @override_settings(PASSPHRASE="test")
     def _do_export(
-        self, use_filename_format=False, compare_checksums=False, delete=False
+        self,
+        use_filename_format=False,
+        compare_checksums=False,
+        delete=False,
     ):
         args = ["document_exporter", self.target]
         if use_filename_format:
@@ -104,7 +112,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
         self.assertEqual(len(manifest), 8)
         self.assertEqual(
-            len(list(filter(lambda e: e["model"] == "documents.document", manifest))), 4
+            len(list(filter(lambda e: e["model"] == "documents.document", manifest))),
+            4,
         )

         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
@@ -129,7 +138,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
         for element in manifest:
             if element["model"] == "documents.document":
                 fname = os.path.join(
-                    self.target, element[document_exporter.EXPORTER_FILE_NAME]
+                    self.target,
+                    element[document_exporter.EXPORTER_FILE_NAME],
                 )
                 self.assertTrue(os.path.exists(fname))
                 self.assertTrue(
@@ -137,8 +147,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
                     os.path.join(
                         self.target,
                         element[document_exporter.EXPORTER_THUMBNAIL_NAME],
-                    )
-                )
+                    ),
+                ),
                 )

                 with open(fname, "rb") as f:
@@ -146,12 +156,14 @@ class TestExportImport(DirectoriesMixin, TestCase):
                     self.assertEqual(checksum, element["fields"]["checksum"])

                 self.assertEqual(
-                    element["fields"]["storage_type"], Document.STORAGE_TYPE_UNENCRYPTED
+                    element["fields"]["storage_type"],
+                    Document.STORAGE_TYPE_UNENCRYPTED,
                 )

                 if document_exporter.EXPORTER_ARCHIVE_NAME in element:
                     fname = os.path.join(
-                        self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME]
+                        self.target,
+                        element[document_exporter.EXPORTER_ARCHIVE_NAME],
                     )
                     self.assertTrue(os.path.exists(fname))
@@ -188,7 +200,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         )

         with override_settings(
-            PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"
+            PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
         ):
             self.test_exporter(use_filename_format=True)
@@ -205,7 +217,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime

         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2"
+            "documents.management.commands.document_exporter.shutil.copy2",
         ) as m:
             self._do_export()
             m.assert_not_called()
@@ -216,7 +228,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         Path(self.d1.source_path).touch()

         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2"
+            "documents.management.commands.document_exporter.shutil.copy2",
         ) as m:
             self._do_export()
             self.assertEqual(m.call_count, 1)
@@ -239,7 +251,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))

         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2"
+            "documents.management.commands.document_exporter.shutil.copy2",
         ) as m:
             self._do_export()
             m.assert_not_called()
@@ -250,7 +262,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         self.d2.save()

         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2"
+            "documents.management.commands.document_exporter.shutil.copy2",
         ) as m:
             self._do_export(compare_checksums=True)
             self.assertEqual(m.call_count, 1)
@@ -270,26 +282,29 @@ class TestExportImport(DirectoriesMixin, TestCase):
         doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
         self.assertTrue(
             os.path.isfile(
-                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
-            )
+                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
+            ),
         )

         self.d3.delete()
         manifest = self._do_export()
         self.assertRaises(
-            ValueError, self._get_document_from_manifest, manifest, self.d3.id
+            ValueError,
+            self._get_document_from_manifest,
+            manifest,
+            self.d3.id,
         )
         self.assertTrue(
             os.path.isfile(
-                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
-            )
+                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
+            ),
         )

         manifest = self._do_export(delete=True)
         self.assertFalse(
             os.path.isfile(
-                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
-            )
+                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
+            ),
         )

         self.assertTrue(len(manifest), 6)
@@ -316,7 +331,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
         self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
         self.assertTrue(
-            os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf"))
+            os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")),
         )

     def test_export_missing_files(self):

View File

@@ -1,35 +1,50 @@
 from django.core.management import call_command
 from django.test import TestCase
-from documents.models import Document, Tag, Correspondent, DocumentType
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin


 class TestRetagger(DirectoriesMixin, TestCase):
     def make_models(self):
         self.d1 = Document.objects.create(
-            checksum="A", title="A", content="first document"
+            checksum="A",
+            title="A",
+            content="first document",
         )
         self.d2 = Document.objects.create(
-            checksum="B", title="B", content="second document"
+            checksum="B",
+            title="B",
+            content="second document",
         )
         self.d3 = Document.objects.create(
-            checksum="C", title="C", content="unrelated document"
+            checksum="C",
+            title="C",
+            content="unrelated document",
         )
         self.d4 = Document.objects.create(
-            checksum="D", title="D", content="auto document"
+            checksum="D",
+            title="D",
+            content="auto document",
         )

         self.tag_first = Tag.objects.create(
-            name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY
+            name="tag1",
+            match="first",
+            matching_algorithm=Tag.MATCH_ANY,
         )
         self.tag_second = Tag.objects.create(
-            name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY
+            name="tag2",
+            match="second",
+            matching_algorithm=Tag.MATCH_ANY,
         )
         self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
         self.tag_no_match = Tag.objects.create(name="test2")
         self.tag_auto = Tag.objects.create(
-            name="tagauto", matching_algorithm=Tag.MATCH_AUTO
+            name="tagauto",
+            matching_algorithm=Tag.MATCH_AUTO,
         )

         self.d3.tags.add(self.tag_inbox)
@@ -37,17 +52,25 @@ class TestRetagger(DirectoriesMixin, TestCase):
         self.d4.tags.add(self.tag_auto)

         self.correspondent_first = Correspondent.objects.create(
-            name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY
+            name="c1",
+            match="first",
+            matching_algorithm=Correspondent.MATCH_ANY,
         )
         self.correspondent_second = Correspondent.objects.create(
-            name="c2", match="second", matching_algorithm=Correspondent.MATCH_ANY
+            name="c2",
+            match="second",
+            matching_algorithm=Correspondent.MATCH_ANY,
        )
         self.doctype_first = DocumentType.objects.create(
-            name="dt1", match="first", matching_algorithm=DocumentType.MATCH_ANY
+            name="dt1",
+            match="first",
+            matching_algorithm=DocumentType.MATCH_ANY,
         )
         self.doctype_second = DocumentType.objects.create(
-            name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY
+            name="dt2",
+            match="second",
+            matching_algorithm=DocumentType.MATCH_ANY,
         )

     def get_updated_docs(self):
@@ -98,10 +121,12 @@ class TestRetagger(DirectoriesMixin, TestCase):
         self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))

         self.assertCountEqual(
-            [tag.id for tag in d_first.tags.all()], [self.tag_first.id]
+            [tag.id for tag in d_first.tags.all()],
+            [self.tag_first.id],
         )
         self.assertCountEqual(
-            [tag.id for tag in d_second.tags.all()], [self.tag_second.id]
+            [tag.id for tag in d_second.tags.all()],
+            [self.tag_second.id],
         )
         self.assertCountEqual(
             [tag.id for tag in d_unrelated.tags.all()],
@@ -133,7 +158,10 @@ class TestRetagger(DirectoriesMixin, TestCase):
     def test_add_tags_suggest_url(self):
         call_command(
-            "document_retagger", "--tags", "--suggest", "--base-url=http://localhost"
+            "document_retagger",
+            "--tags",
+            "--suggest",
+            "--base-url=http://localhost",
         )
         d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

View File

@@ -5,9 +5,11 @@ from unittest import mock
 from django.contrib.auth.models import User
 from django.core.management import call_command
 from django.test import TestCase
 from documents.management.commands.document_thumbnails import _process_document
-from documents.models import Document, Tag, Correspondent, DocumentType
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin

View File

@@ -4,9 +4,11 @@ from unittest import mock
 from django.core.management import call_command
 from django.test import TestCase
 from documents.management.commands.document_thumbnails import _process_document
-from documents.models import Document, Tag, Correspondent, DocumentType
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin

View File

@@ -4,10 +4,14 @@ from random import randint
 from django.contrib.admin.models import LogEntry
 from django.contrib.auth.models import User
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase
 from .. import matching
-from ..models import Correspondent, Document, Tag, DocumentType
+from ..models import Correspondent
+from ..models import Document
+from ..models import DocumentType
+from ..models import Tag
 from ..signals import document_consumption_finished
@@ -209,7 +213,8 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
         TestCase.setUp(self)
         User.objects.create_user(username="test_consumer", password="12345")
         self.doc_contains = Document.objects.create(
-            content="I contain the keyword.", mime_type="application/pdf"
+            content="I contain the keyword.",
+            mime_type="application/pdf",
         )
         self.index_dir = tempfile.mkdtemp()
@@ -221,43 +226,56 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
     def test_tag_applied_any(self):
         t1 = Tag.objects.create(
-            name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY
+            name="test",
+            match="keyword",
+            matching_algorithm=Tag.MATCH_ANY,
         )
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
         )
         self.assertTrue(list(self.doc_contains.tags.all()) == [t1])

     def test_tag_not_applied(self):
         Tag.objects.create(
-            name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY
+            name="test",
+            match="no-match",
+            matching_algorithm=Tag.MATCH_ANY,
         )
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
         )
         self.assertTrue(list(self.doc_contains.tags.all()) == [])

     def test_correspondent_applied(self):
         correspondent = Correspondent.objects.create(
-            name="test", match="keyword", matching_algorithm=Correspondent.MATCH_ANY
+            name="test",
+            match="keyword",
+            matching_algorithm=Correspondent.MATCH_ANY,
         )
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
         )
         self.assertTrue(self.doc_contains.correspondent == correspondent)

     def test_correspondent_not_applied(self):
         Tag.objects.create(
-            name="test", match="no-match", matching_algorithm=Correspondent.MATCH_ANY
+            name="test",
+            match="no-match",
+            matching_algorithm=Correspondent.MATCH_ANY,
         )
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
        )
         self.assertEqual(self.doc_contains.correspondent, None)

     def test_logentry_created(self):
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
         )
         self.assertEqual(LogEntry.objects.count(), 1)

View File

@@ -6,9 +6,9 @@ from unittest import mock
 from django.conf import settings
 from django.test import override_settings
 from documents.parsers import ParseError
-from documents.tests.utils import DirectoriesMixin, TestMigrations
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import TestMigrations

 STORAGE_TYPE_GPG = "gpg"
@@ -93,10 +93,18 @@ def make_test_document(
 simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
 simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
 simple_pdf2 = os.path.join(
-    os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf"
+    os.path.dirname(__file__),
+    "samples",
+    "documents",
+    "originals",
+    "0000002.pdf",
 )
 simple_pdf3 = os.path.join(
-    os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf"
+    os.path.dirname(__file__),
+    "samples",
+    "documents",
+    "originals",
+    "0000003.pdf",
 )
 simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
 simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
@@ -121,19 +129,43 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
             simple_pdf,
         )
         self.no_text = make_test_document(
-            Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf
+            Document,
+            "no-text",
+            "image/png",
+            simple_png2,
+            "no-text.png",
+            simple_pdf,
         )
         self.doc_no_archive = make_test_document(
-            Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
+            Document,
+            "no_archive",
+            "text/plain",
+            simple_txt,
+            "no_archive.txt",
         )
         self.clash1 = make_test_document(
-            Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
+            Document,
+            "clash",
+            "application/pdf",
+            simple_pdf,
+            "clash.pdf",
+            simple_pdf,
         )
         self.clash2 = make_test_document(
-            Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf
+            Document,
+            "clash",
+            "image/jpeg",
+            simple_jpg,
+            "clash.jpg",
+            simple_pdf,
         )
         self.clash3 = make_test_document(
-            Document, "clash", "image/png", simple_png, "clash.png", simple_pdf
+            Document,
+            "clash",
+            "image/png",
+            simple_png,
+            "clash.png",
+            simple_pdf,
         )
         self.clash4 = make_test_document(
             Document,
@@ -147,7 +179,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
         self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
         self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
         self.assertNotEqual(
-            archive_path_old(self.clash1), archive_path_old(self.clash4)
+            archive_path_old(self.clash1),
+            archive_path_old(self.clash4),
         )

     def testArchiveFilesMigrated(self):
@@ -171,19 +204,23 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
             self.assertEqual(archive_checksum, doc.archive_checksum)

         self.assertEqual(
-            Document.objects.filter(archive_checksum__isnull=False).count(), 6
+            Document.objects.filter(archive_checksum__isnull=False).count(),
+            6,
         )

     def test_filenames(self):
         Document = self.apps.get_model("documents", "Document")
         self.assertEqual(
-            Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
+            Document.objects.get(id=self.unrelated.id).archive_filename,
+            "unrelated.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
+            Document.objects.get(id=self.no_text.id).archive_filename,
+            "no-text.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
+            Document.objects.get(id=self.doc_no_archive.id).archive_filename,
+            None,
         )
         self.assertEqual(
             Document.objects.get(id=self.clash1.id).archive_filename,
@@ -198,7 +235,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
             f"{self.clash3.id:07}.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
+            Document.objects.get(id=self.clash4.id).archive_filename,
+            "clash.png.pdf",
         )
@@ -207,16 +245,20 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
     def test_filenames(self):
         Document = self.apps.get_model("documents", "Document")
         self.assertEqual(
-            Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
+            Document.objects.get(id=self.unrelated.id).archive_filename,
+            "unrelated.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
+            Document.objects.get(id=self.no_text.id).archive_filename,
+            "no-text.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
+            Document.objects.get(id=self.doc_no_archive.id).archive_filename,
+            None,
         )
         self.assertEqual(
-            Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf"
+            Document.objects.get(id=self.clash1.id).archive_filename,
+            "none/clash.pdf",
         )
         self.assertEqual(
             Document.objects.get(id=self.clash2.id).archive_filename,
@@ -227,7 +269,8 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
             "none/clash_02.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
+            Document.objects.get(id=self.clash4.id).archive_filename,
+            "clash.png.pdf",
         )
@@ -248,12 +291,19 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
         Document = self.apps.get_model("documents", "Document")

         doc = make_test_document(
-            Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
+            Document,
+            "clash",
+            "application/pdf",
+            simple_pdf,
+            "clash.pdf",
+            simple_pdf,
         )
         os.unlink(archive_path_old(doc))

         self.assertRaisesMessage(
-            ValueError, "does not exist at: ", self.performMigration
+            ValueError,
+            "does not exist at: ",
+            self.performMigration,
         )

     def test_parser_missing(self):
@@ -277,7 +327,9 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
         )

         self.assertRaisesMessage(
-            ValueError, "no parsers are available", self.performMigration
+            ValueError,
+            "no parsers are available",
+            self.performMigration,
         )

     @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
@@ -286,7 +338,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
         Document = self.apps.get_model("documents", "Document")

         doc1 = make_test_document(
-            Document, "document", "image/png", simple_png, "document.png", simple_pdf
+            Document,
+            "document",
+            "image/png",
+            simple_png,
+            "document.png",
+            simple_pdf,
         )
         doc2 = make_test_document(
             Document,
@@ -311,8 +368,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
                     filter(
                         lambda log: "Parse error, will try again in 5 seconds" in log,
                         capture.output,
-                    )
-                )
+                    ),
+                ),
             ),
             4,
         )
@@ -324,8 +381,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
                        lambda log: "Unable to regenerate archive document for ID:"
                         in log,
                         capture.output,
-                    )
-                )
+                    ),
+                ),
             ),
             2,
         )
@@ -347,7 +404,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
         Document = self.apps.get_model("documents", "Document")

         doc1 = make_test_document(
-            Document, "document", "image/png", simple_png, "document.png", simple_pdf
+            Document,
+            "document",
+            "image/png",
+            simple_png,
+            "document.png",
+            simple_pdf,
         )
         doc2 = make_test_document(
             Document,
@@ -368,8 +430,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
                         lambda log: "Parser did not return an archive document for document"
                         in log,
                         capture.output,
-                    )
-                )
+                    ),
+                ),
             ),
             2,
         )
@@ -405,7 +467,11 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
             "unrelated.pdf",
         )
         doc_no_archive = make_test_document(
-            Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
+            Document,
+            "no_archive",
+            "text/plain",
+            simple_txt,
+            "no_archive.txt",
         )
         clashB = make_test_document(
             Document,
@@ -434,13 +500,14 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
             self.assertEqual(archive_checksum, doc.archive_checksum)

         self.assertEqual(
-            Document.objects.filter(archive_checksum__isnull=False).count(), 2
+            Document.objects.filter(archive_checksum__isnull=False).count(),
+            2,
         )


 @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
 class TestMigrateArchiveFilesBackwardsWithFilenameFormat(
-    TestMigrateArchiveFilesBackwards
+    TestMigrateArchiveFilesBackwards,
 ):
     pass
@@ -505,5 +572,7 @@ class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
         )

         self.assertRaisesMessage(
-            ValueError, "file already exists.", self.performMigration
+            ValueError,
+            "file already exists.",
+            self.performMigration,
         )

View File

@@ -3,9 +3,9 @@ import shutil
 from django.conf import settings
 from django.test import override_settings
 from documents.parsers import get_default_file_extension
-from documents.tests.utils import DirectoriesMixin, TestMigrations
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import TestMigrations

 STORAGE_TYPE_UNENCRYPTED = "unencrypted"
 STORAGE_TYPE_GPG = "gpg"
@@ -46,7 +46,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
     def setUpBeforeMigration(self, apps):
         Document = apps.get_model("documents", "Document")
         doc = Document.objects.create(
-            title="test", file_type="pdf", filename="file1.pdf"
+            title="test",
+            file_type="pdf",
+            filename="file1.pdf",
         )
         self.doc_id = doc.id
         shutil.copy(
@@ -55,7 +57,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
         )

         doc2 = Document.objects.create(
-            checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG
+            checksum="B",
+            file_type="pdf",
+            storage_type=STORAGE_TYPE_GPG,
         )
         self.doc2_id = doc2.id
         shutil.copy(
@@ -88,7 +92,9 @@ class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
     def setUpBeforeMigration(self, apps):
         Document = apps.get_model("documents", "Document")
         doc = Document.objects.create(
-            title="test", mime_type="application/pdf", filename="file1.pdf"
+            title="test",
+            mime_type="application/pdf",
+            filename="file1.pdf",
         )
         self.doc_id = doc.id
         shutil.copy(

View File

@@ -1,4 +1,5 @@
-from documents.tests.utils import DirectoriesMixin, TestMigrations
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import TestMigrations


 class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations):

View File

@@ -1,4 +1,5 @@
-from documents.tests.utils import DirectoriesMixin, TestMigrations
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import TestMigrations


 class TestMigrateTagColor(DirectoriesMixin, TestMigrations):

View File

@@ -1,7 +1,9 @@
 from django.test import TestCase
-from .factories import DocumentFactory, CorrespondentFactory
-from ..models import Document, Correspondent
+from ..models import Correspondent
+from ..models import Document
+from .factories import CorrespondentFactory
+from .factories import DocumentFactory


 class CorrespondentTestCase(TestCase):

View File

@@ -4,16 +4,14 @@ import tempfile
 from tempfile import TemporaryDirectory
 from unittest import mock

-from django.test import TestCase, override_settings
-from documents.parsers import (
-    get_parser_class,
-    get_supported_file_extensions,
-    get_default_file_extension,
-    get_parser_class_for_mime_type,
-    DocumentParser,
-    is_file_ext_supported,
-)
+from django.test import override_settings
+from django.test import TestCase
+from documents.parsers import DocumentParser
+from documents.parsers import get_default_file_extension
+from documents.parsers import get_parser_class
+from documents.parsers import get_parser_class_for_mime_type
+from documents.parsers import get_supported_file_extensions
+from documents.parsers import is_file_ext_supported
 from paperless_tesseract.parsers import RasterisedDocumentParser
 from paperless_text.parsers import TextDocumentParser

View File

@@ -6,9 +6,9 @@ from pathlib import Path
 import filelock
 from django.conf import settings
 from django.test import TestCase
 from documents.models import Document
-from documents.sanity_checker import check_sanity, SanityCheckMessages
+from documents.sanity_checker import check_sanity
+from documents.sanity_checker import SanityCheckMessages
 from documents.tests.utils import DirectoriesMixin
@@ -23,7 +23,8 @@ class TestSanityCheckMessages(TestCase):
             self.assertEqual(len(capture.output), 1)
             self.assertEqual(capture.records[0].levelno, logging.INFO)
             self.assertEqual(
-                capture.records[0].message, "Sanity checker detected no issues."
+                capture.records[0].message,
+                "Sanity checker detected no issues.",
             )

     def test_info(self):

View File

@@ -2,8 +2,8 @@ import logging
 from unittest import mock

 from django.test import TestCase
-from paperless.settings import default_task_workers, default_threads_per_worker
+from paperless.settings import default_task_workers
+from paperless.settings import default_threads_per_worker


 class TestSettings(TestCase):
@@ -21,7 +21,7 @@ class TestSettings(TestCase):
     def test_workers_threads(self):
         for i in range(1, 64):
             with mock.patch(
-                "paperless.settings.multiprocessing.cpu_count"
+                "paperless.settings.multiprocessing.cpu_count",
             ) as cpu_count:
                 cpu_count.return_value = i

View File

@@ -4,10 +4,13 @@ from unittest import mock
 from django.conf import settings
 from django.test import TestCase
 from django.utils import timezone
 from documents import tasks
-from documents.models import Document, Tag, Correspondent, DocumentType
-from documents.sanity_checker import SanityCheckMessages, SanityCheckFailedException
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
+from documents.sanity_checker import SanityCheckFailedException
+from documents.sanity_checker import SanityCheckMessages
 from documents.tests.utils import DirectoriesMixin
@@ -106,7 +109,8 @@ class TestTasks(DirectoriesMixin, TestCase):
         messages.warning("Some warning")
         m.return_value = messages
         self.assertEqual(
-            tasks.sanity_check(), "Sanity check exited with warnings. See log."
+            tasks.sanity_check(),
+            "Sanity check exited with warnings. See log.",
         )
         m.assert_called_once()
@@ -116,7 +120,8 @@ class TestTasks(DirectoriesMixin, TestCase):
         messages.info("Some info")
         m.return_value = messages
         self.assertEqual(
-            tasks.sanity_check(), "Sanity check exited with infos. See log."
+            tasks.sanity_check(),
+            "Sanity check exited with infos. See log.",
         )
         m.assert_called_once()

View File

@@ -25,7 +25,7 @@ class TestViews(TestCase):
         ]:
             if language_given:
                 self.client.cookies.load(
-                    {settings.LANGUAGE_COOKIE_NAME: language_given}
+                    {settings.LANGUAGE_COOKIE_NAME: language_given},
                 )
             elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys():
                 self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME)
@@ -51,5 +51,6 @@ class TestViews(TestCase):
                 f"frontend/{language_actual}/polyfills.js",
             )
             self.assertEqual(
-                response.context_data["main_js"], f"frontend/{language_actual}/main.js"
+                response.context_data["main_js"],
+                f"frontend/{language_actual}/main.js",
             )

View File

@@ -7,7 +7,8 @@ from contextlib import contextmanager
 from django.apps import apps
 from django.db import connection
 from django.db.migrations.executor import MigrationExecutor
-from django.test import override_settings, TransactionTestCase
+from django.test import override_settings
+from django.test import TransactionTestCase


 def setup_directories():
@@ -97,7 +98,7 @@ class TestMigrations(TransactionTestCase):
         assert (
             self.migrate_from and self.migrate_to
         ), "TestCase '{}' must define migrate_from and migrate_to properties".format(
-            type(self).__name__
+            type(self).__name__,
         )
         self.migrate_from = [(self.app, self.migrate_from)]
         self.migrate_to = [(self.app, self.migrate_to)]
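The assert above is the entry gate for the migration test cases seen earlier: concrete subclasses set the two attributes and get the database migrated around their setup code. A sketch of the usage pattern (class and migration names here are hypothetical; the archive-file tests above reference the real migration 1012_fix_archive_files):

class TestMigrateSomething(TestMigrations):
    app = "documents"
    migrate_from = "1011_example"  # hypothetical migration names
    migrate_to = "1012_example"

    def setUpBeforeMigration(self, apps):
        # Runs against the historical model state, before migrate_to applies.
        Document = apps.get_model("documents", "Document")
        self.doc_id = Document.objects.create(title="test").id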

View File

@@ -5,63 +5,70 @@ import uuid
 import zipfile
 from datetime import datetime
 from time import mktime
-from urllib.parse import quote_plus
 from unicodedata import normalize
+from urllib.parse import quote_plus

 from django.conf import settings
-from django.db.models import Count, Max, Case, When, IntegerField
+from django.db.models import Case
+from django.db.models import Count
+from django.db.models import IntegerField
+from django.db.models import Max
+from django.db.models import When
 from django.db.models.functions import Lower
-from django.http import HttpResponse, HttpResponseBadRequest, Http404
+from django.http import Http404
+from django.http import HttpResponse
+from django.http import HttpResponseBadRequest
 from django.utils.translation import get_language
 from django.views.decorators.cache import cache_control
 from django.views.generic import TemplateView
 from django_filters.rest_framework import DjangoFilterBackend
 from django_q.tasks import async_task
+from paperless.db import GnuPG
+from paperless.views import StandardPagination
 from rest_framework import parsers
 from rest_framework.decorators import action
 from rest_framework.exceptions import NotFound
-from rest_framework.filters import OrderingFilter, SearchFilter
+from rest_framework.filters import OrderingFilter
+from rest_framework.filters import SearchFilter
 from rest_framework.generics import GenericAPIView
-from rest_framework.mixins import (
-    DestroyModelMixin,
-    ListModelMixin,
-    RetrieveModelMixin,
-    UpdateModelMixin,
-)
+from rest_framework.mixins import DestroyModelMixin
+from rest_framework.mixins import ListModelMixin
+from rest_framework.mixins import RetrieveModelMixin
+from rest_framework.mixins import UpdateModelMixin
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 from rest_framework.views import APIView
-from rest_framework.viewsets import GenericViewSet, ModelViewSet, ViewSet
+from rest_framework.viewsets import GenericViewSet
+from rest_framework.viewsets import ModelViewSet
+from rest_framework.viewsets import ViewSet

-from paperless.db import GnuPG
-from paperless.views import StandardPagination
-from .bulk_download import (
-    OriginalAndArchiveStrategy,
-    OriginalsOnlyStrategy,
-    ArchiveOnlyStrategy,
-)
+from .bulk_download import ArchiveOnlyStrategy
+from .bulk_download import OriginalAndArchiveStrategy
+from .bulk_download import OriginalsOnlyStrategy
 from .classifier import load_classifier
-from .filters import (
-    CorrespondentFilterSet,
-    DocumentFilterSet,
-    TagFilterSet,
-    DocumentTypeFilterSet,
-)
-from .matching import match_correspondents, match_tags, match_document_types
-from .models import Correspondent, Document, Tag, DocumentType, SavedView
+from .filters import CorrespondentFilterSet
+from .filters import DocumentFilterSet
+from .filters import DocumentTypeFilterSet
+from .filters import TagFilterSet
+from .matching import match_correspondents
+from .matching import match_document_types
+from .matching import match_tags
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import SavedView
+from .models import Tag
 from .parsers import get_parser_class_for_mime_type
-from .serialisers import (
-    CorrespondentSerializer,
-    DocumentSerializer,
-    TagSerializerVersion1,
-    TagSerializer,
-    DocumentTypeSerializer,
-    PostDocumentSerializer,
-    SavedViewSerializer,
-    BulkEditSerializer,
-    DocumentListSerializer,
-    BulkDownloadSerializer,
-)
+from .serialisers import BulkDownloadSerializer
+from .serialisers import BulkEditSerializer
+from .serialisers import CorrespondentSerializer
+from .serialisers import DocumentListSerializer
+from .serialisers import DocumentSerializer
+from .serialisers import DocumentTypeSerializer
+from .serialisers import PostDocumentSerializer
+from .serialisers import SavedViewSerializer
+from .serialisers import TagSerializer
+from .serialisers import TagSerializerVersion1

 logger = logging.getLogger("paperless.api")
@@ -89,16 +96,14 @@ class IndexView(TemplateView):
         context["full_name"] = self.request.user.get_full_name()
         context["styles_css"] = f"frontend/{self.get_language()}/styles.css"
         context["runtime_js"] = f"frontend/{self.get_language()}/runtime.js"
-        context[
-            "polyfills_js"
-        ] = f"frontend/{self.get_language()}/polyfills.js"  # NOQA: E501
+        context["polyfills_js"] = f"frontend/{self.get_language()}/polyfills.js"
         context["main_js"] = f"frontend/{self.get_language()}/main.js"
         context[
             "webmanifest"
-        ] = f"frontend/{self.get_language()}/manifest.webmanifest"  # NOQA: E501
+        ] = f"frontend/{self.get_language()}/manifest.webmanifest"  # noqa: E501
         context[
             "apple_touch_icon"
-        ] = f"frontend/{self.get_language()}/apple-touch-icon.png"  # NOQA: E501
+        ] = f"frontend/{self.get_language()}/apple-touch-icon.png"  # noqa: E501
         return context
@@ -106,7 +111,8 @@ class CorrespondentViewSet(ModelViewSet):
     model = Correspondent

     queryset = Correspondent.objects.annotate(
-        document_count=Count("documents"), last_correspondence=Max("documents__created")
+        document_count=Count("documents"),
+        last_correspondence=Max("documents__created"),
     ).order_by(Lower("name"))

     serializer_class = CorrespondentSerializer
@@ -127,7 +133,7 @@ class TagViewSet(ModelViewSet):
     model = Tag

     queryset = Tag.objects.annotate(document_count=Count("documents")).order_by(
-        Lower("name")
+        Lower("name"),
     )

     def get_serializer_class(self):
@@ -147,7 +153,7 @@ class DocumentTypeViewSet(ModelViewSet):
     model = DocumentType

     queryset = DocumentType.objects.annotate(
-        document_count=Count("documents")
+        document_count=Count("documents"),
     ).order_by(Lower("name"))

     serializer_class = DocumentTypeSerializer
@@ -220,9 +226,7 @@ class DocumentViewSet(
     def file_response(self, pk, request, disposition):
         doc = Document.objects.get(id=pk)
-        if (
-            not self.original_requested(request) and doc.has_archive_version
-        ):  # NOQA: E501
+        if not self.original_requested(request) and doc.has_archive_version:
             file_handle = doc.archive_file
             filename = doc.get_public_filename(archive=True)
             mime_type = "application/pdf"
@@ -258,7 +262,7 @@ class DocumentViewSet(
             try:
                 return parser.extract_metadata(file, mime_type)
-            except Exception as e:
+            except Exception:
                 # TODO: cover GPG errors, remove later.
                 return []
         else:
@@ -291,7 +295,8 @@ class DocumentViewSet(
         if doc.has_archive_version:
             meta["archive_size"] = self.get_filesize(doc.archive_path)
             meta["archive_metadata"] = self.get_metadata(
-                doc.archive_path, "application/pdf"
+                doc.archive_path,
+                "application/pdf",
             )
         else:
             meta["archive_size"] = None
@@ -315,7 +320,7 @@ class DocumentViewSet(
                 "document_types": [
                     dt.id for dt in match_document_types(doc, classifier)
                 ],
-            }
+            },
         )

     @action(methods=["get"], detail=True)
@@ -357,7 +362,7 @@ class SearchResultSerializer(DocumentSerializer):
             "score": instance.score,
             "highlights": instance.highlights("content", text=doc.content)
             if doc
-            else None,  # NOQA: E501
+            else None,
             "rank": instance.rank,
         }
@@ -500,7 +505,9 @@ class PostDocumentView(GenericAPIView):
         os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

         with tempfile.NamedTemporaryFile(
-            prefix="paperless-upload-", dir=settings.SCRATCH_DIR, delete=False
+            prefix="paperless-upload-",
+            dir=settings.SCRATCH_DIR,
+            delete=False,
         ) as f:
             f.write(doc_data)
             os.utime(f.name, times=(t, t))
@@ -537,20 +544,20 @@ class SelectionDataView(GenericAPIView):
         correspondents = Correspondent.objects.annotate(
             document_count=Count(
-                Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
-            )
+                Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
+            ),
         )

         tags = Tag.objects.annotate(
             document_count=Count(
-                Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
-            )
+                Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
+            ),
         )

         types = DocumentType.objects.annotate(
             document_count=Count(
-                Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
-            )
+                Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
+            ),
         )

         r = Response(
@@ -565,7 +572,7 @@ class SelectionDataView(GenericAPIView):
                 "selected_document_types": [
                     {"id": t.id, "document_count": t.document_count} for t in types
                 ],
-            }
+            },
         )
         return r
@@ -612,7 +619,7 @@ class StatisticsView(APIView):
             {
                 "documents_total": documents_total,
                 "documents_inbox": documents_inbox,
-            }
+            },
         )
@@ -632,7 +639,9 @@ class BulkDownloadView(GenericAPIView):
         os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

         temp = tempfile.NamedTemporaryFile(
-            dir=settings.SCRATCH_DIR, suffix="-compressed-archive", delete=False
+            dir=settings.SCRATCH_DIR,
+            suffix="-compressed-archive",
+            delete=False,
         )

         if content == "both":
@@ -651,7 +660,8 @@ class BulkDownloadView(GenericAPIView):
         with open(temp.name, "rb") as f:
             response = HttpResponse(f, content_type="application/zip")
             response["Content-Disposition"] = '{}; filename="{}"'.format(
-                "attachment", "documents.zip"
+                "attachment",
+                "documents.zip",
             )

             return response

View File

@@ -1 +1,4 @@
-from .checks import paths_check, binaries_check
+from .checks import binaries_check
+from .checks import paths_check
+
+__all__ = ["binaries_check", "paths_check"]
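This mirrors the star-import cleanup applied elsewhere in the commit: explicit imports plus __all__ keep the names re-exported from the package root while letting flake8 see them as used rather than flagging them as unused imports. Callers are unaffected, as the test file later in this diff shows:

# Consumers keep importing from the package root:
from paperless import binaries_check
from paperless import paths_check

# And `from paperless import *` now exports exactly what __all__ lists.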

View File

@@ -9,14 +9,14 @@ from django.core.asgi import get_asgi_application
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")

 django_asgi_app = get_asgi_application()

-from channels.auth import AuthMiddlewareStack  # NOQA: E402
-from channels.routing import ProtocolTypeRouter, URLRouter  # NOQA: E402
-from paperless.urls import websocket_urlpatterns  # NOQA: E402
+from channels.auth import AuthMiddlewareStack  # noqa: E402
+from channels.routing import ProtocolTypeRouter, URLRouter  # noqa: E402
+from paperless.urls import websocket_urlpatterns  # noqa: E402

 application = ProtocolTypeRouter(
     {
         "http": get_asgi_application(),
         "websocket": AuthMiddlewareStack(URLRouter(websocket_urlpatterns)),
-    }
+    },
 )

View File

@@ -1,9 +1,9 @@
 from django.conf import settings
 from django.contrib import auth
+from django.contrib.auth.middleware import RemoteUserMiddleware
 from django.contrib.auth.models import User
 from django.utils.deprecation import MiddlewareMixin
 from rest_framework import authentication
-from django.contrib.auth.middleware import RemoteUserMiddleware


 class AutoLoginMiddleware(MiddlewareMixin):
@@ -25,7 +25,7 @@ class AngularApiAuthenticationOverride(authentication.BaseAuthentication):
             settings.DEBUG
             and "Referer" in request.headers
             and request.headers["Referer"].startswith("http://localhost:4200/")
-        ):  # NOQA: E501
+        ):
             user = User.objects.filter(is_staff=True).first()
             print("Auto-Login with user {}".format(user))
             return (user, None)

View File

@@ -3,7 +3,9 @@ import shutil
 import stat

 from django.conf import settings
-from django.core.checks import Error, Warning, register
+from django.core.checks import Error
+from django.core.checks import register
+from django.core.checks import Warning

 exists_message = "{} is set but doesn't exist."
 exists_hint = "Create a directory at {}"
@@ -19,11 +21,12 @@ def path_check(var, directory):
     if directory:
         if not os.path.isdir(directory):
             messages.append(
-                Error(exists_message.format(var), exists_hint.format(directory))
+                Error(exists_message.format(var), exists_hint.format(directory)),
             )
         else:
             test_file = os.path.join(
-                directory, f"__paperless_write_test_{os.getpid()}__"
+                directory,
+                f"__paperless_write_test_{os.getpid()}__",
             )
             try:
                 with open(test_file, "w"):
@@ -34,9 +37,9 @@ def path_check(var, directory):
                         writeable_message.format(var),
                         writeable_hint.format(
                             f"\n{stat.filemode(os.stat(directory).st_mode)} "
-                            f"{directory}\n"
+                            f"{directory}\n",
                         ),
-                    )
+                    ),
                 )
             finally:
                 if os.path.isfile(test_file):
@@ -88,8 +91,8 @@ def debug_mode_check(app_configs, **kwargs):
                 "security issue, since it puts security overides in place which "
                 "are meant to be only used during development. This "
                 "also means that paperless will tell anyone various "
-                "debugging information when something goes wrong."
-            )
+                "debugging information when something goes wrong.",
+            ),
         ]
     else:
         return []
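For readers unfamiliar with the Django checks framework used here: functions decorated with @register run during `manage.py check` (and at server startup) and return a list of Error or Warning objects. A minimal sketch in the shape of the path_check/debug_mode_check functions above; the condition and messages are illustrative only:

import os

from django.core.checks import Error, register


@register()
def example_check(app_configs, **kwargs):
    messages = []
    if not os.path.isdir("/some/required/dir"):  # hypothetical condition
        messages.append(
            Error(
                "Required directory is missing.",
                hint="Create it, or point the setting elsewhere.",
            ),
        )
    return messages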

View File

@@ -1,7 +1,8 @@
 import json

 from asgiref.sync import async_to_sync
-from channels.exceptions import DenyConnection, AcceptConnection
+from channels.exceptions import AcceptConnection
+from channels.exceptions import DenyConnection
 from channels.generic.websocket import WebsocketConsumer
@@ -14,13 +15,15 @@ class StatusConsumer(WebsocketConsumer):
             raise DenyConnection()
         else:
             async_to_sync(self.channel_layer.group_add)(
-                "status_updates", self.channel_name
+                "status_updates",
+                self.channel_name,
             )
             raise AcceptConnection()

     def disconnect(self, close_code):
         async_to_sync(self.channel_layer.group_discard)(
-            "status_updates", self.channel_name
+            "status_updates",
+            self.channel_name,
         )

     def status_update(self, event):

View File

@@ -1,5 +1,4 @@
 import gnupg
 from django.conf import settings

View File

@@ -1,5 +1,4 @@
 from django.conf import settings
 from paperless import version

View File

@ -5,9 +5,8 @@ import os
import re import re
from concurrent_log_handler.queue import setup_logging_queues from concurrent_log_handler.queue import setup_logging_queues
from dotenv import load_dotenv
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv
# Tap paperless.conf if it's available # Tap paperless.conf if it's available
if os.path.exists("../paperless.conf"): if os.path.exists("../paperless.conf"):
@ -68,7 +67,8 @@ MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")
LOGGING_DIR = os.getenv("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log")) LOGGING_DIR = os.getenv("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log"))
CONSUMPTION_DIR = os.getenv( CONSUMPTION_DIR = os.getenv(
"PAPERLESS_CONSUMPTION_DIR", os.path.join(BASE_DIR, "..", "consume") "PAPERLESS_CONSUMPTION_DIR",
os.path.join(BASE_DIR, "..", "consume"),
) )
# This will be created if it doesn't exist # This will be created if it doesn't exist
@ -119,7 +119,7 @@ REST_FRAMEWORK = {
if DEBUG: if DEBUG:
REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append( REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
"paperless.auth.AngularApiAuthenticationOverride" "paperless.auth.AngularApiAuthenticationOverride",
) )
MIDDLEWARE = [ MIDDLEWARE = [
@ -191,7 +191,8 @@ if AUTO_LOGIN_USERNAME:
ENABLE_HTTP_REMOTE_USER = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER") ENABLE_HTTP_REMOTE_USER = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
HTTP_REMOTE_USER_HEADER_NAME = os.getenv( HTTP_REMOTE_USER_HEADER_NAME = os.getenv(
"PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME", "HTTP_REMOTE_USER" "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME",
"HTTP_REMOTE_USER",
) )
if ENABLE_HTTP_REMOTE_USER: if ENABLE_HTTP_REMOTE_USER:
@ -201,7 +202,7 @@ if ENABLE_HTTP_REMOTE_USER:
"django.contrib.auth.backends.ModelBackend", "django.contrib.auth.backends.ModelBackend",
] ]
REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append( REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
"rest_framework.authentication.RemoteUserAuthentication" "rest_framework.authentication.RemoteUserAuthentication",
) )
# X-Frame options for embedded PDF display: # X-Frame options for embedded PDF display:
@ -212,7 +213,7 @@ else:
 # We allow CORS from localhost:8080
 CORS_ALLOWED_ORIGINS = tuple(
-    os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(",")
+    os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","),
 )

 if DEBUG:
@@ -223,7 +224,8 @@ if DEBUG:
 # Paperless on a closed network. However, if you're putting this anywhere
 # public, you should change the key to something unique and verbose.
 SECRET_KEY = os.getenv(
-    "PAPERLESS_SECRET_KEY", "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee"
+    "PAPERLESS_SECRET_KEY",
+    "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee",
 )

 _allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS")
@@ -268,7 +270,7 @@ DATABASES = {
     "default": {
         "ENGINE": "django.db.backends.sqlite3",
         "NAME": os.path.join(DATA_DIR, "db.sqlite3"),
-    }
+    },
 }

 if os.getenv("PAPERLESS_DBHOST"):
@@ -423,7 +425,8 @@ def default_threads_per_worker(task_workers):
 THREADS_PER_WORKER = os.getenv(
-    "PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker(TASK_WORKERS)
+    "PAPERLESS_THREADS_PER_WORKER",
+    default_threads_per_worker(TASK_WORKERS),
 )

 ###############################################################################
@@ -435,7 +438,7 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
 CONSUMER_POLLING_DELAY = int(os.getenv("PAPERLESS_CONSUMER_POLLING_DELAY", 5))

 CONSUMER_POLLING_RETRY_COUNT = int(
-    os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5)
+    os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5),
 )

 CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
@@ -448,8 +451,8 @@ CONSUMER_IGNORE_PATTERNS = list(
     os.getenv(
         "PAPERLESS_CONSUMER_IGNORE_PATTERNS",
         '[".DS_STORE/*", "._*", ".stfolder/*"]',
-        )
-    )
+        ),
+    ),
 )

 CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
@@ -479,7 +482,7 @@ OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
 OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")

 OCR_ROTATE_PAGES_THRESHOLD = float(
-    os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0)
+    os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0),
 )

 OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}")
@@ -536,7 +539,8 @@ THUMBNAIL_FONT_NAME = os.getenv(
 PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
 PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
 PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
-    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT", "http://localhost:3000"
+    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
+    "http://localhost:3000",
 )

 if PAPERLESS_TIKA_ENABLED:
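Note: nearly all of the settings churn above is mechanical, the add-trailing-comma pre-commit hook at work. Once a call spans several lines, every argument, including the last, sits on its own line with a trailing comma, so appending an argument later touches exactly one line. A minimal sketch of the resulting pattern; the PAPERLESS_EXAMPLE_HOSTS variable is invented for illustration:

import os

# With a trailing comma after the last argument, adding another argument
# later produces a one-line diff instead of a two-line one.
EXAMPLE_ALLOWED_ORIGINS = tuple(
    os.getenv("PAPERLESS_EXAMPLE_HOSTS", "http://localhost:8000").split(","),
)

print(EXAMPLE_ALLOWED_ORIGINS)  # ('http://localhost:8000',)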
View File
@@ -1,10 +1,11 @@
 import os
 import shutil

-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase

 from documents.tests.utils import DirectoriesMixin
-from paperless import binaries_check, paths_check
+from paperless import binaries_check
+from paperless import paths_check
 from paperless.checks import debug_mode_check
@@ -20,7 +21,9 @@ class TestChecks(DirectoriesMixin, TestCase):
         self.assertEqual(paths_check(None), [])

     @override_settings(
-        MEDIA_ROOT="uuh", DATA_DIR="whatever", CONSUMPTION_DIR="idontcare"
+        MEDIA_ROOT="uuh",
+        DATA_DIR="whatever",
+        CONSUMPTION_DIR="idontcare",
     )
     def test_paths_check_dont_exist(self):
         msgs = paths_check(None)
View File
@@ -2,8 +2,8 @@ from unittest import mock

 from channels.layers import get_channel_layer
 from channels.testing import WebsocketCommunicator
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase

 from paperless.asgi import application
@@ -46,7 +46,8 @@ class TestWebSockets(TestCase):
         channel_layer = get_channel_layer()
         await channel_layer.group_send(
-            "status_updates", {"type": "status_update", "data": message}
+            "status_updates",
+            {"type": "status_update", "data": message},
         )

         response = await communicator.receive_json_from()
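Note: the hunk above pushes a status event through the Channels channel layer. A minimal sketch of the same broadcast from synchronous code, assuming a channel layer is configured in settings; the helper name is invented:

from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer


def broadcast_status(message):
    # Every consumer that joined the "status_updates" group receives the
    # event; "type": "status_update" maps to its status_update() handler.
    layer = get_channel_layer()
    async_to_sync(layer.group_send)(
        "status_updates",
        {"type": "status_update", "data": message},
    )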
View File
@@ -1,34 +1,30 @@
+from django.conf import settings
 from django.conf.urls import include
 from django.contrib import admin
 from django.contrib.auth.decorators import login_required
-from django.urls import path, re_path
+from django.urls import path
+from django.urls import re_path
+from django.utils.translation import gettext_lazy as _
 from django.views.decorators.csrf import csrf_exempt
 from django.views.generic import RedirectView
+from documents.views import BulkDownloadView
+from documents.views import BulkEditView
+from documents.views import CorrespondentViewSet
+from documents.views import DocumentTypeViewSet
+from documents.views import IndexView
+from documents.views import LogViewSet
+from documents.views import PostDocumentView
+from documents.views import SavedViewViewSet
+from documents.views import SearchAutoCompleteView
+from documents.views import SelectionDataView
+from documents.views import StatisticsView
+from documents.views import TagViewSet
+from documents.views import UnifiedSearchViewSet
+from paperless.consumers import StatusConsumer
+from paperless.views import FaviconView
 from rest_framework.authtoken import views
 from rest_framework.routers import DefaultRouter
-from django.utils.translation import gettext_lazy as _
-from django.conf import settings
-from paperless.consumers import StatusConsumer
-from documents.views import (
-    CorrespondentViewSet,
-    UnifiedSearchViewSet,
-    LogViewSet,
-    TagViewSet,
-    DocumentTypeViewSet,
-    IndexView,
-    SearchAutoCompleteView,
-    StatisticsView,
-    PostDocumentView,
-    SavedViewViewSet,
-    BulkEditView,
-    SelectionDataView,
-    BulkDownloadView,
-)
-from paperless.views import FaviconView

 api_router = DefaultRouter()
 api_router.register(r"correspondents", CorrespondentViewSet)
 api_router.register(r"document_types", DocumentTypeViewSet)
@@ -62,7 +58,9 @@ urlpatterns = [
             name="post_document",
         ),
         re_path(
-            r"^documents/bulk_edit/", BulkEditView.as_view(), name="bulk_edit"
+            r"^documents/bulk_edit/",
+            BulkEditView.as_view(),
+            name="bulk_edit",
         ),
         re_path(
             r"^documents/selection_data/",
@@ -76,7 +74,7 @@ urlpatterns = [
                 ),
                 path("token/", views.obtain_auth_token),
             ]
-            + api_router.urls
+            + api_router.urls,
         ),
     ),
     re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
@@ -88,35 +86,37 @@ urlpatterns = [
             re_path(
                 r"^doc/(?P<pk>\d+)$",
                 RedirectView.as_view(
-                    url=settings.BASE_URL + "api/documents/%(pk)s/download/"
+                    url=settings.BASE_URL + "api/documents/%(pk)s/download/",
                 ),
             ),
             re_path(
                 r"^thumb/(?P<pk>\d+)$",
                 RedirectView.as_view(
-                    url=settings.BASE_URL + "api/documents/%(pk)s/thumb/"
+                    url=settings.BASE_URL + "api/documents/%(pk)s/thumb/",
                 ),
             ),
             re_path(
                 r"^preview/(?P<pk>\d+)$",
                 RedirectView.as_view(
-                    url=settings.BASE_URL + "api/documents/%(pk)s/preview/"
+                    url=settings.BASE_URL + "api/documents/%(pk)s/preview/",
                 ),
             ),
-        ]
+        ],
         ),
     ),
     re_path(
         r"^push$",
         csrf_exempt(
-            RedirectView.as_view(url=settings.BASE_URL + "api/documents/post_document/")
+            RedirectView.as_view(
+                url=settings.BASE_URL + "api/documents/post_document/",
+            ),
        ),
     ),
     # Frontend assets TODO: this is pretty bad, but it works.
     path(
         "assets/<path:path>",
         RedirectView.as_view(
-            url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s"
+            url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s",
         ),
     ),
     # TODO: with localization, this is even worse! :/
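Note: the imports above feed a DRF router plus hand-written re_path entries. A pared-down sketch of that wiring, using only the CorrespondentViewSet that appears in this diff:

from django.conf.urls import include
from django.urls import re_path
from documents.views import CorrespondentViewSet
from rest_framework.routers import DefaultRouter

api_router = DefaultRouter()
api_router.register(r"correspondents", CorrespondentViewSet)

urlpatterns = [
    # router.urls expands into list and detail routes, e.g.
    # correspondents/ and correspondents/<pk>/
    re_path(r"^api/", include(api_router.urls)),
]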
View File
@@ -14,7 +14,11 @@ class StandardPagination(PageNumberPagination):
 class FaviconView(View):
     def get(self, request, *args, **kwargs):
         favicon = os.path.join(
-            os.path.dirname(__file__), "static", "paperless", "img", "favicon.ico"
+            os.path.dirname(__file__),
+            "static",
+            "paperless",
+            "img",
+            "favicon.ico",
         )
         with open(favicon, "rb") as f:
             return HttpResponse(f, content_type="image/x-icon")
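Note: FaviconView resolves a packaged file relative to the module and streams it back. The same idea as a self-contained sketch, with an invented view name and asset path; reading the bytes before the handle closes avoids relying on HttpResponse consuming the open file:

import os

from django.http import HttpResponse
from django.views import View


class PackagedIconView(View):
    def get(self, request, *args, **kwargs):
        # Resolve the asset relative to this module, not the working directory.
        icon = os.path.join(os.path.dirname(__file__), "static", "icon.ico")
        with open(icon, "rb") as f:
            return HttpResponse(f.read(), content_type="image/x-icon")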
View File
@@ -1,6 +1,7 @@
 import os

-from uvicorn.workers import UvicornWorker
 from django.conf import settings
+from uvicorn.workers import UvicornWorker

 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
View File
@@ -6,7 +6,6 @@ It exposes the WSGI callable as a module-level variable named ``application``.
 For more information on this file, see
 https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/
 """
-
 import os

 from django.core.wsgi import get_wsgi_application
View File
@@ -1,8 +1,8 @@
-from django.contrib import admin
 from django import forms
-from paperless_mail.models import MailAccount, MailRule
+from django.contrib import admin
 from django.utils.translation import gettext_lazy as _
+from paperless_mail.models import MailAccount
+from paperless_mail.models import MailRule


 class MailAccountAdminForm(forms.ModelForm):
@@ -48,7 +48,7 @@ class MailRuleAdmin(admin.ModelAdmin):
             {
                 "description": _(
                     "Paperless will only process mails that match ALL of the "
-                    "filters given below."
+                    "filters given below.",
                 ),
                 "fields": (
                     "filter_from",
@@ -66,7 +66,7 @@ class MailRuleAdmin(admin.ModelAdmin):
                 "description": _(
                     "The action applied to the mail. This action is only "
                     "performed when documents were consumed from the mail. "
-                    "Mails without attachments will remain entirely untouched."
+                    "Mails without attachments will remain entirely untouched.",
                 ),
                 "fields": ("action", "action_parameter"),
             },
@@ -78,7 +78,7 @@ class MailRuleAdmin(admin.ModelAdmin):
                     "Assign metadata to documents consumed from this rule "
                     "automatically. If you do not assign tags, types or "
                     "correspondents here, paperless will still process all "
-                    "matching rules that you have defined."
+                    "matching rules that you have defined.",
                 ),
                 "fields": (
                     "assign_title_from",
View File
@@ -1,5 +1,4 @@
 from django.apps import AppConfig
-
 from django.utils.translation import gettext_lazy as _
View File
@@ -1,6 +1,7 @@
 import os
 import tempfile
-from datetime import timedelta, date
+from datetime import date
+from datetime import timedelta
 from fnmatch import fnmatch

 import magic
@@ -8,18 +9,16 @@ import pathvalidate
 from django.conf import settings
 from django.db import DatabaseError
 from django_q.tasks import async_task
-from imap_tools import (
-    MailBox,
-    MailBoxUnencrypted,
-    AND,
-    MailMessageFlags,
-    MailboxFolderSelectError,
-)
 from documents.loggers import LoggingMixin
 from documents.models import Correspondent
 from documents.parsers import is_mime_type_supported
-from paperless_mail.models import MailAccount, MailRule
+from imap_tools import AND
+from imap_tools import MailBox
+from imap_tools import MailboxFolderSelectError
+from imap_tools import MailBoxUnencrypted
+from imap_tools import MailMessageFlags
+from paperless_mail.models import MailAccount
+from paperless_mail.models import MailRule


 class MailError(Exception):
@@ -120,8 +119,8 @@ class MailAccountHandler(LoggingMixin):
         else:
             raise NotImplementedError(
-                "Unknown title selector."
-            )  # pragma: nocover # NOQA: E501
+                "Unknown title selector.",
+            )  # pragma: nocover

     def get_correspondent(self, message, rule):
         c_from = rule.assign_correspondent_from
@@ -137,7 +136,7 @@ class MailAccountHandler(LoggingMixin):
                 message.from_values
                 and "name" in message.from_values
                 and message.from_values["name"]
-            ):  # NOQA: E501
+            ):
                 return self._correspondent_from_name(message.from_values["name"])
             else:
                 return self._correspondent_from_name(message.from_)
@@ -147,8 +146,8 @@ class MailAccountHandler(LoggingMixin):
         else:
             raise NotImplementedError(
-                "Unknwown correspondent selector"
-            )  # pragma: nocover # NOQA: E501
+                "Unknwown correspondent selector",
+            )  # pragma: nocover

     def handle_mail_account(self, account):
@@ -159,7 +158,9 @@ class MailAccountHandler(LoggingMixin):
         total_processed_files = 0

         with get_mailbox(
-            account.imap_server, account.imap_port, account.imap_security
+            account.imap_server,
+            account.imap_port,
+            account.imap_security,
         ) as M:

             try:
@@ -193,7 +194,7 @@ class MailAccountHandler(LoggingMixin):
         except MailboxFolderSelectError:
             raise MailError(
                 f"Rule {rule}: Folder {rule.folder} "
-                f"does not exist in account {rule.account}"
+                f"does not exist in account {rule.account}",
             )

         criterias = make_criterias(rule)
@@ -242,12 +243,14 @@ class MailAccountHandler(LoggingMixin):

         try:
             get_rule_action(rule).post_consume(
-                M, post_consume_messages, rule.action_parameter
+                M,
+                post_consume_messages,
+                rule.action_parameter,
             )

         except Exception as e:
             raise MailError(
-                f"Rule {rule}: Error while processing post-consume actions: " f"{e}"
+                f"Rule {rule}: Error while processing post-consume actions: " f"{e}",
             )

         return total_processed_files
@@ -274,7 +277,7 @@ class MailAccountHandler(LoggingMixin):
             if (
                 not att.content_disposition == "attachment"
                 and rule.attachment_type == MailRule.ATTACHMENT_TYPE_ATTACHMENTS_ONLY
-            ):  # NOQA: E501
+            ):
                 self.log(
                     "debug",
                     f"Rule {rule}: "
@@ -297,7 +300,8 @@ class MailAccountHandler(LoggingMixin):
                 os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
                 _, temp_filename = tempfile.mkstemp(
-                    prefix="paperless-mail-", dir=settings.SCRATCH_DIR
+                    prefix="paperless-mail-",
+                    dir=settings.SCRATCH_DIR,
                 )
                 with open(temp_filename, "wb") as f:
                     f.write(att.payload)
@@ -313,15 +317,13 @@ class MailAccountHandler(LoggingMixin):
                     "documents.tasks.consume_file",
                     path=temp_filename,
                     override_filename=pathvalidate.sanitize_filename(
-                        att.filename
-                    ),  # NOQA: E501
+                        att.filename,
+                    ),
                     override_title=title,
                     override_correspondent_id=correspondent.id
                     if correspondent
-                    else None,  # NOQA: E501
-                    override_document_type_id=doc_type.id
-                    if doc_type
-                    else None,  # NOQA: E501
+                    else None,
+                    override_document_type_id=doc_type.id if doc_type else None,
                     override_tag_ids=[tag.id] if tag else None,
                     task_name=att.filename[:100],
                 )
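Note: the attachment handling above writes each payload to a scratch file, then hands the path to a django-q worker by dotted task name. A condensed sketch of that flow, with the function and its arguments trimmed to the essentials:

import tempfile

from django_q.tasks import async_task


def queue_attachment(payload, filename, scratch_dir):
    # mkstemp returns an open OS-level descriptor plus the path; the prefix
    # makes stray files easy to attribute inside the scratch directory.
    fd, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=scratch_dir)
    with open(fd, "wb") as f:
        f.write(payload)
    # django-q imports documents.tasks.consume_file on a worker process and
    # calls it with these keyword arguments.
    async_task(
        "documents.tasks.consume_file",
        path=temp_filename,
        override_filename=filename,
        task_name=filename[:100],
    )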
View File
@@ -1,5 +1,4 @@
 from django.core.management.base import BaseCommand
-
 from paperless_mail import tasks


 class Command(BaseCommand):
@@ -7,7 +6,8 @@ class Command(BaseCommand):
     help = """
     """.replace(
-        " ", ""
+        " ",
+        "",
     )

     def handle(self, *args, **options):
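Note: the .replace(" ", "") above strips the indentation a triple-quoted help string inherits from the source file, at the cost of also deleting spaces inside words. textwrap.dedent removes only the common leading whitespace; a sketch with invented help wording:

import textwrap

help_text = textwrap.dedent(
    """
    Fetch mail from all configured accounts and queue attachments
    for consumption.
    """,
).strip()

print(help_text)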
View File
@@ -1,7 +1,5 @@
-from django.db import models
-
 import documents.models as document_models
+from django.db import models
 from django.utils.translation import gettext_lazy as _
@@ -30,12 +28,14 @@ class MailAccount(models.Model):
         null=True,
         help_text=_(
             "This is usually 143 for unencrypted and STARTTLS "
-            "connections, and 993 for SSL connections."
+            "connections, and 993 for SSL connections.",
         ),
     )

     imap_security = models.PositiveIntegerField(
-        _("IMAP security"), choices=IMAP_SECURITY_OPTIONS, default=IMAP_SECURITY_SSL
+        _("IMAP security"),
+        choices=IMAP_SECURITY_OPTIONS,
+        default=IMAP_SECURITY_SSL,
     )

     username = models.CharField(_("username"), max_length=256)
@@ -48,7 +48,7 @@ class MailAccount(models.Model):
         default="UTF-8",
         help_text=_(
             "The character set to use when communicating with the "
-            "mail server, such as 'UTF-8' or 'US-ASCII'."
+            "mail server, such as 'UTF-8' or 'US-ASCII'.",
         ),
     )
@@ -123,13 +123,22 @@ class MailRule(models.Model):
     )

     filter_from = models.CharField(
-        _("filter from"), max_length=256, null=True, blank=True
+        _("filter from"),
+        max_length=256,
+        null=True,
+        blank=True,
     )
     filter_subject = models.CharField(
-        _("filter subject"), max_length=256, null=True, blank=True
+        _("filter subject"),
+        max_length=256,
+        null=True,
+        blank=True,
     )
     filter_body = models.CharField(
-        _("filter body"), max_length=256, null=True, blank=True
+        _("filter body"),
+        max_length=256,
+        null=True,
+        blank=True,
     )

     filter_attachment_filename = models.CharField(
@@ -140,12 +149,14 @@ class MailRule(models.Model):
         help_text=_(
             "Only consume documents which entirely match this "
             "filename if specified. Wildcards such as *.pdf or "
-            "*invoice* are allowed. Case insensitive."
+            "*invoice* are allowed. Case insensitive.",
         ),
     )

     maximum_age = models.PositiveIntegerField(
-        _("maximum age"), default=30, help_text=_("Specified in days.")
+        _("maximum age"),
+        default=30,
+        help_text=_("Specified in days."),
     )

     attachment_type = models.PositiveIntegerField(
@@ -154,7 +165,7 @@ class MailRule(models.Model):
         default=ATTACHMENT_TYPE_ATTACHMENTS_ONLY,
         help_text=_(
             "Inline attachments include embedded images, so it's best "
-            "to combine this option with a filename filter."
+            "to combine this option with a filename filter.",
         ),
     )
@@ -173,12 +184,14 @@ class MailRule(models.Model):
             "Additional parameter for the action selected above, "
             "i.e., "
             "the target folder of the move to folder action. "
-            "Subfolders must be separated by dots."
+            "Subfolders must be separated by dots.",
         ),
     )

     assign_title_from = models.PositiveIntegerField(
-        _("assign title from"), choices=TITLE_SELECTOR, default=TITLE_FROM_SUBJECT
+        _("assign title from"),
+        choices=TITLE_SELECTOR,
+        default=TITLE_FROM_SUBJECT,
     )

     assign_tag = models.ForeignKey(
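Note: filter_attachment_filename above promises case-insensitive wildcard matching (*.pdf, *invoice*). One way to implement such a check is with fnmatch, which the mail handler module in this diff already imports; this sketch is illustrative, not the exact consumer code:

from fnmatch import fnmatch


def attachment_matches(filename, pattern):
    # fnmatch is case-sensitive on POSIX, so normalise both sides to get
    # the case-insensitive behaviour the help text describes.
    return fnmatch(filename.lower(), pattern.lower())


assert attachment_matches("Invoice-2022.PDF", "*.pdf")
assert attachment_matches("march_invoice.pdf", "*invoice*")
assert not attachment_matches("photo.jpg", "*.pdf")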
View File
@@ -1,6 +1,7 @@
 import logging

-from paperless_mail.mail import MailAccountHandler, MailError
+from paperless_mail.mail import MailAccountHandler
+from paperless_mail.mail import MailError
 from paperless_mail.models import MailAccount
View File
@@ -7,13 +7,15 @@ from unittest import mock
 from django.core.management import call_command
 from django.db import DatabaseError
 from django.test import TestCase
-from imap_tools import MailMessageFlags, MailboxFolderSelectError
 from documents.models import Correspondent
 from documents.tests.utils import DirectoriesMixin
+from imap_tools import MailboxFolderSelectError
+from imap_tools import MailMessageFlags
 from paperless_mail import tasks
-from paperless_mail.mail import MailError, MailAccountHandler
-from paperless_mail.models import MailRule, MailAccount
+from paperless_mail.mail import MailAccountHandler
+from paperless_mail.mail import MailError
+from paperless_mail.models import MailAccount
+from paperless_mail.models import MailRule


 class BogusFolderManager:
@@ -83,7 +85,7 @@ class BogusMailBox(ContextManager):
     def move(self, uid_list, folder):
         if folder == "spam":
             self.messages_spam.append(
-                filter(lambda m: m.uid in uid_list, self.messages)
+                filter(lambda m: m.uid in uid_list, self.messages),
             )
             self.messages = list(filter(lambda m: m.uid not in uid_list, self.messages))
         else:
@@ -115,7 +117,9 @@ def create_message(
 def create_attachment(
-    filename="the_file.pdf", content_disposition="attachment", payload=b"a PDF document"
+    filename="the_file.pdf",
+    content_disposition="attachment",
+    payload=b"a PDF document",
 ):
     attachment = namedtuple("Attachment", [])
     attachment.filename = filename
@@ -163,7 +167,7 @@ class TestMail(DirectoriesMixin, TestCase):
                 body="cables",
                 seen=True,
                 flagged=False,
-            )
+            ),
         )
         self.bogus_mailbox.messages.append(
             create_message(
@@ -171,14 +175,14 @@ class TestMail(DirectoriesMixin, TestCase):
                 body="from my favorite electronic store",
                 seen=False,
                 flagged=True,
-            )
+            ),
         )
         self.bogus_mailbox.messages.append(
             create_message(
                 subject="Claim your $10M price now!",
                 from_="amazon@amazon-some-indian-site.org",
                 seen=False,
-            )
+            ),
         )

     def test_get_correspondent(self):
@@ -196,12 +200,14 @@ class TestMail(DirectoriesMixin, TestCase):
         handler = MailAccountHandler()

         rule = MailRule(
-            name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING
+            name="a",
+            assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING,
         )

         self.assertIsNone(handler.get_correspondent(message, rule))

         rule = MailRule(
-            name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL
+            name="b",
+            assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL,
         )

         c = handler.get_correspondent(message, rule)
         self.assertIsNotNone(c)
@@ -212,7 +218,8 @@ class TestMail(DirectoriesMixin, TestCase):
         self.assertEqual(c.id, me_localhost.id)

         rule = MailRule(
-            name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME
+            name="c",
+            assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME,
         )

         c = handler.get_correspondent(message, rule)
         self.assertIsNotNone(c)
@@ -244,7 +251,9 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_handle_message(self):
         message = create_message(
-            subject="the message title", from_="Myself", num_attachments=2
+            subject="the message title",
+            from_="Myself",
+            num_attachments=2,
         )

         account = MailAccount()
@@ -376,11 +385,16 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_handle_mail_account_mark_read(self):
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="secret"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="secret",
         )

         rule = MailRule.objects.create(
-            name="testrule", account=account, action=MailRule.ACTION_MARK_READ
+            name="testrule",
+            account=account,
+            action=MailRule.ACTION_MARK_READ,
         )

         self.assertEqual(len(self.bogus_mailbox.messages), 3)
@@ -394,7 +408,10 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_handle_mail_account_delete(self):
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="secret"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="secret",
         )

         rule = MailRule.objects.create(
@@ -412,7 +429,10 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_handle_mail_account_flag(self):
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="secret"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="secret",
         )

         rule = MailRule.objects.create(
@@ -432,7 +452,10 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_handle_mail_account_move(self):
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="secret"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="secret",
         )

         rule = MailRule.objects.create(
@@ -453,7 +476,10 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_error_login(self):
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="wrong"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="wrong",
         )

         try:
@@ -465,11 +491,17 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_error_skip_account(self):
         account_faulty = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="wroasdng"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="wroasdng",
         )

         account = MailAccount.objects.create(
-            name="test2", imap_server="", username="admin", password="secret"
+            name="test2",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule",
@@ -487,7 +519,10 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_error_skip_rule(self):
         account = MailAccount.objects.create(
-            name="test2", imap_server="", username="admin", password="secret"
+            name="test2",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule",
@@ -523,7 +558,10 @@ class TestMail(DirectoriesMixin, TestCase):
         m.side_effect = get_correspondent_fake

         account = MailAccount.objects.create(
-            name="test2", imap_server="", username="admin", password="secret"
+            name="test2",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule",
@@ -544,7 +582,10 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_error_create_correspondent(self):
         account = MailAccount.objects.create(
-            name="test2", imap_server="", username="admin", password="secret"
+            name="test2",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule",
@@ -579,7 +620,10 @@ class TestMail(DirectoriesMixin, TestCase):

     def test_filters(self):
         account = MailAccount.objects.create(
-            name="test3", imap_server="", username="admin", password="secret"
+            name="test3",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule3",
@@ -629,7 +673,7 @@ class TestMail(DirectoriesMixin, TestCase):

 class TestManagementCommand(TestCase):
     @mock.patch(
-        "paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts"
+        "paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts",
     )
     def test_mail_fetcher(self, m):
@@ -644,10 +688,16 @@ class TestTasks(TestCase):
         m.side_effect = lambda account: 6

         MailAccount.objects.create(
-            name="A", imap_server="A", username="A", password="A"
+            name="A",
+            imap_server="A",
+            username="A",
+            password="A",
         )
         MailAccount.objects.create(
-            name="B", imap_server="A", username="A", password="A"
+            name="B",
+            imap_server="A",
+            username="A",
+            password="A",
         )

         result = tasks.process_mail_accounts()
@@ -663,7 +713,10 @@ class TestTasks(TestCase):
     def test_single_accounts(self, m):
         MailAccount.objects.create(
-            name="A", imap_server="A", username="A", password="A"
+            name="A",
+            imap_server="A",
+            username="A",
+            password="A",
         )

         tasks.process_mail_account("A")
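Note: TestTasks above stubs the account handler with mock's side_effect so every processed account reports a fixed file count. The same pattern in isolation; the process helper is invented:

from unittest import mock


def process(handler, accounts):
    return sum(handler(account) for account in accounts)


def test_process_counts_all_accounts():
    handler = mock.Mock()
    # side_effect is invoked with the mock's call arguments, so each
    # account here "yields" six processed files.
    handler.side_effect = lambda account: 6
    assert process(handler, ["A", "B"]) == 12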
View File
@@ -1,2 +1,5 @@
 # this is here so that django finds the checks.
-from .checks import *
+from .checks import check_default_language_available
+from .checks import get_tesseract_langs
+
+__all__ = ["get_tesseract_langs", "check_default_language_available"]
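Note: swapping the star import for named imports plus __all__ keeps the package surface explicit and lets linters see that the names are deliberate re-exports. The pattern in miniature, for a hypothetical package:

# mypackage/__init__.py (hypothetical)
from .core import load
from .core import save

# Declares the re-exports so linters do not flag them as unused and
# `from mypackage import *` stays well-defined.
__all__ = ["load", "save"]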
View File
@@ -1,5 +1,4 @@
 from django.apps import AppConfig
-
 from paperless_tesseract.signals import tesseract_consumer_declaration
View File
@@ -1,7 +1,9 @@
 import subprocess

 from django.conf import settings
-from django.core.checks import Error, Warning, register
+from django.core.checks import Error
+from django.core.checks import register
+from django.core.checks import Warning


 def get_tesseract_langs():
@@ -19,8 +21,8 @@ def check_default_language_available(app_configs, **kwargs):
         return [
             Warning(
                 "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
-                "This means that tesseract will fallback to english."
-            )
+                "This means that tesseract will fallback to english.",
+            ),
         ]

     specified_langs = settings.OCR_LANGUAGE.split("+")
@@ -31,8 +33,8 @@ def check_default_language_available(app_configs, **kwargs):
             Error(
                 f"The selected ocr language {lang} is "
                 f"not installed. Paperless cannot OCR your documents "
-                f"without it. Please fix PAPERLESS_OCR_LANGUAGE."
-            )
+                f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
+            ),
         ]

     return []
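Note: check_default_language_available is a standard Django system check: a callable registered via @register that returns Warning or Error objects, run by manage.py check and at startup. A minimal sketch of the machinery, with an invented setting name and check id:

from django.conf import settings
from django.core.checks import Warning
from django.core.checks import register


@register()
def example_language_check(app_configs, **kwargs):
    # Returning an empty list means "no problems found".
    if not getattr(settings, "EXAMPLE_OCR_LANGUAGE", None):
        return [
            Warning(
                "No OCR language configured; tesseract will fall back "
                "to English.",
                id="example.W001",
            ),
        ]
    return []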
View File
@@ -2,10 +2,11 @@ import json
 import os
 import re

-from PIL import Image
 from django.conf import settings
-from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf
+from documents.parsers import DocumentParser
+from documents.parsers import make_thumbnail_from_pdf
+from documents.parsers import ParseError
+from PIL import Image


 class NoTextFoundException(Exception):
@@ -42,7 +43,7 @@ class RasterisedDocumentParser(DocumentParser):
                         "prefix": meta.REVERSE_NS[m.group(1)],
                         "key": m.group(2),
                         "value": value,
-                    }
+                    },
                 )
             except Exception as e:
                 self.log(
@@ -53,7 +54,9 @@ class RasterisedDocumentParser(DocumentParser):
     def get_thumbnail(self, document_path, mime_type, file_name=None):
         return make_thumbnail_from_pdf(
-            self.archive_path or document_path, self.tempdir, self.logging_group
+            self.archive_path or document_path,
+            self.tempdir,
+            self.logging_group,
         )

     def is_image(self, mime_type):
@@ -110,7 +113,6 @@ class RasterisedDocumentParser(DocumentParser):
             return None

         from pdfminer.high_level import extract_text as pdfminer_extract_text
-        from pdfminer.pdftypes import PDFException

         try:
             stripped = post_process_text(pdfminer_extract_text(pdf_file))
@@ -129,7 +131,12 @@ class RasterisedDocumentParser(DocumentParser):
         return None

     def construct_ocrmypdf_parameters(
-        self, input_file, mime_type, output_file, sidecar_file, safe_fallback=False
+        self,
+        input_file,
+        mime_type,
+        output_file,
+        sidecar_file,
+        safe_fallback=False,
     ):
         ocrmypdf_args = {
             "input_file": input_file,
@@ -167,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser):
             ocrmypdf_args["rotate_pages"] = True
             ocrmypdf_args[
                 "rotate_pages_threshold"
-            ] = settings.OCR_ROTATE_PAGES_THRESHOLD  # NOQA: E501
+            ] = settings.OCR_ROTATE_PAGES_THRESHOLD

         if settings.OCR_PAGES > 0:
             ocrmypdf_args["pages"] = f"1-{settings.OCR_PAGES}"
@@ -202,7 +209,7 @@ class RasterisedDocumentParser(DocumentParser):
                 raise ParseError(
                     f"Cannot produce archive PDF for image {input_file}, "
                     f"no DPI information is present in this image and "
-                    f"OCR_IMAGE_DPI is not set."
+                    f"OCR_IMAGE_DPI is not set.",
                 )

         if settings.OCR_USER_ARGS and not safe_fallback:
@@ -241,7 +248,10 @@ class RasterisedDocumentParser(DocumentParser):
         sidecar_file = os.path.join(self.tempdir, "sidecar.txt")

         args = self.construct_ocrmypdf_parameters(
-            document_path, mime_type, archive_path, sidecar_file
+            document_path,
+            mime_type,
+            archive_path,
+            sidecar_file,
         )

         try:
@@ -289,7 +299,8 @@ class RasterisedDocumentParser(DocumentParser):
                 # is bigger and blurry due to --force-ocr.
                 self.text = self.extract_text(
-                    sidecar_file_fallback, archive_path_fallback
+                    sidecar_file_fallback,
+                    archive_path_fallback,
                 )

             except Exception as e:
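Note: construct_ocrmypdf_parameters builds a plain kwargs dict that is later unpacked into ocrmypdf. A stripped-down sketch of that approach; the option names match ocrmypdf's documented ocr() keywords, while the values are invented:

import ocrmypdf


def build_ocr_args(input_file, output_file, sidecar_file):
    # Accumulating options in a dict lets callers add or drop flags
    # conditionally before a single ocrmypdf.ocr(**args) call.
    return {
        "input_file": input_file,
        "output_file": output_file,
        "sidecar": sidecar_file,
        "language": "eng",
        "rotate_pages": True,
        "rotate_pages_threshold": 12.0,
    }


# Usage: ocrmypdf.ocr(**build_ocr_args("in.pdf", "out.pdf", "sidecar.txt"))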
View File
@@ -1,8 +1,8 @@
 from unittest import mock

 from django.core.checks import ERROR
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase

 from paperless_tesseract import check_default_language_available
@@ -16,8 +16,8 @@ class TestChecks(TestCase):
         self.assertEqual(len(msgs), 1)
         self.assertTrue(
             msgs[0].msg.startswith(
-                "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE"
-            )
+                "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE",
+            ),
         )

     @override_settings(OCR_LANGUAGE="ita")
View File
@@ -3,11 +3,13 @@ import uuid
 from typing import ContextManager
 from unittest import mock

-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase
-from documents.parsers import ParseError, run_convert
+from documents.parsers import ParseError
+from documents.parsers import run_convert
 from documents.tests.utils import DirectoriesMixin
-from paperless_tesseract.parsers import RasterisedDocumentParser, post_process_text
+from paperless_tesseract.parsers import post_process_text
+from paperless_tesseract.parsers import RasterisedDocumentParser

 image_to_string_calls = []
@@ -56,7 +58,9 @@ class TestParser(DirectoriesMixin, TestCase):
                 result,
                 actual_result,
                 "strip_exceess_whitespace({}) != '{}', but '{}'".format(
-                    source, result, actual_result
+                    source,
+                    result,
+                    actual_result,
                 ),
             )
@@ -65,7 +69,8 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_get_text_from_pdf(self):
         parser = RasterisedDocumentParser(uuid.uuid4())
         text = parser.extract_text(
-            None, os.path.join(self.SAMPLE_FILES, "simple-digital.pdf")
+            None,
+            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
         )

         self.assertContainsStrings(text.strip(), ["This is a test document."])
@@ -73,7 +78,8 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_thumbnail(self):
         parser = RasterisedDocumentParser(uuid.uuid4())
         thumb = parser.get_thumbnail(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(thumb))
@@ -89,14 +95,16 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(uuid.uuid4())
         thumb = parser.get_thumbnail(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(thumb))

     def test_thumbnail_encrypted(self):
         parser = RasterisedDocumentParser(uuid.uuid4())
         thumb = parser.get_thumbnail(
-            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(thumb))
@@ -113,7 +121,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)

         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            "application/pdf",
         )

         self.assertTrue(os.path.isfile(parser.archive_path))
@@ -124,7 +133,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)

         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            "application/pdf",
         )

         self.assertTrue(os.path.isfile(parser.archive_path))
@@ -139,7 +149,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)

         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            "application/pdf",
         )

         self.assertIsNone(parser.archive_path)
@@ -168,7 +179,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)

         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
+            "application/pdf",
         )

         self.assertIsNone(parser.archive_path)
@@ -178,7 +190,8 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_with_form_error_notext(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            "application/pdf",
         )

         self.assertContainsStrings(
@@ -191,7 +204,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            "application/pdf",
         )

         self.assertContainsStrings(
@@ -221,7 +235,7 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)

         dpi = parser.calculate_a4_dpi(
-            os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png")
+            os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
         )

         self.assertEqual(dpi, 62)
@@ -233,7 +247,8 @@ class TestParser(DirectoriesMixin, TestCase):

         def f():
             parser.parse(
-                os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png"
+                os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
+                "image/png",
             )

         self.assertRaises(ParseError, f)
@@ -247,68 +262,80 @@ class TestParser(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["this is a test document."]
+            parser.get_text().lower(),
+            ["this is a test document."],
         )

     def test_multi_page(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )

     @override_settings(OCR_PAGES=2, OCR_MODE="skip")
     def test_multi_page_pages_skip(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
        )

     @override_settings(OCR_PAGES=2, OCR_MODE="redo")
     def test_multi_page_pages_redo(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )

     @override_settings(OCR_PAGES=2, OCR_MODE="force")
     def test_multi_page_pages_force(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )

     @override_settings(OOCR_MODE="skip")
     def test_multi_page_analog_pages_skip(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )

     @override_settings(OCR_PAGES=2, OCR_MODE="redo")
     def test_multi_page_analog_pages_redo(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"])
@@ -318,7 +345,8 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_multi_page_analog_pages_force(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
@@ -329,29 +357,34 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_skip_noarchive_withtext(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
         )
         self.assertIsNone(parser.archive_path)
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )

     @override_settings(OCR_MODE="skip_noarchive")
     def test_skip_noarchive_notext(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )

     @override_settings(OCR_MODE="skip")
     def test_multi_page_mixed(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
@@ -368,11 +401,13 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_multi_page_mixed_no_archive(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
+            "application/pdf",
         )
         self.assertIsNone(parser.archive_path)
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 4", "page 5", "page 6"]
+            parser.get_text().lower(),
+            ["page 4", "page 5", "page 6"],
         )

     @override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True)
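Note: these parser tests flip OCR behaviour per test with Django's override_settings, which patches a setting for one test method and restores it afterwards. A self-contained sketch with an invented setting name:

from django.conf import settings
from django.test import override_settings
from django.test import TestCase


class ExampleSettingsTest(TestCase):
    @override_settings(EXAMPLE_OCR_MODE="skip")
    def test_mode_is_overridden(self):
        # The override lasts only for this test method.
        self.assertEqual(settings.EXAMPLE_OCR_MODE, "skip")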
View File
@@ -1,5 +1,4 @@
 from django.apps import AppConfig
-
 from paperless_text.signals import text_consumer_declaration
View File
@@ -1,9 +1,10 @@
 import os

-from PIL import ImageDraw, ImageFont, Image
 from django.conf import settings
 from documents.parsers import DocumentParser
+from PIL import Image
+from PIL import ImageDraw
+from PIL import ImageFont


 class TextDocumentParser(DocumentParser):
View File
@@ -1,7 +1,6 @@
 import os

 from django.test import TestCase
-
 from documents.tests.utils import DirectoriesMixin
 from paperless_text.parsers import TextDocumentParser
@@ -13,7 +12,8 @@ class TestTextParser(DirectoriesMixin, TestCase):
         # just make sure that it does not crash
         f = parser.get_thumbnail(
-            os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain"
+            os.path.join(os.path.dirname(__file__), "samples", "test.txt"),
+            "text/plain",
         )
         self.assertTrue(os.path.isfile(f))
@@ -22,7 +22,8 @@ class TestTextParser(DirectoriesMixin, TestCase):
         parser = TextDocumentParser(None)

         parser.parse(
-            os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain"
+            os.path.join(os.path.dirname(__file__), "samples", "test.txt"),
+            "text/plain",
         )

         self.assertEqual(parser.get_text(), "This is a test file.\n")
View File
@@ -1,10 +1,11 @@
 import os

-import requests
 import dateutil.parser
+import requests
 from django.conf import settings
-from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf
+from documents.parsers import DocumentParser
+from documents.parsers import make_thumbnail_from_pdf
+from documents.parsers import ParseError
 from tika import parser
@@ -20,7 +21,9 @@ class TikaDocumentParser(DocumentParser):
             self.archive_path = self.convert_to_pdf(document_path, file_name)

         return make_thumbnail_from_pdf(
-            self.archive_path, self.tempdir, self.logging_group
+            self.archive_path,
+            self.tempdir,
+            self.logging_group,
         )

     def extract_metadata(self, document_path, mime_type):
@@ -53,7 +56,7 @@ class TikaDocumentParser(DocumentParser):
         except Exception as err:
             raise ParseError(
                 f"Could not parse {document_path} with tika server at "
-                f"{tika_server}: {err}"
+                f"{tika_server}: {err}",
             )

         self.text = parsed["content"].strip()
@@ -74,22 +77,23 @@ class TikaDocumentParser(DocumentParser):
         url = gotenberg_server + "/forms/libreoffice/convert"

         self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
-        files = {
-            "files": (
-                file_name or os.path.basename(document_path),
-                open(document_path, "rb"),
-            )
-        }
-        headers = {}
-
-        try:
-            response = requests.post(url, files=files, headers=headers)
-            response.raise_for_status()  # ensure we notice bad responses
-        except Exception as err:
-            raise ParseError(f"Error while converting document to PDF: {err}")
+        with open(document_path, "rb") as document_handle:
+            files = {
+                "files": (
+                    file_name or os.path.basename(document_path),
+                    document_handle,
+                ),
+            }
+            headers = {}
+
+            try:
+                response = requests.post(url, files=files, headers=headers)
+                response.raise_for_status()  # ensure we notice bad responses
+            except Exception as err:
+                raise ParseError(f"Error while converting document to PDF: {err}")

-        file = open(pdf_path, "wb")
-        file.write(response.content)
-        file.close()
+        with open(pdf_path, "wb") as file:
+            file.write(response.content)
+            file.close()

         return pdf_path
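Note: the convert_to_pdf change above moves both open() calls into with blocks so the handles close deterministically even on errors (the file.close() left inside the second block is now redundant but harmless). A condensed sketch of the same multipart upload, with the Gotenberg URL hard-coded for illustration:

import os

import requests


def convert_to_pdf(document_path, pdf_path):
    # In paperless the endpoint comes from PAPERLESS_TIKA_GOTENBERG_ENDPOINT.
    url = "http://localhost:3000/forms/libreoffice/convert"

    with open(document_path, "rb") as document_handle:
        # The tuple gives the multipart part a filename for Gotenberg to see.
        files = {"files": (os.path.basename(document_path), document_handle)}
        response = requests.post(url, files=files)
        response.raise_for_status()  # surface 4xx/5xx as exceptions

    with open(pdf_path, "wb") as pdf_handle:
        pdf_handle.write(response.content)

    return pdf_path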
View File
@@ -10,12 +10,12 @@ def tika_consumer_declaration(sender, **kwargs):
         "weight": 10,
         "mime_types": {
             "application/msword": ".doc",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # noqa: E501
             "application/vnd.ms-excel": ".xls",
-            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",  # noqa: E501
             "application/vnd.ms-powerpoint": ".ppt",
-            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",  # noqa: E501
-            "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",  # noqa: E501
             "application/vnd.oasis.opendocument.presentation": ".odp",
             "application/vnd.oasis.opendocument.spreadsheet": ".ods",
             "application/vnd.oasis.opendocument.text": ".odt",
View File
@@ -4,9 +4,8 @@ from pathlib import Path
 from unittest import mock

 from django.test import TestCase
-from requests import Response
-
 from paperless_tika.parsers import TikaDocumentParser
+from requests import Response


 class TestTikaParser(TestCase):
@@ -42,14 +41,15 @@ class TestTikaParser(TestCase):
     @mock.patch("paperless_tika.parsers.parser.from_file")
     def test_metadata(self, from_file):
         from_file.return_value = {
-            "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"}
+            "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"},
         }
         file = os.path.join(self.parser.tempdir, "input.odt")
         Path(file).touch()

         metadata = self.parser.extract_metadata(
-            file, "application/vnd.oasis.opendocument.text"
+            file,
+            "application/vnd.oasis.opendocument.text",
         )

         self.assertTrue("Creation-Date" in [m["key"] for m in metadata])