Merge pull request #278 from stumpylog/pre-commit-python-changes

Python Cleanup from pre-commit
This commit is contained in:
Quinn Casey 2022-03-12 08:09:13 -08:00 committed by GitHub
commit 168ce2111d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
95 changed files with 1640 additions and 992 deletions

View File

@ -62,6 +62,7 @@ repos:
exclude: "(migrations)|(paperless/settings.py)|(.*\\.tox)|(.*/tests/.*)"
args:
- "--max-line-length=88"
- "--ignore=E203,W503"
- repo: https://github.com/psf/black
rev: 22.1.0
hooks:

View File

@ -1,2 +1,5 @@
# this is here so that django finds the checks.
from .checks import *
from .checks import changed_password_check
from .checks import parser_check
__all__ = ["changed_password_check", "parser_check"]

View File

@ -1,13 +1,11 @@
from django.contrib import admin
from .models import (
Correspondent,
Document,
DocumentType,
Tag,
SavedView,
SavedViewFilterRule,
)
from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import SavedView
from .models import SavedViewFilterRule
from .models import Tag
class CorrespondentAdmin(admin.ModelAdmin):

View File

@ -1,5 +1,4 @@
from django.apps import AppConfig
from django.utils.translation import gettext_lazy as _

View File

@ -8,7 +8,10 @@ class BulkArchiveStrategy:
self.zipf = zipf
def make_unique_filename(
self, doc: Document, archive: bool = False, folder: str = ""
self,
doc: Document,
archive: bool = False,
folder: str = "",
):
counter = 0
while True:
@ -34,7 +37,8 @@ class ArchiveOnlyStrategy(BulkArchiveStrategy):
def add_document(self, doc: Document):
if doc.has_archive_version:
self.zipf.write(
doc.archive_path, self.make_unique_filename(doc, archive=True)
doc.archive_path,
self.make_unique_filename(doc, archive=True),
)
else:
self.zipf.write(doc.source_path, self.make_unique_filename(doc))
@ -49,5 +53,6 @@ class OriginalAndArchiveStrategy(BulkArchiveStrategy):
)
self.zipf.write(
doc.source_path, self.make_unique_filename(doc, folder="originals/")
doc.source_path,
self.make_unique_filename(doc, folder="originals/"),
)

View File

@ -2,8 +2,9 @@ import itertools
from django.db.models import Q
from django_q.tasks import async_task
from documents.models import Document, Correspondent, DocumentType
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
def set_correspondent(doc_ids, correspondent):
@ -40,7 +41,7 @@ def add_tag(doc_ids, tag):
DocumentTagRelationship = Document.tags.through
DocumentTagRelationship.objects.bulk_create(
[DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs]
[DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs],
)
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
@ -56,7 +57,7 @@ def remove_tag(doc_ids, tag):
DocumentTagRelationship = Document.tags.through
DocumentTagRelationship.objects.filter(
Q(document_id__in=affected_docs) & Q(tag_id=tag)
Q(document_id__in=affected_docs) & Q(tag_id=tag),
).delete()
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)

View File

@ -1,10 +1,11 @@
import textwrap
from django.conf import settings
from django.core.checks import Error, register
from django.core.checks import Error
from django.core.checks import register
from django.core.exceptions import FieldError
from django.db.utils import OperationalError, ProgrammingError
from django.db.utils import OperationalError
from django.db.utils import ProgrammingError
from documents.signals import document_consumer_declaration
@ -16,7 +17,7 @@ def changed_password_check(app_configs, **kwargs):
try:
encrypted_doc = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG
storage_type=Document.STORAGE_TYPE_GPG,
).first()
except (OperationalError, ProgrammingError, FieldError):
return [] # No documents table yet
@ -27,8 +28,8 @@ def changed_password_check(app_configs, **kwargs):
return [
Error(
"The database contains encrypted documents but no password "
"is set."
)
"is set.",
),
]
if not GnuPG.decrypted(encrypted_doc.source_file):
@ -42,9 +43,9 @@ def changed_password_check(app_configs, **kwargs):
If you intend to change your password, you must first export
all of the old documents, start fresh with the new password
and then re-import them."
"""
)
)
""",
),
),
]
return []
@ -61,8 +62,8 @@ def parser_check(app_configs, **kwargs):
return [
Error(
"No parsers found. This is a bug. The consumer won't be "
"able to consume any documents without parsers."
)
"able to consume any documents without parsers.",
),
]
else:
return []

View File

@ -6,8 +6,8 @@ import re
import shutil
from django.conf import settings
from documents.models import Document, MatchingModel
from documents.models import Document
from documents.models import MatchingModel
class IncompatibleClassifierVersionError(Exception):
@ -30,8 +30,8 @@ def preprocess_content(content):
def load_classifier():
if not os.path.isfile(settings.MODEL_FILE):
logger.debug(
f"Document classification model does not exist (yet), not "
f"performing automatic matching."
"Document classification model does not exist (yet), not "
"performing automatic matching.",
)
return None
@ -42,16 +42,16 @@ def load_classifier():
except (ClassifierModelCorruptError, IncompatibleClassifierVersionError):
# there's something wrong with the model file.
logger.exception(
f"Unrecoverable error while loading document "
f"classification model, deleting model file."
"Unrecoverable error while loading document "
"classification model, deleting model file.",
)
os.unlink(settings.MODEL_FILE)
classifier = None
except OSError:
logger.exception(f"IO error while loading document classification model")
logger.exception("IO error while loading document classification model")
classifier = None
except Exception:
logger.exception(f"Unknown error while loading document classification model")
logger.exception("Unknown error while loading document classification model")
classifier = None
return classifier
@ -78,7 +78,7 @@ class DocumentClassifier(object):
if schema_version != self.FORMAT_VERSION:
raise IncompatibleClassifierVersionError(
"Cannor load classifier, incompatible versions."
"Cannor load classifier, incompatible versions.",
)
else:
try:
@ -122,8 +122,8 @@ class DocumentClassifier(object):
logger.debug("Gathering data from database...")
m = hashlib.sha1()
for doc in Document.objects.order_by("pk").exclude(
tags__is_inbox_tag=True
): # NOQA: E501
tags__is_inbox_tag=True,
):
preprocessed_content = preprocess_content(doc.content)
m.update(preprocessed_content.encode("utf-8"))
data.append(preprocessed_content)
@ -146,9 +146,9 @@ class DocumentClassifier(object):
[
tag.pk
for tag in doc.tags.filter(
matching_algorithm=MatchingModel.MATCH_AUTO
matching_algorithm=MatchingModel.MATCH_AUTO,
)
]
],
)
for tag in tags:
m.update(tag.to_bytes(4, "little", signed=True))
@ -177,8 +177,11 @@ class DocumentClassifier(object):
logger.debug(
"{} documents, {} tag(s), {} correspondent(s), "
"{} document type(s).".format(
len(data), num_tags, num_correspondents, num_document_types
)
len(data),
num_tags,
num_correspondents,
num_document_types,
),
)
from sklearn.feature_extraction.text import CountVectorizer
@ -188,7 +191,9 @@ class DocumentClassifier(object):
# Step 2: vectorize data
logger.debug("Vectorizing data...")
self.data_vectorizer = CountVectorizer(
analyzer="word", ngram_range=(1, 2), min_df=0.01
analyzer="word",
ngram_range=(1, 2),
min_df=0.01,
)
data_vectorized = self.data_vectorizer.fit_transform(data)
@ -204,7 +209,7 @@ class DocumentClassifier(object):
]
self.tags_binarizer = LabelBinarizer()
labels_tags_vectorized = self.tags_binarizer.fit_transform(
labels_tags
labels_tags,
).ravel()
else:
self.tags_binarizer = MultiLabelBinarizer()
@ -223,7 +228,8 @@ class DocumentClassifier(object):
else:
self.correspondent_classifier = None
logger.debug(
"There are no correspondents. Not training correspondent " "classifier."
"There are no correspondents. Not training correspondent "
"classifier.",
)
if num_document_types > 0:
@ -233,7 +239,8 @@ class DocumentClassifier(object):
else:
self.document_type_classifier = None
logger.debug(
"There are no document types. Not training document type " "classifier."
"There are no document types. Not training document type "
"classifier.",
)
self.data_hash = new_data_hash

View File

@ -15,11 +15,19 @@ from filelock import FileLock
from rest_framework.reverse import reverse
from .classifier import load_classifier
from .file_handling import create_source_path_directory, generate_unique_filename
from .file_handling import create_source_path_directory
from .file_handling import generate_unique_filename
from .loggers import LoggingMixin
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
from .signals import document_consumption_finished, document_consumption_started
from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import FileInfo
from .models import Tag
from .parsers import get_parser_class_for_mime_type
from .parsers import parse_date
from .parsers import ParseError
from .signals import document_consumption_finished
from .signals import document_consumption_started
class ConsumerError(Exception):
@ -46,12 +54,15 @@ class Consumer(LoggingMixin):
logging_name = "paperless.consumer"
def _send_progress(
self, current_progress, max_progress, status, message=None, document_id=None
self,
current_progress,
max_progress,
status,
message=None,
document_id=None,
):
payload = {
"filename": os.path.basename(self.filename)
if self.filename
else None, # NOQA: E501
"filename": os.path.basename(self.filename) if self.filename else None,
"task_id": self.task_id,
"current_progress": current_progress,
"max_progress": max_progress,
@ -60,7 +71,8 @@ class Consumer(LoggingMixin):
"document_id": document_id,
}
async_to_sync(self.channel_layer.group_send)(
"status_updates", {"type": "status_update", "data": payload}
"status_updates",
{"type": "status_update", "data": payload},
)
def _fail(self, message, log_message=None, exc_info=None):
@ -83,15 +95,16 @@ class Consumer(LoggingMixin):
def pre_check_file_exists(self):
if not os.path.isfile(self.path):
self._fail(
MESSAGE_FILE_NOT_FOUND, f"Cannot consume {self.path}: File not found."
MESSAGE_FILE_NOT_FOUND,
f"Cannot consume {self.path}: File not found.",
)
def pre_check_duplicate(self):
with open(self.path, "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
if Document.objects.filter(
Q(checksum=checksum) | Q(archive_checksum=checksum)
).exists(): # NOQA: E501
Q(checksum=checksum) | Q(archive_checksum=checksum),
).exists():
if settings.CONSUMER_DELETE_DUPLICATES:
os.unlink(self.path)
self._fail(
@ -139,7 +152,8 @@ class Consumer(LoggingMixin):
)
self.log(
"info", f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
"info",
f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}",
)
try:
@ -154,7 +168,7 @@ class Consumer(LoggingMixin):
reverse("document-thumb", kwargs={"pk": document.pk}),
str(document.correspondent),
str(",".join(document.tags.all().values_list("name", flat=True))),
)
),
).wait()
except Exception as e:
self._fail(
@ -213,7 +227,9 @@ class Consumer(LoggingMixin):
# Notify all listeners that we're going to do some work.
document_consumption_started.send(
sender=self.__class__, filename=self.path, logging_group=self.logging_group
sender=self.__class__,
filename=self.path,
logging_group=self.logging_group,
)
self.run_pre_consume_script()
@ -247,7 +263,9 @@ class Consumer(LoggingMixin):
self.log("debug", f"Generating thumbnail for {self.filename}...")
self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
thumbnail = document_parser.get_optimised_thumbnail(
self.path, mime_type, self.filename
self.path,
mime_type,
self.filename,
)
text = document_parser.get_text()
@ -301,21 +319,26 @@ class Consumer(LoggingMixin):
self._write(document.storage_type, self.path, document.source_path)
self._write(
document.storage_type, thumbnail, document.thumbnail_path
document.storage_type,
thumbnail,
document.thumbnail_path,
)
if archive_path and os.path.isfile(archive_path):
document.archive_filename = generate_unique_filename(
document, archive_filename=True
document,
archive_filename=True,
)
create_source_path_directory(document.archive_path)
self._write(
document.storage_type, archive_path, document.archive_path
document.storage_type,
archive_path,
document.archive_path,
)
with open(archive_path, "rb") as f:
document.archive_checksum = hashlib.md5(
f.read()
f.read(),
).hexdigest()
# Don't save with the lock active. Saving will cause the file
@ -328,7 +351,8 @@ class Consumer(LoggingMixin):
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
shadow_file = os.path.join(
os.path.dirname(self.path), "._" + os.path.basename(self.path)
os.path.dirname(self.path),
"._" + os.path.basename(self.path),
)
if os.path.isfile(shadow_file):
@ -390,12 +414,12 @@ class Consumer(LoggingMixin):
def apply_overrides(self, document):
if self.override_correspondent_id:
document.correspondent = Correspondent.objects.get(
pk=self.override_correspondent_id
pk=self.override_correspondent_id,
)
if self.override_document_type_id:
document.document_type = DocumentType.objects.get(
pk=self.override_document_type_id
pk=self.override_document_type_id,
)
if self.override_tag_ids:

View File

@ -103,15 +103,17 @@ def generate_unique_filename(doc, archive_filename=False):
if archive_filename and doc.filename:
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
if new_filename == old_filename or not os.path.exists(
os.path.join(root, new_filename)
): # NOQA: E501
os.path.join(root, new_filename),
):
return new_filename
counter = 0
while True:
new_filename = generate_filename(
doc, counter, archive_filename=archive_filename
doc,
counter,
archive_filename=archive_filename,
)
if new_filename == old_filename:
# still the same as before.
@ -137,14 +139,16 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
if doc.correspondent:
correspondent = pathvalidate.sanitize_filename(
doc.correspondent.name, replacement_text="-"
doc.correspondent.name,
replacement_text="-",
)
else:
correspondent = "none"
if doc.document_type:
document_type = pathvalidate.sanitize_filename(
doc.document_type.name, replacement_text="-"
doc.document_type.name,
replacement_text="-",
)
else:
document_type = "none"
@ -160,9 +164,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
document_type=document_type,
created=datetime.date.isoformat(doc.created),
created_year=doc.created.year if doc.created else "none",
created_month=f"{doc.created.month:02}"
if doc.created
else "none", # NOQA: E501
created_month=f"{doc.created.month:02}" if doc.created else "none",
created_day=f"{doc.created.day:02}" if doc.created else "none",
added=datetime.date.isoformat(doc.added),
added_year=doc.added.year if doc.added else "none",
@ -178,7 +180,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
except (ValueError, KeyError, IndexError):
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default",
)
counter_str = f"_{counter:02}" if counter else ""

View File

@ -1,7 +1,13 @@
from django.db.models import Q
from django_filters.rest_framework import BooleanFilter, FilterSet, Filter
from django_filters.rest_framework import BooleanFilter
from django_filters.rest_framework import Filter
from django_filters.rest_framework import FilterSet
from .models import Correspondent, Document, Tag, DocumentType, Log
from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import Log
from .models import Tag
CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
ID_KWARGS = ["in", "exact"]
@ -75,7 +81,10 @@ class TitleContentFilter(Filter):
class DocumentFilterSet(FilterSet):
is_tagged = BooleanFilter(
label="Is tagged", field_name="tags", lookup_expr="isnull", exclude=True
label="Is tagged",
field_name="tags",
lookup_expr="isnull",
exclude=True,
)
tags__id__all = TagsFilter()

View File

@ -1,21 +1,30 @@
import logging
import math
import os
from contextlib import contextmanager
import math
from dateutil.parser import isoparse
from django.conf import settings
from whoosh import highlight, classify, query
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN
from documents.models import Document
from whoosh import classify
from whoosh import highlight
from whoosh import query
from whoosh.fields import BOOLEAN
from whoosh.fields import DATETIME
from whoosh.fields import KEYWORD
from whoosh.fields import NUMERIC
from whoosh.fields import Schema
from whoosh.fields import TEXT
from whoosh.highlight import HtmlFormatter
from whoosh.index import create_in, exists_in, open_dir
from whoosh.index import create_in
from whoosh.index import exists_in
from whoosh.index import open_dir
from whoosh.qparser import MultifieldParser
from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.searching import ResultsPage, Searcher
from whoosh.searching import ResultsPage
from whoosh.searching import Searcher
from whoosh.writing import AsyncWriter
from documents.models import Document
logger = logging.getLogger("paperless.index")
@ -45,7 +54,7 @@ def open_index(recreate=False):
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR, schema=get_schema())
except Exception:
logger.exception(f"Error while opening the index, recreating.")
logger.exception("Error while opening the index, recreating.")
if not os.path.isdir(settings.INDEX_DIR):
os.makedirs(settings.INDEX_DIR, exist_ok=True)
@ -138,11 +147,11 @@ class DelayedQuery:
criterias.append(query.Term("has_type", v == "false"))
elif k == "created__date__lt":
criterias.append(
query.DateRange("created", start=None, end=isoparse(v))
query.DateRange("created", start=None, end=isoparse(v)),
)
elif k == "created__date__gt":
criterias.append(
query.DateRange("created", start=isoparse(v), end=None)
query.DateRange("created", start=isoparse(v), end=None),
)
elif k == "added__date__gt":
criterias.append(query.DateRange("added", start=isoparse(v), end=None))
@ -220,7 +229,7 @@ class DelayedQuery:
hit[1],
),
page.results.top_n,
)
),
)
self.saved_results[item.start] = page
@ -240,7 +249,7 @@ class DelayedFullTextQuery(DelayedQuery):
corrected = self.searcher.correct_query(q, q_str)
if corrected.query != q:
corrected_query = corrected.string
corrected.query = corrected.string
return q, None
@ -252,10 +261,14 @@ class DelayedMoreLikeThisQuery(DelayedQuery):
docnum = self.searcher.document_number(id=more_like_doc_id)
kts = self.searcher.key_terms_from_text(
"content", content, numterms=20, model=classify.Bo1Model, normalize=False
"content",
content,
numterms=20,
model=classify.Bo1Model,
normalize=False,
)
q = query.Or(
[query.Term("content", word, boost=weight) for word, weight in kts]
[query.Term("content", word, boost=weight) for word, weight in kts],
)
mask = {docnum}
@ -266,7 +279,9 @@ def autocomplete(ix, term, limit=10):
with ix.reader() as reader:
terms = []
for (score, t) in reader.most_distinctive_terms(
"content", number=limit, prefix=term.lower()
"content",
number=limit,
prefix=term.lower(),
):
terms.append(t)
return terms

View File

@ -1,8 +1,6 @@
import logging
import uuid
from django.conf import settings
class LoggingMixin:

View File

@ -1,8 +1,8 @@
import os
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from documents.models import Document
from paperless.db import GnuPG
@ -31,9 +31,9 @@ class Command(BaseCommand):
"this unless you've got a recent backup\nWARNING: handy. It "
"*should* work without a hitch, but be safe and backup your\n"
"WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to "
"continue.\n\n"
"continue.\n\n",
)
__ = input()
_ = input()
except KeyboardInterrupt:
return
@ -41,7 +41,7 @@ class Command(BaseCommand):
if not passphrase:
raise CommandError(
"Passphrase not defined. Please set it with --passphrase or "
"by declaring it in your environment or your config."
"by declaring it in your environment or your config.",
)
self.__gpg_to_unencrypted(passphrase)
@ -50,7 +50,7 @@ class Command(BaseCommand):
def __gpg_to_unencrypted(passphrase):
encrypted_files = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG
storage_type=Document.STORAGE_TYPE_GPG,
)
for document in encrypted_files:
@ -71,7 +71,7 @@ class Command(BaseCommand):
if not ext == ".gpg":
raise CommandError(
f"Abort: encrypted file {document.source_path} does not "
f"end with .gpg"
f"end with .gpg",
)
document.filename = os.path.splitext(document.filename)[0]
@ -83,7 +83,8 @@ class Command(BaseCommand):
f.write(raw_thumb)
Document.objects.filter(id=document.id).update(
storage_type=document.storage_type, filename=document.filename
storage_type=document.storage_type,
filename=document.filename,
)
for path in old_paths:

View File

@ -1,7 +1,6 @@
import hashlib
import multiprocessing
import logging
import multiprocessing
import os
import shutil
import uuid
@ -11,12 +10,12 @@ from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from filelock import FileLock
from whoosh.writing import AsyncWriter
from documents.models import Document
from filelock import FileLock
from ... import index
from ...file_handling import create_source_path_directory, generate_unique_filename
from ...file_handling import create_source_path_directory
from ...file_handling import generate_unique_filename
from ...parsers import get_parser_class_for_mime_type
@ -33,7 +32,7 @@ def handle_document(document_id):
if not parser_class:
logger.error(
f"No parser found for mime type {mime_type}, cannot "
f"archive document {document} (ID: {document_id})"
f"archive document {document} (ID: {document_id})",
)
return
@ -43,7 +42,9 @@ def handle_document(document_id):
parser.parse(document.source_path, mime_type, document.get_public_filename())
thumbnail = parser.get_optimised_thumbnail(
document.source_path, mime_type, document.get_public_filename()
document.source_path,
mime_type,
document.get_public_filename(),
)
if parser.get_archive_path():
@ -55,7 +56,8 @@ def handle_document(document_id):
# We also don't use save() since that triggers the filehandling
# logic, and we don't want that yet (file not yet in place)
document.archive_filename = generate_unique_filename(
document, archive_filename=True
document,
archive_filename=True,
)
Document.objects.filter(pk=document.pk).update(
archive_checksum=checksum,
@ -70,9 +72,9 @@ def handle_document(document_id):
with index.open_index_writer() as writer:
index.update_document(writer, document)
except Exception as e:
except Exception:
logger.exception(
f"Error while parsing document {document} " f"(ID: {document_id})"
f"Error while parsing document {document} " f"(ID: {document_id})",
)
finally:
parser.cleanup()
@ -86,7 +88,8 @@ class Command(BaseCommand):
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(
" ", ""
" ",
"",
)
def add_arguments(self, parser):
@ -129,7 +132,7 @@ class Command(BaseCommand):
map(
lambda doc: doc.id,
filter(lambda d: overwrite or not d.has_archive_version, documents),
)
),
)
# Note to future self: this prevents django from reusing database
@ -146,7 +149,7 @@ class Command(BaseCommand):
pool.imap_unordered(handle_document, document_ids),
total=len(document_ids),
disable=options["no_progress_bar"],
)
),
)
except KeyboardInterrupt:
print("Aborting...")

View File

@ -1,17 +1,18 @@
import logging
import os
from pathlib import Path, PurePath
from pathlib import Path
from pathlib import PurePath
from threading import Thread
from time import sleep
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
from documents.models import Tag
from documents.parsers import is_file_ext_supported
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
try:
from inotifyrecursive import INotify, flags
@ -29,7 +30,7 @@ def _tags_from_path(filepath):
path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
for part in path_parts:
tag_ids.add(
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
)
return tag_ids
@ -56,7 +57,7 @@ def _consume(filepath):
try:
if settings.CONSUMER_SUBDIRS_AS_TAGS:
tag_ids = _tags_from_path(filepath)
except Exception as e:
except Exception:
logger.exception("Error creating tags from path")
try:
@ -67,7 +68,7 @@ def _consume(filepath):
override_tag_ids=tag_ids if tag_ids else None,
task_name=os.path.basename(filepath)[:100],
)
except Exception as e:
except Exception:
# Catch all so that the consumer won't crash.
# This is also what the test case is listening for to check for
# errors.
@ -86,7 +87,7 @@ def _consume_wait_unmodified(file):
new_mtime = os.stat(file).st_mtime
except FileNotFoundError:
logger.debug(
f"File {file} moved while waiting for it to remain " f"unmodified."
f"File {file} moved while waiting for it to remain " f"unmodified.",
)
return
if new_mtime == mtime:

View File

@ -9,7 +9,8 @@ class Command(BaseCommand):
Trains the classifier on your data and saves the resulting models to a
file. The document consumer will then automatically use this new model.
""".replace(
" ", ""
" ",
"",
)
def __init__(self, *args, **kwargs):

View File

@ -6,28 +6,28 @@ import time
import tqdm
from django.conf import settings
from django.contrib.auth.models import User, Group
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.core import serializers
from django.core.management.base import BaseCommand, CommandError
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import transaction
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import SavedView
from documents.models import SavedViewFilterRule
from documents.models import Tag
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from filelock import FileLock
from documents.models import (
Document,
Correspondent,
Tag,
DocumentType,
SavedView,
SavedViewFilterRule,
)
from documents.settings import (
EXPORTER_FILE_NAME,
EXPORTER_THUMBNAIL_NAME,
EXPORTER_ARCHIVE_NAME,
)
from paperless.db import GnuPG
from paperless_mail.models import MailAccount, MailRule
from ...file_handling import generate_filename, delete_empty_directories
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from ...file_handling import delete_empty_directories
from ...file_handling import generate_filename
class Command(BaseCommand):
@ -37,7 +37,8 @@ class Command(BaseCommand):
directory. And include a manifest file containing document data for
easy import.
""".replace(
" ", ""
" ",
"",
)
def add_arguments(self, parser):
@ -107,20 +108,20 @@ class Command(BaseCommand):
# 1. Take a snapshot of what files exist in the current export folder
for root, dirs, files in os.walk(self.target):
self.files_in_export_dir.extend(
map(lambda f: os.path.abspath(os.path.join(root, f)), files)
map(lambda f: os.path.abspath(os.path.join(root, f)), files),
)
# 2. Create manifest, containing all correspondents, types, tags and
# documents
with transaction.atomic():
manifest = json.loads(
serializers.serialize("json", Correspondent.objects.all())
serializers.serialize("json", Correspondent.objects.all()),
)
manifest += json.loads(serializers.serialize("json", Tag.objects.all()))
manifest += json.loads(
serializers.serialize("json", DocumentType.objects.all())
serializers.serialize("json", DocumentType.objects.all()),
)
documents = Document.objects.order_by("id")
@ -129,19 +130,19 @@ class Command(BaseCommand):
manifest += document_manifest
manifest += json.loads(
serializers.serialize("json", MailAccount.objects.all())
serializers.serialize("json", MailAccount.objects.all()),
)
manifest += json.loads(
serializers.serialize("json", MailRule.objects.all())
serializers.serialize("json", MailRule.objects.all()),
)
manifest += json.loads(
serializers.serialize("json", SavedView.objects.all())
serializers.serialize("json", SavedView.objects.all()),
)
manifest += json.loads(
serializers.serialize("json", SavedViewFilterRule.objects.all())
serializers.serialize("json", SavedViewFilterRule.objects.all()),
)
manifest += json.loads(serializers.serialize("json", Group.objects.all()))
@ -155,9 +156,7 @@ class Command(BaseCommand):
disable=progress_bar_disable,
):
# 3.1. store files unencrypted
document_dict["fields"][
"storage_type"
] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED
document = document_map[document_dict["pk"]]
@ -166,7 +165,9 @@ class Command(BaseCommand):
while True:
if self.use_filename_format:
base_name = generate_filename(
document, counter=filename_counter, append_gpg=False
document,
counter=filename_counter,
append_gpg=False,
)
else:
base_name = document.get_public_filename(counter=filename_counter)
@ -217,14 +218,18 @@ class Command(BaseCommand):
os.utime(archive_target, times=(t, t))
else:
self.check_and_copy(
document.source_path, document.checksum, original_target
document.source_path,
document.checksum,
original_target,
)
self.check_and_copy(document.thumbnail_path, None, thumbnail_target)
if archive_target:
self.check_and_copy(
document.archive_path, document.archive_checksum, archive_target
document.archive_path,
document.archive_checksum,
archive_target,
)
# 4. write manifest to target folder
@ -243,7 +248,8 @@ class Command(BaseCommand):
os.remove(f)
delete_empty_directories(
os.path.abspath(os.path.dirname(f)), os.path.abspath(self.target)
os.path.abspath(os.path.dirname(f)),
os.path.abspath(self.target),
)
def check_and_copy(self, source, source_checksum, target):

View File

@ -7,16 +7,16 @@ from contextlib import contextmanager
import tqdm
from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError
from django.db.models.signals import post_save, m2m_changed
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db.models.signals import m2m_changed
from django.db.models.signals import post_save
from documents.models import Document
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from filelock import FileLock
from documents.models import Document
from documents.settings import (
EXPORTER_FILE_NAME,
EXPORTER_THUMBNAIL_NAME,
EXPORTER_ARCHIVE_NAME,
)
from ...file_handling import create_source_path_directory
from ...signals.handlers import update_filename_and_move_files
@ -36,7 +36,8 @@ class Command(BaseCommand):
Using a manifest.json file, load the data from there, and import the
documents it refers to.
""".replace(
" ", ""
" ",
"",
)
def add_arguments(self, parser):
@ -73,7 +74,9 @@ class Command(BaseCommand):
self._check_manifest()
with disable_signal(
post_save, receiver=update_filename_and_move_files, sender=Document
post_save,
receiver=update_filename_and_move_files,
sender=Document,
):
with disable_signal(
m2m_changed,
@ -92,7 +95,7 @@ class Command(BaseCommand):
def _check_manifest_exists(path):
if not os.path.exists(path):
raise CommandError(
"That directory doesn't appear to contain a manifest.json " "file."
"That directory doesn't appear to contain a manifest.json " "file.",
)
def _check_manifest(self):
@ -105,14 +108,14 @@ class Command(BaseCommand):
if EXPORTER_FILE_NAME not in record:
raise CommandError(
"The manifest file contains a record which does not "
"refer to an actual document file."
"refer to an actual document file.",
)
doc_file = record[EXPORTER_FILE_NAME]
if not os.path.exists(os.path.join(self.source, doc_file)):
raise CommandError(
'The manifest file refers to "{}" which does not '
"appear to be in the source directory.".format(doc_file)
"appear to be in the source directory.".format(doc_file),
)
if EXPORTER_ARCHIVE_NAME in record:
@ -120,7 +123,7 @@ class Command(BaseCommand):
if not os.path.exists(os.path.join(self.source, archive_file)):
raise CommandError(
f"The manifest file refers to {archive_file} which "
f"does not appear to be in the source directory."
f"does not appear to be in the source directory.",
)
def _import_files_from_manifest(self, progress_bar_disable):
@ -132,7 +135,7 @@ class Command(BaseCommand):
print("Copy files into paperless...")
manifest_documents = list(
filter(lambda r: r["model"] == "documents.document", self.manifest)
filter(lambda r: r["model"] == "documents.document", self.manifest),
)
for record in tqdm.tqdm(manifest_documents, disable=progress_bar_disable):

View File

@ -1,7 +1,7 @@
from django.core.management import BaseCommand
from django.db import transaction
from documents.tasks import index_reindex, index_optimize
from documents.tasks import index_optimize
from documents.tasks import index_reindex
class Command(BaseCommand):

View File

@ -3,7 +3,6 @@ import logging
import tqdm
from django.core.management.base import BaseCommand
from django.db.models.signals import post_save
from documents.models import Document
@ -12,7 +11,8 @@ class Command(BaseCommand):
help = """
This will rename all documents to match the latest filename format.
""".replace(
" ", ""
" ",
"",
)
def add_arguments(self, parser):
@ -28,6 +28,7 @@ class Command(BaseCommand):
logging.getLogger().handlers[0].level = logging.ERROR
for document in tqdm.tqdm(
Document.objects.all(), disable=options["no_progress_bar"]
Document.objects.all(),
disable=options["no_progress_bar"],
):
post_save.send(Document, instance=document)

View File

@ -2,10 +2,12 @@ import logging
import tqdm
from django.core.management.base import BaseCommand
from documents.classifier import load_classifier
from documents.models import Document
from ...signals.handlers import set_correspondent, set_document_type, set_tags
from ...signals.handlers import set_correspondent
from ...signals.handlers import set_document_type
from ...signals.handlers import set_tags
logger = logging.getLogger("paperless.management.retagger")
@ -19,7 +21,8 @@ class Command(BaseCommand):
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(
" ", ""
" ",
"",
)
def add_arguments(self, parser):
@ -57,7 +60,8 @@ class Command(BaseCommand):
help="Return the suggestion, don't change anything.",
)
parser.add_argument(
"--base-url", help="The base URL to use to build the link to the documents."
"--base-url",
help="The base URL to use to build the link to the documents.",
)
def handle(self, *args, **options):

View File

@ -7,7 +7,8 @@ class Command(BaseCommand):
help = """
This command checks your document archive for issues.
""".replace(
" ", ""
" ",
"",
)
def add_arguments(self, parser):

View File

@ -5,8 +5,8 @@ import shutil
import tqdm
from django import db
from django.core.management.base import BaseCommand
from documents.models import Document
from ...parsers import get_parser_class_for_mime_type
@ -22,7 +22,9 @@ def _process_document(doc_in):
try:
thumb = parser.get_optimised_thumbnail(
document.source_path, document.mime_type, document.get_public_filename()
document.source_path,
document.mime_type,
document.get_public_filename(),
)
shutil.move(thumb, document.thumbnail_path)
@ -35,7 +37,8 @@ class Command(BaseCommand):
help = """
This will regenerate the thumbnails for all documents.
""".replace(
" ", ""
" ",
"",
)
def add_arguments(self, parser):
@ -76,5 +79,5 @@ class Command(BaseCommand):
pool.imap_unordered(_process_document, ids),
total=len(ids),
disable=options["no_progress_bar"],
)
),
)

View File

@ -2,7 +2,7 @@ import logging
import os
from django.contrib.auth.models import User
from django.core.management.base import BaseCommand, CommandError
from django.core.management.base import BaseCommand
logger = logging.getLogger("paperless.management.superuser")
@ -13,7 +13,8 @@ class Command(BaseCommand):
help = """
Creates a Django superuser based on env variables.
""".replace(
" ", ""
" ",
"",
)
def handle(self, *args, **options):
@ -39,5 +40,5 @@ class Command(BaseCommand):
self.stdout.write(f'Did not create superuser "{username}".')
self.stdout.write(
'Make sure you specified "PAPERLESS_ADMIN_PASSWORD" in your '
'"docker-compose.env" file.'
'"docker-compose.env" file.',
)

View File

@ -1,8 +1,10 @@
import logging
import re
from documents.models import MatchingModel, Correspondent, DocumentType, Tag
from documents.models import Correspondent
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import Tag
logger = logging.getLogger("paperless.matching")
@ -12,7 +14,7 @@ def log_reason(matching_model, document, reason):
class_name = type(matching_model).__name__
logger.debug(
f"{class_name} {matching_model.name} matched on document "
f"{document} because {reason}"
f"{document} because {reason}",
)
@ -25,7 +27,7 @@ def match_correspondents(document, classifier):
correspondents = Correspondent.objects.all()
return list(
filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents)
filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents),
)
@ -38,7 +40,7 @@ def match_document_types(document, classifier):
document_types = DocumentType.objects.all()
return list(
filter(lambda o: matches(o, document) or o.pk == pred_id, document_types)
filter(lambda o: matches(o, document) or o.pk == pred_id, document_types),
)
@ -51,7 +53,7 @@ def match_tags(document, classifier):
tags = Tag.objects.all()
return list(
filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags)
filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags),
)
@ -92,7 +94,7 @@ def matches(matching_model, document):
rf"\b{re.escape(matching_model.match)}\b",
document_content,
**search_kwargs,
)
),
)
if result:
log_reason(
@ -105,11 +107,12 @@ def matches(matching_model, document):
elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
try:
match = re.search(
re.compile(matching_model.match, **search_kwargs), document_content
re.compile(matching_model.match, **search_kwargs),
document_content,
)
except re.error:
logger.error(
f"Error while processing regular expression " f"{matching_model.match}"
f"Error while processing regular expression " f"{matching_model.match}",
)
return False
if match:

View File

@ -5,17 +5,14 @@ import os
import re
from collections import OrderedDict
import pathvalidate
import dateutil.parser
import pathvalidate
from django.conf import settings
from django.contrib.auth.models import User
from django.db import models
from django.utils import timezone
from django.utils.timezone import is_aware
from django.utils.translation import gettext_lazy as _
from documents.parsers import get_default_file_extension
@ -42,7 +39,9 @@ class MatchingModel(models.Model):
match = models.CharField(_("match"), max_length=256, blank=True)
matching_algorithm = models.PositiveIntegerField(
_("matching algorithm"), choices=MATCHING_ALGORITHMS, default=MATCH_ANY
_("matching algorithm"),
choices=MATCHING_ALGORITHMS,
default=MATCH_ANY,
)
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
@ -71,7 +70,7 @@ class Tag(MatchingModel):
default=False,
help_text=_(
"Marks this tag as an inbox tag: All newly consumed "
"documents will be tagged with inbox tags."
"documents will be tagged with inbox tags.",
),
)
@ -120,14 +119,17 @@ class Document(models.Model):
blank=True,
help_text=_(
"The raw, text-only data of the document. This field is "
"primarily used for searching."
"primarily used for searching.",
),
)
mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
tags = models.ManyToManyField(
Tag, related_name="documents", blank=True, verbose_name=_("tags")
Tag,
related_name="documents",
blank=True,
verbose_name=_("tags"),
)
checksum = models.CharField(
@ -150,7 +152,10 @@ class Document(models.Model):
created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)
modified = models.DateTimeField(
_("modified"), auto_now=True, editable=False, db_index=True
_("modified"),
auto_now=True,
editable=False,
db_index=True,
)
storage_type = models.CharField(
@ -162,7 +167,10 @@ class Document(models.Model):
)
added = models.DateTimeField(
_("added"), default=timezone.now, editable=False, db_index=True
_("added"),
default=timezone.now,
editable=False,
db_index=True,
)
filename = models.FilePathField(
@ -192,7 +200,7 @@ class Document(models.Model):
unique=True,
db_index=True,
help_text=_(
"The position of this document in your physical document " "archive."
"The position of this document in your physical document " "archive.",
),
)
@ -289,7 +297,9 @@ class Log(models.Model):
message = models.TextField(_("message"))
level = models.PositiveIntegerField(
_("level"), choices=LEVELS, default=logging.INFO
_("level"),
choices=LEVELS,
default=logging.INFO,
)
created = models.DateTimeField(_("created"), auto_now_add=True)
@ -321,7 +331,10 @@ class SavedView(models.Model):
)
sort_field = models.CharField(
_("sort field"), max_length=128, null=True, blank=True
_("sort field"),
max_length=128,
null=True,
blank=True,
)
sort_reverse = models.BooleanField(_("sort reverse"), default=False)
@ -383,11 +396,16 @@ class FileInfo:
),
),
("title", re.compile(r"(?P<title>.*)$", flags=re.IGNORECASE)),
]
],
)
def __init__(
self, created=None, correspondent=None, title=None, tags=(), extension=None
self,
created=None,
correspondent=None,
title=None,
tags=(),
extension=None,
):
self.created = created

View File

@ -9,6 +9,8 @@ import tempfile
import magic
from django.conf import settings
from django.utils import timezone
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration
# This regular expression will try to find dates in the document at
# hand and will match the following formats:
@ -21,17 +23,15 @@ from django.utils import timezone
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration
# TODO: isn't there a date parsing library for this?
DATE_REGEX = re.compile(
r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" # NOQA: E501
r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" # NOQA: E501
r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # NOQA: E501
r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))"
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))",
)

View File

@ -3,9 +3,8 @@ import logging
import os
from django.conf import settings
from tqdm import tqdm
from documents.models import Document
from tqdm import tqdm
class SanityCheckMessages:
@ -88,19 +87,19 @@ def check_sanity(progress=False):
if not checksum == doc.checksum:
messages.error(
f"Checksum mismatch of document {doc.pk}. "
f"Stored: {doc.checksum}, actual: {checksum}."
f"Stored: {doc.checksum}, actual: {checksum}.",
)
# Check sanity of the archive file.
if doc.archive_checksum and not doc.archive_filename:
messages.error(
f"Document {doc.pk} has an archive file checksum, but no "
f"archive filename."
f"archive filename.",
)
elif not doc.archive_checksum and doc.archive_filename:
messages.error(
f"Document {doc.pk} has an archive file, but its checksum is "
f"missing."
f"missing.",
)
elif doc.has_archive_version:
if not os.path.isfile(doc.archive_path):
@ -113,7 +112,7 @@ def check_sanity(progress=False):
checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e:
messages.error(
f"Cannot read archive file of document {doc.pk}: {e}"
f"Cannot read archive file of document {doc.pk}: {e}",
)
else:
if not checksum == doc.archive_checksum:
@ -121,7 +120,7 @@ def check_sanity(progress=False):
f"Checksum mismatch of archived document "
f"{doc.pk}. "
f"Stored: {doc.archive_checksum}, "
f"actual: {checksum}."
f"actual: {checksum}.",
)
# other document checks

View File

@ -1,25 +1,22 @@
import math
import re
import magic
import math
from django.utils.text import slugify
from django.utils.translation import gettext as _
from rest_framework import serializers
from rest_framework.fields import SerializerMethodField
from . import bulk_edit
from .models import (
Correspondent,
Tag,
Document,
DocumentType,
SavedView,
SavedViewFilterRule,
MatchingModel,
)
from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import MatchingModel
from .models import SavedView
from .models import SavedViewFilterRule
from .models import Tag
from .parsers import is_mime_type_supported
from django.utils.translation import gettext as _
# https://www.django-rest-framework.org/api-guide/serializers/#example
class DynamicFieldsModelSerializer(serializers.ModelSerializer):
@ -56,12 +53,12 @@ class MatchingModelSerializer(serializers.ModelSerializer):
if (
"matching_algorithm" in self.initial_data
and self.initial_data["matching_algorithm"] == MatchingModel.MATCH_REGEX
): # NOQA: E501
):
try:
re.compile(match)
except Exception as e:
except re.error as e:
raise serializers.ValidationError(
_("Invalid regular expression: %(error)s") % {"error": str(e)}
_("Invalid regular expression: %(error)s") % {"error": str(e.msg)},
)
return match
@ -156,7 +153,7 @@ class TagSerializer(MatchingModelSerializer):
luminance = math.sqrt(
0.299 * math.pow(rgb[0], 2)
+ 0.587 * math.pow(rgb[1], 2)
+ 0.114 * math.pow(rgb[2], 2)
+ 0.114 * math.pow(rgb[2], 2),
)
return "#ffffff" if luminance < 0.53 else "#000000"
except ValueError:
@ -298,7 +295,7 @@ class DocumentListSerializer(serializers.Serializer):
count = Document.objects.filter(id__in=documents).count()
if not count == len(documents):
raise serializers.ValidationError(
f"Some documents in {name} don't exist or were " f"specified twice."
f"Some documents in {name} don't exist or were " f"specified twice.",
)
def validate_documents(self, documents):
@ -331,7 +328,7 @@ class BulkEditSerializer(DocumentListSerializer):
count = Tag.objects.filter(id__in=tags).count()
if not count == len(tags):
raise serializers.ValidationError(
f"Some tags in {name} don't exist or were specified twice."
f"Some tags in {name} don't exist or were specified twice.",
)
def validate_method(self, method):
@ -456,7 +453,7 @@ class PostDocumentSerializer(serializers.Serializer):
if not is_mime_type_supported(mime_type):
raise serializers.ValidationError(
_("File type %(type)s not supported") % {"type": mime_type}
_("File type %(type)s not supported") % {"type": mime_type},
)
return document.name, document_data
@ -483,11 +480,13 @@ class PostDocumentSerializer(serializers.Serializer):
class BulkDownloadSerializer(DocumentListSerializer):
content = serializers.ChoiceField(
choices=["archive", "originals", "both"], default="archive"
choices=["archive", "originals", "both"],
default="archive",
)
compression = serializers.ChoiceField(
choices=["none", "deflated", "bzip2", "lzma"], default="none"
choices=["none", "deflated", "bzip2", "lzma"],
default="none",
)
def validate_compression(self, compression):

View File

@ -1,24 +1,26 @@
import logging
import os
from django.utils import termcolors
from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.admin.models import ADDITION
from django.contrib.admin.models import LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.db import DatabaseError
from django.db import models
from django.db.models import Q
from django.dispatch import receiver
from django.utils import termcolors, timezone
from django.utils import termcolors
from django.utils import timezone
from filelock import FileLock
from .. import matching
from ..file_handling import (
delete_empty_directories,
create_source_path_directory,
generate_unique_filename,
)
from ..models import Document, Tag, MatchingModel
from ..file_handling import create_source_path_directory
from ..file_handling import delete_empty_directories
from ..file_handling import generate_unique_filename
from ..models import Document
from ..models import MatchingModel
from ..models import Tag
logger = logging.getLogger("paperless.handlers")
@ -72,7 +74,7 @@ def set_correspondent(
print(
termcolors.colorize(str(document), fg="green")
if color
else str(document)
else str(document),
)
print(f"{base_url}/documents/{document.pk}")
else:
@ -82,7 +84,7 @@ def set_correspondent(
if color
else str(document)
)
+ f" [{document.pk}]"
+ f" [{document.pk}]",
)
print(f"Suggest correspondent {selected}")
else:
@ -139,7 +141,7 @@ def set_document_type(
print(
termcolors.colorize(str(document), fg="green")
if color
else str(document)
else str(document),
)
print(f"{base_url}/documents/{document.pk}")
else:
@ -149,7 +151,7 @@ def set_document_type(
if color
else str(document)
)
+ f" [{document.pk}]"
+ f" [{document.pk}]",
)
print(f"Suggest document type {selected}")
else:
@ -176,9 +178,9 @@ def set_tags(
if replace:
Document.tags.through.objects.filter(document=document).exclude(
Q(tag__is_inbox_tag=True)
Q(tag__is_inbox_tag=True),
).exclude(
Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO),
).delete()
current_tags = set(document.tags.all())
@ -198,7 +200,7 @@ def set_tags(
print(
termcolors.colorize(str(document), fg="green")
if color
else str(document)
else str(document),
)
print(f"{base_url}/documents/{document.pk}")
else:
@ -208,7 +210,7 @@ def set_tags(
if color
else str(document)
)
+ f" [{document.pk}]"
+ f" [{document.pk}]",
)
if relevant_tags:
print("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
@ -254,7 +256,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
except OSError as e:
logger.error(
f"Failed to move {instance.source_path} to trash at "
f"{new_file_path}: {e}. Skipping cleanup!"
f"{new_file_path}: {e}. Skipping cleanup!",
)
return
@ -270,16 +272,18 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
except OSError as e:
logger.warning(
f"While deleting document {str(instance)}, the file "
f"{filename} could not be deleted: {e}"
f"{filename} could not be deleted: {e}",
)
delete_empty_directories(
os.path.dirname(instance.source_path), root=settings.ORIGINALS_DIR
os.path.dirname(instance.source_path),
root=settings.ORIGINALS_DIR,
)
if instance.has_archive_version:
delete_empty_directories(
os.path.dirname(instance.archive_path), root=settings.ARCHIVE_DIR
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR,
)
@ -297,7 +301,7 @@ def validate_move(instance, old_path, new_path):
# Can't do anything if the new file already exists. Skip updating file.
logger.warning(
f"Document {str(instance)}: Cannot rename file "
f"since target path {new_path} already exists."
f"since target path {new_path} already exists.",
)
raise CannotMoveFilesException()
@ -331,12 +335,11 @@ def update_filename_and_move_files(sender, instance, **kwargs):
if instance.has_archive_version:
instance.archive_filename = generate_unique_filename(
instance, archive_filename=True
instance,
archive_filename=True,
)
move_archive = (
old_archive_filename != instance.archive_filename
) # NOQA: E501
move_archive = old_archive_filename != instance.archive_filename
else:
move_archive = False
@ -374,7 +377,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
if move_archive and os.path.isfile(instance.archive_path):
os.rename(instance.archive_path, old_archive_path)
except Exception as e:
except Exception:
# This is fine, since:
# A: if we managed to move source from A to B, we will also
# manage to move it from B to A. If not, we have a serious
@ -393,14 +396,16 @@ def update_filename_and_move_files(sender, instance, **kwargs):
# something has failed above.
if not os.path.isfile(old_source_path):
delete_empty_directories(
os.path.dirname(old_source_path), root=settings.ORIGINALS_DIR
os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR,
)
if instance.has_archive_version and not os.path.isfile(
old_archive_path
): # NOQA: E501
old_archive_path,
):
delete_empty_directories(
os.path.dirname(old_archive_path), root=settings.ARCHIVE_DIR
os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR,
)

View File

@ -3,13 +3,18 @@ import logging
import tqdm
from django.conf import settings
from django.db.models.signals import post_save
from whoosh.writing import AsyncWriter
from documents import index, sanity_checker
from documents.classifier import DocumentClassifier, load_classifier
from documents.consumer import Consumer, ConsumerError
from documents.models import Document, Tag, DocumentType, Correspondent
from documents import index
from documents import sanity_checker
from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier
from documents.consumer import Consumer
from documents.consumer import ConsumerError
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from whoosh.writing import AsyncWriter
logger = logging.getLogger("paperless.tasks")
@ -47,7 +52,7 @@ def train_classifier():
try:
if classifier.train():
logger.info(
"Saving updated classifier model to {}...".format(settings.MODEL_FILE)
"Saving updated classifier model to {}...".format(settings.MODEL_FILE),
)
classifier.save()
else:
@ -82,7 +87,7 @@ def consume_file(
else:
raise ConsumerError(
"Unknown error: Returned document was null, but "
"no error message was given."
"no error message was given.",
)

View File

@ -1,7 +1,8 @@
from factory import Faker
from factory.django import DjangoModelFactory
from ..models import Document, Correspondent
from ..models import Correspondent
from ..models import Document
class CorrespondentFactory(DjangoModelFactory):

View File

@ -3,7 +3,6 @@ from unittest import mock
from django.contrib.admin.sites import AdminSite
from django.test import TestCase
from django.utils import timezone
from documents import index
from documents.admin import DocumentAdmin
from documents.models import Document
@ -42,7 +41,8 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
docs = []
for i in range(42):
doc = Document.objects.create(
title="Many documents with the same title", checksum=f"{i:02}"
title="Many documents with the same title",
checksum=f"{i:02}",
)
docs.append(doc)
index.add_or_update_document(doc)
@ -61,6 +61,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
def test_created(self):
doc = Document.objects.create(
title="test", created=timezone.make_aware(timezone.datetime(2020, 4, 12))
title="test",
created=timezone.make_aware(timezone.datetime(2020, 4, 12)),
)
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")

View File

@ -10,22 +10,20 @@ from unittest import mock
import pytest
from django.conf import settings
from django.contrib.auth.models import User
from django.utils import timezone
from django.test import override_settings
from django.utils import timezone
from documents import bulk_edit
from documents import index
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import SavedView
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
from rest_framework.test import APITestCase
from whoosh.writing import AsyncWriter
from documents import index, bulk_edit
from documents.models import (
Document,
Correspondent,
DocumentType,
Tag,
SavedView,
MatchingModel,
)
from documents.tests.utils import DirectoriesMixin
class TestDocumentApi(DirectoriesMixin, APITestCase):
def setUp(self):
@ -72,7 +70,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
returned_doc["title"] = "the new title"
response = self.client.put(
"/api/documents/{}/".format(doc.pk), returned_doc, format="json"
"/api/documents/{}/".format(doc.pk),
returned_doc,
format="json",
)
self.assertEqual(response.status_code, 200)
@ -127,7 +127,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(len(results[0]), 2)
response = self.client.get(
"/api/documents/?fields=id,conteasdnt", format="json"
"/api/documents/?fields=id,conteasdnt",
format="json",
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -162,7 +163,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
with open(
os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb"
os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)),
"wb",
) as f:
f.write(content_thumbnail)
@ -206,7 +208,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.content, content_archive)
response = self.client.get(
"/api/documents/{}/download/?original=true".format(doc.pk)
"/api/documents/{}/download/?original=true".format(doc.pk),
)
self.assertEqual(response.status_code, 200)
@ -218,7 +220,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.content, content_archive)
response = self.client.get(
"/api/documents/{}/preview/?original=true".format(doc.pk)
"/api/documents/{}/preview/?original=true".format(doc.pk),
)
self.assertEqual(response.status_code, 200)
@ -227,7 +229,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_document_actions_not_existing_file(self):
doc = Document.objects.create(
title="none", filename=os.path.basename("asd"), mime_type="application/pdf"
title="none",
filename=os.path.basename("asd"),
mime_type="application/pdf",
)
response = self.client.get("/api/documents/{}/download/".format(doc.pk))
@ -242,13 +246,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_document_filters(self):
doc1 = Document.objects.create(
title="none1", checksum="A", mime_type="application/pdf"
title="none1",
checksum="A",
mime_type="application/pdf",
)
doc2 = Document.objects.create(
title="none2", checksum="B", mime_type="application/pdf"
title="none2",
checksum="B",
mime_type="application/pdf",
)
doc3 = Document.objects.create(
title="none3", checksum="C", mime_type="application/pdf"
title="none3",
checksum="C",
mime_type="application/pdf",
)
tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)
@ -273,7 +283,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])
response = self.client.get(
"/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id)
"/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -281,7 +291,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc3.id])
response = self.client.get(
"/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id)
"/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -289,7 +299,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])
response = self.client.get(
"/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id)
"/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -297,14 +307,14 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(results[0]["id"], doc3.id)
response = self.client.get(
"/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id)
"/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
self.assertEqual(len(results), 0)
response = self.client.get(
"/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id)
"/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -317,7 +327,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc2.id])
response = self.client.get(
"/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id)
"/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -325,7 +335,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(results[0]["id"], doc1.id)
response = self.client.get(
"/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id)
"/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -443,7 +453,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
for i in range(1, 6):
response = self.client.get(
f"/api/documents/?query=content&page={i}&page_size=10"
f"/api/documents/?query=content&page={i}&page_size=10",
)
results = response.data["results"]
self.assertEqual(response.data["count"], 55)
@ -595,31 +605,35 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id])
self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id])
self.assertCountEqual(
search_query("&correspondent__isnull"), [d2.id, d3.id, d4.id, d5.id]
search_query("&correspondent__isnull"),
[d2.id, d3.id, d4.id, d5.id],
)
self.assertCountEqual(
search_query("&document_type__isnull"), [d1.id, d3.id, d4.id, d5.id]
search_query("&document_type__isnull"),
[d1.id, d3.id, d4.id, d5.id],
)
self.assertCountEqual(
search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)), [d3.id]
search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)),
[d3.id],
)
self.assertCountEqual(search_query("&tags__id__all=" + str(t.id)), [d3.id])
self.assertCountEqual(
search_query("&tags__id__all=" + str(t2.id)), [d3.id, d4.id]
search_query("&tags__id__all=" + str(t2.id)),
[d3.id, d4.id],
)
self.assertIn(
d4.id,
search_query(
"&created__date__lt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
),
)
self.assertNotIn(
d4.id,
search_query(
"&created__date__gt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
),
)
@ -627,40 +641,44 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
d4.id,
search_query(
"&created__date__lt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
),
)
self.assertIn(
d4.id,
search_query(
"&created__date__gt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
),
)
self.assertIn(
d5.id,
search_query(
"&added__date__lt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
"&added__date__lt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
),
)
self.assertNotIn(
d5.id,
search_query(
"&added__date__gt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
"&added__date__gt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
),
)
self.assertNotIn(
d5.id,
search_query(
"&added__date__lt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
"&added__date__lt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
),
)
self.assertIn(
d5.id,
search_query(
"&added__date__gt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
"&added__date__gt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
),
)
@ -700,18 +718,22 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
return [hit["id"] for hit in r.data["results"]]
self.assertListEqual(
search_query("&ordering=archive_serial_number"), [d3.id, d1.id, d2.id]
search_query("&ordering=archive_serial_number"),
[d3.id, d1.id, d2.id],
)
self.assertListEqual(
search_query("&ordering=-archive_serial_number"), [d2.id, d1.id, d3.id]
search_query("&ordering=-archive_serial_number"),
[d2.id, d1.id, d3.id],
)
self.assertListEqual(search_query("&ordering=title"), [d3.id, d2.id, d1.id])
self.assertListEqual(search_query("&ordering=-title"), [d1.id, d2.id, d3.id])
self.assertListEqual(
search_query("&ordering=correspondent__name"), [d1.id, d3.id, d2.id]
search_query("&ordering=correspondent__name"),
[d1.id, d3.id, d2.id],
)
self.assertListEqual(
search_query("&ordering=-correspondent__name"), [d2.id, d3.id, d1.id]
search_query("&ordering=-correspondent__name"),
[d2.id, d3.id, d1.id],
)
def test_statistics(self):
@ -740,10 +762,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload(self, m):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f}
"/api/documents/post_document/",
{"document": f},
)
self.assertEqual(response.status_code, 200)
@ -761,7 +785,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_empty_metadata(self, m):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/",
@ -783,10 +808,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_invalid_form(self, m):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"documenst": f}
"/api/documents/post_document/",
{"documenst": f},
)
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@ -795,10 +822,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_invalid_file(self, m):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f}
"/api/documents/post_document/",
{"document": f},
)
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@ -806,7 +835,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.async_task")
def test_upload_with_title(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/",
@ -824,10 +854,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_with_correspondent(self, async_task):
c = Correspondent.objects.create(name="test-corres")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "correspondent": c.id}
"/api/documents/post_document/",
{"document": f, "correspondent": c.id},
)
self.assertEqual(response.status_code, 200)
@ -840,10 +872,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.async_task")
def test_upload_with_invalid_correspondent(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "correspondent": 3456}
"/api/documents/post_document/",
{"document": f, "correspondent": 3456},
)
self.assertEqual(response.status_code, 400)
@ -853,10 +887,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_with_document_type(self, async_task):
dt = DocumentType.objects.create(name="invoice")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "document_type": dt.id}
"/api/documents/post_document/",
{"document": f, "document_type": dt.id},
)
self.assertEqual(response.status_code, 200)
@ -869,10 +905,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.async_task")
def test_upload_with_invalid_document_type(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "document_type": 34578}
"/api/documents/post_document/",
{"document": f, "document_type": 34578},
)
self.assertEqual(response.status_code, 400)
@ -883,10 +921,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "tags": [t2.id, t1.id]}
"/api/documents/post_document/",
{"document": f, "tags": [t2.id, t1.id]},
)
self.assertEqual(response.status_code, 200)
@ -901,7 +941,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/",
@ -952,7 +993,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_get_metadata_no_archive(self):
doc = Document.objects.create(
title="test", filename="file.pdf", mime_type="application/pdf"
title="test",
filename="file.pdf",
mime_type="application/pdf",
)
shutil.copy(
@ -999,7 +1042,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(
response.data, {"correspondents": [], "tags": [], "document_types": []}
response.data,
{"correspondents": [], "tags": [], "document_types": []},
)
def test_get_suggestions_invalid_doc(self):
@ -1010,10 +1054,15 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_document_types")
def test_get_suggestions(
self, match_document_types, match_tags, match_correspondents
self,
match_document_types,
match_tags,
match_correspondents,
):
doc = Document.objects.create(
title="test", mime_type="application/pdf", content="this is an invoice!"
title="test",
mime_type="application/pdf",
content="this is an invoice!",
)
match_tags.return_value = [Tag(id=56), Tag(id=123)]
match_document_types.return_value = [DocumentType(id=23)]
@ -1094,7 +1143,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(v1.user, self.user)
response = self.client.patch(
f"/api/saved_views/{v1.id}/", {"show_in_sidebar": False}, format="json"
f"/api/saved_views/{v1.id}/",
{"show_in_sidebar": False},
format="json",
)
v1 = SavedView.objects.get(id=v1.id)
@ -1183,7 +1234,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_regex_no_algorithm(self):
for endpoint in ["correspondents", "tags", "document_types"]:
response = self.client.post(
f"/api/{endpoint}/", {"name": "test", "match": "[0-9]"}, format="json"
f"/api/{endpoint}/",
{"name": "test", "match": "[0-9]"},
format="json",
)
self.assertEqual(response.status_code, 201, endpoint)
@ -1200,7 +1253,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_tag_color(self):
response = self.client.post(
"/api/tags/", {"name": "tag", "colour": 3}, format="json"
"/api/tags/",
{"name": "tag", "colour": 3},
format="json",
)
self.assertEqual(response.status_code, 201)
self.assertEqual(Tag.objects.get(id=response.data["id"]).color, "#b2df8a")
@ -1213,14 +1268,17 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_tag_color_invalid(self):
response = self.client.post(
"/api/tags/", {"name": "tag", "colour": 34}, format="json"
"/api/tags/",
{"name": "tag", "colour": 34},
format="json",
)
self.assertEqual(response.status_code, 400)
def test_tag_color_custom(self):
tag = Tag.objects.create(name="test", color="#abcdef")
self.assertEqual(
self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"], 1
self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"],
1,
)
@ -1236,32 +1294,42 @@ class TestDocumentApiV2(DirectoriesMixin, APITestCase):
def test_tag_validate_color(self):
self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test", "color": "#12fFaA"}, format="json"
"/api/tags/",
{"name": "test", "color": "#12fFaA"},
format="json",
).status_code,
201,
)
self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test1", "color": "abcdef"}, format="json"
"/api/tags/",
{"name": "test1", "color": "abcdef"},
format="json",
).status_code,
400,
)
self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test2", "color": "#abcdfg"}, format="json"
"/api/tags/",
{"name": "test2", "color": "#abcdfg"},
format="json",
).status_code,
400,
)
self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test3", "color": "#asd"}, format="json"
"/api/tags/",
{"name": "test3", "color": "#asd"},
format="json",
).status_code,
400,
)
self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test4", "color": "#12121212"}, format="json"
"/api/tags/",
{"name": "test4", "color": "#12121212"},
format="json",
).status_code,
400,
)
@ -1313,10 +1381,16 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
self.t2 = Tag.objects.create(name="t2")
self.doc1 = Document.objects.create(checksum="A", title="A")
self.doc2 = Document.objects.create(
checksum="B", title="B", correspondent=self.c1, document_type=self.dt1
checksum="B",
title="B",
correspondent=self.c1,
document_type=self.dt1,
)
self.doc3 = Document.objects.create(
checksum="C", title="C", correspondent=self.c2, document_type=self.dt2
checksum="C",
title="C",
correspondent=self.c2,
document_type=self.dt2,
)
self.doc4 = Document.objects.create(checksum="D", title="D")
self.doc5 = Document.objects.create(checksum="E", title="E")
@ -1327,7 +1401,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
def test_set_correspondent(self):
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1)
bulk_edit.set_correspondent(
[self.doc1.id, self.doc2.id, self.doc3.id], self.c2.id
[self.doc1.id, self.doc2.id, self.doc3.id],
self.c2.id,
)
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 3)
self.async_task.assert_called_once()
@ -1345,7 +1420,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
def test_set_document_type(self):
self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 1)
bulk_edit.set_document_type(
[self.doc1.id, self.doc2.id, self.doc3.id], self.dt2.id
[self.doc1.id, self.doc2.id, self.doc3.id],
self.dt2.id,
)
self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 3)
self.async_task.assert_called_once()
@ -1363,7 +1439,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
def test_add_tag(self):
self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2)
bulk_edit.add_tag(
[self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id], self.t1.id
[self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id],
self.t1.id,
)
self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 4)
self.async_task.assert_called_once()
@ -1415,7 +1492,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "set_correspondent",
"parameters": {"correspondent": self.c1.id},
}
},
),
content_type="application/json",
)
@ -1435,7 +1512,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "set_correspondent",
"parameters": {"correspondent": None},
}
},
),
content_type="application/json",
)
@ -1455,7 +1532,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "set_document_type",
"parameters": {"document_type": self.dt1.id},
}
},
),
content_type="application/json",
)
@ -1475,7 +1552,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "set_document_type",
"parameters": {"document_type": None},
}
},
),
content_type="application/json",
)
@ -1495,7 +1572,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "add_tag",
"parameters": {"tag": self.t1.id},
}
},
),
content_type="application/json",
)
@ -1515,7 +1592,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "remove_tag",
"parameters": {"tag": self.t1.id},
}
},
),
content_type="application/json",
)
@ -1538,7 +1615,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"add_tags": [self.t1.id],
"remove_tags": [self.t2.id],
},
}
},
),
content_type="application/json",
)
@ -1555,7 +1632,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{"documents": [self.doc1.id], "method": "delete", "parameters": {}}
{"documents": [self.doc1.id], "method": "delete", "parameters": {}},
),
content_type="application/json",
)
@ -1580,7 +1657,11 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{"documents": [self.doc2.id], "method": "exterminate", "parameters": {}}
{
"documents": [self.doc2.id],
"method": "exterminate",
"parameters": {},
},
),
content_type="application/json",
)
@ -1596,7 +1677,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "set_correspondent",
"parameters": {"correspondent": 345657},
}
},
),
content_type="application/json",
)
@ -1613,7 +1694,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "set_correspondent",
"parameters": {},
}
},
),
content_type="application/json",
)
@ -1628,7 +1709,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "set_document_type",
"parameters": {"document_type": 345657},
}
},
),
content_type="application/json",
)
@ -1645,7 +1726,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "set_document_type",
"parameters": {},
}
},
),
content_type="application/json",
)
@ -1660,7 +1741,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "add_tag",
"parameters": {"tag": 345657},
}
},
),
content_type="application/json",
)
@ -1672,7 +1753,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{"documents": [self.doc2.id], "method": "add_tag", "parameters": {}}
{"documents": [self.doc2.id], "method": "add_tag", "parameters": {}},
),
content_type="application/json",
)
@ -1687,7 +1768,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "remove_tag",
"parameters": {"tag": 345657},
}
},
),
content_type="application/json",
)
@ -1699,7 +1780,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}}
{"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}},
),
content_type="application/json",
)
@ -1717,7 +1798,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"add_tags": [self.t2.id, 1657],
"remove_tags": [1123123],
},
}
},
),
content_type="application/json",
)
@ -1731,7 +1812,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "modify_tags",
"parameters": {"remove_tags": [1123123]},
}
},
),
content_type="application/json",
)
@ -1744,7 +1825,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "modify_tags",
"parameters": {"add_tags": [self.t2.id, 1657]},
}
},
),
content_type="application/json",
)
@ -1774,7 +1855,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/selection_data/",
json.dumps(
{"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]}
{"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]},
),
content_type="application/json",
)
@ -1856,7 +1937,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_download/",
json.dumps(
{"documents": [self.doc2.id, self.doc3.id], "content": "originals"}
{"documents": [self.doc2.id, self.doc3.id], "content": "originals"},
),
content_type="application/json",
)
@ -1914,17 +1995,20 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
with self.doc2.source_file as f:
self.assertEqual(
f.read(), zipf.read("originals/2021-01-01 document A.pdf")
f.read(),
zipf.read("originals/2021-01-01 document A.pdf"),
)
with self.doc3.archive_file as f:
self.assertEqual(
f.read(), zipf.read("archive/2020-03-21 document B.pdf")
f.read(),
zipf.read("archive/2020-03-21 document B.pdf"),
)
with self.doc3.source_file as f:
self.assertEqual(
f.read(), zipf.read("originals/2020-03-21 document B.jpg")
f.read(),
zipf.read("originals/2020-03-21 document B.jpg"),
)
def test_filename_clashes(self):
@ -1953,7 +2037,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_download/",
json.dumps(
{"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"}
{"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"},
),
content_type="application/json",
)
@ -1968,13 +2052,16 @@ class TestApiAuth(APITestCase):
self.assertEqual(self.client.get(f"/api/documents/{d.id}/").status_code, 401)
self.assertEqual(
self.client.get(f"/api/documents/{d.id}/download/").status_code, 401
self.client.get(f"/api/documents/{d.id}/download/").status_code,
401,
)
self.assertEqual(
self.client.get(f"/api/documents/{d.id}/preview/").status_code, 401
self.client.get(f"/api/documents/{d.id}/preview/").status_code,
401,
)
self.assertEqual(
self.client.get(f"/api/documents/{d.id}/thumb/").status_code, 401
self.client.get(f"/api/documents/{d.id}/thumb/").status_code,
401,
)
self.assertEqual(self.client.get("/api/tags/").status_code, 401)
@ -1987,10 +2074,12 @@ class TestApiAuth(APITestCase):
self.assertEqual(self.client.get("/api/search/autocomplete/").status_code, 401)
self.assertEqual(self.client.get("/api/documents/bulk_edit/").status_code, 401)
self.assertEqual(
self.client.get("/api/documents/bulk_download/").status_code, 401
self.client.get("/api/documents/bulk_download/").status_code,
401,
)
self.assertEqual(
self.client.get("/api/documents/selection_data/").status_code, 401
self.client.get("/api/documents/selection_data/").status_code,
401,
)
def test_api_version_no_auth(self):

View File

@ -4,10 +4,11 @@ from unittest import mock
from django.core.checks import Error
from django.test import TestCase
from .factories import DocumentFactory
from .. import document_consumer_declaration
from ..checks import changed_password_check, parser_check
from ..checks import changed_password_check
from ..checks import parser_check
from ..models import Document
from ..signals import document_consumer_declaration
from .factories import DocumentFactory
class ChecksTestCase(TestCase):
@ -30,7 +31,7 @@ class ChecksTestCase(TestCase):
[
Error(
"No parsers found. This is a bug. The consumer won't be "
"able to consume any documents without parsers."
)
"able to consume any documents without parsers.",
),
],
)

View File

@ -5,14 +5,15 @@ from unittest import mock
import pytest
from django.conf import settings
from django.test import TestCase, override_settings
from documents.classifier import (
DocumentClassifier,
IncompatibleClassifierVersionError,
load_classifier,
)
from documents.models import Correspondent, Document, Tag, DocumentType
from django.test import override_settings
from django.test import TestCase
from documents.classifier import DocumentClassifier
from documents.classifier import IncompatibleClassifierVersionError
from documents.classifier import load_classifier
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
@ -23,26 +24,37 @@ class TestClassifier(DirectoriesMixin, TestCase):
def generate_test_data(self):
self.c1 = Correspondent.objects.create(
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
name="c1",
matching_algorithm=Correspondent.MATCH_AUTO,
)
self.c2 = Correspondent.objects.create(name="c2")
self.c3 = Correspondent.objects.create(
name="c3", matching_algorithm=Correspondent.MATCH_AUTO
name="c3",
matching_algorithm=Correspondent.MATCH_AUTO,
)
self.t1 = Tag.objects.create(
name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12
name="t1",
matching_algorithm=Tag.MATCH_AUTO,
pk=12,
)
self.t2 = Tag.objects.create(
name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True
name="t2",
matching_algorithm=Tag.MATCH_ANY,
pk=34,
is_inbox_tag=True,
)
self.t3 = Tag.objects.create(
name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45
name="t3",
matching_algorithm=Tag.MATCH_AUTO,
pk=45,
)
self.dt = DocumentType.objects.create(
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
name="dt",
matching_algorithm=DocumentType.MATCH_AUTO,
)
self.dt2 = DocumentType.objects.create(
name="dt2", matching_algorithm=DocumentType.MATCH_AUTO
name="dt2",
matching_algorithm=DocumentType.MATCH_AUTO,
)
self.doc1 = Document.objects.create(
@ -59,7 +71,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
checksum="B",
)
self.doc_inbox = Document.objects.create(
title="doc235", content="aa", checksum="C"
title="doc235",
content="aa",
checksum="C",
)
self.doc1.tags.add(self.t1)
@ -90,27 +104,33 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.generate_test_data()
self.classifier.train()
self.assertListEqual(
list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk]
list(self.classifier.correspondent_classifier.classes_),
[-1, self.c1.pk],
)
self.assertListEqual(
list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk]
list(self.classifier.tags_binarizer.classes_),
[self.t1.pk, self.t3.pk],
)
def testPredict(self):
self.generate_test_data()
self.classifier.train()
self.assertEqual(
self.classifier.predict_correspondent(self.doc1.content), self.c1.pk
self.classifier.predict_correspondent(self.doc1.content),
self.c1.pk,
)
self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
self.assertListEqual(
self.classifier.predict_tags(self.doc1.content), [self.t1.pk]
self.classifier.predict_tags(self.doc1.content),
[self.t1.pk],
)
self.assertListEqual(
self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk]
self.classifier.predict_tags(self.doc2.content),
[self.t1.pk, self.t3.pk],
)
self.assertEqual(
self.classifier.predict_document_type(self.doc1.content), self.dt.pk
self.classifier.predict_document_type(self.doc1.content),
self.dt.pk,
)
self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
@ -133,7 +153,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
current_ver = DocumentClassifier.FORMAT_VERSION
with mock.patch(
"documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver + 1
"documents.classifier.DocumentClassifier.FORMAT_VERSION",
current_ver + 1,
):
# assure that we won't load old classifiers.
self.assertRaises(IncompatibleClassifierVersionError, classifier2.load)
@ -157,7 +178,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertFalse(new_classifier.train())
@override_settings(
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
)
def test_load_and_classify(self):
self.generate_test_data()
@ -169,7 +190,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
def test_one_correspondent_predict(self):
c1 = Correspondent.objects.create(
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
name="c1",
matching_algorithm=Correspondent.MATCH_AUTO,
)
doc1 = Document.objects.create(
title="doc1",
@ -183,7 +205,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
def test_one_correspondent_predict_manydocs(self):
c1 = Correspondent.objects.create(
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
name="c1",
matching_algorithm=Correspondent.MATCH_AUTO,
)
doc1 = Document.objects.create(
title="doc1",
@ -192,7 +215,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
checksum="A",
)
doc2 = Document.objects.create(
title="doc2", content="this is a document from noone", checksum="B"
title="doc2",
content="this is a document from noone",
checksum="B",
)
self.classifier.train()
@ -201,7 +226,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
def test_one_type_predict(self):
dt = DocumentType.objects.create(
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
name="dt",
matching_algorithm=DocumentType.MATCH_AUTO,
)
doc1 = Document.objects.create(
@ -216,7 +242,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
def test_one_type_predict_manydocs(self):
dt = DocumentType.objects.create(
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
name="dt",
matching_algorithm=DocumentType.MATCH_AUTO,
)
doc1 = Document.objects.create(
@ -227,7 +254,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
)
doc2 = Document.objects.create(
title="doc1", content="this is a document from c2", checksum="B"
title="doc1",
content="this is a document from c2",
checksum="B",
)
self.classifier.train()
@ -238,7 +267,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)
doc1.tags.add(t1)
@ -249,7 +280,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)
self.classifier.train()
@ -260,7 +293,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
doc4 = Document.objects.create(
title="doc1", content="this is a document from c4", checksum="D"
title="doc1",
content="this is a document from c4",
checksum="D",
)
doc4.tags.add(t1)
@ -273,16 +308,24 @@ class TestClassifier(DirectoriesMixin, TestCase):
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)
doc2 = Document.objects.create(
title="doc1", content="this is a document from c2", checksum="B"
title="doc1",
content="this is a document from c2",
checksum="B",
)
doc3 = Document.objects.create(
title="doc1", content="this is a document from c3", checksum="C"
title="doc1",
content="this is a document from c3",
checksum="C",
)
doc4 = Document.objects.create(
title="doc1", content="this is a document from c4", checksum="D"
title="doc1",
content="this is a document from c4",
checksum="D",
)
doc1.tags.add(t1)
@ -300,10 +343,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)
doc2 = Document.objects.create(
title="doc2", content="this is a document from c2", checksum="B"
title="doc2",
content="this is a document from c2",
checksum="B",
)
doc1.tags.add(t1)
@ -316,10 +363,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)
doc2 = Document.objects.create(
title="doc2", content="this is a document from c2", checksum="B"
title="doc2",
content="this is a document from c2",
checksum="B",
)
doc1.tags.add(t1)
@ -338,13 +389,15 @@ class TestClassifier(DirectoriesMixin, TestCase):
load.assert_called_once()
@override_settings(
CACHES={"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}}
CACHES={
"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"},
},
)
@override_settings(
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
)
@pytest.mark.skip(
reason="Disabled caching due to high memory usage - need to investigate."
reason="Disabled caching due to high memory usage - need to investigate.",
)
def test_load_classifier_cached(self):
classifier = load_classifier()

View File

@ -6,13 +6,20 @@ from unittest import mock
from unittest.mock import MagicMock
from django.conf import settings
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from .utils import DirectoriesMixin
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError
from ..consumer import Consumer
from ..consumer import ConsumerError
from ..models import Correspondent
from ..models import Document
from ..models import DocumentType
from ..models import FileInfo
from ..models import Tag
from ..parsers import DocumentParser
from ..parsers import ParseError
from ..tasks import sanity_check
from .utils import DirectoriesMixin
class TestAttributes(TestCase):
@ -33,12 +40,18 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
self._test_guess_attributes_from_name(
"- weird but should not break.pdf", None, "- weird but should not break", ()
"- weird but should not break.pdf",
None,
"- weird but should not break",
(),
)
def test_guess_attributes_from_name_when_title_ends_with_dash(self):
self._test_guess_attributes_from_name(
"weird but should not break -.pdf", None, "weird but should not break -", ()
"weird but should not break -.pdf",
None,
"weird but should not break -",
(),
)
@ -53,7 +66,12 @@ class TestFieldPermutations(TestCase):
valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
def _test_guessed_attributes(
self, filename, created=None, correspondent=None, title=None, tags=None
self,
filename,
created=None,
correspondent=None,
title=None,
tags=None,
):
info = FileInfo.from_filename(filename)
@ -131,7 +149,7 @@ class TestFieldPermutations(TestCase):
FILENAME_PARSE_TRANSFORMS=[
(all_patt, "all.gif"),
(all_patt, "anotherall.gif"),
]
],
):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "all")
@ -141,7 +159,7 @@ class TestFieldPermutations(TestCase):
FILENAME_PARSE_TRANSFORMS=[
(none_patt, "none.gif"),
(all_patt, "anotherall.gif"),
]
],
):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "anotherall")
@ -238,7 +256,9 @@ class TestConsumer(DirectoriesMixin, TestCase):
def make_dummy_parser(self, logging_group, progress_callback=None):
return DummyParser(
logging_group, self.dirs.scratch_dir, self.get_test_archive_file()
logging_group,
self.dirs.scratch_dir,
self.get_test_archive_file(),
)
def make_faulty_parser(self, logging_group, progress_callback=None):
@ -257,7 +277,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
"mime_types": {"application/pdf": ".pdf"},
"weight": 0,
},
)
),
]
self.addCleanup(patcher.stop)
@ -282,7 +302,11 @@ class TestConsumer(DirectoriesMixin, TestCase):
def get_test_archive_file(self):
src = os.path.join(
os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"
os.path.dirname(__file__),
"samples",
"documents",
"archive",
"0000001.pdf",
)
dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf")
shutil.copy(src, dst)
@ -296,7 +320,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.content, "The Text")
self.assertEqual(
document.title, os.path.splitext(os.path.basename(filename))[0]
document.title,
os.path.splitext(os.path.basename(filename))[0],
)
self.assertIsNone(document.correspondent)
self.assertIsNone(document.document_type)
@ -339,7 +364,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
override_filename = "Statement for November.pdf"
document = self.consumer.try_consume_file(
filename, override_filename=override_filename
filename,
override_filename=override_filename,
)
self.assertEqual(document.title, "Statement for November")
@ -348,7 +374,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
def testOverrideTitle(self):
document = self.consumer.try_consume_file(
self.get_test_file(), override_title="Override Title"
self.get_test_file(),
override_title="Override Title",
)
self.assertEqual(document.title, "Override Title")
self._assert_first_last_send_progress()
@ -357,7 +384,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
c = Correspondent.objects.create(name="test")
document = self.consumer.try_consume_file(
self.get_test_file(), override_correspondent_id=c.pk
self.get_test_file(),
override_correspondent_id=c.pk,
)
self.assertEqual(document.correspondent.id, c.id)
self._assert_first_last_send_progress()
@ -366,7 +394,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
dt = DocumentType.objects.create(name="test")
document = self.consumer.try_consume_file(
self.get_test_file(), override_document_type_id=dt.pk
self.get_test_file(),
override_document_type_id=dt.pk,
)
self.assertEqual(document.document_type.id, dt.id)
self._assert_first_last_send_progress()
@ -376,7 +405,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
t2 = Tag.objects.create(name="t2")
t3 = Tag.objects.create(name="t3")
document = self.consumer.try_consume_file(
self.get_test_file(), override_tag_ids=[t1.id, t3.id]
self.get_test_file(),
override_tag_ids=[t1.id, t3.id],
)
self.assertIn(t1, document.tags.all())
@ -446,7 +476,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
"mime_types": {"application/pdf": ".pdf"},
"weight": 0,
},
)
),
]
self.assertRaisesMessage(
@ -595,16 +625,16 @@ class TestConsumer(DirectoriesMixin, TestCase):
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
"weight": 0,
},
)
),
]
doc1 = self.consumer.try_consume_file(
os.path.join(settings.CONSUMPTION_DIR, "simple.png")
os.path.join(settings.CONSUMPTION_DIR, "simple.png"),
)
doc2 = self.consumer.try_consume_file(
os.path.join(settings.CONSUMPTION_DIR, "simple.pdf")
os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"),
)
doc3 = self.consumer.try_consume_file(
os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf")
os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"),
)
self.assertEqual(doc1.filename, "simple.png")
@ -691,7 +721,9 @@ class PostConsumeTestCase(TestCase):
with override_settings(POST_CONSUME_SCRIPT=script.name):
c = Correspondent.objects.create(name="my_bank")
doc = Document.objects.create(
title="Test", mime_type="application/pdf", correspondent=c
title="Test",
mime_type="application/pdf",
correspondent=c,
)
tag1 = Tag.objects.create(name="a")
tag2 = Tag.objects.create(name="b")

View File

@ -5,15 +5,16 @@ from uuid import uuid4
from dateutil import tz
from django.conf import settings
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from documents.parsers import parse_date
class TestDate(TestCase):
SAMPLE_FILES = os.path.join(
os.path.dirname(__file__), "../../paperless_tesseract/tests/samples"
os.path.dirname(__file__),
"../../paperless_tesseract/tests/samples",
)
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
@ -111,11 +112,11 @@ class TestDate(TestCase):
@override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_invalid(self, *args):
self.assertIsNone(
parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here")
parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"),
)
@override_settings(
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
)
def test_ignored_dates(self, *args):
text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum"

View File

@ -3,10 +3,12 @@ import tempfile
from pathlib import Path
from unittest import mock
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from django.utils import timezone
from ..models import Document, Correspondent
from ..models import Correspondent
from ..models import Document
class TestDocument(TestCase):

View File

@ -9,17 +9,19 @@ from unittest import mock
from django.conf import settings
from django.db import DatabaseError
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from django.utils import timezone
from ..file_handling import create_source_path_directory
from ..file_handling import delete_empty_directories
from ..file_handling import generate_filename
from ..file_handling import generate_unique_filename
from ..models import Correspondent
from ..models import Document
from ..models import DocumentType
from ..models import Tag
from .utils import DirectoriesMixin
from ..file_handling import (
generate_filename,
create_source_path_directory,
delete_empty_directories,
generate_unique_filename,
)
from ..models import Document, Correspondent, Tag, DocumentType
class TestFileHandling(DirectoriesMixin, TestCase):
@ -34,7 +36,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.storage_type = Document.STORAGE_TYPE_GPG
self.assertEqual(
generate_filename(document), "{:07d}.pdf.gpg".format(document.pk)
generate_filename(document),
"{:07d}.pdf.gpg".format(document.pk),
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -75,7 +78,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True
os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"),
True,
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -93,7 +97,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Test source_path
self.assertEqual(
document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf"
document.source_path,
settings.ORIGINALS_DIR + "/none/none.pdf",
)
# Make the folder read- and execute-only (no writing and no renaming)
@ -105,7 +110,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
True,
)
self.assertEqual(document.filename, "none/none.pdf")
@ -145,7 +151,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
True,
)
self.assertEqual(document.filename, "none/none.pdf")
@ -167,7 +174,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
pk = document.pk
document.delete()
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
False,
)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@ -192,7 +200,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False)
document.delete()
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
False,
)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True)
@ -363,7 +372,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")
doc = Document.objects.create(
title="doc2", checksum="B", mime_type="application/pdf"
title="doc2",
checksum="B",
mime_type="application/pdf",
)
self.assertEqual(generate_filename(doc), "doc2.pdf")
@ -380,12 +391,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
)
@override_settings(
PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}"
PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
)
def test_created_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
doc1 = Document.objects.create(
title="doc1", mime_type="application/pdf", created=d1
title="doc1",
mime_type="application/pdf",
created=d1,
)
self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
@ -395,12 +408,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}"
PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
)
def test_added_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
doc1 = Document.objects.create(
title="doc1", mime_type="application/pdf", added=d1
title="doc1",
mime_type="application/pdf",
added=d1,
)
self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
@ -410,7 +425,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}"
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
)
def test_nested_directory_cleanup(self):
document = Document()
@ -431,7 +446,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.delete()
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False
os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"),
False,
)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@ -456,7 +472,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
os.makedirs(os.path.join(tmp, "notempty", "empty"))
delete_empty_directories(
os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR
os.path.join(tmp, "notempty", "empty"),
root=settings.ORIGINALS_DIR,
)
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True)
@ -483,10 +500,16 @@ class TestFileHandling(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_duplicates(self):
document = Document.objects.create(
mime_type="application/pdf", title="qwe", checksum="A", pk=1
mime_type="application/pdf",
title="qwe",
checksum="A",
pk=1,
)
document2 = Document.objects.create(
mime_type="application/pdf", title="qwe", checksum="B", pk=2
mime_type="application/pdf",
title="qwe",
checksum="B",
pk=2,
)
Path(document.source_path).touch()
Path(document2.source_path).touch()
@ -584,10 +607,12 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertEqual(
doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf")
doc.source_path,
os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"),
)
self.assertEqual(
doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
doc.archive_path,
os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"),
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@ -851,7 +876,10 @@ class TestFilenameGeneration(TestCase):
def test_invalid_characters(self):
doc = Document.objects.create(
title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1"
title="This. is the title.",
mime_type="application/pdf",
pk=1,
checksum="1",
)
self.assertEqual(generate_filename(doc), "This. is the title.pdf")
@ -877,7 +905,9 @@ class TestFilenameGeneration(TestCase):
def run():
doc = Document.objects.create(
checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow"
checksum=str(uuid.uuid4()),
title=str(uuid.uuid4()),
content="wow",
)
doc.filename = generate_unique_filename(doc)
Path(doc.thumbnail_path).touch()

View File

@ -1,7 +1,7 @@
from django.core.management.base import CommandError
from django.test import TestCase
from documents.settings import EXPORTER_FILE_NAME
from ..management.commands.document_importer import Command
@ -12,7 +12,9 @@ class TestImporter(TestCase):
def test_check_manifest_exists(self):
cmd = Command()
self.assertRaises(
CommandError, cmd._check_manifest_exists, "/tmp/manifest.json"
CommandError,
cmd._check_manifest_exists,
"/tmp/manifest.json",
)
def test_check_manifest(self):
@ -26,11 +28,11 @@ class TestImporter(TestCase):
self.assertTrue("The manifest file contains a record" in str(cm.exception))
cmd.manifest = [
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
]
# self.assertRaises(CommandError, cmd._check_manifest)
with self.assertRaises(CommandError) as cm:
cmd._check_manifest()
self.assertTrue(
'The manifest file refers to "noexist.pdf"' in str(cm.exception)
'The manifest file refers to "noexist.pdf"' in str(cm.exception),
)

View File

@ -1,5 +1,4 @@
from django.test import TestCase
from documents import index
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
@ -9,7 +8,9 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
def test_auto_complete(self):
doc1 = Document.objects.create(
title="doc1", checksum="A", content="test test2 test3"
title="doc1",
checksum="A",
content="test test2 test3",
)
doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")
@ -21,10 +22,12 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
ix = index.open_index()
self.assertListEqual(
index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"]
index.autocomplete(ix, "tes"),
[b"test3", b"test", b"test2"],
)
self.assertListEqual(
index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"]
index.autocomplete(ix, "tes", limit=3),
[b"test3", b"test", b"test2"],
)
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])

View File

@ -1,16 +1,14 @@
import hashlib
import tempfile
import filecmp
import hashlib
import os
import shutil
import tempfile
from pathlib import Path
from unittest import mock
from django.test import TestCase, override_settings
from django.core.management import call_command
from django.test import override_settings
from django.test import TestCase
from documents.file_handling import generate_filename
from documents.management.commands.document_archiver import handle_document
from documents.models import Document
@ -34,7 +32,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
doc = self.make_models()
shutil.copy(
sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
sample_file,
os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
)
call_command("document_archiver")
@ -43,7 +42,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
doc = self.make_models()
shutil.copy(
sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
sample_file,
os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
)
handle_document(doc.pk)
@ -90,7 +90,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
)
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
shutil.copy(
sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf")
sample_file,
os.path.join(self.dirs.originals_dir, f"document_01.pdf"),
)
handle_document(doc2.pk)
@ -120,7 +121,9 @@ class TestDecryptDocuments(TestCase):
os.makedirs(thumb_dir, exist_ok=True)
override_settings(
ORIGINALS_DIR=originals_dir, THUMBNAIL_DIR=thumb_dir, PASSPHRASE="test"
ORIGINALS_DIR=originals_dir,
THUMBNAIL_DIR=thumb_dir,
PASSPHRASE="test",
).enable()
doc = Document.objects.create(
@ -206,7 +209,7 @@ class TestRenamer(DirectoriesMixin, TestCase):
class TestCreateClassifier(TestCase):
@mock.patch(
"documents.management.commands.document_create_classifier.train_classifier"
"documents.management.commands.document_create_classifier.train_classifier",
)
def test_create_classifier(self, m):
call_command("document_create_classifier")
@ -224,7 +227,10 @@ class TestSanityChecker(DirectoriesMixin, TestCase):
def test_errors(self):
doc = Document.objects.create(
title="test", content="test", filename="test.pdf", checksum="abc"
title="test",
content="test",
filename="test.pdf",
checksum="abc",
)
Path(doc.source_path).touch()
Path(doc.thumbnail_path).touch()

View File

@ -6,12 +6,13 @@ from time import sleep
from unittest import mock
from django.conf import settings
from django.core.management import call_command, CommandError
from django.test import override_settings, TransactionTestCase
from documents.models import Tag
from django.core.management import call_command
from django.core.management import CommandError
from django.test import override_settings
from django.test import TransactionTestCase
from documents.consumer import ConsumerError
from documents.management.commands import document_consumer
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
@ -41,7 +42,7 @@ class ConsumerMixin:
super(ConsumerMixin, self).setUp()
self.t = None
patcher = mock.patch(
"documents.management.commands.document_consumer.async_task"
"documents.management.commands.document_consumer.async_task",
)
self.task_mock = patcher.start()
self.addCleanup(patcher.stop)
@ -208,13 +209,16 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.t_start()
shutil.copy(
self.sample_file, os.path.join(self.dirs.consumption_dir, ".DS_STORE")
self.sample_file,
os.path.join(self.dirs.consumption_dir, ".DS_STORE"),
)
shutil.copy(
self.sample_file, os.path.join(self.dirs.consumption_dir, "my_file.pdf")
self.sample_file,
os.path.join(self.dirs.consumption_dir, "my_file.pdf"),
)
shutil.copy(
self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_file.pdf")
self.sample_file,
os.path.join(self.dirs.consumption_dir, "._my_file.pdf"),
)
shutil.copy(
self.sample_file,
@ -258,7 +262,9 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
@override_settings(
CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20
CONSUMER_POLLING=1,
CONSUMER_POLLING_DELAY=3,
CONSUMER_POLLING_RETRY_COUNT=20,
)
class TestConsumerPolling(TestConsumer):
# just do all the tests with polling
@ -319,7 +325,9 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)
@override_settings(
CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20
CONSUMER_POLLING=1,
CONSUMER_POLLING_DELAY=1,
CONSUMER_POLLING_RETRY_COUNT=20,
)
def test_consume_file_with_path_tags_polling(self):
self.test_consume_file_with_path_tags()

View File

@ -7,13 +7,17 @@ from pathlib import Path
from unittest import mock
from django.core.management import call_command
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from documents.management.commands import document_exporter
from documents.models import Document, Tag, DocumentType, Correspondent
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.sanity_checker import check_sanity
from documents.settings import EXPORTER_FILE_NAME
from documents.tests.utils import DirectoriesMixin, paperless_environment
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import paperless_environment
class TestExportImport(DirectoriesMixin, TestCase):
@ -66,8 +70,9 @@ class TestExportImport(DirectoriesMixin, TestCase):
def _get_document_from_manifest(self, manifest, id):
f = list(
filter(
lambda d: d["model"] == "documents.document" and d["pk"] == id, manifest
)
lambda d: d["model"] == "documents.document" and d["pk"] == id,
manifest,
),
)
if len(f) == 1:
return f[0]
@ -76,7 +81,10 @@ class TestExportImport(DirectoriesMixin, TestCase):
@override_settings(PASSPHRASE="test")
def _do_export(
self, use_filename_format=False, compare_checksums=False, delete=False
self,
use_filename_format=False,
compare_checksums=False,
delete=False,
):
args = ["document_exporter", self.target]
if use_filename_format:
@ -104,7 +112,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertEqual(len(manifest), 8)
self.assertEqual(
len(list(filter(lambda e: e["model"] == "documents.document", manifest))), 4
len(list(filter(lambda e: e["model"] == "documents.document", manifest))),
4,
)
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
@ -129,7 +138,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
for element in manifest:
if element["model"] == "documents.document":
fname = os.path.join(
self.target, element[document_exporter.EXPORTER_FILE_NAME]
self.target,
element[document_exporter.EXPORTER_FILE_NAME],
)
self.assertTrue(os.path.exists(fname))
self.assertTrue(
@ -137,8 +147,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
os.path.join(
self.target,
element[document_exporter.EXPORTER_THUMBNAIL_NAME],
)
)
),
),
)
with open(fname, "rb") as f:
@ -146,12 +156,14 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertEqual(checksum, element["fields"]["checksum"])
self.assertEqual(
element["fields"]["storage_type"], Document.STORAGE_TYPE_UNENCRYPTED
element["fields"]["storage_type"],
Document.STORAGE_TYPE_UNENCRYPTED,
)
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
fname = os.path.join(
self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME]
self.target,
element[document_exporter.EXPORTER_ARCHIVE_NAME],
)
self.assertTrue(os.path.exists(fname))
@ -188,7 +200,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
)
with override_settings(
PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"
PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
):
self.test_exporter(use_filename_format=True)
@ -205,7 +217,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2"
"documents.management.commands.document_exporter.shutil.copy2",
) as m:
self._do_export()
m.assert_not_called()
@ -216,7 +228,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
Path(self.d1.source_path).touch()
with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2"
"documents.management.commands.document_exporter.shutil.copy2",
) as m:
self._do_export()
self.assertEqual(m.call_count, 1)
@ -239,7 +251,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2"
"documents.management.commands.document_exporter.shutil.copy2",
) as m:
self._do_export()
m.assert_not_called()
@ -250,7 +262,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.d2.save()
with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2"
"documents.management.commands.document_exporter.shutil.copy2",
) as m:
self._do_export(compare_checksums=True)
self.assertEqual(m.call_count, 1)
@ -270,26 +282,29 @@ class TestExportImport(DirectoriesMixin, TestCase):
doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
self.assertTrue(
os.path.isfile(
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
)
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
),
)
self.d3.delete()
manifest = self._do_export()
self.assertRaises(
ValueError, self._get_document_from_manifest, manifest, self.d3.id
ValueError,
self._get_document_from_manifest,
manifest,
self.d3.id,
)
self.assertTrue(
os.path.isfile(
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
)
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
),
)
manifest = self._do_export(delete=True)
self.assertFalse(
os.path.isfile(
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
)
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
),
)
self.assertTrue(len(manifest), 6)
@ -316,7 +331,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
self.assertTrue(
os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf"))
os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")),
)
def test_export_missing_files(self):

View File

@ -1,35 +1,50 @@
from django.core.management import call_command
from django.test import TestCase
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
class TestRetagger(DirectoriesMixin, TestCase):
def make_models(self):
self.d1 = Document.objects.create(
checksum="A", title="A", content="first document"
checksum="A",
title="A",
content="first document",
)
self.d2 = Document.objects.create(
checksum="B", title="B", content="second document"
checksum="B",
title="B",
content="second document",
)
self.d3 = Document.objects.create(
checksum="C", title="C", content="unrelated document"
checksum="C",
title="C",
content="unrelated document",
)
self.d4 = Document.objects.create(
checksum="D", title="D", content="auto document"
checksum="D",
title="D",
content="auto document",
)
self.tag_first = Tag.objects.create(
name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY
name="tag1",
match="first",
matching_algorithm=Tag.MATCH_ANY,
)
self.tag_second = Tag.objects.create(
name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY
name="tag2",
match="second",
matching_algorithm=Tag.MATCH_ANY,
)
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
self.tag_no_match = Tag.objects.create(name="test2")
self.tag_auto = Tag.objects.create(
name="tagauto", matching_algorithm=Tag.MATCH_AUTO
name="tagauto",
matching_algorithm=Tag.MATCH_AUTO,
)
self.d3.tags.add(self.tag_inbox)
@ -37,17 +52,25 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.d4.tags.add(self.tag_auto)
self.correspondent_first = Correspondent.objects.create(
name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY
name="c1",
match="first",
matching_algorithm=Correspondent.MATCH_ANY,
)
self.correspondent_second = Correspondent.objects.create(
name="c2", match="second", matching_algorithm=Correspondent.MATCH_ANY
name="c2",
match="second",
matching_algorithm=Correspondent.MATCH_ANY,
)
self.doctype_first = DocumentType.objects.create(
name="dt1", match="first", matching_algorithm=DocumentType.MATCH_ANY
name="dt1",
match="first",
matching_algorithm=DocumentType.MATCH_ANY,
)
self.doctype_second = DocumentType.objects.create(
name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY
name="dt2",
match="second",
matching_algorithm=DocumentType.MATCH_ANY,
)
def get_updated_docs(self):
@ -98,10 +121,12 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
self.assertCountEqual(
[tag.id for tag in d_first.tags.all()], [self.tag_first.id]
[tag.id for tag in d_first.tags.all()],
[self.tag_first.id],
)
self.assertCountEqual(
[tag.id for tag in d_second.tags.all()], [self.tag_second.id]
[tag.id for tag in d_second.tags.all()],
[self.tag_second.id],
)
self.assertCountEqual(
[tag.id for tag in d_unrelated.tags.all()],
@ -133,7 +158,10 @@ class TestRetagger(DirectoriesMixin, TestCase):
def test_add_tags_suggest_url(self):
call_command(
"document_retagger", "--tags", "--suggest", "--base-url=http://localhost"
"document_retagger",
"--tags",
"--suggest",
"--base-url=http://localhost",
)
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

View File

@ -5,9 +5,11 @@ from unittest import mock
from django.contrib.auth.models import User
from django.core.management import call_command
from django.test import TestCase
from documents.management.commands.document_thumbnails import _process_document
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin

View File

@ -4,9 +4,11 @@ from unittest import mock
from django.core.management import call_command
from django.test import TestCase
from documents.management.commands.document_thumbnails import _process_document
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin

View File

@ -4,10 +4,14 @@ from random import randint
from django.contrib.admin.models import LogEntry
from django.contrib.auth.models import User
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from .. import matching
from ..models import Correspondent, Document, Tag, DocumentType
from ..models import Correspondent
from ..models import Document
from ..models import DocumentType
from ..models import Tag
from ..signals import document_consumption_finished
@ -209,7 +213,8 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
TestCase.setUp(self)
User.objects.create_user(username="test_consumer", password="12345")
self.doc_contains = Document.objects.create(
content="I contain the keyword.", mime_type="application/pdf"
content="I contain the keyword.",
mime_type="application/pdf",
)
self.index_dir = tempfile.mkdtemp()
@ -221,43 +226,56 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
def test_tag_applied_any(self):
t1 = Tag.objects.create(
name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY
name="test",
match="keyword",
matching_algorithm=Tag.MATCH_ANY,
)
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)
self.assertTrue(list(self.doc_contains.tags.all()) == [t1])
def test_tag_not_applied(self):
Tag.objects.create(
name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY
name="test",
match="no-match",
matching_algorithm=Tag.MATCH_ANY,
)
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)
self.assertTrue(list(self.doc_contains.tags.all()) == [])
def test_correspondent_applied(self):
correspondent = Correspondent.objects.create(
name="test", match="keyword", matching_algorithm=Correspondent.MATCH_ANY
name="test",
match="keyword",
matching_algorithm=Correspondent.MATCH_ANY,
)
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)
self.assertTrue(self.doc_contains.correspondent == correspondent)
def test_correspondent_not_applied(self):
Tag.objects.create(
name="test", match="no-match", matching_algorithm=Correspondent.MATCH_ANY
name="test",
match="no-match",
matching_algorithm=Correspondent.MATCH_ANY,
)
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)
self.assertEqual(self.doc_contains.correspondent, None)
def test_logentry_created(self):
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)
self.assertEqual(LogEntry.objects.count(), 1)

View File

@ -6,9 +6,9 @@ from unittest import mock
from django.conf import settings
from django.test import override_settings
from documents.parsers import ParseError
from documents.tests.utils import DirectoriesMixin, TestMigrations
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import TestMigrations
STORAGE_TYPE_GPG = "gpg"
@ -93,10 +93,18 @@ def make_test_document(
simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
simple_pdf2 = os.path.join(
os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf"
os.path.dirname(__file__),
"samples",
"documents",
"originals",
"0000002.pdf",
)
simple_pdf3 = os.path.join(
os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf"
os.path.dirname(__file__),
"samples",
"documents",
"originals",
"0000003.pdf",
)
simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
@ -121,19 +129,43 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
simple_pdf,
)
self.no_text = make_test_document(
Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf
Document,
"no-text",
"image/png",
simple_png2,
"no-text.png",
simple_pdf,
)
self.doc_no_archive = make_test_document(
Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
Document,
"no_archive",
"text/plain",
simple_txt,
"no_archive.txt",
)
self.clash1 = make_test_document(
Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
Document,
"clash",
"application/pdf",
simple_pdf,
"clash.pdf",
simple_pdf,
)
self.clash2 = make_test_document(
Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf
Document,
"clash",
"image/jpeg",
simple_jpg,
"clash.jpg",
simple_pdf,
)
self.clash3 = make_test_document(
Document, "clash", "image/png", simple_png, "clash.png", simple_pdf
Document,
"clash",
"image/png",
simple_png,
"clash.png",
simple_pdf,
)
self.clash4 = make_test_document(
Document,
@ -147,7 +179,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
self.assertNotEqual(
archive_path_old(self.clash1), archive_path_old(self.clash4)
archive_path_old(self.clash1),
archive_path_old(self.clash4),
)
def testArchiveFilesMigrated(self):
@ -171,19 +204,23 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
self.assertEqual(archive_checksum, doc.archive_checksum)
self.assertEqual(
Document.objects.filter(archive_checksum__isnull=False).count(), 6
Document.objects.filter(archive_checksum__isnull=False).count(),
6,
)
def test_filenames(self):
Document = self.apps.get_model("documents", "Document")
self.assertEqual(
Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
Document.objects.get(id=self.unrelated.id).archive_filename,
"unrelated.pdf",
)
self.assertEqual(
Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
Document.objects.get(id=self.no_text.id).archive_filename,
"no-text.pdf",
)
self.assertEqual(
Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
Document.objects.get(id=self.doc_no_archive.id).archive_filename,
None,
)
self.assertEqual(
Document.objects.get(id=self.clash1.id).archive_filename,
@ -198,7 +235,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
f"{self.clash3.id:07}.pdf",
)
self.assertEqual(
Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
Document.objects.get(id=self.clash4.id).archive_filename,
"clash.png.pdf",
)
@ -207,16 +245,20 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
def test_filenames(self):
Document = self.apps.get_model("documents", "Document")
self.assertEqual(
Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
Document.objects.get(id=self.unrelated.id).archive_filename,
"unrelated.pdf",
)
self.assertEqual(
Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
Document.objects.get(id=self.no_text.id).archive_filename,
"no-text.pdf",
)
self.assertEqual(
Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
Document.objects.get(id=self.doc_no_archive.id).archive_filename,
None,
)
self.assertEqual(
Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf"
Document.objects.get(id=self.clash1.id).archive_filename,
"none/clash.pdf",
)
self.assertEqual(
Document.objects.get(id=self.clash2.id).archive_filename,
@ -227,7 +269,8 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
"none/clash_02.pdf",
)
self.assertEqual(
Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
Document.objects.get(id=self.clash4.id).archive_filename,
"clash.png.pdf",
)
@ -248,12 +291,19 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
Document = self.apps.get_model("documents", "Document")
doc = make_test_document(
Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
Document,
"clash",
"application/pdf",
simple_pdf,
"clash.pdf",
simple_pdf,
)
os.unlink(archive_path_old(doc))
self.assertRaisesMessage(
ValueError, "does not exist at: ", self.performMigration
ValueError,
"does not exist at: ",
self.performMigration,
)
def test_parser_missing(self):
@ -277,7 +327,9 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
)
self.assertRaisesMessage(
ValueError, "no parsers are available", self.performMigration
ValueError,
"no parsers are available",
self.performMigration,
)
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
@ -286,7 +338,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
Document = self.apps.get_model("documents", "Document")
doc1 = make_test_document(
Document, "document", "image/png", simple_png, "document.png", simple_pdf
Document,
"document",
"image/png",
simple_png,
"document.png",
simple_pdf,
)
doc2 = make_test_document(
Document,
@ -311,8 +368,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
filter(
lambda log: "Parse error, will try again in 5 seconds" in log,
capture.output,
)
)
),
),
),
4,
)
@ -324,8 +381,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
lambda log: "Unable to regenerate archive document for ID:"
in log,
capture.output,
)
)
),
),
),
2,
)
@ -347,7 +404,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
Document = self.apps.get_model("documents", "Document")
doc1 = make_test_document(
Document, "document", "image/png", simple_png, "document.png", simple_pdf
Document,
"document",
"image/png",
simple_png,
"document.png",
simple_pdf,
)
doc2 = make_test_document(
Document,
@ -368,8 +430,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
lambda log: "Parser did not return an archive document for document"
in log,
capture.output,
)
)
),
),
),
2,
)
@ -405,7 +467,11 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
"unrelated.pdf",
)
doc_no_archive = make_test_document(
Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
Document,
"no_archive",
"text/plain",
simple_txt,
"no_archive.txt",
)
clashB = make_test_document(
Document,
@ -434,13 +500,14 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
self.assertEqual(archive_checksum, doc.archive_checksum)
self.assertEqual(
Document.objects.filter(archive_checksum__isnull=False).count(), 2
Document.objects.filter(archive_checksum__isnull=False).count(),
2,
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(
TestMigrateArchiveFilesBackwards
TestMigrateArchiveFilesBackwards,
):
pass
@ -505,5 +572,7 @@ class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
)
self.assertRaisesMessage(
ValueError, "file already exists.", self.performMigration
ValueError,
"file already exists.",
self.performMigration,
)

View File

@ -3,9 +3,9 @@ import shutil
from django.conf import settings
from django.test import override_settings
from documents.parsers import get_default_file_extension
from documents.tests.utils import DirectoriesMixin, TestMigrations
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import TestMigrations
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"
@ -46,7 +46,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
doc = Document.objects.create(
title="test", file_type="pdf", filename="file1.pdf"
title="test",
file_type="pdf",
filename="file1.pdf",
)
self.doc_id = doc.id
shutil.copy(
@ -55,7 +57,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
)
doc2 = Document.objects.create(
checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG
checksum="B",
file_type="pdf",
storage_type=STORAGE_TYPE_GPG,
)
self.doc2_id = doc2.id
shutil.copy(
@ -88,7 +92,9 @@ class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
doc = Document.objects.create(
title="test", mime_type="application/pdf", filename="file1.pdf"
title="test",
mime_type="application/pdf",
filename="file1.pdf",
)
self.doc_id = doc.id
shutil.copy(

View File

@ -1,4 +1,5 @@
from documents.tests.utils import DirectoriesMixin, TestMigrations
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import TestMigrations
class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations):

View File

@ -1,4 +1,5 @@
from documents.tests.utils import DirectoriesMixin, TestMigrations
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import TestMigrations
class TestMigrateTagColor(DirectoriesMixin, TestMigrations):

View File

@ -1,7 +1,9 @@
from django.test import TestCase
from .factories import DocumentFactory, CorrespondentFactory
from ..models import Document, Correspondent
from ..models import Correspondent
from ..models import Document
from .factories import CorrespondentFactory
from .factories import DocumentFactory
class CorrespondentTestCase(TestCase):

View File

@ -4,16 +4,14 @@ import tempfile
from tempfile import TemporaryDirectory
from unittest import mock
from django.test import TestCase, override_settings
from documents.parsers import (
get_parser_class,
get_supported_file_extensions,
get_default_file_extension,
get_parser_class_for_mime_type,
DocumentParser,
is_file_ext_supported,
)
from django.test import override_settings
from django.test import TestCase
from documents.parsers import DocumentParser
from documents.parsers import get_default_file_extension
from documents.parsers import get_parser_class
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import get_supported_file_extensions
from documents.parsers import is_file_ext_supported
from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_text.parsers import TextDocumentParser

View File

@ -6,9 +6,9 @@ from pathlib import Path
import filelock
from django.conf import settings
from django.test import TestCase
from documents.models import Document
from documents.sanity_checker import check_sanity, SanityCheckMessages
from documents.sanity_checker import check_sanity
from documents.sanity_checker import SanityCheckMessages
from documents.tests.utils import DirectoriesMixin
@ -23,7 +23,8 @@ class TestSanityCheckMessages(TestCase):
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
self.assertEqual(
capture.records[0].message, "Sanity checker detected no issues."
capture.records[0].message,
"Sanity checker detected no issues.",
)
def test_info(self):

View File

@ -2,8 +2,8 @@ import logging
from unittest import mock
from django.test import TestCase
from paperless.settings import default_task_workers, default_threads_per_worker
from paperless.settings import default_task_workers
from paperless.settings import default_threads_per_worker
class TestSettings(TestCase):
@ -21,7 +21,7 @@ class TestSettings(TestCase):
def test_workers_threads(self):
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count"
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i

View File

@ -4,10 +4,13 @@ from unittest import mock
from django.conf import settings
from django.test import TestCase
from django.utils import timezone
from documents import tasks
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.sanity_checker import SanityCheckMessages, SanityCheckFailedException
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from documents.sanity_checker import SanityCheckMessages
from documents.tests.utils import DirectoriesMixin
@ -106,7 +109,8 @@ class TestTasks(DirectoriesMixin, TestCase):
messages.warning("Some warning")
m.return_value = messages
self.assertEqual(
tasks.sanity_check(), "Sanity check exited with warnings. See log."
tasks.sanity_check(),
"Sanity check exited with warnings. See log.",
)
m.assert_called_once()
@ -116,7 +120,8 @@ class TestTasks(DirectoriesMixin, TestCase):
messages.info("Some info")
m.return_value = messages
self.assertEqual(
tasks.sanity_check(), "Sanity check exited with infos. See log."
tasks.sanity_check(),
"Sanity check exited with infos. See log.",
)
m.assert_called_once()

View File

@ -25,7 +25,7 @@ class TestViews(TestCase):
]:
if language_given:
self.client.cookies.load(
{settings.LANGUAGE_COOKIE_NAME: language_given}
{settings.LANGUAGE_COOKIE_NAME: language_given},
)
elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys():
self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME)
@ -51,5 +51,6 @@ class TestViews(TestCase):
f"frontend/{language_actual}/polyfills.js",
)
self.assertEqual(
response.context_data["main_js"], f"frontend/{language_actual}/main.js"
response.context_data["main_js"],
f"frontend/{language_actual}/main.js",
)

View File

@ -7,7 +7,8 @@ from contextlib import contextmanager
from django.apps import apps
from django.db import connection
from django.db.migrations.executor import MigrationExecutor
from django.test import override_settings, TransactionTestCase
from django.test import override_settings
from django.test import TransactionTestCase
def setup_directories():
@ -97,7 +98,7 @@ class TestMigrations(TransactionTestCase):
assert (
self.migrate_from and self.migrate_to
), "TestCase '{}' must define migrate_from and migrate_to properties".format(
type(self).__name__
type(self).__name__,
)
self.migrate_from = [(self.app, self.migrate_from)]
self.migrate_to = [(self.app, self.migrate_to)]

View File

@ -5,63 +5,70 @@ import uuid
import zipfile
from datetime import datetime
from time import mktime
from urllib.parse import quote_plus
from unicodedata import normalize
from urllib.parse import quote_plus
from django.conf import settings
from django.db.models import Count, Max, Case, When, IntegerField
from django.db.models import Case
from django.db.models import Count
from django.db.models import IntegerField
from django.db.models import Max
from django.db.models import When
from django.db.models.functions import Lower
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.http import Http404
from django.http import HttpResponse
from django.http import HttpResponseBadRequest
from django.utils.translation import get_language
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from paperless.db import GnuPG
from paperless.views import StandardPagination
from rest_framework import parsers
from rest_framework.decorators import action
from rest_framework.exceptions import NotFound
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.filters import OrderingFilter
from rest_framework.filters import SearchFilter
from rest_framework.generics import GenericAPIView
from rest_framework.mixins import (
DestroyModelMixin,
ListModelMixin,
RetrieveModelMixin,
UpdateModelMixin,
)
from rest_framework.mixins import DestroyModelMixin
from rest_framework.mixins import ListModelMixin
from rest_framework.mixins import RetrieveModelMixin
from rest_framework.mixins import UpdateModelMixin
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework.viewsets import GenericViewSet, ModelViewSet, ViewSet
from rest_framework.viewsets import GenericViewSet
from rest_framework.viewsets import ModelViewSet
from rest_framework.viewsets import ViewSet
from paperless.db import GnuPG
from paperless.views import StandardPagination
from .bulk_download import (
OriginalAndArchiveStrategy,
OriginalsOnlyStrategy,
ArchiveOnlyStrategy,
)
from .bulk_download import ArchiveOnlyStrategy
from .bulk_download import OriginalAndArchiveStrategy
from .bulk_download import OriginalsOnlyStrategy
from .classifier import load_classifier
from .filters import (
CorrespondentFilterSet,
DocumentFilterSet,
TagFilterSet,
DocumentTypeFilterSet,
)
from .matching import match_correspondents, match_tags, match_document_types
from .models import Correspondent, Document, Tag, DocumentType, SavedView
from .filters import CorrespondentFilterSet
from .filters import DocumentFilterSet
from .filters import DocumentTypeFilterSet
from .filters import TagFilterSet
from .matching import match_correspondents
from .matching import match_document_types
from .matching import match_tags
from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import SavedView
from .models import Tag
from .parsers import get_parser_class_for_mime_type
from .serialisers import (
CorrespondentSerializer,
DocumentSerializer,
TagSerializerVersion1,
TagSerializer,
DocumentTypeSerializer,
PostDocumentSerializer,
SavedViewSerializer,
BulkEditSerializer,
DocumentListSerializer,
BulkDownloadSerializer,
)
from .serialisers import BulkDownloadSerializer
from .serialisers import BulkEditSerializer
from .serialisers import CorrespondentSerializer
from .serialisers import DocumentListSerializer
from .serialisers import DocumentSerializer
from .serialisers import DocumentTypeSerializer
from .serialisers import PostDocumentSerializer
from .serialisers import SavedViewSerializer
from .serialisers import TagSerializer
from .serialisers import TagSerializerVersion1
logger = logging.getLogger("paperless.api")
@ -89,16 +96,14 @@ class IndexView(TemplateView):
context["full_name"] = self.request.user.get_full_name()
context["styles_css"] = f"frontend/{self.get_language()}/styles.css"
context["runtime_js"] = f"frontend/{self.get_language()}/runtime.js"
context[
"polyfills_js"
] = f"frontend/{self.get_language()}/polyfills.js" # NOQA: E501
context["polyfills_js"] = f"frontend/{self.get_language()}/polyfills.js"
context["main_js"] = f"frontend/{self.get_language()}/main.js"
context[
"webmanifest"
] = f"frontend/{self.get_language()}/manifest.webmanifest" # NOQA: E501
] = f"frontend/{self.get_language()}/manifest.webmanifest" # noqa: E501
context[
"apple_touch_icon"
] = f"frontend/{self.get_language()}/apple-touch-icon.png" # NOQA: E501
] = f"frontend/{self.get_language()}/apple-touch-icon.png" # noqa: E501
return context
@ -106,7 +111,8 @@ class CorrespondentViewSet(ModelViewSet):
model = Correspondent
queryset = Correspondent.objects.annotate(
document_count=Count("documents"), last_correspondence=Max("documents__created")
document_count=Count("documents"),
last_correspondence=Max("documents__created"),
).order_by(Lower("name"))
serializer_class = CorrespondentSerializer
@ -127,7 +133,7 @@ class TagViewSet(ModelViewSet):
model = Tag
queryset = Tag.objects.annotate(document_count=Count("documents")).order_by(
Lower("name")
Lower("name"),
)
def get_serializer_class(self):
@ -147,7 +153,7 @@ class DocumentTypeViewSet(ModelViewSet):
model = DocumentType
queryset = DocumentType.objects.annotate(
document_count=Count("documents")
document_count=Count("documents"),
).order_by(Lower("name"))
serializer_class = DocumentTypeSerializer
@ -220,9 +226,7 @@ class DocumentViewSet(
def file_response(self, pk, request, disposition):
doc = Document.objects.get(id=pk)
if (
not self.original_requested(request) and doc.has_archive_version
): # NOQA: E501
if not self.original_requested(request) and doc.has_archive_version:
file_handle = doc.archive_file
filename = doc.get_public_filename(archive=True)
mime_type = "application/pdf"
@ -258,7 +262,7 @@ class DocumentViewSet(
try:
return parser.extract_metadata(file, mime_type)
except Exception as e:
except Exception:
# TODO: cover GPG errors, remove later.
return []
else:
@ -291,7 +295,8 @@ class DocumentViewSet(
if doc.has_archive_version:
meta["archive_size"] = self.get_filesize(doc.archive_path)
meta["archive_metadata"] = self.get_metadata(
doc.archive_path, "application/pdf"
doc.archive_path,
"application/pdf",
)
else:
meta["archive_size"] = None
@ -315,7 +320,7 @@ class DocumentViewSet(
"document_types": [
dt.id for dt in match_document_types(doc, classifier)
],
}
},
)
@action(methods=["get"], detail=True)
@ -357,7 +362,7 @@ class SearchResultSerializer(DocumentSerializer):
"score": instance.score,
"highlights": instance.highlights("content", text=doc.content)
if doc
else None, # NOQA: E501
else None,
"rank": instance.rank,
}
@ -500,7 +505,9 @@ class PostDocumentView(GenericAPIView):
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
with tempfile.NamedTemporaryFile(
prefix="paperless-upload-", dir=settings.SCRATCH_DIR, delete=False
prefix="paperless-upload-",
dir=settings.SCRATCH_DIR,
delete=False,
) as f:
f.write(doc_data)
os.utime(f.name, times=(t, t))
@ -537,20 +544,20 @@ class SelectionDataView(GenericAPIView):
correspondents = Correspondent.objects.annotate(
document_count=Count(
Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
)
Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
),
)
tags = Tag.objects.annotate(
document_count=Count(
Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
)
Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
),
)
types = DocumentType.objects.annotate(
document_count=Count(
Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
)
Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
),
)
r = Response(
@ -565,7 +572,7 @@ class SelectionDataView(GenericAPIView):
"selected_document_types": [
{"id": t.id, "document_count": t.document_count} for t in types
],
}
},
)
return r
@ -612,7 +619,7 @@ class StatisticsView(APIView):
{
"documents_total": documents_total,
"documents_inbox": documents_inbox,
}
},
)
@ -632,7 +639,9 @@ class BulkDownloadView(GenericAPIView):
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
temp = tempfile.NamedTemporaryFile(
dir=settings.SCRATCH_DIR, suffix="-compressed-archive", delete=False
dir=settings.SCRATCH_DIR,
suffix="-compressed-archive",
delete=False,
)
if content == "both":
@ -651,7 +660,8 @@ class BulkDownloadView(GenericAPIView):
with open(temp.name, "rb") as f:
response = HttpResponse(f, content_type="application/zip")
response["Content-Disposition"] = '{}; filename="{}"'.format(
"attachment", "documents.zip"
"attachment",
"documents.zip",
)
return response

View File

@ -1 +1,4 @@
from .checks import paths_check, binaries_check
from .checks import binaries_check
from .checks import paths_check
__all__ = ["binaries_check", "paths_check"]

View File

@ -9,14 +9,14 @@ from django.core.asgi import get_asgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
django_asgi_app = get_asgi_application()
from channels.auth import AuthMiddlewareStack # NOQA: E402
from channels.routing import ProtocolTypeRouter, URLRouter # NOQA: E402
from channels.auth import AuthMiddlewareStack # noqa: E402
from channels.routing import ProtocolTypeRouter, URLRouter # noqa: E402
from paperless.urls import websocket_urlpatterns # NOQA: E402
from paperless.urls import websocket_urlpatterns # noqa: E402
application = ProtocolTypeRouter(
{
"http": get_asgi_application(),
"websocket": AuthMiddlewareStack(URLRouter(websocket_urlpatterns)),
}
},
)

View File

@ -1,9 +1,9 @@
from django.conf import settings
from django.contrib import auth
from django.contrib.auth.middleware import RemoteUserMiddleware
from django.contrib.auth.models import User
from django.utils.deprecation import MiddlewareMixin
from rest_framework import authentication
from django.contrib.auth.middleware import RemoteUserMiddleware
class AutoLoginMiddleware(MiddlewareMixin):
@ -25,7 +25,7 @@ class AngularApiAuthenticationOverride(authentication.BaseAuthentication):
settings.DEBUG
and "Referer" in request.headers
and request.headers["Referer"].startswith("http://localhost:4200/")
): # NOQA: E501
):
user = User.objects.filter(is_staff=True).first()
print("Auto-Login with user {}".format(user))
return (user, None)

View File

@ -3,7 +3,9 @@ import shutil
import stat
from django.conf import settings
from django.core.checks import Error, Warning, register
from django.core.checks import Error
from django.core.checks import register
from django.core.checks import Warning
exists_message = "{} is set but doesn't exist."
exists_hint = "Create a directory at {}"
@ -19,11 +21,12 @@ def path_check(var, directory):
if directory:
if not os.path.isdir(directory):
messages.append(
Error(exists_message.format(var), exists_hint.format(directory))
Error(exists_message.format(var), exists_hint.format(directory)),
)
else:
test_file = os.path.join(
directory, f"__paperless_write_test_{os.getpid()}__"
directory,
f"__paperless_write_test_{os.getpid()}__",
)
try:
with open(test_file, "w"):
@ -34,9 +37,9 @@ def path_check(var, directory):
writeable_message.format(var),
writeable_hint.format(
f"\n{stat.filemode(os.stat(directory).st_mode)} "
f"{directory}\n"
f"{directory}\n",
),
)
),
)
finally:
if os.path.isfile(test_file):
@ -88,8 +91,8 @@ def debug_mode_check(app_configs, **kwargs):
"security issue, since it puts security overides in place which "
"are meant to be only used during development. This "
"also means that paperless will tell anyone various "
"debugging information when something goes wrong."
)
"debugging information when something goes wrong.",
),
]
else:
return []

View File

@ -1,7 +1,8 @@
import json
from asgiref.sync import async_to_sync
from channels.exceptions import DenyConnection, AcceptConnection
from channels.exceptions import AcceptConnection
from channels.exceptions import DenyConnection
from channels.generic.websocket import WebsocketConsumer
@ -14,13 +15,15 @@ class StatusConsumer(WebsocketConsumer):
raise DenyConnection()
else:
async_to_sync(self.channel_layer.group_add)(
"status_updates", self.channel_name
"status_updates",
self.channel_name,
)
raise AcceptConnection()
def disconnect(self, close_code):
async_to_sync(self.channel_layer.group_discard)(
"status_updates", self.channel_name
"status_updates",
self.channel_name,
)
def status_update(self, event):

View File

@ -1,5 +1,4 @@
import gnupg
from django.conf import settings

View File

@ -1,5 +1,4 @@
from django.conf import settings
from paperless import version

View File

@ -5,9 +5,8 @@ import os
import re
from concurrent_log_handler.queue import setup_logging_queues
from dotenv import load_dotenv
from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv
# Tap paperless.conf if it's available
if os.path.exists("../paperless.conf"):
@ -68,7 +67,8 @@ MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")
LOGGING_DIR = os.getenv("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log"))
CONSUMPTION_DIR = os.getenv(
"PAPERLESS_CONSUMPTION_DIR", os.path.join(BASE_DIR, "..", "consume")
"PAPERLESS_CONSUMPTION_DIR",
os.path.join(BASE_DIR, "..", "consume"),
)
# This will be created if it doesn't exist
@ -119,7 +119,7 @@ REST_FRAMEWORK = {
if DEBUG:
REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
"paperless.auth.AngularApiAuthenticationOverride"
"paperless.auth.AngularApiAuthenticationOverride",
)
MIDDLEWARE = [
@ -191,7 +191,8 @@ if AUTO_LOGIN_USERNAME:
ENABLE_HTTP_REMOTE_USER = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
HTTP_REMOTE_USER_HEADER_NAME = os.getenv(
"PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME", "HTTP_REMOTE_USER"
"PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME",
"HTTP_REMOTE_USER",
)
if ENABLE_HTTP_REMOTE_USER:
@ -201,7 +202,7 @@ if ENABLE_HTTP_REMOTE_USER:
"django.contrib.auth.backends.ModelBackend",
]
REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
"rest_framework.authentication.RemoteUserAuthentication"
"rest_framework.authentication.RemoteUserAuthentication",
)
# X-Frame options for embedded PDF display:
@ -212,7 +213,7 @@ else:
# We allow CORS from http://localhost:8000 by default; override with a comma-separated PAPERLESS_CORS_ALLOWED_HOSTS
CORS_ALLOWED_ORIGINS = tuple(
os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(",")
os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","),
)
if DEBUG:
@ -223,7 +224,8 @@ if DEBUG:
# Paperless on a closed network. However, if you're putting this anywhere
# public, you should change the key to something unique and verbose.
SECRET_KEY = os.getenv(
"PAPERLESS_SECRET_KEY", "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee"
"PAPERLESS_SECRET_KEY",
"e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee",
)
_allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS")
@ -268,7 +270,7 @@ DATABASES = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": os.path.join(DATA_DIR, "db.sqlite3"),
}
},
}
if os.getenv("PAPERLESS_DBHOST"):
@ -423,7 +425,8 @@ def default_threads_per_worker(task_workers):
THREADS_PER_WORKER = os.getenv(
"PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker(TASK_WORKERS)
"PAPERLESS_THREADS_PER_WORKER",
default_threads_per_worker(TASK_WORKERS),
)
###############################################################################
@ -435,7 +438,7 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
CONSUMER_POLLING_DELAY = int(os.getenv("PAPERLESS_CONSUMER_POLLING_DELAY", 5))
CONSUMER_POLLING_RETRY_COUNT = int(
os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5)
os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5),
)
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
@ -448,8 +451,8 @@ CONSUMER_IGNORE_PATTERNS = list(
os.getenv(
"PAPERLESS_CONSUMER_IGNORE_PATTERNS",
'[".DS_STORE/*", "._*", ".stfolder/*"]',
)
)
),
),
)
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
@ -479,7 +482,7 @@ OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES_THRESHOLD = float(
os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0)
os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0),
)
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}")
@ -536,7 +539,8 @@ THUMBNAIL_FONT_NAME = os.getenv(
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT", "http://localhost:3000"
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
"http://localhost:3000",
)
if PAPERLESS_TIKA_ENABLED:

View File

@ -1,10 +1,11 @@
import os
import shutil
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from documents.tests.utils import DirectoriesMixin
from paperless import binaries_check, paths_check
from paperless import binaries_check
from paperless import paths_check
from paperless.checks import debug_mode_check
@ -20,7 +21,9 @@ class TestChecks(DirectoriesMixin, TestCase):
self.assertEqual(paths_check(None), [])
@override_settings(
MEDIA_ROOT="uuh", DATA_DIR="whatever", CONSUMPTION_DIR="idontcare"
MEDIA_ROOT="uuh",
DATA_DIR="whatever",
CONSUMPTION_DIR="idontcare",
)
def test_paths_check_dont_exist(self):
msgs = paths_check(None)

View File

@ -2,8 +2,8 @@ from unittest import mock
from channels.layers import get_channel_layer
from channels.testing import WebsocketCommunicator
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from paperless.asgi import application
@ -46,7 +46,8 @@ class TestWebSockets(TestCase):
channel_layer = get_channel_layer()
await channel_layer.group_send(
"status_updates", {"type": "status_update", "data": message}
"status_updates",
{"type": "status_update", "data": message},
)
response = await communicator.receive_json_from()

View File

@ -1,34 +1,30 @@
from django.conf import settings
from django.conf.urls import include
from django.contrib import admin
from django.contrib.auth.decorators import login_required
from django.urls import path, re_path
from django.urls import path
from django.urls import re_path
from django.utils.translation import gettext_lazy as _
from django.views.decorators.csrf import csrf_exempt
from django.views.generic import RedirectView
from documents.views import BulkDownloadView
from documents.views import BulkEditView
from documents.views import CorrespondentViewSet
from documents.views import DocumentTypeViewSet
from documents.views import IndexView
from documents.views import LogViewSet
from documents.views import PostDocumentView
from documents.views import SavedViewViewSet
from documents.views import SearchAutoCompleteView
from documents.views import SelectionDataView
from documents.views import StatisticsView
from documents.views import TagViewSet
from documents.views import UnifiedSearchViewSet
from paperless.consumers import StatusConsumer
from paperless.views import FaviconView
from rest_framework.authtoken import views
from rest_framework.routers import DefaultRouter
from django.utils.translation import gettext_lazy as _
from django.conf import settings
from paperless.consumers import StatusConsumer
from documents.views import (
CorrespondentViewSet,
UnifiedSearchViewSet,
LogViewSet,
TagViewSet,
DocumentTypeViewSet,
IndexView,
SearchAutoCompleteView,
StatisticsView,
PostDocumentView,
SavedViewViewSet,
BulkEditView,
SelectionDataView,
BulkDownloadView,
)
from paperless.views import FaviconView
api_router = DefaultRouter()
api_router.register(r"correspondents", CorrespondentViewSet)
api_router.register(r"document_types", DocumentTypeViewSet)
@ -62,7 +58,9 @@ urlpatterns = [
name="post_document",
),
re_path(
r"^documents/bulk_edit/", BulkEditView.as_view(), name="bulk_edit"
r"^documents/bulk_edit/",
BulkEditView.as_view(),
name="bulk_edit",
),
re_path(
r"^documents/selection_data/",
@ -76,7 +74,7 @@ urlpatterns = [
),
path("token/", views.obtain_auth_token),
]
+ api_router.urls
+ api_router.urls,
),
),
re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
@ -88,35 +86,37 @@ urlpatterns = [
re_path(
r"^doc/(?P<pk>\d+)$",
RedirectView.as_view(
url=settings.BASE_URL + "api/documents/%(pk)s/download/"
url=settings.BASE_URL + "api/documents/%(pk)s/download/",
),
),
re_path(
r"^thumb/(?P<pk>\d+)$",
RedirectView.as_view(
url=settings.BASE_URL + "api/documents/%(pk)s/thumb/"
url=settings.BASE_URL + "api/documents/%(pk)s/thumb/",
),
),
re_path(
r"^preview/(?P<pk>\d+)$",
RedirectView.as_view(
url=settings.BASE_URL + "api/documents/%(pk)s/preview/"
url=settings.BASE_URL + "api/documents/%(pk)s/preview/",
),
),
]
],
),
),
re_path(
r"^push$",
csrf_exempt(
RedirectView.as_view(url=settings.BASE_URL + "api/documents/post_document/")
RedirectView.as_view(
url=settings.BASE_URL + "api/documents/post_document/",
),
),
),
# Frontend assets TODO: this is pretty bad, but it works.
path(
"assets/<path:path>",
RedirectView.as_view(
url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s"
url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s",
),
),
# TODO: with localization, this is even worse! :/

View File

@ -14,7 +14,11 @@ class StandardPagination(PageNumberPagination):
class FaviconView(View):
def get(self, request, *args, **kwargs):
favicon = os.path.join(
os.path.dirname(__file__), "static", "paperless", "img", "favicon.ico"
os.path.dirname(__file__),
"static",
"paperless",
"img",
"favicon.ico",
)
with open(favicon, "rb") as f:
return HttpResponse(f, content_type="image/x-icon")

View File

@ -1,6 +1,7 @@
import os
from uvicorn.workers import UvicornWorker
from django.conf import settings
from uvicorn.workers import UvicornWorker
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")

View File

@ -6,7 +6,6 @@ It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application

View File

@ -1,8 +1,8 @@
from django.contrib import admin
from django import forms
from paperless_mail.models import MailAccount, MailRule
from django.contrib import admin
from django.utils.translation import gettext_lazy as _
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
class MailAccountAdminForm(forms.ModelForm):
@ -48,7 +48,7 @@ class MailRuleAdmin(admin.ModelAdmin):
{
"description": _(
"Paperless will only process mails that match ALL of the "
"filters given below."
"filters given below.",
),
"fields": (
"filter_from",
@ -66,7 +66,7 @@ class MailRuleAdmin(admin.ModelAdmin):
"description": _(
"The action applied to the mail. This action is only "
"performed when documents were consumed from the mail. "
"Mails without attachments will remain entirely untouched."
"Mails without attachments will remain entirely untouched.",
),
"fields": ("action", "action_parameter"),
},
@ -78,7 +78,7 @@ class MailRuleAdmin(admin.ModelAdmin):
"Assign metadata to documents consumed from this rule "
"automatically. If you do not assign tags, types or "
"correspondents here, paperless will still process all "
"matching rules that you have defined."
"matching rules that you have defined.",
),
"fields": (
"assign_title_from",

View File

@ -1,5 +1,4 @@
from django.apps import AppConfig
from django.utils.translation import gettext_lazy as _

View File

@ -1,6 +1,7 @@
import os
import tempfile
from datetime import timedelta, date
from datetime import date
from datetime import timedelta
from fnmatch import fnmatch
import magic
@ -8,18 +9,16 @@ import pathvalidate
from django.conf import settings
from django.db import DatabaseError
from django_q.tasks import async_task
from imap_tools import (
MailBox,
MailBoxUnencrypted,
AND,
MailMessageFlags,
MailboxFolderSelectError,
)
from documents.loggers import LoggingMixin
from documents.models import Correspondent
from documents.parsers import is_mime_type_supported
from paperless_mail.models import MailAccount, MailRule
from imap_tools import AND
from imap_tools import MailBox
from imap_tools import MailboxFolderSelectError
from imap_tools import MailBoxUnencrypted
from imap_tools import MailMessageFlags
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
class MailError(Exception):
@ -120,8 +119,8 @@ class MailAccountHandler(LoggingMixin):
else:
raise NotImplementedError(
"Unknown title selector."
) # pragma: nocover # NOQA: E501
"Unknown title selector.",
) # pragma: nocover
def get_correspondent(self, message, rule):
c_from = rule.assign_correspondent_from
@ -137,7 +136,7 @@ class MailAccountHandler(LoggingMixin):
message.from_values
and "name" in message.from_values
and message.from_values["name"]
): # NOQA: E501
):
return self._correspondent_from_name(message.from_values["name"])
else:
return self._correspondent_from_name(message.from_)
@ -147,8 +146,8 @@ class MailAccountHandler(LoggingMixin):
else:
raise NotImplementedError(
"Unknwown correspondent selector"
) # pragma: nocover # NOQA: E501
"Unknwown correspondent selector",
) # pragma: nocover
def handle_mail_account(self, account):
@ -159,7 +158,9 @@ class MailAccountHandler(LoggingMixin):
total_processed_files = 0
with get_mailbox(
account.imap_server, account.imap_port, account.imap_security
account.imap_server,
account.imap_port,
account.imap_security,
) as M:
try:
@ -193,7 +194,7 @@ class MailAccountHandler(LoggingMixin):
except MailboxFolderSelectError:
raise MailError(
f"Rule {rule}: Folder {rule.folder} "
f"does not exist in account {rule.account}"
f"does not exist in account {rule.account}",
)
criterias = make_criterias(rule)
@ -242,12 +243,14 @@ class MailAccountHandler(LoggingMixin):
try:
get_rule_action(rule).post_consume(
M, post_consume_messages, rule.action_parameter
M,
post_consume_messages,
rule.action_parameter,
)
except Exception as e:
raise MailError(
f"Rule {rule}: Error while processing post-consume actions: " f"{e}"
f"Rule {rule}: Error while processing post-consume actions: " f"{e}",
)
return total_processed_files
@ -274,7 +277,7 @@ class MailAccountHandler(LoggingMixin):
if (
not att.content_disposition == "attachment"
and rule.attachment_type == MailRule.ATTACHMENT_TYPE_ATTACHMENTS_ONLY
): # NOQA: E501
):
self.log(
"debug",
f"Rule {rule}: "
@ -297,7 +300,8 @@ class MailAccountHandler(LoggingMixin):
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
_, temp_filename = tempfile.mkstemp(
prefix="paperless-mail-", dir=settings.SCRATCH_DIR
prefix="paperless-mail-",
dir=settings.SCRATCH_DIR,
)
with open(temp_filename, "wb") as f:
f.write(att.payload)
@ -313,15 +317,13 @@ class MailAccountHandler(LoggingMixin):
"documents.tasks.consume_file",
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
att.filename
), # NOQA: E501
att.filename,
),
override_title=title,
override_correspondent_id=correspondent.id
if correspondent
else None, # NOQA: E501
override_document_type_id=doc_type.id
if doc_type
else None, # NOQA: E501
else None,
override_document_type_id=doc_type.id if doc_type else None,
override_tag_ids=[tag.id] if tag else None,
task_name=att.filename[:100],
)

View File

@ -1,5 +1,4 @@
from django.core.management.base import BaseCommand
from paperless_mail import tasks
@ -7,7 +6,8 @@ class Command(BaseCommand):
help = """
""".replace(
" ", ""
" ",
"",
)
def handle(self, *args, **options):

View File

@ -1,7 +1,5 @@
from django.db import models
import documents.models as document_models
from django.db import models
from django.utils.translation import gettext_lazy as _
@ -30,12 +28,14 @@ class MailAccount(models.Model):
null=True,
help_text=_(
"This is usually 143 for unencrypted and STARTTLS "
"connections, and 993 for SSL connections."
"connections, and 993 for SSL connections.",
),
)
imap_security = models.PositiveIntegerField(
_("IMAP security"), choices=IMAP_SECURITY_OPTIONS, default=IMAP_SECURITY_SSL
_("IMAP security"),
choices=IMAP_SECURITY_OPTIONS,
default=IMAP_SECURITY_SSL,
)
username = models.CharField(_("username"), max_length=256)
@ -48,7 +48,7 @@ class MailAccount(models.Model):
default="UTF-8",
help_text=_(
"The character set to use when communicating with the "
"mail server, such as 'UTF-8' or 'US-ASCII'."
"mail server, such as 'UTF-8' or 'US-ASCII'.",
),
)
@ -123,13 +123,22 @@ class MailRule(models.Model):
)
filter_from = models.CharField(
_("filter from"), max_length=256, null=True, blank=True
_("filter from"),
max_length=256,
null=True,
blank=True,
)
filter_subject = models.CharField(
_("filter subject"), max_length=256, null=True, blank=True
_("filter subject"),
max_length=256,
null=True,
blank=True,
)
filter_body = models.CharField(
_("filter body"), max_length=256, null=True, blank=True
_("filter body"),
max_length=256,
null=True,
blank=True,
)
filter_attachment_filename = models.CharField(
@ -140,12 +149,14 @@ class MailRule(models.Model):
help_text=_(
"Only consume documents which entirely match this "
"filename if specified. Wildcards such as *.pdf or "
"*invoice* are allowed. Case insensitive."
"*invoice* are allowed. Case insensitive.",
),
)
maximum_age = models.PositiveIntegerField(
_("maximum age"), default=30, help_text=_("Specified in days.")
_("maximum age"),
default=30,
help_text=_("Specified in days."),
)
attachment_type = models.PositiveIntegerField(
@ -154,7 +165,7 @@ class MailRule(models.Model):
default=ATTACHMENT_TYPE_ATTACHMENTS_ONLY,
help_text=_(
"Inline attachments include embedded images, so it's best "
"to combine this option with a filename filter."
"to combine this option with a filename filter.",
),
)
@ -173,12 +184,14 @@ class MailRule(models.Model):
"Additional parameter for the action selected above, "
"i.e., "
"the target folder of the move to folder action. "
"Subfolders must be separated by dots."
"Subfolders must be separated by dots.",
),
)
assign_title_from = models.PositiveIntegerField(
_("assign title from"), choices=TITLE_SELECTOR, default=TITLE_FROM_SUBJECT
_("assign title from"),
choices=TITLE_SELECTOR,
default=TITLE_FROM_SUBJECT,
)
assign_tag = models.ForeignKey(

View File

@ -1,6 +1,7 @@
import logging
from paperless_mail.mail import MailAccountHandler, MailError
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount

View File

@ -7,13 +7,15 @@ from unittest import mock
from django.core.management import call_command
from django.db import DatabaseError
from django.test import TestCase
from imap_tools import MailMessageFlags, MailboxFolderSelectError
from documents.models import Correspondent
from documents.tests.utils import DirectoriesMixin
from imap_tools import MailboxFolderSelectError
from imap_tools import MailMessageFlags
from paperless_mail import tasks
from paperless_mail.mail import MailError, MailAccountHandler
from paperless_mail.models import MailRule, MailAccount
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
class BogusFolderManager:
@ -83,7 +85,7 @@ class BogusMailBox(ContextManager):
def move(self, uid_list, folder):
if folder == "spam":
self.messages_spam.append(
filter(lambda m: m.uid in uid_list, self.messages)
filter(lambda m: m.uid in uid_list, self.messages),
)
self.messages = list(filter(lambda m: m.uid not in uid_list, self.messages))
else:
@ -115,7 +117,9 @@ def create_message(
def create_attachment(
filename="the_file.pdf", content_disposition="attachment", payload=b"a PDF document"
filename="the_file.pdf",
content_disposition="attachment",
payload=b"a PDF document",
):
attachment = namedtuple("Attachment", [])
attachment.filename = filename
@ -163,7 +167,7 @@ class TestMail(DirectoriesMixin, TestCase):
body="cables",
seen=True,
flagged=False,
)
),
)
self.bogus_mailbox.messages.append(
create_message(
@ -171,14 +175,14 @@ class TestMail(DirectoriesMixin, TestCase):
body="from my favorite electronic store",
seen=False,
flagged=True,
)
),
)
self.bogus_mailbox.messages.append(
create_message(
subject="Claim your $10M price now!",
from_="amazon@amazon-some-indian-site.org",
seen=False,
)
),
)
def test_get_correspondent(self):
@ -196,12 +200,14 @@ class TestMail(DirectoriesMixin, TestCase):
handler = MailAccountHandler()
rule = MailRule(
name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING
name="a",
assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING,
)
self.assertIsNone(handler.get_correspondent(message, rule))
rule = MailRule(
name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL
name="b",
assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL,
)
c = handler.get_correspondent(message, rule)
self.assertIsNotNone(c)
@ -212,7 +218,8 @@ class TestMail(DirectoriesMixin, TestCase):
self.assertEqual(c.id, me_localhost.id)
rule = MailRule(
name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME
name="c",
assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME,
)
c = handler.get_correspondent(message, rule)
self.assertIsNotNone(c)
@ -244,7 +251,9 @@ class TestMail(DirectoriesMixin, TestCase):
def test_handle_message(self):
message = create_message(
subject="the message title", from_="Myself", num_attachments=2
subject="the message title",
from_="Myself",
num_attachments=2,
)
account = MailAccount()
@ -376,11 +385,16 @@ class TestMail(DirectoriesMixin, TestCase):
def test_handle_mail_account_mark_read(self):
account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="secret"
name="test",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule", account=account, action=MailRule.ACTION_MARK_READ
name="testrule",
account=account,
action=MailRule.ACTION_MARK_READ,
)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
@ -394,7 +408,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_handle_mail_account_delete(self):
account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="secret"
name="test",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
@ -412,7 +429,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_handle_mail_account_flag(self):
account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="secret"
name="test",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
@ -432,7 +452,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_handle_mail_account_move(self):
account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="secret"
name="test",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
@ -453,7 +476,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_error_login(self):
account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="wrong"
name="test",
imap_server="",
username="admin",
password="wrong",
)
try:
@ -465,11 +491,17 @@ class TestMail(DirectoriesMixin, TestCase):
def test_error_skip_account(self):
account_faulty = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="wroasdng"
name="test",
imap_server="",
username="admin",
password="wroasdng",
)
account = MailAccount.objects.create(
name="test2", imap_server="", username="admin", password="secret"
name="test2",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule",
@ -487,7 +519,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_error_skip_rule(self):
account = MailAccount.objects.create(
name="test2", imap_server="", username="admin", password="secret"
name="test2",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule",
@ -523,7 +558,10 @@ class TestMail(DirectoriesMixin, TestCase):
m.side_effect = get_correspondent_fake
account = MailAccount.objects.create(
name="test2", imap_server="", username="admin", password="secret"
name="test2",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule",
@ -544,7 +582,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_error_create_correspondent(self):
account = MailAccount.objects.create(
name="test2", imap_server="", username="admin", password="secret"
name="test2",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule",
@ -579,7 +620,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_filters(self):
account = MailAccount.objects.create(
name="test3", imap_server="", username="admin", password="secret"
name="test3",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule3",
@ -629,7 +673,7 @@ class TestMail(DirectoriesMixin, TestCase):
class TestManagementCommand(TestCase):
@mock.patch(
"paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts"
"paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts",
)
def test_mail_fetcher(self, m):
@ -644,10 +688,16 @@ class TestTasks(TestCase):
m.side_effect = lambda account: 6
MailAccount.objects.create(
name="A", imap_server="A", username="A", password="A"
name="A",
imap_server="A",
username="A",
password="A",
)
MailAccount.objects.create(
name="B", imap_server="A", username="A", password="A"
name="B",
imap_server="A",
username="A",
password="A",
)
result = tasks.process_mail_accounts()
@ -663,7 +713,10 @@ class TestTasks(TestCase):
def test_single_accounts(self, m):
MailAccount.objects.create(
name="A", imap_server="A", username="A", password="A"
name="A",
imap_server="A",
username="A",
password="A",
)
tasks.process_mail_account("A")

View File

@ -1,2 +1,5 @@
# this is here so that django finds the checks.
from .checks import *
from .checks import check_default_language_available
from .checks import get_tesseract_langs
__all__ = ["get_tesseract_langs", "check_default_language_available"]

View File

@ -1,5 +1,4 @@
from django.apps import AppConfig
from paperless_tesseract.signals import tesseract_consumer_declaration

View File

@ -1,7 +1,9 @@
import subprocess
from django.conf import settings
from django.core.checks import Error, Warning, register
from django.core.checks import Error
from django.core.checks import register
from django.core.checks import Warning
def get_tesseract_langs():
@ -19,8 +21,8 @@ def check_default_language_available(app_configs, **kwargs):
return [
Warning(
"No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
"This means that tesseract will fallback to english."
)
"This means that tesseract will fallback to english.",
),
]
specified_langs = settings.OCR_LANGUAGE.split("+")
@ -31,8 +33,8 @@ def check_default_language_available(app_configs, **kwargs):
Error(
f"The selected ocr language {lang} is "
f"not installed. Paperless cannot OCR your documents "
f"without it. Please fix PAPERLESS_OCR_LANGUAGE."
)
f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
),
]
return []

View File

@ -2,10 +2,11 @@ import json
import os
import re
from PIL import Image
from django.conf import settings
from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf
from documents.parsers import DocumentParser
from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError
from PIL import Image
class NoTextFoundException(Exception):
@ -42,7 +43,7 @@ class RasterisedDocumentParser(DocumentParser):
"prefix": meta.REVERSE_NS[m.group(1)],
"key": m.group(2),
"value": value,
}
},
)
except Exception as e:
self.log(
@ -53,7 +54,9 @@ class RasterisedDocumentParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return make_thumbnail_from_pdf(
self.archive_path or document_path, self.tempdir, self.logging_group
self.archive_path or document_path,
self.tempdir,
self.logging_group,
)
def is_image(self, mime_type):
@ -110,7 +113,6 @@ class RasterisedDocumentParser(DocumentParser):
return None
from pdfminer.high_level import extract_text as pdfminer_extract_text
from pdfminer.pdftypes import PDFException
try:
stripped = post_process_text(pdfminer_extract_text(pdf_file))
@ -129,7 +131,12 @@ class RasterisedDocumentParser(DocumentParser):
return None
def construct_ocrmypdf_parameters(
self, input_file, mime_type, output_file, sidecar_file, safe_fallback=False
self,
input_file,
mime_type,
output_file,
sidecar_file,
safe_fallback=False,
):
ocrmypdf_args = {
"input_file": input_file,
@ -167,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser):
ocrmypdf_args["rotate_pages"] = True
ocrmypdf_args[
"rotate_pages_threshold"
] = settings.OCR_ROTATE_PAGES_THRESHOLD # NOQA: E501
] = settings.OCR_ROTATE_PAGES_THRESHOLD
if settings.OCR_PAGES > 0:
ocrmypdf_args["pages"] = f"1-{settings.OCR_PAGES}"
@ -202,7 +209,7 @@ class RasterisedDocumentParser(DocumentParser):
raise ParseError(
f"Cannot produce archive PDF for image {input_file}, "
f"no DPI information is present in this image and "
f"OCR_IMAGE_DPI is not set."
f"OCR_IMAGE_DPI is not set.",
)
if settings.OCR_USER_ARGS and not safe_fallback:
@ -241,7 +248,10 @@ class RasterisedDocumentParser(DocumentParser):
sidecar_file = os.path.join(self.tempdir, "sidecar.txt")
args = self.construct_ocrmypdf_parameters(
document_path, mime_type, archive_path, sidecar_file
document_path,
mime_type,
archive_path,
sidecar_file,
)
try:
@ -289,7 +299,8 @@ class RasterisedDocumentParser(DocumentParser):
# is bigger and blurry due to --force-ocr.
self.text = self.extract_text(
sidecar_file_fallback, archive_path_fallback
sidecar_file_fallback,
archive_path_fallback,
)
except Exception as e:

View File

@ -1,8 +1,8 @@
from unittest import mock
from django.core.checks import ERROR
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from paperless_tesseract import check_default_language_available
@ -16,8 +16,8 @@ class TestChecks(TestCase):
self.assertEqual(len(msgs), 1)
self.assertTrue(
msgs[0].msg.startswith(
"No OCR language has been specified with PAPERLESS_OCR_LANGUAGE"
)
"No OCR language has been specified with PAPERLESS_OCR_LANGUAGE",
),
)
@override_settings(OCR_LANGUAGE="ita")

View File

@ -3,11 +3,13 @@ import uuid
from typing import ContextManager
from unittest import mock
from django.test import TestCase, override_settings
from documents.parsers import ParseError, run_convert
from django.test import override_settings
from django.test import TestCase
from documents.parsers import ParseError
from documents.parsers import run_convert
from documents.tests.utils import DirectoriesMixin
from paperless_tesseract.parsers import RasterisedDocumentParser, post_process_text
from paperless_tesseract.parsers import post_process_text
from paperless_tesseract.parsers import RasterisedDocumentParser
image_to_string_calls = []
@ -56,7 +58,9 @@ class TestParser(DirectoriesMixin, TestCase):
result,
actual_result,
"strip_exceess_whitespace({}) != '{}', but '{}'".format(
source, result, actual_result
source,
result,
actual_result,
),
)
@ -65,7 +69,8 @@ class TestParser(DirectoriesMixin, TestCase):
def test_get_text_from_pdf(self):
parser = RasterisedDocumentParser(uuid.uuid4())
text = parser.extract_text(
None, os.path.join(self.SAMPLE_FILES, "simple-digital.pdf")
None,
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
)
self.assertContainsStrings(text.strip(), ["This is a test document."])
@ -73,7 +78,8 @@ class TestParser(DirectoriesMixin, TestCase):
def test_thumbnail(self):
parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail(
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(thumb))
@ -89,14 +95,16 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail(
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(thumb))
def test_thumbnail_encrypted(self):
parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail(
os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(thumb))
@ -113,7 +121,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
@ -124,7 +133,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
@ -139,7 +149,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
"application/pdf",
)
self.assertIsNone(parser.archive_path)
@ -168,7 +179,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
"application/pdf",
)
self.assertIsNone(parser.archive_path)
@ -178,7 +190,8 @@ class TestParser(DirectoriesMixin, TestCase):
def test_with_form_error_notext(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
"application/pdf",
)
self.assertContainsStrings(
@ -191,7 +204,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
"application/pdf",
)
self.assertContainsStrings(
@ -221,7 +235,7 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)
dpi = parser.calculate_a4_dpi(
os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png")
os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
)
self.assertEqual(dpi, 62)
@ -233,7 +247,8 @@ class TestParser(DirectoriesMixin, TestCase):
def f():
parser.parse(
os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png"
os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
"image/png",
)
self.assertRaises(ParseError, f)
@ -247,68 +262,80 @@ class TestParser(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["this is a test document."]
parser.get_text().lower(),
["this is a test document."],
)
def test_multi_page(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)
@override_settings(OCR_PAGES=2, OCR_MODE="skip")
def test_multi_page_pages_skip(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)
@override_settings(OCR_PAGES=2, OCR_MODE="redo")
def test_multi_page_pages_redo(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)
@override_settings(OCR_PAGES=2, OCR_MODE="force")
def test_multi_page_pages_force(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)
@override_settings(OCR_MODE="skip")
def test_multi_page_analog_pages_skip(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)
@override_settings(OCR_PAGES=2, OCR_MODE="redo")
def test_multi_page_analog_pages_redo(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"])
@ -318,7 +345,8 @@ class TestParser(DirectoriesMixin, TestCase):
def test_multi_page_analog_pages_force(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
@ -329,29 +357,34 @@ class TestParser(DirectoriesMixin, TestCase):
def test_skip_noarchive_withtext(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertIsNone(parser.archive_path)
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)
@override_settings(OCR_MODE="skip_noarchive")
def test_skip_noarchive_notext(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)
@override_settings(OCR_MODE="skip")
def test_multi_page_mixed(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
@ -368,11 +401,13 @@ class TestParser(DirectoriesMixin, TestCase):
def test_multi_page_mixed_no_archive(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
"application/pdf",
)
self.assertIsNone(parser.archive_path)
self.assertContainsStrings(
parser.get_text().lower(), ["page 4", "page 5", "page 6"]
parser.get_text().lower(),
["page 4", "page 5", "page 6"],
)
@override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True)

View File

@ -1,5 +1,4 @@
from django.apps import AppConfig
from paperless_text.signals import text_consumer_declaration

View File

@ -1,9 +1,10 @@
import os
from PIL import ImageDraw, ImageFont, Image
from django.conf import settings
from documents.parsers import DocumentParser
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
class TextDocumentParser(DocumentParser):

View File

@ -1,7 +1,6 @@
import os
from django.test import TestCase
from documents.tests.utils import DirectoriesMixin
from paperless_text.parsers import TextDocumentParser
@ -13,7 +12,8 @@ class TestTextParser(DirectoriesMixin, TestCase):
# just make sure that it does not crash
f = parser.get_thumbnail(
os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain"
os.path.join(os.path.dirname(__file__), "samples", "test.txt"),
"text/plain",
)
self.assertTrue(os.path.isfile(f))
@ -22,7 +22,8 @@ class TestTextParser(DirectoriesMixin, TestCase):
parser = TextDocumentParser(None)
parser.parse(
os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain"
os.path.join(os.path.dirname(__file__), "samples", "test.txt"),
"text/plain",
)
self.assertEqual(parser.get_text(), "This is a test file.\n")

View File

@ -1,10 +1,11 @@
import os
import requests
import dateutil.parser
import requests
from django.conf import settings
from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf
from documents.parsers import DocumentParser
from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError
from tika import parser
@ -20,7 +21,9 @@ class TikaDocumentParser(DocumentParser):
self.archive_path = self.convert_to_pdf(document_path, file_name)
return make_thumbnail_from_pdf(
self.archive_path, self.tempdir, self.logging_group
self.archive_path,
self.tempdir,
self.logging_group,
)
def extract_metadata(self, document_path, mime_type):
@ -53,7 +56,7 @@ class TikaDocumentParser(DocumentParser):
except Exception as err:
raise ParseError(
f"Could not parse {document_path} with tika server at "
f"{tika_server}: {err}"
f"{tika_server}: {err}",
)
self.text = parsed["content"].strip()
@ -74,22 +77,23 @@ class TikaDocumentParser(DocumentParser):
url = gotenberg_server + "/forms/libreoffice/convert"
self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
files = {
"files": (
file_name or os.path.basename(document_path),
open(document_path, "rb"),
)
}
headers = {}
with open(document_path, "rb") as document_handle:
files = {
"files": (
file_name or os.path.basename(document_path),
document_handle,
),
}
headers = {}
try:
response = requests.post(url, files=files, headers=headers)
response.raise_for_status() # ensure we notice bad responses
except Exception as err:
raise ParseError(f"Error while converting document to PDF: {err}")
try:
response = requests.post(url, files=files, headers=headers)
response.raise_for_status() # ensure we notice bad responses
except Exception as err:
raise ParseError(f"Error while converting document to PDF: {err}")
file = open(pdf_path, "wb")
file.write(response.content)
file.close()
with open(pdf_path, "wb") as file:
    # The context manager closes the handle when the block exits (even if
    # write() raises), so no explicit close() call is needed here.
    file.write(response.content)
return pdf_path

View File

@ -10,12 +10,12 @@ def tika_consumer_declaration(sender, **kwargs):
"weight": 10,
"mime_types": {
"application/msword": ".doc",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", # NOQA: E501
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", # noqa: E501
"application/vnd.ms-excel": ".xls",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", # NOQA: E501
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", # noqa: E501
"application/vnd.ms-powerpoint": ".ppt",
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx", # NOQA: E501
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx", # NOQA: E501
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx", # noqa: E501
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx", # noqa: E501
"application/vnd.oasis.opendocument.presentation": ".odp",
"application/vnd.oasis.opendocument.spreadsheet": ".ods",
"application/vnd.oasis.opendocument.text": ".odt",

View File

@ -4,9 +4,8 @@ from pathlib import Path
from unittest import mock
from django.test import TestCase
from requests import Response
from paperless_tika.parsers import TikaDocumentParser
from requests import Response
class TestTikaParser(TestCase):
@ -42,14 +41,15 @@ class TestTikaParser(TestCase):
@mock.patch("paperless_tika.parsers.parser.from_file")
def test_metadata(self, from_file):
from_file.return_value = {
"metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"}
"metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"},
}
file = os.path.join(self.parser.tempdir, "input.odt")
Path(file).touch()
metadata = self.parser.extract_metadata(
file, "application/vnd.oasis.opendocument.text"
file,
"application/vnd.oasis.opendocument.text",
)
self.assertTrue("Creation-Date" in [m["key"] for m in metadata])