Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-07-28 18:24:38 -05:00)

Commit: Merge branch 'dev' into celery-tasks
@@ -1,5 +1,4 @@
from django.contrib import admin
from django.contrib.auth.models import Group, User
from django.utils.html import format_html, format_html_join
from django.utils.safestring import mark_safe
from whoosh.writing import AsyncWriter
@@ -32,7 +31,7 @@ class TagAdmin(admin.ModelAdmin):
    list_filter = ("colour", "matching_algorithm")
    list_editable = ("colour", "match", "matching_algorithm")

    readonly_fields = ("slug",)
    readonly_fields = ("slug", )


class DocumentTypeAdmin(admin.ModelAdmin):
@@ -51,9 +50,17 @@ class DocumentTypeAdmin(admin.ModelAdmin):
class DocumentAdmin(admin.ModelAdmin):

    search_fields = ("correspondent__name", "title", "content", "tags__name")
    readonly_fields = ("added", "file_type", "storage_type",)
    list_display = ("title", "created", "added", "correspondent",
                    "tags_", "archive_serial_number", "document_type")
    readonly_fields = ("added", "file_type", "storage_type", "filename")
    list_display = (
        "title",
        "created",
        "added",
        "correspondent",
        "tags_",
        "archive_serial_number",
        "document_type",
        "filename"
    )
    list_filter = (
        "document_type",
        "tags",
@@ -120,8 +127,3 @@ admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin)


# Unless we implement multi-user, these default registrations don't make sense.
admin.site.unregister(Group)
admin.site.unregister(User)
@@ -1,5 +1,4 @@
from django.apps import AppConfig
from django.db.models.signals import post_delete


class DocumentsConfig(AppConfig):
@@ -14,7 +13,6 @@ class DocumentsConfig(AppConfig):
        add_inbox_tags,
        run_pre_consume_script,
        run_post_consume_script,
        cleanup_document_deletion,
        set_log_entry,
        set_correspondent,
        set_document_type,
@@ -33,6 +31,4 @@ class DocumentsConfig(AppConfig):
        document_consumption_finished.connect(add_to_index)
        document_consumption_finished.connect(run_post_consume_script)

        post_delete.connect(cleanup_document_deletion)

        AppConfig.ready(self)
@@ -4,6 +4,8 @@ from django.conf import settings
from django.core.checks import Error, register
from django.db.utils import OperationalError, ProgrammingError

from documents.signals import document_consumer_declaration


@register()
def changed_password_check(app_configs, **kwargs):
@@ -37,3 +39,17 @@ def changed_password_check(app_configs, **kwargs):
            """))]

    return []


@register()
def parser_check(app_configs, **kwargs):

    parsers = []
    for response in document_consumer_declaration.send(None):
        parsers.append(response[1])

    if len(parsers) == 0:
        return [Error("No parsers found. This is a bug. The consumer won't be "
                      "able to consume any documents without parsers.")]
    else:
        return []
@@ -3,7 +3,6 @@ import logging
import os
import pickle
import re
import time

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
@@ -64,7 +63,7 @@ class DocumentClassifier(object):

    def save_classifier(self):
        with open(settings.MODEL_FILE, "wb") as f:
            pickle.dump(self.FORMAT_VERSION, f)  # Version
            pickle.dump(self.FORMAT_VERSION, f)
            pickle.dump(self.data_hash, f)
            pickle.dump(self.data_vectorizer, f)

@@ -89,16 +88,14 @@ class DocumentClassifier(object):
            data.append(preprocessed_content)

            y = -1
            if doc.document_type:
                if doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
                    y = doc.document_type.pk
            if doc.document_type and doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
                y = doc.document_type.pk
            m.update(y.to_bytes(4, 'little', signed=True))
            labels_document_type.append(y)

            y = -1
            if doc.correspondent:
                if doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
                    y = doc.correspondent.pk
            if doc.correspondent and doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
                y = doc.correspondent.pk
            m.update(y.to_bytes(4, 'little', signed=True))
            labels_correspondent.append(y)

@@ -120,8 +117,8 @@ class DocumentClassifier(object):

        num_tags = len(labels_tags_unique)
        # subtract 1 since -1 (null) is also part of the classes.
        num_correspondents = len(labels_correspondent) - 1
        num_document_types = len(labels_document_type) - 1
        num_correspondents = len(set(labels_correspondent)) - 1
        num_document_types = len(set(labels_document_type)) - 1

        logging.getLogger(__name__).debug(
            "{} documents, {} tag(s), {} correspondent(s), "
@@ -137,7 +134,7 @@ class DocumentClassifier(object):
        logging.getLogger(__name__).debug("Vectorizing data...")
        self.data_vectorizer = CountVectorizer(
            analyzer="word",
            ngram_range=(1,2),
            ngram_range=(1, 2),
            min_df=0.01
        )
        data_vectorized = self.data_vectorizer.fit_transform(data)
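Note: the switch from len(labels) to len(set(labels)) fixes the class count, because the label lists hold one entry per document, not one per class. A standalone check of the arithmetic (values made up for illustration):

    labels_correspondent = [3, 3, -1, 7, 3, -1]
    num_wrong = len(labels_correspondent) - 1       # 5: counts documents, not classes
    num_right = len(set(labels_correspondent)) - 1  # 2: distinct labels minus the -1 (null) class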
@@ -3,7 +3,6 @@ import hashlib
import logging
import os
import re
import uuid

from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer
@@ -13,7 +12,9 @@ from django.utils import timezone

from paperless.db import GnuPG
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .models import Document, FileInfo
from .file_handling import generate_filename, create_source_path_directory
from .loggers import LoggingMixin
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class
from .signals import (
    document_consumption_finished,
@@ -25,17 +26,10 @@ class ConsumerError(Exception):
    pass


class Consumer:
    """
    Loop over every file found in CONSUMPTION_DIR and:
      1. Convert it to a greyscale pnm
      2. Use tesseract on the pnm
      3. Store the document in the MEDIA_ROOT with optional encryption
      4. Store the OCR'd text in the database
      5. Delete the document and image(s)
    """
class Consumer(LoggingMixin):

    def _send_progress(self, filename, current_progress, max_progress, status, message, document_id=None):
    def _send_progress(self, filename, current_progress, max_progress, status,
                       message, document_id=None):
        payload = {
            'filename': os.path.basename(filename),
            'current_progress': current_progress,
@@ -44,156 +38,226 @@ class Consumer:
            'message': message,
            'document_id': document_id
        }
        async_to_sync(self.channel_layer.group_send)("status_updates", {'type': 'status_update', 'data': payload})
        async_to_sync(self.channel_layer.group_send)("status_updates",
                                                     {'type': 'status_update',
                                                      'data': payload})

    def __init__(self, consume=settings.CONSUMPTION_DIR,
                 scratch=settings.SCRATCH_DIR):

        self.logger = logging.getLogger(__name__)
        self.logging_group = None

        self.consume = consume
        self.scratch = scratch

        self.classifier = DocumentClassifier()
    def __init__(self):
        super().__init__()
        self.path = None
        self.filename = None
        self.override_title = None
        self.override_correspondent_id = None
        self.override_tag_ids = None
        self.override_document_type_id = None

        self.channel_layer = get_channel_layer()

        os.makedirs(self.scratch, exist_ok=True)
    def pre_check_file_exists(self):
        if not os.path.isfile(self.path):
            raise ConsumerError("Cannot consume {}: It is not a file".format(
                self.path))

        self.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            self.storage_type = Document.STORAGE_TYPE_GPG

        if not self.consume:
    def pre_check_consumption_dir(self):
        if not settings.CONSUMPTION_DIR:
            raise ConsumerError(
                "The CONSUMPTION_DIR settings variable does not appear to be "
                "set."
            )
                "set.")

        if not os.path.exists(self.consume):
        if not os.path.isdir(settings.CONSUMPTION_DIR):
            raise ConsumerError(
                "Consumption directory {} does not exist".format(self.consume))
                "Consumption directory {} does not exist".format(
                    settings.CONSUMPTION_DIR))

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })
    def pre_check_regex(self):
        if not re.match(FileInfo.REGEXES["title"], self.filename):
            raise ConsumerError(
                "Filename {} does not seem to be safe to "
                "consume".format(self.filename))

    @transaction.atomic
    def try_consume_file(self, file):
        """
        Return True if file was consumed
        """

        self.logging_group = uuid.uuid4()

        if not re.match(FileInfo.REGEXES["title"], file):
            return False

        doc = file

        if self._is_duplicate(doc):
            self.log(
                "warning",
                "Skipping {} as it appears to be a duplicate".format(doc)
    def pre_check_duplicate(self):
        with open(self.path, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
        if Document.objects.filter(checksum=checksum).exists():
            if settings.CONSUMER_DELETE_DUPLICATES:
                os.unlink(self.path)
            raise ConsumerError(
                "Not consuming {}: It is a duplicate.".format(self.filename)
            )
            return False

        self.log("info", "Consuming {}".format(doc))
    def pre_check_directories(self):
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
        os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
        os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)

    def try_consume_file(self,
                         path,
                         override_filename=None,
                         override_title=None,
                         override_correspondent_id=None,
                         override_document_type_id=None,
                         override_tag_ids=None):
        """
        Return the document object if it was successfully created.
        """

        parser_class = get_parser_class(doc)
        self.path = path
        self.filename = override_filename or os.path.basename(path)
        self.override_title = override_title
        self.override_correspondent_id = override_correspondent_id
        self.override_document_type_id = override_document_type_id
        self.override_tag_ids = override_tag_ids

        # this is for grouping logging entries for this particular file
        # together.

        self.renew_logging_group()

        # Make sure that preconditions for consuming the file are met.

        self.pre_check_file_exists()
        self.pre_check_consumption_dir()
        self.pre_check_directories()
        self.pre_check_regex()
        self.pre_check_duplicate()

        self.log("info", "Consuming {}".format(self.filename))

        # Determine the parser class.

        parser_class = get_parser_class(self.filename)
        if not parser_class:
            self.log(
                "error", "No parsers could be found for {}".format(doc))
            return False
            raise ConsumerError("No parsers available for {}".format(self.filename))
        else:
            self.log("info", "Parser: {}".format(parser_class.__name__))
            self.log("debug", "Parser: {}".format(parser_class.__name__))

        self._send_progress(file, 0, 100, 'WORKING', 'Consumption started')
        # Notify all listeners that we're going to do some work.

        self._send_progress(self.filename, 0, 100, 'WORKING', 'Consumption started')

        document_consumption_started.send(
            sender=self.__class__,
            filename=doc,
            filename=self.path,
            logging_group=self.logging_group
        )

        def progress_callback(current_progress, max_progress, message):
            # recalculate progress to be within 20 and 80
            p = int((current_progress / max_progress) * 60 + 20)
            self._send_progress(file, p, 100, "WORKING", message)
            self._send_progress(self.filename, p, 100, "WORKING", message)

        document_parser = parser_class(doc, self.logging_group, progress_callback)
        # This doesn't parse the document yet, but gives us a parser.

        document_parser = parser_class(self.path, self.logging_group, progress_callback)

        # However, this already created working directories which we have to
        # clean up.

        # Parse the document. This may take some time.

        try:
            self.log("info", "Generating thumbnail for {}...".format(doc))
            self._send_progress(file, 10, 100, 'WORKING',
            self.log("debug", "Generating thumbnail for {}...".format(self.filename))
            self._send_progress(self.filename, 10, 100, 'WORKING',
                                'Generating thumbnail...')
            thumbnail = document_parser.get_optimised_thumbnail()
            self._send_progress(file, 20, 100, 'WORKING',
            self.log("debug", "Parsing {}...".format(self.filename))
            self._send_progress(self.filename, 20, 100, 'WORKING',
                                'Getting text from document...')
            text = document_parser.get_text()
            self._send_progress(file, 80, 100, 'WORKING',
            self._send_progress(self.filename, 80, 100, 'WORKING',
                                'Getting date from document...')
            date = document_parser.get_date()
            self._send_progress(file, 85, 100, 'WORKING',
                                'Storing the document...')
            document = self._store(
                text,
                doc,
                thumbnail,
                date
            )
        except ParseError as e:
            self.log("fatal", "PARSE FAILURE for {}: {}".format(doc, e))
            document_parser.cleanup()
            self._send_progress(self.filename, 100, 100, 'FAILED',
                                "Failed: {}".format(e))
            raise ConsumerError(e)

        # Prepare the document classifier.

        # TODO: I don't really like to do this here, but this way we avoid
        # reloading the classifier multiple times, since there are multiple
        # post-consume hooks that all require the classifier.

        try:
            classifier = DocumentClassifier()
            classifier.reload()
        except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
            logging.getLogger(__name__).warning(
                "Cannot classify documents: {}.".format(e))
            classifier = None
        self._send_progress(self.filename, 85, 100, 'WORKING',
                            'Storing the document...')
        # now that everything is done, we can start to store the document
        # in the system. This will be a transaction and reasonably fast.
        try:
            with transaction.atomic():

                # store the document.
                document = self._store(
                    text=text,
                    date=date
                )

                # If we get here, it was successful. Proceed with post-consume
                # hooks. If they fail, nothing will get changed.

                self._send_progress(self.filename, 90, 100, 'WORKING',
                                    'Performing post-consumption tasks...')

                document_consumption_finished.send(
                    sender=self.__class__,
                    document=document,
                    logging_group=self.logging_group,
                    classifier=classifier
                )

                # After everything is in the database, copy the files into
                # place. If this fails, we'll also rollback the transaction.

                create_source_path_directory(document.source_path)
                self._write(document, self.path, document.source_path)
                self._write(document, thumbnail, document.thumbnail_path)

                # Delete the file only if it was successfully consumed
                self.log("debug", "Deleting file {}".format(self.path))
                os.unlink(self.path)
        except Exception as e:
            raise ConsumerError(e)
            self._send_progress(file, 100, 100, 'FAILED',
                                "Failed: {}".format(e))

        finally:
            document_parser.cleanup()
            return False
        else:
            document_parser.cleanup()
            self._cleanup_doc(doc)

            self.log(
                "info",
                "Document {} consumption finished".format(document)
            )
        self.log(
            "info",
            "Document {} consumption finished".format(document)
        )

        classifier = None
        self._send_progress(file, 100, 100, 'SUCCESS',
                            'Finished.', document.id)

        try:
            self.classifier.reload()
            classifier = self.classifier
        except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
            logging.getLogger(__name__).warning("Cannot classify documents: {}.".format(e))
        return document

        self._send_progress(file, 90, 100, 'WORKING',
                            'Performing post-consumption tasks...')
    def _store(self, text, date):

        document_consumption_finished.send(
            sender=self.__class__,
            document=document,
            logging_group=self.logging_group,
            classifier=classifier
        )
        self._send_progress(file, 100, 100, 'SUCCESS',
                            'Finished.', document.id)
        return True
        # If someone gave us the original filename, use it instead of doc.

    def _store(self, text, doc, thumbnail, date):
        file_info = FileInfo.from_path(self.filename)

        file_info = FileInfo.from_path(doc)

        stats = os.stat(doc)
        stats = os.stat(self.path)

        self.log("debug", "Saving record to database")

        created = file_info.created or date or timezone.make_aware(
            datetime.datetime.fromtimestamp(stats.st_mtime))
            datetime.datetime.fromtimestamp(stats.st_mtime))

        with open(doc, "rb") as f:
        if settings.PASSPHRASE:
            storage_type = Document.STORAGE_TYPE_GPG
        else:
            storage_type = Document.STORAGE_TYPE_UNENCRYPTED

        with open(self.path, "rb") as f:
            document = Document.objects.create(
                correspondent=file_info.correspondent,
                title=file_info.title,
@@ -202,7 +266,7 @@ class Consumer:
                checksum=hashlib.md5(f.read()).hexdigest(),
                created=created,
                modified=created,
                storage_type=self.storage_type
                storage_type=storage_type
            )

        relevant_tags = set(file_info.tags)
@@ -211,14 +275,30 @@ class Consumer:
            self.log("debug", "Tagging with {}".format(tag_names))
            document.tags.add(*relevant_tags)

        self._write(document, doc, document.source_path)
        self._write(document, thumbnail, document.thumbnail_path)
        self.apply_overrides(document)

        #TODO: why do we need to save the document again?
        document.filename = generate_filename(document)

        # We need to save the document twice, since we need the PK of the
        # document in order to create its filename above.
        document.save()

        return document

    def apply_overrides(self, document):
        if self.override_title:
            document.title = self.override_title

        if self.override_correspondent_id:
            document.correspondent = Correspondent.objects.get(pk=self.override_correspondent_id)

        if self.override_document_type_id:
            document.document_type = DocumentType.objects.get(pk=self.override_document_type_id)

        if self.override_tag_ids:
            for tag_id in self.override_tag_ids:
                document.tags.add(Tag.objects.get(pk=tag_id))

    def _write(self, document, source, target):
        with open(source, "rb") as read_file:
            with open(target, "wb") as write_file:
@@ -227,13 +307,3 @@ class Consumer:
                    return
                self.log("debug", "Encrypting")
                write_file.write(GnuPG.encrypted(read_file))

    def _cleanup_doc(self, doc):
        self.log("debug", "Deleting document {}".format(doc))
        os.unlink(doc)

    @staticmethod
    def _is_duplicate(doc):
        with open(doc, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
        return Document.objects.filter(checksum=checksum).exists()
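Note: the progress callback maps a parser's own 0..max range into the 20..80 band of the overall progress bar; the consumer appears to reserve 0..20 for setup and thumbnailing and 80..100 for date extraction and storage. A standalone check of the arithmetic:

    def overall_progress(current, maximum):
        # same formula as progress_callback above
        return int((current / maximum) * 60 + 20)

    assert overall_progress(0, 50) == 20    # parser just started
    assert overall_progress(25, 50) == 50   # halfway through parsing
    assert overall_progress(50, 50) == 80   # parsing done, storage is next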
src/documents/file_handling.py (new file, 102 lines)
@@ -0,0 +1,102 @@
import logging
import os
from collections import defaultdict

from django.conf import settings
from django.template.defaultfilters import slugify


def create_source_path_directory(source_path):
    os.makedirs(os.path.dirname(source_path), exist_ok=True)


def delete_empty_directories(directory):
    # Go up in the directory hierarchy and try to delete all directories
    directory = os.path.normpath(directory)
    root = os.path.normpath(settings.ORIGINALS_DIR)

    if not directory.startswith(root + os.path.sep):
        # don't do anything outside our originals folder.

        # append os.path.sep so that we avoid these cases:
        #   directory = /home/originals2/test
        #   root = /home/originals ("/" gets appended and startswith fails)
        return

    while directory != root:
        if not os.listdir(directory):
            # it's empty
            try:
                os.rmdir(directory)
            except OSError:
                # whatever. empty directories aren't that bad anyway.
                return
        else:
            # it's not empty.
            return

        # go one level up
        directory = os.path.normpath(os.path.dirname(directory))


def many_to_dictionary(field):
    # Converts ManyToManyField to dictionary by assuming that field
    # entries contain an _ or - which will be used as a delimiter
    mydictionary = dict()

    for index, t in enumerate(field.all()):
        # Populate tag names by index
        mydictionary[index] = slugify(t.name)

        # Find delimiter
        delimiter = t.name.find('_')

        if delimiter == -1:
            delimiter = t.name.find('-')

        if delimiter == -1:
            continue

        key = t.name[:delimiter]
        value = t.name[delimiter + 1:]

        mydictionary[slugify(key)] = slugify(value)

    return mydictionary


def generate_filename(document):
    # Create filename based on configured format
    path = ""

    try:
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            tags = defaultdict(lambda: slugify(None),
                               many_to_dictionary(document.tags))
            path = settings.PAPERLESS_FILENAME_FORMAT.format(
                correspondent=slugify(document.correspondent),
                title=slugify(document.title),
                created=slugify(document.created),
                created_year=document.created.year if document.created else "none",
                created_month=document.created.month if document.created else "none",
                created_day=document.created.day if document.created else "none",
                added=slugify(document.added),
                added_year=document.added.year if document.added else "none",
                added_month=document.added.month if document.added else "none",
                added_day=document.added.day if document.added else "none",
                tags=tags,
            )
    except (ValueError, KeyError, IndexError):
        logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default.".format(settings.PAPERLESS_FILENAME_FORMAT))

    # Always append the primary key to guarantee uniqueness of filename
    if len(path) > 0:
        filename = "%s-%07i.%s" % (path, document.pk, document.file_type)
    else:
        filename = "%07i.%s" % (document.pk, document.file_type)

    # Append .gpg for encrypted files
    if document.storage_type == document.STORAGE_TYPE_GPG:
        filename += ".gpg"

    return filename
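Note: to see how generate_filename composes, here is a small self-contained sketch with a hypothetical stand-in for the Document model (the rendered path and field values are invented for illustration):

    from collections import namedtuple

    # Hypothetical stand-in; the real object is documents.models.Document.
    Doc = namedtuple("Doc", ["pk", "file_type", "storage_type"])
    doc = Doc(pk=42, file_type="pdf", storage_type="gpg")

    path = "acme/invoice"  # what PAPERLESS_FILENAME_FORMAT might render to
    if path:
        filename = "%s-%07i.%s" % (path, doc.pk, doc.file_type)
    else:
        filename = "%07i.%s" % (doc.pk, doc.file_type)
    if doc.storage_type == "gpg":  # Document.STORAGE_TYPE_GPG in the real code
        filename += ".gpg"

    print(filename)  # acme/invoice-0000042.pdf.gpg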
@@ -1,10 +1,11 @@
import os

import tempfile
from datetime import datetime
from time import mktime

from django import forms
from django.conf import settings
from django_q.tasks import async_task
from pathvalidate import validate_filename, ValidationError


@@ -19,12 +20,6 @@ class UploadForm(forms.Form):
            raise forms.ValidationError("That filename is suspicious.")
        return self.cleaned_data.get("document")

    def get_filename(self, i=None):
        return os.path.join(
            settings.CONSUMPTION_DIR,
            "{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name
        )

    def save(self):
        """
        Since the consumer already does a lot of work, it's easier just to save
@@ -33,15 +28,16 @@ class UploadForm(forms.Form):
        """

        document = self.cleaned_data.get("document").read()
        original_filename = self.cleaned_data.get("document").name

        t = int(mktime(datetime.now().timetuple()))

        file_name = self.get_filename()
        i = 0
        while os.path.exists(file_name):
            i += 1
            file_name = self.get_filename(i)
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        # TODO: don't just append pdf. This is here for that weird regex check at the start of the consumer.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f:

        with open(file_name, "wb") as f:
            f.write(document)
            os.utime(file_name, times=(t, t))
            os.utime(f.name, times=(t, t))

        async_task("documents.tasks.consume_file", f.name, override_filename=original_filename, task_name=os.path.basename(original_filename))
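Note: the upload path now just writes the bytes to scratch space and hands everything else to the task queue. A minimal sketch of that handoff, assuming a consume_file task that accepts an override_filename keyword as in the diff:

    import os
    import tempfile

    from django_q.tasks import async_task

    def enqueue_upload(data, original_filename, scratch_dir):
        # Persist the upload; delete=False so the queue worker can pick it up later.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         suffix=".pdf",
                                         dir=scratch_dir,
                                         delete=False) as f:
            f.write(data)
        async_task("documents.tasks.consume_file", f.name,
                   override_filename=original_filename,
                   task_name=os.path.basename(original_filename))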
@@ -1,7 +1,6 @@
import logging
from contextlib import contextmanager

from django.db import models
from django.dispatch import receiver
from whoosh import highlight
from whoosh.fields import Schema, TEXT, NUMERIC
from whoosh.highlight import Formatter, get_text
@@ -9,10 +8,8 @@ from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser
from whoosh.writing import AsyncWriter

from documents.models import Document
from paperless import settings


logger = logging.getLogger(__name__)


@@ -69,6 +66,9 @@ def open_index(recreate=False):
    if exists_in(settings.INDEX_DIR) and not recreate:
        return open_dir(settings.INDEX_DIR)
    else:
        # TODO: this is not thread safe. If 2 instances try to create the index
        # at the same time, this fails. This currently prevents parallel
        # tests.
        return create_in(settings.INDEX_DIR, get_schema())


@@ -99,15 +99,19 @@ def remove_document_from_index(document):
        remove_document(writer, document)


@contextmanager
def query_page(ix, query, page):
    with ix.searcher() as searcher:
    searcher = ix.searcher()
    try:
        query_parser = MultifieldParser(["content", "title", "correspondent"],
                                        ix.schema).parse(query)
        result_page = searcher.search_page(query_parser, page)
        result_page.results.fragmenter = highlight.ContextFragmenter(
            surround=50)
        result_page.results.formatter = JsonFormatter()
        return result_page
        yield result_page
    finally:
        searcher.close()


def autocomplete(ix, term, limit=10):
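Note: query_page is now a generator-based context manager, so the searcher stays open while the caller works with the results inside the with-block and is closed afterwards, even on errors. The pattern in isolation, with a file standing in for the Whoosh searcher:

    from contextlib import contextmanager

    @contextmanager
    def open_log(path):
        f = open(path, "a")   # acquire, like ix.searcher() above
        try:
            yield f           # the caller uses it for the duration of the with-block
        finally:
            f.close()         # always released, even if the caller raises

    # usage:
    # with open_log("/tmp/example.log") as log:
    #     log.write("hello\n")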
@@ -1,4 +1,5 @@
import logging
import uuid


class PaperlessHandler(logging.Handler):
@@ -13,3 +14,19 @@ class PaperlessHandler(logging.Handler):
            kwargs["group"] = record.group

        Log.objects.create(**kwargs)


class LoggingMixin:

    logging_group = None

    def renew_logging_group(self):
        self.logging_group = uuid.uuid4()

    def log(self, level, message):
        target = ".".join([self.__class__.__module__, self.__class__.__name__])
        logger = logging.getLogger(target)

        getattr(logger, level)(message, extra={
            "group": self.logging_group
        })
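Note: LoggingMixin gives any class a logger named after its module and class, plus a per-batch group id that ties related log records together. A brief usage sketch (the Worker class is invented for illustration):

    from documents.loggers import LoggingMixin

    class Worker(LoggingMixin):  # hypothetical consumer of the mixin
        def run(self, item):
            self.renew_logging_group()  # fresh uuid groups this item's log lines
            self.log("info", "processing {}".format(item))
            self.log("debug", "done with {}".format(item))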
@@ -1,250 +0,0 @@
import datetime
import imaplib
import logging
import os
import re
import time
import uuid

from base64 import b64decode
from email import policy
from email.parser import BytesParser
from dateutil import parser

from django.conf import settings

from .models import Correspondent


class MailFetcherError(Exception):
    pass


class InvalidMessageError(MailFetcherError):
    pass


class Loggable(object):

    def __init__(self, group=None):
        self.logger = logging.getLogger(__name__)
        self.logging_group = group or uuid.uuid4()

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })


class Message(Loggable):
    """
    A crude, but simple email message class. We assume that there's a subject
    and n attachments, and that we don't care about the message body.
    """

    SECRET = os.getenv("PAPERLESS_EMAIL_SECRET")

    def __init__(self, data, group=None):
        """
        Cribbed heavily from
        https://www.ianlewis.org/en/parsing-email-attachments-python
        """

        Loggable.__init__(self, group=group)

        self.subject = None
        self.time = None
        self.attachment = None

        message = BytesParser(policy=policy.default).parsebytes(data)
        self.subject = str(message["Subject"]).replace("\r\n", "")
        self.body = str(message.get_body())

        self.check_subject()
        self.check_body()

        self._set_time(message)

        self.log("info", 'Importing email: "{}"'.format(self.subject))

        attachments = []
        for part in message.walk():

            content_disposition = part.get("Content-Disposition")
            if not content_disposition:
                continue

            dispositions = content_disposition.strip().split(";")
            if len(dispositions) < 2:
                continue

            if not dispositions[0].lower() == "attachment" and \
                    "filename" not in dispositions[1].lower():
                continue

            file_data = part.get_payload()

            attachments.append(Attachment(
                b64decode(file_data), content_type=part.get_content_type()))

        if len(attachments) == 0:
            raise InvalidMessageError(
                "There don't appear to be any attachments to this message")

        if len(attachments) > 1:
            raise InvalidMessageError(
                "There's more than one attachment to this message. It cannot "
                "be indexed automatically."
            )

        self.attachment = attachments[0]

    def __bool__(self):
        return bool(self.attachment)

    def check_subject(self):
        if self.subject is None:
            raise InvalidMessageError("Message does not have a subject")
        if not Correspondent.SAFE_REGEX.match(self.subject):
            raise InvalidMessageError("Message subject is unsafe: {}".format(
                self.subject))

    def check_body(self):
        if self.SECRET not in self.body:
            raise InvalidMessageError("The secret wasn't in the body")

    def _set_time(self, message):
        self.time = datetime.datetime.now()
        message_time = message.get("Date")
        if message_time:
            try:
                self.time = parser.parse(message_time)
            except (ValueError, AttributeError):
                pass  # We assume that "now" is ok

    @property
    def file_name(self):
        return "{}.{}".format(self.subject, self.attachment.suffix)


class Attachment(object):

    SAFE_SUFFIX_REGEX = re.compile(
        r"^(application/(pdf))|(image/(png|jpeg|gif|tiff))$")

    def __init__(self, data, content_type):

        self.content_type = content_type
        self.data = data
        self.suffix = None

        m = self.SAFE_SUFFIX_REGEX.match(self.content_type)
        if not m:
            raise MailFetcherError(
                "Not-awesome file type: {}".format(self.content_type))
        self.suffix = m.group(2) or m.group(4)

    def read(self):
        return self.data


class MailFetcher(Loggable):

    def __init__(self, consume=settings.CONSUMPTION_DIR):

        Loggable.__init__(self)

        self._connection = None
        self._host = os.getenv("PAPERLESS_CONSUME_MAIL_HOST")
        self._port = os.getenv("PAPERLESS_CONSUME_MAIL_PORT")
        self._username = os.getenv("PAPERLESS_CONSUME_MAIL_USER")
        self._password = os.getenv("PAPERLESS_CONSUME_MAIL_PASS")
        self._inbox = os.getenv("PAPERLESS_CONSUME_MAIL_INBOX", "INBOX")

        self._enabled = bool(self._host)
        if self._enabled and Message.SECRET is None:
            raise MailFetcherError("No PAPERLESS_EMAIL_SECRET defined")

        self.last_checked = time.time()
        self.consume = consume

    def pull(self):
        """
        Fetch all available mail at the target address and store it locally in
        the consumption directory so that the file consumer can pick it up and
        do its thing.
        """

        if self._enabled:

            # Reset the grouping id for each fetch
            self.logging_group = uuid.uuid4()

            self.log("debug", "Checking mail")

            for message in self._get_messages():

                self.log("info", 'Storing email: "{}"'.format(message.subject))

                t = int(time.mktime(message.time.timetuple()))
                file_name = os.path.join(self.consume, message.file_name)
                with open(file_name, "wb") as f:
                    f.write(message.attachment.data)
                    os.utime(file_name, times=(t, t))

        self.last_checked = time.time()

    def _get_messages(self):

        r = []
        try:

            self._connect()
            self._login()

            for message in self._fetch():
                if message:
                    r.append(message)

            self._connection.expunge()
            self._connection.close()
            self._connection.logout()

        except MailFetcherError as e:
            self.log("error", str(e))

        return r

    def _connect(self):
        try:
            self._connection = imaplib.IMAP4_SSL(self._host, self._port)
        except OSError as e:
            msg = "Problem connecting to {}: {}".format(self._host, e.strerror)
            raise MailFetcherError(msg)

    def _login(self):

        login = self._connection.login(self._username, self._password)
        if not login[0] == "OK":
            raise MailFetcherError("Can't log into mail: {}".format(login[1]))

        inbox = self._connection.select(self._inbox)
        if not inbox[0] == "OK":
            raise MailFetcherError("Can't find the inbox: {}".format(inbox[1]))

    def _fetch(self):

        for num in self._connection.search(None, "ALL")[1][0].split():

            __, data = self._connection.fetch(num, "(RFC822)")

            message = None
            try:
                message = Message(data[0][1], self.logging_group)
            except InvalidMessageError as e:
                self.log("error", str(e))
            else:
                self._connection.store(num, "+FLAGS", "\\Deleted")

            if message:
                yield message
@@ -3,11 +3,10 @@ import os

from django.conf import settings
from django.core.management.base import BaseCommand

from watchdog.observers import Observer
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler

from documents.consumer import Consumer
from watchdog.observers import Observer
from watchdog.observers.polling import PollingObserver

try:
    from inotify_simple import INotify, flags
@@ -17,17 +16,25 @@ except ImportError:

class Handler(FileSystemEventHandler):

    def __init__(self, consumer):
        self.consumer = consumer
    def _consume(self, file):
        if os.path.isfile(file):
            try:
                async_task("documents.tasks.consume_file", file, task_name=os.path.basename(file))
            except Exception as e:
                # Catch all so that the consumer won't crash.
                logging.getLogger(__name__).error("Error while consuming document: {}".format(e))

    def on_created(self, event):
        self.consumer.try_consume_file(event.src_path)
        self._consume(event.src_path)

    def on_moved(self, event):
        self._consume(event.src_path)


class Command(BaseCommand):
    """
    On every iteration of an infinite loop, consume what we can from the
    consumption directory, and fetch any mail available.
    consumption directory.
    """

    def __init__(self, *args, **kwargs):
@@ -35,12 +42,6 @@ class Command(BaseCommand):
        self.verbosity = 0
        self.logger = logging.getLogger(__name__)

        self.file_consumer = None
        self.mail_fetcher = None
        self.first_iteration = True

        self.consumer = Consumer()

        BaseCommand.__init__(self, *args, **kwargs)

    def add_arguments(self, parser):
@@ -56,9 +57,6 @@ class Command(BaseCommand):
        self.verbosity = options["verbosity"]
        directory = options["directory"]

        for d in (settings.ORIGINALS_DIR, settings.THUMBNAIL_DIR):
            os.makedirs(d, exist_ok=True)

        logging.getLogger(__name__).info(
            "Starting document consumer at {}".format(
                directory
@@ -68,11 +66,16 @@ class Command(BaseCommand):
        # Consume all files as this is not done initially by the watchdog
        for entry in os.scandir(directory):
            if entry.is_file():
                self.consumer.try_consume_file(entry.path)
                async_task("documents.tasks.consume_file", entry.path, task_name=os.path.basename(entry.path))

        # Start the watchdog. Woof!
        observer = Observer()
        event_handler = Handler(self.consumer)
        if settings.CONSUMER_POLLING > 0:
            logging.getLogger(__name__).info('Using polling instead of file'
                                             'system notifications.')
            observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
        else:
            observer = Observer()
        event_handler = Handler()
        observer.schedule(event_handler, directory, recursive=True)
        observer.start()
        try:
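Note: the command now chooses between native filesystem watching and polling at startup. A condensed sketch of that wiring, with a placeholder handler and an assumed consumption path:

    from watchdog.events import FileSystemEventHandler
    from watchdog.observers import Observer
    from watchdog.observers.polling import PollingObserver

    class PrintHandler(FileSystemEventHandler):  # placeholder for the real Handler
        def on_created(self, event):
            print("new file:", event.src_path)

    polling = 0  # stand-in for settings.CONSUMER_POLLING
    observer = PollingObserver(timeout=polling) if polling > 0 else Observer()
    observer.schedule(PrintHandler(), "/tmp/consume", recursive=True)
    # observer.start(); observer.join()  # run these in a real process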
@@ -1,4 +1,5 @@
from django.core.management.base import BaseCommand

from ...mixins import Renderable
from ...tasks import train_classifier

@@ -1,16 +1,15 @@
import json
import os
import time
import shutil
import time

from django.core.management.base import BaseCommand, CommandError
from django.core import serializers
from django.core.management.base import BaseCommand, CommandError

from documents.models import Document, Correspondent, Tag, DocumentType
from paperless.db import GnuPG

from ...mixins import Renderable
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
from paperless.db import GnuPG
from ...mixins import Renderable


class Command(Renderable, BaseCommand):
@@ -3,15 +3,14 @@ import os
import shutil

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError

from documents.models import Document
from paperless.db import GnuPG

from ...mixins import Renderable

from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
from paperless.db import GnuPG
from ...file_handling import generate_filename, create_source_path_directory
from ...mixins import Renderable


class Command(Renderable, BaseCommand):
@@ -82,6 +81,10 @@ class Command(Renderable, BaseCommand):

    def _import_files_from_manifest(self):

        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            storage_type = Document.STORAGE_TYPE_GPG

        for record in self.manifest:

            if not record["model"] == "documents.document":
@@ -94,6 +97,14 @@ class Command(Renderable, BaseCommand):
            document_path = os.path.join(self.source, doc_file)
            thumbnail_path = os.path.join(self.source, thumb_file)

            document.storage_type = storage_type
            document.filename = generate_filename(document)

            if os.path.isfile(document.source_path):
                raise FileExistsError(document.source_path)

            create_source_path_directory(document.source_path)

            if settings.PASSPHRASE:

                with open(document_path, "rb") as unencrypted:
@@ -109,18 +120,8 @@ class Command(Renderable, BaseCommand):
                        encrypted.write(GnuPG.encrypted(unencrypted))

            else:

                print("Moving {} to {}".format(document_path, document.source_path))
                shutil.copy(document_path, document.source_path)
                shutil.copy(thumbnail_path, document.thumbnail_path)

        # Reset the storage type to whatever we've used while importing

        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            storage_type = Document.STORAGE_TYPE_GPG

        Document.objects.filter(
            pk__in=[r["pk"] for r in self.manifest]
        ).update(
            storage_type=storage_type
        )
            document.save()
@@ -8,5 +8,5 @@ class Command(BaseCommand):
    help = "A quick & dirty way to see what's in the logs"

    def handle(self, *args, **options):
        for l in Log.objects.order_by("pk"):
            print(l)
        for log in Log.objects.order_by("pk"):
            print(log)
@@ -1,7 +1,6 @@
from django.core.management.base import BaseCommand

from documents.models import Document, Tag

from documents.models import Document
from ...mixins import Renderable

@@ -9,16 +9,14 @@ def match_correspondents(document_content, classifier):
    correspondents = Correspondent.objects.all()
    predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None

    matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]
    return matched_correspondents
    return [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]


def match_document_types(document_content, classifier):
    document_types = DocumentType.objects.all()
    predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None

    matched_document_types = [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]
    return matched_document_types
    return [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]


def match_tags(document_content, classifier):
@@ -1,7 +1,4 @@
# Generated by Django 3.1.3 on 2020-11-07 12:35
import os

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


@@ -9,11 +9,11 @@ from django_q.tasks import schedule
def add_schedules(apps, schema_editor):
    schedule('documents.tasks.train_classifier', name="Train the classifier", schedule_type=Schedule.HOURLY)
    schedule('documents.tasks.index_optimize', name="Optimize the index", schedule_type=Schedule.DAILY)
    schedule('documents.tasks.consume_mail', name="Check E-Mail", schedule_type=Schedule.MINUTES, minutes=10)


def remove_schedules(apps, schema_editor):
    Schedule.objects.all().delete()
    Schedule.objects.filter(func='documents.tasks.train_classifier').delete()
    Schedule.objects.filter(func='documents.tasks.index_optimize').delete()


class Migration(migrations.Migration):
src/documents/migrations/1002_auto_20201111_1105.py (new file, 18 lines)
@@ -0,0 +1,18 @@
# Generated by Django 3.1.3 on 2020-11-11 11:05

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('documents', '1001_auto_20201109_1636'),
    ]

    operations = [
        migrations.AlterField(
            model_name='document',
            name='filename',
            field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True),
        ),
    ]
@@ -3,18 +3,15 @@

import logging
import os
import re
from collections import OrderedDict, defaultdict
from collections import OrderedDict

import dateutil.parser
from django.conf import settings
from django.db import models
from django.dispatch import receiver
from django.template.defaultfilters import slugify
from django.utils import timezone
from django.utils.text import slugify


class MatchingModel(models.Model):

    MATCH_ANY = 1
@@ -116,6 +113,7 @@ class DocumentType(MatchingModel):

class Document(models.Model):

    # TODO: why do we need an explicit list
    TYPE_PDF = "pdf"
    TYPE_PNG = "png"
    TYPE_JPG = "jpg"
@@ -192,7 +190,7 @@ class Document(models.Model):
        default=timezone.now, editable=False, db_index=True)

    filename = models.FilePathField(
        max_length=256,
        max_length=1024,
        editable=False,
        default=None,
        null=True,
@@ -220,123 +218,18 @@ class Document(models.Model):
            return "{}: {}".format(created, self.correspondent or self.title)
        return str(created)

    def find_renamed_document(self, subdirectory=""):
        suffix = "%07i.%s" % (self.pk, self.file_type)

        # Append .gpg for encrypted files
        if self.storage_type == self.STORAGE_TYPE_GPG:
            suffix += ".gpg"

        # Go up in the directory hierarchy and try to delete all directories
        root = os.path.normpath(Document.filename_to_path(subdirectory))

        for filename in os.listdir(root):
            if filename.endswith(suffix):
                return os.path.join(subdirectory, filename)

            fullname = os.path.join(subdirectory, filename)
            if os.path.isdir(Document.filename_to_path(fullname)):
                return self.find_renamed_document(fullname)

        return None

    @property
    def source_filename(self):
        # Initial filename generation (for new documents)
        if self.filename is None:
            self.filename = self.generate_source_filename()

        # Check if document is still available under filename
        elif not os.path.isfile(Document.filename_to_path(self.filename)):
            recovered_filename = self.find_renamed_document()

            # If we found the file, update the filename
            if recovered_filename is not None:
                logger = logging.getLogger(__name__)
                logger.warning("Filename of document " + str(self.id) +
                               " has changed and was successfully updated")
                self.filename = recovered_filename

                # Remove all empty subdirectories from MEDIA_ROOT
                Document.delete_all_empty_subdirectories(
                    Document.filename_to_path(""))
            else:
                logger = logging.getLogger(__name__)
                logger.error("File of document " + str(self.id) + " has " +
                             "gone and could not be recovered")

        return self.filename

    @staticmethod
    def many_to_dictionary(field):
        # Converts ManyToManyField to dictionary by assuming that field
        # entries contain an _ or - which will be used as a delimiter
        mydictionary = dict()

        for index, t in enumerate(field.all()):
            # Populate tag names by index
            mydictionary[index] = slugify(t.name)

            # Find delimiter
            delimiter = t.name.find('_')

            if delimiter == -1:
                delimiter = t.name.find('-')

            if delimiter == -1:
                continue

            key = t.name[:delimiter]
            value = t.name[delimiter+1:]

            mydictionary[slugify(key)] = slugify(value)

        return mydictionary

    def generate_source_filename(self):
        # Create filename based on configured format
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            tags = defaultdict(lambda: slugify(None),
                               self.many_to_dictionary(self.tags))
            path = settings.PAPERLESS_FILENAME_FORMAT.format(
                correspondent=slugify(self.correspondent),
                title=slugify(self.title),
                created=slugify(self.created),
                added=slugify(self.added),
                tags=tags)
        else:
            path = ""

        # Always append the primary key to guarantee uniqueness of filename
        if len(path) > 0:
            filename = "%s-%07i.%s" % (path, self.pk, self.file_type)
        else:
            filename = "%07i.%s" % (self.pk, self.file_type)

        # Append .gpg for encrypted files
        if self.storage_type == self.STORAGE_TYPE_GPG:
            filename += ".gpg"

        return filename

    def create_source_directory(self):
        new_filename = self.generate_source_filename()

        # Determine the full "target" path
        dir_new = Document.filename_to_path(os.path.dirname(new_filename))

        # Create new path
        os.makedirs(dir_new, exist_ok=True)

    @property
    def source_path(self):
        return Document.filename_to_path(self.source_filename)
        if self.filename:
            fname = str(self.filename)
        else:
            fname = "{:07}.{}".format(self.pk, self.file_type)
            if self.storage_type == self.STORAGE_TYPE_GPG:
                fname += ".gpg"

    @staticmethod
    def filename_to_path(filename):
        return os.path.join(
            settings.ORIGINALS_DIR,
            filename
            fname
        )

    @property
@@ -362,125 +255,6 @@ class Document(models.Model):
    def thumbnail_file(self):
        return open(self.thumbnail_path, "rb")

    def set_filename(self, filename):
        if os.path.isfile(Document.filename_to_path(filename)):
            self.filename = filename

    @staticmethod
    def try_delete_empty_directories(directory):
        # Go up in the directory hierarchy and try to delete all directories
        directory = os.path.normpath(directory)
        root = os.path.normpath(Document.filename_to_path(""))

        while directory != root:
            # Try to delete the current directory
            try:
                os.rmdir(directory)
            except os.error:
                # Directory not empty, no need to go further up
                return

            # Cut off actual directory and go one level up
            directory, _ = os.path.split(directory)
            directory = os.path.normpath(directory)

    @staticmethod
    def delete_all_empty_subdirectories(directory):
        # Go through all folders and try to delete all directories
        root = os.path.normpath(Document.filename_to_path(directory))

        for filename in os.listdir(root):
            fullname = os.path.join(directory, filename)

            if not os.path.isdir(Document.filename_to_path(fullname)):
                continue

            # Go into subdirectory to see, if there is more to delete
            Document.delete_all_empty_subdirectories(
                os.path.join(directory, filename))

            # Try to delete the directory
            try:
                os.rmdir(Document.filename_to_path(fullname))
                continue
            except os.error:
                # Directory not empty, no need to go further up
                continue


@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename(sender, instance, **kwargs):
    # Skip if document has not been saved yet
    if instance.filename is None:
        return

    # Check if the file exists and update the filename otherwise
    if not os.path.isfile(Document.filename_to_path(instance.filename)):
        instance.filename = instance.source_filename

    # Build the new filename
    new_filename = instance.generate_source_filename()

    # If the filename is the same, then nothing needs to be done
    if instance.filename == new_filename:
        return

    # Determine the full "target" path
    path_new = instance.filename_to_path(new_filename)
    dir_new = instance.filename_to_path(os.path.dirname(new_filename))

    # Create new path
    instance.create_source_directory()

    # Determine the full "current" path
    path_current = instance.filename_to_path(instance.source_filename)

    # Move file
    try:
        os.rename(path_current, path_new)
    except PermissionError:
        # Do not update filename in object
        return
    except FileNotFoundError:
        logger = logging.getLogger(__name__)
        logger.error("Renaming of document " + str(instance.id) + " failed " +
                     "as file " + instance.filename + " was no longer present")
        return

    # Delete empty directory
    old_dir = os.path.dirname(instance.filename)
    old_path = instance.filename_to_path(old_dir)
    Document.try_delete_empty_directories(old_path)

    instance.filename = new_filename

    # Save instance
    # This will not cause a cascade of post_save signals, as next time
    # nothing needs to be renamed
    instance.save()


@receiver(models.signals.post_delete, sender=Document)
def delete_files(sender, instance, **kwargs):
    if instance.filename is None:
        return

    # Remove the document
    old_file = instance.filename_to_path(instance.filename)

    try:
        os.remove(old_file)
    except FileNotFoundError:
        logger = logging.getLogger(__name__)
        logger.warning("Deleted document " + str(instance.id) + " but file " +
                       old_file + " was no longer present")

    # And remove the directory (if applicable)
    old_dir = os.path.dirname(instance.filename)
    old_path = instance.filename_to_path(old_dir)
    Document.try_delete_empty_directories(old_path)


class Log(models.Model):

@@ -518,7 +292,7 @@ class FileInfo:
            non_separated_word=r"([\w,. ]|([^\s]-))"
        )
    )

    # TODO: what is this used for
    formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
    REGEXES = OrderedDict([
        ("created-correspondent-title-tags", re.compile(
@@ -20,13 +20,16 @@ from django.utils import timezone
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration

# TODO: isn't there a date parsing library for this?

DATE_REGEX = re.compile(
    r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
    r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|'  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|'  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|'  # NOQA: E501
    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|'
    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
)

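# Illustrative sanity check (not part of the diff; DATE_REGEX is assumed to
# be applied with re.search/re.finditer over filenames and document text):
#
#   >>> bool(re.search(DATE_REGEX, "scan_2018-02-13.pdf"))
#   True
#   >>> bool(re.search(DATE_REGEX, "invoice 13. February 2018.pdf"))
#   True
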
@@ -39,17 +42,16 @@ def get_parser_class(doc):
    Determine the appropriate parser class based on the file
    """

    parsers = []
    for response in document_consumer_declaration.send(None):
        parsers.append(response[1])

    # TODO: add a check that checks parser availability.

    options = []
    for parser in parsers:
        result = parser(doc)
        if result:
            options.append(result)

    # His last command was: COME! And they came. All of them. Even the parsers.

    for response in document_consumer_declaration.send(None):
        parser_declaration = response[1]
        parser_test = parser_declaration["test"]

        if parser_test(doc):
            options.append(parser_declaration)

    if not options:
        return None
@@ -59,7 +61,7 @@ def get_parser_class(doc):
        options, key=lambda _: _["weight"], reverse=True)[0]["parser"]

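# Selection note (inferred from the sort above, not new behaviour): every
# declaration carries a "weight", and the highest weight wins, so a
# specialised parser can out-rank a generic one by declaring a larger weight.
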
def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
    environment = os.environ.copy()
    if settings.CONVERT_MEMORY_LIMIT:
        environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
@@ -74,7 +76,7 @@ def run_convert(input, output, density=None, scale=None, alpha=None, strip=False
    args += ['-trim'] if trim else []
    args += ['-type', str(type)] if type else []
    args += ['-depth', str(depth)] if depth else []
    args += [input, output]
    args += [input_file, output_file]

    logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})

@@ -100,17 +102,17 @@ class ParseError(Exception):
    pass


class DocumentParser:
class DocumentParser(LoggingMixin):
    """
    Subclass this to make your own parser. Have a look at
    `paperless_tesseract.parsers` for inspiration.
    """

    def __init__(self, path, logging_group, progress_callback):
        super().__init__()
        self.logging_group = logging_group
        self.document_path = path
        self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
        self.logger = logging.getLogger(__name__)
        self.logging_group = logging_group
        self.progress_callback = progress_callback

    def get_thumbnail(self):
@@ -121,16 +123,19 @@ class DocumentParser:

    def optimise_thumbnail(self, in_path):

        out_path = os.path.join(self.tempdir, "optipng.png")
        if settings.OPTIMIZE_THUMBNAILS:
            out_path = os.path.join(self.tempdir, "optipng.png")

        args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)
            args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)

        self.log('debug', 'Execute: ' + " ".join(args))
            self.log('debug', 'Execute: ' + " ".join(args))

        if not subprocess.Popen(args).wait() == 0:
            raise ParseError("Optipng failed at {}".format(args))
            if not subprocess.Popen(args).wait() == 0:
                raise ParseError("Optipng failed at {}".format(args))

        return out_path
            return out_path
        else:
            return in_path

    def get_optimised_thumbnail(self):
        return self.optimise_thumbnail(self.get_thumbnail())
@@ -222,11 +227,6 @@ class DocumentParser:

        return date

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })

    def cleanup(self):
        self.log("debug", "Deleting directory {}".format(self.tempdir))
        shutil.rmtree(self.tempdir)
@@ -105,7 +105,6 @@ class DocumentSerializer(serializers.ModelSerializer):

class LogSerializer(serializers.ModelSerializer):


    class Meta:
        model = Log
        fields = (
@@ -1,5 +1,5 @@
from django.dispatch import Signal

document_consumption_started = Signal(providing_args=["filename"])
document_consumption_finished = Signal(providing_args=["document"])
document_consumer_declaration = Signal(providing_args=[])
document_consumption_started = Signal()
document_consumption_finished = Signal()
document_consumer_declaration = Signal()
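# Background (hedged, not part of the original diff): Signal's providing_args
# argument was purely documentational; Django deprecated it around 3.0/3.1
# and removed it in 4.0, hence the plain Signal() declarations above.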
@@ -6,9 +6,13 @@ from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.dispatch import receiver
from django.utils import timezone

from .. import index, matching
from ..file_handling import delete_empty_directories, generate_filename, \
    create_source_path_directory
from ..models import Document, Tag


@@ -141,17 +145,65 @@ def run_post_consume_script(sender, document, **kwargs):
    )).wait()


@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):

    if not isinstance(instance, Document):
        return

    for f in (instance.source_path, instance.thumbnail_path):
        try:
            os.unlink(f)
        except FileNotFoundError:
            pass  # The file's already gone, so we're cool with it.

    delete_empty_directories(os.path.dirname(instance.source_path))


@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename_and_move_files(sender, instance, **kwargs):

    if not instance.filename:
        # Can't update the filename if there is no filename to begin with.
        # This happens after the consumer creates a new document.
        # The PK needs to be set first by saving the document once. When this
        # happens, the file is not yet in the ORIGINALS_DIR, and thus can't be
        # renamed anyway. In all other cases, instance.filename will be set.
        return

    old_filename = instance.filename
    old_path = instance.source_path
    new_filename = generate_filename(instance)

    if new_filename == instance.filename:
        # Don't do anything if it's the same.
        return

    new_path = os.path.join(settings.ORIGINALS_DIR, new_filename)

    if not os.path.isfile(old_path):
        # Can't do anything if the old file does not exist anymore.
        logging.getLogger(__name__).fatal('Document {}: File {} has gone.'.format(str(instance), old_path))
        return

    if os.path.isfile(new_path):
        # Can't do anything if the new file already exists. Skip updating file.
        logging.getLogger(__name__).warning('Document {}: Cannot rename file since target path {} already exists.'.format(str(instance), new_path))
        return

    create_source_path_directory(new_path)

    try:
        os.rename(old_path, new_path)
        instance.filename = new_filename
        instance.save()

    except OSError as e:
        instance.filename = old_filename
    except DatabaseError as e:
        os.rename(new_path, old_path)
        instance.filename = old_filename

    if not os.path.isfile(old_path):
        delete_empty_directories(os.path.dirname(old_path))


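# Reading of the rename logic above (comment added for clarity, no new
# behaviour): the file is renamed on disk first and the model saved second.
# An OSError leaves the database untouched, while a DatabaseError rolls the
# on-disk rename back, so the file and the database never disagree.
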
def set_log_entry(sender, document=None, logging_group=None, **kwargs):

@@ -1,20 +1,15 @@
import logging

from django.conf import settings
from django_q.tasks import async_task, result
from whoosh.writing import AsyncWriter

from documents import index
from documents.classifier import DocumentClassifier, \
    IncompatibleClassifierVersionError
from documents.mail import MailFetcher
from documents.consumer import Consumer, ConsumerError
from documents.models import Document


def consume_mail():
    MailFetcher().pull()


def index_optimize():
    index.open_index().optimize()

@@ -55,3 +50,27 @@ def train_classifier():
        logging.getLogger(__name__).error(
            "Classifier error: " + str(e)
        )


def consume_file(path,
                 override_filename=None,
                 override_title=None,
                 override_correspondent_id=None,
                 override_document_type_id=None,
                 override_tag_ids=None):

    document = Consumer().try_consume_file(
        path,
        override_filename=override_filename,
        override_title=override_title,
        override_correspondent_id=override_correspondent_id,
        override_document_type_id=override_document_type_id,
        override_tag_ids=override_tag_ids)

    if document:
        return "Success. New document id {} created".format(
            document.pk
        )
    else:
        raise ConsumerError("Unknown error: Returned document was null, but "
                            "no error message was given.")
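# Usage sketch (hedged; the exact call site is not in this hunk): with
# django-q, this task would typically be queued as
#
#   async_task("documents.tasks.consume_file", "/path/to/scan.pdf")
#
# which matches the async_task import at the top of this file.
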
File diff suppressed because it is too large
@@ -1,208 +0,0 @@
Return-Path: <sender@example.com>
X-Original-To: sender@mailbox4.mailhost.com
Delivered-To: sender@mailbox4.mailhost.com
Received: from mx8.mailhost.com (mail8.mailhost.com [75.126.24.68])
    by mailbox4.mailhost.com (Postfix) with ESMTP id B62BD5498001
    for <sender@mailbox4.mailhost.com>; Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
Received: from localhost (localhost.localdomain [127.0.0.1])
    by mx8.mailhost.com (Postfix) with ESMTP id B41796F190D
    for <sender@mailbox4.mailhost.com>; Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
X-Spam-Flag: NO
X-Spam-Score: 0
X-Spam-Level:
X-Spam-Status: No, score=0 tagged_above=-999 required=3
    tests=[RCVD_IN_DNSWL_NONE=-0.0001]
Received: from mx8.mailhost.com ([127.0.0.1])
    by localhost (mail8.mailhost.com [127.0.0.1]) (amavisd-new, port 10024)
    with ESMTP id 3cj6d28FXsS3 for <sender@mailbox4.mailhost.com>;
    Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
Received: from smtp.mailhost.com (smtp.mailhost.com [74.55.86.74])
    by mx8.mailhost.com (Postfix) with ESMTP id 527D76F1529
    for <paperless@example.com>; Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
Received: from [10.114.0.19] (nl3x.mullvad.net [46.166.136.162])
    by smtp.mailhost.com (Postfix) with ESMTP id 9C52420C6FDA
    for <paperless@example.com>; Thu, 4 Feb 2016 22:01:16 +0000 (UTC)
To: paperless@example.com
From: Daniel Quinn <sender@example.com>
Subject: Test 0
Message-ID: <56B3CA2A.6030806@example.com>
Date: Thu, 4 Feb 2016 22:01:14 +0000
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101
 Thunderbird/38.5.0
MIME-Version: 1.0
Content-Type: multipart/mixed;
 boundary="------------090701020702030809070008"

This is a multi-part message in MIME format.
--------------090701020702030809070008
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 7bit

The secret word is "paperless" :-)

--------------090701020702030809070008
Content-Type: application/pdf;
 name="test0.pdf"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
 filename="test0.pdf"

[base64-encoded PDF attachment (test0.pdf) omitted; roughly 160 lines of encoded data in the original fixture]
--------------090701020702030809070008--
src/documents/tests/test_api.py (new file)
@@ -0,0 +1,217 @@
import os
import shutil
import tempfile
from unittest import mock

from django.contrib.auth.models import User
from django.test import override_settings
from rest_framework.test import APITestCase

from documents.models import Document, Correspondent, DocumentType, Tag


class DocumentApiTest(APITestCase):

    def setUp(self):
        self.scratch_dir = tempfile.mkdtemp()
        self.media_dir = tempfile.mkdtemp()
        self.originals_dir = os.path.join(self.media_dir, "documents", "originals")
        self.thumbnail_dir = os.path.join(self.media_dir, "documents", "thumbnails")

        os.makedirs(self.originals_dir, exist_ok=True)
        os.makedirs(self.thumbnail_dir, exist_ok=True)

        override_settings(
            SCRATCH_DIR=self.scratch_dir,
            MEDIA_ROOT=self.media_dir,
            ORIGINALS_DIR=self.originals_dir,
            THUMBNAIL_DIR=self.thumbnail_dir
        ).enable()

        user = User.objects.create_superuser(username="temp_admin")
        self.client.force_login(user=user)

    def tearDown(self):
        shutil.rmtree(self.scratch_dir, ignore_errors=True)
        shutil.rmtree(self.media_dir, ignore_errors=True)

    def testDocuments(self):

        response = self.client.get("/api/documents/").data

        self.assertEqual(response['count'], 0)

        c = Correspondent.objects.create(name="c", pk=41)
        dt = DocumentType.objects.create(name="dt", pk=63)
        tag = Tag.objects.create(name="t", pk=85)

        doc = Document.objects.create(title="WOW", content="the content", correspondent=c, document_type=dt, checksum="123")

        doc.tags.add(tag)

        response = self.client.get("/api/documents/", format='json')
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data['count'], 1)

        returned_doc = response.data['results'][0]
        self.assertEqual(returned_doc['id'], doc.id)
        self.assertEqual(returned_doc['title'], doc.title)
        self.assertEqual(returned_doc['correspondent']['name'], c.name)
        self.assertEqual(returned_doc['document_type']['name'], dt.name)
        self.assertEqual(returned_doc['correspondent']['id'], c.id)
        self.assertEqual(returned_doc['document_type']['id'], dt.id)
        self.assertEqual(returned_doc['correspondent']['id'], returned_doc['correspondent_id'])
        self.assertEqual(returned_doc['document_type']['id'], returned_doc['document_type_id'])
        self.assertEqual(len(returned_doc['tags']), 1)
        self.assertEqual(returned_doc['tags'][0]['name'], tag.name)
        self.assertEqual(returned_doc['tags'][0]['id'], tag.id)
        self.assertListEqual(returned_doc['tags_id'], [tag.id])

        c2 = Correspondent.objects.create(name="c2")

        returned_doc['correspondent_id'] = c2.pk
        returned_doc['title'] = "the new title"

        response = self.client.put('/api/documents/{}/'.format(doc.pk), returned_doc, format='json')

        self.assertEqual(response.status_code, 200)

        doc_after_save = Document.objects.get(id=doc.id)

        self.assertEqual(doc_after_save.correspondent, c2)
        self.assertEqual(doc_after_save.title, "the new title")

        self.client.delete("/api/documents/{}/".format(doc_after_save.pk))

        self.assertEqual(len(Document.objects.all()), 0)

    def test_document_actions(self):

        _, filename = tempfile.mkstemp(dir=self.originals_dir)

        content = b"This is a test"
        content_thumbnail = b"thumbnail content"

        with open(filename, "wb") as f:
            f.write(content)

        doc = Document.objects.create(title="none", filename=os.path.basename(filename), file_type="pdf")

        with open(os.path.join(self.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f:
            f.write(content_thumbnail)

        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content)

        response = self.client.get('/api/documents/{}/preview/'.format(doc.pk))

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content)

        response = self.client.get('/api/documents/{}/thumb/'.format(doc.pk))

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content_thumbnail)

    def test_document_actions_not_existing_file(self):

        doc = Document.objects.create(title="none", filename=os.path.basename("asd"), file_type="pdf")

        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
        self.assertEqual(response.status_code, 404)

        response = self.client.get('/api/documents/{}/preview/'.format(doc.pk))
        self.assertEqual(response.status_code, 404)

        response = self.client.get('/api/documents/{}/thumb/'.format(doc.pk))
        self.assertEqual(response.status_code, 404)

    def test_document_filters(self):

        doc1 = Document.objects.create(title="none1", checksum="A")
        doc2 = Document.objects.create(title="none2", checksum="B")
        doc3 = Document.objects.create(title="none3", checksum="C")

        tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)
        tag_2 = Tag.objects.create(name="t2")
        tag_3 = Tag.objects.create(name="t3")

        doc1.tags.add(tag_inbox)
        doc2.tags.add(tag_2)
        doc3.tags.add(tag_2)
        doc3.tags.add(tag_3)

        response = self.client.get("/api/documents/?is_in_inbox=true")
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['id'], doc1.id)

        response = self.client.get("/api/documents/?is_in_inbox=false")
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['id'], doc2.id)
        self.assertEqual(results[1]['id'], doc3.id)

        response = self.client.get("/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['id'], doc1.id)
        self.assertEqual(results[1]['id'], doc3.id)

        response = self.client.get("/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['id'], doc3.id)

        response = self.client.get("/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 0)

        response = self.client.get("/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 3)

    @mock.patch("documents.index.autocomplete")
    def test_search_autocomplete(self, m):
        m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]

        response = self.client.get("/api/search/autocomplete/?term=test")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 10)

        response = self.client.get("/api/search/autocomplete/?term=test&limit=20")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 20)

        response = self.client.get("/api/search/autocomplete/?term=test&limit=-1")
        self.assertEqual(response.status_code, 400)

        response = self.client.get("/api/search/autocomplete/")
        self.assertEqual(response.status_code, 400)

        response = self.client.get("/api/search/autocomplete/?term=")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 10)

    def test_statistics(self):

        doc1 = Document.objects.create(title="none1", checksum="A")
        doc2 = Document.objects.create(title="none2", checksum="B")
        doc3 = Document.objects.create(title="none3", checksum="C")

        tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)

        doc1.tags.add(tag_inbox)

        response = self.client.get("/api/statistics/")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data['documents_total'], 3)
        self.assertEqual(response.data['documents_inbox'], 1)
@@ -2,9 +2,9 @@ import unittest

from django.test import TestCase

from .factories import DocumentFactory
from ..checks import changed_password_check
from ..models import Document
from .factories import DocumentFactory


class ChecksTestCase(TestCase):
src/documents/tests/test_classifier.py (new file)
@@ -0,0 +1,85 @@
import tempfile

from django.test import TestCase, override_settings

from documents.classifier import DocumentClassifier
from documents.models import Correspondent, Document, Tag, DocumentType


class TestClassifier(TestCase):

    def setUp(self):

        self.classifier = DocumentClassifier()

    def generate_test_data(self):
        self.c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
        self.c2 = Correspondent.objects.create(name="c2")
        self.t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
        self.t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True)
        self.t3 = Tag.objects.create(name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45)
        self.dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)

        self.doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=self.c1, checksum="A", document_type=self.dt)
        self.doc2 = Document.objects.create(title="doc1", content="this is another document, but from c2", correspondent=self.c2, checksum="B")
        self.doc_inbox = Document.objects.create(title="doc235", content="aa", checksum="C")

        self.doc1.tags.add(self.t1)
        self.doc2.tags.add(self.t1)
        self.doc2.tags.add(self.t3)
        self.doc_inbox.tags.add(self.t2)

    def testNoTrainingData(self):
        try:
            self.classifier.train()
        except ValueError as e:
            self.assertEqual(str(e), "No training data available.")
        else:
            self.fail("Should raise exception")

    def testEmpty(self):
        Document.objects.create(title="WOW", checksum="3457", content="ASD")
        self.classifier.train()
        self.assertIsNone(self.classifier.document_type_classifier)
        self.assertIsNone(self.classifier.tags_classifier)
        self.assertIsNone(self.classifier.correspondent_classifier)

        self.assertListEqual(self.classifier.predict_tags(""), [])
        self.assertIsNone(self.classifier.predict_document_type(""))
        self.assertIsNone(self.classifier.predict_correspondent(""))

    def testTrain(self):
        self.generate_test_data()
        self.classifier.train()
        self.assertListEqual(list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk])
        self.assertListEqual(list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk])

    def testPredict(self):
        self.generate_test_data()
        self.classifier.train()
        self.assertEqual(self.classifier.predict_correspondent(self.doc1.content), self.c1.pk)
        self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
        self.assertTupleEqual(self.classifier.predict_tags(self.doc1.content), (self.t1.pk,))
        self.assertTupleEqual(self.classifier.predict_tags(self.doc2.content), (self.t1.pk, self.t3.pk))
        self.assertEqual(self.classifier.predict_document_type(self.doc1.content), self.dt.pk)
        self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)

    def testDatasetHashing(self):

        self.generate_test_data()

        self.assertTrue(self.classifier.train())
        self.assertFalse(self.classifier.train())

    @override_settings(DATA_DIR=tempfile.mkdtemp())
    def testSaveClassifier(self):

        self.generate_test_data()

        self.classifier.train()

        self.classifier.save_classifier()

        new_classifier = DocumentClassifier()
        new_classifier.reload()
        self.assertFalse(new_classifier.train())
@@ -1,8 +1,15 @@
import os
import re
import shutil
import tempfile
from unittest import mock
from unittest.mock import MagicMock

from django.test import TestCase
from django.test import TestCase, override_settings

from ..models import FileInfo, Tag
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError


class TestAttributes(TestCase):
@@ -394,3 +401,254 @@ class TestFieldPermutations(TestCase):
        self.assertEqual(info.created.year, 2019)
        self.assertEqual(info.created.month, 9)
        self.assertEqual(info.created.day, 8)


class DummyParser(DocumentParser):

    def get_thumbnail(self):
        # not important during tests
        raise NotImplementedError()

    def __init__(self, path, logging_group, scratch_dir):
        super(DummyParser, self).__init__(path, logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)

    def get_optimised_thumbnail(self):
        return self.fake_thumb

    def get_text(self):
        return "The Text"


class FaultyParser(DocumentParser):

    def get_thumbnail(self):
        # not important during tests
        raise NotImplementedError()

    def __init__(self, path, logging_group, scratch_dir):
        super(FaultyParser, self).__init__(path, logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)

    def get_optimised_thumbnail(self):
        return self.fake_thumb

    def get_text(self):
        raise ParseError("Does not compute.")


class TestConsumer(TestCase):

    def make_dummy_parser(self, path, logging_group):
        return DummyParser(path, logging_group, self.scratch_dir)

    def make_faulty_parser(self, path, logging_group):
        return FaultyParser(path, logging_group, self.scratch_dir)

    def setUp(self):
        self.scratch_dir = tempfile.mkdtemp()
        self.media_dir = tempfile.mkdtemp()
        self.consumption_dir = tempfile.mkdtemp()

        override_settings(
            SCRATCH_DIR=self.scratch_dir,
            MEDIA_ROOT=self.media_dir,
            ORIGINALS_DIR=os.path.join(self.media_dir, "documents", "originals"),
            THUMBNAIL_DIR=os.path.join(self.media_dir, "documents", "thumbnails"),
            CONSUMPTION_DIR=self.consumption_dir
        ).enable()

        patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
        m = patcher.start()
        m.return_value = [(None, {
            "parser": self.make_dummy_parser,
            "test": lambda _: True,
            "weight": 0
        })]
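        # The mocked declaration mirrors the dict shape that real
        # document_consumer_declaration handlers return (cf. get_parser_class):
        # "parser" is a factory, "test" a predicate on the file path, and
        # "weight" the tie-breaker between competing parsers.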

        self.addCleanup(patcher.stop)

        self.consumer = Consumer()

    def tearDown(self):
        shutil.rmtree(self.scratch_dir, ignore_errors=True)
        shutil.rmtree(self.media_dir, ignore_errors=True)
        shutil.rmtree(self.consumption_dir, ignore_errors=True)

    def get_test_file(self):
        fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.scratch_dir)
        return f

    def testNormalOperation(self):

        filename = self.get_test_file()
        document = self.consumer.try_consume_file(filename)

        self.assertEqual(document.content, "The Text")
        self.assertEqual(document.title, os.path.splitext(os.path.basename(filename))[0])
        self.assertIsNone(document.correspondent)
        self.assertIsNone(document.document_type)
        self.assertEqual(document.filename, "0000001.pdf")

        self.assertTrue(os.path.isfile(
            document.source_path
        ))

        self.assertTrue(os.path.isfile(
            document.thumbnail_path
        ))

        self.assertFalse(os.path.isfile(filename))

    def testOverrideFilename(self):
        filename = self.get_test_file()
        override_filename = "My Bank - Statement for November.pdf"

        document = self.consumer.try_consume_file(filename, override_filename=override_filename)

        self.assertEqual(document.correspondent.name, "My Bank")
        self.assertEqual(document.title, "Statement for November")

    def testOverrideTitle(self):

        document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
        self.assertEqual(document.title, "Override Title")

    def testOverrideCorrespondent(self):
        c = Correspondent.objects.create(name="test")

        document = self.consumer.try_consume_file(self.get_test_file(), override_correspondent_id=c.pk)
        self.assertEqual(document.correspondent.id, c.id)

    def testOverrideDocumentType(self):
        dt = DocumentType.objects.create(name="test")

        document = self.consumer.try_consume_file(self.get_test_file(), override_document_type_id=dt.pk)
        self.assertEqual(document.document_type.id, dt.id)

    def testOverrideTags(self):
        t1 = Tag.objects.create(name="t1")
        t2 = Tag.objects.create(name="t2")
        t3 = Tag.objects.create(name="t3")
        document = self.consumer.try_consume_file(self.get_test_file(), override_tag_ids=[t1.id, t3.id])

        self.assertIn(t1, document.tags.all())
        self.assertNotIn(t2, document.tags.all())
        self.assertIn(t3, document.tags.all())

    def testNotAFile(self):
        try:
            self.consumer.try_consume_file("non-existing-file")
        except ConsumerError as e:
            self.assertTrue(str(e).endswith('It is not a file'))
            return

        self.fail("Should throw exception")

    @override_settings(CONSUMPTION_DIR=None)
    def testConsumptionDirUnset(self):
        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.")
            return

        self.fail("Should throw exception")

    @override_settings(CONSUMPTION_DIR="asd")
    def testNoConsumptionDir(self):
        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertEqual(str(e), "Consumption directory asd does not exist")
            return

        self.fail("Should throw exception")

    def testDuplicates(self):
        self.consumer.try_consume_file(self.get_test_file())

        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertTrue(str(e).endswith("It is a duplicate."))
            return

        self.fail("Should throw exception")

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def testNoParsers(self, m):
        m.return_value = []

        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertTrue(str(e).startswith("No parsers abvailable"))
            return

        self.fail("Should throw exception")

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def testFaultyParser(self, m):
        m.return_value = [(None, {
            "parser": self.make_faulty_parser,
            "test": lambda _: True,
            "weight": 0
        })]

        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertEqual(str(e), "Does not compute.")
            return

        self.fail("Should throw exception.")

    @mock.patch("documents.consumer.Consumer._write")
    def testPostSaveError(self, m):
        filename = self.get_test_file()
        m.side_effect = OSError("NO.")
        try:
            self.consumer.try_consume_file(filename)
        except ConsumerError as e:
            self.assertEqual(str(e), "NO.")
        else:
            self.fail("Should raise exception")

        # file not deleted
        self.assertTrue(os.path.isfile(filename))

        # Database empty
        self.assertEqual(len(Document.objects.all()), 0)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
    def testFilenameHandling(self):
        filename = self.get_test_file()

        document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")

        print(document.source_path)
        print("===")

        self.assertEqual(document.title, "new docs")
        self.assertEqual(document.correspondent.name, "Bank")
        self.assertEqual(document.filename, "bank/new-docs-0000001.pdf")

    @mock.patch("documents.consumer.DocumentClassifier")
    def testClassifyDocument(self, m):
        correspondent = Correspondent.objects.create(name="test")
        dtype = DocumentType.objects.create(name="test")
        t1 = Tag.objects.create(name="t1")
        t2 = Tag.objects.create(name="t2")

        m.return_value = MagicMock()
        m.return_value.predict_correspondent.return_value = correspondent.pk
        m.return_value.predict_document_type.return_value = dtype.pk
        m.return_value.predict_tags.return_value = [t1.pk]

        document = self.consumer.try_consume_file(self.get_test_file())

        self.assertEqual(document.correspondent, correspondent)
        self.assertEqual(document.document_type, dtype)
        self.assertIn(t1, document.tags.all())
        self.assertNotIn(t2, document.tags.all())
@@ -1,17 +1,14 @@
import datetime
import os
import shutil
from unittest import mock
from uuid import uuid4
from pathlib import Path
from shutil import rmtree
from uuid import uuid4

from dateutil import tz
from django.conf import settings
from django.test import TestCase, override_settings

from django.utils.text import slugify
from ..models import Tag, Document, Correspondent
from django.conf import settings
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
from ..models import Document, Correspondent
from ..signals.handlers import update_filename_and_move_files


class TestDate(TestCase):
@@ -31,18 +28,6 @@ class TestDate(TestCase):
        for dirname in self.deletion_list:
            shutil.rmtree(dirname, ignore_errors=True)

    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_source_filename(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        self.assertEqual(document.source_filename, "0000001.pdf")

        document.filename = "test.pdf"
        self.assertEqual(document.source_filename, "test.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_generate_source_filename(self):
        document = Document()
@@ -50,58 +35,50 @@ class TestDate(TestCase):
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        self.assertEqual(document.generate_source_filename(), "0000001.pdf")
        self.assertEqual(generate_filename(document), "{:07d}.pdf".format(document.pk))

        document.storage_type = Document.STORAGE_TYPE_GPG
        self.assertEqual(document.generate_source_filename(),
                         "0000001.pdf.gpg")
        self.assertEqual(generate_filename(document),
                         "{:07d}.pdf.gpg".format(document.pk))

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_file_renaming(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf")
        document.create_source_directory()
        Path(document.source_path).touch()
        # Test default source_path
        self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/{:07d}.pdf".format(document.pk))

        # Test source_path
        self.assertEqual(document.source_path, settings.MEDIA_ROOT +
                         "/documents/originals/none/none-0000001.pdf")
        document.filename = generate_filename(document)

        # Ensure that filename is properly generated
        self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))

        # Enable encryption and check again
        document.storage_type = Document.STORAGE_TYPE_GPG
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf.gpg")
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
                         "none/none-{:07d}.pdf.gpg".format(document.pk))

        document.save()

        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none"), True)
        # test that creating dirs for the source_path creates the correct directory
        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)

        # Set a correspondent and save the document
        document.correspondent = Correspondent.objects.get_or_create(
            name="test")[0]
        document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
        document.save()

        # Check proper handling of files
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/test"), True)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none"), False)
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                                        "originals/test/test-0000001.pdf.gpg"), True)
        self.assertEqual(document.generate_source_filename(),
                         "test/test-0000001.pdf.gpg")
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_file_renaming_missing_permissions(self):
        document = Document()
        document.file_type = "pdf"
@@ -109,34 +86,67 @@ class TestDate(TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
tmp = document.source_filename
|
||||
self.assertEqual(document.generate_source_filename(),
|
||||
"none/none-0000001.pdf")
|
||||
document.create_source_directory()
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none-{:07d}.pdf".format(document.pk))
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
|
||||
# Test source_path
|
||||
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
|
||||
"/documents/originals/none/none-0000001.pdf")
|
||||
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
# Make the folder read- and execute-only (no writing and no renaming)
|
||||
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o555)
|
||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
|
||||
|
||||
# Set a correspondent and save the document
|
||||
document.correspondent = Correspondent.objects.get_or_create(
|
||||
name="test")[0]
|
||||
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||
document.save()
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
|
||||
"originals/none/none-0000001.pdf"), True)
|
||||
self.assertEqual(document.source_filename,
|
||||
"none/none-0000001.pdf")
|
||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
||||
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777)
|
||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_database_error(self):
|
||||
|
||||
document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
|
||||
|
||||
document = Document()
|
||||
document.file_type = "pdf"
|
||||
document.checksum = "BBBBB"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none-{:07d}.pdf".format(document.pk))
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
|
||||
# Test source_path
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
|
||||
# Set a correspondent and save the document
|
||||
document.correspondent = Correspondent.objects.get_or_create(
|
||||
name="test")[0]
|
||||
|
||||
# This will cause save() to fail.
|
||||
document.checksum = document1.checksum

# Assume saving the document initially works, this gets called.
# After renaming, an error occurs, and filename is not saved:
# document should still be available at document.filename.
update_filename_and_move_files(None, document)

# Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete(self):
document = Document()
document.file_type = "pdf"
@@ -144,21 +154,20 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))

create_source_path_directory(document.source_path)
Path(document.source_path).touch()

# Ensure file deletion after delete
pk = document.pk
document.delete()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete_nofile(self):
document = Document()
document.file_type = "pdf"
@@ -167,8 +176,7 @@ class TestDate(TestCase):

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_directory_not_empty(self):
document = Document()
document.file_type = "pdf"
@@ -176,28 +184,24 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))

create_source_path_directory(document.source_path)

Path(document.source_path).touch()
Path(document.source_path + "test").touch()
important_file = document.source_path + "test"
Path(important_file).touch()
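# The extra file in the same directory must survive the rename below and
# keep the directory from being deleted during cleanup.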

# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
document.save()

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)

# Cleanup
os.remove(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdftest")
os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none")
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True)
self.assertTrue(os.path.isfile(important_file))

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
@@ -212,13 +216,8 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self):
@@ -233,13 +232,8 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self):
@@ -254,13 +248,8 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()
self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk))

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self):
@@ -274,64 +263,25 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_out_of_bounds_0(self):
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
def test_tags_out_of_bounds(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}")
def test_tags_out_of_bounds_10000000(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
# Add tag to document
document.tags.create(name="demo")
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
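# tags[1] is out of range for a document that has only one tag, so the
# generated filename is expected to fall back to "none".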
self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk))

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}")
def test_tags_out_of_bounds_99(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}/{correspondent}")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self):
document = Document()
document.file_type = "pdf"
@@ -339,153 +289,34 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none/none-0000001.pdf")
document.create_source_directory()
document.filename = generate_filename(document)
self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk))
create_source_path_directory(document.source_path)
Path(document.source_path).touch()

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), True)

pk = document.pk
document.delete()

self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none/none-0000001.pdf"),
False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals"), True)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)

@override_settings(PAPERLESS_FILENAME_FORMAT=None)
def test_format_none(self):
document = Document()
document.pk = 1
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

self.assertEqual(document.generate_source_filename(), "0000001.pdf")

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")

# Rename the document "illegally"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)

# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf"), True)

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf")

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed_encrypted(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_GPG
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf.gpg")
document.create_source_directory()
Path(document.source_path).touch()

# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf.gpg")

# Rename the document "illegally"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf.gpg",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf.gpg")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf.gpg"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)

# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf.gpg"), True)

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf.gpg")

def test_delete_all_empty_subdirectories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(tmp)
self.add_to_deletion_list(tmp)

os.makedirs(os.path.join(tmp, "empty"))
os.makedirs(os.path.join(tmp, "empty", "subdirectory"))

os.makedirs(os.path.join(tmp, "notempty"))
Path(os.path.join(tmp, "notempty", "file")).touch()

Document.delete_all_empty_subdirectories(tmp)

self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isdir(os.path.join(tmp, "empty")), False)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
self.assertEqual(generate_filename(document), "0000001.pdf")

def test_try_delete_empty_directories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
tmp = os.path.join(settings.ORIGINALS_DIR, "test_delete_empty")
os.makedirs(tmp)
self.add_to_deletion_list(tmp)

@@ -493,67 +324,27 @@ class TestDate(TestCase):
Path(os.path.join(tmp, "notempty", "file")).touch()
os.makedirs(os.path.join(tmp, "notempty", "empty"))

Document.try_delete_empty_directories(
os.path.join(tmp, "notempty", "empty"))
delete_empty_directories(os.path.join(tmp, "notempty", "empty"))
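# Only the empty leaf directory should be removed; "notempty" and the file
# inside it must survive, as the assertions below check.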
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
self.assertEqual(os.path.isdir(
os.path.join(tmp, "notempty", "empty")), False)

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_accidentally_deleted(self):
@override_settings(PAPERLESS_FILENAME_FORMAT="{created/[title]")
def test_invalid_format(self):
document = Document()
document.pk = 1
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
self.assertEqual(generate_filename(document), "0000001.pdf")

# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")

# Delete the document "illegally"
os.remove(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf")

# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
self.assertEqual(document.source_filename,
"none/none-0000001.pdf")

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_set_filename(self):
@override_settings(PAPERLESS_FILENAME_FORMAT="{created__year}")
def test_invalid_format_key(self):
document = Document()
document.pk = 1
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

# Set existing filename
document.set_filename(tmp)
self.assertEqual(document.source_filename, "none/none-0000001.pdf")

# Set non-existing filename
document.set_filename("doesnotexist")
self.assertEqual(document.source_filename, "none/none-0000001.pdf")
self.assertEqual(generate_filename(document), "0000001.pdf")

@@ -1,9 +1,8 @@
from django.core.management.base import CommandError
from django.test import TestCase

from ..management.commands.document_importer import Command

from documents.settings import EXPORTER_FILE_NAME
from ..management.commands.document_importer import Command


class TestImporter(TestCase):

@@ -1,6 +1,5 @@
import logging
import uuid

from unittest import mock

from django.test import TestCase

@@ -1,91 +0,0 @@
import base64
import os
import magic

from hashlib import md5
from unittest import mock

from django.conf import settings
from django.test import TestCase

from ..mail import Message, Attachment


class TestMessage(TestCase):

def __init__(self, *args, **kwargs):

TestCase.__init__(self, *args, **kwargs)
self.sample = os.path.join(
settings.BASE_DIR,
"documents",
"tests",
"samples",
"mail.txt"
)

def test_init(self):

with open(self.sample, "rb") as f:

with mock.patch("logging.StreamHandler.emit") as __:
message = Message(f.read())

self.assertTrue(message)
self.assertEqual(message.subject, "Test 0")

data = message.attachment.read()

self.assertEqual(
md5(data).hexdigest(), "7c89655f9e9eb7dd8cde8568e8115d59")

self.assertEqual(
message.attachment.content_type, "application/pdf")
with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
self.assertEqual(m.id_buffer(data), "application/pdf")


class TestInlineMessage(TestCase):

def __init__(self, *args, **kwargs):

TestCase.__init__(self, *args, **kwargs)
self.sample = os.path.join(
settings.BASE_DIR,
"documents",
"tests",
"samples",
"inline_mail.txt"
)

def test_init(self):

with open(self.sample, "rb") as f:

with mock.patch("logging.StreamHandler.emit") as __:
message = Message(f.read())

self.assertTrue(message)
self.assertEqual(message.subject, "Paperless Inline Image")

data = message.attachment.read()

self.assertEqual(
md5(data).hexdigest(), "30c00a7b42913e65f7fdb0be40b9eef3")

self.assertEqual(
message.attachment.content_type, "image/png")
with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
self.assertEqual(m.id_buffer(data), "image/png")


class TestAttachment(TestCase):

def test_init(self):
data = base64.encodebytes(b"0")
self.assertEqual(Attachment(data, "application/pdf").suffix, "pdf")
self.assertEqual(Attachment(data, "image/png").suffix, "png")
self.assertEqual(Attachment(data, "image/jpeg").suffix, "jpeg")
self.assertEqual(Attachment(data, "image/gif").suffix, "gif")
self.assertEqual(Attachment(data, "image/tiff").suffix, "tiff")
self.assertEqual(Attachment(data, "image/png").read(), data)
@@ -1,7 +1,7 @@
from django.test import TestCase

from ..models import Document, Correspondent
from .factories import DocumentFactory, CorrespondentFactory
from ..models import Document, Correspondent


class CorrespondentTestCase(TestCase):

@@ -14,7 +14,7 @@ class TestParserDiscovery(TestCase):
pass

m.return_value = (
(None, lambda _: {"weight": 0, "parser": DummyParser}),
(None, {"weight": 0, "parser": DummyParser, "test": lambda _: True}),
)

self.assertEqual(
@@ -32,8 +32,8 @@ class TestParserDiscovery(TestCase):
pass

m.return_value = (
(None, lambda _: {"weight": 0, "parser": DummyParser1}),
(None, lambda _: {"weight": 1, "parser": DummyParser2}),
(None, {"weight": 0, "parser": DummyParser1, "test": lambda _: True}),
(None, {"weight": 1, "parser": DummyParser2, "test": lambda _: True}),
)

self.assertEqual(
@@ -43,7 +43,7 @@ class TestParserDiscovery(TestCase):

@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_0_parsers(self, m, *args):
m.return_value = ((None, lambda _: None),)
m.return_value = []
with TemporaryDirectory() as tmpdir:
self.assertIsNone(
get_parser_class("doc.pdf")

@@ -1,14 +1,9 @@
from django.db.models import Count, Max
from django.http import HttpResponse, HttpResponseBadRequest
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.views import APIView

from paperless.db import GnuPG
from paperless.views import StandardPagination
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.mixins import (
DestroyModelMixin,
@@ -17,12 +12,17 @@ from rest_framework.mixins import (
UpdateModelMixin
)
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework.viewsets import (
GenericViewSet,
ModelViewSet,
ReadOnlyModelViewSet
)

import documents.index as index
from paperless.db import GnuPG
from paperless.views import StandardPagination
from .filters import (
CorrespondentFilterSet,
DocumentFilterSet,
@@ -30,8 +30,6 @@ from .filters import (
DocumentTypeFilterSet,
LogFilterSet
)

import documents.index as index
from .forms import UploadForm
from .models import Correspondent, Document, Log, Tag, DocumentType
from .serialisers import (
@@ -54,7 +52,7 @@ class CorrespondentViewSet(ModelViewSet):
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filter_class = CorrespondentFilterSet
filterset_class = CorrespondentFilterSet
ordering_fields = ("name", "matching_algorithm", "match", "document_count", "last_correspondence")


@@ -65,7 +63,7 @@ class TagViewSet(ModelViewSet):
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filter_class = TagFilterSet
filterset_class = TagFilterSet
ordering_fields = ("name", "matching_algorithm", "match", "document_count")


@@ -76,7 +74,7 @@ class DocumentTypeViewSet(ModelViewSet):
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filter_class = DocumentTypeFilterSet
filterset_class = DocumentTypeFilterSet
ordering_fields = ("name", "matching_algorithm", "match", "document_count")


@@ -91,7 +89,7 @@ class DocumentViewSet(RetrieveModelMixin,
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, SearchFilter, OrderingFilter)
filter_class = DocumentFilterSet
filterset_class = DocumentFilterSet
search_fields = ("title", "correspondent__name", "content")
ordering_fields = (
"id", "title", "correspondent__name", "document_type__name", "created", "modified", "added", "archive_serial_number")
@@ -106,7 +104,7 @@ class DocumentViewSet(RetrieveModelMixin,
return super(DocumentViewSet, self).destroy(request, *args, **kwargs)

def file_response(self, pk, disposition):
#TODO: this should not be necessary here.
# TODO: this should not be necessary here.
content_types = {
Document.TYPE_PDF: "application/pdf",
Document.TYPE_PNG: "image/png",
@@ -114,7 +112,7 @@ class DocumentViewSet(RetrieveModelMixin,
Document.TYPE_GIF: "image/gif",
Document.TYPE_TIF: "image/tiff",
Document.TYPE_CSV: "text/csv",
Document.TYPE_MD: "text/markdown",
Document.TYPE_TXT: "text/plain"
}

@@ -132,7 +130,7 @@ class DocumentViewSet(RetrieveModelMixin,

@action(methods=['post'], detail=False)
def post_document(self, request, pk=None):
#TODO: is this a good implementation?
# TODO: is this a good implementation?
form = UploadForm(data=request.POST, files=request.FILES)
if form.is_valid():
form.save()
@@ -142,17 +140,26 @@ class DocumentViewSet(RetrieveModelMixin,

@action(methods=['get'], detail=True)
def preview(self, request, pk=None):
response = self.file_response(pk, "inline")
return response
try:
response = self.file_response(pk, "inline")
return response
except FileNotFoundError:
raise Http404("Document source file does not exist")

@action(methods=['get'], detail=True)
@cache_control(public=False, max_age=315360000)
def thumb(self, request, pk=None):
return HttpResponse(Document.objects.get(id=pk).thumbnail_file, content_type='image/png')
try:
return HttpResponse(Document.objects.get(id=pk).thumbnail_file, content_type='image/png')
except FileNotFoundError:
raise Http404("Document thumbnail does not exist")

@action(methods=['get'], detail=True)
def download(self, request, pk=None):
return self.file_response(pk, "attachment")
try:
return self.file_response(pk, "attachment")
except FileNotFoundError:
raise Http404("Document source file does not exist")


class LogViewSet(ReadOnlyModelViewSet):
@@ -163,7 +170,7 @@ class LogViewSet(ReadOnlyModelViewSet):
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filter_class = LogFilterSet
filterset_class = LogFilterSet
ordering_fields = ("created",)


@@ -191,13 +198,12 @@ class SearchView(APIView):
except (ValueError, TypeError):
page = 1

result_page = index.query_page(self.ix, query, page)

return Response(
{'count': len(result_page),
'page': result_page.pagenum,
'page_count': result_page.pagecount,
'results': list(map(self.add_infos_to_hit, result_page))})
with index.query_page(self.ix, query, page) as result_page:
return Response(
{'count': len(result_page),
'page': result_page.pagenum,
'page_count': result_page.pagecount,
'results': list(map(self.add_infos_to_hit, result_page))})
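# (query_page is used as a context manager here, presumably so that the
# underlying search resources are released once the hits are serialized.)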

else:
return Response({
@@ -217,17 +223,16 @@ class SearchAutoCompleteView(APIView):
if 'term' in request.query_params:
term = request.query_params['term']
else:
term = None
return HttpResponseBadRequest("Term required")

if 'limit' in request.query_params:
limit = int(request.query_params['limit'])
if limit <= 0:
return HttpResponseBadRequest("Invalid limit")
else:
limit = 10

if term is not None:
return Response(index.autocomplete(self.ix, term, limit))
else:
return Response([])
return Response(index.autocomplete(self.ix, term, limit))


class StatisticsView(APIView):