Merge branch 'dev' into celery-tasks
@@ -1,5 +1,4 @@
from django.contrib import admin
from django.contrib.auth.models import Group, User
from django.utils.html import format_html, format_html_join
from django.utils.safestring import mark_safe
from whoosh.writing import AsyncWriter
@@ -32,7 +31,7 @@ class TagAdmin(admin.ModelAdmin):
    list_filter = ("colour", "matching_algorithm")
    list_editable = ("colour", "match", "matching_algorithm")

    readonly_fields = ("slug",)
    readonly_fields = ("slug", )


class DocumentTypeAdmin(admin.ModelAdmin):
@@ -51,9 +50,17 @@ class DocumentTypeAdmin(admin.ModelAdmin):
class DocumentAdmin(admin.ModelAdmin):

    search_fields = ("correspondent__name", "title", "content", "tags__name")
    readonly_fields = ("added", "file_type", "storage_type",)
    list_display = ("title", "created", "added", "correspondent",
                    "tags_", "archive_serial_number", "document_type")
    readonly_fields = ("added", "file_type", "storage_type", "filename")
    list_display = (
        "title",
        "created",
        "added",
        "correspondent",
        "tags_",
        "archive_serial_number",
        "document_type",
        "filename"
    )
    list_filter = (
        "document_type",
        "tags",
@@ -120,8 +127,3 @@ admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin)


# Unless we implement multi-user, these default registrations don't make sense.
admin.site.unregister(Group)
admin.site.unregister(User)
@@ -1,5 +1,4 @@
from django.apps import AppConfig
from django.db.models.signals import post_delete


class DocumentsConfig(AppConfig):
@@ -14,7 +13,6 @@ class DocumentsConfig(AppConfig):
            add_inbox_tags,
            run_pre_consume_script,
            run_post_consume_script,
            cleanup_document_deletion,
            set_log_entry,
            set_correspondent,
            set_document_type,
@@ -33,6 +31,4 @@ class DocumentsConfig(AppConfig):
        document_consumption_finished.connect(add_to_index)
        document_consumption_finished.connect(run_post_consume_script)

        post_delete.connect(cleanup_document_deletion)

        AppConfig.ready(self)
@@ -4,6 +4,8 @@ from django.conf import settings
from django.core.checks import Error, register
from django.db.utils import OperationalError, ProgrammingError

from documents.signals import document_consumer_declaration


@register()
def changed_password_check(app_configs, **kwargs):
@@ -37,3 +39,17 @@ def changed_password_check(app_configs, **kwargs):
        """))]

    return []


@register()
def parser_check(app_configs, **kwargs):

    parsers = []
    for response in document_consumer_declaration.send(None):
        parsers.append(response[1])

    if len(parsers) == 0:
        return [Error("No parsers found. This is a bug. The consumer won't be "
                      "able to consume any documents without parsers.")]
    else:
        return []
@@ -3,7 +3,6 @@ import logging
import os
import pickle
import re
import time

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
@@ -64,7 +63,7 @@ class DocumentClassifier(object):

    def save_classifier(self):
        with open(settings.MODEL_FILE, "wb") as f:
            pickle.dump(self.FORMAT_VERSION, f)  # Version
            pickle.dump(self.FORMAT_VERSION, f)
            pickle.dump(self.data_hash, f)
            pickle.dump(self.data_vectorizer, f)
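For context, whatever reads this model file back has to unpickle the same fields in the same order they were dumped here, starting with the format version. A minimal sketch of that load path (hypothetical helper name; the real reload() presumably also restores the remaining classifier fields):

import pickle

def load_classifier_sketch(model_file, expected_version):
    # Read the fields back in the exact order save_classifier() wrote them.
    with open(model_file, "rb") as f:
        version = pickle.load(f)
        if version != expected_version:
            # Mirrors the IncompatibleClassifierVersionError the consumer
            # catches elsewhere in this diff.
            raise ValueError("Incompatible classifier format version")
        data_hash = pickle.load(f)
        data_vectorizer = pickle.load(f)
    return data_hash, data_vectorizer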
@@ -89,16 +88,14 @@ class DocumentClassifier(object):
            data.append(preprocessed_content)

            y = -1
            if doc.document_type:
                if doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
                    y = doc.document_type.pk
            if doc.document_type and doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
                y = doc.document_type.pk
            m.update(y.to_bytes(4, 'little', signed=True))
            labels_document_type.append(y)

            y = -1
            if doc.correspondent:
                if doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
                    y = doc.correspondent.pk
            if doc.correspondent and doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
                y = doc.correspondent.pk
            m.update(y.to_bytes(4, 'little', signed=True))
            labels_correspondent.append(y)

@@ -120,8 +117,8 @@ class DocumentClassifier(object):

        num_tags = len(labels_tags_unique)
        # subtract 1 since -1 (null) is also part of the classes.
        num_correspondents = len(labels_correspondent) - 1
        num_document_types = len(labels_document_type) - 1
        num_correspondents = len(set(labels_correspondent)) - 1
        num_document_types = len(set(labels_document_type)) - 1

        logging.getLogger(__name__).debug(
            "{} documents, {} tag(s), {} correspondent(s), "
@@ -137,7 +134,7 @@ class DocumentClassifier(object):
        logging.getLogger(__name__).debug("Vectorizing data...")
        self.data_vectorizer = CountVectorizer(
            analyzer="word",
            ngram_range=(1,2),
            ngram_range=(1, 2),
            min_df=0.01
        )
        data_vectorized = self.data_vectorizer.fit_transform(data)
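The switch to len(set(...)) above matters because the label lists hold one entry per document, not one per class. A quick sketch of the difference, assuming three documents that share a single correspondent:

labels_correspondent = [-1, 3, 3]  # one label per document, -1 means "none"

num_wrong = len(labels_correspondent) - 1       # 2, but only 1 real correspondent
num_right = len(set(labels_correspondent)) - 1  # 1: distinct classes minus the -1 class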
@@ -3,7 +3,6 @@ import hashlib
import logging
import os
import re
import uuid

from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer
@@ -13,7 +12,9 @@ from django.utils import timezone

from paperless.db import GnuPG
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .models import Document, FileInfo
from .file_handling import generate_filename, create_source_path_directory
from .loggers import LoggingMixin
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class
from .signals import (
    document_consumption_finished,
@@ -25,17 +26,10 @@ class ConsumerError(Exception):
    pass


class Consumer:
    """
    Loop over every file found in CONSUMPTION_DIR and:
      1. Convert it to a greyscale pnm
      2. Use tesseract on the pnm
      3. Store the document in the MEDIA_ROOT with optional encryption
      4. Store the OCR'd text in the database
      5. Delete the document and image(s)
    """
class Consumer(LoggingMixin):

    def _send_progress(self, filename, current_progress, max_progress, status, message, document_id=None):
    def _send_progress(self, filename, current_progress, max_progress, status,
                       message, document_id=None):
        payload = {
            'filename': os.path.basename(filename),
            'current_progress': current_progress,
@@ -44,156 +38,226 @@ class Consumer:
            'message': message,
            'document_id': document_id
        }
        async_to_sync(self.channel_layer.group_send)("status_updates", {'type': 'status_update', 'data': payload})
        async_to_sync(self.channel_layer.group_send)("status_updates",
                                                     {'type': 'status_update',
                                                      'data': payload})
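On the receiving end, Channels routes a group_send with type 'status_update' to a handler method of the same name on every consumer in the group. A minimal sketch of such a websocket consumer (hypothetical class, not part of this diff), assuming channels' AsyncWebsocketConsumer:

import json
from channels.generic.websocket import AsyncWebsocketConsumer

class StatusConsumer(AsyncWebsocketConsumer):
    async def connect(self):
        # Join the group that Consumer._send_progress() publishes to.
        await self.channel_layer.group_add("status_updates", self.channel_name)
        await self.accept()

    async def status_update(self, event):
        # Invoked for messages sent with {'type': 'status_update', ...}.
        await self.send(text_data=json.dumps(event["data"]))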
    def __init__(self, consume=settings.CONSUMPTION_DIR,
                 scratch=settings.SCRATCH_DIR):

        self.logger = logging.getLogger(__name__)
        self.logging_group = None

        self.consume = consume
        self.scratch = scratch

        self.classifier = DocumentClassifier()
    def __init__(self):
        super().__init__()
        self.path = None
        self.filename = None
        self.override_title = None
        self.override_correspondent_id = None
        self.override_tag_ids = None
        self.override_document_type_id = None

        self.channel_layer = get_channel_layer()

        os.makedirs(self.scratch, exist_ok=True)
    def pre_check_file_exists(self):
        if not os.path.isfile(self.path):
            raise ConsumerError("Cannot consume {}: It is not a file".format(
                self.path))

        self.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            self.storage_type = Document.STORAGE_TYPE_GPG

        if not self.consume:
    def pre_check_consumption_dir(self):
        if not settings.CONSUMPTION_DIR:
            raise ConsumerError(
                "The CONSUMPTION_DIR settings variable does not appear to be "
                "set."
            )
                "set.")

        if not os.path.exists(self.consume):
        if not os.path.isdir(settings.CONSUMPTION_DIR):
            raise ConsumerError(
                "Consumption directory {} does not exist".format(self.consume))
                "Consumption directory {} does not exist".format(
                    settings.CONSUMPTION_DIR))

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })
    def pre_check_regex(self):
        if not re.match(FileInfo.REGEXES["title"], self.filename):
            raise ConsumerError(
                "Filename {} does not seem to be safe to "
                "consume".format(self.filename))

    @transaction.atomic
    def try_consume_file(self, file):
        """
        Return True if file was consumed
        """

        self.logging_group = uuid.uuid4()

        if not re.match(FileInfo.REGEXES["title"], file):
            return False

        doc = file

        if self._is_duplicate(doc):
            self.log(
                "warning",
                "Skipping {} as it appears to be a duplicate".format(doc)
    def pre_check_duplicate(self):
        with open(self.path, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
        if Document.objects.filter(checksum=checksum).exists():
            if settings.CONSUMER_DELETE_DUPLICATES:
                os.unlink(self.path)
            raise ConsumerError(
                "Not consuming {}: It is a duplicate.".format(self.filename)
            )
            return False

        self.log("info", "Consuming {}".format(doc))
    def pre_check_directories(self):
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
        os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
        os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
    def try_consume_file(self,
                         path,
                         override_filename=None,
                         override_title=None,
                         override_correspondent_id=None,
                         override_document_type_id=None,
                         override_tag_ids=None):
        """
        Return the document object if it was successfully created.
        """

        parser_class = get_parser_class(doc)
        self.path = path
        self.filename = override_filename or os.path.basename(path)
        self.override_title = override_title
        self.override_correspondent_id = override_correspondent_id
        self.override_document_type_id = override_document_type_id
        self.override_tag_ids = override_tag_ids

        # this is for grouping logging entries for this particular file
        # together.

        self.renew_logging_group()

        # Make sure that preconditions for consuming the file are met.

        self.pre_check_file_exists()
        self.pre_check_consumption_dir()
        self.pre_check_directories()
        self.pre_check_regex()
        self.pre_check_duplicate()

        self.log("info", "Consuming {}".format(self.filename))

        # Determine the parser class.

        parser_class = get_parser_class(self.filename)
        if not parser_class:
            self.log(
                "error", "No parsers could be found for {}".format(doc))
            return False
            raise ConsumerError("No parsers available for {}".format(self.filename))
        else:
            self.log("info", "Parser: {}".format(parser_class.__name__))
            self.log("debug", "Parser: {}".format(parser_class.__name__))

        self._send_progress(file, 0, 100, 'WORKING', 'Consumption started')
        # Notify all listeners that we're going to do some work.

        self._send_progress(self.filename, 0, 100, 'WORKING', 'Consumption started')

        document_consumption_started.send(
            sender=self.__class__,
            filename=doc,
            filename=self.path,
            logging_group=self.logging_group
        )

        def progress_callback(current_progress, max_progress, message):
            # recalculate progress to be within 20 and 80
            p = int((current_progress / max_progress) * 60 + 20)
            self._send_progress(file, p, 100, "WORKING", message)
            self._send_progress(self.filename, p, 100, "WORKING", message)
        document_parser = parser_class(doc, self.logging_group, progress_callback)
        # This doesn't parse the document yet, but gives us a parser.

        document_parser = parser_class(self.path, self.logging_group, progress_callback)

        # However, this already created working directories which we have to
        # clean up.

        # Parse the document. This may take some time.

        try:
            self.log("info", "Generating thumbnail for {}...".format(doc))
            self._send_progress(file, 10, 100, 'WORKING',
            self.log("debug", "Generating thumbnail for {}...".format(self.filename))
            self._send_progress(self.filename, 10, 100, 'WORKING',
                                'Generating thumbnail...')
            thumbnail = document_parser.get_optimised_thumbnail()
            self._send_progress(file, 20, 100, 'WORKING',
            self.log("debug", "Parsing {}...".format(self.filename))
            self._send_progress(self.filename, 20, 100, 'WORKING',
                                'Getting text from document...')
            text = document_parser.get_text()
            self._send_progress(file, 80, 100, 'WORKING',
            self._send_progress(self.filename, 80, 100, 'WORKING',
                                'Getting date from document...')
            date = document_parser.get_date()
            self._send_progress(file, 85, 100, 'WORKING',
                                'Storing the document...')
            document = self._store(
                text,
                doc,
                thumbnail,
                date
            )
        except ParseError as e:
            self.log("fatal", "PARSE FAILURE for {}: {}".format(doc, e))
            document_parser.cleanup()
            self._send_progress(self.filename, 100, 100, 'FAILED',
                                "Failed: {}".format(e))
            raise ConsumerError(e)

        # Prepare the document classifier.

        # TODO: I don't really like to do this here, but this way we avoid
        # reloading the classifier multiple times, since there are multiple
        # post-consume hooks that all require the classifier.

        try:
            classifier = DocumentClassifier()
            classifier.reload()
        except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
            logging.getLogger(__name__).warning(
                "Cannot classify documents: {}.".format(e))
            classifier = None
        self._send_progress(self.filename, 85, 100, 'WORKING',
                            'Storing the document...')
        # now that everything is done, we can start to store the document
        # in the system. This will be a transaction and reasonably fast.
        try:
            with transaction.atomic():

                # store the document.
                document = self._store(
                    text=text,
                    date=date
                )

                # If we get here, it was successful. Proceed with post-consume
                # hooks. If they fail, nothing will get changed.

                self._send_progress(self.filename, 90, 100, 'WORKING',
                                    'Performing post-consumption tasks...')

                document_consumption_finished.send(
                    sender=self.__class__,
                    document=document,
                    logging_group=self.logging_group,
                    classifier=classifier
                )

                # After everything is in the database, copy the files into
                # place. If this fails, we'll also rollback the transaction.

                create_source_path_directory(document.source_path)
                self._write(document, self.path, document.source_path)
                self._write(document, thumbnail, document.thumbnail_path)

                # Delete the file only if it was successfully consumed
                self.log("debug", "Deleting file {}".format(self.path))
                os.unlink(self.path)
        except Exception as e:
            raise ConsumerError(e)
            self._send_progress(file, 100, 100, 'FAILED',
                                "Failed: {}".format(e))

        finally:
            document_parser.cleanup()
            return False
        else:
            document_parser.cleanup()
            self._cleanup_doc(doc)

            self.log(
                "info",
                "Document {} consumption finished".format(document)
            )
            self.log(
                "info",
                "Document {} consumption finished".format(document)
            )
            classifier = None
            self._send_progress(file, 100, 100, 'SUCCESS',
                                'Finished.', document.id)

            try:
                self.classifier.reload()
                classifier = self.classifier
            except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
                logging.getLogger(__name__).warning("Cannot classify documents: {}.".format(e))
            return document

            self._send_progress(file, 90, 100, 'WORKING',
                                'Performing post-consumption tasks...')
    def _store(self, text, date):

            document_consumption_finished.send(
                sender=self.__class__,
                document=document,
                logging_group=self.logging_group,
                classifier=classifier
            )
            self._send_progress(file, 100, 100, 'SUCCESS',
                                'Finished.', document.id)
            return True
        # If someone gave us the original filename, use it instead of doc.

    def _store(self, text, doc, thumbnail, date):
        file_info = FileInfo.from_path(self.filename)

        file_info = FileInfo.from_path(doc)

        stats = os.stat(doc)
        stats = os.stat(self.path)

        self.log("debug", "Saving record to database")

        created = file_info.created or date or timezone.make_aware(
            datetime.datetime.fromtimestamp(stats.st_mtime))
            datetime.datetime.fromtimestamp(stats.st_mtime))

        with open(doc, "rb") as f:
        if settings.PASSPHRASE:
            storage_type = Document.STORAGE_TYPE_GPG
        else:
            storage_type = Document.STORAGE_TYPE_UNENCRYPTED

        with open(self.path, "rb") as f:
            document = Document.objects.create(
                correspondent=file_info.correspondent,
                title=file_info.title,
@@ -202,7 +266,7 @@ class Consumer:
                checksum=hashlib.md5(f.read()).hexdigest(),
                created=created,
                modified=created,
                storage_type=self.storage_type
                storage_type=storage_type
            )

        relevant_tags = set(file_info.tags)
@@ -211,14 +275,30 @@ class Consumer:
            self.log("debug", "Tagging with {}".format(tag_names))
            document.tags.add(*relevant_tags)

        self._write(document, doc, document.source_path)
        self._write(document, thumbnail, document.thumbnail_path)
        self.apply_overrides(document)

        # TODO: why do we need to save the document again?
        document.filename = generate_filename(document)

        # We need to save the document twice, since we need the PK of the
        # document in order to create its filename above.
        document.save()

        return document

    def apply_overrides(self, document):
        if self.override_title:
            document.title = self.override_title

        if self.override_correspondent_id:
            document.correspondent = Correspondent.objects.get(pk=self.override_correspondent_id)

        if self.override_document_type_id:
            document.document_type = DocumentType.objects.get(pk=self.override_document_type_id)

        if self.override_tag_ids:
            for tag_id in self.override_tag_ids:
                document.tags.add(Tag.objects.get(pk=tag_id))

    def _write(self, document, source, target):
        with open(source, "rb") as read_file:
            with open(target, "wb") as write_file:
@@ -227,13 +307,3 @@ class Consumer:
                    return
                self.log("debug", "Encrypting")
                write_file.write(GnuPG.encrypted(read_file))

    def _cleanup_doc(self, doc):
        self.log("debug", "Deleting document {}".format(doc))
        os.unlink(doc)

    @staticmethod
    def _is_duplicate(doc):
        with open(doc, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
        return Document.objects.filter(checksum=checksum).exists()
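Taken together, the refactored consumer is now invoked once per file instead of owning a scan loop. A minimal usage sketch, assuming a valid path inside the consumption directory:

from documents.consumer import Consumer, ConsumerError

consumer = Consumer()
try:
    document = consumer.try_consume_file(
        "/consume/invoice.pdf",
        override_title="Invoice March",
        override_tag_ids=[1, 2],
    )
except ConsumerError as e:
    # Any failed precondition check or parse error surfaces here.
    print("Consumption failed:", e)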
src/documents/file_handling.py (new file, 102 lines)
@@ -0,0 +1,102 @@
import logging
import os
from collections import defaultdict

from django.conf import settings
from django.template.defaultfilters import slugify


def create_source_path_directory(source_path):
    os.makedirs(os.path.dirname(source_path), exist_ok=True)


def delete_empty_directories(directory):
    # Go up in the directory hierarchy and try to delete all directories
    directory = os.path.normpath(directory)
    root = os.path.normpath(settings.ORIGINALS_DIR)

    if not directory.startswith(root + os.path.sep):
        # don't do anything outside our originals folder.

        # append os.path.sep so that we avoid these cases:
        #   directory = /home/originals2/test
        #   root = /home/originals ("/" gets appended and startswith fails)
        return

    while directory != root:
        if not os.listdir(directory):
            # it's empty
            try:
                os.rmdir(directory)
            except OSError:
                # whatever. empty directories aren't that bad anyway.
                return
        else:
            # it's not empty.
            return

        # go one level up
        directory = os.path.normpath(os.path.dirname(directory))


def many_to_dictionary(field):
    # Converts a ManyToManyField to a dictionary by assuming that field
    # entries contain an _ or - which will be used as a delimiter
    mydictionary = dict()

    for index, t in enumerate(field.all()):
        # Populate tag names by index
        mydictionary[index] = slugify(t.name)

        # Find delimiter
        delimiter = t.name.find('_')

        if delimiter == -1:
            delimiter = t.name.find('-')

        if delimiter == -1:
            continue

        key = t.name[:delimiter]
        value = t.name[delimiter + 1:]

        mydictionary[slugify(key)] = slugify(value)

    return mydictionary
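many_to_dictionary assumes tag names encode key-value pairs with "_" or "-" as the delimiter. An illustration of the resulting mapping, using hypothetical tag names:

# For tags named "type_invoice" and "paid", many_to_dictionary() returns
# approximately:
#   {0: "type_invoice", 1: "paid", "type": "invoice"}
# Every tag is indexed by position; "type_invoice" additionally splits on
# "_" into a key-value entry, while "paid" has no delimiter and is skipped.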
def generate_filename(document):
    # Create filename based on configured format
    path = ""

    try:
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            tags = defaultdict(lambda: slugify(None),
                               many_to_dictionary(document.tags))
            path = settings.PAPERLESS_FILENAME_FORMAT.format(
                correspondent=slugify(document.correspondent),
                title=slugify(document.title),
                created=slugify(document.created),
                created_year=document.created.year if document.created else "none",
                created_month=document.created.month if document.created else "none",
                created_day=document.created.day if document.created else "none",
                added=slugify(document.added),
                added_year=document.added.year if document.added else "none",
                added_month=document.added.month if document.added else "none",
                added_day=document.added.day if document.added else "none",
                tags=tags,
            )
    except (ValueError, KeyError, IndexError):
        logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default.".format(settings.PAPERLESS_FILENAME_FORMAT))

    # Always append the primary key to guarantee uniqueness of filename
    if len(path) > 0:
        filename = "%s-%07i.%s" % (path, document.pk, document.file_type)
    else:
        filename = "%07i.%s" % (document.pk, document.file_type)

    # Append .gpg for encrypted files
    if document.storage_type == document.STORAGE_TYPE_GPG:
        filename += ".gpg"

    return filename
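A worked example of the format logic above, assuming a hypothetical PAPERLESS_FILENAME_FORMAT:

# With PAPERLESS_FILENAME_FORMAT = "{created_year}/{correspondent}/{title}",
# a PDF with pk=123, created in 2020, correspondent "ACME Inc" and title
# "Invoice" comes out as:
#   "2020/acme-inc/invoice-0000123.pdf"
# and "2020/acme-inc/invoice-0000123.pdf.gpg" when stored encrypted.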
@@ -1,10 +1,11 @@
import os

import tempfile
from datetime import datetime
from time import mktime

from django import forms
from django.conf import settings
from django_q.tasks import async_task
from pathvalidate import validate_filename, ValidationError


@@ -19,12 +20,6 @@ class UploadForm(forms.Form):
            raise forms.ValidationError("That filename is suspicious.")
        return self.cleaned_data.get("document")

    def get_filename(self, i=None):
        return os.path.join(
            settings.CONSUMPTION_DIR,
            "{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name
        )

    def save(self):
        """
        Since the consumer already does a lot of work, it's easier just to save
@@ -33,15 +28,16 @@ class UploadForm(forms.Form):
        """

        document = self.cleaned_data.get("document").read()
        original_filename = self.cleaned_data.get("document").name

        t = int(mktime(datetime.now().timetuple()))

        file_name = self.get_filename()
        i = 0
        while os.path.exists(file_name):
            i += 1
            file_name = self.get_filename(i)
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        # TODO: don't just append pdf. This is here for that weird regex check at the start of the consumer.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f:

        with open(file_name, "wb") as f:
            f.write(document)
            os.utime(file_name, times=(t, t))
            os.utime(f.name, times=(t, t))

            async_task("documents.tasks.consume_file", f.name, override_filename=original_filename, task_name=os.path.basename(original_filename))
@@ -1,7 +1,6 @@
import logging
from contextlib import contextmanager

from django.db import models
from django.dispatch import receiver
from whoosh import highlight
from whoosh.fields import Schema, TEXT, NUMERIC
from whoosh.highlight import Formatter, get_text
@@ -9,10 +8,8 @@ from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser
from whoosh.writing import AsyncWriter

from documents.models import Document
from paperless import settings


logger = logging.getLogger(__name__)


@@ -69,6 +66,9 @@ def open_index(recreate=False):
    if exists_in(settings.INDEX_DIR) and not recreate:
        return open_dir(settings.INDEX_DIR)
    else:
        # TODO: this is not thread safe. If 2 instances try to create the index
        # at the same time, this fails. This currently prevents parallel
        # tests.
        return create_in(settings.INDEX_DIR, get_schema())


@@ -99,15 +99,19 @@ def remove_document_from_index(document):
        remove_document(writer, document)


@contextmanager
def query_page(ix, query, page):
    with ix.searcher() as searcher:
    searcher = ix.searcher()
    try:
        query_parser = MultifieldParser(["content", "title", "correspondent"],
                                        ix.schema).parse(query)
        result_page = searcher.search_page(query_parser, page)
        result_page.results.fragmenter = highlight.ContextFragmenter(
            surround=50)
        result_page.results.formatter = JsonFormatter()
        return result_page
        yield result_page
    finally:
        searcher.close()
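Because query_page is now a generator-based context manager, callers keep the searcher open while they read results and it is closed for them on exit. A minimal usage sketch:

ix = open_index()
with query_page(ix, "invoice 2020", 1) as result_page:
    for hit in result_page:
        print(hit)
# Leaving the with-block closes the searcher, so the hits must be
# consumed inside it.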


def autocomplete(ix, term, limit=10):
@@ -1,4 +1,5 @@
import logging
import uuid


class PaperlessHandler(logging.Handler):
@@ -13,3 +14,19 @@ class PaperlessHandler(logging.Handler):
            kwargs["group"] = record.group

        Log.objects.create(**kwargs)


class LoggingMixin:

    logging_group = None

    def renew_logging_group(self):
        self.logging_group = uuid.uuid4()

    def log(self, level, message):
        target = ".".join([self.__class__.__module__, self.__class__.__name__])
        logger = logging.getLogger(target)

        getattr(logger, level)(message, extra={
            "group": self.logging_group
        })
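Any class can opt into grouped logging by mixing this in, which is exactly what the Consumer and DocumentParser changes in this commit do. A minimal sketch:

class MyWorker(LoggingMixin):
    def run(self):
        self.renew_logging_group()  # one uuid per unit of work
        self.log("info", "starting work")
        # every record carries extra={"group": <uuid>} so log entries for
        # one file can be correlated later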
@@ -1,250 +0,0 @@
import datetime
import imaplib
import logging
import os
import re
import time
import uuid

from base64 import b64decode
from email import policy
from email.parser import BytesParser
from dateutil import parser

from django.conf import settings

from .models import Correspondent


class MailFetcherError(Exception):
    pass


class InvalidMessageError(MailFetcherError):
    pass


class Loggable(object):

    def __init__(self, group=None):
        self.logger = logging.getLogger(__name__)
        self.logging_group = group or uuid.uuid4()

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })


class Message(Loggable):
    """
    A crude, but simple email message class. We assume that there's a subject
    and n attachments, and that we don't care about the message body.
    """

    SECRET = os.getenv("PAPERLESS_EMAIL_SECRET")

    def __init__(self, data, group=None):
        """
        Cribbed heavily from
        https://www.ianlewis.org/en/parsing-email-attachments-python
        """

        Loggable.__init__(self, group=group)

        self.subject = None
        self.time = None
        self.attachment = None

        message = BytesParser(policy=policy.default).parsebytes(data)
        self.subject = str(message["Subject"]).replace("\r\n", "")
        self.body = str(message.get_body())

        self.check_subject()
        self.check_body()

        self._set_time(message)

        self.log("info", 'Importing email: "{}"'.format(self.subject))

        attachments = []
        for part in message.walk():

            content_disposition = part.get("Content-Disposition")
            if not content_disposition:
                continue

            dispositions = content_disposition.strip().split(";")
            if len(dispositions) < 2:
                continue

            if not dispositions[0].lower() == "attachment" and \
                    "filename" not in dispositions[1].lower():
                continue

            file_data = part.get_payload()

            attachments.append(Attachment(
                b64decode(file_data), content_type=part.get_content_type()))

        if len(attachments) == 0:
            raise InvalidMessageError(
                "There don't appear to be any attachments to this message")

        if len(attachments) > 1:
            raise InvalidMessageError(
                "There's more than one attachment to this message. It cannot "
                "be indexed automatically."
            )

        self.attachment = attachments[0]
    def __bool__(self):
        return bool(self.attachment)

    def check_subject(self):
        if self.subject is None:
            raise InvalidMessageError("Message does not have a subject")
        if not Correspondent.SAFE_REGEX.match(self.subject):
            raise InvalidMessageError("Message subject is unsafe: {}".format(
                self.subject))

    def check_body(self):
        if self.SECRET not in self.body:
            raise InvalidMessageError("The secret wasn't in the body")

    def _set_time(self, message):
        self.time = datetime.datetime.now()
        message_time = message.get("Date")
        if message_time:
            try:
                self.time = parser.parse(message_time)
            except (ValueError, AttributeError):
                pass  # We assume that "now" is ok

    @property
    def file_name(self):
        return "{}.{}".format(self.subject, self.attachment.suffix)


class Attachment(object):

    SAFE_SUFFIX_REGEX = re.compile(
        r"^(application/(pdf))|(image/(png|jpeg|gif|tiff))$")

    def __init__(self, data, content_type):

        self.content_type = content_type
        self.data = data
        self.suffix = None

        m = self.SAFE_SUFFIX_REGEX.match(self.content_type)
        if not m:
            raise MailFetcherError(
                "Not-awesome file type: {}".format(self.content_type))
        self.suffix = m.group(2) or m.group(4)

    def read(self):
        return self.data


class MailFetcher(Loggable):

    def __init__(self, consume=settings.CONSUMPTION_DIR):

        Loggable.__init__(self)

        self._connection = None
        self._host = os.getenv("PAPERLESS_CONSUME_MAIL_HOST")
        self._port = os.getenv("PAPERLESS_CONSUME_MAIL_PORT")
        self._username = os.getenv("PAPERLESS_CONSUME_MAIL_USER")
        self._password = os.getenv("PAPERLESS_CONSUME_MAIL_PASS")
        self._inbox = os.getenv("PAPERLESS_CONSUME_MAIL_INBOX", "INBOX")

        self._enabled = bool(self._host)
        if self._enabled and Message.SECRET is None:
            raise MailFetcherError("No PAPERLESS_EMAIL_SECRET defined")

        self.last_checked = time.time()
        self.consume = consume

    def pull(self):
        """
        Fetch all available mail at the target address and store it locally in
        the consumption directory so that the file consumer can pick it up and
        do its thing.
        """

        if self._enabled:

            # Reset the grouping id for each fetch
            self.logging_group = uuid.uuid4()

            self.log("debug", "Checking mail")

            for message in self._get_messages():

                self.log("info", 'Storing email: "{}"'.format(message.subject))

                t = int(time.mktime(message.time.timetuple()))
                file_name = os.path.join(self.consume, message.file_name)
                with open(file_name, "wb") as f:
                    f.write(message.attachment.data)
                    os.utime(file_name, times=(t, t))

        self.last_checked = time.time()

    def _get_messages(self):

        r = []
        try:

            self._connect()
            self._login()

            for message in self._fetch():
                if message:
                    r.append(message)

            self._connection.expunge()
            self._connection.close()
            self._connection.logout()

        except MailFetcherError as e:
            self.log("error", str(e))

        return r

    def _connect(self):
        try:
            self._connection = imaplib.IMAP4_SSL(self._host, self._port)
        except OSError as e:
            msg = "Problem connecting to {}: {}".format(self._host, e.strerror)
            raise MailFetcherError(msg)

    def _login(self):

        login = self._connection.login(self._username, self._password)
        if not login[0] == "OK":
            raise MailFetcherError("Can't log into mail: {}".format(login[1]))

        inbox = self._connection.select(self._inbox)
        if not inbox[0] == "OK":
            raise MailFetcherError("Can't find the inbox: {}".format(inbox[1]))

    def _fetch(self):

        for num in self._connection.search(None, "ALL")[1][0].split():

            __, data = self._connection.fetch(num, "(RFC822)")

            message = None
            try:
                message = Message(data[0][1], self.logging_group)
            except InvalidMessageError as e:
                self.log("error", str(e))
            else:
                self._connection.store(num, "+FLAGS", "\\Deleted")

            if message:
                yield message
@@ -3,11 +3,10 @@ import os

from django.conf import settings
from django.core.management.base import BaseCommand

from watchdog.observers import Observer
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler

from documents.consumer import Consumer
from watchdog.observers import Observer
from watchdog.observers.polling import PollingObserver

try:
    from inotify_simple import INotify, flags
@@ -17,17 +16,25 @@ except ImportError:

class Handler(FileSystemEventHandler):

    def __init__(self, consumer):
        self.consumer = consumer
    def _consume(self, file):
        if os.path.isfile(file):
            try:
                async_task("documents.tasks.consume_file", file, task_name=os.path.basename(file))
            except Exception as e:
                # Catch all so that the consumer won't crash.
                logging.getLogger(__name__).error("Error while consuming document: {}".format(e))

    def on_created(self, event):
        self.consumer.try_consume_file(event.src_path)
        self._consume(event.src_path)

    def on_moved(self, event):
        self._consume(event.src_path)


class Command(BaseCommand):
    """
    On every iteration of an infinite loop, consume what we can from the
    consumption directory, and fetch any mail available.
    consumption directory.
    """

    def __init__(self, *args, **kwargs):
@@ -35,12 +42,6 @@ class Command(BaseCommand):
        self.verbosity = 0
        self.logger = logging.getLogger(__name__)

        self.file_consumer = None
        self.mail_fetcher = None
        self.first_iteration = True

        self.consumer = Consumer()

        BaseCommand.__init__(self, *args, **kwargs)

    def add_arguments(self, parser):
@@ -56,9 +57,6 @@ class Command(BaseCommand):
        self.verbosity = options["verbosity"]
        directory = options["directory"]

        for d in (settings.ORIGINALS_DIR, settings.THUMBNAIL_DIR):
            os.makedirs(d, exist_ok=True)

        logging.getLogger(__name__).info(
            "Starting document consumer at {}".format(
                directory
@@ -68,11 +66,16 @@ class Command(BaseCommand):
        # Consume all files as this is not done initially by the watchdog
        for entry in os.scandir(directory):
            if entry.is_file():
                self.consumer.try_consume_file(entry.path)
                async_task("documents.tasks.consume_file", entry.path, task_name=os.path.basename(entry.path))

        # Start the watchdog. Woof!
        observer = Observer()
        event_handler = Handler(self.consumer)
        if settings.CONSUMER_POLLING > 0:
            logging.getLogger(__name__).info('Using polling instead of file'
                                             'system notifications.')
            observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
        else:
            observer = Observer()
        event_handler = Handler()
        observer.schedule(event_handler, directory, recursive=True)
        observer.start()
        try:
@@ -1,4 +1,5 @@
from django.core.management.base import BaseCommand

from ...mixins import Renderable
from ...tasks import train_classifier


@@ -1,16 +1,15 @@
import json
import os
import time
import shutil
import time

from django.core.management.base import BaseCommand, CommandError
from django.core import serializers
from django.core.management.base import BaseCommand, CommandError

from documents.models import Document, Correspondent, Tag, DocumentType
from paperless.db import GnuPG

from ...mixins import Renderable
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
from paperless.db import GnuPG
from ...mixins import Renderable


class Command(Renderable, BaseCommand):
@@ -3,15 +3,14 @@ import os
import shutil

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError

from documents.models import Document
from paperless.db import GnuPG

from ...mixins import Renderable

from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
from paperless.db import GnuPG
from ...file_handling import generate_filename, create_source_path_directory
from ...mixins import Renderable


class Command(Renderable, BaseCommand):
@@ -82,6 +81,10 @@ class Command(Renderable, BaseCommand):

    def _import_files_from_manifest(self):

        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            storage_type = Document.STORAGE_TYPE_GPG

        for record in self.manifest:

            if not record["model"] == "documents.document":
@@ -94,6 +97,14 @@ class Command(Renderable, BaseCommand):
            document_path = os.path.join(self.source, doc_file)
            thumbnail_path = os.path.join(self.source, thumb_file)

            document.storage_type = storage_type
            document.filename = generate_filename(document)

            if os.path.isfile(document.source_path):
                raise FileExistsError(document.source_path)

            create_source_path_directory(document.source_path)

            if settings.PASSPHRASE:

                with open(document_path, "rb") as unencrypted:
@@ -109,18 +120,8 @@ class Command(Renderable, BaseCommand):
                        encrypted.write(GnuPG.encrypted(unencrypted))

            else:

                print("Moving {} to {}".format(document_path, document.source_path))
                shutil.copy(document_path, document.source_path)
                shutil.copy(thumbnail_path, document.thumbnail_path)

        # Reset the storage type to whatever we've used while importing

        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            storage_type = Document.STORAGE_TYPE_GPG

        Document.objects.filter(
            pk__in=[r["pk"] for r in self.manifest]
        ).update(
            storage_type=storage_type
        )
            document.save()
@@ -8,5 +8,5 @@ class Command(BaseCommand):
    help = "A quick & dirty way to see what's in the logs"

    def handle(self, *args, **options):
        for l in Log.objects.order_by("pk"):
            print(l)
        for log in Log.objects.order_by("pk"):
            print(log)
@@ -1,7 +1,6 @@
from django.core.management.base import BaseCommand

from documents.models import Document, Tag

from documents.models import Document
from ...mixins import Renderable


@@ -9,16 +9,14 @@ def match_correspondents(document_content, classifier):
    correspondents = Correspondent.objects.all()
    predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None

    matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]
    return matched_correspondents
    return [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]


def match_document_types(document_content, classifier):
    document_types = DocumentType.objects.all()
    predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None

    matched_document_types = [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]
    return matched_document_types
    return [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]


def match_tags(document_content, classifier):
@@ -1,7 +1,4 @@
# Generated by Django 3.1.3 on 2020-11-07 12:35
import os

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


@@ -9,11 +9,11 @@ from django_q.tasks import schedule
def add_schedules(apps, schema_editor):
    schedule('documents.tasks.train_classifier', name="Train the classifier", schedule_type=Schedule.HOURLY)
    schedule('documents.tasks.index_optimize', name="Optimize the index", schedule_type=Schedule.DAILY)
    schedule('documents.tasks.consume_mail', name="Check E-Mail", schedule_type=Schedule.MINUTES, minutes=10)


def remove_schedules(apps, schema_editor):
    Schedule.objects.all().delete()
    Schedule.objects.filter(func='documents.tasks.train_classifier').delete()
    Schedule.objects.filter(func='documents.tasks.index_optimize').delete()


class Migration(migrations.Migration):
src/documents/migrations/1002_auto_20201111_1105.py (new file, 18 lines)
@@ -0,0 +1,18 @@
# Generated by Django 3.1.3 on 2020-11-11 11:05

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('documents', '1001_auto_20201109_1636'),
    ]

    operations = [
        migrations.AlterField(
            model_name='document',
            name='filename',
            field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True),
        ),
    ]
@@ -3,18 +3,15 @@
import logging
import os
import re
from collections import OrderedDict, defaultdict
from collections import OrderedDict

import dateutil.parser
from django.conf import settings
from django.db import models
from django.dispatch import receiver
from django.template.defaultfilters import slugify
from django.utils import timezone
from django.utils.text import slugify


class MatchingModel(models.Model):

    MATCH_ANY = 1
@@ -116,6 +113,7 @@ class DocumentType(MatchingModel):

class Document(models.Model):

    # TODO: why do we need an explicit list
    TYPE_PDF = "pdf"
    TYPE_PNG = "png"
    TYPE_JPG = "jpg"
@@ -192,7 +190,7 @@ class Document(models.Model):
        default=timezone.now, editable=False, db_index=True)

    filename = models.FilePathField(
        max_length=256,
        max_length=1024,
        editable=False,
        default=None,
        null=True,
@@ -220,123 +218,18 @@ class Document(models.Model):
            return "{}: {}".format(created, self.correspondent or self.title)
        return str(created)
    def find_renamed_document(self, subdirectory=""):
        suffix = "%07i.%s" % (self.pk, self.file_type)

        # Append .gpg for encrypted files
        if self.storage_type == self.STORAGE_TYPE_GPG:
            suffix += ".gpg"

        # Walk the directory hierarchy, looking for a file with this suffix
        root = os.path.normpath(Document.filename_to_path(subdirectory))

        for filename in os.listdir(root):
            if filename.endswith(suffix):
                return os.path.join(subdirectory, filename)

            fullname = os.path.join(subdirectory, filename)
            if os.path.isdir(Document.filename_to_path(fullname)):
                return self.find_renamed_document(fullname)

        return None

    @property
    def source_filename(self):
        # Initial filename generation (for new documents)
        if self.filename is None:
            self.filename = self.generate_source_filename()

        # Check if document is still available under filename
        elif not os.path.isfile(Document.filename_to_path(self.filename)):
            recovered_filename = self.find_renamed_document()

            # If we found the file, update the filename
            if recovered_filename is not None:
                logger = logging.getLogger(__name__)
                logger.warning("Filename of document " + str(self.id) +
                               " has changed and was successfully updated")
                self.filename = recovered_filename

                # Remove all empty subdirectories from MEDIA_ROOT
                Document.delete_all_empty_subdirectories(
                    Document.filename_to_path(""))
            else:
                logger = logging.getLogger(__name__)
                logger.error("File of document " + str(self.id) + " has " +
                             "gone and could not be recovered")

        return self.filename
    @staticmethod
    def many_to_dictionary(field):
        # Converts a ManyToManyField to a dictionary by assuming that field
        # entries contain an _ or - which will be used as a delimiter
        mydictionary = dict()

        for index, t in enumerate(field.all()):
            # Populate tag names by index
            mydictionary[index] = slugify(t.name)

            # Find delimiter
            delimiter = t.name.find('_')

            if delimiter == -1:
                delimiter = t.name.find('-')

            if delimiter == -1:
                continue

            key = t.name[:delimiter]
            value = t.name[delimiter+1:]

            mydictionary[slugify(key)] = slugify(value)

        return mydictionary

    def generate_source_filename(self):
        # Create filename based on configured format
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            tags = defaultdict(lambda: slugify(None),
                               self.many_to_dictionary(self.tags))
            path = settings.PAPERLESS_FILENAME_FORMAT.format(
                correspondent=slugify(self.correspondent),
                title=slugify(self.title),
                created=slugify(self.created),
                added=slugify(self.added),
                tags=tags)
        else:
            path = ""

        # Always append the primary key to guarantee uniqueness of filename
        if len(path) > 0:
            filename = "%s-%07i.%s" % (path, self.pk, self.file_type)
        else:
            filename = "%07i.%s" % (self.pk, self.file_type)

        # Append .gpg for encrypted files
        if self.storage_type == self.STORAGE_TYPE_GPG:
            filename += ".gpg"

        return filename
    def create_source_directory(self):
        new_filename = self.generate_source_filename()

        # Determine the full "target" path
        dir_new = Document.filename_to_path(os.path.dirname(new_filename))

        # Create new path
        os.makedirs(dir_new, exist_ok=True)

    @property
    def source_path(self):
        return Document.filename_to_path(self.source_filename)
        if self.filename:
            fname = str(self.filename)
        else:
            fname = "{:07}.{}".format(self.pk, self.file_type)
            if self.storage_type == self.STORAGE_TYPE_GPG:
                fname += ".gpg"

    @staticmethod
    def filename_to_path(filename):
        return os.path.join(
            settings.ORIGINALS_DIR,
            filename
            fname
        )

    @property
@@ -362,125 +255,6 @@ class Document(models.Model):
    def thumbnail_file(self):
        return open(self.thumbnail_path, "rb")

    def set_filename(self, filename):
        if os.path.isfile(Document.filename_to_path(filename)):
            self.filename = filename

    @staticmethod
    def try_delete_empty_directories(directory):
        # Go up in the directory hierarchy and try to delete all directories
        directory = os.path.normpath(directory)
        root = os.path.normpath(Document.filename_to_path(""))

        while directory != root:
            # Try to delete the current directory
            try:
                os.rmdir(directory)
            except os.error:
                # Directory not empty, no need to go further up
                return

            # Cut off actual directory and go one level up
            directory, _ = os.path.split(directory)
            directory = os.path.normpath(directory)

    @staticmethod
    def delete_all_empty_subdirectories(directory):
        # Go through all folders and try to delete all directories
        root = os.path.normpath(Document.filename_to_path(directory))

        for filename in os.listdir(root):
            fullname = os.path.join(directory, filename)

            if not os.path.isdir(Document.filename_to_path(fullname)):
                continue

            # Go into the subdirectory to see if there is more to delete
            Document.delete_all_empty_subdirectories(
                os.path.join(directory, filename))

            # Try to delete the directory
            try:
                os.rmdir(Document.filename_to_path(fullname))
                continue
            except os.error:
                # Directory not empty, no need to go further up
                continue


@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename(sender, instance, **kwargs):
    # Skip if document has not been saved yet
    if instance.filename is None:
        return

    # Check if the file exists and update the filename otherwise
    if not os.path.isfile(Document.filename_to_path(instance.filename)):
        instance.filename = instance.source_filename

    # Build the new filename
    new_filename = instance.generate_source_filename()

    # If the filename is the same, then nothing needs to be done
    if instance.filename == new_filename:
        return

    # Determine the full "target" path
    path_new = instance.filename_to_path(new_filename)
    dir_new = instance.filename_to_path(os.path.dirname(new_filename))

    # Create new path
    instance.create_source_directory()

    # Determine the full "current" path
    path_current = instance.filename_to_path(instance.source_filename)

    # Move file
    try:
        os.rename(path_current, path_new)
    except PermissionError:
        # Do not update filename in object
        return
    except FileNotFoundError:
        logger = logging.getLogger(__name__)
        logger.error("Renaming of document " + str(instance.id) + " failed " +
                     "as file " + instance.filename + " was no longer present")
        return

    # Delete empty directory
    old_dir = os.path.dirname(instance.filename)
    old_path = instance.filename_to_path(old_dir)
    Document.try_delete_empty_directories(old_path)

    instance.filename = new_filename

    # Save instance
    # This will not cause a cascade of post_save signals, as next time
    # nothing needs to be renamed
    instance.save()


@receiver(models.signals.post_delete, sender=Document)
def delete_files(sender, instance, **kwargs):
    if instance.filename is None:
        return

    # Remove the document
    old_file = instance.filename_to_path(instance.filename)

    try:
        os.remove(old_file)
    except FileNotFoundError:
        logger = logging.getLogger(__name__)
        logger.warning("Deleted document " + str(instance.id) + " but file " +
                       old_file + " was no longer present")

    # And remove the directory (if applicable)
    old_dir = os.path.dirname(instance.filename)
    old_path = instance.filename_to_path(old_dir)
    Document.try_delete_empty_directories(old_path)


class Log(models.Model):

@@ -518,7 +292,7 @@ class FileInfo:
        non_separated_word=r"([\w,. ]|([^\s]-))"
    )
    )

    # TODO: what is this used for
    formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
    REGEXES = OrderedDict([
        ("created-correspondent-title-tags", re.compile(
@@ -20,13 +20,16 @@ from django.utils import timezone
#   - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
#   - MONTH ZZZZ, with ZZZZ being 4 digits
#   - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration

# TODO: isn't there a date parsing library for this?

DATE_REGEX = re.compile(
    r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
    r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|'  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|'  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|'  # NOQA: E501
    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|'
    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
)
||||
|
||||
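# (Editor's note: the alternates above cover, in order, the formats listed in
# the comments at the top of this hunk: DD.MM.YYYY, YYYY.MM.DD,
# "DD. MONTH YYYY", "MONTH DD, YYYY" and "MONTH YYYY", with two- or
# four-digit years where noted.)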
@@ -39,17 +42,16 @@ def get_parser_class(doc):
    Determine the appropriate parser class based on the file
    """

    parsers = []
    for response in document_consumer_declaration.send(None):
        parsers.append(response[1])

    # TODO: add a check that checks parser availability.

    options = []
    for parser in parsers:
        result = parser(doc)
        if result:
            options.append(result)

    # His last command was: COME! And they came. All of them. Even the parsers.

    for response in document_consumer_declaration.send(None):
        parser_declaration = response[1]
        parser_test = parser_declaration["test"]

        if parser_test(doc):
            options.append(parser_declaration)

    if not options:
        return None
@@ -59,7 +61,7 @@ def get_parser_class(doc):
        options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
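    # (Editor's note: the matching declarations are sorted by their "weight"
    # key in descending order, so the heaviest parser that passed its "test"
    # wins.)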


def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
    environment = os.environ.copy()
    if settings.CONVERT_MEMORY_LIMIT:
        environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
@@ -74,7 +76,7 @@ def run_convert(input, output, density=None, scale=None, alpha=None, strip=False
    args += ['-trim'] if trim else []
    args += ['-type', str(type)] if type else []
    args += ['-depth', str(depth)] if depth else []
    args += [input, output]
    args += [input_file, output_file]

    logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})

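# (Editor's note: besides being clearer, the rename to input_file/output_file
# stops the first parameter from shadowing Python's built-in input().)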
@@ -100,17 +102,17 @@ class ParseError(Exception):
    pass


class DocumentParser:
class DocumentParser(LoggingMixin):
    """
    Subclass this to make your own parser. Have a look at
    `paperless_tesseract.parsers` for inspiration.
    """

    def __init__(self, path, logging_group, progress_callback):
        super().__init__()
        self.logging_group = logging_group
        self.document_path = path
        self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
        self.logger = logging.getLogger(__name__)
        self.logging_group = logging_group
        self.progress_callback = progress_callback

    def get_thumbnail(self):
@@ -121,16 +123,19 @@ class DocumentParser:

    def optimise_thumbnail(self, in_path):

        out_path = os.path.join(self.tempdir, "optipng.png")
        if settings.OPTIMIZE_THUMBNAILS:
            out_path = os.path.join(self.tempdir, "optipng.png")

        args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)
            args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)

        self.log('debug', 'Execute: ' + " ".join(args))
            self.log('debug', 'Execute: ' + " ".join(args))

        if not subprocess.Popen(args).wait() == 0:
            raise ParseError("Optipng failed at {}".format(args))
            if not subprocess.Popen(args).wait() == 0:
                raise ParseError("Optipng failed at {}".format(args))

        return out_path
            return out_path
        else:
            return in_path

    def get_optimised_thumbnail(self):
        return self.optimise_thumbnail(self.get_thumbnail())
@@ -222,11 +227,6 @@ class DocumentParser:

        return date

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })

    def cleanup(self):
        self.log("debug", "Deleting directory {}".format(self.tempdir))
        shutil.rmtree(self.tempdir)
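# (Editor's note: the log() helper removed above is now inherited from
# LoggingMixin, which is why DocumentParser gains that base class and drops
# its own logger attribute in __init__.)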
@@ -105,7 +105,6 @@ class DocumentSerializer(serializers.ModelSerializer):

class LogSerializer(serializers.ModelSerializer):


    class Meta:
        model = Log
        fields = (
@@ -1,5 +1,5 @@
from django.dispatch import Signal

document_consumption_started = Signal(providing_args=["filename"])
document_consumption_finished = Signal(providing_args=["document"])
document_consumer_declaration = Signal(providing_args=[])
document_consumption_started = Signal()
document_consumption_finished = Signal()
document_consumer_declaration = Signal()
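# (Editor's note: providing_args was deprecated in Django 3.0 and removed in
# Django 4.0; the bare Signal() spelling is the forward-compatible form.)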
@@ -6,9 +6,13 @@ from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.dispatch import receiver
from django.utils import timezone

from .. import index, matching
from ..file_handling import delete_empty_directories, generate_filename, \
    create_source_path_directory
from ..models import Document, Tag


@@ -141,17 +145,65 @@ def run_post_consume_script(sender, document, **kwargs):
    )).wait()


@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):

    if not isinstance(instance, Document):
        return

    for f in (instance.source_path, instance.thumbnail_path):
        try:
            os.unlink(f)
        except FileNotFoundError:
            pass  # The file's already gone, so we're cool with it.

    delete_empty_directories(os.path.dirname(instance.source_path))


@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename_and_move_files(sender, instance, **kwargs):

    if not instance.filename:
        # Can't update the filename if there is no filename to begin with.
        # This happens after the consumer creates a new document.
        # The PK needs to be set first by saving the document once. When this
        # happens, the file is not yet in the ORIGINALS_DIR, and thus can't be
        # renamed anyway. In all other cases, instance.filename will be set.
        return

    old_filename = instance.filename
    old_path = instance.source_path
    new_filename = generate_filename(instance)

    if new_filename == instance.filename:
        # Don't do anything if it's the same.
        return

    new_path = os.path.join(settings.ORIGINALS_DIR, new_filename)

    if not os.path.isfile(old_path):
        # Can't do anything if the old file does not exist anymore.
        logging.getLogger(__name__).fatal('Document {}: File {} has gone.'.format(str(instance), old_path))
        return

    if os.path.isfile(new_path):
        # Can't do anything if the new file already exists. Skip updating file.
        logging.getLogger(__name__).warning('Document {}: Cannot rename file since target path {} already exists.'.format(str(instance), new_path))
        return

    create_source_path_directory(new_path)

    try:
        os.rename(old_path, new_path)
        instance.filename = new_filename
        instance.save()

    except OSError as e:
        instance.filename = old_filename
    except DatabaseError as e:
        os.rename(new_path, old_path)
        instance.filename = old_filename
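    # (Editor's note: the two except branches above amount to a manual
    # rollback: if the rename fails, only the in-memory filename is reverted;
    # if the database save fails, the file is moved back to its old path so
    # disk and database stay consistent.)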

    if not os.path.isfile(old_path):
        delete_empty_directories(os.path.dirname(old_path))


def set_log_entry(sender, document=None, logging_group=None, **kwargs):

@@ -1,20 +1,15 @@
import logging

from django.conf import settings
from django_q.tasks import async_task, result
from whoosh.writing import AsyncWriter

from documents import index
from documents.classifier import DocumentClassifier, \
    IncompatibleClassifierVersionError
from documents.mail import MailFetcher
from documents.consumer import Consumer, ConsumerError
from documents.models import Document


def consume_mail():
    MailFetcher().pull()


def index_optimize():
    index.open_index().optimize()

@@ -55,3 +50,27 @@ def train_classifier():
        logging.getLogger(__name__).error(
            "Classifier error: " + str(e)
        )


def consume_file(path,
                 override_filename=None,
                 override_title=None,
                 override_correspondent_id=None,
                 override_document_type_id=None,
                 override_tag_ids=None):

    document = Consumer().try_consume_file(
        path,
        override_filename=override_filename,
        override_title=override_title,
        override_correspondent_id=override_correspondent_id,
        override_document_type_id=override_document_type_id,
        override_tag_ids=override_tag_ids)

    if document:
        return "Success. New document id {} created".format(
            document.pk
        )
    else:
        raise ConsumerError("Unknown error: Returned document was null, but "
                            "no error message was given.")
File diff suppressed because it is too large
@@ -1,208 +0,0 @@
Return-Path: <sender@example.com>
X-Original-To: sender@mailbox4.mailhost.com
Delivered-To: sender@mailbox4.mailhost.com
Received: from mx8.mailhost.com (mail8.mailhost.com [75.126.24.68])
    by mailbox4.mailhost.com (Postfix) with ESMTP id B62BD5498001
    for <sender@mailbox4.mailhost.com>; Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
Received: from localhost (localhost.localdomain [127.0.0.1])
    by mx8.mailhost.com (Postfix) with ESMTP id B41796F190D
    for <sender@mailbox4.mailhost.com>; Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
X-Spam-Flag: NO
X-Spam-Score: 0
X-Spam-Level:
X-Spam-Status: No, score=0 tagged_above=-999 required=3
    tests=[RCVD_IN_DNSWL_NONE=-0.0001]
Received: from mx8.mailhost.com ([127.0.0.1])
    by localhost (mail8.mailhost.com [127.0.0.1]) (amavisd-new, port 10024)
    with ESMTP id 3cj6d28FXsS3 for <sender@mailbox4.mailhost.com>;
    Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
Received: from smtp.mailhost.com (smtp.mailhost.com [74.55.86.74])
    by mx8.mailhost.com (Postfix) with ESMTP id 527D76F1529
    for <paperless@example.com>; Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
Received: from [10.114.0.19] (nl3x.mullvad.net [46.166.136.162])
    by smtp.mailhost.com (Postfix) with ESMTP id 9C52420C6FDA
    for <paperless@example.com>; Thu, 4 Feb 2016 22:01:16 +0000 (UTC)
To: paperless@example.com
From: Daniel Quinn <sender@example.com>
Subject: Test 0
Message-ID: <56B3CA2A.6030806@example.com>
Date: Thu, 4 Feb 2016 22:01:14 +0000
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101
    Thunderbird/38.5.0
MIME-Version: 1.0
Content-Type: multipart/mixed;
    boundary="------------090701020702030809070008"

This is a multi-part message in MIME format.
--------------090701020702030809070008
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 7bit

The secret word is "paperless" :-)

--------------090701020702030809070008
Content-Type: application/pdf;
    name="test0.pdf"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
    filename="test0.pdf"

[158 lines of base64-encoded PDF data for the "test0.pdf" attachment omitted]
--------------090701020702030809070008--
src/documents/tests/test_api.py (new file, 217 lines)
@@ -0,0 +1,217 @@
import os
import shutil
import tempfile
from unittest import mock

from django.contrib.auth.models import User
from django.test import override_settings
from rest_framework.test import APITestCase

from documents.models import Document, Correspondent, DocumentType, Tag


class DocumentApiTest(APITestCase):

    def setUp(self):
        self.scratch_dir = tempfile.mkdtemp()
        self.media_dir = tempfile.mkdtemp()
        self.originals_dir = os.path.join(self.media_dir, "documents", "originals")
        self.thumbnail_dir = os.path.join(self.media_dir, "documents", "thumbnails")

        os.makedirs(self.originals_dir, exist_ok=True)
        os.makedirs(self.thumbnail_dir, exist_ok=True)

        override_settings(
            SCRATCH_DIR=self.scratch_dir,
            MEDIA_ROOT=self.media_dir,
            ORIGINALS_DIR=self.originals_dir,
            THUMBNAIL_DIR=self.thumbnail_dir
        ).enable()

        user = User.objects.create_superuser(username="temp_admin")
        self.client.force_login(user=user)

    def tearDown(self):
        shutil.rmtree(self.scratch_dir, ignore_errors=True)
        shutil.rmtree(self.media_dir, ignore_errors=True)

    def testDocuments(self):

        response = self.client.get("/api/documents/").data

        self.assertEqual(response['count'], 0)

        c = Correspondent.objects.create(name="c", pk=41)
        dt = DocumentType.objects.create(name="dt", pk=63)
        tag = Tag.objects.create(name="t", pk=85)

        doc = Document.objects.create(title="WOW", content="the content", correspondent=c, document_type=dt, checksum="123")

        doc.tags.add(tag)

        response = self.client.get("/api/documents/", format='json')
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data['count'], 1)

        returned_doc = response.data['results'][0]
        self.assertEqual(returned_doc['id'], doc.id)
        self.assertEqual(returned_doc['title'], doc.title)
        self.assertEqual(returned_doc['correspondent']['name'], c.name)
        self.assertEqual(returned_doc['document_type']['name'], dt.name)
        self.assertEqual(returned_doc['correspondent']['id'], c.id)
        self.assertEqual(returned_doc['document_type']['id'], dt.id)
        self.assertEqual(returned_doc['correspondent']['id'], returned_doc['correspondent_id'])
        self.assertEqual(returned_doc['document_type']['id'], returned_doc['document_type_id'])
        self.assertEqual(len(returned_doc['tags']), 1)
        self.assertEqual(returned_doc['tags'][0]['name'], tag.name)
        self.assertEqual(returned_doc['tags'][0]['id'], tag.id)
        self.assertListEqual(returned_doc['tags_id'], [tag.id])

        c2 = Correspondent.objects.create(name="c2")

        returned_doc['correspondent_id'] = c2.pk
        returned_doc['title'] = "the new title"

        response = self.client.put('/api/documents/{}/'.format(doc.pk), returned_doc, format='json')

        self.assertEqual(response.status_code, 200)

        doc_after_save = Document.objects.get(id=doc.id)

        self.assertEqual(doc_after_save.correspondent, c2)
        self.assertEqual(doc_after_save.title, "the new title")

        self.client.delete("/api/documents/{}/".format(doc_after_save.pk))

        self.assertEqual(len(Document.objects.all()), 0)

    def test_document_actions(self):

        _, filename = tempfile.mkstemp(dir=self.originals_dir)

        content = b"This is a test"
        content_thumbnail = b"thumbnail content"

        with open(filename, "wb") as f:
            f.write(content)

        doc = Document.objects.create(title="none", filename=os.path.basename(filename), file_type="pdf")

        with open(os.path.join(self.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f:
            f.write(content_thumbnail)

        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content)

        response = self.client.get('/api/documents/{}/preview/'.format(doc.pk))

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content)

        response = self.client.get('/api/documents/{}/thumb/'.format(doc.pk))

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content_thumbnail)

    def test_document_actions_not_existing_file(self):

        doc = Document.objects.create(title="none", filename=os.path.basename("asd"), file_type="pdf")

        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
        self.assertEqual(response.status_code, 404)

        response = self.client.get('/api/documents/{}/preview/'.format(doc.pk))
        self.assertEqual(response.status_code, 404)

        response = self.client.get('/api/documents/{}/thumb/'.format(doc.pk))
        self.assertEqual(response.status_code, 404)

    def test_document_filters(self):

        doc1 = Document.objects.create(title="none1", checksum="A")
        doc2 = Document.objects.create(title="none2", checksum="B")
        doc3 = Document.objects.create(title="none3", checksum="C")

        tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)
        tag_2 = Tag.objects.create(name="t2")
        tag_3 = Tag.objects.create(name="t3")

        doc1.tags.add(tag_inbox)
        doc2.tags.add(tag_2)
        doc3.tags.add(tag_2)
        doc3.tags.add(tag_3)

        response = self.client.get("/api/documents/?is_in_inbox=true")
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['id'], doc1.id)

        response = self.client.get("/api/documents/?is_in_inbox=false")
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['id'], doc2.id)
        self.assertEqual(results[1]['id'], doc3.id)

        response = self.client.get("/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['id'], doc1.id)
        self.assertEqual(results[1]['id'], doc3.id)

        response = self.client.get("/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['id'], doc3.id)

        response = self.client.get("/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 0)

        response = self.client.get("/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 3)
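        # (Editor's note: a malformed id list -- "a" instead of "," as the
        # separator -- is ignored by the filter, so all three documents come
        # back.)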

    @mock.patch("documents.index.autocomplete")
    def test_search_autocomplete(self, m):
        m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]
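        # (Editor's note: the mock returns `limit` copies of the term, so the
        # assertions below really test the default and custom limit handling
        # of the endpoint, not the index itself.)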

        response = self.client.get("/api/search/autocomplete/?term=test")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 10)

        response = self.client.get("/api/search/autocomplete/?term=test&limit=20")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 20)

        response = self.client.get("/api/search/autocomplete/?term=test&limit=-1")
        self.assertEqual(response.status_code, 400)

        response = self.client.get("/api/search/autocomplete/")
        self.assertEqual(response.status_code, 400)

        response = self.client.get("/api/search/autocomplete/?term=")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 10)

    def test_statistics(self):

        doc1 = Document.objects.create(title="none1", checksum="A")
        doc2 = Document.objects.create(title="none2", checksum="B")
        doc3 = Document.objects.create(title="none3", checksum="C")

        tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)

        doc1.tags.add(tag_inbox)

        response = self.client.get("/api/statistics/")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data['documents_total'], 3)
        self.assertEqual(response.data['documents_inbox'], 1)
@@ -2,9 +2,9 @@ import unittest

from django.test import TestCase

from .factories import DocumentFactory
from ..checks import changed_password_check
from ..models import Document
from .factories import DocumentFactory


class ChecksTestCase(TestCase):
src/documents/tests/test_classifier.py (new file, 85 lines)
@@ -0,0 +1,85 @@
import tempfile

from django.test import TestCase, override_settings

from documents.classifier import DocumentClassifier
from documents.models import Correspondent, Document, Tag, DocumentType


class TestClassifier(TestCase):

    def setUp(self):

        self.classifier = DocumentClassifier()

    def generate_test_data(self):
        self.c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
        self.c2 = Correspondent.objects.create(name="c2")
        self.t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
        self.t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True)
        self.t3 = Tag.objects.create(name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45)
        self.dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)

        self.doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=self.c1, checksum="A", document_type=self.dt)
        self.doc2 = Document.objects.create(title="doc1", content="this is another document, but from c2", correspondent=self.c2, checksum="B")
        self.doc_inbox = Document.objects.create(title="doc235", content="aa", checksum="C")

        self.doc1.tags.add(self.t1)
        self.doc2.tags.add(self.t1)
        self.doc2.tags.add(self.t3)
        self.doc_inbox.tags.add(self.t2)

    def testNoTrainingData(self):
        try:
            self.classifier.train()
        except ValueError as e:
            self.assertEqual(str(e), "No training data available.")
        else:
            self.fail("Should raise exception")

    def testEmpty(self):
        Document.objects.create(title="WOW", checksum="3457", content="ASD")
        self.classifier.train()
        self.assertIsNone(self.classifier.document_type_classifier)
        self.assertIsNone(self.classifier.tags_classifier)
        self.assertIsNone(self.classifier.correspondent_classifier)

        self.assertListEqual(self.classifier.predict_tags(""), [])
        self.assertIsNone(self.classifier.predict_document_type(""))
        self.assertIsNone(self.classifier.predict_correspondent(""))

    def testTrain(self):
        self.generate_test_data()
        self.classifier.train()
        self.assertListEqual(list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk])
        self.assertListEqual(list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk])

    def testPredict(self):
        self.generate_test_data()
        self.classifier.train()
        self.assertEqual(self.classifier.predict_correspondent(self.doc1.content), self.c1.pk)
        self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
        self.assertTupleEqual(self.classifier.predict_tags(self.doc1.content), (self.t1.pk,))
        self.assertTupleEqual(self.classifier.predict_tags(self.doc2.content), (self.t1.pk, self.t3.pk))
        self.assertEqual(self.classifier.predict_document_type(self.doc1.content), self.dt.pk)
        self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)

    def testDatasetHashing(self):

        self.generate_test_data()

        self.assertTrue(self.classifier.train())
        self.assertFalse(self.classifier.train())
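        # (Editor's note: train() reports whether anything changed; the second
        # call sees an unchanged data set hash and returns False.)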

    @override_settings(DATA_DIR=tempfile.mkdtemp())
    def testSaveClassifier(self):

        self.generate_test_data()

        self.classifier.train()

        self.classifier.save_classifier()

        new_classifier = DocumentClassifier()
        new_classifier.reload()
        self.assertFalse(new_classifier.train())
@@ -1,8 +1,15 @@
import os
import re
import shutil
import tempfile
from unittest import mock
from unittest.mock import MagicMock

from django.test import TestCase
from django.test import TestCase, override_settings

from ..models import FileInfo, Tag
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError


class TestAttributes(TestCase):
@@ -394,3 +401,254 @@ class TestFieldPermutations(TestCase):
        self.assertEqual(info.created.year, 2019)
        self.assertEqual(info.created.month, 9)
        self.assertEqual(info.created.day, 8)


class DummyParser(DocumentParser):

    def get_thumbnail(self):
        # not important during tests
        raise NotImplementedError()

    def __init__(self, path, logging_group, scratch_dir):
        super(DummyParser, self).__init__(path, logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)

    def get_optimised_thumbnail(self):
        return self.fake_thumb

    def get_text(self):
        return "The Text"


class FaultyParser(DocumentParser):

    def get_thumbnail(self):
        # not important during tests
        raise NotImplementedError()

    def __init__(self, path, logging_group, scratch_dir):
        super(FaultyParser, self).__init__(path, logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)

    def get_optimised_thumbnail(self):
        return self.fake_thumb

    def get_text(self):
        raise ParseError("Does not compute.")
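# (Editor's note: FaultyParser stands in for a parser whose text extraction
# always fails, so the consumer's error handling can be exercised below.)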


class TestConsumer(TestCase):

    def make_dummy_parser(self, path, logging_group):
        return DummyParser(path, logging_group, self.scratch_dir)

    def make_faulty_parser(self, path, logging_group):
        return FaultyParser(path, logging_group, self.scratch_dir)

    def setUp(self):
        self.scratch_dir = tempfile.mkdtemp()
        self.media_dir = tempfile.mkdtemp()
        self.consumption_dir = tempfile.mkdtemp()

        override_settings(
            SCRATCH_DIR=self.scratch_dir,
            MEDIA_ROOT=self.media_dir,
            ORIGINALS_DIR=os.path.join(self.media_dir, "documents", "originals"),
            THUMBNAIL_DIR=os.path.join(self.media_dir, "documents", "thumbnails"),
            CONSUMPTION_DIR=self.consumption_dir
        ).enable()

        patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
        m = patcher.start()
        m.return_value = [(None, {
            "parser": self.make_dummy_parser,
            "test": lambda _: True,
            "weight": 0
        })]

        self.addCleanup(patcher.stop)

        self.consumer = Consumer()

    def tearDown(self):
        shutil.rmtree(self.scratch_dir, ignore_errors=True)
        shutil.rmtree(self.media_dir, ignore_errors=True)
        shutil.rmtree(self.consumption_dir, ignore_errors=True)

    def get_test_file(self):
        fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.scratch_dir)
        return f

    def testNormalOperation(self):

        filename = self.get_test_file()
        document = self.consumer.try_consume_file(filename)

        self.assertEqual(document.content, "The Text")
        self.assertEqual(document.title, os.path.splitext(os.path.basename(filename))[0])
        self.assertIsNone(document.correspondent)
        self.assertIsNone(document.document_type)
        self.assertEqual(document.filename, "0000001.pdf")

        self.assertTrue(os.path.isfile(
            document.source_path
        ))

        self.assertTrue(os.path.isfile(
            document.thumbnail_path
        ))

        self.assertFalse(os.path.isfile(filename))

    def testOverrideFilename(self):
        filename = self.get_test_file()
        override_filename = "My Bank - Statement for November.pdf"

        document = self.consumer.try_consume_file(filename, override_filename=override_filename)

        self.assertEqual(document.correspondent.name, "My Bank")
        self.assertEqual(document.title, "Statement for November")

    def testOverrideTitle(self):

        document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
        self.assertEqual(document.title, "Override Title")

    def testOverrideCorrespondent(self):
        c = Correspondent.objects.create(name="test")

        document = self.consumer.try_consume_file(self.get_test_file(), override_correspondent_id=c.pk)
        self.assertEqual(document.correspondent.id, c.id)

    def testOverrideDocumentType(self):
        dt = DocumentType.objects.create(name="test")

        document = self.consumer.try_consume_file(self.get_test_file(), override_document_type_id=dt.pk)
        self.assertEqual(document.document_type.id, dt.id)

    def testOverrideTags(self):
        t1 = Tag.objects.create(name="t1")
        t2 = Tag.objects.create(name="t2")
        t3 = Tag.objects.create(name="t3")
        document = self.consumer.try_consume_file(self.get_test_file(), override_tag_ids=[t1.id, t3.id])

        self.assertIn(t1, document.tags.all())
        self.assertNotIn(t2, document.tags.all())
        self.assertIn(t3, document.tags.all())

    def testNotAFile(self):
        try:
            self.consumer.try_consume_file("non-existing-file")
        except ConsumerError as e:
            self.assertTrue(str(e).endswith('It is not a file'))
            return

        self.fail("Should throw exception")

    @override_settings(CONSUMPTION_DIR=None)
    def testConsumptionDirUnset(self):
        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.")
            return

        self.fail("Should throw exception")

    @override_settings(CONSUMPTION_DIR="asd")
    def testNoConsumptionDir(self):
        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertEqual(str(e), "Consumption directory asd does not exist")
            return

        self.fail("Should throw exception")

    def testDuplicates(self):
        self.consumer.try_consume_file(self.get_test_file())

        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertTrue(str(e).endswith("It is a duplicate."))
            return

        self.fail("Should throw exception")

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def testNoParsers(self, m):
        m.return_value = []

        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertTrue(str(e).startswith("No parsers abvailable"))
            return

        self.fail("Should throw exception")

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def testFaultyParser(self, m):
        m.return_value = [(None, {
            "parser": self.make_faulty_parser,
            "test": lambda _: True,
            "weight": 0
        })]

        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertEqual(str(e), "Does not compute.")
            return

        self.fail("Should throw exception.")

    @mock.patch("documents.consumer.Consumer._write")
    def testPostSaveError(self, m):
        filename = self.get_test_file()
        m.side_effect = OSError("NO.")
        try:
            self.consumer.try_consume_file(filename)
        except ConsumerError as e:
            self.assertEqual(str(e), "NO.")
        else:
            self.fail("Should raise exception")

        # file not deleted
        self.assertTrue(os.path.isfile(filename))

        # Database empty
        self.assertEqual(len(Document.objects.all()), 0)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
    def testFilenameHandling(self):
        filename = self.get_test_file()

        document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")

        print(document.source_path)
        print("===")

        self.assertEqual(document.title, "new docs")
        self.assertEqual(document.correspondent.name, "Bank")
        self.assertEqual(document.filename, "bank/new-docs-0000001.pdf")

    @mock.patch("documents.consumer.DocumentClassifier")
    def testClassifyDocument(self, m):
        correspondent = Correspondent.objects.create(name="test")
        dtype = DocumentType.objects.create(name="test")
        t1 = Tag.objects.create(name="t1")
        t2 = Tag.objects.create(name="t2")

        m.return_value = MagicMock()
        m.return_value.predict_correspondent.return_value = correspondent.pk
        m.return_value.predict_document_type.return_value = dtype.pk
        m.return_value.predict_tags.return_value = [t1.pk]

        document = self.consumer.try_consume_file(self.get_test_file())

        self.assertEqual(document.correspondent, correspondent)
        self.assertEqual(document.document_type, dtype)
        self.assertIn(t1, document.tags.all())
        self.assertNotIn(t2, document.tags.all())
@@ -1,17 +1,14 @@
import datetime
import os
import shutil
from unittest import mock
from uuid import uuid4
from pathlib import Path
from shutil import rmtree
from uuid import uuid4

from dateutil import tz
from django.conf import settings
from django.test import TestCase, override_settings

from django.utils.text import slugify
from ..models import Tag, Document, Correspondent
from django.conf import settings
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
from ..models import Document, Correspondent
from ..signals.handlers import update_filename_and_move_files


class TestDate(TestCase):
@@ -31,18 +28,6 @@ class TestDate(TestCase):
        for dirname in self.deletion_list:
            shutil.rmtree(dirname, ignore_errors=True)

    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_source_filename(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        self.assertEqual(document.source_filename, "0000001.pdf")

        document.filename = "test.pdf"
        self.assertEqual(document.source_filename, "test.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_generate_source_filename(self):
        document = Document()
@@ -50,58 +35,50 @@ class TestDate(TestCase):
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        self.assertEqual(document.generate_source_filename(), "0000001.pdf")
        self.assertEqual(generate_filename(document), "{:07d}.pdf".format(document.pk))

        document.storage_type = Document.STORAGE_TYPE_GPG
        self.assertEqual(document.generate_source_filename(),
                         "0000001.pdf.gpg")
        self.assertEqual(generate_filename(document),
                         "{:07d}.pdf.gpg".format(document.pk))

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_file_renaming(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf")
        document.create_source_directory()
        Path(document.source_path).touch()
        # Test default source_path
        self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/{:07d}.pdf".format(document.pk))

        # Test source_path
        self.assertEqual(document.source_path, settings.MEDIA_ROOT +
                         "/documents/originals/none/none-0000001.pdf")
        document.filename = generate_filename(document)

        # Ensure that filename is properly generated
        self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))

        # Enable encryption and check again
        document.storage_type = Document.STORAGE_TYPE_GPG
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf.gpg")
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
                         "none/none-{:07d}.pdf.gpg".format(document.pk))

        document.save()

        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                         "/documents/originals/none"), True)
        # test that creating dirs for the source_path creates the correct directory
        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)

        # Set a correspondent and save the document
        document.correspondent = Correspondent.objects.get_or_create(
            name="test")[0]
        document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
        document.save()

        # Check proper handling of files
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                         "/documents/originals/test"), True)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                         "/documents/originals/none"), False)
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                         "originals/test/test-0000001.pdf.gpg"), True)
        self.assertEqual(document.generate_source_filename(),
                         "test/test-0000001.pdf.gpg")
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_file_renaming_missing_permissions(self):
        document = Document()
        document.file_type = "pdf"
@@ -109,34 +86,67 @@ class TestDate(TestCase):
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf")
        document.create_source_directory()
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
                         "none/none-{:07d}.pdf".format(document.pk))
        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()

        # Test source_path
        self.assertEqual(document.source_path, settings.MEDIA_ROOT +
                         "/documents/originals/none/none-0000001.pdf")
        self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk))

        # Make the folder read- and execute-only (no writing and no renaming)
        os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o555)
        os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)

        # Set a correspondent and save the document
        document.correspondent = Correspondent.objects.get_or_create(
            name="test")[0]
        document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
        document.save()

        # Check proper handling of files
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                         "originals/none/none-0000001.pdf"), True)
        self.assertEqual(document.source_filename,
                         "none/none-0000001.pdf")
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
        self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))

        os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777)
        os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_file_renaming_database_error(self):

        document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")

        document = Document()
        document.file_type = "pdf"
        document.checksum = "BBBBB"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        # Ensure that filename is properly generated
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
                         "none/none-{:07d}.pdf".format(document.pk))
        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()

        # Test source_path
        self.assertTrue(os.path.isfile(document.source_path))

        # Set a correspondent and save the document
        document.correspondent = Correspondent.objects.get_or_create(
            name="test")[0]

        # This will cause save() to fail.
        document.checksum = document1.checksum

        # Assume saving the document initially works, this gets called.
        # After renaming, an error occurs, and filename is not saved:
        # document should still be available at document.filename.
        update_filename_and_move_files(None, document)

        # Check proper handling of files
        self.assertTrue(os.path.isfile(document.source_path))
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
        self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_document_delete(self):
        document = Document()
        document.file_type = "pdf"
@@ -144,21 +154,20 @@ class TestDate(TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
tmp = document.source_filename
|
||||
self.assertEqual(document.generate_source_filename(),
|
||||
"none/none-0000001.pdf")
|
||||
document.create_source_directory()
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
|
||||
# Ensure file deletion after delete
|
||||
pk = document.pk
|
||||
document.delete()
|
||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
|
||||
"/documents/originals/none/none-0000001.pdf"), False)
|
||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
|
||||
"/documents/originals/none"), False)
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_document_delete_nofile(self):
|
||||
document = Document()
|
||||
document.file_type = "pdf"
|
||||
@@ -167,8 +176,7 @@ class TestDate(TestCase):
|
||||
|
||||
document.delete()
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_directory_not_empty(self):
|
||||
document = Document()
|
||||
document.file_type = "pdf"
|
||||
@@ -176,28 +184,24 @@ class TestDate(TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
tmp = document.source_filename
|
||||
self.assertEqual(document.generate_source_filename(),
|
||||
"none/none-0000001.pdf")
|
||||
document.create_source_directory()
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
create_source_path_directory(document.source_path)
|
||||
|
||||
Path(document.source_path).touch()
|
||||
Path(document.source_path + "test").touch()
|
||||
important_file = document.source_path + "test"
|
||||
Path(important_file).touch()
|
||||
|
||||
# Set a correspondent and save the document
|
||||
document.correspondent = Correspondent.objects.get_or_create(
|
||||
name="test")[0]
|
||||
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||
document.save()
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
|
||||
"/documents/originals/test"), True)
|
||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
|
||||
"/documents/originals/none"), True)
|
||||
|
||||
# Cleanup
|
||||
os.remove(settings.MEDIA_ROOT +
|
||||
"/documents/originals/none/none-0000001.pdftest")
|
||||
os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none")
|
||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/test"), True)
|
||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True)
|
||||
self.assertTrue(os.path.isfile(important_file))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||
def test_tags_with_underscore(self):
|
||||
@@ -212,13 +216,8 @@ class TestDate(TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
tmp = document.source_filename
|
||||
self.assertEqual(document.generate_source_filename(),
|
||||
"demo-0000001.pdf")
|
||||
document.create_source_directory()
|
||||
Path(document.source_path).touch()
|
||||
|
||||
document.delete()
|
||||
self.assertEqual(generate_filename(document),
|
||||
"demo-{:07d}.pdf".format(document.pk))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||
def test_tags_with_dash(self):
|
||||
@@ -233,13 +232,8 @@ class TestDate(TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
tmp = document.source_filename
|
||||
self.assertEqual(document.generate_source_filename(),
|
||||
"demo-0000001.pdf")
|
||||
document.create_source_directory()
|
||||
Path(document.source_path).touch()
|
||||
|
||||
document.delete()
|
||||
self.assertEqual(generate_filename(document),
|
||||
"demo-{:07d}.pdf".format(document.pk))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||
def test_tags_malformed(self):
|
||||
@@ -254,13 +248,8 @@ class TestDate(TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
tmp = document.source_filename
|
||||
self.assertEqual(document.generate_source_filename(),
|
||||
"none-0000001.pdf")
|
||||
document.create_source_directory()
|
||||
Path(document.source_path).touch()
|
||||
|
||||
document.delete()
|
||||
self.assertEqual(generate_filename(document),
|
||||
"none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
|
||||
def test_tags_all(self):
|
||||
@@ -274,64 +263,25 @@ class TestDate(TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
tmp = document.source_filename
|
||||
self.assertEqual(document.generate_source_filename(),
|
||||
"demo-0000001.pdf")
|
||||
document.create_source_directory()
|
||||
Path(document.source_path).touch()
|
||||
self.assertEqual(generate_filename(document),
|
||||
"demo-{:07d}.pdf".format(document.pk))
|
||||
|
||||
document.delete()

    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
    def test_tags_out_of_bounds_0(self):
    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
    def test_tags_out_of_bounds(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none-0000001.pdf")
        document.create_source_directory()
        Path(document.source_path).touch()

        document.delete()

    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}")
    def test_tags_out_of_bounds_10000000(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        # Add tag to document
        document.tags.create(name="demo")
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none-0000001.pdf")
        document.create_source_directory()
        Path(document.source_path).touch()
        self.assertEqual(generate_filename(document),
                         "none-{:07d}.pdf".format(document.pk))

        document.delete()

    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}")
    def test_tags_out_of_bounds_99(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none-0000001.pdf")
        document.create_source_directory()
        Path(document.source_path).touch()

        document.delete()

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}/{correspondent}")
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
    def test_nested_directory_cleanup(self):
        document = Document()
        document.file_type = "pdf"
@@ -339,153 +289,34 @@ class TestDate(TestCase):
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none/none-0000001.pdf")
        document.create_source_directory()
        document.filename = generate_filename(document)
        self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk))
        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()

        # Check proper handling of files
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none/none"), True)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), True)

        pk = document.pk
        document.delete()

        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
                                        "/documents/originals/none/none/none-0000001.pdf"),
                         False)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none/none"), False)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none"), False)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals"), True)
        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)

    @override_settings(PAPERLESS_FILENAME_FORMAT=None)
    def test_format_none(self):
        document = Document()
        document.pk = 1
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        self.assertEqual(document.generate_source_filename(), "0000001.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    def test_document_renamed(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf")
        document.create_source_directory()
        Path(document.source_path).touch()

        # Test source_path
        self.assertEqual(document.source_path, settings.MEDIA_ROOT +
                         "/documents/originals/none/none-0000001.pdf")

        # Rename the document "illegally"
        os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
        os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
                  "none/none-0000001.pdf",
                  settings.MEDIA_ROOT + "/documents/originals/" +
                  "test/test-0000001.pdf")
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                                        "originals/test/test-0000001.pdf"), True)
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                                        "originals/none/none-0000001.pdf"), False)

        # Set new correspondent and expect document to be saved properly
        document.correspondent = Correspondent.objects.get_or_create(
            name="foo")[0]
        document.save()
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                                        "originals/foo/foo-0000001.pdf"), True)

        # Check proper handling of files
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/foo"), True)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none"), False)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/test"), False)
        self.assertEqual(document.generate_source_filename(),
                         "foo/foo-0000001.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    def test_document_renamed_encrypted(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_GPG
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf.gpg")
        document.create_source_directory()
        Path(document.source_path).touch()

        # Test source_path
        self.assertEqual(document.source_path, settings.MEDIA_ROOT +
                         "/documents/originals/none/none-0000001.pdf.gpg")

        # Rename the document "illegally"
        os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
        os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
                  "none/none-0000001.pdf.gpg",
                  settings.MEDIA_ROOT + "/documents/originals/" +
                  "test/test-0000001.pdf.gpg")
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                                        "originals/test/test-0000001.pdf.gpg"), True)
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                                        "originals/none/none-0000001.pdf"), False)

        # Set new correspondent and expect document to be saved properly
        document.correspondent = Correspondent.objects.get_or_create(
            name="foo")[0]
        document.save()
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                                        "originals/foo/foo-0000001.pdf.gpg"), True)

        # Check proper handling of files
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/foo"), True)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none"), False)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/test"), False)
        self.assertEqual(document.generate_source_filename(),
                         "foo/foo-0000001.pdf.gpg")

    def test_delete_all_empty_subdirectories(self):
        # Create our working directory
        tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
        os.makedirs(tmp)
        self.add_to_deletion_list(tmp)

        os.makedirs(os.path.join(tmp, "empty"))
        os.makedirs(os.path.join(tmp, "empty", "subdirectory"))

        os.makedirs(os.path.join(tmp, "notempty"))
        Path(os.path.join(tmp, "notempty", "file")).touch()

        Document.delete_all_empty_subdirectories(tmp)

        self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
        self.assertEqual(os.path.isdir(os.path.join(tmp, "empty")), False)
        self.assertEqual(os.path.isfile(
            os.path.join(tmp, "notempty", "file")), True)
        self.assertEqual(generate_filename(document), "0000001.pdf")

    def test_try_delete_empty_directories(self):
        # Create our working directory
        tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
        tmp = os.path.join(settings.ORIGINALS_DIR, "test_delete_empty")
        os.makedirs(tmp)
        self.add_to_deletion_list(tmp)

@@ -493,67 +324,27 @@ class TestDate(TestCase):
        Path(os.path.join(tmp, "notempty", "file")).touch()
        os.makedirs(os.path.join(tmp, "notempty", "empty"))

        Document.try_delete_empty_directories(
            os.path.join(tmp, "notempty", "empty"))
        delete_empty_directories(os.path.join(tmp, "notempty", "empty"))
        self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
        self.assertEqual(os.path.isfile(
            os.path.join(tmp, "notempty", "file")), True)
        self.assertEqual(os.path.isdir(
            os.path.join(tmp, "notempty", "empty")), False)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    def test_document_accidentally_deleted(self):
    @override_settings(PAPERLESS_FILENAME_FORMAT="{created/[title]")
    def test_invalid_format(self):
        document = Document()
        document.pk = 1
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf")
        document.create_source_directory()
        Path(document.source_path).touch()
        self.assertEqual(generate_filename(document), "0000001.pdf")

        # Test source_path
        self.assertEqual(document.source_path, settings.MEDIA_ROOT +
                         "/documents/originals/none/none-0000001.pdf")

        # Delete the document "illegally"
        os.remove(settings.MEDIA_ROOT + "/documents/originals/" +
                  "none/none-0000001.pdf")

        # Set new correspondent and expect document to be saved properly
        document.correspondent = Correspondent.objects.get_or_create(
            name="foo")[0]
        document.save()

        # Check proper handling of files
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none"), True)
        self.assertEqual(document.source_filename,
                         "none/none-0000001.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    def test_set_filename(self):
    @override_settings(PAPERLESS_FILENAME_FORMAT="{created__year}")
    def test_invalid_format_key(self):
        document = Document()
        document.pk = 1
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf")
        document.create_source_directory()
        Path(document.source_path).touch()

        # Set existing filename
        document.set_filename(tmp)
        self.assertEqual(document.source_filename, "none/none-0000001.pdf")

        # Set non-existing filename
        document.set_filename("doesnotexist")
        self.assertEqual(document.source_filename, "none/none-0000001.pdf")
        self.assertEqual(generate_filename(document), "0000001.pdf")
@@ -1,9 +1,8 @@
from django.core.management.base import CommandError
from django.test import TestCase

from ..management.commands.document_importer import Command

from documents.settings import EXPORTER_FILE_NAME
from ..management.commands.document_importer import Command


class TestImporter(TestCase):
@@ -1,6 +1,5 @@
import logging
import uuid

from unittest import mock

from django.test import TestCase
@@ -1,91 +0,0 @@
import base64
import os
import magic

from hashlib import md5
from unittest import mock

from django.conf import settings
from django.test import TestCase

from ..mail import Message, Attachment


class TestMessage(TestCase):

    def __init__(self, *args, **kwargs):

        TestCase.__init__(self, *args, **kwargs)
        self.sample = os.path.join(
            settings.BASE_DIR,
            "documents",
            "tests",
            "samples",
            "mail.txt"
        )

    def test_init(self):

        with open(self.sample, "rb") as f:

            with mock.patch("logging.StreamHandler.emit") as __:
                message = Message(f.read())

            self.assertTrue(message)
            self.assertEqual(message.subject, "Test 0")

            data = message.attachment.read()

            self.assertEqual(
                md5(data).hexdigest(), "7c89655f9e9eb7dd8cde8568e8115d59")

            self.assertEqual(
                message.attachment.content_type, "application/pdf")
            with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
                self.assertEqual(m.id_buffer(data), "application/pdf")


class TestInlineMessage(TestCase):

    def __init__(self, *args, **kwargs):

        TestCase.__init__(self, *args, **kwargs)
        self.sample = os.path.join(
            settings.BASE_DIR,
            "documents",
            "tests",
            "samples",
            "inline_mail.txt"
        )

    def test_init(self):

        with open(self.sample, "rb") as f:

            with mock.patch("logging.StreamHandler.emit") as __:
                message = Message(f.read())

            self.assertTrue(message)
            self.assertEqual(message.subject, "Paperless Inline Image")

            data = message.attachment.read()

            self.assertEqual(
                md5(data).hexdigest(), "30c00a7b42913e65f7fdb0be40b9eef3")

            self.assertEqual(
                message.attachment.content_type, "image/png")
            with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
                self.assertEqual(m.id_buffer(data), "image/png")


class TestAttachment(TestCase):

    def test_init(self):
        data = base64.encodebytes(b"0")
        self.assertEqual(Attachment(data, "application/pdf").suffix, "pdf")
        self.assertEqual(Attachment(data, "image/png").suffix, "png")
        self.assertEqual(Attachment(data, "image/jpeg").suffix, "jpeg")
        self.assertEqual(Attachment(data, "image/gif").suffix, "gif")
        self.assertEqual(Attachment(data, "image/tiff").suffix, "tiff")
        self.assertEqual(Attachment(data, "image/png").read(), data)
@@ -1,7 +1,7 @@
from django.test import TestCase

from ..models import Document, Correspondent
from .factories import DocumentFactory, CorrespondentFactory
from ..models import Document, Correspondent


class CorrespondentTestCase(TestCase):
@@ -14,7 +14,7 @@ class TestParserDiscovery(TestCase):
            pass

        m.return_value = (
            (None, lambda _: {"weight": 0, "parser": DummyParser}),
            (None, {"weight": 0, "parser": DummyParser, "test": lambda _: True}),
        )

        self.assertEqual(
@@ -32,8 +32,8 @@ class TestParserDiscovery(TestCase):
            pass

        m.return_value = (
            (None, lambda _: {"weight": 0, "parser": DummyParser1}),
            (None, lambda _: {"weight": 1, "parser": DummyParser2}),
            (None, {"weight": 0, "parser": DummyParser1, "test": lambda _: True}),
            (None, {"weight": 1, "parser": DummyParser2, "test": lambda _: True}),
        )

        self.assertEqual(
@@ -43,7 +43,7 @@ class TestParserDiscovery(TestCase):

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test__get_parser_class_0_parsers(self, m, *args):
        m.return_value = ((None, lambda _: None),)
        m.return_value = []
        with TemporaryDirectory() as tmpdir:
            self.assertIsNone(
                get_parser_class("doc.pdf")
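The test changes above track a new shape for consumer declaration responses: each signal handler now returns a dict carrying the parser class, a priority weight, and a one-argument test predicate, instead of a bare callable. A minimal sketch of a conforming handler follows; the names are hypothetical, and the assumption that the predicate receives the document path is mine, not stated in the diff.

class DummyPdfParser:
    pass


def my_consumer_declaration(sender, **kwargs):
    # Hypothetical handler matching the dict shape the tests above expect.
    return {
        "parser": DummyPdfParser,
        "weight": 0,
        "test": lambda path: path.lower().endswith(".pdf"),
    }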
@@ -1,14 +1,9 @@
from django.db.models import Count, Max
from django.http import HttpResponse, HttpResponseBadRequest
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.views import APIView

from paperless.db import GnuPG
from paperless.views import StandardPagination
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.mixins import (
    DestroyModelMixin,
@@ -17,12 +12,17 @@ from rest_framework.mixins import (
    UpdateModelMixin
)
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework.viewsets import (
    GenericViewSet,
    ModelViewSet,
    ReadOnlyModelViewSet
)

import documents.index as index
from paperless.db import GnuPG
from paperless.views import StandardPagination
from .filters import (
    CorrespondentFilterSet,
    DocumentFilterSet,
@@ -30,8 +30,6 @@ from .filters import (
    DocumentTypeFilterSet,
    LogFilterSet
)

import documents.index as index
from .forms import UploadForm
from .models import Correspondent, Document, Log, Tag, DocumentType
from .serialisers import (
@@ -54,7 +52,7 @@ class CorrespondentViewSet(ModelViewSet):
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)
    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filter_class = CorrespondentFilterSet
    filterset_class = CorrespondentFilterSet
    ordering_fields = ("name", "matching_algorithm", "match", "document_count", "last_correspondence")


@@ -65,7 +63,7 @@ class TagViewSet(ModelViewSet):
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)
    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filter_class = TagFilterSet
    filterset_class = TagFilterSet
    ordering_fields = ("name", "matching_algorithm", "match", "document_count")


@@ -76,7 +74,7 @@ class DocumentTypeViewSet(ModelViewSet):
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)
    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filter_class = DocumentTypeFilterSet
    filterset_class = DocumentTypeFilterSet
    ordering_fields = ("name", "matching_algorithm", "match", "document_count")


@@ -91,7 +89,7 @@ class DocumentViewSet(RetrieveModelMixin,
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)
    filter_backends = (DjangoFilterBackend, SearchFilter, OrderingFilter)
    filter_class = DocumentFilterSet
    filterset_class = DocumentFilterSet
    search_fields = ("title", "correspondent__name", "content")
    ordering_fields = (
        "id", "title", "correspondent__name", "document_type__name", "created", "modified", "added", "archive_serial_number")
@@ -106,7 +104,7 @@ class DocumentViewSet(RetrieveModelMixin,
        return super(DocumentViewSet, self).destroy(request, *args, **kwargs)

    def file_response(self, pk, disposition):
        #TODO: this should not be necessary here.
        # TODO: this should not be necessary here.
        content_types = {
            Document.TYPE_PDF: "application/pdf",
            Document.TYPE_PNG: "image/png",
@@ -114,7 +112,7 @@ class DocumentViewSet(RetrieveModelMixin,
            Document.TYPE_GIF: "image/gif",
            Document.TYPE_TIF: "image/tiff",
            Document.TYPE_CSV: "text/csv",
            Document.TYPE_MD: "text/markdown",
            Document.TYPE_TXT: "text/plain"
        }

@@ -132,7 +130,7 @@ class DocumentViewSet(RetrieveModelMixin,

    @action(methods=['post'], detail=False)
    def post_document(self, request, pk=None):
        #TODO: is this a good implementation?
        # TODO: is this a good implementation?
        form = UploadForm(data=request.POST, files=request.FILES)
        if form.is_valid():
            form.save()
@@ -142,17 +140,26 @@ class DocumentViewSet(RetrieveModelMixin,

    @action(methods=['get'], detail=True)
    def preview(self, request, pk=None):
        response = self.file_response(pk, "inline")
        return response
        try:
            response = self.file_response(pk, "inline")
            return response
        except FileNotFoundError:
            raise Http404("Document source file does not exist")

    @action(methods=['get'], detail=True)
    @cache_control(public=False, max_age=315360000)
    def thumb(self, request, pk=None):
        return HttpResponse(Document.objects.get(id=pk).thumbnail_file, content_type='image/png')
        try:
            return HttpResponse(Document.objects.get(id=pk).thumbnail_file, content_type='image/png')
        except FileNotFoundError:
            raise Http404("Document thumbnail does not exist")

    @action(methods=['get'], detail=True)
    def download(self, request, pk=None):
        return self.file_response(pk, "attachment")
        try:
            return self.file_response(pk, "attachment")
        except FileNotFoundError:
            raise Http404("Document source file does not exist")


class LogViewSet(ReadOnlyModelViewSet):
@@ -163,7 +170,7 @@ class LogViewSet(ReadOnlyModelViewSet):
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)
    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filter_class = LogFilterSet
    filterset_class = LogFilterSet
    ordering_fields = ("created",)


@@ -191,13 +198,12 @@ class SearchView(APIView):
            except (ValueError, TypeError):
                page = 1

            result_page = index.query_page(self.ix, query, page)

            return Response(
                {'count': len(result_page),
                 'page': result_page.pagenum,
                 'page_count': result_page.pagecount,
                 'results': list(map(self.add_infos_to_hit, result_page))})
            with index.query_page(self.ix, query, page) as result_page:
                return Response(
                    {'count': len(result_page),
                     'page': result_page.pagenum,
                     'page_count': result_page.pagecount,
                     'results': list(map(self.add_infos_to_hit, result_page))})

        else:
            return Response({
@@ -217,17 +223,16 @@ class SearchAutoCompleteView(APIView):
        if 'term' in request.query_params:
            term = request.query_params['term']
        else:
            term = None
            return HttpResponseBadRequest("Term required")

        if 'limit' in request.query_params:
            limit = int(request.query_params['limit'])
            if limit <= 0:
                return HttpResponseBadRequest("Invalid limit")
        else:
            limit = 10

        if term is not None:
            return Response(index.autocomplete(self.ix, term, limit))
        else:
            return Response([])
        return Response(index.autocomplete(self.ix, term, limit))


class StatisticsView(APIView):
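The preview, thumb and download actions above share one recovery pattern: a missing file on disk becomes an HTTP 404 instead of an unhandled 500. A standalone sketch of the same pattern outside the viewset; the view name and the direct open() call are illustrative, not from this commit.

from django.http import FileResponse, Http404


def serve_original(request, path):
    # Opening the file raises FileNotFoundError if the document was moved
    # or deleted behind Django's back; map that to a 404 so the client gets
    # a meaningful status instead of a server error.
    try:
        return FileResponse(open(path, "rb"), content_type="application/pdf")
    except FileNotFoundError:
        raise Http404("Document source file does not exist")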
@@ -11,6 +11,8 @@ writeable_hint = (
    "Set the permissions of {} to be writeable by the user running the "
    "Paperless services"
)


def path_check(env_var):
    messages = []
    directory = os.getenv(env_var)
@@ -27,6 +29,7 @@ def path_check(env_var):
        ))
    return messages


@register()
def paths_check(app_configs, **kwargs):
    """
@@ -34,9 +37,9 @@ def paths_check(app_configs, **kwargs):
    """

    check_messages = path_check("PAPERLESS_DATA_DIR") + \
        path_check("PAPERLESS_MEDIA_ROOT") + \
        path_check("PAPERLESS_CONSUMPTION_DIR") + \
        path_check("PAPERLESS_STATICDIR")

    return check_messages

@@ -64,3 +67,16 @@ def binaries_check(app_configs, **kwargs):
        check_messages.append(Warning(error.format(binary), hint))

    return check_messages


@register()
def debug_mode_check(app_configs, **kwargs):
    if settings.DEBUG:
        return [Warning(
            "DEBUG mode is enabled. Disable Debug mode. This is a serious "
            "security issue, since it puts security overrides in place which "
            "are meant to be used only during development. This "
            "also means that paperless will reveal various "
            "debugging information to anyone when something goes wrong.")]
    else:
        return []
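Checks registered this way run with "python manage.py check" and at server startup. A minimal sketch of adding another check in the same style, with a hypothetical name:

from django.core.checks import register


@register()
def example_check(app_configs, **kwargs):
    # Hypothetical check in the style of paths_check and debug_mode_check
    # above: return a list of Warning/Error messages, or an empty list
    # when everything is fine.
    return []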
@@ -1,4 +1,5 @@
import json
import math
import multiprocessing
import os
import re
@@ -13,6 +14,18 @@ elif os.path.exists("/etc/paperless.conf"):
elif os.path.exists("/usr/local/etc/paperless.conf"):
    load_dotenv("/usr/local/etc/paperless.conf")

# There are multiple levels of concurrency in paperless:
#  - Multiple consumers may be run in parallel.
#  - Each consumer may process multiple pages in parallel.
#  - Each Tesseract OCR run may spawn multiple threads to process a single page
#    slightly faster.
# The performance gains from having tesseract use multiple threads are minimal.
# However, when multiple pages are processed in parallel, the total number of
# OCR threads may exceed the number of available cpu cores, which will
# dramatically slow down the consumption process. This setting limits each
# Tesseract process to one thread.
os.environ['OMP_THREAD_LIMIT'] = "1"


def __get_boolean(key, default="NO"):
    """
@@ -21,9 +34,11 @@ def __get_boolean(key, default="NO"):
    """
    return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))


# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")


###############################################################################
# Directories                                                                 #
###############################################################################
@@ -65,6 +80,7 @@ INSTALLED_APPS = [
    "documents.apps.DocumentsConfig",
    "paperless_tesseract.apps.PaperlessTesseractConfig",
    "paperless_text.apps.PaperlessTextConfig",
    "paperless_mail.apps.PaperlessMailConfig",

    "django.contrib.admin",

@@ -139,11 +155,11 @@ else:
X_FRAME_OPTIONS = 'SAMEORIGIN'

# We allow CORS from localhost:8080
CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8080,https://localhost:8080").split(","))
CORS_ALLOWED_ORIGINS = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","))

if DEBUG:
    # Allow access from the angular development server during debugging
    CORS_ORIGIN_WHITELIST += ('http://localhost:4200',)
    CORS_ALLOWED_ORIGINS += ('http://localhost:4200',)

# The secret key has a default that should be fine so long as you're hosting
# Paperless on a closed network. However, if you're putting this anywhere
@@ -195,11 +211,11 @@ DATABASES = {
    }
}

# Always have sqlite available as a second option for management commands
# This is important when migrating to/from sqlite
DATABASES['sqlite'] = DATABASES['default'].copy()

if os.getenv("PAPERLESS_DBHOST"):
    # Have sqlite available as a second option for management commands
    # This is important when migrating to/from sqlite
    DATABASES['sqlite'] = DATABASES['default'].copy()

    DATABASES["default"] = {
        "ENGINE": "django.db.backends.postgresql_psycopg2",
        "HOST": os.getenv("PAPERLESS_DBHOST"),
@@ -244,6 +260,14 @@ LOGGING = {
        "handlers": ["dbhandler", "streamhandler"],
        "level": "DEBUG"
    },
    "paperless_mail": {
        "handlers": ["dbhandler", "streamhandler"],
        "level": "DEBUG"
    },
    "paperless_tesseract": {
        "handlers": ["dbhandler", "streamhandler"],
        "level": "DEBUG"
    },
    },
}

@@ -251,22 +275,60 @@ LOGGING = {
# Task queue                                                                  #
###############################################################################


# Sensible defaults for multitasking:
# use a fair balance between worker processes and threads per worker so that
# both consuming many documents in parallel and consuming large documents are
# reasonably fast.
# Favors threads per worker on smaller systems and never exceeds cpu_count()
# in total.

def default_task_workers():
    try:
        return max(
            math.floor(math.sqrt(multiprocessing.cpu_count())),
            1
        )
    except NotImplementedError:
        return 1


TASK_WORKERS = int(os.getenv("PAPERLESS_TASK_WORKERS", default_task_workers()))

Q_CLUSTER = {
    'name': 'paperless',
    'catch_up': False,
    'workers': TASK_WORKERS,
    'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
}


def default_threads_per_worker():
    try:
        return max(
            math.floor(multiprocessing.cpu_count() / TASK_WORKERS),
            1
        )
    except NotImplementedError:
        return 1


THREADS_PER_WORKER = os.getenv("PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker())
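A standalone sanity check of the defaults above (not part of the commit): the worker count grows as the square root of the cpu count, the threads per worker take the remainder, and their product never exceeds cpu_count().

import math

for cpu_count in (1, 2, 4, 8, 16):
    workers = max(math.floor(math.sqrt(cpu_count)), 1)
    threads = max(math.floor(cpu_count / workers), 1)
    # e.g. 16 cpus -> 4 workers x 4 threads; 8 cpus -> 2 workers x 4 threads
    assert workers * threads <= cpu_count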

###############################################################################
# Paperless Specific Settings                                                 #
###############################################################################

CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))

CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")

OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")

# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")

# The number of threads to use for OCR
OCR_THREADS = int(os.getenv("PAPERLESS_OCR_THREADS", multiprocessing.cpu_count()))

# OCR all documents?
OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS", "false")
@@ -311,6 +373,7 @@ FILENAME_PARSE_TRANSFORMS = []
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
    FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))

# TODO: this should not have a prefix.
# Specify the filename format for output files
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
@@ -1,4 +1,4 @@
from django.conf.urls import include, url
from django.conf.urls import include
from django.contrib import admin
from django.contrib.auth.decorators import login_required
from django.urls import path, re_path
@@ -7,7 +7,6 @@ from django.views.generic import RedirectView
from rest_framework.routers import DefaultRouter

from paperless.consumers import StatusConsumer
from paperless.views import FaviconView
from documents.views import (
    CorrespondentViewSet,
    DocumentViewSet,
@@ -19,6 +18,7 @@ from documents.views import (
    SearchAutoCompleteView,
    StatisticsView
)
from paperless.views import FaviconView

api_router = DefaultRouter()
api_router.register(r"correspondents", CorrespondentViewSet)
@@ -31,32 +31,32 @@ api_router.register(r"tags", TagViewSet)
urlpatterns = [

    # API
    url(r"^api/auth/", include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")),
    url(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"),
    url(r"^api/search/", SearchView.as_view(), name="search"),
    url(r"^api/statistics/", StatisticsView.as_view(), name="statistics"),
    url(r"^api/", include((api_router.urls, 'drf'), namespace="drf")),
    re_path(r"^api/auth/", include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")),
    re_path(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"),
    re_path(r"^api/search/", SearchView.as_view(), name="search"),
    re_path(r"^api/statistics/", StatisticsView.as_view(), name="statistics"),
    re_path(r"^api/", include((api_router.urls, 'drf'), namespace="drf")),

    # Favicon
    url(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
    re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),

    # The Django admin
    url(r"admin/", admin.site.urls),
    re_path(r"admin/", admin.site.urls),

    # These redirects are here to support clients that use the old FetchView.
    url(
    re_path(
        r"^fetch/doc/(?P<pk>\d+)$",
        RedirectView.as_view(url='/api/documents/%(pk)s/download/'),
    ),
    url(
    re_path(
        r"^fetch/thumb/(?P<pk>\d+)$",
        RedirectView.as_view(url='/api/documents/%(pk)s/thumb/'),
    ),
    url(
    re_path(
        r"^fetch/preview/(?P<pk>\d+)$",
        RedirectView.as_view(url='/api/documents/%(pk)s/preview/'),
    ),
    url(r"^push$", csrf_exempt(RedirectView.as_view(url='/api/documents/post_document/'))),
    re_path(r"^push$", csrf_exempt(RedirectView.as_view(url='/api/documents/post_document/'))),

    # Frontend assets TODO: this is pretty bad.
    path('assets/<path:path>', RedirectView.as_view(url='/static/frontend/assets/%(path)s')),
@@ -64,7 +64,7 @@ urlpatterns = [
    path('accounts/', include('django.contrib.auth.urls')),

    # Root of the Frontend
    url(r".*", login_required(IndexView.as_view())),
    re_path(r".*", login_required(IndexView.as_view())),

]
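Every url() call above is converted to re_path(). Since Django 2.0, django.conf.urls.url is an alias of django.urls.re_path (and is deprecated as of Django 3.1), so each conversion is a drop-in rename. A self-contained example mirroring one route from the list:

from django.urls import re_path
from django.views.generic import RedirectView

# Same regex and view as the deprecated url(...) form; only the import
# and the function name change.
urlpatterns = [
    re_path(r"^fetch/doc/(?P<pk>\d+)$",
            RedirectView.as_view(url='/api/documents/%(pk)s/download/')),
]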

@@ -74,8 +74,8 @@ websocket_urlpatterns = [
]

# Text in each page's <h1> (and above login form).
admin.site.site_header = 'Paperless'
admin.site.site_header = 'Paperless-ng'
# Text at the end of each page's <title>.
admin.site.site_title = 'Paperless'
admin.site.site_title = 'Paperless-ng'
# Text at the top of the admin index page.
admin.site.index_title = 'Paperless administration'
admin.site.index_title = 'Paperless-ng administration'
@@ -1 +1 @@
__version__ = (1, 0, 0)
__version__ = (0, 9, 1)
src/paperless_mail/__init__.py (new file, 0 lines)
src/paperless_mail/admin.py (new file, 18 lines)
@@ -0,0 +1,18 @@
from django.contrib import admin
from paperless_mail.models import MailAccount, MailRule


class MailAccountAdmin(admin.ModelAdmin):

    list_display = ("name", "imap_server", "username")


class MailRuleAdmin(admin.ModelAdmin):

    list_filter = ("account",)

    list_display = ("name", "account", "folder", "action")


admin.site.register(MailAccount, MailAccountAdmin)
admin.site.register(MailRule, MailRuleAdmin)
src/paperless_mail/apps.py (new file, 7 lines)
@@ -0,0 +1,7 @@
from django.apps import AppConfig


class PaperlessMailConfig(AppConfig):
    name = 'paperless_mail'

    verbose_name = 'Paperless Mail'
src/paperless_mail/mail.py (new file, 279 lines)
@@ -0,0 +1,279 @@
import os
import tempfile
from datetime import timedelta, date

from django.conf import settings
from django.utils.text import slugify
from django_q.tasks import async_task
from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \
    MailboxFolderSelectError

from documents.loggers import LoggingMixin
from documents.models import Correspondent
from paperless_mail.models import MailAccount, MailRule


class MailError(Exception):
    pass


class BaseMailAction:

    def get_criteria(self):
        return {}

    def post_consume(self, M, message_uids, parameter):
        pass


class DeleteMailAction(BaseMailAction):

    def post_consume(self, M, message_uids, parameter):
        M.delete(message_uids)


class MarkReadMailAction(BaseMailAction):

    def get_criteria(self):
        return {'seen': False}

    def post_consume(self, M, message_uids, parameter):
        M.seen(message_uids, True)


class MoveMailAction(BaseMailAction):

    def post_consume(self, M, message_uids, parameter):
        M.move(message_uids, parameter)


class FlagMailAction(BaseMailAction):

    def get_criteria(self):
        return {'flagged': False}

    def post_consume(self, M, message_uids, parameter):
        M.flag(message_uids, [MailMessageFlags.FLAGGED], True)


def get_rule_action(rule):
    if rule.action == MailRule.ACTION_FLAG:
        return FlagMailAction()
    elif rule.action == MailRule.ACTION_DELETE:
        return DeleteMailAction()
    elif rule.action == MailRule.ACTION_MOVE:
        return MoveMailAction()
    elif rule.action == MailRule.ACTION_MARK_READ:
        return MarkReadMailAction()
    else:
        raise ValueError("Unknown action.")


def make_criterias(rule):
    maximum_age = date.today() - timedelta(days=rule.maximum_age)
    criterias = {
        "date_gte": maximum_age
    }
    if rule.filter_from:
        criterias["from_"] = rule.filter_from
    if rule.filter_subject:
        criterias["subject"] = rule.filter_subject
    if rule.filter_body:
        criterias["body"] = rule.filter_body

    return {**criterias, **get_rule_action(rule).get_criteria()}
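For illustration only (not part of the commit): a rule with maximum_age=30, a hypothetical sender filter, and the mark-read action would yield criteria like the dict below, which imap_tools' AND(**criterias) folds into a single IMAP SEARCH expression.

from datetime import date, timedelta

criterias = {
    "date_gte": date.today() - timedelta(days=30),
    "from_": "billing@example.com",  # hypothetical filter_from value
    "seen": False,                   # contributed by MarkReadMailAction.get_criteria()
}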


def get_title(message, att, rule):
    if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT:
        title = message.subject
    elif rule.assign_title_from == MailRule.TITLE_FROM_FILENAME:
        title = os.path.splitext(os.path.basename(att.filename))[0]
    else:
        raise ValueError("Unknown title selector.")

    return title


def get_correspondent(message, rule):
    if rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_NOTHING:
        correspondent = None
    elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_EMAIL:
        correspondent_name = message.from_
        correspondent = Correspondent.objects.get_or_create(
            name=correspondent_name, defaults={
                "slug": slugify(correspondent_name)
            })[0]
    elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_NAME:
        if message.from_values and \
                'name' in message.from_values \
                and message.from_values['name']:
            correspondent_name = message.from_values['name']
        else:
            correspondent_name = message.from_

        correspondent = Correspondent.objects.get_or_create(
            name=correspondent_name, defaults={
                "slug": slugify(correspondent_name)
            })[0]
    elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_CUSTOM:
        correspondent = rule.assign_correspondent
    else:
        raise ValueError("Unknown correspondent selector")

    return correspondent


def get_mailbox(server, port, security):
    if security == MailAccount.IMAP_SECURITY_NONE:
        mailbox = MailBoxUnencrypted(server, port)
    elif security == MailAccount.IMAP_SECURITY_STARTTLS:
        mailbox = MailBox(server, port, starttls=True)
    elif security == MailAccount.IMAP_SECURITY_SSL:
        mailbox = MailBox(server, port)
    else:
        raise ValueError("Unknown IMAP security")
    return mailbox


class MailAccountHandler(LoggingMixin):

    def handle_mail_account(self, account):

        self.renew_logging_group()

        self.log('debug', f"Processing mail account {account}")

        total_processed_files = 0

        with get_mailbox(account.imap_server,
                         account.imap_port,
                         account.imap_security) as M:

            try:
                M.login(account.username, account.password)
            except Exception:
                raise MailError(
                    f"Error while authenticating account {account.name}")

            self.log('debug', f"Account {account}: Processing "
                              f"{account.rules.count()} rule(s)")

            for rule in account.rules.all():
                self.log(
                    'debug',
                    f"Account {account}: Processing rule {rule.name}")

                self.log(
                    'debug',
                    f"Rule {account}.{rule}: Selecting folder {rule.folder}")

                try:
                    M.folder.set(rule.folder)
                except MailboxFolderSelectError:
                    raise MailError(
                        f"Rule {rule.name}: Folder {rule.folder} does not exist "
                        f"in account {account.name}")

                criterias = make_criterias(rule)

                self.log(
                    'debug',
                    f"Rule {account}.{rule}: Searching folder with criteria "
                    f"{str(AND(**criterias))}")

                try:
                    messages = M.fetch(criteria=AND(**criterias), mark_seen=False)
                except Exception:
                    raise MailError(
                        f"Rule {rule.name}: Error while fetching folder "
                        f"{rule.folder} of account {account.name}")

                post_consume_messages = []

                mails_processed = 0

                for message in messages:
                    try:
                        processed_files = self.handle_message(message, rule)
                    except Exception:
                        raise MailError(
                            f"Rule {rule.name}: Error while processing mail "
                            f"{message.uid} of account {account.name}")
                    if processed_files > 0:
                        post_consume_messages.append(message.uid)

                    total_processed_files += processed_files
                    mails_processed += 1

                self.log(
                    'debug',
                    f"Rule {account}.{rule}: Processed {mails_processed} "
                    f"matching mail(s)")

                self.log(
                    'debug',
                    f"Rule {account}.{rule}: Running mail actions on "
                    f"{len(post_consume_messages)} mails")

                try:
                    get_rule_action(rule).post_consume(
                        M,
                        post_consume_messages,
                        rule.action_parameter)

                except Exception:
                    raise MailError(
                        f"Rule {rule.name}: Error while processing post-consume "
                        f"actions for account {account.name}")

        return total_processed_files

    def handle_message(self, message, rule):
        if not message.attachments:
            return 0

        self.log(
            'debug',
            f"Rule {rule.account}.{rule}: "
            f"Processing mail {message.subject} from {message.from_} with "
            f"{len(message.attachments)} attachment(s)")

        correspondent = get_correspondent(message, rule)
        tag = rule.assign_tag
        doc_type = rule.assign_document_type

        processed_attachments = 0

        for att in message.attachments:

            title = get_title(message, att, rule)

            # TODO: check with parsers what file types are supported
            if att.content_type == 'application/pdf':

                os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
                _, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR)
                with open(temp_filename, 'wb') as f:
                    f.write(att.payload)

                self.log(
                    'info',
                    f"Rule {rule.account}.{rule}: "
                    f"Consuming attachment {att.filename} from mail "
                    f"{message.subject} from {message.from_}")

                async_task(
                    "documents.tasks.consume_file",
                    path=temp_filename,
                    override_filename=att.filename,
                    override_title=title,
                    override_correspondent_id=correspondent.id if correspondent else None,
                    override_document_type_id=doc_type.id if doc_type else None,
                    override_tag_ids=[tag.id] if tag else None,
                    task_name=f"Mail: {att.filename}"
                )

                processed_attachments += 1

        return processed_attachments
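The task entry point paperless_mail.tasks.process_mail_accounts, referenced by the mail_fetcher command below and by the schedule migration, is not shown in this diff. A plausible minimal sketch, assuming it simply sums the handler's results over all configured accounts:

from paperless_mail.mail import MailAccountHandler
from paperless_mail.models import MailAccount


def process_mail_accounts():
    # Hypothetical sketch; the real task body is not part of this commit.
    total_files = 0
    for account in MailAccount.objects.all():
        total_files += MailAccountHandler().handle_mail_account(account)
    return total_files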
src/paperless_mail/management/__init__.py (new file, 0 lines)
src/paperless_mail/management/commands/__init__.py (new file, 0 lines)
src/paperless_mail/management/commands/mail_fetcher.py (new file, 13 lines)
@@ -0,0 +1,13 @@
from django.core.management.base import BaseCommand

from paperless_mail import tasks


class Command(BaseCommand):

    help = """
    """.replace(" ", "")

    def handle(self, *args, **options):

        tasks.process_mail_accounts()
src/paperless_mail/migrations/0001_initial.py (new file, 48 lines)
@@ -0,0 +1,48 @@
# Generated by Django 3.1.3 on 2020-11-15 22:54

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

    initial = True

    dependencies = [
        ('documents', '1002_auto_20201111_1105'),
    ]

    operations = [
        migrations.CreateModel(
            name='MailAccount',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=256, unique=True)),
                ('imap_server', models.CharField(max_length=256)),
                ('imap_port', models.IntegerField(blank=True, null=True)),
                ('imap_security', models.PositiveIntegerField(choices=[(1, 'No encryption'), (2, 'Use SSL'), (3, 'Use STARTTLS')], default=2)),
                ('username', models.CharField(max_length=256)),
                ('password', models.CharField(max_length=256)),
            ],
        ),
        migrations.CreateModel(
            name='MailRule',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=256)),
                ('folder', models.CharField(default='INBOX', max_length=256)),
                ('filter_from', models.CharField(blank=True, max_length=256, null=True)),
                ('filter_subject', models.CharField(blank=True, max_length=256, null=True)),
                ('filter_body', models.CharField(blank=True, max_length=256, null=True)),
                ('maximum_age', models.PositiveIntegerField(default=30)),
                ('action', models.PositiveIntegerField(choices=[(1, 'Delete'), (2, 'Move to specified folder'), (3, "Mark as read, don't process read mails"), (4, "Flag the mail, don't process flagged mails")], default=3, help_text='The action applied to the mail. This action is only performed when documents were consumed from the mail. Mails without attachments will remain entirely untouched.')),
                ('action_parameter', models.CharField(blank=True, help_text='Additional parameter for the action selected above, i.e., the target folder of the move to folder action.', max_length=256, null=True)),
                ('assign_title_from', models.PositiveIntegerField(choices=[(1, 'Use subject as title'), (2, 'Use attachment filename as title')], default=1)),
                ('assign_correspondent_from', models.PositiveIntegerField(choices=[(1, 'Do not assign a correspondent'), (2, 'Use mail address'), (3, 'Use name (or mail address if not available)'), (4, 'Use correspondent selected below')], default=1)),
                ('account', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='rules', to='paperless_mail.mailaccount')),
                ('assign_correspondent', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='documents.correspondent')),
                ('assign_document_type', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='documents.documenttype')),
                ('assign_tag', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='documents.tag')),
            ],
        ),
    ]
32   src/paperless_mail/migrations/0002_auto_20201117_1334.py    Normal file
@@ -0,0 +1,32 @@
# Generated by Django 3.1.3 on 2020-11-17 13:34

from django.db import migrations
from django.db.migrations import RunPython
from django_q.models import Schedule
from django_q.tasks import schedule


def add_schedules(apps, schema_editor):
    schedule('paperless_mail.tasks.process_mail_accounts',
             name="Check all e-mail accounts",
             schedule_type=Schedule.MINUTES,
             minutes=10)


def remove_schedules(apps, schema_editor):
    Schedule.objects.filter(
        func='paperless_mail.tasks.process_mail_accounts').delete()


class Migration(migrations.Migration):

    dependencies = [
        ('paperless_mail', '0001_initial'),
        ('django_q', '0013_task_attempt_count'),
    ]

    operations = [
        RunPython(add_schedules, remove_schedules)
    ]
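A sketch of this migration's effect: once applied, django-q holds a Schedule row that fires the mail check every ten minutes. It can be inspected like this (names taken from the migration above; assumes a migrated database):

    from django_q.models import Schedule

    s = Schedule.objects.get(func='paperless_mail.tasks.process_mail_accounts')
    print(s.name, s.minutes)  # "Check all e-mail accounts", 10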
23   src/paperless_mail/migrations/0003_auto_20201118_1940.py    Normal file
@@ -0,0 +1,23 @@
# Generated by Django 3.1.3 on 2020-11-18 19:40

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('paperless_mail', '0002_auto_20201117_1334'),
    ]

    operations = [
        migrations.AlterField(
            model_name='mailaccount',
            name='imap_port',
            field=models.IntegerField(blank=True, help_text='This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.', null=True),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='name',
            field=models.CharField(max_length=256, unique=True),
        ),
    ]
0    src/paperless_mail/migrations/__init__.py    Normal file
138  src/paperless_mail/models.py    Normal file
@@ -0,0 +1,138 @@
from django.db import models

import documents.models as document_models


class MailAccount(models.Model):

    IMAP_SECURITY_NONE = 1
    IMAP_SECURITY_SSL = 2
    IMAP_SECURITY_STARTTLS = 3

    IMAP_SECURITY_OPTIONS = (
        (IMAP_SECURITY_NONE, "No encryption"),
        (IMAP_SECURITY_SSL, "Use SSL"),
        (IMAP_SECURITY_STARTTLS, "Use STARTTLS"),
    )

    name = models.CharField(max_length=256, unique=True)

    imap_server = models.CharField(max_length=256)

    imap_port = models.IntegerField(
        blank=True,
        null=True,
        help_text="This is usually 143 for unencrypted and STARTTLS "
                  "connections, and 993 for SSL connections.")

    imap_security = models.PositiveIntegerField(
        choices=IMAP_SECURITY_OPTIONS,
        default=IMAP_SECURITY_SSL
    )

    username = models.CharField(max_length=256)

    password = models.CharField(max_length=256)

    def __str__(self):
        return self.name


class MailRule(models.Model):

    ACTION_DELETE = 1
    ACTION_MOVE = 2
    ACTION_MARK_READ = 3
    ACTION_FLAG = 4

    ACTIONS = (
        (ACTION_DELETE, "Delete"),
        (ACTION_MOVE, "Move to specified folder"),
        (ACTION_MARK_READ, "Mark as read, don't process read mails"),
        (ACTION_FLAG, "Flag the mail, don't process flagged mails")
    )

    TITLE_FROM_SUBJECT = 1
    TITLE_FROM_FILENAME = 2

    TITLE_SELECTOR = (
        (TITLE_FROM_SUBJECT, "Use subject as title"),
        (TITLE_FROM_FILENAME, "Use attachment filename as title")
    )

    CORRESPONDENT_FROM_NOTHING = 1
    CORRESPONDENT_FROM_EMAIL = 2
    CORRESPONDENT_FROM_NAME = 3
    CORRESPONDENT_FROM_CUSTOM = 4

    CORRESPONDENT_SELECTOR = (
        (CORRESPONDENT_FROM_NOTHING, "Do not assign a correspondent"),
        (CORRESPONDENT_FROM_EMAIL, "Use mail address"),
        (CORRESPONDENT_FROM_NAME, "Use name (or mail address if not available)"),
        (CORRESPONDENT_FROM_CUSTOM, "Use correspondent selected below")
    )

    name = models.CharField(max_length=256, unique=True)

    account = models.ForeignKey(
        MailAccount,
        related_name="rules",
        on_delete=models.CASCADE
    )

    folder = models.CharField(default='INBOX', max_length=256)

    filter_from = models.CharField(max_length=256, null=True, blank=True)
    filter_subject = models.CharField(max_length=256, null=True, blank=True)
    filter_body = models.CharField(max_length=256, null=True, blank=True)

    maximum_age = models.PositiveIntegerField(default=30)

    action = models.PositiveIntegerField(
        choices=ACTIONS,
        default=ACTION_MARK_READ,
        help_text="The action applied to the mail. This action is only "
                  "performed when documents were consumed from the mail. "
                  "Mails without attachments will remain entirely "
                  "untouched."
    )

    action_parameter = models.CharField(
        max_length=256, blank=True, null=True,
        help_text="Additional parameter for the action selected above, i.e., "
                  "the target folder of the move to folder action."
    )

    assign_title_from = models.PositiveIntegerField(
        choices=TITLE_SELECTOR,
        default=TITLE_FROM_SUBJECT
    )

    assign_tag = models.ForeignKey(
        document_models.Tag,
        null=True,
        blank=True,
        on_delete=models.SET_NULL
    )

    assign_document_type = models.ForeignKey(
        document_models.DocumentType,
        null=True,
        blank=True,
        on_delete=models.SET_NULL
    )

    assign_correspondent_from = models.PositiveIntegerField(
        choices=CORRESPONDENT_SELECTOR,
        default=CORRESPONDENT_FROM_NOTHING
    )

    assign_correspondent = models.ForeignKey(
        document_models.Correspondent,
        null=True,
        blank=True,
        on_delete=models.SET_NULL
    )

    def __str__(self):
        return self.name
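A minimal sketch of how the two models above fit together; every value here is made up, only the field and constant names come from the model definitions:

    from paperless_mail.models import MailAccount, MailRule

    account = MailAccount.objects.create(
        name="Example account",          # hypothetical
        imap_server="imap.example.com",  # hypothetical
        imap_port=993,
        imap_security=MailAccount.IMAP_SECURITY_SSL,
        username="user",
        password="pass",
    )

    # A rule hangs off its account via the FK and narrows which mails get consumed.
    MailRule.objects.create(
        name="Invoices",
        account=account,
        filter_subject="Invoice",
        action=MailRule.ACTION_MARK_READ,
        assign_title_from=MailRule.TITLE_FROM_SUBJECT,
    )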
23   src/paperless_mail/tasks.py    Normal file
@@ -0,0 +1,23 @@
import logging

from paperless_mail.mail import MailAccountHandler
from paperless_mail.models import MailAccount


def process_mail_accounts():
    total_new_documents = 0
    for account in MailAccount.objects.all():
        total_new_documents += MailAccountHandler().handle_mail_account(account)

    if total_new_documents > 0:
        return f"Added {total_new_documents} document(s)."
    else:
        return "No new documents were added."


def process_mail_account(name):
    # QuerySets have no .find(); look the account up by name instead.
    account = MailAccount.objects.filter(name=name).first()
    if account:
        MailAccountHandler().handle_mail_account(account)
    else:
        logging.error("Unknown mail account: {}".format(name))
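Both entry points can also be called directly, e.g. from a Django shell; a sketch (the account name is hypothetical):

    from paperless_mail import tasks

    print(tasks.process_mail_accounts())           # e.g. "No new documents were added."
    tasks.process_mail_account("Example account")  # hypothetical account name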
0    src/paperless_mail/tests/__init__.py    Normal file
360  src/paperless_mail/tests/test_mail.py    Normal file
@@ -0,0 +1,360 @@
import uuid
from collections import namedtuple
from typing import ContextManager
from unittest import mock

from django.test import TestCase
from imap_tools import MailMessageFlags, MailboxFolderSelectError

from documents.models import Correspondent
from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title
from paperless_mail.models import MailRule, MailAccount


class BogusFolderManager:

    current_folder = "INBOX"

    def set(self, new_folder):
        if new_folder not in ["INBOX", "spam"]:
            raise MailboxFolderSelectError(None, "uhm")
        self.current_folder = new_folder


class BogusMailBox(ContextManager):
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass

    def __init__(self):
        self.messages = []
        self.messages_spam = []

    def login(self, username, password):
        if not (username == 'admin' and password == 'secret'):
            raise Exception()

    folder = BogusFolderManager()

    def fetch(self, criteria, mark_seen):
        msg = self.messages

        criteria = str(criteria).strip('()').split(" ")

        if 'UNSEEN' in criteria:
            msg = filter(lambda m: not m.seen, msg)

        if 'SUBJECT' in criteria:
            subject = criteria[criteria.index('SUBJECT') + 1].strip('"')
            msg = filter(lambda m: subject in m.subject, msg)

        if 'BODY' in criteria:
            body = criteria[criteria.index('BODY') + 1].strip('"')
            msg = filter(lambda m: body in m.body, msg)

        if 'FROM' in criteria:
            from_ = criteria[criteria.index('FROM') + 1].strip('"')
            msg = filter(lambda m: from_ in m.from_, msg)

        if 'UNFLAGGED' in criteria:
            msg = filter(lambda m: not m.flagged, msg)

        return list(msg)

    def seen(self, uid_list, seen_val):
        for message in self.messages:
            if message.uid in uid_list:
                message.seen = seen_val

    def delete(self, uid_list):
        self.messages = list(filter(lambda m: m.uid not in uid_list, self.messages))

    def flag(self, uid_list, flag_set, value):
        for message in self.messages:
            if message.uid in uid_list:
                for flag in flag_set:
                    if flag == MailMessageFlags.FLAGGED:
                        message.flagged = value

    def move(self, uid_list, folder):
        if folder == "spam":
            # extend with the matching messages (appending the filter object
            # itself would store an iterator, not the messages)
            self.messages_spam += list(
                filter(lambda m: m.uid in uid_list, self.messages)
            )
            self.messages = list(
                filter(lambda m: m.uid not in uid_list, self.messages)
            )
        else:
            raise Exception()


def create_message(num_attachments=1, body="", subject="the subject", from_="noone@mail.com", seen=False, flagged=False):
    message = namedtuple('MailMessage', [])

    message.uid = uuid.uuid4()
    message.subject = subject
    message.attachments = []
    message.from_ = from_
    message.body = body
    for i in range(num_attachments):
        attachment = namedtuple('Attachment', [])
        attachment.filename = 'some_file.pdf'
        attachment.content_type = 'application/pdf'
        attachment.payload = b'content of the attachment'
        message.attachments.append(attachment)

    message.seen = seen
    message.flagged = flagged

    return message


class TestMail(TestCase):

    def setUp(self):
        patcher = mock.patch('paperless_mail.mail.MailBox')
        m = patcher.start()
        self.bogus_mailbox = BogusMailBox()
        m.return_value = self.bogus_mailbox
        self.addCleanup(patcher.stop)

        patcher = mock.patch('paperless_mail.mail.async_task')
        self.async_task = patcher.start()
        self.addCleanup(patcher.stop)

        self.reset_bogus_mailbox()

        self.mail_account_handler = MailAccountHandler()

    def reset_bogus_mailbox(self):
        self.bogus_mailbox.messages = []
        self.bogus_mailbox.messages_spam = []
        self.bogus_mailbox.messages.append(create_message(subject="Invoice 1", from_="amazon@amazon.de", body="cables", seen=True, flagged=False))
        self.bogus_mailbox.messages.append(create_message(subject="Invoice 2", body="from my favorite electronic store", seen=False, flagged=True))
        self.bogus_mailbox.messages.append(create_message(subject="Claim your $10M prize now!", from_="amazon@amazon-some-indian-site.org", seen=False))

    def test_get_correspondent(self):
        message = namedtuple('MailMessage', [])
        message.from_ = "someone@somewhere.com"
        message.from_values = {'name': "Someone!", 'email': "someone@somewhere.com"}

        message2 = namedtuple('MailMessage', [])
        message2.from_ = "me@localhost.com"
        message2.from_values = {'name': "", 'email': "fake@localhost.com"}

        me_localhost = Correspondent.objects.create(name=message2.from_)
        someone_else = Correspondent.objects.create(name="someone else")

        rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING)
        self.assertIsNone(get_correspondent(message, rule))

        rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL)
        c = get_correspondent(message, rule)
        self.assertIsNotNone(c)
        self.assertEqual(c.name, "someone@somewhere.com")
        c = get_correspondent(message2, rule)
        self.assertIsNotNone(c)
        self.assertEqual(c.name, "me@localhost.com")
        self.assertEqual(c.id, me_localhost.id)

        rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME)
        c = get_correspondent(message, rule)
        self.assertIsNotNone(c)
        self.assertEqual(c.name, "Someone!")
        c = get_correspondent(message2, rule)
        self.assertIsNotNone(c)
        self.assertEqual(c.id, me_localhost.id)

        rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else)
        c = get_correspondent(message, rule)
        self.assertEqual(c, someone_else)

    def test_get_title(self):
        message = namedtuple('MailMessage', [])
        message.subject = "the message title"
        att = namedtuple('Attachment', [])
        att.filename = "this_is_the_file.pdf"
        rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME)
        self.assertEqual(get_title(message, att, rule), "this_is_the_file")
        rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT)
        self.assertEqual(get_title(message, att, rule), "the message title")

    def test_handle_message(self):
        message = namedtuple('MailMessage', [])
        message.subject = "the message title"
        message.from_ = "Myself"

        att = namedtuple('Attachment', [])
        att.filename = "test1.pdf"
        att.content_type = 'application/pdf'
        att.payload = b"attachment contents"

        att2 = namedtuple('Attachment', [])
        att2.filename = "test2.pdf"
        att2.content_type = 'application/pdf'
        att2.payload = b"attachment contents"

        att3 = namedtuple('Attachment', [])
        att3.filename = "test3.pdf"
        att3.content_type = 'application/invalid'
        att3.payload = b"attachment contents"

        message.attachments = [att, att2, att3]

        account = MailAccount()
        rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME, account=account)

        result = self.mail_account_handler.handle_message(message, rule)

        self.assertEqual(result, 2)

        self.assertEqual(len(self.async_task.call_args_list), 2)

        args1, kwargs1 = self.async_task.call_args_list[0]
        args2, kwargs2 = self.async_task.call_args_list[1]

        self.assertEqual(kwargs1['override_title'], "test1")
        self.assertEqual(kwargs1['override_filename'], "test1.pdf")

        self.assertEqual(kwargs2['override_title'], "test2")
        self.assertEqual(kwargs2['override_filename'], "test2.pdf")

    @mock.patch("paperless_mail.mail.async_task")
    def test_handle_empty_message(self, m):
        message = namedtuple('MailMessage', [])

        message.attachments = []
        rule = MailRule()

        result = self.mail_account_handler.handle_message(message, rule)

        self.assertFalse(m.called)
        self.assertEqual(result, 0)

    def test_handle_mail_account_mark_read(self):

        account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret")

        rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MARK_READ)

        self.assertEqual(len(self.bogus_mailbox.messages), 3)
        self.assertEqual(self.async_task.call_count, 0)
        self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2)
        self.mail_account_handler.handle_mail_account(account)
        self.assertEqual(self.async_task.call_count, 2)
        self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0)
        self.assertEqual(len(self.bogus_mailbox.messages), 3)

    def test_handle_mail_account_delete(self):

        account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret")

        rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_DELETE, filter_subject="Invoice")

        self.assertEqual(self.async_task.call_count, 0)
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
        self.mail_account_handler.handle_mail_account(account)
        self.assertEqual(self.async_task.call_count, 2)
        self.assertEqual(len(self.bogus_mailbox.messages), 1)

    def test_handle_mail_account_flag(self):
        account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret")

        rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_FLAG, filter_subject="Invoice")

        self.assertEqual(len(self.bogus_mailbox.messages), 3)
        self.assertEqual(self.async_task.call_count, 0)
        self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2)
        self.mail_account_handler.handle_mail_account(account)
        self.assertEqual(self.async_task.call_count, 1)
        self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1)
        self.assertEqual(len(self.bogus_mailbox.messages), 3)

    def test_handle_mail_account_move(self):
        account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret")

        rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="spam", filter_subject="Claim")

        self.assertEqual(self.async_task.call_count, 0)
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
        self.assertEqual(len(self.bogus_mailbox.messages_spam), 0)
        self.mail_account_handler.handle_mail_account(account)
        self.assertEqual(self.async_task.call_count, 1)
        self.assertEqual(len(self.bogus_mailbox.messages), 2)
        self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)

    def test_errors(self):
        account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong")

        try:
            self.mail_account_handler.handle_mail_account(account)
        except MailError as e:
            self.assertTrue(str(e).startswith("Error while authenticating account"))
        else:
            self.fail("Should raise exception")

        account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret")
        rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh")

        try:
            self.mail_account_handler.handle_mail_account(account)
        except MailError as e:
            self.assertTrue("uuuh does not exist" in str(e))
        else:
            self.fail("Should raise exception")

        account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret")

        rule = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim")

        try:
            self.mail_account_handler.handle_mail_account(account)
        except MailError as e:
            self.assertTrue("Error while processing post-consume actions" in str(e))
        else:
            self.fail("Should raise exception")

    def test_filters(self):

        account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret")
        rule = MailRule.objects.create(name="testrule3", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim")

        self.assertEqual(self.async_task.call_count, 0)

        self.assertEqual(len(self.bogus_mailbox.messages), 3)
        self.mail_account_handler.handle_mail_account(account)
        self.assertEqual(len(self.bogus_mailbox.messages), 2)
        self.assertEqual(self.async_task.call_count, 1)

        self.reset_bogus_mailbox()

        rule.filter_subject = None
        rule.filter_body = "electronic"
        rule.save()
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
        self.mail_account_handler.handle_mail_account(account)
        self.assertEqual(len(self.bogus_mailbox.messages), 2)
        self.assertEqual(self.async_task.call_count, 2)

        self.reset_bogus_mailbox()

        rule.filter_from = "amazon"
        rule.filter_body = None
        rule.save()
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
        self.mail_account_handler.handle_mail_account(account)
        self.assertEqual(len(self.bogus_mailbox.messages), 1)
        self.assertEqual(self.async_task.call_count, 4)

        self.reset_bogus_mailbox()

        rule.filter_from = "amazon"
        rule.filter_body = "cables"
        rule.filter_subject = "Invoice"
        rule.save()
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
        self.mail_account_handler.handle_mail_account(account)
        self.assertEqual(len(self.bogus_mailbox.messages), 2)
        self.assertEqual(self.async_task.call_count, 5)
@@ -1,5 +1,7 @@
from django.apps import AppConfig

from paperless_tesseract.signals import tesseract_consumer_declaration


class PaperlessTesseractConfig(AppConfig):

@@ -9,8 +11,6 @@ class PaperlessTesseractConfig(AppConfig):

        from documents.signals import document_consumer_declaration

        from .signals import ConsumerDeclaration

        document_consumer_declaration.connect(ConsumerDeclaration.handle)
        document_consumer_declaration.connect(tesseract_consumer_declaration)

        AppConfig.ready(self)
@@ -2,18 +2,17 @@ import itertools
import os
import re
import subprocess
from multiprocessing.pool import Pool
from multiprocessing.pool import ThreadPool

import langdetect
import pdftotext
import pyocr
from django.conf import settings
from PIL import Image
from django.conf import settings
from pyocr import PyocrException

import pdftotext
from documents.parsers import DocumentParser, ParseError, run_unpaper, \
    run_convert

from .languages import ISO639


@@ -45,8 +44,8 @@ class RasterisedDocumentParser(DocumentParser):
                        alpha="remove",
                        strip=True,
                        trim=True,
                        input="{}[0]".format(self.document_path),
                        output=out_path,
                        input_file="{}[0]".format(self.document_path),
                        output_file=out_path,
                        logging_group=self.logging_group)
        except ParseError:
            # if convert fails, fall back to extracting
@@ -66,8 +65,8 @@ class RasterisedDocumentParser(DocumentParser):
                        alpha="remove",
                        strip=True,
                        trim=True,
                        input=gs_out_path,
                        output=out_path,
                        input_file=gs_out_path,
                        output_file=out_path,
                        logging_group=self.logging_group)

        return out_path
@@ -87,7 +86,7 @@ class RasterisedDocumentParser(DocumentParser):
            return self._text

        if not settings.OCR_ALWAYS and self._is_ocred():
            self.log("info", "Skipping OCR, using Text from PDF")
            self.log("debug", "Skipping OCR, using Text from PDF")
            self._text = get_text_from_pdf(self.document_path)
            return self._text

@@ -100,7 +99,7 @@ class RasterisedDocumentParser(DocumentParser):
        try:

            sample_page_index = int(len(images) / 2)
            self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index+1, len(images)))
            self.log("debug", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images)))
            self.progress_callback(0.4, 1, "Language Detection.")
            sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0]
            guessed_language = self._guess_language(sample_page_text)
@@ -111,7 +110,7 @@ class RasterisedDocumentParser(DocumentParser):
                ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)

            elif ISO639[guessed_language] == settings.OCR_LANGUAGE:
                self.log("info", "Detected language: {} (default language)".format(guessed_language))
                self.log("debug", "Detected language: {} (default language)".format(guessed_language))
                ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)

            elif not ISO639[guessed_language] in pyocr.get_available_tools()[0].get_available_languages():
@@ -119,10 +118,10 @@ class RasterisedDocumentParser(DocumentParser):
                ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)

            else:
                self.log("info", "Detected language: {}".format(guessed_language))
                self.log("debug", "Detected language: {}".format(guessed_language))
                ocr_pages = self._ocr(images, ISO639[guessed_language], report_progress=True)

            self.log("info", "OCR completed.")
            self.log("debug", "OCR completed.")
            self._text = strip_excess_whitespace(" ".join(ocr_pages))
            return self._text

@@ -134,7 +133,7 @@ class RasterisedDocumentParser(DocumentParser):
        Greyscale images are easier for Tesseract to OCR
        """

        self.log("info", "Converting document {} into greyscale images...".format(self.document_path))
        self.log("debug", "Converting document {} into greyscale images...".format(self.document_path))

        # Convert PDF to multiple PNMs
        pnm = os.path.join(self.tempdir, "convert-%04d.pnm")
@@ -142,8 +141,8 @@ class RasterisedDocumentParser(DocumentParser):
        run_convert(density=settings.CONVERT_DENSITY,
                    depth="8",
                    type="grayscale",
                    input=self.document_path,
                    output=pnm,
                    input_file=self.document_path,
                    output_file=pnm,
                    logging_group=self.logging_group)

        # Get a list of converted images
@@ -152,12 +151,12 @@ class RasterisedDocumentParser(DocumentParser):
            if f.endswith(".pnm"):
                pnms.append(os.path.join(self.tempdir, f))

        self.log("info", "Running unpaper on {} pages...".format(len(pnms)))
        self.log("debug", "Running unpaper on {} pages...".format(len(pnms)))

        self.progress_callback(0.2, 1, "Running unpaper on {} pages...".format(len(pnms)))

        # Run unpaper in parallel on converted images
        with Pool(processes=settings.OCR_THREADS) as pool:
        with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool:
            pnms = pool.map(run_unpaper, pnms)

        return sorted(filter(lambda __: os.path.isfile(__), pnms))
@@ -167,13 +166,13 @@ class RasterisedDocumentParser(DocumentParser):
            guess = langdetect.detect(text)
            return guess
        except Exception as e:
            self.log('debug', "Language detection failed with: {}".format(e))
            self.log('warning', "Language detection failed with: {}".format(e))
            return None

    def _ocr(self, imgs, lang, report_progress=False):
        self.log("info", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang))
        self.log("debug", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang))
        r = []
        with Pool(processes=settings.OCR_THREADS) as pool:
        with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool:
            # r = pool.map(image_to_string, itertools.product(imgs, [lang]))
            for i, page in enumerate(pool.imap(image_to_string, itertools.product(imgs, [lang]))):
                if report_progress:
@@ -191,7 +190,7 @@ class RasterisedDocumentParser(DocumentParser):
        images_copy = list(images)
        del images_copy[sample_page_index]
        if images_copy:
            self.log('info', 'Continuing ocr with default language.')
            self.log('debug', 'Continuing ocr with default language.')
            ocr_pages = self._ocr(images_copy, settings.OCR_LANGUAGE, report_progress=True)
            ocr_pages.insert(sample_page_index, sample_page)
            return ocr_pages
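The Pool-to-ThreadPool change above swaps forked worker processes for threads inside the current process, sized by the new THREADS_PER_WORKER setting instead of OCR_THREADS. A minimal sketch of the pattern (the worker function is a stand-in):

    from multiprocessing.pool import ThreadPool

    def work(page):          # stand-in for run_unpaper / image_to_string
        return page * 2

    with ThreadPool(processes=4) as pool:   # settings.THREADS_PER_WORKER in the parser
        print(pool.map(work, [1, 2, 3]))    # [2, 4, 6]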
@@ -3,21 +3,16 @@ import re
from .parsers import RasterisedDocumentParser


class ConsumerDeclaration:
def tesseract_consumer_declaration(sender, **kwargs):
    return {
        "parser": RasterisedDocumentParser,
        "weight": 0,
        "test": tesseract_consumer_test
    }

    MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")

    @classmethod
    def handle(cls, sender, **kwargs):
        return cls.test
MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")

    @classmethod
    def test(cls, doc):

        if cls.MATCHING_FILES.match(doc.lower()):
            return {
                "parser": RasterisedDocumentParser,
                "weight": 0
            }

        return None
def tesseract_consumer_test(doc):
    return MATCHING_FILES.match(doc.lower())
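With the refactor above, the declaration is a plain module-level function rather than a class, and receivers hand back a dict. A sketch of probing it (the file names are arbitrary):

    from paperless_tesseract.signals import tesseract_consumer_declaration

    decl = tesseract_consumer_declaration(sender=None)
    print(decl["weight"])                    # 0
    print(bool(decl["test"]("scan.pdf")))    # True: matches the raster pattern
    print(bool(decl["test"]("notes.docx")))  # False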
BIN  src/paperless_tesseract/tests/samples/simple.pdf    Normal file (binary file not shown)
BIN  src/paperless_tesseract/tests/samples/simple.png    Normal file (binary file not shown; size: 7.7 KiB)
@@ -5,10 +5,10 @@ from unittest import mock
from uuid import uuid4

from dateutil import tz
from django.conf import settings
from django.test import TestCase, override_settings

from ..parsers import RasterisedDocumentParser
from django.conf import settings


class TestDate(TestCase):

221  src/paperless_tesseract/tests/test_parser.py    Normal file
@@ -0,0 +1,221 @@
import os
import shutil
import tempfile
import uuid
from typing import ContextManager
from unittest import mock

from django.test import TestCase, override_settings
from pyocr.error import TesseractError

from documents.parsers import ParseError, run_convert
from paperless_tesseract.parsers import RasterisedDocumentParser, get_text_from_pdf, image_to_string, OCRError

image_to_string_calls = []


class FakeTesseract(object):

    @staticmethod
    def can_detect_orientation():
        return True

    @staticmethod
    def detect_orientation(file_handle, lang):
        raise TesseractError("arbitrary status", "message")

    @staticmethod
    def get_available_languages():
        return ['eng', 'deu']

    @staticmethod
    def image_to_string(file_handle, lang):
        image_to_string_calls.append((file_handle.name, lang))
        return file_handle.read()


class FakePyOcr(object):

    @staticmethod
    def get_available_tools():
        return [FakeTesseract]


def fake_convert(input_file, output_file, **kwargs):
    with open(input_file) as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        with open(output_file % i, "w") as f2:
            f2.write(line.strip())


def fake_unpaper(pnm):
    output = pnm + ".unpaper.pnm"
    shutil.copy(pnm, output)
    return output


class FakeImageFile(ContextManager):
    def __init__(self, fname):
        self.fname = fname

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass

    def __enter__(self):
        return os.path.basename(self.fname)


fake_image = FakeImageFile


@mock.patch("paperless_tesseract.parsers.pyocr", FakePyOcr)
@mock.patch("paperless_tesseract.parsers.run_convert", fake_convert)
@mock.patch("paperless_tesseract.parsers.run_unpaper", fake_unpaper)
@mock.patch("paperless_tesseract.parsers.Image.open", open)
class TestRasterisedDocumentParser(TestCase):

    def setUp(self):
        self.scratch = tempfile.mkdtemp()

        global image_to_string_calls

        image_to_string_calls = []

        override_settings(OCR_LANGUAGE="eng", SCRATCH_DIR=self.scratch).enable()

    def tearDown(self):
        shutil.rmtree(self.scratch)

    def get_input_file(self, pages):
        _, fname = tempfile.mkstemp(suffix=".pdf", dir=self.scratch)
        with open(fname, "w") as f:
            f.writelines([f"line {p}\n" for p in range(pages)])
        return fname

    @mock.patch("paperless_tesseract.parsers.langdetect.detect", lambda _: "en")
    def test_parse_text_simple_language_match(self):
        parser = RasterisedDocumentParser(self.get_input_file(1), uuid.uuid4())
        text = parser.get_text()
        self.assertEqual(text, "line 0")

        self.assertListEqual([args[1] for args in image_to_string_calls], ["eng"])

    @mock.patch("paperless_tesseract.parsers.langdetect.detect", lambda _: "en")
    def test_parse_text_2_pages(self):
        parser = RasterisedDocumentParser(self.get_input_file(2), uuid.uuid4())
        text = parser.get_text()
        self.assertEqual(text, "line 0 line 1")

        self.assertListEqual([args[1] for args in image_to_string_calls], ["eng", "eng"])

    @mock.patch("paperless_tesseract.parsers.langdetect.detect", lambda _: "en")
    def test_parse_text_3_pages(self):
        parser = RasterisedDocumentParser(self.get_input_file(3), uuid.uuid4())
        text = parser.get_text()
        self.assertEqual(text, "line 0 line 1 line 2")

        self.assertListEqual([args[1] for args in image_to_string_calls], ["eng", "eng", "eng"])

    @mock.patch("paperless_tesseract.parsers.langdetect.detect", lambda _: None)
    def test_parse_text_lang_detect_failed(self):
        parser = RasterisedDocumentParser(self.get_input_file(3), uuid.uuid4())
        text = parser.get_text()
        self.assertEqual(text, "line 0 line 1 line 2")

        self.assertListEqual([args[1] for args in image_to_string_calls], ["eng", "eng", "eng"])

    @mock.patch("paperless_tesseract.parsers.langdetect.detect", lambda _: "it")
    def test_parse_text_lang_not_installed(self):
        parser = RasterisedDocumentParser(self.get_input_file(4), uuid.uuid4())
        text = parser.get_text()
        self.assertEqual(text, "line 0 line 1 line 2 line 3")

        self.assertListEqual([args[1] for args in image_to_string_calls], ["eng", "eng", "eng", "eng"])

    @mock.patch("paperless_tesseract.parsers.langdetect.detect", lambda _: "de")
    def test_parse_text_lang_mismatch(self):
        parser = RasterisedDocumentParser(self.get_input_file(3), uuid.uuid4())
        text = parser.get_text()
        self.assertEqual(text, "line 0 line 1 line 2")

        self.assertListEqual([args[1] for args in image_to_string_calls], ["eng", "deu", "deu", "deu"])

    @mock.patch("paperless_tesseract.parsers.langdetect.detect", lambda _: "de")
    def test_parse_empty_doc(self):
        parser = RasterisedDocumentParser(self.get_input_file(0), uuid.uuid4())
        try:
            parser.get_text()
        except ParseError as e:
            self.assertEqual("Empty document, nothing to do.", str(e))
        else:
            self.fail("Should raise exception")


class TestAuxilliaryFunctions(TestCase):

    def setUp(self):
        self.scratch = tempfile.mkdtemp()

        override_settings(SCRATCH_DIR=self.scratch).enable()

    def tearDown(self):
        shutil.rmtree(self.scratch)

    SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")

    def test_get_text_from_pdf(self):
        text = get_text_from_pdf(os.path.join(self.SAMPLE_FILES, 'simple.pdf'))

        self.assertEqual(text.strip(), "This is a test document.")

    def test_get_text_from_pdf_error(self):
        text = get_text_from_pdf(os.path.join(self.SAMPLE_FILES, 'simple.png'))

        self.assertEqual(text.strip(), "")

    def test_image_to_string(self):
        text = image_to_string((os.path.join(self.SAMPLE_FILES, 'simple.png'), "eng"))

        self.assertEqual(text, "This is a test document.")

    def test_image_to_string_language_unavailable(self):
        try:
            image_to_string((os.path.join(self.SAMPLE_FILES, 'simple.png'), "ita"))
        except OCRError as e:
            self.assertTrue("Failed loading language" in str(e))
        else:
            self.fail("Should raise exception")

    @override_settings(OCR_ALWAYS=False)
    @mock.patch("paperless_tesseract.parsers.get_text_from_pdf")
    @mock.patch("paperless_tesseract.parsers.RasterisedDocumentParser._get_greyscale")
    def test_is_ocred(self, m2, m):
        parser = RasterisedDocumentParser("", uuid.uuid4())
        m.return_value = "lots of text lots of text lots of text lots of text lots of text lots of text " \
                         "lots of text lots of text lots of text lots of text lots of text lots of text " \
                         "lots of text lots of text lots of text lots of text lots of text lots of text "
        parser.get_text()
        self.assertEqual(m.call_count, 2)
        self.assertEqual(m2.call_count, 0)

    def test_thumbnail(self):
        parser = RasterisedDocumentParser(os.path.join(self.SAMPLE_FILES, 'simple.pdf'), uuid.uuid4())
        parser.get_thumbnail()
        # don't really know how to test it, just call it and assert that it does not raise anything.

    @mock.patch("paperless_tesseract.parsers.run_convert")
    def test_thumbnail_fallback(self, m):

        def call_convert(input_file, output_file, **kwargs):
            if ".pdf" in input_file:
                raise ParseError("Does not compute.")
            else:
                run_convert(input_file=input_file, output_file=output_file, **kwargs)

        m.side_effect = call_convert

        parser = RasterisedDocumentParser(os.path.join(self.SAMPLE_FILES, 'simple.pdf'), uuid.uuid4())
        parser.get_thumbnail()
        # don't really know how to test it, just call it and assert that it does not raise anything.
@@ -1,6 +1,6 @@
from django.test import TestCase

from ..signals import ConsumerDeclaration
from paperless_tesseract.signals import tesseract_consumer_test


class SignalsTestCase(TestCase):

@@ -20,7 +20,7 @@ class SignalsTestCase(TestCase):
        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                self.assertTrue(ConsumerDeclaration.test(name))
                self.assertTrue(tesseract_consumer_test(name))

    def test_test_handles_various_file_names_false(self):

@@ -30,7 +30,7 @@ class SignalsTestCase(TestCase):
        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                self.assertFalse(ConsumerDeclaration.test(name))
                self.assertFalse(tesseract_consumer_test(name))

        self.assertFalse(ConsumerDeclaration.test(""))
        self.assertFalse(ConsumerDeclaration.test("doc"))
        self.assertFalse(tesseract_consumer_test(""))
        self.assertFalse(tesseract_consumer_test("doc"))
@@ -1,5 +1,7 @@
from django.apps import AppConfig

from paperless_text.signals import text_consumer_declaration


class PaperlessTextConfig(AppConfig):

@@ -9,8 +11,6 @@ class PaperlessTextConfig(AppConfig):

        from documents.signals import document_consumer_declaration

        from .signals import ConsumerDeclaration

        document_consumer_declaration.connect(ConsumerDeclaration.handle)
        document_consumer_declaration.connect(text_consumer_declaration)

        AppConfig.ready(self)
@@ -47,8 +47,8 @@ class TextDocumentParser(DocumentParser):

        def read_text():
            with open(self.document_path, 'r') as src:
                lines = [l.strip() for l in src.readlines()]
                text = "\n".join([l for l in lines[:n_lines]])
                lines = [line.strip() for line in src.readlines()]
                text = "\n".join([line for line in lines[:n_lines]])
                return text.replace('"', "'")

        def create_txlayer():
@@ -3,21 +3,16 @@ import re
from .parsers import TextDocumentParser


class ConsumerDeclaration:
def text_consumer_declaration(sender, **kwargs):
    return {
        "parser": TextDocumentParser,
        "weight": 10,
        "test": text_consumer_test
    }

    MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")

    @classmethod
    def handle(cls, sender, **kwargs):
        return cls.test
MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")

    @classmethod
    def test(cls, doc):

        if cls.MATCHING_FILES.match(doc.lower()):
            return {
                "parser": TextDocumentParser,
                "weight": 10
            }

        return None
def text_consumer_test(doc):
    return MATCHING_FILES.match(doc.lower())
@@ -1,12 +1,11 @@
[pycodestyle]
exclude = migrations, paperless/settings.py, .tox

ignore = E501

[tool:pytest]
DJANGO_SETTINGS_MODULE=paperless.settings
addopts = --pythonwarnings=all -n auto
addopts = --pythonwarnings=all
env =
    PAPERLESS_PASSPHRASE=THISISNOTASECRET
    PAPERLESS_SECRET=paperless
    PAPERLESS_EMAIL_SECRET=paperless

@@ -15,4 +14,4 @@ env =
source =
    ./
omit =
    */tests
    */tests/*