Merge branch 'dev' into feature-websockets-status
@@ -91,6 +91,11 @@ class Consumer(LoggingMixin):
         if not settings.PRE_CONSUME_SCRIPT:
             return
 
+        if not os.path.isfile(settings.PRE_CONSUME_SCRIPT):
+            raise ConsumerError(
+                f"Configured pre-consume script "
+                f"{settings.PRE_CONSUME_SCRIPT} does not exist.")
+
         try:
             Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
         except Exception as e:
@@ -102,6 +107,11 @@ class Consumer(LoggingMixin):
         if not settings.POST_CONSUME_SCRIPT:
             return
 
+        if not os.path.isfile(settings.POST_CONSUME_SCRIPT):
+            raise ConsumerError(
+                f"Configured post-consume script "
+                f"{settings.POST_CONSUME_SCRIPT} does not exist.")
+
         try:
             Popen((
                 settings.POST_CONSUME_SCRIPT,
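
Note: these guards fail fast when a configured script path does not exist, instead of failing later inside Popen. A minimal sketch of exercising the new behavior, mirroring the tests added later in this commit:

    from django.test import override_settings

    from documents.consumer import Consumer, ConsumerError

    with override_settings(PRE_CONSUME_SCRIPT="does-not-exist"):
        c = Consumer()
        c.path = "path-to-file"
        try:
            # Raises before Popen is ever reached, with a clear message.
            c.run_pre_consume_script()
        except ConsumerError as e:
            print(e)  # Configured pre-consume script does-not-exist does not exist.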
@@ -91,7 +91,7 @@ def generate_unique_filename(doc, root):
     return new_filename
 
 
-def generate_filename(doc, counter=0):
+def generate_filename(doc, counter=0, append_gpg=True):
     path = ""
 
     try:
@@ -151,7 +151,7 @@ def generate_filename(doc, counter=0):
     filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
 
     # Append .gpg for encrypted files
-    if doc.storage_type == doc.STORAGE_TYPE_GPG:
+    if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
         filename += ".gpg"
 
     return filename
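
Note: a quick sketch of what the new flag changes for a GPG-encrypted document (filenames illustrative):

    from documents.file_handling import generate_filename

    generate_filename(doc)                    # e.g. "0000004.pdf.gpg" (default keeps the suffix)
    generate_filename(doc, append_gpg=False)  # e.g. "0000004.pdf" (the exporter writes decrypted content)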
@@ -11,6 +11,7 @@ from django import db
 from django.conf import settings
 from django.core.management.base import BaseCommand
 from django.db import transaction
+from filelock import FileLock
 from whoosh.writing import AsyncWriter
 
 from documents.models import Document
@@ -47,8 +48,10 @@ def handle_document(document_id):
             archive_checksum=checksum,
             content=parser.get_text()
         )
-        create_source_path_directory(document.archive_path)
-        shutil.move(parser.get_archive_path(), document.archive_path)
+        with FileLock(settings.MEDIA_LOCK):
+            create_source_path_directory(document.archive_path)
+            shutil.move(parser.get_archive_path(),
+                        document.archive_path)
 
         with AsyncWriter(index.open_index()) as writer:
             index.update_document(writer, document)
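
Note: FileLock comes from the filelock package and provides a cross-process lock tied to a lock file, so the archiver and the exporter cannot move files under the media directory at the same time. A minimal sketch of the pattern (lock path hypothetical; paperless uses settings.MEDIA_LOCK):

    from filelock import FileLock

    lock = FileLock("/tmp/media.lock")  # hypothetical path

    with lock:  # blocks until acquired, releases on exit
        # safe to create directories and move files under MEDIA_ROOT here
        ...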
@@ -5,7 +5,6 @@ from time import sleep
 
 from django.conf import settings
 from django.core.management.base import BaseCommand, CommandError
-from django.utils.text import slugify
 from django_q.tasks import async_task
 from watchdog.events import FileSystemEventHandler
 from watchdog.observers.polling import PollingObserver
@@ -46,7 +45,7 @@ def _consume(filepath):
         return
 
     if not is_file_ext_supported(os.path.splitext(filepath)[1]):
-        logger.debug(
+        logger.warning(
             f"Not consuming file {filepath}: Unknown file extension.")
         return
 
@@ -1,15 +1,21 @@
 import hashlib
 import json
 import os
 import shutil
 import time
 
+import tqdm
 from django.conf import settings
 from django.core import serializers
 from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from filelock import FileLock
 
 from documents.models import Document, Correspondent, Tag, DocumentType
 from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
     EXPORTER_ARCHIVE_NAME
 from paperless.db import GnuPG
+from ...file_handling import generate_filename, delete_empty_directories
 from ...mixins import Renderable
@@ -24,13 +30,47 @@ class Command(Renderable, BaseCommand):
     def add_arguments(self, parser):
         parser.add_argument("target")
 
+        parser.add_argument(
+            "-c", "--compare-checksums",
+            default=False,
+            action="store_true",
+            help="Compare file checksums when determining whether to export "
+                 "a file or not. If not specified, file size and time "
+                 "modified is used instead."
+        )
+
+        parser.add_argument(
+            "-f", "--use-filename-format",
+            default=False,
+            action="store_true",
+            help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
+                 "export directory, if configured."
+        )
+
+        parser.add_argument(
+            "-d", "--delete",
+            default=False,
+            action="store_true",
+            help="After exporting, delete files in the export directory that "
+                 "do not belong to the current export, such as files from "
+                 "deleted documents."
+        )
+
     def __init__(self, *args, **kwargs):
         BaseCommand.__init__(self, *args, **kwargs)
         self.target = None
+        self.files_in_export_dir = []
+        self.exported_files = []
+        self.compare_checksums = False
+        self.use_filename_format = False
+        self.delete = False
 
     def handle(self, *args, **options):
 
         self.target = options["target"]
+        self.compare_checksums = options['compare_checksums']
+        self.use_filename_format = options['use_filename_format']
+        self.delete = options['delete']
 
         if not os.path.exists(self.target):
             raise CommandError("That path doesn't exist")
@@ -38,83 +78,148 @@ class Command(Renderable, BaseCommand):
         if not os.access(self.target, os.W_OK):
             raise CommandError("That path doesn't appear to be writable")
 
-        if os.listdir(self.target):
-            raise CommandError("That directory is not empty.")
-
-        self.dump()
+        with FileLock(settings.MEDIA_LOCK):
+            self.dump()
 
     def dump(self):
+        # 1. Take a snapshot of what files exist in the current export folder
+        for root, dirs, files in os.walk(self.target):
+            self.files_in_export_dir.extend(
+                map(lambda f: os.path.abspath(os.path.join(root, f)), files)
+            )
 
-        documents = Document.objects.all()
-        document_map = {d.pk: d for d in documents}
-        manifest = json.loads(serializers.serialize("json", documents))
+        # 2. Create manifest, containing all correspondents, types, tags and
+        # documents
+        with transaction.atomic():
+            manifest = json.loads(
+                serializers.serialize("json", Correspondent.objects.all()))
 
-        for index, document_dict in enumerate(manifest):
+            manifest += json.loads(serializers.serialize(
+                "json", Tag.objects.all()))
 
-            # Force output to unencrypted as that will be the current state.
-            # The importer will make the decision to encrypt or not.
-            manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501
+            manifest += json.loads(serializers.serialize(
+                "json", DocumentType.objects.all()))
+
+            documents = Document.objects.order_by("id")
+            document_map = {d.pk: d for d in documents}
+            document_manifest = json.loads(
+                serializers.serialize("json", documents))
+            manifest += document_manifest
+
+        # 3. Export files from each document
+        for index, document_dict in tqdm.tqdm(enumerate(document_manifest),
+                                              total=len(document_manifest)):
+            # 3.1. store files unencrypted
+            document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501
 
             document = document_map[document_dict["pk"]]
 
-            print(f"Exporting: {document}")
-
+            # 3.2. generate a unique filename
             filename_counter = 0
             while True:
-                original_name = document.get_public_filename(
-                    counter=filename_counter)
-                original_target = os.path.join(self.target, original_name)
+                if self.use_filename_format:
+                    base_name = generate_filename(
+                        document, counter=filename_counter,
+                        append_gpg=False)
+                else:
+                    base_name = document.get_public_filename(
+                        counter=filename_counter)
 
-                if not os.path.exists(original_target):
+                if base_name not in self.exported_files:
+                    self.exported_files.append(base_name)
                     break
                 else:
                     filename_counter += 1
 
-            thumbnail_name = original_name + "-thumbnail.png"
-            thumbnail_target = os.path.join(self.target, thumbnail_name)
+            # 3.3. write filenames into manifest
+            original_name = base_name
+            original_target = os.path.join(self.target, original_name)
             document_dict[EXPORTER_FILE_NAME] = original_name
+
+            thumbnail_name = base_name + "-thumbnail.png"
+            thumbnail_target = os.path.join(self.target, thumbnail_name)
             document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
 
             if os.path.exists(document.archive_path):
-                archive_name = document.get_public_filename(
-                    archive=True, counter=filename_counter, suffix="_archive")
+                archive_name = base_name + "-archive.pdf"
                 archive_target = os.path.join(self.target, archive_name)
                 document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
             else:
                 archive_target = None
 
+            # 3.4. write files to target folder
+            t = int(time.mktime(document.created.timetuple()))
             if document.storage_type == Document.STORAGE_TYPE_GPG:
 
+                os.makedirs(os.path.dirname(original_target), exist_ok=True)
                 with open(original_target, "wb") as f:
                     f.write(GnuPG.decrypted(document.source_file))
+                os.utime(original_target, times=(t, t))
 
+                os.makedirs(os.path.dirname(thumbnail_target), exist_ok=True)
                 with open(thumbnail_target, "wb") as f:
                     f.write(GnuPG.decrypted(document.thumbnail_file))
+                os.utime(thumbnail_target, times=(t, t))
 
                 if archive_target:
+                    os.makedirs(os.path.dirname(archive_target), exist_ok=True)
                     with open(archive_target, "wb") as f:
                         f.write(GnuPG.decrypted(document.archive_path))
+                    os.utime(archive_target, times=(t, t))
             else:
-                shutil.copy(document.source_path, original_target)
-                shutil.copy(document.thumbnail_path, thumbnail_target)
+                self.check_and_copy(document.source_path,
+                                    document.checksum,
+                                    original_target)
+
+                self.check_and_copy(document.thumbnail_path,
+                                    None,
+                                    thumbnail_target)
 
                 if archive_target:
-                    shutil.copy(document.archive_path, archive_target)
+                    self.check_and_copy(document.archive_path,
+                                        document.archive_checksum,
+                                        archive_target)
 
-        manifest += json.loads(
-            serializers.serialize("json", Correspondent.objects.all()))
+        # 4. write manifest to target folder
+        manifest_path = os.path.abspath(
+            os.path.join(self.target, "manifest.json"))
 
-        manifest += json.loads(serializers.serialize(
-            "json", Tag.objects.all()))
-
-        manifest += json.loads(serializers.serialize(
-            "json", DocumentType.objects.all()))
-
-        with open(os.path.join(self.target, "manifest.json"), "w") as f:
+        with open(manifest_path, "w") as f:
             json.dump(manifest, f, indent=2)
 
+        if self.delete:
+            # 5. Remove files which we did not explicitly export in this run
+
+            if manifest_path in self.files_in_export_dir:
+                self.files_in_export_dir.remove(manifest_path)
+
+            for f in self.files_in_export_dir:
+                os.remove(f)
+
+                delete_empty_directories(os.path.abspath(os.path.dirname(f)),
+                                         os.path.abspath(self.target))
+
+    def check_and_copy(self, source, source_checksum, target):
+        if os.path.abspath(target) in self.files_in_export_dir:
+            self.files_in_export_dir.remove(os.path.abspath(target))
+
+        perform_copy = False
+
+        if os.path.exists(target):
+            source_stat = os.stat(source)
+            target_stat = os.stat(target)
+            if self.compare_checksums and source_checksum:
+                with open(target, "rb") as f:
+                    target_checksum = hashlib.md5(f.read()).hexdigest()
+                perform_copy = target_checksum != source_checksum
+            elif source_stat.st_mtime != target_stat.st_mtime:
+                perform_copy = True
+            elif source_stat.st_size != target_stat.st_size:
+                perform_copy = True
        else:
            # Copy if it does not exist
            perform_copy = True

        if perform_copy:
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copy2(source, target)
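
Note: with these options the exporter becomes incremental: unchanged files are skipped based on size and mtime (or MD5 checksums with -c), and -d prunes files whose documents no longer exist. A usage sketch (export path hypothetical):

    from django.core.management import call_command

    call_command("document_exporter", "/path/to/export")
    call_command("document_exporter", "/path/to/export", "--compare-checksums")
    call_command("document_exporter", "/path/to/export", "--delete")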
@@ -148,10 +148,10 @@ class Command(Renderable, BaseCommand):
 
         create_source_path_directory(document.source_path)
 
-        shutil.copy(document_path, document.source_path)
-        shutil.copy(thumbnail_path, document.thumbnail_path)
+        shutil.copy2(document_path, document.source_path)
+        shutil.copy2(thumbnail_path, document.thumbnail_path)
         if archive_path:
             create_source_path_directory(document.archive_path)
-            shutil.copy(archive_path, document.archive_path)
+            shutil.copy2(archive_path, document.archive_path)
 
         document.save()
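
Note: the switch to shutil.copy2 matters because it preserves file metadata such as mtime, which the exporter's change detection above compares. A quick illustration (file names hypothetical):

    import os
    import shutil

    shutil.copy("a.pdf", "b.pdf")   # contents only; b.pdf gets a fresh mtime
    shutil.copy2("a.pdf", "c.pdf")  # contents plus metadata; mtimes match
    assert os.stat("a.pdf").st_mtime == os.stat("c.pdf").st_mtime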
@@ -13,8 +13,14 @@ from ...parsers import get_parser_class_for_mime_type
 
 def _process_document(doc_in):
     document = Document.objects.get(id=doc_in)
-    parser = get_parser_class_for_mime_type(document.mime_type)(
-        logging_group=None)
+    parser_class = get_parser_class_for_mime_type(document.mime_type)
+
+    if parser_class:
+        parser = parser_class(logging_group=None)
+    else:
+        print(f"{document} No parser for mime type {document.mime_type}")
+        return
 
     try:
         thumb = parser.get_optimised_thumbnail(
             document.source_path, document.mime_type)
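
Note: for context, the command this fixes can be invoked for all documents or a single one; a sketch based on the tests added in this commit (document id hypothetical):

    from django.core.management import call_command

    call_command("document_thumbnails")              # regenerate every thumbnail
    call_command("document_thumbnails", "-d", "42")  # only document 42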
@@ -1,3 +1,4 @@
+import logging
 import re
 
 from fuzzywuzzy import fuzz
@@ -5,49 +6,59 @@ from fuzzywuzzy import fuzz
 from documents.models import MatchingModel, Correspondent, DocumentType, Tag
 
 
-def match_correspondents(document_content, classifier):
+logger = logging.getLogger(__name__)
+
+
+def log_reason(matching_model, document, reason):
+    class_name = type(matching_model).__name__
+    logger.debug(
+        f"Assigning {class_name} {matching_model.name} to document "
+        f"{document} because {reason}")
+
+
+def match_correspondents(document, classifier):
     if classifier:
-        pred_id = classifier.predict_correspondent(document_content)
+        pred_id = classifier.predict_correspondent(document.content)
     else:
         pred_id = None
 
     correspondents = Correspondent.objects.all()
 
     return list(filter(
-        lambda o: matches(o, document_content) or o.pk == pred_id,
+        lambda o: matches(o, document) or o.pk == pred_id,
         correspondents))
 
 
-def match_document_types(document_content, classifier):
+def match_document_types(document, classifier):
     if classifier:
-        pred_id = classifier.predict_document_type(document_content)
+        pred_id = classifier.predict_document_type(document.content)
     else:
         pred_id = None
 
     document_types = DocumentType.objects.all()
 
     return list(filter(
-        lambda o: matches(o, document_content) or o.pk == pred_id,
+        lambda o: matches(o, document) or o.pk == pred_id,
         document_types))
 
 
-def match_tags(document_content, classifier):
+def match_tags(document, classifier):
     if classifier:
-        predicted_tag_ids = classifier.predict_tags(document_content)
+        predicted_tag_ids = classifier.predict_tags(document.content)
     else:
         predicted_tag_ids = []
 
     tags = Tag.objects.all()
 
     return list(filter(
-        lambda o: matches(o, document_content) or o.pk in predicted_tag_ids,
+        lambda o: matches(o, document) or o.pk in predicted_tag_ids,
         tags))
 
 
-def matches(matching_model, document_content):
+def matches(matching_model, document):
     search_kwargs = {}
 
-    document_content = document_content.lower()
+    document_content = document.content.lower()
 
     # Check that match is not empty
     if matching_model.match.strip() == "":
@@ -62,26 +73,54 @@ def matches(matching_model, document_content):
                 rf"\b{word}\b", document_content, **search_kwargs)
             if not search_result:
                 return False
+        log_reason(
+            matching_model, document,
+            f"it contains all of these words: {matching_model.match}"
+        )
         return True
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
         for word in _split_match(matching_model):
             if re.search(rf"\b{word}\b", document_content, **search_kwargs):
+                log_reason(
+                    matching_model, document,
+                    f"it contains this word: {word}"
+                )
                 return True
         return False
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
-        return bool(re.search(
+        result = bool(re.search(
             rf"\b{matching_model.match}\b",
             document_content,
             **search_kwargs
         ))
+        if result:
+            log_reason(
+                matching_model, document,
+                f"it contains this string: \"{matching_model.match}\""
+            )
+        return result
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
-        return bool(re.search(
-            re.compile(matching_model.match, **search_kwargs),
-            document_content
-        ))
+        try:
+            match = re.search(
+                re.compile(matching_model.match, **search_kwargs),
+                document_content
+            )
+        except re.error:
+            logger.error(
+                f"Error while processing regular expression "
+                f"{matching_model.match}"
+            )
+            return False
+        if match:
+            log_reason(
+                matching_model, document,
+                f"the string {match.group()} matches the regular expression "
+                f"{matching_model.match}"
+            )
+        return bool(match)
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
         match = re.sub(r'[^\w\s]', '', matching_model.match)
@@ -89,8 +128,16 @@ def matches(matching_model, document_content):
         if matching_model.is_insensitive:
             match = match.lower()
             text = text.lower()
-
-        return fuzz.partial_ratio(match, text) >= 90
+        if fuzz.partial_ratio(match, text) >= 90:
+            # TODO: make this better
+            log_reason(
+                matching_model, document,
+                f"parts of the document content somehow match the string "
+                f"{matching_model.match}"
+            )
+            return True
+        else:
+            return False
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_AUTO:
         # this is done elsewhere.
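
Note: matchers now receive the Document itself and log why they fired. A minimal sketch of the new calling convention (mirroring the updated tests later in this commit):

    from documents import matching
    from documents.models import Document, MatchingModel, Tag

    tag = Tag(name="invoice", match="invoice",
              matching_algorithm=MatchingModel.MATCH_ANY)
    doc = Document(content="Your invoice for March is attached.")

    # matches() reads doc.content internally and calls log_reason() on a hit.
    print(matching.matches(tag, doc))  # True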
@@ -12,6 +12,7 @@ from django.conf import settings
 from django.contrib.auth.models import User
 from django.db import models
 from django.utils import timezone
+from django.utils.timezone import is_aware
 
 from django.utils.translation import gettext_lazy as _
 
@@ -62,12 +63,6 @@ class MatchingModel(models.Model):
     def __str__(self):
         return self.name
 
-    def save(self, *args, **kwargs):
-
-        self.match = self.match.lower()
-
-        models.Model.save(self, *args, **kwargs)
-
 
 class Correspondent(MatchingModel):
 
@@ -233,7 +228,10 @@ class Document(models.Model):
         verbose_name_plural = _("documents")
 
     def __str__(self):
-        created = datetime.date.isoformat(self.created)
+        if is_aware(self.created):
+            created = timezone.localdate(self.created).isoformat()
+        else:
+            created = datetime.date.isoformat(self.created)
         if self.correspondent and self.title:
             return f"{created} {self.correspondent} {self.title}"
         else:
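
Note: the __str__ change converts aware timestamps into the local date instead of the UTC date. A quick sketch of the difference (assuming Django settings with a non-UTC TIME_ZONE are configured):

    import datetime

    from django.utils import timezone

    aware = datetime.datetime(2020, 12, 25, 0, 30, tzinfo=datetime.timezone.utc)

    print(timezone.localdate(aware))  # may be 2020-12-24 in a zone west of UTC
    print(aware.date().isoformat())   # always 2020-12-25, ignoring the zone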
@@ -210,6 +210,13 @@ def parse_date(filename, text):
         }
     )
 
+    def __filter(date):
+        if date and date.year > 1900 and \
+                date <= timezone.now() and \
+                date.date() not in settings.IGNORE_DATES:
+            return date
+        return None
+
     date = None
 
     # if filename date parsing is enabled, search there first:
@@ -223,7 +230,8 @@ def parse_date(filename, text):
             # Skip all matches that do not parse to a proper date
             continue
 
-        if date and date.year > 1900 and date <= timezone.now():
+        date = __filter(date)
+        if date is not None:
             return date
 
     # Iterate through all regex matches in text and try to parse the date
@@ -236,10 +244,9 @@ def parse_date(filename, text):
             # Skip all matches that do not parse to a proper date
             continue
 
-        if date and date.year > 1900 and date <= timezone.now():
+        date = __filter(date)
+        if date is not None:
             break
-        else:
-            date = None
 
     return date
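
Note: the shared __filter() also honors the new IGNORE_DATES setting. A sketch of the behavior pinned down by test_ignored_dates later in this commit:

    import datetime

    from django.test import override_settings

    from documents.parsers import parse_date

    text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem ipsum"
    with override_settings(IGNORE_DATES=(datetime.date(2019, 11, 3),
                                         datetime.date(2020, 1, 17))):
        # The first two candidates are ignored, so 13.02.2018 wins.
        print(parse_date("", text))  # 2018-02-13 00:00 in the configured TIME_ZONE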
@@ -382,13 +382,6 @@ class PostDocumentSerializer(serializers.Serializer):
 
         return document.name, document_data
 
-    def validate_title(self, title):
-        if title:
-            return title
-        else:
-            # do not return empty strings.
-            return None
-
     def validate_correspondent(self, correspondent):
         if correspondent:
             return correspondent.id
@@ -38,7 +38,7 @@ def set_correspondent(sender,
     if document.correspondent and not replace:
         return
 
-    potential_correspondents = matching.match_correspondents(document.content,
+    potential_correspondents = matching.match_correspondents(document,
                                                              classifier)
 
     potential_count = len(potential_correspondents)
@@ -81,7 +81,7 @@ def set_document_type(sender,
     if document.document_type and not replace:
         return
 
-    potential_document_type = matching.match_document_types(document.content,
+    potential_document_type = matching.match_document_types(document,
                                                             classifier)
 
     potential_count = len(potential_document_type)
@@ -130,7 +130,7 @@ def set_tags(sender,
 
     current_tags = set(document.tags.all())
 
-    matched_tags = matching.match_tags(document.content, classifier)
+    matched_tags = matching.match_tags(document, classifier)
 
     relevant_tags = set(matched_tags) - current_tags
 
@@ -1,6 +1,7 @@
 <!doctype html>
 
 {% load static %}
+{% load i18n %}
 
 <html lang="en">
 <head>
@@ -16,7 +17,7 @@
   <link rel="stylesheet" href="{% static styles_css %}">
 </head>
 <body>
-  <app-root>Loading...</app-root>
+  <app-root>{% translate "Paperless-ng is loading..." %}</app-root>
   <script src="{% static runtime_js %}" defer></script>
   <script src="{% static polyfills_js %}" defer></script>
   <script src="{% static main_js %}" defer></script>
@@ -1,6 +1,7 @@
 <!doctype html>
 
 {% load static %}
+{% load i18n %}
 
 <html lang="en">
 <head>
@@ -9,7 +10,7 @@
     <meta name="description" content="">
     <meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
     <meta name="generator" content="Jekyll v4.1.1">
-    <title>Paperless Sign In</title>
+    <title>{% translate "Paperless-ng signed out" %}</title>
 
     <!-- Bootstrap core CSS -->
     <link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
@@ -36,9 +37,9 @@
 
   <body class="text-center">
     <div class="form-signin">
-      <img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
-      <p>You have been successfully logged out. Bye!</p>
-      <a href="/">Sign in again</a>
+      <img class="mb-4" src="{% static 'frontend/en-US/assets/logo.svg' %}" alt="" width="300">
+      <p>{% translate "You have been successfully logged out. Bye!" %}</p>
+      <a href="/">{% translate "Sign in again" %}</a>
     </div>
   </body>
 </html>
@@ -1,6 +1,7 @@
 <!doctype html>
 
 {% load static %}
+{% load i18n %}
 
 <html lang="en">
 <head>
@@ -9,7 +10,7 @@
     <meta name="description" content="">
     <meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
     <meta name="generator" content="Jekyll v4.1.1">
-    <title>Paperless Sign In</title>
+    <title>{% translate "Paperless-ng sign in" %}</title>
 
     <!-- Bootstrap core CSS -->
     <link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
@@ -37,18 +38,20 @@
   <body class="text-center">
     <form class="form-signin" method="post">
       {% csrf_token %}
-      <img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
-      <p>Please sign in.</p>
+      <img class="mb-4" src="{% static 'frontend/en-US/assets/logo.svg' %}" alt="" width="300">
+      <p>{% translate "Please sign in." %}</p>
       {% if form.errors %}
         <div class="alert alert-danger" role="alert">
-          Your username and password didn't match. Please try again.
+          {% translate "Your username and password didn't match. Please try again." %}
         </div>
       {% endif %}
-      <label for="inputUsername" class="sr-only">Username</label>
-      <input type="text" name="username" id="inputUsername" class="form-control" placeholder="Username" required autofocus>
-      <label for="inputPassword" class="sr-only">Password</label>
-      <input type="password" name="password" id="inputPassword" class="form-control" placeholder="Password" required>
-      <button class="btn btn-lg btn-primary btn-block" type="submit">Sign in</button>
+      {% translate "Username" as i18n_username %}
+      {% translate "Password" as i18n_password %}
+      <label for="inputUsername" class="sr-only">{{ i18n_username }}</label>
+      <input type="text" name="username" id="inputUsername" class="form-control" placeholder="{{ i18n_username }}" required autofocus>
+      <label for="inputPassword" class="sr-only">{{ i18n_password }}</label>
+      <input type="password" name="password" id="inputPassword" class="form-control" placeholder="{{ i18n_password }}" required>
+      <button class="btn btn-lg btn-primary btn-block" type="submit">{% translate "Sign in" %}</button>
     </form>
   </body>
 </html>
BIN  src/documents/tests/samples/documents/originals/0000002.pdf (new file, binary not shown)
BIN  src/documents/tests/samples/documents/originals/0000003.pdf (new file, binary not shown)
BIN  src/documents/tests/samples/documents/originals/0000004.pdf.gpg (new file, binary not shown)
BIN  src/documents/tests/samples/documents/thumbnails/0000002.png (new file, 7.7 KiB)
BIN  src/documents/tests/samples/documents/thumbnails/0000003.png (new file, 7.7 KiB)
@@ -5,12 +5,14 @@ from django.test import TestCase
 from django.utils import timezone
 
 from documents.admin import DocumentAdmin
-from documents.models import Document, Tag
+from documents.models import Document
+from documents.tests.utils import DirectoriesMixin
 
 
-class TestDocumentAdmin(TestCase):
+class TestDocumentAdmin(DirectoriesMixin, TestCase):
 
     def setUp(self) -> None:
+        super(TestDocumentAdmin, self).setUp()
         self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
 
     @mock.patch("documents.admin.index.add_or_update_document")
@@ -114,8 +114,6 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         results = response.data['results']
         self.assertEqual(len(results[0]), 0)
 
-
-
     def test_document_actions(self):
 
         _, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
@@ -230,6 +228,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(len(results), 2)
         self.assertCountEqual([results[0]['id'], results[1]['id']], [doc1.id, doc3.id])
 
+        response = self.client.get("/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id))
+        self.assertEqual(response.status_code, 200)
+        results = response.data['results']
+        self.assertEqual(len(results), 2)
+        self.assertCountEqual([results[0]['id'], results[1]['id']], [doc2.id, doc3.id])
+
         response = self.client.get("/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id))
         self.assertEqual(response.status_code, 200)
         results = response.data['results']
@@ -455,6 +459,23 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertIsNone(kwargs['override_document_type_id'])
         self.assertIsNone(kwargs['override_tag_ids'])
 
+    @mock.patch("documents.views.async_task")
+    def test_upload_empty_metadata(self, m):
+
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "title": "", "correspondent": "", "document_type": ""})
+
+        self.assertEqual(response.status_code, 200)
+
+        m.assert_called_once()
+
+        args, kwargs = m.call_args
+        self.assertEqual(kwargs['override_filename'], "simple.pdf")
+        self.assertIsNone(kwargs['override_title'])
+        self.assertIsNone(kwargs['override_correspondent_id'])
+        self.assertIsNone(kwargs['override_document_type_id'])
+        self.assertIsNone(kwargs['override_tag_ids'])
+
     @mock.patch("documents.views.async_task")
     def test_upload_invalid_form(self, m):
 
@@ -908,6 +929,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         doc2 = Document.objects.get(id=self.doc2.id)
         self.assertEqual(doc2.correspondent, self.c1)
 
+    def test_api_no_correspondent(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "set_correspondent",
+            "parameters": {}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_invalid_document_type(self):
         self.assertEqual(self.doc2.document_type, self.dt1)
         response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -920,6 +949,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         doc2 = Document.objects.get(id=self.doc2.id)
         self.assertEqual(doc2.document_type, self.dt1)
 
+    def test_api_no_document_type(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "set_document_type",
+            "parameters": {}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_add_invalid_tag(self):
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
         response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -931,6 +968,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
 
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
 
+    def test_api_add_tag_no_tag(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "add_tag",
+            "parameters": {}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_delete_invalid_tag(self):
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
         response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -942,6 +987,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
 
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
 
+    def test_api_delete_tag_no_tag(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "remove_tag",
+            "parameters": {}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_modify_invalid_tags(self):
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
         response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -951,6 +1004,21 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         }), content_type='application/json')
         self.assertEqual(response.status_code, 400)
 
+    def test_api_modify_tags_no_tags(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "modify_tags",
+            "parameters": {"remove_tags": [1123123]}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "modify_tags",
+            "parameters": {'add_tags': [self.t2.id, 1657]}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_selection_data_empty(self):
         response = self.client.post("/api/documents/selection_data/", json.dumps({
             "documents": []
@@ -468,6 +468,42 @@ class TestConsumer(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.isfile(dst))
 
 
+class PreConsumeTestCase(TestCase):
+
+    @mock.patch("documents.consumer.Popen")
+    @override_settings(PRE_CONSUME_SCRIPT=None)
+    def test_no_pre_consume_script(self, m):
+        c = Consumer()
+        c.path = "path-to-file"
+        c.run_pre_consume_script()
+        m.assert_not_called()
+
+    @mock.patch("documents.consumer.Popen")
+    @override_settings(PRE_CONSUME_SCRIPT="does-not-exist")
+    def test_pre_consume_script_not_found(self, m):
+        c = Consumer()
+        c.path = "path-to-file"
+        self.assertRaises(ConsumerError, c.run_pre_consume_script)
+
+    @mock.patch("documents.consumer.Popen")
+    def test_pre_consume_script(self, m):
+        with tempfile.NamedTemporaryFile() as script:
+            with override_settings(PRE_CONSUME_SCRIPT=script.name):
+                c = Consumer()
+                c.path = "path-to-file"
+                c.run_pre_consume_script()
+
+                m.assert_called_once()
+
+                args, kwargs = m.call_args
+
+                command = args[0]
+
+                self.assertEqual(command[0], script.name)
+                self.assertEqual(command[1], "path-to-file")
+
+
 class PostConsumeTestCase(TestCase):
 
     @mock.patch("documents.consumer.Popen")
@@ -483,36 +519,45 @@ class PostConsumeTestCase(TestCase):
 
         m.assert_not_called()
 
-    @mock.patch("documents.consumer.Popen")
-    @override_settings(POST_CONSUME_SCRIPT="script")
-    def test_post_consume_script_simple(self, m):
-
-        doc = Document.objects.create(title="Test", mime_type="application/pdf")
-
-        Consumer().run_post_consume_script(doc)
-
-        m.assert_called_once()
+    @override_settings(POST_CONSUME_SCRIPT="does-not-exist")
+    def test_post_consume_script_not_found(self):
+        doc = Document.objects.create(title="Test", mime_type="application/pdf")
+        self.assertRaises(ConsumerError, Consumer().run_post_consume_script, doc)
+
+    @mock.patch("documents.consumer.Popen")
+    def test_post_consume_script_simple(self, m):
+        with tempfile.NamedTemporaryFile() as script:
+            with override_settings(POST_CONSUME_SCRIPT=script.name):
+                doc = Document.objects.create(title="Test", mime_type="application/pdf")
+
+                Consumer().run_post_consume_script(doc)
+
+                m.assert_called_once()
 
     @mock.patch("documents.consumer.Popen")
-    @override_settings(POST_CONSUME_SCRIPT="script")
     def test_post_consume_script_with_correspondent(self, m):
-        c = Correspondent.objects.create(name="my_bank")
-        doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
-        tag1 = Tag.objects.create(name="a")
-        tag2 = Tag.objects.create(name="b")
-        doc.tags.add(tag1)
-        doc.tags.add(tag2)
-
-        Consumer().run_post_consume_script(doc)
-
-        m.assert_called_once()
-
-        args, kwargs = m.call_args
-
-        command = args[0]
-
-        self.assertEqual(command[0], "script")
-        self.assertEqual(command[1], str(doc.pk))
-        self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
-        self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
-        self.assertEqual(command[7], "my_bank")
-        self.assertCountEqual(command[8].split(","), ["a", "b"])
+        with tempfile.NamedTemporaryFile() as script:
+            with override_settings(POST_CONSUME_SCRIPT=script.name):
+                c = Correspondent.objects.create(name="my_bank")
+                doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
+                tag1 = Tag.objects.create(name="a")
+                tag2 = Tag.objects.create(name="b")
+                doc.tags.add(tag1)
+                doc.tags.add(tag2)
+
+                Consumer().run_post_consume_script(doc)
+
+                m.assert_called_once()
+
+                args, kwargs = m.call_args
+
+                command = args[0]
+
+                self.assertEqual(command[0], script.name)
+                self.assertEqual(command[1], str(doc.pk))
+                self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
+                self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
+                self.assertEqual(command[7], "my_bank")
+                self.assertCountEqual(command[8].split(","), ["a", "b"])
@@ -138,3 +138,18 @@ class TestDate(TestCase):
     @override_settings(FILENAME_DATE_ORDER="YMD")
     def test_filename_date_parse_invalid(self, *args):
         self.assertIsNone(parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"))
+
+    @override_settings(IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)))
+    def test_ignored_dates(self, *args):
+        text = (
+            "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem "
+            "ipsum"
+        )
+        date = parse_date("", text)
+        self.assertEqual(
+            date,
+            datetime.datetime(
+                2018, 2, 13, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )
@@ -1,10 +1,10 @@
 import shutil
 import tempfile
-from datetime import datetime
 from pathlib import Path
 from unittest import mock
 
 from django.test import TestCase, override_settings
+from django.utils import timezone
 
 from ..models import Document, Correspondent
 
@@ -47,20 +47,20 @@ class TestDocument(TestCase):
 
     def test_file_name(self):
 
-        doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25))
+        doc = Document(mime_type="application/pdf", title="test", created=timezone.datetime(2020, 12, 25))
         self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
 
     def test_file_name_jpg(self):
 
-        doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25))
+        doc = Document(mime_type="image/jpeg", title="test", created=timezone.datetime(2020, 12, 25))
         self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
 
     def test_file_name_unknown(self):
 
-        doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25))
+        doc = Document(mime_type="application/zip", title="test", created=timezone.datetime(2020, 12, 25))
         self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
 
     def test_file_name_invalid_type(self):
 
-        doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25))
+        doc = Document(mime_type="image/jpegasd", title="test", created=timezone.datetime(2020, 12, 25))
         self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
@@ -70,18 +70,18 @@ class TestDecryptDocuments(TestCase):
             PASSPHRASE="test"
         ).enable()
 
-        doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
+        doc = Document.objects.create(checksum="82186aaa94f0b98697d704b90fd1c072", title="wow", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
 
-        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
-        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), os.path.join(originals_dir, "0000004.pdf.gpg"))
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000004.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
 
         call_command('decrypt_documents')
 
         doc.refresh_from_db()
 
         self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
-        self.assertEqual(doc.filename, "0000002.pdf")
-        self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
+        self.assertEqual(doc.filename, "0000004.pdf")
+        self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000004.pdf")))
         self.assertTrue(os.path.isfile(doc.source_path))
         self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
         self.assertTrue(os.path.isfile(doc.thumbnail_path))
@@ -3,6 +3,8 @@ import json
 import os
 import shutil
 import tempfile
+from pathlib import Path
+from unittest import mock
 
 from django.core.management import call_command
 from django.test import TestCase, override_settings
@@ -10,54 +12,87 @@ from django.test import TestCase, override_settings
 from documents.management.commands import document_exporter
 from documents.models import Document, Tag, DocumentType, Correspondent
+from documents.sanity_checker import check_sanity
+from documents.settings import EXPORTER_FILE_NAME
 from documents.tests.utils import DirectoriesMixin, paperless_environment
 
 
 class TestExportImport(DirectoriesMixin, TestCase):
 
+    def setUp(self) -> None:
+        self.target = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.target)
+
+        self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf")
+        self.d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow2", filename="0000002.pdf", mime_type="application/pdf")
+        self.d3 = Document.objects.create(content="Content", checksum="d38d7ed02e988e072caf924e0f3fcb76", title="wow2", filename="0000003.pdf", mime_type="application/pdf")
+        self.d4 = Document.objects.create(content="Content", checksum="82186aaa94f0b98697d704b90fd1c072", title="wow_dec", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
+
+        self.t1 = Tag.objects.create(name="t")
+        self.dt1 = DocumentType.objects.create(name="dt")
+        self.c1 = Correspondent.objects.create(name="c")
+
+        self.d1.tags.add(self.t1)
+        self.d1.correspondent = self.c1
+        self.d1.document_type = self.dt1
+        self.d1.save()
+        super(TestExportImport, self).setUp()
+
+    def _get_document_from_manifest(self, manifest, id):
+        f = list(filter(lambda d: d['model'] == "documents.document" and d['pk'] == id, manifest))
+        if len(f) == 1:
+            return f[0]
+        else:
+            raise ValueError(f"document with id {id} does not exist in manifest")
+
     @override_settings(
         PASSPHRASE="test"
     )
-    def test_exporter(self):
+    def _do_export(self, use_filename_format=False, compare_checksums=False, delete=False):
+        args = ['document_exporter', self.target]
+        if use_filename_format:
+            args += ["--use-filename-format"]
+        if compare_checksums:
+            args += ["--compare-checksums"]
+        if delete:
+            args += ["--delete"]
+
+        call_command(*args)
+
+        with open(os.path.join(self.target, "manifest.json")) as f:
+            manifest = json.load(f)
+
+        return manifest
+
+    def test_exporter(self, use_filename_format=False):
         shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
         shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
 
-        file = os.path.join(self.dirs.originals_dir, "0000001.pdf")
+        manifest = self._do_export(use_filename_format=use_filename_format)
 
-        d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", mime_type="application/pdf")
-        d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
-        t1 = Tag.objects.create(name="t")
-        dt1 = DocumentType.objects.create(name="dt")
-        c1 = Correspondent.objects.create(name="c")
+        self.assertEqual(len(manifest), 7)
+        self.assertEqual(len(list(filter(lambda e: e['model'] == 'documents.document', manifest))), 4)
 
-        d1.tags.add(t1)
-        d1.correspondents = c1
-        d1.document_type = dt1
-        d1.save()
-        d2.save()
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
 
-        target = tempfile.mkdtemp()
-        self.addCleanup(shutil.rmtree, target)
-
-        call_command('document_exporter', target)
-
-        with open(os.path.join(target, "manifest.json")) as f:
-            manifest = json.load(f)
-
-        self.assertEqual(len(manifest), 5)
+        self.assertEqual(self._get_document_from_manifest(manifest, self.d1.id)['fields']['title'], "wow1")
+        self.assertEqual(self._get_document_from_manifest(manifest, self.d2.id)['fields']['title'], "wow2")
+        self.assertEqual(self._get_document_from_manifest(manifest, self.d3.id)['fields']['title'], "wow2")
+        self.assertEqual(self._get_document_from_manifest(manifest, self.d4.id)['fields']['title'], "wow_dec")
 
         for element in manifest:
             if element['model'] == 'documents.document':
-                fname = os.path.join(target, element[document_exporter.EXPORTER_FILE_NAME])
+                fname = os.path.join(self.target, element[document_exporter.EXPORTER_FILE_NAME])
                 self.assertTrue(os.path.exists(fname))
-                self.assertTrue(os.path.exists(os.path.join(target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))
+                self.assertTrue(os.path.exists(os.path.join(self.target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))
 
                 with open(fname, "rb") as f:
                     checksum = hashlib.md5(f.read()).hexdigest()
                 self.assertEqual(checksum, element['fields']['checksum'])
 
                 self.assertEqual(element['fields']['storage_type'], Document.STORAGE_TYPE_UNENCRYPTED)
 
                 if document_exporter.EXPORTER_ARCHIVE_NAME in element:
-                    fname = os.path.join(target, element[document_exporter.EXPORTER_ARCHIVE_NAME])
+                    fname = os.path.join(self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME])
                     self.assertTrue(os.path.exists(fname))
 
                     with open(fname, "rb") as f:
@@ -65,24 +100,123 @@ class TestExportImport(DirectoriesMixin, TestCase):
                     self.assertEqual(checksum, element['fields']['archive_checksum'])
 
         with paperless_environment() as dirs:
-            self.assertEqual(Document.objects.count(), 2)
+            self.assertEqual(Document.objects.count(), 4)
             Document.objects.all().delete()
             Correspondent.objects.all().delete()
             DocumentType.objects.all().delete()
             Tag.objects.all().delete()
             self.assertEqual(Document.objects.count(), 0)
 
-            call_command('document_importer', target)
-            self.assertEqual(Document.objects.count(), 2)
+            call_command('document_importer', self.target)
+            self.assertEqual(Document.objects.count(), 4)
+            self.assertEqual(Tag.objects.count(), 1)
+            self.assertEqual(Correspondent.objects.count(), 1)
+            self.assertEqual(DocumentType.objects.count(), 1)
+            self.assertEqual(Document.objects.get(id=self.d1.id).title, "wow1")
+            self.assertEqual(Document.objects.get(id=self.d2.id).title, "wow2")
+            self.assertEqual(Document.objects.get(id=self.d3.id).title, "wow2")
+            self.assertEqual(Document.objects.get(id=self.d4.id).title, "wow_dec")
+            messages = check_sanity()
+            # everything is alright after the test
+            self.assertEqual(len(messages), 0, str([str(m) for m in messages]))
 
     @override_settings(
         PAPERLESS_FILENAME_FORMAT="{title}"
     )
     def test_exporter_with_filename_format(self):
-        self.test_exporter()
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        with override_settings(PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"):
+            self.test_exporter(use_filename_format=True)
+
+    def test_update_export_changed_time(self):
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        self._do_export()
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
+
+        with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
+            self._do_export()
+            m.assert_not_called()
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+        st_mtime_2 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
+
+        Path(self.d1.source_path).touch()
+
+        with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
+            self._do_export()
+            self.assertEqual(m.call_count, 1)
+
+        st_mtime_3 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        self.assertNotEqual(st_mtime_1, st_mtime_2)
+        self.assertNotEqual(st_mtime_2, st_mtime_3)
+
+    def test_update_export_changed_checksum(self):
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        self._do_export()
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
+            self._do_export()
+            m.assert_not_called()
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        self.d2.checksum = "asdfasdgf3"
+        self.d2.save()
+
+        with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
+            self._do_export(compare_checksums=True)
+            self.assertEqual(m.call_count, 1)
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+    def test_update_export_deleted_document(self):
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        manifest = self._do_export()
+
+        self.assertTrue(len(manifest), 7)
+        doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
+        self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
+
+        self.d3.delete()
+
+        manifest = self._do_export()
+        self.assertRaises(ValueError, self._get_document_from_manifest, manifest, self.d3.id)
+        self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
+
+        manifest = self._do_export(delete=True)
+        self.assertFalse(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
+
+        self.assertTrue(len(manifest), 6)
+
+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}/{correspondent}")
+    def test_update_export_changed_location(self):
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        m = self._do_export(use_filename_format=True)
+        self.assertTrue(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        self.d1.title = "new_title"
+        self.d1.save()
+        self._do_export(use_filename_format=True, delete=True)
+        self.assertFalse(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
+        self.assertFalse(os.path.isdir(os.path.join(self.target, "wow1")))
+        self.assertTrue(os.path.isfile(os.path.join(self.target, "new_title", "c.pdf")))
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+        self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
+        self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")))
 
     def test_export_missing_files(self):
 
src/documents/tests/test_management_thumbnails.py (new file, 52 lines)
@@ -0,0 +1,52 @@
import os
import shutil
from unittest import mock

from django.core.management import call_command
from django.test import TestCase

from documents.management.commands.document_thumbnails import _process_document
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.tests.utils import DirectoriesMixin


class TestMakeThumbnails(DirectoriesMixin, TestCase):

    def make_models(self):
        self.d1 = Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf", filename="test.pdf")
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d1.source_path)

        self.d2 = Document.objects.create(checksum="Ass", title="A", content="first document", mime_type="application/pdf", filename="test2.pdf")
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d2.source_path)

    def setUp(self) -> None:
        super(TestMakeThumbnails, self).setUp()
        self.make_models()

    def test_process_document(self):
        self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
        _process_document(self.d1.id)
        self.assertTrue(os.path.isfile(self.d1.thumbnail_path))

    @mock.patch("documents.management.commands.document_thumbnails.shutil.move")
    def test_process_document_invalid_mime_type(self, m):
        self.d1.mime_type = "asdasdasd"
        self.d1.save()

        _process_document(self.d1.id)

        m.assert_not_called()

    def test_command(self):
        self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
        self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
        call_command('document_thumbnails')
        self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
        self.assertTrue(os.path.isfile(self.d2.thumbnail_path))

    def test_command_documentid(self):
        self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
        self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
        call_command('document_thumbnails', '-d', f"{self.d1.id}")
        self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
        self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
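test_process_document_invalid_mime_type above only works if _process_document returns before touching the filesystem when no parser handles the mime type. A minimal sketch of that guard; apart from _process_document itself, the helper names are assumptions, not quotes from the command's source:

from documents.models import Document
from documents.parsers import get_parser_class_for_mime_type

def _process_document_sketch(document_id):
    document = Document.objects.get(id=document_id)
    parser_class = get_parser_class_for_mime_type(document.mime_type)
    if not parser_class:
        # Unsupported mime type: bail out before any thumbnail is
        # written, so the patched shutil.move is never reached.
        return
    # ... otherwise parse, render the thumbnail, and move it into place.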
@@ -21,13 +21,15 @@ class TestMatching(TestCase):
            matching_algorithm=getattr(klass, algorithm)
        )
        for string in true:
            doc = Document(content=string)
            self.assertTrue(
                matching.matches(instance, string),
                matching.matches(instance, doc),
                '"%s" should match "%s" but it does not' % (text, string)
            )
        for string in false:
            doc = Document(content=string)
            self.assertFalse(
                matching.matches(instance, string),
                matching.matches(instance, doc),
                '"%s" should not match "%s" but it does' % (text, string)
            )
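The paired lines in this hunk show matching.matches switching from a raw string to a Document argument, so matchers now read the document's content field. A minimal sketch of the new call shape, with the tag values assumed for illustration:

from documents import matching
from documents.models import Document, Tag

tag = Tag(match="invoice", matching_algorithm=Tag.MATCH_ANY)
doc = Document(content="this is an invoice from ACME")
assert matching.matches(tag, doc)  # the matcher inspects doc.content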
@@ -169,7 +171,7 @@ class TestMatching(TestCase):
    def test_match_regex(self):

        self._test_matching(
            r"alpha\w+gamma",
            "alpha\w+gamma",
            "MATCH_REGEX",
            (
                "I have alpha_and_gamma in me",
@@ -187,6 +189,16 @@ class TestMatching(TestCase):
            )
        )

    def test_match_invalid_regex(self):
        self._test_matching(
            "[[",
            "MATCH_REGEX",
            [],
            [
                "Don't match this"
            ]
        )

    def test_match_fuzzy(self):

        self._test_matching(
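The invalid-regex case added above expects the pattern "[[" to match nothing rather than raise, which implies the matcher swallows re.error. A generic sketch of that defensive pattern, not the project's literal implementation:

import logging
import re

def safe_regex_search(pattern, text):
    # An invalid pattern such as "[[" should match nothing, not crash.
    try:
        return bool(re.search(pattern, text, re.IGNORECASE))
    except re.error:
        logging.getLogger(__name__).warning(f"Invalid regex: {pattern}")
        return False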
@@ -98,7 +98,7 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):

        doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG)
        self.doc2_id = doc2.id
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), source_path_before(doc2))
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), source_path_before(doc2))

    def testMimeTypesMigrated(self):
        Document = self.apps.get_model('documents', 'Document')
@@ -120,3 +120,4 @@ class TestParserAvailability(TestCase):

        self.assertTrue(is_file_ext_supported('.pdf'))
        self.assertFalse(is_file_ext_supported('.hsdfh'))
        self.assertFalse(is_file_ext_supported(''))
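The new empty-string case above matters for paths with no suffix at all: os.path.splitext("README") yields "", which must be rejected rather than treated as supported. A minimal sketch of the consumer-side guard this protects, with the wrapper name assumed:

import os
from documents.parsers import is_file_ext_supported

def should_consume(filepath):
    # Extensionless files produce ext == "", which the new case rejects.
    ext = os.path.splitext(filepath)[1]
    return is_file_ext_supported(ext)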
34  src/documents/tests/test_settings.py  Normal file
@@ -0,0 +1,34 @@
import logging
from unittest import mock

from django.test import TestCase

from paperless.settings import default_task_workers, default_threads_per_worker


class TestSettings(TestCase):

    @mock.patch("paperless.settings.multiprocessing.cpu_count")
    def test_single_core(self, cpu_count):
        cpu_count.return_value = 1

        default_workers = default_task_workers()
        default_threads = default_threads_per_worker(default_workers)

        self.assertEqual(default_workers, 1)
        self.assertEqual(default_threads, 1)

    def test_workers_threads(self):
        for i in range(2, 64):
            with mock.patch("paperless.settings.multiprocessing.cpu_count") as cpu_count:
                cpu_count.return_value = i

                default_workers = default_task_workers()
                default_threads = default_threads_per_worker(default_workers)

                self.assertTrue(default_workers >= 1)
                self.assertTrue(default_threads >= 1)

                self.assertTrue(default_workers * default_threads <= i, f"{i}")
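The invariant asserted above, workers times threads never exceeding the CPU count, falls out naturally if workers default to roughly the square root of the core count and each worker gets cores divided by workers threads. A minimal sketch of such a scheme; this is an assumption about the shape of the defaults, not a copy of paperless.settings:

import math
import multiprocessing

def sketch_task_workers():
    # Roughly sqrt(cores) workers, never fewer than one.
    return max(math.floor(math.sqrt(multiprocessing.cpu_count())), 1)

def sketch_threads_per_worker(workers):
    # Split the cores across workers; flooring keeps workers * threads <= cores.
    return max(math.floor(multiprocessing.cpu_count() / workers), 1)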
30  src/documents/tests/test_views.py  Normal file
@@ -0,0 +1,30 @@
from django.conf import settings
from django.contrib.auth.models import User
from django.test import TestCase


class TestViews(TestCase):

    def setUp(self) -> None:
        self.user = User.objects.create_user("testuser")

    def test_login_redirect(self):
        response = self.client.get('/')
        self.assertEqual(response.status_code, 302)
        self.assertEqual(response.url, "/accounts/login/?next=/")

    def test_index(self):
        self.client.force_login(self.user)
        for (language_given, language_actual) in [("", "en-US"), ("en-US", "en-US"), ("de", "de"), ("en", "en-US"), ("en-us", "en-US"), ("fr", "fr"), ("jp", "en-US")]:
            if language_given:
                self.client.cookies.load({settings.LANGUAGE_COOKIE_NAME: language_given})
            elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys():
                self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME)

            response = self.client.get('/')
            self.assertEqual(response.status_code, 200)
            self.assertEqual(response.context_data['webmanifest'], f"frontend/{language_actual}/manifest.webmanifest")
            self.assertEqual(response.context_data['styles_css'], f"frontend/{language_actual}/styles.css")
            self.assertEqual(response.context_data['runtime_js'], f"frontend/{language_actual}/runtime.js")
            self.assertEqual(response.context_data['polyfills_js'], f"frontend/{language_actual}/polyfills.js")
            self.assertEqual(response.context_data['main_js'], f"frontend/{language_actual}/main.js")
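The (cookie, resolved) pairs driven through test_index above imply a fallback chain: use the cookie value when a matching frontend build exists, normalize English variants to en-US, and fall back to en-US for anything unknown. A hypothetical resolver matching those expectations; the set of built locales is assumed:

FRONTEND_LANGUAGES = {"en-US", "de", "fr"}  # assumed set of built frontend locales

def resolve_frontend_language(cookie_value):
    if cookie_value in FRONTEND_LANGUAGES:
        return cookie_value
    if cookie_value and cookie_value.lower() in ("en", "en-us"):
        return "en-US"  # "en" and "en-us" both map to the en-US bundle
    return "en-US"      # empty cookie or unknown locale such as "jp"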
@@ -1,3 +1,4 @@
import logging
import os
import tempfile
from datetime import datetime
@@ -79,7 +80,7 @@ class IndexView(TemplateView):
        context['runtime_js'] = f"frontend/{self.get_language()}/runtime.js"
        context['polyfills_js'] = f"frontend/{self.get_language()}/polyfills.js"  # NOQA: E501
        context['main_js'] = f"frontend/{self.get_language()}/main.js"
        context['manifest'] = f"frontend/{self.get_language()}/manifest.webmanifest"  # NOQA: E501
        context['webmanifest'] = f"frontend/{self.get_language()}/manifest.webmanifest"  # NOQA: E501
        return context
@@ -158,6 +159,9 @@ class DocumentViewSet(RetrieveModelMixin,
        "added",
        "archive_serial_number")

    def get_queryset(self):
        return Document.objects.distinct()

    def get_serializer(self, *args, **kwargs):
        fields_param = self.request.query_params.get('fields', None)
        if fields_param:
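The get_queryset override added above returns a distinct queryset because filtering across a many-to-many relation such as tags can emit one row per matching tag. A minimal sketch of the duplicate it prevents, with the filter values assumed:

from documents.models import Document

# Without distinct(), a document carrying both tags would appear twice.
docs = Document.objects.filter(tags__name__in=["tax", "invoice"]).distinct()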
@@ -458,12 +462,21 @@ class SearchView(APIView):
        self.ix = index.open_index()

    def add_infos_to_hit(self, r):
        doc = Document.objects.get(id=r['id'])
        try:
            doc = Document.objects.get(id=r['id'])
        except Document.DoesNotExist:
            logging.getLogger(__name__).warning(
                f"Search index returned a non-existing document: "
                f"id: {r['id']}, title: {r['title']}. "
                f"Search index needs reindex."
            )
            doc = None

        return {'id': r['id'],
                'highlights': r.highlights("content", text=doc.content),
                'highlights': r.highlights("content", text=doc.content) if doc else None,  # NOQA: E501
                'score': r.score,
                'rank': r.rank,
                'document': DocumentSerializer(doc).data,
                'document': DocumentSerializer(doc).data if doc else None,
                'title': r['title']
                }
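The hunk above swaps a hard Document.objects.get for a tolerant lookup, so a stale index entry degrades to a hit with a None document instead of an unhandled exception. The same pattern in isolation, as a generic sketch rather than the view's exact code:

def get_or_none(model, **kwargs):
    # Stale-reference guard: missing rows become None, and callers decide
    # how to degrade (here, null highlights and a null document).
    try:
        return model.objects.get(**kwargs)
    except model.DoesNotExist:
        return None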