Merge remote-tracking branch 'paperless/dev' into feature-consume-eml

This commit is contained in:
phail
2022-10-23 20:37:22 +02:00
225 changed files with 19278 additions and 25141 deletions

View File

@@ -42,6 +42,7 @@ class DocumentAdmin(admin.ModelAdmin):
"checksum",
"archive_filename",
"archive_checksum",
"original_filename",
)
list_display_links = ("title",)

View File

@@ -3,12 +3,16 @@ import os
import shutil
import tempfile
from functools import lru_cache
from typing import List # for type hinting. Can be removed, if only Python >3.8 is used
from typing import List
from typing import Optional
from typing import Tuple
import magic
from django.conf import settings
from pdf2image import convert_from_path
from pikepdf import Page
from pikepdf import Pdf
from pikepdf import PdfImage
from PIL import Image
from PIL import ImageSequence
from pyzbar import pyzbar
@@ -16,6 +20,10 @@ from pyzbar import pyzbar
logger = logging.getLogger("paperless.barcodes")
class BarcodeImageFormatError(Exception):
pass
@lru_cache(maxsize=8)
def supported_file_type(mime_type) -> bool:
"""
@@ -31,7 +39,7 @@ def supported_file_type(mime_type) -> bool:
return mime_type in supported_mime
def barcode_reader(image) -> List[str]:
def barcode_reader(image: Image) -> List[str]:
"""
Read any barcodes contained in image
Returns a list containing all found barcodes
@@ -98,21 +106,66 @@ def convert_from_tiff_to_pdf(filepath: str) -> str:
return newpath
def scan_file_for_separating_barcodes(filepath: str) -> List[int]:
def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
"""
Scan the provided pdf file for page separating barcodes
Returns a list of pagenumbers, which separate the file
Returns a PDF filepath and a list of page numbers,
which separate the file into new files
"""
def _pikepdf_barcode_scan(pdf_filepath: str):
with Pdf.open(pdf_filepath) as pdf:
for page_num, page in enumerate(pdf.pages):
for image_key in page.images:
pdfimage = PdfImage(page.images[image_key])
if "/CCITTFaxDecode" in pdfimage.filters:
raise BarcodeImageFormatError()
# Not all images can be transcoded to a PIL image, which
# is what pyzbar expects to receive
pillow_img = pdfimage.as_pil_image()
detected_barcodes = barcode_reader(pillow_img)
if settings.CONSUMER_BARCODE_STRING in detected_barcodes:
separator_page_numbers.append(page_num)
def _pdf2image_barcode_scan(pdf_filepath: str):
# use a temporary directory in case the file is too big to handle in memory
with tempfile.TemporaryDirectory() as path:
pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
for current_page_number, page in enumerate(pages_from_path):
current_barcodes = barcode_reader(page)
if settings.CONSUMER_BARCODE_STRING in current_barcodes:
separator_page_numbers.append(current_page_number)
separator_page_numbers = []
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
# use a temporary directory in case the file is too big to handle in memory
with tempfile.TemporaryDirectory() as path:
pages_from_path = convert_from_path(filepath, output_folder=path)
for current_page_number, page in enumerate(pages_from_path):
current_barcodes = barcode_reader(page)
if separator_barcode in current_barcodes:
separator_page_numbers.append(current_page_number)
return separator_page_numbers
pdf_filepath = None
mime_type = get_file_mime_type(filepath)
if supported_file_type(mime_type):
pdf_filepath = filepath
if mime_type == "image/tiff":
pdf_filepath = convert_from_tiff_to_pdf(filepath)
try:
_pikepdf_barcode_scan(pdf_filepath)
except Exception as e:
logger.warning(
f"Exception using pikepdf for barcodes, falling back to pdf2image: {e}",
)
# Reset this in case pikepdf got partway through
separator_page_numbers = []
_pdf2image_barcode_scan(pdf_filepath)
else:
logger.warning(
f"Unsupported file format for barcode reader: {str(mime_type)}",
)
return pdf_filepath, separator_page_numbers
def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
@@ -122,47 +175,56 @@ def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
Returns a list of (temporary) filepaths to consume.
These will need to be deleted later.
"""
document_paths = []
if not pages_to_split_on:
logger.warning("No pages to split on!")
return document_paths
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
fname = os.path.splitext(os.path.basename(filepath))[0]
pdf = Pdf.open(filepath)
document_paths = []
logger.debug(f"Temp dir is {str(tempdir)}")
if not pages_to_split_on:
logger.warning("No pages to split on!")
else:
# go from the first page to the first separator page
# A list of documents, ie a list of lists of pages
documents: List[List[Page]] = []
# A single document, ie a list of pages
document: List[Page] = []
for idx, page in enumerate(pdf.pages):
# Keep building the new PDF as long as it is not a
# separator index
if idx not in pages_to_split_on:
document.append(page)
# Make sure to append the very last document to the documents
if idx == (len(pdf.pages) - 1):
documents.append(document)
document = []
else:
# This is a split index, save the current PDF pages, and restart
# a new destination page listing
logger.debug(f"Starting new document at idx {idx}")
documents.append(document)
document = []
documents = [x for x in documents if len(x)]
logger.debug(f"Split into {len(documents)} new documents")
# Write the new documents out
for doc_idx, document in enumerate(documents):
dst = Pdf.new()
for n, page in enumerate(pdf.pages):
if n < pages_to_split_on[0]:
dst.pages.append(page)
output_filename = f"{fname}_document_0.pdf"
dst.pages.extend(document)
output_filename = f"{fname}_document_{doc_idx}.pdf"
logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
savepath = os.path.join(tempdir, output_filename)
with open(savepath, "wb") as out:
dst.save(out)
document_paths = [savepath]
document_paths.append(savepath)
# iterate through the rest of the document
for count, page_number in enumerate(pages_to_split_on):
logger.debug(f"Count: {str(count)} page_number: {str(page_number)}")
dst = Pdf.new()
try:
next_page = pages_to_split_on[count + 1]
except IndexError:
next_page = len(pdf.pages)
# skip the first page_number. This contains the barcode page
for page in range(page_number + 1, next_page):
logger.debug(
f"page_number: {str(page_number)} next_page: {str(next_page)}",
)
dst.pages.append(pdf.pages[page])
output_filename = f"{fname}_document_{str(count + 1)}.pdf"
logger.debug(f"pdf no:{str(count)} has {str(len(dst.pages))} pages")
savepath = os.path.join(tempdir, output_filename)
with open(savepath, "wb") as out:
dst.save(out)
document_paths.append(savepath)
logger.debug(f"Temp files are {str(document_paths)}")
return document_paths
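
The refactored scan_file_for_separating_barcodes above now owns the mime-type check and the TIFF-to-PDF conversion, returning both the working PDF path and the separator pages. A minimal sketch of the new calling pattern (the input path is hypothetical and a configured paperless environment is assumed):

# Minimal sketch of the new calling pattern; "/tmp/scan.tiff" is hypothetical.
from documents import barcodes

pdf_filepath, separators = barcodes.scan_file_for_separating_barcodes("/tmp/scan.tiff")
if pdf_filepath is None:
    print("Unsupported file format")  # mime type was rejected
elif separators:
    # separate_pages() writes one temporary PDF per split document
    for new_doc in barcodes.separate_pages(pdf_filepath, separators):
        print(f"New document to consume: {new_doc}")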

View File

@@ -1,11 +1,12 @@
import itertools
from django.db.models import Q
from django_q.tasks import async_task
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.tasks import bulk_update_documents
from documents.tasks import update_document_archive_file
def set_correspondent(doc_ids, correspondent):
@@ -16,7 +17,7 @@ def set_correspondent(doc_ids, correspondent):
affected_docs = [doc.id for doc in qs]
qs.update(correspondent=correspondent)
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
bulk_update_documents.delay(document_ids=affected_docs)
return "OK"
@@ -31,8 +32,7 @@ def set_storage_path(doc_ids, storage_path):
affected_docs = [doc.id for doc in qs]
qs.update(storage_path=storage_path)
async_task(
"documents.tasks.bulk_update_documents",
bulk_update_documents.delay(
document_ids=affected_docs,
)
@@ -47,7 +47,7 @@ def set_document_type(doc_ids, document_type):
affected_docs = [doc.id for doc in qs]
qs.update(document_type=document_type)
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
bulk_update_documents.delay(document_ids=affected_docs)
return "OK"
@@ -63,7 +63,7 @@ def add_tag(doc_ids, tag):
[DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs],
)
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
bulk_update_documents.delay(document_ids=affected_docs)
return "OK"
@@ -79,7 +79,7 @@ def remove_tag(doc_ids, tag):
Q(document_id__in=affected_docs) & Q(tag_id=tag),
).delete()
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
bulk_update_documents.delay(document_ids=affected_docs)
return "OK"
@@ -103,7 +103,7 @@ def modify_tags(doc_ids, add_tags, remove_tags):
ignore_conflicts=True,
)
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
bulk_update_documents.delay(document_ids=affected_docs)
return "OK"
@@ -122,6 +122,9 @@ def delete(doc_ids):
def redo_ocr(doc_ids):
async_task("documents.tasks.redo_ocr", document_ids=doc_ids)
for document_id in doc_ids:
update_document_archive_file.delay(
document_id=document_id,
)
return "OK"

View File

@@ -5,12 +5,15 @@ import pickle
import re
import shutil
import warnings
from typing import List
from typing import Optional
from django.conf import settings
from documents.models import Document
from documents.models import MatchingModel
logger = logging.getLogger("paperless.classifier")
class IncompatibleClassifierVersionError(Exception):
pass
@@ -20,15 +23,6 @@ class ClassifierModelCorruptError(Exception):
pass
logger = logging.getLogger("paperless.classifier")
def preprocess_content(content: str) -> str:
content = content.lower().strip()
content = re.sub(r"\s+", " ", content)
return content
def load_classifier() -> Optional["DocumentClassifier"]:
if not os.path.isfile(settings.MODEL_FILE):
logger.debug(
@@ -81,6 +75,9 @@ class DocumentClassifier:
self.document_type_classifier = None
self.storage_path_classifier = None
self._stemmer = None
self._stop_words = None
def load(self):
# Catch warnings for processing
with warnings.catch_warnings(record=True) as w:
@@ -101,8 +98,8 @@ class DocumentClassifier:
self.correspondent_classifier = pickle.load(f)
self.document_type_classifier = pickle.load(f)
self.storage_path_classifier = pickle.load(f)
except Exception:
raise ClassifierModelCorruptError()
except Exception as err:
raise ClassifierModelCorruptError() from err
# Check for the warning about unpickling from differing versions
# and consider it incompatible
@@ -139,11 +136,11 @@ class DocumentClassifier:
def train(self):
data = list()
labels_tags = list()
labels_correspondent = list()
labels_document_type = list()
labels_storage_path = list()
data = []
labels_tags = []
labels_correspondent = []
labels_document_type = []
labels_storage_path = []
# Step 1: Extract and preprocess training data from the database.
logger.debug("Gathering data from database...")
@@ -151,7 +148,7 @@ class DocumentClassifier:
for doc in Document.objects.order_by("pk").exclude(
tags__is_inbox_tag=True,
):
preprocessed_content = preprocess_content(doc.content)
preprocessed_content = self.preprocess_content(doc.content)
m.update(preprocessed_content.encode("utf-8"))
data.append(preprocessed_content)
@@ -231,6 +228,11 @@ class DocumentClassifier:
)
data_vectorized = self.data_vectorizer.fit_transform(data)
# See the notes here:
# https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html # noqa: 501
# This attribute isn't needed to function and can be large
self.data_vectorizer.stop_words_ = None
# Step 3: train the classifiers
if num_tags > 0:
logger.debug("Training tags classifier...")
@@ -296,9 +298,52 @@ class DocumentClassifier:
return True
def preprocess_content(self, content: str) -> str:
"""
Processes the contents of a document, distilling it down into
words which are meaningful to the content
"""
# Lower case the document
content = content.lower().strip()
# Reduce spaces
content = re.sub(r"\s+", " ", content)
# Get only the letters
content = re.sub(r"[^\w\s]", " ", content)
# If the NLTK language is supported, do further processing
if settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
# Not really hacky, since it isn't private and is documented, but
# set the search path for NLTK data to the single location it should be in
nltk.data.path = [settings.NLTK_DIR]
# Do some one time setup
if self._stemmer is None:
self._stemmer = SnowballStemmer(settings.NLTK_LANGUAGE)
if self._stop_words is None:
self._stop_words = set(stopwords.words(settings.NLTK_LANGUAGE))
# Tokenize
words: List[str] = word_tokenize(content, language=settings.NLTK_LANGUAGE)
# Remove stop words
meaningful_words = [w for w in words if w not in self._stop_words]
# Stem words
meaningful_words = [self._stemmer.stem(w) for w in meaningful_words]
return " ".join(meaningful_words)
return content
def predict_correspondent(self, content):
if self.correspondent_classifier:
X = self.data_vectorizer.transform([preprocess_content(content)])
X = self.data_vectorizer.transform([self.preprocess_content(content)])
correspondent_id = self.correspondent_classifier.predict(X)
if correspondent_id != -1:
return correspondent_id
@@ -309,7 +354,7 @@ class DocumentClassifier:
def predict_document_type(self, content):
if self.document_type_classifier:
X = self.data_vectorizer.transform([preprocess_content(content)])
X = self.data_vectorizer.transform([self.preprocess_content(content)])
document_type_id = self.document_type_classifier.predict(X)
if document_type_id != -1:
return document_type_id
@@ -322,7 +367,7 @@ class DocumentClassifier:
from sklearn.utils.multiclass import type_of_target
if self.tags_classifier:
X = self.data_vectorizer.transform([preprocess_content(content)])
X = self.data_vectorizer.transform([self.preprocess_content(content)])
y = self.tags_classifier.predict(X)
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
if type_of_target(y).startswith("multilabel"):
@@ -341,7 +386,7 @@ class DocumentClassifier:
def predict_storage_path(self, content):
if self.storage_path_classifier:
X = self.data_vectorizer.transform([preprocess_content(content)])
X = self.data_vectorizer.transform([self.preprocess_content(content)])
storage_path_id = self.storage_path_classifier.predict(X)
if storage_path_id != -1:
return storage_path_id
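
For reference, a standalone sketch of the NLTK pipeline that preprocess_content now applies when NLTK is enabled; the "english" language and the nltk.download() calls are illustrative assumptions, not paperless settings:

import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize

# One-time corpus downloads, assumed here for a self-contained example
nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)

def preprocess(content: str, language: str = "english") -> str:
    # Lower case, collapse whitespace, keep only word characters
    content = re.sub(r"\s+", " ", content.lower().strip())
    content = re.sub(r"[^\w\s]", " ", content)
    stemmer = SnowballStemmer(language)
    stop_words = set(stopwords.words(language))
    words = word_tokenize(content, language=language)
    return " ".join(stemmer.stem(w) for w in words if w not in stop_words)

print(preprocess("The quick brown foxes were jumping!"))  # e.g. "quick brown fox jump"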

View File

@@ -78,10 +78,16 @@ class Consumer(LoggingMixin):
{"type": "status_update", "data": payload},
)
def _fail(self, message, log_message=None, exc_info=None):
def _fail(
self,
message,
log_message=None,
exc_info=None,
exception: Optional[Exception] = None,
):
self._send_progress(100, 100, "FAILED", message)
self.log("error", log_message or message, exc_info=exc_info)
raise ConsumerError(f"{self.filename}: {log_message or message}")
raise ConsumerError(f"{self.filename}: {log_message or message}") from exception
def __init__(self):
super().__init__()
@@ -105,14 +111,16 @@ class Consumer(LoggingMixin):
def pre_check_duplicate(self):
with open(self.path, "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
if Document.objects.filter(
existing_doc = Document.objects.filter(
Q(checksum=checksum) | Q(archive_checksum=checksum),
).exists():
)
if existing_doc.exists():
if settings.CONSUMER_DELETE_DUPLICATES:
os.unlink(self.path)
self._fail(
MESSAGE_DOCUMENT_ALREADY_EXISTS,
f"Not consuming {self.filename}: It is a duplicate.",
f"Not consuming {self.filename}: It is a duplicate of"
f" {existing_doc.get().title} (#{existing_doc.get().pk})",
)
def pre_check_directories(self):
@@ -134,13 +142,25 @@ class Consumer(LoggingMixin):
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
filepath_arg = os.path.normpath(self.path)
script_env = os.environ.copy()
script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg
try:
Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
Popen(
(
settings.PRE_CONSUME_SCRIPT,
filepath_arg,
),
env=script_env,
).wait()
except Exception as e:
self._fail(
MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
f"Error while executing pre-consume script: {e}",
exc_info=True,
exception=e,
)
def run_post_consume_script(self, document):
@@ -159,6 +179,34 @@ class Consumer(LoggingMixin):
f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}",
)
script_env = os.environ.copy()
script_env["DOCUMENT_ID"] = str(document.pk)
script_env["DOCUMENT_CREATED"] = str(document.created)
script_env["DOCUMENT_MODIFIED"] = str(document.modified)
script_env["DOCUMENT_ADDED"] = str(document.added)
script_env["DOCUMENT_FILE_NAME"] = document.get_public_filename()
script_env["DOCUMENT_SOURCE_PATH"] = os.path.normpath(document.source_path)
script_env["DOCUMENT_ARCHIVE_PATH"] = os.path.normpath(
str(document.archive_path),
)
script_env["DOCUMENT_THUMBNAIL_PATH"] = os.path.normpath(
document.thumbnail_path,
)
script_env["DOCUMENT_DOWNLOAD_URL"] = reverse(
"document-download",
kwargs={"pk": document.pk},
)
script_env["DOCUMENT_THUMBNAIL_URL"] = reverse(
"document-thumb",
kwargs={"pk": document.pk},
)
script_env["DOCUMENT_CORRESPONDENT"] = str(document.correspondent)
script_env["DOCUMENT_TAGS"] = str(
",".join(document.tags.all().values_list("name", flat=True)),
)
script_env["DOCUMENT_ORIGINAL_FILENAME"] = str(document.original_filename)
try:
Popen(
(
@@ -172,12 +220,14 @@ class Consumer(LoggingMixin):
str(document.correspondent),
str(",".join(document.tags.all().values_list("name", flat=True))),
),
env=script_env,
).wait()
except Exception as e:
self._fail(
MESSAGE_POST_CONSUME_SCRIPT_ERROR,
f"Error while executing post-consume script: {e}",
exc_info=True,
exception=e,
)
def try_consume_file(
@@ -292,6 +342,7 @@ class Consumer(LoggingMixin):
str(e),
f"Error while consuming document {self.filename}: {e}",
exc_info=True,
exception=e,
)
# Prepare the document classifier.
@@ -376,6 +427,7 @@ class Consumer(LoggingMixin):
f"The following error occurred while consuming "
f"{self.filename}: {e}",
exc_info=True,
exception=e,
)
finally:
document_parser.cleanup()
@@ -426,6 +478,7 @@ class Consumer(LoggingMixin):
created=create_date,
modified=create_date,
storage_type=storage_type,
original_filename=self.filename,
)
self.apply_overrides(document)
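
Pre- and post-consume scripts now receive their document context through environment variables in addition to positional arguments. A sketch of a post-consume script reading them (the script itself is hypothetical; the variable names are the ones exported above):

#!/usr/bin/env python3
# Hypothetical post-consume script using the environment variables above.
import os

doc_id = os.environ["DOCUMENT_ID"]
source_path = os.environ["DOCUMENT_SOURCE_PATH"]
original_name = os.environ.get("DOCUMENT_ORIGINAL_FILENAME", "")
print(f"Consumed document {doc_id} from {source_path} (original: {original_name})")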

View File

@@ -1,85 +1,18 @@
import hashlib
import logging
import multiprocessing
import os
import shutil
import uuid
import tqdm
from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from documents.models import Document
from filelock import FileLock
from ... import index
from ...file_handling import create_source_path_directory
from ...file_handling import generate_unique_filename
from ...parsers import get_parser_class_for_mime_type
from documents.tasks import update_document_archive_file
logger = logging.getLogger("paperless.management.archiver")
def handle_document(document_id):
document = Document.objects.get(id=document_id)
mime_type = document.mime_type
parser_class = get_parser_class_for_mime_type(mime_type)
if not parser_class:
logger.error(
f"No parser found for mime type {mime_type}, cannot "
f"archive document {document} (ID: {document_id})",
)
return
parser = parser_class(logging_group=uuid.uuid4())
try:
parser.parse(document.source_path, mime_type, document.get_public_filename())
thumbnail = parser.get_thumbnail(
document.source_path,
mime_type,
document.get_public_filename(),
)
if parser.get_archive_path():
with transaction.atomic():
with open(parser.get_archive_path(), "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
# I'm going to save first so that in case the file move
# fails, the database is rolled back.
# We also don't use save() since that triggers the filehandling
# logic, and we don't want that yet (file not yet in place)
document.archive_filename = generate_unique_filename(
document,
archive_filename=True,
)
Document.objects.filter(pk=document.pk).update(
archive_checksum=checksum,
content=parser.get_text(),
archive_filename=document.archive_filename,
)
with FileLock(settings.MEDIA_LOCK):
create_source_path_directory(document.archive_path)
shutil.move(parser.get_archive_path(), document.archive_path)
shutil.move(thumbnail, document.thumbnail_path)
with index.open_index_writer() as writer:
index.update_document(writer, document)
except Exception:
logger.exception(
f"Error while parsing document {document} " f"(ID: {document_id})",
)
finally:
parser.cleanup()
class Command(BaseCommand):
help = """
@@ -146,7 +79,7 @@ class Command(BaseCommand):
with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(handle_document, document_ids),
pool.imap_unordered(update_document_archive_file, document_ids),
total=len(document_ids),
disable=options["no_progress_bar"],
),

View File

@@ -2,6 +2,7 @@ import logging
import os
from pathlib import Path
from pathlib import PurePath
from threading import Event
from threading import Thread
from time import monotonic
from time import sleep
@@ -10,9 +11,9 @@ from typing import Final
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django_q.tasks import async_task
from documents.models import Tag
from documents.parsers import is_file_ext_supported
from documents.tasks import consume_file
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
@@ -91,11 +92,9 @@ def _consume(filepath):
try:
logger.info(f"Adding {filepath} to the task queue.")
async_task(
"documents.tasks.consume_file",
consume_file.delay(
filepath,
override_tag_ids=tag_ids if tag_ids else None,
task_name=os.path.basename(filepath)[:100],
)
except Exception:
# Catch all so that the consumer won't crash.
@@ -148,9 +147,11 @@ class Command(BaseCommand):
"""
# This is here primarily for the tests and is irrelevant in production.
stop_flag = False
observer = None
stop_flag = Event()
# Also only for testing, configures in one place the timeout used before checking
# the stop flag
testing_timeout_s: Final[float] = 0.5
testing_timeout_ms: Final[float] = testing_timeout_s * 1000.0
def add_arguments(self, parser):
parser.add_argument(
@@ -161,6 +162,16 @@ class Command(BaseCommand):
)
parser.add_argument("--oneshot", action="store_true", help="Run only once.")
# Only use during unit testing, will configure a timeout
# Leaving it unset or false and the consumer will exit when it
# receives SIGINT
parser.add_argument(
"--testing",
action="store_true",
help="Flag used only for unit testing",
default=False,
)
def handle(self, *args, **options):
directory = options["directory"]
recursive = settings.CONSUMER_RECURSIVE
@@ -186,29 +197,40 @@ class Command(BaseCommand):
return
if settings.CONSUMER_POLLING == 0 and INotify:
self.handle_inotify(directory, recursive)
self.handle_inotify(directory, recursive, options["testing"])
else:
self.handle_polling(directory, recursive)
self.handle_polling(directory, recursive, options["testing"])
logger.debug("Consumer exiting.")
def handle_polling(self, directory, recursive):
def handle_polling(self, directory, recursive, is_testing: bool):
logger.info(f"Polling directory for changes: {directory}")
self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
self.observer.schedule(Handler(), directory, recursive=recursive)
self.observer.start()
try:
while self.observer.is_alive():
self.observer.join(1)
if self.stop_flag:
self.observer.stop()
except KeyboardInterrupt:
self.observer.stop()
self.observer.join()
def handle_inotify(self, directory, recursive):
timeout = None
if is_testing:
timeout = self.testing_timeout_s
logger.debug(f"Configuring timeout to {timeout}s")
observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
observer.schedule(Handler(), directory, recursive=recursive)
observer.start()
try:
while observer.is_alive():
observer.join(timeout)
if self.stop_flag.is_set():
observer.stop()
except KeyboardInterrupt:
observer.stop()
observer.join()
def handle_inotify(self, directory, recursive, is_testing: bool):
logger.info(f"Using inotify to watch directory for changes: {directory}")
timeout = None
if is_testing:
timeout = self.testing_timeout_ms
logger.debug(f"Configuring timeout to {timeout}ms")
inotify = INotify()
inotify_flags = flags.CLOSE_WRITE | flags.MOVED_TO
if recursive:
@@ -216,14 +238,15 @@ class Command(BaseCommand):
else:
descriptor = inotify.add_watch(directory, inotify_flags)
try:
inotify_debounce: Final[float] = settings.CONSUMER_INOTIFY_DELAY
notified_files = {}
finished = False
while not self.stop_flag:
notified_files = {}
for event in inotify.read(timeout=1000):
while not finished:
try:
for event in inotify.read(timeout=timeout):
if recursive:
path = inotify.get_path(event.wd)
else:
@@ -256,8 +279,22 @@ class Command(BaseCommand):
# These files are still waiting to hit the timeout
notified_files = still_waiting
except KeyboardInterrupt:
pass
# If files are waiting, need to exit read() to check them
# Otherwise, go back to infinite sleep time, but only if not testing
if len(notified_files) > 0:
timeout = inotify_debounce
elif is_testing:
timeout = self.testing_timeout_ms
else:
timeout = None
if self.stop_flag.is_set():
logger.debug("Finishing because event is set")
finished = True
except KeyboardInterrupt:
logger.info("Received SIGINT, stopping inotify")
finished = True
inotify.rm_watch(descriptor)
inotify.close()
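
The stop flag here changes from a plain boolean to a threading.Event, and inotify.read() gains a variable timeout so the loop can wake up to check it. A minimal sketch of that pattern in isolation:

# Minimal sketch of the Event-based stop pattern; time.sleep() stands in
# for the blocking inotify.read(timeout=...) call.
import time
from threading import Event, Thread

stop_flag = Event()

def watch_loop(timeout_s: float = 0.5) -> None:
    while not stop_flag.is_set():
        time.sleep(timeout_s)  # block briefly, then re-check the flag
    print("Finishing because event is set")

t = Thread(target=watch_loop)
t.start()
stop_flag.set()  # e.g. from a test, instead of sending SIGINT
t.join()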

View File

@@ -12,11 +12,13 @@ from django.core import serializers
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import transaction
from documents.models import Comment
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import SavedView
from documents.models import SavedViewFilterRule
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
from documents.settings import EXPORTER_ARCHIVE_NAME
@@ -113,8 +115,8 @@ class Command(BaseCommand):
map(lambda f: os.path.abspath(os.path.join(root, f)), files),
)
# 2. Create manifest, containing all correspondents, types, tags,
# documents and ui_settings
# 2. Create manifest, containing all correspondents, types, tags, storage paths
# comments, documents and ui_settings
with transaction.atomic():
manifest = json.loads(
serializers.serialize("json", Correspondent.objects.all()),
@@ -126,6 +128,14 @@ class Command(BaseCommand):
serializers.serialize("json", DocumentType.objects.all()),
)
manifest += json.loads(
serializers.serialize("json", StoragePath.objects.all()),
)
manifest += json.loads(
serializers.serialize("json", Comment.objects.all()),
)
documents = Document.objects.order_by("id")
document_map = {d.pk: d for d in documents}
document_manifest = json.loads(serializers.serialize("json", documents))

View File

@@ -3,6 +3,7 @@ import logging
import os
import shutil
from contextlib import contextmanager
from pathlib import Path
import tqdm
from django.conf import settings
@@ -14,6 +15,7 @@ from django.core.serializers.base import DeserializationError
from django.db.models.signals import m2m_changed
from django.db.models.signals import post_save
from documents.models import Document
from documents.parsers import run_convert
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
@@ -192,7 +194,7 @@ class Command(BaseCommand):
document_path = os.path.join(self.source, doc_file)
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
thumbnail_path = os.path.join(self.source, thumb_file)
thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()
if EXPORTER_ARCHIVE_NAME in record:
archive_file = record[EXPORTER_ARCHIVE_NAME]
@@ -209,7 +211,20 @@ class Command(BaseCommand):
create_source_path_directory(document.source_path)
shutil.copy2(document_path, document.source_path)
shutil.copy2(thumbnail_path, document.thumbnail_path)
if thumbnail_path.suffix in {".png", ".PNG"}:
run_convert(
density=300,
scale="500x5000>",
alpha="remove",
strip=True,
trim=False,
auto_orient=True,
input_file=f"{thumbnail_path}[0]",
output_file=str(document.thumbnail_path),
)
else:
shutil.copy2(thumbnail_path, document.thumbnail_path)
if archive_path:
create_source_path_directory(document.archive_path)
# TODO: this assumes that the export is valid and

View File

@@ -1,35 +0,0 @@
import tqdm
from django.core.management.base import BaseCommand
from documents.tasks import redo_ocr
class Command(BaseCommand):
help = """
This will rename all documents to match the latest filename format.
""".replace(
" ",
"",
)
def add_arguments(self, parser):
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
parser.add_argument(
"documents",
nargs="+",
help="Document primary keys for re-processing OCR on",
)
def handle(self, *args, **options):
doc_pks = tqdm.tqdm(
options["documents"],
disable=options["no_progress_bar"],
)
redo_ocr(doc_pks)

View File

@@ -7,6 +7,7 @@ from documents.models import Document
from ...signals.handlers import set_correspondent
from ...signals.handlers import set_document_type
from ...signals.handlers import set_storage_path
from ...signals.handlers import set_tags
@@ -29,6 +30,7 @@ class Command(BaseCommand):
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
parser.add_argument("-T", "--tags", default=False, action="store_true")
parser.add_argument("-t", "--document_type", default=False, action="store_true")
parser.add_argument("-s", "--storage_path", default=False, action="store_true")
parser.add_argument("-i", "--inbox-only", default=False, action="store_true")
parser.add_argument(
"--use-first",
@@ -112,3 +114,14 @@ class Command(BaseCommand):
base_url=options["base_url"],
color=color,
)
if options["storage_path"]:
set_storage_path(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
color=color,
)

View File

@@ -1,34 +1,14 @@
# Generated by Django 3.1.3 on 2020-11-09 16:36
from django.db import migrations
from django.db.migrations import RunPython
from django_q.models import Schedule
from django_q.tasks import schedule
def add_schedules(apps, schema_editor):
schedule(
"documents.tasks.train_classifier",
name="Train the classifier",
schedule_type=Schedule.HOURLY,
)
schedule(
"documents.tasks.index_optimize",
name="Optimize the index",
schedule_type=Schedule.DAILY,
)
def remove_schedules(apps, schema_editor):
Schedule.objects.filter(func="documents.tasks.train_classifier").delete()
Schedule.objects.filter(func="documents.tasks.index_optimize").delete()
class Migration(migrations.Migration):
dependencies = [
("documents", "1000_update_paperless_all"),
("django_q", "0013_task_attempt_count"),
]
operations = [RunPython(add_schedules, remove_schedules)]
operations = [
migrations.RunPython(migrations.RunPython.noop, migrations.RunPython.noop)
]

View File

@@ -2,27 +2,12 @@
from django.db import migrations
from django.db.migrations import RunPython
from django_q.models import Schedule
from django_q.tasks import schedule
def add_schedules(apps, schema_editor):
schedule(
"documents.tasks.sanity_check",
name="Perform sanity check",
schedule_type=Schedule.WEEKLY,
)
def remove_schedules(apps, schema_editor):
Schedule.objects.filter(func="documents.tasks.sanity_check").delete()
class Migration(migrations.Migration):
dependencies = [
("documents", "1003_mime_types"),
("django_q", "0013_task_attempt_count"),
]
operations = [RunPython(add_schedules, remove_schedules)]
operations = [RunPython(migrations.RunPython.noop, migrations.RunPython.noop)]

View File

@@ -4,28 +4,9 @@ from django.db import migrations, models
import django.db.models.deletion
def init_paperless_tasks(apps, schema_editor):
PaperlessTask = apps.get_model("documents", "PaperlessTask")
Task = apps.get_model("django_q", "Task")
for task in Task.objects.filter(func="documents.tasks.consume_file"):
if not hasattr(task, "paperlesstask"):
paperlesstask = PaperlessTask.objects.create(
attempted_task=task,
task_id=task.id,
name=task.name,
created=task.started,
started=task.started,
acknowledged=True,
)
task.paperlesstask = paperlesstask
task.save()
class Migration(migrations.Migration):
dependencies = [
("django_q", "0014_schedule_cluster"),
("documents", "1021_webp_thumbnail_conversion"),
]
@@ -60,10 +41,12 @@ class Migration(migrations.Migration):
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="attempted_task",
to="django_q.task",
# This is a dummy field, 1026 will fix up the column
# This manual change is required, as django doesn't really support
# removing an app which has migration deps like this
to="documents.document",
),
),
],
),
migrations.RunPython(init_paperless_tasks, migrations.RunPython.noop),
)
]

View File

@@ -0,0 +1,69 @@
from django.db import migrations, models
import django.utils.timezone
from django.conf import settings
class Migration(migrations.Migration):
dependencies = [
("documents", "1022_paperlesstask"),
]
operations = [
migrations.CreateModel(
name="Comment",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"comment",
models.TextField(
blank=True,
help_text="Comment for the document",
verbose_name="content",
),
),
(
"created",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="created",
),
),
(
"document",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="documents",
to="documents.document",
verbose_name="document",
),
),
(
"user",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="users",
to=settings.AUTH_USER_MODEL,
verbose_name="user",
),
),
],
options={
"verbose_name": "comment",
"verbose_name_plural": "comments",
"ordering": ("created",),
},
),
]

View File

@@ -0,0 +1,25 @@
# Generated by Django 4.0.6 on 2022-07-25 06:34
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("documents", "1023_add_comments"),
]
operations = [
migrations.AddField(
model_name="document",
name="original_filename",
field=models.CharField(
default=None,
editable=False,
help_text="The original name of the file when it was uploaded",
max_length=1024,
null=True,
verbose_name="original filename",
),
),
]

View File

@@ -0,0 +1,48 @@
# Generated by Django 4.0.5 on 2022-08-26 16:49
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("documents", "1024_document_original_filename"),
]
operations = [
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
(23, "ASN greater than"),
(24, "ASN less than"),
(25, "storage path is"),
],
verbose_name="rule type",
),
),
]

View File

@@ -0,0 +1,57 @@
# Generated by Django 4.1.1 on 2022-09-27 19:31
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
("django_celery_results", "0011_taskresult_periodic_task_name"),
("documents", "1025_alter_savedviewfilterrule_rule_type"),
]
operations = [
migrations.RemoveField(
model_name="paperlesstask",
name="created",
),
migrations.RemoveField(
model_name="paperlesstask",
name="name",
),
migrations.RemoveField(
model_name="paperlesstask",
name="started",
),
# Remove the field from the model
migrations.RemoveField(
model_name="paperlesstask",
name="attempted_task",
),
# Add the field back, pointing to the correct model
# This resolves a problem where the temporary change in 1022
# results in a type mismatch
migrations.AddField(
model_name="paperlesstask",
name="attempted_task",
field=models.OneToOneField(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="attempted_task",
to="django_celery_results.taskresult",
),
),
# Drop the django-q tables entirely
# Must be done last or there could be references here
migrations.RunSQL(
"DROP TABLE IF EXISTS django_q_ormq", reverse_sql=migrations.RunSQL.noop
),
migrations.RunSQL(
"DROP TABLE IF EXISTS django_q_schedule", reverse_sql=migrations.RunSQL.noop
),
migrations.RunSQL(
"DROP TABLE IF EXISTS django_q_task", reverse_sql=migrations.RunSQL.noop
),
]

View File

@@ -12,7 +12,7 @@ from django.contrib.auth.models import User
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from django_q.tasks import Task
from django_celery_results.models import TaskResult
from documents.parsers import get_default_file_extension
@@ -214,6 +214,16 @@ class Document(models.Model):
help_text=_("Current archive filename in storage"),
)
original_filename = models.CharField(
_("original filename"),
max_length=1024,
editable=False,
default=None,
unique=False,
null=True,
help_text=_("The original name of the file when it was uploaded"),
)
archive_serial_number = models.IntegerField(
_("archive serial number"),
blank=True,
@@ -394,6 +404,9 @@ class SavedViewFilterRule(models.Model):
(20, _("fulltext query")),
(21, _("more like this")),
(22, _("has tags in")),
(23, _("ASN greater than")),
(24, _("ASN less than")),
(25, _("storage path is")),
]
saved_view = models.ForeignKey(
@@ -514,16 +527,53 @@ class UiSettings(models.Model):
class PaperlessTask(models.Model):
task_id = models.CharField(max_length=128)
name = models.CharField(max_length=256)
created = models.DateTimeField(_("created"), auto_now=True)
started = models.DateTimeField(_("started"), null=True)
acknowledged = models.BooleanField(default=False)
attempted_task = models.OneToOneField(
Task,
TaskResult,
on_delete=models.CASCADE,
related_name="attempted_task",
null=True,
blank=True,
)
acknowledged = models.BooleanField(default=False)
class Comment(models.Model):
comment = models.TextField(
_("content"),
blank=True,
help_text=_("Comment for the document"),
)
created = models.DateTimeField(
_("created"),
default=timezone.now,
db_index=True,
)
document = models.ForeignKey(
Document,
blank=True,
null=True,
related_name="documents",
on_delete=models.CASCADE,
verbose_name=_("document"),
)
user = models.ForeignKey(
User,
blank=True,
null=True,
related_name="users",
on_delete=models.SET_NULL,
verbose_name=_("user"),
)
class Meta:
ordering = ("created",)
verbose_name = _("comment")
verbose_name_plural = _("comments")
def __str__(self):
return self.content
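
A sketch of attaching a comment to a document with the new model (assumes at least one Document and User already exist in the database):

# Sketch using the new Comment model; existing Document and User assumed.
from django.contrib.auth.models import User

from documents.models import Comment, Document

Comment.objects.create(
    comment="Needs review",            # the text field defined above
    document=Document.objects.first(),
    user=User.objects.first(),
)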

View File

@@ -6,6 +6,8 @@ import re
import shutil
import subprocess
import tempfile
from typing import Iterator
from typing import Match
from typing import Optional
from typing import Set
@@ -216,6 +218,10 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> str:
def parse_date(filename, text) -> Optional[datetime.datetime]:
return next(parse_date_generator(filename, text), None)
def parse_date_generator(filename, text) -> Iterator[datetime.datetime]:
"""
Returns the date of the document.
"""
@@ -246,38 +252,32 @@ def parse_date(filename, text) -> Optional[datetime.datetime]:
return date
return None
date = None
def __process_match(
match: Match[str],
date_order: str,
) -> Optional[datetime.datetime]:
date_string = match.group(0)
try:
date = __parser(date_string, date_order)
except (TypeError, ValueError):
# Skip all matches that do not parse to a proper date
date = None
return __filter(date)
def __process_content(content: str, date_order: str) -> Iterator[datetime.datetime]:
for m in re.finditer(DATE_REGEX, content):
date = __process_match(m, date_order)
if date is not None:
yield date
# if filename date parsing is enabled, search there first:
if settings.FILENAME_DATE_ORDER:
for m in re.finditer(DATE_REGEX, filename):
date_string = m.group(0)
try:
date = __parser(date_string, settings.FILENAME_DATE_ORDER)
except (TypeError, ValueError):
# Skip all matches that do not parse to a proper date
continue
date = __filter(date)
if date is not None:
return date
yield from __process_content(filename, settings.FILENAME_DATE_ORDER)
# Iterate through all regex matches in text and try to parse the date
for m in re.finditer(DATE_REGEX, text):
date_string = m.group(0)
try:
date = __parser(date_string, settings.DATE_ORDER)
except (TypeError, ValueError):
# Skip all matches that do not parse to a proper date
continue
date = __filter(date)
if date is not None:
return date
return date
yield from __process_content(text, settings.DATE_ORDER)
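
Turning date parsing into a generator lets callers pull more than one candidate date, while parse_date keeps its old behaviour by taking only the first. A sketch of consuming several candidates (the filename and text are hypothetical):

# Sketch of the generator form; inputs are hypothetical.
import itertools

from documents.parsers import parse_date_generator

candidates = itertools.islice(
    parse_date_generator("scan.pdf", "Invoiced 01.02.2021, due 15.03.2021"),
    5,
)
print(list(candidates))  # up to five parsed datetimes, in document order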
class ParseError(Exception):

View File

@@ -1,6 +1,14 @@
import datetime
import math
import re
from ast import literal_eval
from asyncio.log import logger
from pathlib import Path
from typing import Dict
from typing import Optional
from typing import Tuple
from celery import states
try:
import zoneinfo
@@ -18,12 +26,12 @@ from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import MatchingModel
from .models import PaperlessTask
from .models import SavedView
from .models import SavedViewFilterRule
from .models import StoragePath
from .models import Tag
from .models import UiSettings
from .models import PaperlessTask
from .parsers import is_mime_type_supported
@@ -240,7 +248,8 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
)
instance.created = new_datetime
instance.save()
validated_data.pop("created_date")
if "created_date" in validated_data:
validated_data.pop("created_date")
super().update(instance, validated_data)
return instance
@@ -607,6 +616,15 @@ class UiSettingsViewSerializer(serializers.ModelSerializer):
"settings",
]
def validate_settings(self, settings):
# we never save the update-checking backend setting
if "update_checking" in settings:
try:
settings["update_checking"].pop("backend_setting")
except KeyError:
pass
return settings
def create(self, validated_data):
ui_settings = UiSettings.objects.update_or_create(
user=validated_data.get("user"),
@@ -619,7 +637,19 @@ class TasksViewSerializer(serializers.ModelSerializer):
class Meta:
model = PaperlessTask
depth = 1
fields = "__all__"
fields = (
"id",
"task_id",
"date_created",
"date_done",
"type",
"status",
"result",
"acknowledged",
"task_name",
"name",
"related_document",
)
type = serializers.SerializerMethodField()
@@ -631,24 +661,108 @@ class TasksViewSerializer(serializers.ModelSerializer):
def get_result(self, obj):
result = ""
if hasattr(obj, "attempted_task") and obj.attempted_task:
result = obj.attempted_task.result
if (
hasattr(obj, "attempted_task")
and obj.attempted_task
and obj.attempted_task.result
):
try:
result: str = obj.attempted_task.result
if "exc_message" in result:
# This is a dict in this case
result: Dict = literal_eval(result)
# This is a list, grab the first item (most recent)
result = result["exc_message"][0]
except Exception as e: # pragma: no cover
# Extra security if something is malformed
logger.warn(f"Error getting task result: {e}", exc_info=True)
return result
status = serializers.SerializerMethodField()
def get_status(self, obj):
if obj.attempted_task is None:
if obj.started:
return "started"
else:
return "queued"
elif obj.attempted_task.success:
return "complete"
elif not obj.attempted_task.success:
return "failed"
else:
return "unknown"
result = "unknown"
if hasattr(obj, "attempted_task") and obj.attempted_task:
result = obj.attempted_task.status
return result
date_created = serializers.SerializerMethodField()
def get_date_created(self, obj):
result = ""
if hasattr(obj, "attempted_task") and obj.attempted_task:
result = obj.attempted_task.date_created
return result
date_done = serializers.SerializerMethodField()
def get_date_done(self, obj):
result = ""
if hasattr(obj, "attempted_task") and obj.attempted_task:
result = obj.attempted_task.date_done
return result
task_id = serializers.SerializerMethodField()
def get_task_id(self, obj):
result = ""
if hasattr(obj, "attempted_task") and obj.attempted_task:
result = obj.attempted_task.task_id
return result
task_name = serializers.SerializerMethodField()
def get_task_name(self, obj):
result = ""
if hasattr(obj, "attempted_task") and obj.attempted_task:
result = obj.attempted_task.task_name
return result
name = serializers.SerializerMethodField()
def get_name(self, obj):
result = ""
if hasattr(obj, "attempted_task") and obj.attempted_task:
try:
task_kwargs: Optional[str] = obj.attempted_task.task_kwargs
# Try the override filename first (this is a webui created task?)
if task_kwargs is not None:
# It's a string, string of a dict. Who knows why...
kwargs = literal_eval(literal_eval(task_kwargs))
if "override_filename" in kwargs:
result = kwargs["override_filename"]
# Nothing was found, report the task first argument
if not len(result):
# There are always some arguments to the consume
task_args: Tuple = literal_eval(
literal_eval(obj.attempted_task.task_args),
)
filepath = Path(task_args[0])
result = filepath.name
except Exception as e: # pragma: no cover
# Extra security if something is malformed
logger.warning(f"Error getting file name from task: {e}", exc_info=True)
return result
related_document = serializers.SerializerMethodField()
def get_related_document(self, obj):
result = ""
regexp = r"New document id (\d+) created"
if (
hasattr(obj, "attempted_task")
and obj.attempted_task
and obj.attempted_task.result
and obj.attempted_task.status == states.SUCCESS
):
try:
result = re.search(regexp, obj.attempted_task.result).group(1)
except Exception:
pass
return result
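
The nested literal_eval() calls in get_name() above deal with how django-celery-results stores task_kwargs: as the repr of a string that itself contains a dict repr. In isolation (the sample value is illustrative):

# Why literal_eval() is applied twice: the stored value is a string of a
# string of a dict.
from ast import literal_eval

task_kwargs = "\"{'override_filename': 'invoice.pdf'}\""
kwargs = literal_eval(literal_eval(task_kwargs))  # str -> str -> dict
print(kwargs["override_filename"])                # invoice.pdf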
class AcknowledgeTasksViewSerializer(serializers.Serializer):

View File

@@ -2,7 +2,6 @@ import logging
import os
import shutil
import django_q
from django.conf import settings
from django.contrib.admin.models import ADDITION
from django.contrib.admin.models import LogEntry
@@ -14,6 +13,7 @@ from django.db.models import Q
from django.dispatch import receiver
from django.utils import termcolors
from django.utils import timezone
from django_celery_results.models import TaskResult
from filelock import FileLock
from .. import matching
@@ -25,7 +25,6 @@ from ..models import MatchingModel
from ..models import PaperlessTask
from ..models import Tag
logger = logging.getLogger("paperless.handlers")
@@ -291,7 +290,7 @@ def set_storage_path(
)
+ f" [{document.pk}]",
)
print(f"Sugest storage directory {selected}")
print(f"Suggest storage directory {selected}")
else:
logger.info(
f"Assigning storage path {selected} to {document}",
@@ -503,34 +502,19 @@ def add_to_index(sender, document, **kwargs):
index.add_or_update_document(document)
@receiver(django_q.signals.pre_enqueue)
def init_paperless_task(sender, task, **kwargs):
if task["func"] == "documents.tasks.consume_file":
paperless_task, created = PaperlessTask.objects.get_or_create(
task_id=task["id"],
)
paperless_task.name = task["name"]
paperless_task.created = task["started"]
paperless_task.save()
@receiver(django_q.signals.pre_execute)
def paperless_task_started(sender, task, **kwargs):
@receiver(models.signals.post_save, sender=TaskResult)
def update_paperless_task(sender, instance: TaskResult, **kwargs):
try:
if task["func"] == "documents.tasks.consume_file":
paperless_task = PaperlessTask.objects.get(task_id=task["id"])
paperless_task.started = timezone.now()
paperless_task.save()
except PaperlessTask.DoesNotExist:
pass
@receiver(models.signals.post_save, sender=django_q.models.Task)
def update_paperless_task(sender, instance, **kwargs):
try:
if instance.func == "documents.tasks.consume_file":
paperless_task = PaperlessTask.objects.get(task_id=instance.id)
if instance.task_name == "documents.tasks.consume_file":
paperless_task, _ = PaperlessTask.objects.get_or_create(
task_id=instance.task_id,
)
paperless_task.name = instance.task_name
paperless_task.created = instance.date_created
paperless_task.completed = instance.date_done
paperless_task.attempted_task = instance
paperless_task.save()
except PaperlessTask.DoesNotExist:
pass
except Exception as e:
# Don't let an exception in the signal handlers prevent
# a document from being consumed.
logger.error(f"Creating PaperlessTask failed: {e}")

View File

@@ -1,14 +1,17 @@
import hashlib
import logging
import os
import shutil
import uuid
from pathlib import Path
from typing import Type
import tqdm
from asgiref.sync import async_to_sync
from celery import shared_task
from channels.layers import get_channel_layer
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.db.models.signals import post_save
from documents import barcodes
from documents import index
@@ -17,6 +20,8 @@ from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier
from documents.consumer import Consumer
from documents.consumer import ConsumerError
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
@@ -24,14 +29,16 @@ from documents.models import StoragePath
from documents.models import Tag
from documents.parsers import DocumentParser
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import ParseError
from documents.sanity_checker import SanityCheckFailedException
from filelock import FileLock
from redis.exceptions import ConnectionError
from whoosh.writing import AsyncWriter
logger = logging.getLogger("paperless.tasks")
@shared_task
def index_optimize():
ix = index.open_index()
writer = AsyncWriter(ix)
@@ -48,6 +55,7 @@ def index_reindex(progress_bar_disable=False):
index.update_document(writer, document)
@shared_task
def train_classifier():
if (
not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
@@ -76,6 +84,7 @@ def train_classifier():
logger.warning("Classifier error: " + str(e))
@shared_task
def consume_file(
path,
override_filename=None,
@@ -87,32 +96,18 @@ def consume_file(
override_created=None,
):
path = Path(path).resolve()
# check for separators in current document
if settings.CONSUMER_ENABLE_BARCODES:
mime_type = barcodes.get_file_mime_type(path)
pdf_filepath, separators = barcodes.scan_file_for_separating_barcodes(path)
if not barcodes.supported_file_type(mime_type):
# if not supported, skip this routine
logger.warning(
f"Unsupported file format for barcode reader: {str(mime_type)}",
if separators:
logger.debug(
f"Pages with separators found in: {str(path)}",
)
else:
separators = []
document_list = []
if mime_type == "image/tiff":
file_to_process = barcodes.convert_from_tiff_to_pdf(path)
else:
file_to_process = path
separators = barcodes.scan_file_for_separating_barcodes(file_to_process)
if separators:
logger.debug(
f"Pages with separators found in: {str(path)}",
)
document_list = barcodes.separate_pages(file_to_process, separators)
document_list = barcodes.separate_pages(pdf_filepath, separators)
if document_list:
for n, document in enumerate(document_list):
@@ -122,17 +117,31 @@ def consume_file(
newname = f"{str(n)}_" + override_filename
else:
newname = None
barcodes.save_to_dir(document, newname=newname)
# if we got here, the document was successfully split
# and can safely be deleted
if mime_type == "image/tiff":
# Remove the TIFF converted to PDF file
logger.debug(f"Deleting file {file_to_process}")
os.unlink(file_to_process)
# Remove the original file (new file is saved above)
logger.debug(f"Deleting file {path}")
os.unlink(path)
# If the file is an upload, it's in the scratch directory
# Move it to consume directory to be picked up
# Otherwise, use the current parent to keep possible tags
# from subdirectories
try:
# is_relative_to would be nicer, but new in 3.9
_ = path.relative_to(settings.SCRATCH_DIR)
save_to_dir = settings.CONSUMPTION_DIR
except ValueError:
save_to_dir = path.parent
barcodes.save_to_dir(
document,
newname=newname,
target_dir=save_to_dir,
)
# Delete the PDF file which was split
os.remove(pdf_filepath)
# If the original was a TIFF, remove the original file as well
if str(pdf_filepath) != str(path):
logger.debug(f"Deleting file {path}")
os.unlink(path)
# notify the sender, otherwise the progress bar
# in the UI stays stuck
@@ -149,11 +158,8 @@ def consume_file(
"status_updates",
{"type": "status_update", "data": payload},
)
except OSError as e:
logger.warning(
"OSError. It could be, the broker cannot be reached.",
)
logger.warning(str(e))
except ConnectionError as e:
logger.warning(f"ConnectionError on status send: {str(e)}")
# consuming stops here, since the original document with
# the barcodes has been split and will be consumed separately
return "File successfully split"
@@ -179,6 +185,7 @@ def consume_file(
)
@shared_task
def sanity_check():
messages = sanity_checker.check_sanity()
@@ -194,6 +201,7 @@ def sanity_check():
return "No issues detected."
@shared_task
def bulk_update_documents(document_ids):
documents = Document.objects.filter(id__in=document_ids)
@@ -207,44 +215,63 @@ def bulk_update_documents(document_ids):
index.update_document(writer, doc)
def redo_ocr(document_ids):
all_docs = Document.objects.all()
@shared_task
def update_document_archive_file(document_id):
"""
Re-creates the archive file of a document, including new OCR content and thumbnail
"""
document = Document.objects.get(id=document_id)
for doc_pk in document_ids:
try:
logger.info(f"Parsing document {doc_pk}")
doc: Document = all_docs.get(pk=doc_pk)
except ObjectDoesNotExist:
logger.error(f"Document {doc_pk} does not exist")
continue
mime_type = document.mime_type
# Get the correct parser for this mime type
parser_class: Type[DocumentParser] = get_parser_class_for_mime_type(
doc.mime_type,
parser_class: Type[DocumentParser] = get_parser_class_for_mime_type(mime_type)
if not parser_class:
logger.error(
f"No parser found for mime type {mime_type}, cannot "
f"archive document {document} (ID: {document_id})",
)
document_parser: DocumentParser = parser_class(
"redo-ocr",
return
parser: DocumentParser = parser_class(logging_group=uuid.uuid4())
try:
parser.parse(document.source_path, mime_type, document.get_public_filename())
thumbnail = parser.get_thumbnail(
document.source_path,
mime_type,
document.get_public_filename(),
)
# Create a file path to copy the original file to for working on
temp_file = (Path(document_parser.tempdir) / Path("new-ocr-file")).resolve()
if parser.get_archive_path():
with transaction.atomic():
with open(parser.get_archive_path(), "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
# I'm going to save first so that in case the file move
# fails, the database is rolled back.
# We also don't use save() since that triggers the filehandling
# logic, and we don't want that yet (file not yet in place)
document.archive_filename = generate_unique_filename(
document,
archive_filename=True,
)
Document.objects.filter(pk=document.pk).update(
archive_checksum=checksum,
content=parser.get_text(),
archive_filename=document.archive_filename,
)
with FileLock(settings.MEDIA_LOCK):
create_source_path_directory(document.archive_path)
shutil.move(parser.get_archive_path(), document.archive_path)
shutil.move(thumbnail, document.thumbnail_path)
shutil.copy(doc.source_path, temp_file)
with index.open_index_writer() as writer:
index.update_document(writer, document)
try:
logger.info(
f"Using {type(document_parser).__name__} for document",
)
# Try to re-parse the document into text
document_parser.parse(str(temp_file), doc.mime_type)
doc.content = document_parser.get_text()
doc.save()
logger.info("Document OCR updated")
except ParseError as e:
logger.error(f"Error parsing document: {e}")
finally:
# Remove the file path if it was created
if temp_file.exists() and temp_file.is_file():
temp_file.unlink()
except Exception:
logger.exception(
f"Error while parsing document {document} " f"(ID: {document_id})",
)
finally:
parser.cleanup()
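
Earlier in this file, consume_file notes that Path.is_relative_to() would be cleaner but only exists from Python 3.9, so it emulates the check with relative_to() and ValueError. That emulation in isolation:

# Sketch of the is_relative_to() emulation used above (the real method
# requires Python 3.9+).
from pathlib import Path

def is_under(path: Path, root: Path) -> bool:
    try:
        path.relative_to(root)
        return True
    except ValueError:
        return False

print(is_under(Path("/tmp/scratch/upload.pdf"), Path("/tmp/scratch")))  # True
print(is_under(Path("/data/consume/doc.pdf"), Path("/tmp/scratch")))    # False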

View File

@@ -10,6 +10,8 @@ import zipfile
from unittest import mock
from unittest.mock import MagicMock
import celery
try:
import zoneinfo
except ImportError:
@@ -20,7 +22,6 @@ from django.conf import settings
from django.contrib.auth.models import User
from django.test import override_settings
from django.utils import timezone
from django_q.models import Task
from documents import bulk_edit
from documents import index
from documents.models import Correspondent
@@ -31,7 +32,8 @@ from documents.models import PaperlessTask
from documents.models import SavedView
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
from django_celery_results.models import TaskResult
from documents.models import Comment
from documents.models import StoragePath
from documents.tests.utils import DirectoriesMixin
from paperless import version
@@ -789,7 +791,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data["documents_inbox"], None)
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload(self, m):
with open(
@@ -812,7 +814,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertIsNone(kwargs["override_document_type_id"])
self.assertIsNone(kwargs["override_tag_ids"])
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_empty_metadata(self, m):
with open(
@@ -835,7 +837,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertIsNone(kwargs["override_document_type_id"])
self.assertIsNone(kwargs["override_tag_ids"])
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_form(self, m):
with open(
@@ -849,7 +851,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_file(self, m):
with open(
@@ -863,7 +865,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_title(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -881,7 +883,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(kwargs["override_title"], "my custom title")
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_correspondent(self, async_task):
c = Correspondent.objects.create(name="test-corres")
with open(
@@ -900,7 +902,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(kwargs["override_correspondent_id"], c.id)
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_correspondent(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -914,7 +916,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
async_task.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_document_type(self, async_task):
dt = DocumentType.objects.create(name="invoice")
with open(
@@ -933,7 +935,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(kwargs["override_document_type_id"], dt.id)
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_document_type(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -947,7 +949,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
async_task.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_tags(self, async_task):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
@@ -967,7 +969,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual(kwargs["override_tag_ids"], [t1.id, t2.id])
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_tags(self, async_task):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
@@ -983,7 +985,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
async_task.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_created(self, async_task):
created = datetime.datetime(
2022,
@@ -1107,6 +1109,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
"tags": [],
"document_types": [],
"storage_paths": [],
"dates": [],
},
)
@@ -1118,6 +1121,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.match_document_types")
@mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_correspondents")
@override_settings(NUMBER_OF_SUGGESTED_DATES=10)
def test_get_suggestions(
self,
match_correspondents,
@@ -1128,7 +1132,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
doc = Document.objects.create(
title="test",
mime_type="application/pdf",
content="this is an invoice!",
content="this is an invoice from 12.04.2022!",
)
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
@@ -1144,6 +1148,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
"tags": [56, 123],
"document_types": [23],
"storage_paths": [99, 77],
"dates": ["2022-04-12"],
},
)
@@ -1354,6 +1359,133 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
1,
)
def test_get_existing_comments(self):
"""
GIVEN:
- A document with a single comment
WHEN:
- API request for document comments is made
THEN:
- The associated comment is returned
"""
doc = Document.objects.create(
title="test",
mime_type="application/pdf",
content="this is a document which will have comments!",
)
comment = Comment.objects.create(
comment="This is a comment.",
document=doc,
user=self.user,
)
response = self.client.get(
f"/api/documents/{doc.pk}/comments/",
format="json",
)
self.assertEqual(response.status_code, 200)
resp_data = response.json()
self.assertEqual(len(resp_data), 1)
resp_data = resp_data[0]
del resp_data["created"]
self.assertDictEqual(
resp_data,
{
"id": comment.id,
"comment": comment.comment,
"user": {
"id": comment.user.id,
"username": comment.user.username,
"firstname": comment.user.first_name,
"lastname": comment.user.last_name,
},
},
)
def test_create_comment(self):
"""
GIVEN:
- Existing document
WHEN:
- API request is made to add a comment
THEN:
- Comment is created and associated with document
"""
doc = Document.objects.create(
title="test",
mime_type="application/pdf",
content="this is a document which will have comments added",
)
resp = self.client.post(
f"/api/documents/{doc.pk}/comments/",
data={"comment": "this is a posted comment"},
)
self.assertEqual(resp.status_code, 200)
response = self.client.get(
f"/api/documents/{doc.pk}/comments/",
format="json",
)
self.assertEqual(response.status_code, 200)
resp_data = response.json()
self.assertEqual(len(resp_data), 1)
resp_data = resp_data[0]
self.assertEqual(resp_data["comment"], "this is a posted comment")
def test_delete_comment(self):
"""
GIVEN:
- Existing document
WHEN:
- API request is made to delete a comment
THEN:
- Comment is deleted and no longer associated with the document
"""
doc = Document.objects.create(
title="test",
mime_type="application/pdf",
content="this is a document which will have comments!",
)
comment = Comment.objects.create(
comment="This is a comment.",
document=doc,
user=self.user,
)
response = self.client.delete(
f"/api/documents/{doc.pk}/comments/?id={comment.pk}",
format="json",
)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(Comment.objects.all()), 0)
def test_get_comments_no_doc(self):
"""
GIVEN:
- A request to get comments from a non-existent document
WHEN:
- API request for document comments is made
THEN:
- HTTP 404 is returned
"""
response = self.client.get(
"/api/documents/500/comments/",
format="json",
)
self.assertEqual(response.status_code, 404)
class TestDocumentApiV2(DirectoriesMixin, APITestCase):
def setUp(self):
@@ -1450,7 +1582,11 @@ class TestApiUiSettings(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data["settings"],
{},
{
"update_checking": {
"backend_setting": "default",
},
},
)
def test_api_set_ui_settings(self):
@@ -1484,7 +1620,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
user = User.objects.create_superuser(username="temp_admin")
self.client.force_authenticate(user=user)
patcher = mock.patch("documents.bulk_edit.async_task")
patcher = mock.patch("documents.bulk_edit.bulk_update_documents.delay")
self.async_task = patcher.start()
self.addCleanup(patcher.stop)
self.c1 = Correspondent.objects.create(name="c1")
@@ -2411,38 +2547,6 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
def setUp(self):
super().setUp()
def test_remote_version_default(self):
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data,
{
"version": "0.0.0",
"update_available": False,
"feature_is_set": False,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=False,
)
def test_remote_version_disabled(self):
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data,
{
"version": "0.0.0",
"update_available": False,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_enabled_no_update_prefix(self, urlopen_mock):
@@ -2460,13 +2564,9 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
{
"version": "1.6.0",
"update_available": False,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_enabled_no_update_no_prefix(self, urlopen_mock):
@@ -2486,13 +2586,9 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
{
"version": version.__full_version_str__,
"update_available": False,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_enabled_update(self, urlopen_mock):
@@ -2519,13 +2615,9 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
{
"version": new_version_str,
"update_available": True,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_bad_json(self, urlopen_mock):
@@ -2543,13 +2635,9 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
{
"version": "0.0.0",
"update_available": False,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_exception(self, urlopen_mock):
@@ -2567,7 +2655,6 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
{
"version": "0.0.0",
"update_available": False,
"feature_is_set": True,
},
)
@@ -2652,7 +2739,7 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
class TestTasks(APITestCase):
ENDPOINT = "/api/tasks/"
ENDPOINT_ACKOWLEDGE = "/api/acknowledge_tasks/"
ENDPOINT_ACKNOWLEDGE = "/api/acknowledge_tasks/"
def setUp(self):
super().setUp()
@@ -2661,16 +2748,27 @@ class TestTasks(APITestCase):
self.client.force_authenticate(user=self.user)
def test_get_tasks(self):
task_id1 = str(uuid.uuid4())
PaperlessTask.objects.create(task_id=task_id1)
Task.objects.create(
id=task_id1,
started=timezone.now() - datetime.timedelta(seconds=30),
stopped=timezone.now(),
func="documents.tasks.consume_file",
"""
GIVEN:
- Attempted celery tasks
WHEN:
- API call is made to get tasks
THEN:
- Started and pending tasks are serialized and provided
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_great_task",
status=celery.states.PENDING,
)
task_id2 = str(uuid.uuid4())
PaperlessTask.objects.create(task_id=task_id2)
PaperlessTask.objects.create(attempted_task=result1)
result2 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_awesome_task",
status=celery.states.STARTED,
)
PaperlessTask.objects.create(attempted_task=result2)
response = self.client.get(self.ENDPOINT)
@@ -2678,25 +2776,155 @@ class TestTasks(APITestCase):
self.assertEqual(len(response.data), 2)
returned_task1 = response.data[1]
returned_task2 = response.data[0]
self.assertEqual(returned_task1["task_id"], task_id1)
self.assertEqual(returned_task1["status"], "complete")
self.assertIsNotNone(returned_task1["attempted_task"])
self.assertEqual(returned_task2["task_id"], task_id2)
self.assertEqual(returned_task2["status"], "queued")
self.assertIsNone(returned_task2["attempted_task"])
self.assertEqual(returned_task1["task_id"], result1.task_id)
self.assertEqual(returned_task1["status"], celery.states.PENDING)
self.assertEqual(returned_task1["task_name"], result1.task_name)
self.assertEqual(returned_task2["task_id"], result2.task_id)
self.assertEqual(returned_task2["status"], celery.states.STARTED)
self.assertEqual(returned_task2["task_name"], result2.task_name)
def test_acknowledge_tasks(self):
task_id = str(uuid.uuid4())
task = PaperlessTask.objects.create(task_id=task_id)
"""
GIVEN:
- Attempted celery tasks
WHEN:
- API call is made to mark the task as acknowledged
THEN:
- Task is marked as acknowledged
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.PENDING,
)
task = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(len(response.data), 1)
response = self.client.post(
self.ENDPOINT_ACKOWLEDGE,
self.ENDPOINT_ACKNOWLEDGE,
{"tasks": [task.id]},
)
self.assertEqual(response.status_code, 200)
response = self.client.get(self.ENDPOINT)
self.assertEqual(len(response.data), 0)
def test_task_result_no_error(self):
"""
GIVEN:
- A celery task completed without error
WHEN:
- API call is made to get tasks
THEN:
- The returned data includes the task result
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.SUCCESS,
result="Success. New document id 1 created",
)
_ = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 1)
returned_data = response.data[0]
self.assertEqual(returned_data["result"], "Success. New document id 1 created")
self.assertEqual(returned_data["related_document"], "1")
def test_task_result_with_error(self):
"""
GIVEN:
- A celery task completed with an exception
WHEN:
- API call is made to get tasks
THEN:
- The returned result is the exception info
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.SUCCESS,
result={
"exc_type": "ConsumerError",
"exc_message": ["test.pdf: Not consuming test.pdf: It is a duplicate."],
"exc_module": "documents.consumer",
},
)
_ = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 1)
returned_data = response.data[0]
self.assertEqual(
returned_data["result"],
"test.pdf: Not consuming test.pdf: It is a duplicate.",
)
def test_task_name_webui(self):
"""
GIVEN:
- Attempted celery task
- Task was created through the webui
WHEN:
- API call is made to get tasks
THEN:
- Returned data includes the filename
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.SUCCESS,
task_args="\"('/tmp/paperless/paperless-upload-5iq7skzc',)\"",
task_kwargs="\"{'override_filename': 'test.pdf', 'override_title': None, 'override_correspondent_id': None, 'override_document_type_id': None, 'override_tag_ids': None, 'task_id': '466e8fe7-7193-4698-9fff-72f0340e2082', 'override_created': None}\"",
)
_ = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 1)
returned_data = response.data[0]
self.assertEqual(returned_data["name"], "test.pdf")
def test_task_name_consume_folder(self):
"""
GIVEN:
- Attempted celery task
- Task was created through the consume folder
WHEN:
- API call is made to get tasks
THEN:
- Returned data includes the filename
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.SUCCESS,
task_args="\"('/consume/anothertest.pdf',)\"",
task_kwargs="\"{'override_tag_ids': None}\"",
)
_ = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 1)
returned_data = response.data[0]
self.assertEqual(returned_data["name"], "anothertest.pdf")
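The two filename tests above imply that the task serializer recovers a display name from the stored task_args/task_kwargs strings. A rough sketch of that mapping, assuming the double-quoted repr format shown in the fixtures (the helper name is hypothetical, not the serializer's actual API):

import ast
import os


def guess_task_name(task_args: str, task_kwargs: str) -> str:
    # The stored value is a quoted repr, so two literal_eval passes are
    # needed: once to unwrap the outer string, once to parse the tuple/dict.
    kwargs = ast.literal_eval(ast.literal_eval(task_kwargs))
    if kwargs.get("override_filename"):
        return kwargs["override_filename"]  # webui uploads carry the filename
    args = ast.literal_eval(ast.literal_eval(task_args))
    return os.path.basename(args[0])  # consume-folder tasks carry only the path


assert guess_task_name(
    "\"('/consume/anothertest.pdf',)\"",
    "\"{'override_tag_ids': None}\"",
) == "anothertest.pdf"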

View File

@@ -3,6 +3,7 @@ import shutil
import tempfile
from unittest import mock
import pikepdf
from django.conf import settings
from django.test import override_settings
from django.test import TestCase
@@ -13,22 +14,23 @@ from PIL import Image
class TestBarcode(DirectoriesMixin, TestCase):
SAMPLE_DIR = os.path.join(
os.path.dirname(__file__),
"samples",
)
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
def test_barcode_reader(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
"barcode-39-PATCHT.png",
)
test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader2(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pbm",
)
img = Image.open(test_file)
@@ -37,9 +39,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_distorsion(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distorsion.png",
)
img = Image.open(test_file)
@@ -48,9 +48,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_distorsion2(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distorsion2.png",
)
img = Image.open(test_file)
@@ -59,9 +57,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_unreadable(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-unreadable.png",
)
img = Image.open(test_file)
@@ -69,9 +65,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_qr(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"qr-code-PATCHT.png",
)
img = Image.open(test_file)
@@ -80,9 +74,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_128(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-128-PATCHT.png",
)
img = Image.open(test_file)
@@ -90,15 +82,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_no_barcode(self):
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), [])
def test_barcode_reader_custom_separator(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.png",
)
img = Image.open(test_file)
@@ -106,9 +96,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_custom_qr_separator(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.png",
)
img = Image.open(test_file)
@@ -116,9 +104,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_custom_128_separator(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.png",
)
img = Image.open(test_file)
@@ -126,19 +112,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_get_mime_type(self):
tiff_file = os.path.join(
os.path.dirname(__file__),
"samples",
self.SAMPLE_DIR,
"simple.tiff",
)
pdf_file = os.path.join(
os.path.dirname(__file__),
"samples",
self.SAMPLE_DIR,
"simple.pdf",
)
png_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.png",
)
tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1")
@@ -173,8 +155,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_convert_error_from_pdf_to_pdf(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
self.SAMPLE_DIR,
"simple.pdf",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf")
@@ -183,117 +164,235 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_scan_file_for_separating_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
def test_scan_file_for_separating_barcodes2(self):
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [])
test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [])
def test_scan_file_for_separating_barcodes3(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [1])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_separating_barcodes4(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"several-patcht-codes.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [2, 5])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [2, 5])
def test_scan_file_for_separating_barcodes_upsidedown(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle_reverse.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [1])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_separating_barcodes_pillow_transcode_error(self):
"""
GIVEN:
- A PDF containing an image which cannot be transcoded to a PIL image
WHEN:
- Transcoding the image to a PIL image is attempted but fails
THEN:
- The barcode reader is still called
"""
def _build_device_n_pdf(self, save_path: str):
# Based on the pikepdf tests
# https://github.com/pikepdf/pikepdf/blob/abb35ebe17d579d76abe08265e00cf8890a12a95/tests/test_image_access.py
pdf = pikepdf.new()
pdf.add_blank_page(page_size=(72, 72))
imobj = pikepdf.Stream(
pdf,
bytes(range(0, 256)),
BitsPerComponent=8,
ColorSpace=pikepdf.Array(
[
pikepdf.Name.DeviceN,
pikepdf.Array([pikepdf.Name.Black]),
pikepdf.Name.DeviceCMYK,
pikepdf.Stream(
pdf,
b"{0 0 0 4 -1 roll}", # Colorspace conversion function
FunctionType=4,
Domain=[0.0, 1.0],
Range=[0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
),
],
),
Width=16,
Height=16,
Type=pikepdf.Name.XObject,
Subtype=pikepdf.Name.Image,
)
pim = pikepdf.PdfImage(imobj)
self.assertEqual(pim.mode, "DeviceN")
self.assertTrue(pim.is_device_n)
pdf.pages[0].Contents = pikepdf.Stream(pdf, b"72 0 0 72 0 0 cm /Im0 Do")
pdf.pages[0].Resources = pikepdf.Dictionary(
XObject=pikepdf.Dictionary(Im0=imobj),
)
pdf.save(save_path)
with tempfile.NamedTemporaryFile(suffix="pdf") as device_n_pdf:
# Build an offending file
_build_device_n_pdf(self, str(device_n_pdf.name))
with mock.patch("documents.barcodes.barcode_reader") as reader:
reader.return_value = list()
_, _ = barcodes.scan_file_for_separating_barcodes(
str(device_n_pdf.name),
)
reader.assert_called()
def test_scan_file_for_separating_barcodes_fax_decode(self):
"""
GIVEN:
- A PDF containing an image using CCITT Group 4 encoding
WHEN:
- Barcode processing happens with the file
THEN:
- The barcode is still detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-fax-image.pdf",
)
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_separating_qr_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-qr.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_qr_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_128_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
def test_scan_file_for_separating_wrong_qr_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [])
def test_separate_pages(self):
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
pages = barcodes.separate_pages(test_file, [1])
self.assertEqual(len(pages), 2)
def test_separate_pages_double_code(self):
"""
GIVEN:
- Input PDF with two patch code pages in a row
WHEN:
- The input file is split
THEN:
- Only two files are output
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
"patch-code-t-middle.pdf",
"patch-code-t-double.pdf",
)
pages = barcodes.separate_pages(test_file, [1])
pages = barcodes.separate_pages(test_file, [1, 2])
self.assertEqual(len(pages), 2)
def test_separate_pages_no_list(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
@@ -308,9 +407,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_save_to_dir(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@@ -320,9 +417,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_save_to_dir2(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
nonexistingdir = "/nowhere"
@@ -340,9 +435,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_save_to_dir3(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@@ -352,35 +445,41 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_splitter(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
separators = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertTrue(separators)
document_list = barcodes.separate_pages(test_file, separators)
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(test_file, pdf_file)
self.assertTrue(len(separator_page_numbers) > 0)
document_list = barcodes.separate_pages(test_file, separator_page_numbers)
self.assertTrue(document_list)
for document in document_list:
barcodes.save_to_dir(document, target_dir=tempdir)
target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")
self.assertTrue(os.path.isfile(target_file1))
self.assertTrue(os.path.isfile(target_file2))
@override_settings(CONSUMER_ENABLE_BARCODES=True)
def test_consume_barcode_file(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf")
shutil.copy(test_file, dst)
self.assertEqual(tasks.consume_file(dst), "File successfully split")
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(tasks.consume_file(dst), "File successfully split")
@override_settings(
CONSUMER_ENABLE_BARCODES=True,
@@ -388,15 +487,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
)
def test_consume_barcode_tiff_file(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff")
shutil.copy(test_file, dst)
self.assertEqual(tasks.consume_file(dst), "File successfully split")
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(tasks.consume_file(dst), "File successfully split")
@override_settings(
CONSUMER_ENABLE_BARCODES=True,
@@ -412,18 +510,17 @@ class TestBarcode(DirectoriesMixin, TestCase):
and continue archiving the file as is.
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
self.SAMPLE_DIR,
"simple.jpg",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
shutil.copy(test_file, dst)
with self.assertLogs("paperless.tasks", level="WARNING") as cm:
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
self.assertIn("Success", tasks.consume_file(dst))
self.assertListEqual(
cm.output,
[
"WARNING:paperless.tasks:Unsupported file format for barcode reader: image/jpeg",
"WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg",
],
)
m.assert_called_once()
@@ -445,12 +542,11 @@ class TestBarcode(DirectoriesMixin, TestCase):
the user uploads a supported image file, but without extension
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")
shutil.copy(test_file, dst)
self.assertEqual(tasks.consume_file(dst), "File successfully split")
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(tasks.consume_file(dst), "File successfully split")
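Taken together, these tests exercise the scan, split, and save pipeline. A condensed sketch of that flow, mirroring test_barcode_splitter above (paths are illustrative):

from documents import barcodes

# scan_file_for_separating_barcodes now returns the (possibly converted)
# PDF path together with the separator page numbers.
pdf_path, separators = barcodes.scan_file_for_separating_barcodes(
    "/tmp/inbox/scan.pdf",
)
if separators:
    for part in barcodes.separate_pages(pdf_path, separators):
        barcodes.save_to_dir(part, target_dir="/tmp/split")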

View File

@@ -1,9 +1,9 @@
import os
import re
import tempfile
from pathlib import Path
from unittest import mock
import documents
import pytest
from django.conf import settings
from django.test import override_settings
@@ -20,10 +20,19 @@ from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
def dummy_preprocess(content: str):
content = content.lower().strip()
content = re.sub(r"\s+", " ", content)
return content
class TestClassifier(DirectoriesMixin, TestCase):
def setUp(self):
super().setUp()
self.classifier = DocumentClassifier()
self.classifier.preprocess_content = mock.MagicMock(
side_effect=dummy_preprocess,
)
def generate_test_data(self):
self.c1 = Correspondent.objects.create(
@@ -192,6 +201,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
new_classifier = DocumentClassifier()
new_classifier.load()
new_classifier.preprocess_content = mock.MagicMock(side_effect=dummy_preprocess)
self.assertFalse(new_classifier.train())
# @override_settings(
@@ -215,6 +226,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
new_classifier = DocumentClassifier()
new_classifier.load()
new_classifier.preprocess_content = mock.MagicMock(side_effect=dummy_preprocess)
self.assertCountEqual(new_classifier.predict_tags(self.doc2.content), [45, 12])

View File

@@ -8,6 +8,7 @@ from django.conf import settings
from django.test import override_settings
from django.test import TestCase
from documents.parsers import parse_date
from documents.parsers import parse_date_generator
from paperless.settings import DATE_ORDER
@@ -161,6 +162,25 @@ class TestDate(TestCase):
def test_crazy_date_with_spaces(self, *args):
self.assertIsNone(parse_date("", "20 408000l 2475"))
def test_multiple_dates(self):
text = """This text has multiple dates.
For example 02.02.2018, 22 July 2022 and Dezember 2021.
But not 24-12-9999 because its in the future..."""
dates = list(parse_date_generator("", text))
self.assertEqual(len(dates), 3)
self.assertEqual(
dates[0],
datetime.datetime(2018, 2, 2, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
self.assertEqual(
dates[1],
datetime.datetime(2022, 7, 22, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
self.assertEqual(
dates[2],
datetime.datetime(2021, 12, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_valid_ymd(self, *args):
"""

View File

@@ -10,8 +10,8 @@ from django.core.management import call_command
from django.test import override_settings
from django.test import TestCase
from documents.file_handling import generate_filename
from documents.management.commands.document_archiver import handle_document
from documents.models import Document
from documents.tasks import update_document_archive_file
from documents.tests.utils import DirectoriesMixin
@@ -46,7 +46,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
)
handle_document(doc.pk)
update_document_archive_file(doc.pk)
doc = Document.objects.get(id=doc.id)
@@ -63,7 +63,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
doc.save()
shutil.copy(sample_file, doc.source_path)
handle_document(doc.pk)
update_document_archive_file(doc.pk)
doc = Document.objects.get(id=doc.id)
@@ -94,8 +94,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
os.path.join(self.dirs.originals_dir, f"document_01.pdf"),
)
handle_document(doc2.pk)
handle_document(doc1.pk)
update_document_archive_file(doc2.pk)
update_document_archive_file(doc1.pk)
doc1 = Document.objects.get(id=doc1.id)
doc2 = Document.objects.get(id=doc2.id)

View File

@@ -20,13 +20,14 @@ class ConsumerThread(Thread):
def __init__(self):
super().__init__()
self.cmd = document_consumer.Command()
self.cmd.stop_flag.clear()
def run(self) -> None:
self.cmd.handle(directory=settings.CONSUMPTION_DIR, oneshot=False)
self.cmd.handle(directory=settings.CONSUMPTION_DIR, oneshot=False, testing=True)
def stop(self):
# Consumer checks this every second.
self.cmd.stop_flag = True
self.cmd.stop_flag.set()
def chunked(size, source):
@@ -42,7 +43,7 @@ class ConsumerMixin:
super().setUp()
self.t = None
patcher = mock.patch(
"documents.management.commands.document_consumer.async_task",
"documents.tasks.consume_file.delay",
)
self.task_mock = patcher.start()
self.addCleanup(patcher.stop)
@@ -59,13 +60,14 @@ class ConsumerMixin:
self.t.stop()
# wait for the consumer to exit.
self.t.join()
self.t = None
super().tearDown()
def wait_for_task_mock_call(self, excpeted_call_count=1):
def wait_for_task_mock_call(self, expected_call_count=1):
n = 0
while n < 100:
if self.task_mock.call_count >= excpeted_call_count:
while n < 50:
if self.task_mock.call_count >= expected_call_count:
# give task_mock some time to finish and raise errors
sleep(1)
return
@@ -74,7 +76,7 @@ class ConsumerMixin:
# A bogus async_task that will simply check the file for
# completeness and raise an exception otherwise.
def bogus_task(self, func, filename, **kwargs):
def bogus_task(self, filename, **kwargs):
eq = filecmp.cmp(filename, self.sample_file, shallow=False)
if not eq:
print("Consumed an INVALID file.")
@@ -113,7 +115,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], f)
self.assertEqual(args[0], f)
def test_consume_file_invalid_ext(self):
self.t_start()
@@ -133,7 +135,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], f)
self.assertEqual(args[0], f)
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_pdf(self, error_logger):
@@ -153,7 +155,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], fname)
self.assertEqual(args[0], fname)
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_and_move(self, error_logger):
@@ -173,7 +175,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], fname2)
self.assertEqual(args[0], fname2)
error_logger.assert_not_called()
@@ -191,7 +193,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], fname)
self.assertEqual(args[0], fname)
# assert that we have an error logged with this invalid file.
error_logger.assert_called_once()
@@ -234,12 +236,12 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
sleep(5)
self.wait_for_task_mock_call(excpeted_call_count=2)
self.wait_for_task_mock_call(expected_call_count=2)
self.assertEqual(2, self.task_mock.call_count)
fnames = [
os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list
os.path.basename(args[0]) for args, _ in self.task_mock.call_args_list
]
self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])
@@ -281,6 +283,8 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
@override_settings(
CONSUMER_POLLING=1,
# please leave the delay here and down below
# see https://github.com/paperless-ngx/paperless-ngx/pull/66
CONSUMER_POLLING_DELAY=3,
CONSUMER_POLLING_RETRY_COUNT=20,
)
@@ -307,8 +311,7 @@ class TestConsumerRecursivePolling(TestConsumer):
class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
@override_settings(CONSUMER_RECURSIVE=True)
@override_settings(CONSUMER_SUBDIRS_AS_TAGS=True)
@override_settings(CONSUMER_RECURSIVE=True, CONSUMER_SUBDIRS_AS_TAGS=True)
def test_consume_file_with_path_tags(self):
tag_names = ("existingTag", "Space Tag")
@@ -335,7 +338,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
tag_ids.append(Tag.objects.get(name=tag_names[1]).pk)
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], f)
self.assertEqual(args[0], f)
# assertCountEqual has a bad name, but test that the first
# sequence contains the same elements as second, regardless of
@@ -344,7 +347,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
@override_settings(
CONSUMER_POLLING=1,
CONSUMER_POLLING_DELAY=1,
CONSUMER_POLLING_DELAY=3,
CONSUMER_POLLING_RETRY_COUNT=20,
)
def test_consume_file_with_path_tags_polling(self):
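One structural change worth noting above: the consumer's stop_flag moves from a plain boolean to a threading.Event, which is what the clear()/set() calls rely on. A minimal sketch of that pattern (the loop body is elided):

import threading

stop_flag = threading.Event()


def consume_loop():
    # Poll the consumption directory until signalled to stop; each hit
    # would dispatch consume_file.delay(...) as the tests above mock.
    while not stop_flag.is_set():
        pass


# From the controlling thread (or a signal handler):
stop_flag.set()  # the loop observes this within one polling interval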

View File

@@ -10,10 +10,13 @@ from django.core.management import call_command
from django.test import override_settings
from django.test import TestCase
from documents.management.commands import document_exporter
from documents.models import Comment
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.models import User
from documents.sanity_checker import check_sanity
from documents.settings import EXPORTER_FILE_NAME
from documents.tests.utils import DirectoriesMixin
@@ -25,6 +28,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.target = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, self.target)
self.user = User.objects.create(username="temp_admin")
self.d1 = Document.objects.create(
content="Content",
checksum="42995833e01aea9b3edee44bbfdd7ce1",
@@ -57,14 +62,23 @@ class TestExportImport(DirectoriesMixin, TestCase):
storage_type=Document.STORAGE_TYPE_GPG,
)
self.comment = Comment.objects.create(
comment="This is a comment. amaze.",
document=self.d1,
user=self.user,
)
self.t1 = Tag.objects.create(name="t")
self.dt1 = DocumentType.objects.create(name="dt")
self.c1 = Correspondent.objects.create(name="c")
self.sp1 = StoragePath.objects.create(path="{created_year}-{title}")
self.d1.tags.add(self.t1)
self.d1.correspondent = self.c1
self.d1.document_type = self.dt1
self.d1.save()
self.d4.storage_path = self.sp1
self.d4.save()
super().setUp()
def _get_document_from_manifest(self, manifest, id):
@@ -110,7 +124,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
manifest = self._do_export(use_filename_format=use_filename_format)
self.assertEqual(len(manifest), 8)
self.assertEqual(len(manifest), 11)
self.assertEqual(
len(list(filter(lambda e: e["model"] == "documents.document", manifest))),
4,
@@ -171,6 +185,11 @@ class TestExportImport(DirectoriesMixin, TestCase):
checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, element["fields"]["archive_checksum"])
elif element["model"] == "documents.comment":
self.assertEqual(element["fields"]["comment"], self.comment.comment)
self.assertEqual(element["fields"]["document"], self.d1.id)
self.assertEqual(element["fields"]["user"], self.user.id)
with paperless_environment() as dirs:
self.assertEqual(Document.objects.count(), 4)
Document.objects.all().delete()
@@ -184,6 +203,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertEqual(Tag.objects.count(), 1)
self.assertEqual(Correspondent.objects.count(), 1)
self.assertEqual(DocumentType.objects.count(), 1)
self.assertEqual(StoragePath.objects.count(), 1)
self.assertEqual(Document.objects.get(id=self.d1.id).title, "wow1")
self.assertEqual(Document.objects.get(id=self.d2.id).title, "wow2")
self.assertEqual(Document.objects.get(id=self.d3.id).title, "wow2")
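For reference, outside the test harness the exporter these assertions cover is driven as a management command; a hedged sketch (the target path is illustrative):

from django.core.management import call_command

# Writes originals, archive versions and the manifest (which now also
# carries documents.comment entries) to the target directory.
call_command("document_exporter", "/tmp/export-target")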

View File

@@ -3,12 +3,34 @@ from django.test import TestCase
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
class TestRetagger(DirectoriesMixin, TestCase):
def make_models(self):
self.sp1 = StoragePath.objects.create(
name="dummy a",
path="{created_data}/{title}",
match="auto document",
matching_algorithm=StoragePath.MATCH_LITERAL,
)
self.sp2 = StoragePath.objects.create(
name="dummy b",
path="{title}",
match="^first|^unrelated",
matching_algorithm=StoragePath.MATCH_REGEX,
)
self.sp3 = StoragePath.objects.create(
name="dummy c",
path="{title}",
match="^blah",
matching_algorithm=StoragePath.MATCH_REGEX,
)
self.d1 = Document.objects.create(
checksum="A",
title="A",
@@ -23,6 +45,7 @@ class TestRetagger(DirectoriesMixin, TestCase):
checksum="C",
title="C",
content="unrelated document",
storage_path=self.sp3,
)
self.d4 = Document.objects.create(
checksum="D",
@@ -146,15 +169,15 @@ class TestRetagger(DirectoriesMixin, TestCase):
call_command("document_retagger", "--document_type", "--suggest")
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.document_type, None)
self.assertEqual(d_second.document_type, None)
self.assertIsNone(d_first.document_type)
self.assertIsNone(d_second.document_type)
def test_add_correspondent_suggest(self):
call_command("document_retagger", "--correspondent", "--suggest")
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.correspondent, None)
self.assertEqual(d_second.correspondent, None)
self.assertIsNone(d_first.correspondent)
self.assertIsNone(d_second.correspondent)
def test_add_tags_suggest_url(self):
call_command(
@@ -178,8 +201,8 @@ class TestRetagger(DirectoriesMixin, TestCase):
)
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.document_type, None)
self.assertEqual(d_second.document_type, None)
self.assertIsNone(d_first.document_type)
self.assertIsNone(d_second.document_type)
def test_add_correspondent_suggest_url(self):
call_command(
@@ -190,5 +213,48 @@ class TestRetagger(DirectoriesMixin, TestCase):
)
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.correspondent, None)
self.assertEqual(d_second.correspondent, None)
self.assertIsNone(d_first.correspondent)
self.assertIsNone(d_second.correspondent)
def test_add_storage_path(self):
"""
GIVEN:
- 2 storage paths with documents which match them
- 1 document which matches but has a storage path
WHEN:
- document retagger is called
THEN:
- Matching document's storage paths updated
- Non-matching documents have no storage path
- Existing storage path left unchanged
"""
call_command(
"document_retagger",
"--storage_path",
)
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.storage_path, self.sp2)
self.assertEqual(d_auto.storage_path, self.sp1)
self.assertIsNone(d_second.storage_path)
self.assertEqual(d_unrelated.storage_path, self.sp3)
def test_overwrite_storage_path(self):
"""
GIVEN:
- 2 storage paths with documents which match them
- 1 document which matches but has a storage path
WHEN:
- document retagger is called with overwrite
THEN:
- Matching document's storage paths updated
- Non-matching documents have no storage path
- Existing storage path overwritten
"""
call_command("document_retagger", "--storage_path", "--overwrite")
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.storage_path, self.sp2)
self.assertEqual(d_auto.storage_path, self.sp1)
self.assertIsNone(d_second.storage_path)
self.assertEqual(d_unrelated.storage_path, self.sp2)

View File

@@ -1,35 +0,0 @@
import logging
from unittest import mock
from django.test import TestCase
from paperless.settings import default_task_workers
from paperless.settings import default_threads_per_worker
class TestSettings(TestCase):
@mock.patch("paperless.settings.multiprocessing.cpu_count")
def test_single_core(self, cpu_count):
cpu_count.return_value = 1
default_workers = default_task_workers()
default_threads = default_threads_per_worker(default_workers)
self.assertEqual(default_workers, 1)
self.assertEqual(default_threads, 1)
def test_workers_threads(self):
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i
default_workers = default_task_workers()
default_threads = default_threads_per_worker(default_workers)
self.assertTrue(default_workers >= 1)
self.assertTrue(default_threads >= 1)
self.assertTrue(default_workers * default_threads <= i, f"{i}")

View File

@@ -11,6 +11,7 @@ from documents.models import DocumentType
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from documents.sanity_checker import SanityCheckMessages
from documents.tests.test_classifier import dummy_preprocess
from documents.tests.utils import DirectoriesMixin
@@ -75,21 +76,26 @@ class TestClassifier(DirectoriesMixin, TestCase):
doc = Document.objects.create(correspondent=c, content="test", title="test")
self.assertFalse(os.path.isfile(settings.MODEL_FILE))
tasks.train_classifier()
self.assertTrue(os.path.isfile(settings.MODEL_FILE))
mtime = os.stat(settings.MODEL_FILE).st_mtime
with mock.patch(
"documents.classifier.DocumentClassifier.preprocess_content",
) as pre_proc_mock:
pre_proc_mock.side_effect = dummy_preprocess
tasks.train_classifier()
self.assertTrue(os.path.isfile(settings.MODEL_FILE))
mtime2 = os.stat(settings.MODEL_FILE).st_mtime
self.assertEqual(mtime, mtime2)
tasks.train_classifier()
self.assertTrue(os.path.isfile(settings.MODEL_FILE))
mtime = os.stat(settings.MODEL_FILE).st_mtime
doc.content = "test2"
doc.save()
tasks.train_classifier()
self.assertTrue(os.path.isfile(settings.MODEL_FILE))
mtime3 = os.stat(settings.MODEL_FILE).st_mtime
self.assertNotEqual(mtime2, mtime3)
tasks.train_classifier()
self.assertTrue(os.path.isfile(settings.MODEL_FILE))
mtime2 = os.stat(settings.MODEL_FILE).st_mtime
self.assertEqual(mtime, mtime2)
doc.content = "test2"
doc.save()
tasks.train_classifier()
self.assertTrue(os.path.isfile(settings.MODEL_FILE))
mtime3 = os.stat(settings.MODEL_FILE).st_mtime
self.assertNotEqual(mtime2, mtime3)
class TestSanityCheck(DirectoriesMixin, TestCase):

View File

@@ -1,3 +1,4 @@
import itertools
import json
import logging
import os
@@ -21,12 +22,13 @@ from django.db.models.functions import Lower
from django.http import Http404
from django.http import HttpResponse
from django.http import HttpResponseBadRequest
from django.shortcuts import get_object_or_404
from django.utils.decorators import method_decorator
from django.utils.translation import get_language
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from documents.tasks import consume_file
from packaging import version as packaging_version
from paperless import version
from paperless.db import GnuPG
@@ -62,6 +64,7 @@ from .matching import match_correspondents
from .matching import match_document_types
from .matching import match_storage_paths
from .matching import match_tags
from .models import Comment
from .models import Correspondent
from .models import Document
from .models import DocumentType
@@ -70,6 +73,7 @@ from .models import SavedView
from .models import StoragePath
from .models import Tag
from .parsers import get_parser_class_for_mime_type
from .parsers import parse_date_generator
from .serialisers import AcknowledgeTasksViewSerializer
from .serialisers import BulkDownloadSerializer
from .serialisers import BulkEditSerializer
@@ -257,6 +261,9 @@ class DocumentViewSet(
file_handle = doc.source_file
filename = doc.get_public_filename()
mime_type = doc.mime_type
# Support browser previewing csv files by using text mime type
if mime_type in {"application/csv", "text/csv"} and disposition == "inline":
mime_type = "text/plain"
if doc.storage_type == Document.STORAGE_TYPE_GPG:
file_handle = GnuPG.decrypted(file_handle)
@@ -313,6 +320,7 @@ class DocumentViewSet(
"original_metadata": self.get_metadata(doc.source_path, doc.mime_type),
"archive_checksum": doc.archive_checksum,
"archive_media_filename": doc.archive_filename,
"original_filename": doc.original_filename,
}
if doc.has_archive_version:
@@ -329,13 +337,15 @@ class DocumentViewSet(
@action(methods=["get"], detail=True)
def suggestions(self, request, pk=None):
try:
doc = Document.objects.get(pk=pk)
except Document.DoesNotExist:
raise Http404()
doc = get_object_or_404(Document, pk=pk)
classifier = load_classifier()
gen = parse_date_generator(doc.filename, doc.content)
dates = sorted(
{i for i in itertools.islice(gen, settings.NUMBER_OF_SUGGESTED_DATES)},
)
return Response(
{
"correspondents": [c.id for c in match_correspondents(doc, classifier)],
@@ -344,6 +354,9 @@ class DocumentViewSet(
dt.id for dt in match_document_types(doc, classifier)
],
"storage_paths": [dt.id for dt in match_storage_paths(doc, classifier)],
"dates": [
date.strftime("%Y-%m-%d") for date in dates if date is not None
],
},
)
@@ -378,6 +391,67 @@ class DocumentViewSet(
except (FileNotFoundError, Document.DoesNotExist):
raise Http404()
def getComments(self, doc):
return [
{
"id": c.id,
"comment": c.comment,
"created": c.created,
"user": {
"id": c.user.id,
"username": c.user.username,
"firstname": c.user.first_name,
"lastname": c.user.last_name,
},
}
for c in Comment.objects.filter(document=doc).order_by("-created")
]
@action(methods=["get", "post", "delete"], detail=True)
def comments(self, request, pk=None):
try:
doc = Document.objects.get(pk=pk)
except Document.DoesNotExist:
raise Http404()
currentUser = request.user
if request.method == "GET":
try:
return Response(self.getComments(doc))
except Exception as e:
logger.warning(f"An error occurred retrieving comments: {str(e)}")
return Response(
{"error": "Error retreiving comments, check logs for more detail."},
)
elif request.method == "POST":
try:
c = Comment.objects.create(
document=doc,
comment=request.data["comment"],
user=currentUser,
)
c.save()
return Response(self.getComments(doc))
except Exception as e:
logger.warning(f"An error occurred saving comment: {str(e)}")
return Response(
{
"error": "Error saving comment, check logs for more detail.",
},
)
elif request.method == "DELETE":
comment = Comment.objects.get(id=int(request.GET.get("id")))
comment.delete()
return Response(self.getComments(doc))
return Response(
{
"error": "error",
},
)
class SearchResultSerializer(DocumentSerializer):
def to_representation(self, instance):
@@ -541,8 +615,7 @@ class PostDocumentView(GenericAPIView):
task_id = str(uuid.uuid4())
async_task(
"documents.tasks.consume_file",
consume_file.delay(
temp_filename,
override_filename=doc_name,
override_title=title,
@@ -550,7 +623,6 @@ class PostDocumentView(GenericAPIView):
override_document_type_id=document_type_id,
override_tag_ids=tag_ids,
task_id=task_id,
task_name=os.path.basename(doc_name)[:100],
override_created=created,
)
@@ -709,42 +781,38 @@ class RemoteVersionView(GenericAPIView):
remote_version = "0.0.0"
is_greater_than_current = False
current_version = packaging_version.parse(version.__full_version_str__)
# TODO: this can likely be removed when frontend settings are saved to DB
feature_is_set = settings.ENABLE_UPDATE_CHECK != "default"
if feature_is_set and settings.ENABLE_UPDATE_CHECK:
try:
req = urllib.request.Request(
"https://api.github.com/repos/paperless-ngx/"
"paperless-ngx/releases/latest",
)
# Ensure a JSON response
req.add_header("Accept", "application/json")
with urllib.request.urlopen(req) as response:
remote = response.read().decode("utf-8")
try:
remote_json = json.loads(remote)
remote_version = remote_json["tag_name"]
# Basically PEP 616 but that only went in 3.9
if remote_version.startswith("ngx-"):
remote_version = remote_version[len("ngx-") :]
except ValueError:
logger.debug("An error occurred parsing remote version json")
except urllib.error.URLError:
logger.debug("An error occurred checking for available updates")
is_greater_than_current = (
packaging_version.parse(
remote_version,
)
> current_version
try:
req = urllib.request.Request(
"https://api.github.com/repos/paperless-ngx/"
"paperless-ngx/releases/latest",
)
# Ensure a JSON response
req.add_header("Accept", "application/json")
with urllib.request.urlopen(req) as response:
remote = response.read().decode("utf-8")
try:
remote_json = json.loads(remote)
remote_version = remote_json["tag_name"]
# Basically PEP 616 but that only went in 3.9
if remote_version.startswith("ngx-"):
remote_version = remote_version[len("ngx-") :]
except ValueError:
logger.debug("An error occurred parsing remote version json")
except urllib.error.URLError:
logger.debug("An error occurred checking for available updates")
is_greater_than_current = (
packaging_version.parse(
remote_version,
)
> current_version
)
return Response(
{
"version": remote_version,
"update_available": is_greater_than_current,
"feature_is_set": feature_is_set,
},
)
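The comparison relies on packaging's version parsing; a quick sketch of both the prefix strip and the comparison:

from packaging import version as packaging_version

tag_name = "ngx-1.9.2"
# Equivalent of the slice above; str.removeprefix() only exists on 3.9+.
remote_version = tag_name[len("ngx-"):] if tag_name.startswith("ngx-") else tag_name

current = packaging_version.parse("1.9.1")
print(packaging_version.parse(remote_version) > current)  # True
print(packaging_version.parse("0.0.0") > current)         # False: the fallback never reports an update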
@@ -777,15 +845,23 @@ class UiSettingsView(GenericAPIView):
displayname = user.username
if user.first_name or user.last_name:
displayname = " ".join([user.first_name, user.last_name])
settings = {}
ui_settings = {}
if hasattr(user, "ui_settings"):
settings = user.ui_settings.settings
ui_settings = user.ui_settings.settings
if "update_checking" in ui_settings:
ui_settings["update_checking"][
"backend_setting"
] = settings.ENABLE_UPDATE_CHECK
else:
ui_settings["update_checking"] = {
"backend_setting": settings.ENABLE_UPDATE_CHECK,
}
return Response(
{
"user_id": user.id,
"username": user.username,
"display_name": displayname,
"settings": settings,
"settings": ui_settings,
},
)
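The rename avoids shadowing django.conf.settings; the merge itself reduces to a small dict update, sketched here with stand-in values:

ENABLE_UPDATE_CHECK = True  # stand-in for settings.ENABLE_UPDATE_CHECK

ui_settings = {"dark_mode": True, "update_checking": {"enabled": False}}
if "update_checking" in ui_settings:
    ui_settings["update_checking"]["backend_setting"] = ENABLE_UPDATE_CHECK
else:
    ui_settings["update_checking"] = {"backend_setting": ENABLE_UPDATE_CHECK}

print(ui_settings["update_checking"])
# {'enabled': False, 'backend_setting': True}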
@@ -810,8 +886,9 @@ class TasksViewSet(ReadOnlyModelViewSet):
queryset = (
PaperlessTask.objects.filter(
acknowledged=False,
attempted_task__isnull=False,
)
.order_by("created")
.order_by("attempted_task__date_created")
.reverse()
)

View File

@@ -5,15 +5,15 @@ msgstr ""
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"Last-Translator: \n"
"Language-Team: Arabic, Saudi Arabia\n"
"Language: ar_SA\n"
"Language-Team: Arabic, Arabic\n"
"Language: ar_AR\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=6; plural=(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5);\n"
"X-Crowdin-Project: paperless-ngx\n"
"X-Crowdin-Project-ID: 500308\n"
"X-Crowdin-Language: ar-SA\n"
"X-Crowdin-Language: ar-AR\n"
"X-Crowdin-File: /dev/src/locale/en_US/LC_MESSAGES/django.po\n"
"X-Crowdin-File-ID: 14\n"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-07-29 20:44\n"
"Last-Translator: \n"
"Language-Team: Belarusian\n"
"Language: be_BY\n"
@@ -100,7 +100,7 @@ msgstr "тыпы дакументаў"
#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "шлях"
#: documents/models.py:96 documents/models.py:124
msgid "storage path"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-09-04 11:44\n"
"Last-Translator: \n"
"Language-Team: German\n"
"Language: de_DE\n"
@@ -376,7 +376,7 @@ msgstr "Filterregeln"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "gestartet"
#: documents/serialisers.py:70
#, python-format
@@ -402,7 +402,7 @@ msgstr "Paperless-ngx wird geladen..."
#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr "Du bist noch hier?! Hmm, da muss wohl etwas schief gelaufen sein."
msgstr "Du bist noch hier? Hmm, da muss wohl etwas schiefgelaufen sein."
#: documents/templates/index.html:79
msgid "Here's a link to the docs."
@@ -654,7 +654,7 @@ msgstr "Als wichtig markieren, markierte E-Mails nicht verarbeiten"
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Markiere die Mail mit dem angegebenen Tag, verarbeite nicht markierte Mails"
#: paperless_mail/models.py:71
msgid "Use subject as title"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-09-06 20:21\n"
"Last-Translator: \n"
"Language-Team: Finnish\n"
"Language: fi_FI\n"
@@ -376,7 +376,7 @@ msgstr "suodatussäännöt"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "aloitettu"
#: documents/serialisers.py:70
#, python-format
@@ -638,11 +638,11 @@ msgstr "Prosessoi kaikki tiedostot, sisältäen \"inline\"-liitteet."
#: paperless_mail/models.py:64
msgid "Delete"
msgstr ""
msgstr "Poista"
#: paperless_mail/models.py:65
msgid "Move to specified folder"
msgstr ""
msgstr "Siirrä määritettyyn kansioon"
#: paperless_mail/models.py:66
msgid "Mark as read, don't process read mails"
@@ -650,117 +650,117 @@ msgstr "Merkitse luetuksi, älä prosessoi luettuja sähköposteja"
#: paperless_mail/models.py:67
msgid "Flag the mail, don't process flagged mails"
msgstr ""
msgstr "Liputa sähköposti, älä käsittele liputettuja sähköposteja"
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Merkitse viesti määrätyllä tagilla, älä käsittele tageja"
#: paperless_mail/models.py:71
msgid "Use subject as title"
msgstr ""
msgstr "Käytä aihetta otsikkona"
#: paperless_mail/models.py:72
msgid "Use attachment filename as title"
msgstr ""
msgstr "Käytä liitteen tiedostonimeä otsikkona"
#: paperless_mail/models.py:75
msgid "Do not assign a correspondent"
msgstr ""
msgstr "Älä määritä yhteyshenkilöä"
#: paperless_mail/models.py:76
msgid "Use mail address"
msgstr ""
msgstr "Käytä sähköpostiosoitetta"
#: paperless_mail/models.py:77
msgid "Use name (or mail address if not available)"
msgstr ""
msgstr "Käytä nimeä (tai sähköpostiosoitetta, jos ei ole saatavilla)"
#: paperless_mail/models.py:78
msgid "Use correspondent selected below"
msgstr ""
msgstr "Käytä alla valittua yhteyshenkilöä"
#: paperless_mail/models.py:82
msgid "order"
msgstr ""
msgstr "järjestys"
#: paperless_mail/models.py:88
msgid "account"
msgstr ""
msgstr "tili"
#: paperless_mail/models.py:92
msgid "folder"
msgstr ""
msgstr "kansio"
#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Alikansiot on erotettava erottimella, usein pisteellä ('.') tai kauttaviivalla ('/'), mutta se vaihtelee postipalvelimen mukaan."
#: paperless_mail/models.py:102
msgid "filter from"
msgstr ""
msgstr "suodata lähettäjä-kenttä"
#: paperless_mail/models.py:108
msgid "filter subject"
msgstr ""
msgstr "suodata aihe"
#: paperless_mail/models.py:114
msgid "filter body"
msgstr ""
msgstr "suodata runko"
#: paperless_mail/models.py:121
msgid "filter attachment filename"
msgstr ""
msgstr "suodata liitteen tiedostonimi"
#: paperless_mail/models.py:126
msgid "Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
msgstr "Tuo vain dokumentit jotka täsmäävät täysin tiedostonimen suhteen. Jokerimerkit kuten *.pdf tai *lasku* ovat sallittuja. Kirjainkoko ei merkitse."
#: paperless_mail/models.py:133
msgid "maximum age"
msgstr ""
msgstr "ikä enintään"
#: paperless_mail/models.py:135
msgid "Specified in days."
msgstr ""
msgstr "Määritetty päivinä."
#: paperless_mail/models.py:139
msgid "attachment type"
msgstr ""
msgstr "liitteen tyyppi"
#: paperless_mail/models.py:143
msgid "Inline attachments include embedded images, so it's best to combine this option with a filename filter."
msgstr ""
msgstr "Sisäiset liitteet sisältävät upotettuja kuvia, joten on parasta yhdistää tämä vaihtoehto tiedostonimen suodattimeen."
#: paperless_mail/models.py:149
msgid "action"
msgstr ""
msgstr "toiminto"
#: paperless_mail/models.py:155
msgid "action parameter"
msgstr ""
msgstr "toiminnon parametrit"
#: paperless_mail/models.py:160
msgid "Additional parameter for the action selected above, i.e., the target folder of the move to folder action. Subfolders must be separated by dots."
msgstr ""
msgstr "Yllä valitun toiminnon lisäparametri eli siirrä hakemistoon -toiminnon kohdehakemisto. Alikansiot on erotettava toisistaan pisteillä."
#: paperless_mail/models.py:168
msgid "assign title from"
msgstr ""
msgstr "aseta otsikko kohteesta"
#: paperless_mail/models.py:176
msgid "assign this tag"
msgstr ""
msgstr "määritä tämä tunniste"
#: paperless_mail/models.py:184
msgid "assign this document type"
msgstr ""
msgstr "määritä tämä asiakirjatyyppi"
#: paperless_mail/models.py:188
msgid "assign correspondent from"
msgstr ""
msgstr "määritä kirjeenvaihtaja kohteesta"
#: paperless_mail/models.py:198
msgid "assign this correspondent"
msgstr ""
msgstr "määritä tämä kirjeenvaihtaja"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-09-07 21:41\n"
"Last-Translator: \n"
"Language-Team: French\n"
"Language: fr_FR\n"
@@ -100,15 +100,15 @@ msgstr "types de document"
#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "chemin"
#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "chemin de stockage"
#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "chemins de stockage"
#: documents/models.py:105
msgid "Unencrypted"
@@ -376,7 +376,7 @@ msgstr "règles de filtrage"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "démarré"
#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Type de fichier %(type)s non pris en charge"
#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Variable non valide détectée."
#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,11 +402,11 @@ msgstr "Paperless-ngx est en cours de chargement..."
#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Toujours ici ? Hum, quelque chose a dû mal se passer."
#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Lien vers la documentation."
#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
@@ -450,7 +450,7 @@ msgstr "Anglais (US)"
#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Biélorusse"
#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Russe"
#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Slovène"
#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Serbe"
#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Suédois"
#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Turc"
#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Chinois simplifié"
#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -654,7 +654,7 @@ msgstr "Marquer le courriel, ne pas traiter les courriels marqués"
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Affecter létiquette spécifée au courrier, ne pas traiter les courriels étiquetés"
#: paperless_mail/models.py:71
msgid "Use subject as title"
@@ -694,7 +694,7 @@ msgstr "répertoire"
#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Les sous-dossiers doivent être séparés par un délimiteurs, souvent un point ('.') ou un slash ('/'), en fonction du serveur de messagerie."
#: paperless_mail/models.py:102
msgid "filter from"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-03 11:24\n"
"Last-Translator: \n"
"Language-Team: Italian\n"
"Language: it_IT\n"
@@ -376,7 +376,7 @@ msgstr "regole filtro"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "avviato"
#: documents/serialisers.py:70
#, python-format
@@ -654,7 +654,7 @@ msgstr "Contrassegna la email, non elaborare le email elaborate."
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Etichetta la posta con il tag specificato, non processare le email etichettate"
#: paperless_mail/models.py:71
msgid "Use subject as title"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-26 20:54\n"
"Last-Translator: \n"
"Language-Team: Dutch\n"
"Language: nl_NL\n"
@@ -100,15 +100,15 @@ msgstr "documenttypen"
#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "pad"
#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "opslag pad"
#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "opslag paden"
#: documents/models.py:105
msgid "Unencrypted"
@@ -376,7 +376,7 @@ msgstr "filterregels"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "gestart"
#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Bestandstype %(type)s niet ondersteund"
#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Ongeldige variabele ontdekt."
#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,7 +402,7 @@ msgstr "Paperless-ngx is aan het laden..."
#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Nog steeds hier?! Hmm, er kan iets mis zijn."
#: documents/templates/index.html:79
msgid "Here's a link to the docs."
@@ -450,7 +450,7 @@ msgstr "Engels (US)"
#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Wit-Russisch"
#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Russisch"
#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Sloveens"
#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Servisch"
#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Zweeds"
#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Turks"
#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Chinees (vereenvoudigd)"
#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -654,7 +654,7 @@ msgstr "Markeer de mail, verwerk geen mails met markering"
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Tag de mail met de opgegeven tag, verwerk geen getagde mails"
#: paperless_mail/models.py:71
msgid "Use subject as title"
@@ -694,7 +694,7 @@ msgstr "map"
#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Submappen moeten gescheiden worden door een scheidingsteken, vaak een punt ('.') of slash ('/'), maar het varieert per mailserver."
#: paperless_mail/models.py:102
msgid "filter from"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-03 08:59\n"
"Last-Translator: \n"
"Language-Team: Norwegian\n"
"Language: no_NO\n"
@@ -220,7 +220,7 @@ msgstr "kritisk"
#: documents/models.py:325
msgid "group"
msgstr ""
msgstr "gruppe"
#: documents/models.py:327
msgid "message"
@@ -228,11 +228,11 @@ msgstr "melding"
#: documents/models.py:330
msgid "level"
msgstr ""
msgstr "nivå"
#: documents/models.py:339
msgid "log"
msgstr "log"
msgstr "Logg"
#: documents/models.py:340
msgid "logs"
@@ -240,11 +240,11 @@ msgstr "logger"
#: documents/models.py:350 documents/models.py:403
msgid "saved view"
msgstr ""
msgstr "lagret visning"
#: documents/models.py:351
msgid "saved views"
msgstr ""
msgstr "lagrede visninger"
#: documents/models.py:353
msgid "user"
@@ -252,35 +252,35 @@ msgstr "bruker"
#: documents/models.py:357
msgid "show on dashboard"
msgstr ""
msgstr "vis på dashbordet"
#: documents/models.py:360
msgid "show in sidebar"
msgstr ""
msgstr "vis i sidestolpen"
#: documents/models.py:364
msgid "sort field"
msgstr ""
msgstr "sorter felt"
#: documents/models.py:369
msgid "sort reverse"
msgstr ""
msgstr "sorter på baksiden"
#: documents/models.py:374
msgid "title contains"
msgstr ""
msgstr "tittelen inneholder"
#: documents/models.py:375
msgid "content contains"
msgstr ""
msgstr "innholdet inneholder"
#: documents/models.py:376
msgid "ASN is"
msgstr ""
msgstr "ASN er"
#: documents/models.py:377
msgid "correspondent is"
msgstr ""
msgstr "tilsvarendet er"
#: documents/models.py:378
msgid "document type is"
@@ -288,15 +288,15 @@ msgstr "dokumenttype er"
#: documents/models.py:379
msgid "is in inbox"
msgstr ""
msgstr "er i innboksen"
#: documents/models.py:380
msgid "has tag"
msgstr ""
msgstr "har tagg"
#: documents/models.py:381
msgid "has any tag"
msgstr ""
msgstr "har en tag"
#: documents/models.py:382
msgid "created before"
@@ -304,125 +304,125 @@ msgstr "opprettet før"
#: documents/models.py:383
msgid "created after"
msgstr ""
msgstr "opprettet etter"
#: documents/models.py:384
msgid "created year is"
msgstr ""
msgstr "opprettet år er"
#: documents/models.py:385
msgid "created month is"
msgstr ""
msgstr "opprettet måned er"
#: documents/models.py:386
msgid "created day is"
msgstr ""
msgstr "opprettet dag er"
#: documents/models.py:387
msgid "added before"
msgstr ""
msgstr "lagt til før"
#: documents/models.py:388
msgid "added after"
msgstr ""
msgstr "lagt til etter"
#: documents/models.py:389
msgid "modified before"
msgstr ""
msgstr "endret før"
#: documents/models.py:390
msgid "modified after"
msgstr ""
msgstr "endret etter"
#: documents/models.py:391
msgid "does not have tag"
msgstr ""
msgstr "har ikke tagg"
#: documents/models.py:392
msgid "does not have ASN"
msgstr ""
msgstr "har ikke ASN"
#: documents/models.py:393
msgid "title or content contains"
msgstr ""
msgstr "tittel eller innhold inneholder"
#: documents/models.py:394
msgid "fulltext query"
msgstr ""
msgstr "full tekst spørring"
#: documents/models.py:395
msgid "more like this"
msgstr ""
msgstr "mer som dette"
#: documents/models.py:396
msgid "has tags in"
msgstr ""
msgstr "har tags i"
#: documents/models.py:406
msgid "rule type"
msgstr ""
msgstr "Type regel"
#: documents/models.py:408
msgid "value"
msgstr ""
msgstr "verdi"
#: documents/models.py:411
msgid "filter rule"
msgstr ""
msgstr "filtrer regel"
#: documents/models.py:412
msgid "filter rules"
msgstr ""
msgstr "filtrer regler"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "startet"
#: documents/serialisers.py:70
#, python-format
msgid "Invalid regular expression: %(error)s"
msgstr ""
msgstr "Ugyldig regulært uttrykk: %(error)s"
#: documents/serialisers.py:191
msgid "Invalid color."
msgstr ""
msgstr "Ugyldig farge."
#: documents/serialisers.py:515
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
msgstr "Filtype %(type)s støttes ikke"
#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Ugyldig variabel oppdaget."
#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
msgstr ""
msgstr "Paperless-ngx laster..."
#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Fortsatt her?! Hmm, noe kan være galt."
#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Her er en lenke til dokkene."
#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
msgstr ""
msgstr "Paperless-ngx logget ut"
#: documents/templates/registration/logged_out.html:59
msgid "You have been successfully logged out. Bye!"
msgstr ""
msgstr "Du har blitt logget ut. Av!"
#: documents/templates/registration/logged_out.html:60
msgid "Sign in again"
msgstr ""
msgstr "Logg inn igjen"
#: documents/templates/registration/login.html:15
msgid "Paperless-ngx sign in"
msgstr ""
msgstr "Paperless-ngx-tegn inn"
#: documents/templates/registration/login.html:61
msgid "Please sign in."
@@ -450,63 +450,63 @@ msgstr "Engelsk (US)"
#: paperless/settings.py:340
msgid "Belarusian"
msgstr "Belarusian"
msgstr "Hviterussisk"
#: paperless/settings.py:341
msgid "Czech"
msgstr "Czech"
msgstr "Tsjekkisk"
#: paperless/settings.py:342
msgid "Danish"
msgstr "Danish"
msgstr "Dansk"
#: paperless/settings.py:343
msgid "German"
msgstr "German"
msgstr "Tysk"
#: paperless/settings.py:344
msgid "English (GB)"
msgstr "English (GB)"
msgstr "Engelsk (GB)"
#: paperless/settings.py:345
msgid "Spanish"
msgstr "Spanish"
msgstr "Spansk"
#: paperless/settings.py:346
msgid "French"
msgstr "French"
msgstr "Fransk"
#: paperless/settings.py:347
msgid "Italian"
msgstr "Italian"
msgstr "Italiensk"
#: paperless/settings.py:348
msgid "Luxembourgish"
msgstr "Luxembourgish"
msgstr "Luxembourgsk"
#: paperless/settings.py:349
msgid "Dutch"
msgstr "Dutch"
msgstr "Nederlandsk"
#: paperless/settings.py:350
msgid "Polish"
msgstr "Polish"
msgstr "Polsk"
#: paperless/settings.py:351
msgid "Portuguese (Brazil)"
msgstr "Portuguese (Brazil)"
msgstr "Portugisisk (Brasil)"
#: paperless/settings.py:352
msgid "Portuguese"
msgstr "Portuguese"
msgstr "Portugisisk"
#: paperless/settings.py:353
msgid "Romanian"
msgstr "Romanian"
msgstr "Rumensk"
#: paperless/settings.py:354
msgid "Russian"
msgstr "Russian"
msgstr "Russisk"
#: paperless/settings.py:355
msgid "Slovenian"
@@ -514,19 +514,19 @@ msgstr "Slovenian"
#: paperless/settings.py:356
msgid "Serbian"
msgstr "Serbian"
msgstr "Serbisk"
#: paperless/settings.py:357
msgid "Swedish"
msgstr "Swedish"
msgstr "Svensk"
#: paperless/settings.py:358
msgid "Turkish"
msgstr "Turkish"
msgstr "Tyrkisk"
#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr "Chinese Simplified"
msgstr "Kinesisk forenklet"
#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -542,7 +542,7 @@ msgstr "Avanserte innstillinger"
#: paperless_mail/admin.py:47
msgid "Filter"
msgstr "Filter"
msgstr "Filtrer"
#: paperless_mail/admin.py:50
msgid "Paperless will only process mails that match ALL of the filters given below."
@@ -554,19 +554,19 @@ msgstr "Handlinger"
#: paperless_mail/admin.py:67
msgid "The action applied to the mail. This action is only performed when documents were consumed from the mail. Mails without attachments will remain entirely untouched."
msgstr ""
msgstr "Handlingen som brukes på e-posten. Denne handlingen blir bare utført når dokumenter blir forbrukt av e-posten. Mailer uten vedlegg forblir helt urørte."
#: paperless_mail/admin.py:75
msgid "Metadata"
msgstr "Metadata"
msgstr "Nøkkeldata"
#: paperless_mail/admin.py:78
msgid "Assign metadata to documents consumed from this rule automatically. If you do not assign tags, types or correspondents here, paperless will still process all matching rules that you have defined."
msgstr ""
msgstr "Tilordne metadata til dokumenter som brukes fra denne regelen automatisk. Hvis du ikke tilordner etiketter, typer eller korrespondenter her, vil papirløs fremdeles behandle alle matchende regler som du har definert."
#: paperless_mail/apps.py:8
msgid "Paperless mail"
msgstr ""
msgstr "Paperløst e-post"
#: paperless_mail/models.py:8
msgid "mail account"
@@ -586,23 +586,23 @@ msgstr "Bruk SSL"
#: paperless_mail/models.py:14
msgid "Use STARTTLS"
msgstr ""
msgstr "Bruk STARTTLS"
#: paperless_mail/models.py:18
msgid "IMAP server"
msgstr ""
msgstr "IMAP tjener"
#: paperless_mail/models.py:21
msgid "IMAP port"
msgstr ""
msgstr "IMAP port"
#: paperless_mail/models.py:25
msgid "This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections."
msgstr ""
msgstr "Dette er vanligvis 143 for ukrypterte og STARTTLS-tilkoblinger, og 993 for SSL-tilkoblinger."
#: paperless_mail/models.py:31
msgid "IMAP security"
msgstr ""
msgstr "IMAP sikkerhet"
#: paperless_mail/models.py:36
msgid "username"
@@ -618,7 +618,7 @@ msgstr "tegnsett"
#: paperless_mail/models.py:45
msgid "The character set to use when communicating with the mail server, such as 'UTF-8' or 'US-ASCII'."
msgstr ""
msgstr "Tegnet som skal brukes ved kommunikasjon med e-posttjeneren, som for eksempel 'UTF-8' eller 'US-ASCII'."
#: paperless_mail/models.py:56
msgid "mail rule"
@@ -626,141 +626,141 @@ msgstr "e-post regel"
#: paperless_mail/models.py:57
msgid "mail rules"
msgstr ""
msgstr "Epost regler"
#: paperless_mail/models.py:60
msgid "Only process attachments."
msgstr ""
msgstr "Bare behandle vedlegg."
#: paperless_mail/models.py:61
msgid "Process all files, including 'inline' attachments."
msgstr ""
msgstr "Behandle alle filer, inkludert \"inline\"-vedlegg."
#: paperless_mail/models.py:64
msgid "Delete"
msgstr ""
msgstr "Slett"
#: paperless_mail/models.py:65
msgid "Move to specified folder"
msgstr ""
msgstr "Flytt til angitt mappe"
#: paperless_mail/models.py:66
msgid "Mark as read, don't process read mails"
msgstr ""
msgstr "Merk som lest og ikke behandle e-post"
#: paperless_mail/models.py:67
msgid "Flag the mail, don't process flagged mails"
msgstr ""
msgstr "Marker posten, ikke behandle flaggede meldinger"
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Merk e-post med angitte tag, ikke bruk merkede meldinger"
#: paperless_mail/models.py:71
msgid "Use subject as title"
msgstr ""
msgstr "Bruk emne som tittel"
#: paperless_mail/models.py:72
msgid "Use attachment filename as title"
msgstr ""
msgstr "Bruk vedlagte filnavn som tittel"
#: paperless_mail/models.py:75
msgid "Do not assign a correspondent"
msgstr ""
msgstr "Ikke tildel en korrespondent"
#: paperless_mail/models.py:76
msgid "Use mail address"
msgstr ""
msgstr "Bruk e-postadresse"
#: paperless_mail/models.py:77
msgid "Use name (or mail address if not available)"
msgstr ""
msgstr "Bruk navn (eller e-postadresse hvis det ikke er tilgjengelig)"
#: paperless_mail/models.py:78
msgid "Use correspondent selected below"
msgstr ""
msgstr "Bruk tilsvarende valgt nedenfor"
#: paperless_mail/models.py:82
msgid "order"
msgstr ""
msgstr "ordre"
#: paperless_mail/models.py:88
msgid "account"
msgstr ""
msgstr "konto"
#: paperless_mail/models.py:92
msgid "folder"
msgstr ""
msgstr "mappe"
#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Undermapper må være atskilt av en skilletegn, ofte en punktum ('.') eller skråstrek ('/'), men den varierer fra e-postserver."
#: paperless_mail/models.py:102
msgid "filter from"
msgstr ""
msgstr "filtrer fra"
#: paperless_mail/models.py:108
msgid "filter subject"
msgstr ""
msgstr "filtrer emne"
#: paperless_mail/models.py:114
msgid "filter body"
msgstr ""
msgstr "filtrer innhold"
#: paperless_mail/models.py:121
msgid "filter attachment filename"
msgstr ""
msgstr "filtrer vedlagte filnavn"
#: paperless_mail/models.py:126
msgid "Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
msgstr "Bare bruke dokumenter som samsvarer med dette filnavnet hvis angitt. Jokertegn som *.pdf eller *faktura* er tillatt. Saksfortegnet."
#: paperless_mail/models.py:133
msgid "maximum age"
msgstr ""
msgstr "maksimal alder"
#: paperless_mail/models.py:135
msgid "Specified in days."
msgstr ""
msgstr "Spesifisert i dager"
#: paperless_mail/models.py:139
msgid "attachment type"
msgstr ""
msgstr "vedlegg type"
#: paperless_mail/models.py:143
msgid "Inline attachments include embedded images, so it's best to combine this option with a filename filter."
msgstr ""
msgstr "Innebygde vedlegg inkluderer innebygde bilder, så det er best å kombinere dette alternativet med et filter."
#: paperless_mail/models.py:149
msgid "action"
msgstr ""
msgstr "handling"
#: paperless_mail/models.py:155
msgid "action parameter"
msgstr ""
msgstr "parameter for handling"
#: paperless_mail/models.py:160
msgid "Additional parameter for the action selected above, i.e., the target folder of the move to folder action. Subfolders must be separated by dots."
msgstr ""
msgstr "Ytterligere parameter for handlingen valgt ovenfor, dvs. målmappen for flytting til mappehandling. Undermapper må separeres med punkter."
#: paperless_mail/models.py:168
msgid "assign title from"
msgstr ""
msgstr "tilordne tittel fra"
#: paperless_mail/models.py:176
msgid "assign this tag"
msgstr ""
msgstr "tilordne denne taggen"
#: paperless_mail/models.py:184
msgid "assign this document type"
msgstr ""
msgstr "tilordne denne dokumenttypen"
#: paperless_mail/models.py:188
msgid "assign correspondent from"
msgstr ""
msgstr "Tildel korrespondent fra"
#: paperless_mail/models.py:198
msgid "assign this correspondent"
msgstr ""
msgstr "Tildel denne korrespondenten"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-17 11:20\n"
"Last-Translator: \n"
"Language-Team: Polish\n"
"Language: pl_PL\n"
@@ -376,7 +376,7 @@ msgstr "reguły filtrowania"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "start"
#: documents/serialisers.py:70
#, python-format
@@ -654,7 +654,7 @@ msgstr "Oznacz wiadomość, nie przetwarzaj oznaczonych wiadomości"
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Oznacz pocztę z podanym tagiem, nie przetwarzaj otagowanych wiadomości"
#: paperless_mail/models.py:71
msgid "Use subject as title"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-03 16:12\n"
"Last-Translator: \n"
"Language-Team: Russian\n"
"Language: ru_RU\n"
@@ -100,15 +100,15 @@ msgstr "типы документов"
#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "путь"
#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "путь к хранилищу"
#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "пути хранения"
#: documents/models.py:105
msgid "Unencrypted"
@@ -376,7 +376,7 @@ msgstr "правила фильтрации"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "запущено"
#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Тип файла %(type)s не поддерживается"
#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Обнаружена неверная переменная."
#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,11 +402,11 @@ msgstr "Paperless-ngx загружается..."
#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Все еще здесь?! Хмм, возможно что-то не так."
#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Вот ссылка на документацию."
#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
@@ -450,7 +450,7 @@ msgstr "Английский (США)"
#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Белорусский"
#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Русский"
#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Словенский"
#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Сербский"
#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Шведский"
#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Турецкий"
#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Китайский упрощенный"
#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -654,7 +654,7 @@ msgstr "Пометить почту, не обрабатывать помече
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Отметить почту указанным тегом, не обрабатывать помеченные письма"
#: paperless_mail/models.py:71
msgid "Use subject as title"
@@ -694,7 +694,7 @@ msgstr "каталог"
#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Подпапки должны быть отделены разделителем, часто точкой ('.') или косой чертой ('/'), но это зависит от почтового сервера."
#: paperless_mail/models.py:102
msgid "filter from"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-25 12:46\n"
"Last-Translator: \n"
"Language-Team: Slovenian\n"
"Language: sl_SI\n"
@@ -100,15 +100,15 @@ msgstr "vrste dokumentov"
#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "pot"
#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "pot do shrambe"
#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "poti do shrambe"
#: documents/models.py:105
msgid "Unencrypted"
@@ -376,7 +376,7 @@ msgstr "filtriraj pravila"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "zagnano"
#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Vrsta datoteke %(type)s ni podprta"
#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Zaznani neveljavni znaki."
#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,11 +402,11 @@ msgstr "Paperless-ngx se nalaga..."
#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Še vedno tam? Hmm, kot kaže je šlo nekaj narobe."
#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Tu je povezava do dokumentacije."
#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
@@ -450,7 +450,7 @@ msgstr "Angleščina (ZDA)"
#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Beloruščina"
#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Ruščina"
#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Slovenščina"
#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Srbščina"
#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Švedščina"
#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Turščina"
#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Poenostavljena kitajščina"
#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -654,7 +654,7 @@ msgstr "Označite pošto z zastavico, ne obdelujte označene pošte"
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Označi pošto s določeno oznako, ne procesiraj označene pošte"
#: paperless_mail/models.py:71
msgid "Use subject as title"
@@ -694,7 +694,7 @@ msgstr "mapa"
#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Podmape morajo biti ločene s znakom, običajno je to pika (.) ali slash ('/'), je pa odvisno od poštnega strežnika."
#: paperless_mail/models.py:102
msgid "filter from"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-04 23:55\n"
"Last-Translator: \n"
"Language-Team: Serbian (Latin)\n"
"Language: sr_CS\n"
@@ -60,15 +60,15 @@ msgstr "algoritam podudaranja"
#: documents/models.py:47
msgid "is insensitive"
msgstr ""
msgstr "bez razlike veliko/malo slovo"
#: documents/models.py:60 documents/models.py:115
msgid "correspondent"
msgstr "dopisnik"
msgstr "korespodent"
#: documents/models.py:61
msgid "correspondents"
msgstr "dopisnici"
msgstr "korespodenti"
#: documents/models.py:66
msgid "color"
@@ -80,7 +80,7 @@ msgstr "je oznaka prijemnog sandučeta"
#: documents/models.py:72
msgid "Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags."
msgstr ""
msgstr "Označava ovu oznaku kao oznaku prijemnog sandučeta (inbox): Svi novoobrađeni dokumenti će biti označeni oznakama prijemnog sandučeta (inbox)."
#: documents/models.py:78
msgid "tag"
@@ -100,23 +100,23 @@ msgstr "tipovi dokumenta"
#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "putanja"
#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "putanja skladišta"
#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "putanja skladišta"
#: documents/models.py:105
msgid "Unencrypted"
msgstr ""
msgstr "Nešifrovano"
#: documents/models.py:106
msgid "Encrypted with GNU Privacy Guard"
msgstr ""
msgstr "Šifrovano pomoću GNU Privacy Guard"
#: documents/models.py:127
msgid "title"
@@ -128,7 +128,7 @@ msgstr "sadržaj"
#: documents/models.py:142
msgid "The raw, text-only data of the document. This field is primarily used for searching."
msgstr ""
msgstr "Neobrađeni tekstualni podaci dokumenta. Ovo se polje koristi prvenstveno za pretraživanje."
#: documents/models.py:147
msgid "mime type"
@@ -172,7 +172,7 @@ msgstr "naziv fajla"
#: documents/models.py:204
msgid "Current filename in storage"
msgstr ""
msgstr "Trenutni naziv sačuvane datoteke"
#: documents/models.py:208
msgid "archive filename"
@@ -180,7 +180,7 @@ msgstr "naziv fajla arhive"
#: documents/models.py:214
msgid "Current archive filename in storage"
msgstr ""
msgstr "Trenutni naziv arhivirane sačuvane datoteke"
#: documents/models.py:218
msgid "archive serial number"
@@ -188,7 +188,7 @@ msgstr "arhivski serijski broj"
#: documents/models.py:224
msgid "The position of this document in your physical document archive."
msgstr ""
msgstr "Položaj ovog dokumenta u vašoj fizičkoj arhivi dokumenata."
#: documents/models.py:230
msgid "document"
@@ -264,7 +264,7 @@ msgstr "polje za sortiranje"
#: documents/models.py:369
msgid "sort reverse"
msgstr ""
msgstr "obrnuto sortiranje"
#: documents/models.py:374
msgid "title contains"
@@ -280,7 +280,7 @@ msgstr "ASN je"
#: documents/models.py:377
msgid "correspondent is"
msgstr "dopisnik je"
msgstr "korespodent je"
#: documents/models.py:378
msgid "document type is"
@@ -348,7 +348,7 @@ msgstr "naslov i sadržaj sadrži"
#: documents/models.py:394
msgid "fulltext query"
msgstr ""
msgstr "upit za ceo tekst"
#: documents/models.py:395
msgid "more like this"
@@ -376,12 +376,12 @@ msgstr "filter pravila"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "pokrenuto"
#: documents/serialisers.py:70
#, python-format
msgid "Invalid regular expression: %(error)s"
msgstr ""
msgstr "Nevažeći regularni izraz: %(error)s"
#: documents/serialisers.py:191
msgid "Invalid color."
@@ -390,11 +390,11 @@ msgstr "Nevažeća boja."
#: documents/serialisers.py:515
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
msgstr "Vrsta datoteke %(type)s nije podržana"
#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Otkrivena je nevažeća promenljiva."
#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,19 +402,19 @@ msgstr "Paperless-ngx se učitava..."
#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Još uvek si ovde?! Hmm, možda nešto nije u redu."
#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Veze ka dokumentima."
#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
msgstr ""
msgstr "Paperless-ngx odjavljen"
#: documents/templates/registration/logged_out.html:59
msgid "You have been successfully logged out. Bye!"
msgstr ""
msgstr "Uspešno ste se odjavili!"
#: documents/templates/registration/logged_out.html:60
msgid "Sign in again"
@@ -422,7 +422,7 @@ msgstr "Prijavitе sе ponovo"
#: documents/templates/registration/login.html:15
msgid "Paperless-ngx sign in"
msgstr ""
msgstr "Paperless-ngx prijava"
#: documents/templates/registration/login.html:61
msgid "Please sign in."
@@ -430,7 +430,7 @@ msgstr "Prijavite se."
#: documents/templates/registration/login.html:64
msgid "Your username and password didn't match. Please try again."
msgstr ""
msgstr "Vaše korisničko ime i lozinka ne odgovaraju. Molimo pokušajte ponovo."
#: documents/templates/registration/login.html:67
msgid "Username"
@@ -450,7 +450,7 @@ msgstr "Engleski (US)"
#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Beloruski"
#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Ruski"
#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Slovenački"
#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Srpski"
#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Švedski"
#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Turski"
#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Kineski pojednostavljen"
#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -534,7 +534,7 @@ msgstr "Paperless-ngx administracija"
#: paperless_mail/admin.py:29
msgid "Authentication"
msgstr ""
msgstr "Autentifikacija"
#: paperless_mail/admin.py:30
msgid "Advanced settings"
@@ -546,7 +546,7 @@ msgstr "Filter"
#: paperless_mail/admin.py:50
msgid "Paperless will only process mails that match ALL of the filters given below."
msgstr ""
msgstr "Paperless-ngx će obrađivati samo e-poštu koja odgovara SVIM filterima navedenim u nastavku."
#: paperless_mail/admin.py:64
msgid "Actions"
@@ -554,7 +554,7 @@ msgstr "Radnje"
#: paperless_mail/admin.py:67
msgid "The action applied to the mail. This action is only performed when documents were consumed from the mail. Mails without attachments will remain entirely untouched."
msgstr ""
msgstr "Akcija se odnosi na e-poštu. Ova se radnja izvodi samo ako su dokumenti konzumirani iz e-pošte. E-pošta bez priloga ostat će u potpunosti netaknuta."
#: paperless_mail/admin.py:75
msgid "Metadata"
@@ -562,7 +562,7 @@ msgstr "Metapodaci"
#: paperless_mail/admin.py:78
msgid "Assign metadata to documents consumed from this rule automatically. If you do not assign tags, types or correspondents here, paperless will still process all matching rules that you have defined."
msgstr ""
msgstr "Automatski dodelite metapodatke dokumentima koji se koriste iz ovog pravila. Ako ne dodelite oznaku, vrstu ili korespodenta, Paperless-ngx će i dalje obraditi sva pravila podudaranja koja ste definisali."
#: paperless_mail/apps.py:8
msgid "Paperless mail"
@@ -578,7 +578,7 @@ msgstr "mejl nalozi"
#: paperless_mail/models.py:12
msgid "No encryption"
msgstr ""
msgstr "Nema enkripcije"
#: paperless_mail/models.py:13
msgid "Use SSL"
@@ -598,7 +598,7 @@ msgstr "IMAP port"
#: paperless_mail/models.py:25
msgid "This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections."
msgstr ""
msgstr "Uobičajno 143 za nešifrovane i STARTTLS veze, a 993 za SSL veze."
#: paperless_mail/models.py:31
msgid "IMAP security"
@@ -618,23 +618,23 @@ msgstr "karakter set"
#: paperless_mail/models.py:45
msgid "The character set to use when communicating with the mail server, such as 'UTF-8' or 'US-ASCII'."
msgstr ""
msgstr "Skup znakova koji se koristi pri komunikaciji sa mejl serverom, poput 'UTF-8' ili 'US-ASCII'."
#: paperless_mail/models.py:56
msgid "mail rule"
msgstr ""
msgstr "pravilo e-pošte"
#: paperless_mail/models.py:57
msgid "mail rules"
msgstr ""
msgstr "pravila e-pošte"
#: paperless_mail/models.py:60
msgid "Only process attachments."
msgstr ""
msgstr "Obradi samo priloge."
#: paperless_mail/models.py:61
msgid "Process all files, including 'inline' attachments."
msgstr ""
msgstr "Obradite sve datoteke, uključujući \"umetnute\" priloge."
#: paperless_mail/models.py:64
msgid "Delete"
@@ -642,31 +642,31 @@ msgstr "Obriši"
#: paperless_mail/models.py:65
msgid "Move to specified folder"
msgstr ""
msgstr "Premesti u određen folder"
#: paperless_mail/models.py:66
msgid "Mark as read, don't process read mails"
msgstr ""
msgstr "Označi kao pročitano. Ne obrađuj pročitanu e-poštu"
#: paperless_mail/models.py:67
msgid "Flag the mail, don't process flagged mails"
msgstr ""
msgstr "Označi poštu zastavicom. Ne obrađuj e-poštu sa zastavicom"
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Označite poštu specifičnom oznakom. Ne obrađuj e-poštu s specifičnom oznakom"
#: paperless_mail/models.py:71
msgid "Use subject as title"
msgstr ""
msgstr "Koristi predmet kao naziv"
#: paperless_mail/models.py:72
msgid "Use attachment filename as title"
msgstr ""
msgstr "Koristi naziv datoteke priloga kao naziv"
#: paperless_mail/models.py:75
msgid "Do not assign a correspondent"
msgstr "Ne dodeljuj dopisnika"
msgstr "Ne dodeljuj korespodenta"
#: paperless_mail/models.py:76
msgid "Use mail address"
@@ -678,7 +678,7 @@ msgstr "Koristi naziv (ili mejl adresu ako nije dostupno)"
#: paperless_mail/models.py:78
msgid "Use correspondent selected below"
msgstr "Koristi dopisnika ispod"
msgstr "Koristi koreespodenta ispod"
#: paperless_mail/models.py:82
msgid "order"
@@ -694,7 +694,7 @@ msgstr "folder"
#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Podfolderi moraju biti odvojeni separatorom, često tačkom ('.') ili kosom crtom ('/'), ali to se razlikuje zavisno od servera e-pošte."
#: paperless_mail/models.py:102
msgid "filter from"
@@ -714,15 +714,15 @@ msgstr "filter naziv fajla priloga"
#: paperless_mail/models.py:126
msgid "Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
msgstr "Konzumirajte samo dokumente koji u potpunosti odgovaraju ovom nazivu datoteke ako je navedeno. Dopušteni su zamenski znakovi kao što su *.pdf ili *faktura*. Neosetljivo je na mala i mala slova."
#: paperless_mail/models.py:133
msgid "maximum age"
msgstr ""
msgstr "maksimalna starost"
#: paperless_mail/models.py:135
msgid "Specified in days."
msgstr ""
msgstr "Navedeno u danima."
#: paperless_mail/models.py:139
msgid "attachment type"
@@ -730,7 +730,7 @@ msgstr "tip priloga"
#: paperless_mail/models.py:143
msgid "Inline attachments include embedded images, so it's best to combine this option with a filename filter."
msgstr ""
msgstr "Ugrađeni prilozi uključuju ugrađene slike, pa je najbolje kombinovati ovu opciju s filterom naziva datoteke."
#: paperless_mail/models.py:149
msgid "action"
@@ -738,11 +738,11 @@ msgstr "radnja"
#: paperless_mail/models.py:155
msgid "action parameter"
msgstr ""
msgstr "parametar akcije"
#: paperless_mail/models.py:160
msgid "Additional parameter for the action selected above, i.e., the target folder of the move to folder action. Subfolders must be separated by dots."
msgstr ""
msgstr "Dodatni parametar za gore odabranu akciju, tj. ciljani folder za premeštanje u folder akcije. Podfolderi moraju biti odvojeni tačkama."
#: paperless_mail/models.py:168
msgid "assign title from"
@@ -758,9 +758,9 @@ msgstr "dodeli ovaj tip dokumenta"
#: paperless_mail/models.py:188
msgid "assign correspondent from"
msgstr "dodeli dopisnika iz"
msgstr "dodeli korespodenta iz"
#: paperless_mail/models.py:198
msgid "assign this correspondent"
msgstr "dodeli ovog dopisnika"
msgstr "dodeli ovog korspodenta"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-01 19:02\n"
"Last-Translator: \n"
"Language-Team: Turkish\n"
"Language: tr_TR\n"
@@ -80,7 +80,7 @@ msgstr "gelen kutu etiketidir"
#: documents/models.py:72
msgid "Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags."
msgstr "Bu etiketi, gelen kutusu etiketi olarak işaretle: Tüm yeni olarak tüketilen dökümanlar gelen kutusu etiketi ile etiketlendirileceklerdir."
msgstr "Bu etiketi, gelen kutusu etiketi olarak işaretle: Yeni aktarılan tüm dokümanlar gelen kutusu etiketi ile etiketlendirileceklerdir."
#: documents/models.py:78
msgid "tag"
@@ -376,7 +376,7 @@ msgstr "filtreleme kuralları"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "başladı"
#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Dosya türü %(type)s desteklenmiyor"
#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Geçersiz değişken algılandı."
#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,7 +402,7 @@ msgstr "Paperless-ngx yükleniyor..."
#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Hâlâ burada mısınız? Hmm, bir şeyler yanlış olabilir."
#: documents/templates/index.html:79
msgid "Here's a link to the docs."
@@ -450,7 +450,7 @@ msgstr "İngilizce (Birleşik Devletler)"
#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Belarusça"
#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Rusça"
#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Slovakça"
#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Sırpça"
#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "İsveççe"
#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Türkçe"
#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Basitleştirilmiş Çince"
#: paperless/urls.py:161
msgid "Paperless-ngx administration"

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-07-15 04:02\n"
"Last-Translator: \n"
"Language-Team: Chinese Simplified\n"
"Language: zh_CN\n"
@@ -376,7 +376,7 @@ msgstr "过滤规则"
#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "已开始"
#: documents/serialisers.py:70
#, python-format
@@ -654,7 +654,7 @@ msgstr "标记邮件,不处理已标记的邮件"
#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "用指定标签标记邮件,不要处理已标记的邮件"
#: paperless_mail/models.py:71
msgid "Use subject as title"

View File

@@ -1,4 +1,11 @@
from .celery import app as celery_app
from .checks import binaries_check
from .checks import paths_check
from .checks import settings_values_check
__all__ = ["binaries_check", "paths_check"]
__all__ = [
"celery_app",
"binaries_check",
"paths_check",
"settings_values_check",
]

17
src/paperless/celery.py Normal file
View File

@@ -0,0 +1,17 @@
import os
from celery import Celery
# Set the default Django settings module for the 'celery' program.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
app = Celery("paperless")
# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object("django.conf:settings", namespace="CELERY")
# Load task modules from all registered Django apps.
app.autodiscover_tasks()
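With autodiscovery in place, any installed app can expose tasks in a tasks.py module. A hypothetical example of what such a module and its invocation could look like:

from celery import shared_task

@shared_task
def sanity_check():
    # shared_task binds to whichever app is current ("paperless" here),
    # so reusable apps don't need to import the app instance directly.
    return "OK"

# Producers enqueue with sanity_check.delay(); a worker started with
#   celery --app paperless worker
# consumes it from the broker configured via the CELERY_* settings.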

View File

@@ -1,4 +1,6 @@
import grp
import os
import pwd
import shutil
import stat
@@ -32,12 +34,15 @@ def path_check(var, directory):
with open(test_file, "w"):
pass
except PermissionError:
dir_stat = os.stat(directory)
dir_mode = stat.filemode(dir_stat.st_mode)
dir_owner = pwd.getpwuid(dir_stat.st_uid).pw_name
dir_group = grp.getgrgid(dir_stat.st_gid).gr_name
messages.append(
Error(
writeable_message.format(var),
writeable_hint.format(
f"\n{stat.filemode(os.stat(directory).st_mode)} "
f"{directory}\n",
f"\n{dir_mode} {dir_owner} {dir_group} " f"{directory}\n",
),
),
)
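The richer hint now shows mode, owner, and group of the offending directory, e.g. "drwxr-xr-x paperless paperless /data". The three lookups in isolation (Unix-only, like the check itself):

import grp
import os
import pwd
import stat

st = os.stat("/tmp")
print(stat.filemode(st.st_mode))        # e.g. 'drwxrwxrwt'
print(pwd.getpwuid(st.st_uid).pw_name)  # e.g. 'root'
print(grp.getgrgid(st.st_gid).gr_name)  # e.g. 'root' (or 'wheel' on macOS)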
@@ -96,3 +101,52 @@ def debug_mode_check(app_configs, **kwargs):
]
else:
return []
@register()
def settings_values_check(app_configs, **kwargs):
"""
Validates at least some of the user provided settings
"""
def _ocrmypdf_settings_check():
"""
Validates some of the arguments which will be provided to ocrmypdf
against the valid options. Use "ocrmypdf --help" to see the valid
inputs
"""
msgs = []
if settings.OCR_OUTPUT_TYPE not in {
"pdfa",
"pdf",
"pdfa-1",
"pdfa-2",
"pdfa-3",
}:
msgs.append(
Error(f'OCR output type "{settings.OCR_OUTPUT_TYPE}" is not valid'),
)
if settings.OCR_MODE not in {"force", "skip", "redo", "skip_noarchive"}:
msgs.append(Error(f'OCR output mode "{settings.OCR_MODE}" is not valid'))
if settings.OCR_CLEAN not in {"clean", "clean-final", "none"}:
msgs.append(Error(f'OCR clean mode "{settings.OCR_CLEAN}" is not valid'))
return msgs
def _timezone_validate():
"""
Validates the user provided timezone is a valid timezone
"""
try:
import zoneinfo
except ImportError: # pragma: nocover
import backports.zoneinfo as zoneinfo
msgs = []
if settings.TIME_ZONE not in zoneinfo.available_timezones():
msgs.append(
Error(f'Timezone "{settings.TIME_ZONE}" is not a valid timezone'),
)
return msgs
return _ocrmypdf_settings_check() + _timezone_validate()
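Because it is registered via @register(), this runs with the rest of Django's system checks (python manage.py check). The timezone half is plain set membership:

try:
    import zoneinfo
except ImportError:  # Python < 3.9
    import backports.zoneinfo as zoneinfo

print("Europe/Berlin" in zoneinfo.available_timezones())      # True
print("Mars/Olympus_Mons" in zoneinfo.available_timezones())  # False -> the check emits an Error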

View File

@@ -4,11 +4,13 @@ import math
import multiprocessing
import os
import re
import tempfile
from typing import Final
from typing import Optional
from typing import Set
from urllib.parse import urlparse
from celery.schedules import crontab
from concurrent_log_handler.queue import setup_logging_queues
from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv
@@ -56,6 +58,13 @@ def __get_float(key: str, default: float) -> float:
return float(os.getenv(key, default))
def __get_path(key: str, default: str) -> str:
"""
Return a normalized, absolute path based on the environment variable or a default
"""
return os.path.abspath(os.path.normpath(os.environ.get(key, default)))
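A quick illustration of what the helper normalizes, runnable standalone (the module-level function is copied here for the demo, minus the name-mangling prefix):

import os

def get_path(key: str, default: str) -> str:
    return os.path.abspath(os.path.normpath(os.environ.get(key, default)))

os.environ["PAPERLESS_DATA_DIR"] = "./data/../data/"
print(get_path("PAPERLESS_DATA_DIR", "/opt/data"))
# -> '<cwd>/data': redundant segments, '..' and the trailing slash are
#    collapsed, and the result is always absolute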
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
@@ -66,14 +75,16 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
STATIC_ROOT = os.getenv("PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static"))
STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static"))
MEDIA_ROOT = os.getenv("PAPERLESS_MEDIA_ROOT", os.path.join(BASE_DIR, "..", "media"))
MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", os.path.join(BASE_DIR, "..", "media"))
ORIGINALS_DIR = os.path.join(MEDIA_ROOT, "documents", "originals")
ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive")
THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
DATA_DIR = os.getenv("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data"))
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data"))
NLTK_DIR = os.path.join(DATA_DIR, "nltk")
TRASH_DIR = os.getenv("PAPERLESS_TRASH_DIR")
@@ -83,15 +94,18 @@ MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")
INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")
LOGGING_DIR = os.getenv("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log"))
LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log"))
CONSUMPTION_DIR = os.getenv(
CONSUMPTION_DIR = __get_path(
"PAPERLESS_CONSUMPTION_DIR",
os.path.join(BASE_DIR, "..", "consume"),
)
# This will be created if it doesn't exist
SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless")
SCRATCH_DIR = __get_path(
"PAPERLESS_SCRATCH_DIR",
os.path.join(tempfile.gettempdir(), "paperless"),
)
###############################################################################
# Application Definition #
@@ -117,7 +131,7 @@ INSTALLED_APPS = [
"rest_framework",
"rest_framework.authtoken",
"django_filters",
"django_q",
"django_celery_results",
] + env_apps
if DEBUG:
@@ -168,6 +182,8 @@ ASGI_APPLICATION = "paperless.asgi.application"
STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", BASE_URL + "static/")
WHITENOISE_STATIC_PREFIX = "/static/"
_REDIS_URL = os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
# TODO: what is this used for?
TEMPLATES = [
{
@@ -189,7 +205,7 @@ CHANNEL_LAYERS = {
"default": {
"BACKEND": "channels_redis.core.RedisChannelLayer",
"CONFIG": {
"hosts": [os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")],
"hosts": [_REDIS_URL],
"capacity": 2000, # default 100
"expiry": 15, # default 60
},
@@ -274,7 +290,7 @@ SECRET_KEY = os.getenv(
AUTH_PASSWORD_VALIDATORS = [
{
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa: E501
},
{
"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
@@ -308,6 +324,7 @@ DATABASES = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": os.path.join(DATA_DIR, "db.sqlite3"),
"OPTIONS": {},
},
}
@@ -317,16 +334,31 @@ if os.getenv("PAPERLESS_DBHOST"):
DATABASES["sqlite"] = DATABASES["default"].copy()
DATABASES["default"] = {
"ENGINE": "django.db.backends.postgresql_psycopg2",
"HOST": os.getenv("PAPERLESS_DBHOST"),
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
"USER": os.getenv("PAPERLESS_DBUSER", "paperless"),
"PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"),
"OPTIONS": {"sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer")},
"OPTIONS": {},
}
if os.getenv("PAPERLESS_DBPORT"):
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
# Leave room for future extensibility
if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
engine = "django.db.backends.mysql"
options = {"read_default_file": "/etc/mysql/my.cnf", "charset": "utf8mb4"}
else: # Default to PostgresDB
engine = "django.db.backends.postgresql_psycopg2"
options = {"sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer")}
DATABASES["default"]["ENGINE"] = engine
DATABASES["default"]["OPTIONS"].update(options)
if os.getenv("PAPERLESS_DB_TIMEOUT") is not None:
DATABASES["default"]["OPTIONS"].update(
{"timeout": float(os.getenv("PAPERLESS_DB_TIMEOUT"))},
)
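# A sketch of the net effect when PAPERLESS_DBENGINE=mariadb is combined
# with PAPERLESS_DBHOST=db (assumed values; remaining keys keep defaults):
_demo_expected_mariadb_default = {
    "ENGINE": "django.db.backends.mysql",
    "HOST": "db",
    "NAME": "paperless",
    "USER": "paperless",
    "PASSWORD": "paperless",
    "OPTIONS": {"read_default_file": "/etc/mysql/my.cnf", "charset": "utf8mb4"},
}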
DEFAULT_AUTO_FIELD = "django.db.models.AutoField"
###############################################################################
@@ -425,47 +457,57 @@ LOGGING = {
# Task queue #
###############################################################################
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)
# Sensible defaults for multitasking:
# use a fair balance between worker processes and threads per worker so that
# both consuming many documents in parallel and consuming large documents is
# reasonably fast.
# Favors threads per worker on smaller systems and never exceeds cpu_count()
# in total.
WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_BROKER_URL = _REDIS_URL
CELERY_TIMEZONE = TIME_ZONE
def default_task_workers() -> int:
# guard against a zero or unreported core count; use at least one core
available_cores = max(multiprocessing.cpu_count(), 1)
try:
if available_cores < 4:
return available_cores
return max(math.floor(math.sqrt(available_cores)), 1)
except NotImplementedError:
return 1
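# Worked examples of the sizing rule above (a mirror for illustration,
# not a replacement for default_task_workers):
#
#   def _demo_workers(cores: int) -> int:
#       return cores if cores < 4 else max(math.floor(math.sqrt(cores)), 1)
#
#   _demo_workers(2)  == 2   # small systems: one worker per core
#   _demo_workers(4)  == 2   # 4+ cores: floor(sqrt(cores))
#   _demo_workers(16) == 4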
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
CELERY_WORKER_CONCURRENCY = TASK_WORKERS
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
CELERY_WORKER_SEND_TASK_EVENTS = True
CELERY_SEND_TASK_SENT_EVENT = True
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", default_task_workers())
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT = WORKER_TIMEOUT
PAPERLESS_WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db"
CELERY_CACHE_BACKEND = "default"
# Per django-q docs, timeout must be smaller than retry
# We default retry to 10s more than the timeout
PAPERLESS_WORKER_RETRY: Final[int] = __get_int(
"PAPERLESS_WORKER_RETRY",
PAPERLESS_WORKER_TIMEOUT + 10,
)
CELERY_BEAT_SCHEDULE = {
# Every ten minutes
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
},
# Hourly at 5 minutes past the hour
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
},
# Daily at midnight
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
},
# Weekly, Sunday at 00:30
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
},
}
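# How the celery.schedules.crontab entries above read, for reference
# (demo names only):
_demo_every_ten_minutes = crontab(minute="*/10")          # :00, :10, :20, ...
_demo_hourly_five_past = crontab(minute="5", hour="*/1")  # xx:05 every hour
_demo_sunday_half_past = crontab(minute=30, hour=0, day_of_week="sun")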
CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")
Q_CLUSTER = {
"name": "paperless",
"guard_cycle": 5,
"catch_up": False,
"recycle": 1,
"retry": PAPERLESS_WORKER_RETRY,
"timeout": PAPERLESS_WORKER_TIMEOUT,
"workers": TASK_WORKERS,
"redis": os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
"log_level": "DEBUG" if DEBUG else "INFO",
# Django cache framework setting, backed by the same Redis instance.
CACHES = {
"default": {
"BACKEND": "django.core.cache.backends.redis.RedisCache",
"LOCATION": _REDIS_URL,
},
}
@@ -509,7 +551,7 @@ CONSUMER_IGNORE_PATTERNS = list(
json.loads(
os.getenv(
"PAPERLESS_CONSUMER_IGNORE_PATTERNS",
'[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]',
'[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]', # noqa: E501
),
),
)
@@ -533,11 +575,9 @@ OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
# OCRmyPDF --output-type options are available.
# TODO: validate this setting.
OCR_OUTPUT_TYPE = os.getenv("PAPERLESS_OCR_OUTPUT_TYPE", "pdfa")
# skip, redo, force
# TODO: validate this.
OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
OCR_IMAGE_DPI = os.getenv("PAPERLESS_OCR_IMAGE_DPI")
@@ -590,6 +630,11 @@ POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
# Maximum number of dates taken from document start to end to show as suggestions for
# `created` date in the frontend. Duplicates are removed, which can result in
# fewer dates shown.
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
# Transformations applied before filename parsing
FILENAME_PARSE_TRANSFORMS = []
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
@@ -598,7 +643,8 @@ for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
# Specify the filename format for out files
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
# If this is enabled, variables in filename format will resolve to empty-string instead of 'none'.
# If this is enabled, variables in filename format will resolve to
# empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
@@ -610,16 +656,15 @@ THUMBNAIL_FONT_NAME = os.getenv(
"/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
)
# TODO: this should not have a prefix.
# Tika settings
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
TIKA_GOTENBERG_ENDPOINT = os.getenv(
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
"http://localhost:3000",
)
if PAPERLESS_TIKA_ENABLED:
if TIKA_ENABLED:
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
@@ -632,8 +677,9 @@ def _parse_ignore_dates(
user provided string(s) into dates
Args:
env_ignore (str): The value of the environment variable, comma seperated dates
date_order (str, optional): The format of the date strings. Defaults to DATE_ORDER.
env_ignore (str): The value of the environment variable, comma separated dates
date_order (str, optional): The format of the date strings.
Defaults to DATE_ORDER.
Returns:
Set[datetime.datetime]: The set of parsed date objects
@@ -662,3 +708,40 @@ if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default":
ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
###############################################################################
# Machine Learning #
###############################################################################
def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
"""
Maps an ISO-639-1 language code supported by Tesseract into
an optional NLTK language name. This is the set of common supported
languages for all the NLTK data used.
Assumption: The primary language is first
"""
ocr_lang = ocr_lang.split("+")[0]
iso_code_to_nltk = {
"dan": "danish",
"nld": "dutch",
"eng": "english",
"fin": "finnish",
"fra": "french",
"deu": "german",
"ita": "italian",
"nor": "norwegian",
"por": "portuguese",
"rus": "russian",
"spa": "spanish",
"swe": "swedish",
"tur": "turkish",
}
return iso_code_to_nltk.get(ocr_lang, None)
NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")
NLTK_LANGUAGE: Optional[str] = _get_nltk_language_setting(OCR_LANGUAGE)
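# Example behavior of _get_nltk_language_setting (the primary language,
# i.e. the first "+"-separated code, decides; unknown codes disable NLTK):
#   _get_nltk_language_setting("deu+eng") == "german"
#   _get_nltk_language_setting("eng")     == "english"
#   _get_nltk_language_setting("jpn")     is None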

View File

@@ -1,12 +1,12 @@
import os
import shutil
from django.test import override_settings
from django.test import TestCase
from documents.tests.utils import DirectoriesMixin
from paperless import binaries_check
from paperless import paths_check
from paperless.checks import binaries_check
from paperless.checks import debug_mode_check
from paperless.checks import paths_check
from paperless.checks import settings_values_check
class TestChecks(DirectoriesMixin, TestCase):
@@ -54,3 +54,89 @@ class TestChecks(DirectoriesMixin, TestCase):
@override_settings(DEBUG=True)
def test_debug_enabled(self):
self.assertEqual(len(debug_mode_check(None)), 1)
class TestSettingsChecks(DirectoriesMixin, TestCase):
def test_all_valid(self):
"""
GIVEN:
- Default settings
WHEN:
- Settings are validated
THEN:
- No system check errors reported
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 0)
@override_settings(OCR_OUTPUT_TYPE="notapdf")
def test_invalid_output_type(self):
"""
GIVEN:
- Default settings
- OCR output type is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for OCR output type
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('OCR output type "notapdf"', msg.msg)
@override_settings(OCR_MODE="makeitso")
def test_invalid_ocr_type(self):
"""
GIVEN:
- Default settings
- OCR type is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for OCR type
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('OCR output mode "makeitso"', msg.msg)
@override_settings(OCR_CLEAN="cleanme")
def test_invalid_ocr_clean(self):
"""
GIVEN:
- Default settings
- OCR cleaning type is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for OCR cleaning type
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('OCR clean mode "cleanme"', msg.msg)
@override_settings(TIME_ZONE="TheMoon\\MyCrater")
def test_invalid_timezone(self):
"""
GIVEN:
- Default settings
- Timezone is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for timezone
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('Timezone "TheMoon\\MyCrater"', msg.msg)

View File

@@ -1,7 +1,9 @@
import datetime
from unittest import mock
from unittest import TestCase
from paperless.settings import _parse_ignore_dates
from paperless.settings import default_threads_per_worker
class TestIgnoreDateParsing(TestCase):
@@ -56,3 +58,27 @@ class TestIgnoreDateParsing(TestCase):
]
self._parse_checker(test_cases)
def test_workers_threads(self):
"""
GIVEN:
- Certain CPU counts
WHEN:
- Threads per worker is calculated
THEN:
- Threads per worker less than or equal to CPU count
- At least 1 thread per worker
"""
default_workers = 1
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i
default_threads = default_threads_per_worker(default_workers)
self.assertGreaterEqual(default_threads, 1)
self.assertLessEqual(default_workers * default_threads, i)

View File

@@ -1,7 +1,7 @@
from typing import Final
from typing import Tuple
__version__: Final[Tuple[int, int, int]] = (1, 7, 1)
__version__: Final[Tuple[int, int, int]] = (1, 9, 2)
# Version string like X.Y.Z
__full_version_str__: Final[str] = ".".join(map(str, __version__))
# Version string like X.Y

View File

@@ -1,24 +1,26 @@
import os
import re
import tempfile
from datetime import date
from datetime import timedelta
from fnmatch import fnmatch
from imaplib import IMAP4
from typing import Dict
import magic
import pathvalidate
from django.conf import settings
from django.db import DatabaseError
from django_q.tasks import async_task
from documents.loggers import LoggingMixin
from documents.models import Correspondent
from documents.parsers import is_mime_type_supported
from documents.tasks import consume_file
from imap_tools import AND
from imap_tools import MailBox
from imap_tools import MailboxFolderSelectError
from imap_tools import MailBoxUnencrypted
from imap_tools import MailMessage
from imap_tools import MailMessageFlags
from imap_tools import NOT
from imap_tools.mailbox import MailBoxTls
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
@@ -29,7 +31,7 @@ class MailError(Exception):
class BaseMailAction:
def get_criteria(self):
def get_criteria(self) -> Dict:
return {}
def post_consume(self, M, message_uids, parameter):
@@ -67,13 +69,17 @@ class TagMailAction(BaseMailAction):
self.keyword = parameter
def get_criteria(self):
return {"no_keyword": self.keyword}
return {"no_keyword": self.keyword, "gmail_label": self.keyword}
def post_consume(self, M: MailBox, message_uids, parameter):
M.flag(message_uids, [self.keyword], True)
if re.search(r"gmail\.com$|googlemail\.com$", M._host):
for uid in message_uids:
M.client.uid("STORE", uid, "X-GM-LABELS", self.keyword)
else:
M.flag(message_uids, [self.keyword], True)
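# For reference: on Gmail hosts the uid() call above issues a raw
# UID STORE using the X-GM-LABELS extension, roughly
#   a1 UID STORE <uid> X-GM-LABELS (processed)
# while non-Gmail servers get a standard IMAP keyword via M.flag().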
def get_rule_action(rule):
def get_rule_action(rule) -> BaseMailAction:
if rule.action == MailRule.MailAction.FLAG:
return FlagMailAction()
elif rule.action == MailRule.MailAction.DELETE:
@@ -103,7 +109,7 @@ def make_criterias(rule):
return {**criterias, **get_rule_action(rule).get_criteria()}
def get_mailbox(server, port, security):
def get_mailbox(server, port, security) -> MailBox:
if security == MailAccount.ImapSecurity.NONE:
mailbox = MailBoxUnencrypted(server, port)
elif security == MailAccount.ImapSecurity.STARTTLS:
@@ -162,7 +168,7 @@ class MailAccountHandler(LoggingMixin):
"Unknown correspondent selector",
) # pragma: nocover
def handle_mail_account(self, account):
def handle_mail_account(self, account: MailAccount):
self.renew_logging_group()
@@ -176,33 +182,29 @@ class MailAccountHandler(LoggingMixin):
account.imap_security,
) as M:
supports_gmail_labels = "X-GM-EXT-1" in M.client.capabilities
supports_auth_plain = "AUTH=PLAIN" in M.client.capabilities
self.log("debug", f"GMAIL Label Support: {supports_gmail_labels}")
self.log("debug", f"AUTH=PLAIN Support: {supports_auth_plain}")
try:
M.login(account.username, account.password)
except UnicodeEncodeError:
self.log("debug", "Falling back to AUTH=PLAIN")
try:
# rfc2595 section 6 - PLAIN SASL mechanism
client: IMAP4 = M.client
encoded = (
b"\0"
+ account.username.encode("utf8")
+ b"\0"
+ account.password.encode("utf8")
)
# Assumption is the server supports AUTH=PLAIN capability
# Could check the list with client.capability(), but then what?
# We're failing anyway then
client.authenticate("PLAIN", lambda x: encoded)
# Need to transition out of AUTH state to SELECTED
M.folder.set("INBOX")
except Exception:
try:
M.login_utf8(account.username, account.password)
except Exception as err:
self.log(
"error",
"Unable to authenticate with mail server using AUTH=PLAIN",
)
raise MailError(f"Error while authenticating account {account}")
raise MailError(
f"Error while authenticating account {account}",
) from err
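# For reference, the removed fallback above built the RFC 2595 PLAIN SASL
# initial response by hand: b"\0" + username + b"\0" + password, both
# UTF-8 encoded; imap_tools' login_utf8() is assumed to wrap the same
# mechanism internally.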
except Exception as e:
self.log(
"error",
@@ -221,7 +223,11 @@ class MailAccountHandler(LoggingMixin):
for rule in account.rules.order_by("order"):
try:
total_processed_files += self.handle_mail_rule(M, rule)
total_processed_files += self.handle_mail_rule(
M,
rule,
supports_gmail_labels,
)
except Exception as e:
self.log(
"error",
@@ -239,13 +245,18 @@ class MailAccountHandler(LoggingMixin):
return total_processed_files
def handle_mail_rule(self, M: MailBox, rule: MailRule):
def handle_mail_rule(
self,
M: MailBox,
rule: MailRule,
supports_gmail_labels: bool = False,
):
self.log("debug", f"Rule {rule}: Selecting folder {rule.folder}")
try:
M.folder.set(rule.folder)
except MailboxFolderSelectError:
except MailboxFolderSelectError as err:
self.log(
"error",
@@ -264,23 +275,38 @@ class MailAccountHandler(LoggingMixin):
raise MailError(
f"Rule {rule}: Folder {rule.folder} "
f"does not exist in account {rule.account}",
)
) from err
criterias = make_criterias(rule)
# Deal with the Gmail label extension
if "gmail_label" in criterias:
gmail_label = criterias["gmail_label"]
del criterias["gmail_label"]
if not supports_gmail_labels:
criterias_imap = AND(**criterias)
else:
criterias_imap = AND(NOT(gmail_label=gmail_label), **criterias)
else:
criterias_imap = AND(**criterias)
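# Roughly what the two branches yield as IMAP search strings for a
# TagMailAction with keyword "processed" (rendering per imap_tools):
#   AND(no_keyword="processed")        -> '(UNKEYWORD processed)'
#   AND(NOT(gmail_label="processed"))  -> '(NOT (X-GM-LABELS "processed"))'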
self.log(
"debug",
f"Rule {rule}: Searching folder with criteria " f"{str(AND(**criterias))}",
f"Rule {rule}: Searching folder with criteria " f"{str(criterias_imap)}",
)
try:
messages = M.fetch(
criteria=AND(**criterias),
criteria=criterias_imap,
mark_seen=False,
charset=rule.account.character_set,
)
except Exception:
raise MailError(f"Rule {rule}: Error while fetching folder {rule.folder}")
except Exception as err:
raise MailError(
f"Rule {rule}: Error while fetching folder {rule.folder}",
) from err
post_consume_messages = []
@@ -320,7 +346,7 @@ class MailAccountHandler(LoggingMixin):
except Exception as e:
raise MailError(
f"Rule {rule}: Error while processing post-consume actions: " f"{e}",
)
) from e
return total_processed_files
@@ -382,8 +408,7 @@ class MailAccountHandler(LoggingMixin):
f"{message.subject} from {message.from_}",
)
async_task(
"documents.tasks.consume_file",
consume_file.delay(
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
message.subject + ".eml",
@@ -447,8 +472,7 @@ class MailAccountHandler(LoggingMixin):
f"{message.subject} from {message.from_}",
)
async_task(
"documents.tasks.consume_file",
consume_file.delay(
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
att.filename,

View File

@@ -2,28 +2,12 @@
from django.db import migrations
from django.db.migrations import RunPython
from django_q.models import Schedule
from django_q.tasks import schedule
def add_schedules(apps, schema_editor):
schedule(
"paperless_mail.tasks.process_mail_accounts",
name="Check all e-mail accounts",
schedule_type=Schedule.MINUTES,
minutes=10,
)
def remove_schedules(apps, schema_editor):
Schedule.objects.filter(func="paperless_mail.tasks.process_mail_accounts").delete()
class Migration(migrations.Migration):
dependencies = [
("paperless_mail", "0001_initial"),
("django_q", "0013_task_attempt_count"),
]
operations = [RunPython(add_schedules, remove_schedules)]
operations = [RunPython(migrations.RunPython.noop, migrations.RunPython.noop)]

View File

@@ -1,13 +1,14 @@
import logging
from celery import shared_task
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount
logger = logging.getLogger("paperless.mail.tasks")
@shared_task
def process_mail_accounts():
total_new_documents = 0
for account in MailAccount.objects.all():
@@ -20,11 +21,3 @@ def process_mail_accounts():
return f"Added {total_new_documents} document(s)."
else:
return "No new documents were added."
def process_mail_account(name):
try:
account = MailAccount.objects.get(name=name)
MailAccountHandler().handle_mail_account(account)
except MailAccount.DoesNotExist:
logger.error(f"Unknown mail acccount: {name}")

View File

@@ -0,0 +1,70 @@
import os
import pytest
from django.test import TestCase
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
# Only run if the environment is set up
# and the host variable is not empty (as can happen in forks)
@pytest.mark.skipif(
"PAPERLESS_MAIL_TEST_HOST" not in os.environ
or not len(os.environ["PAPERLESS_MAIL_TEST_HOST"]),
reason="Live server testing not enabled",
)
class TestMailLiveServer(TestCase):
def setUp(self) -> None:
self.mail_account_handler = MailAccountHandler()
self.account = MailAccount.objects.create(
name="test",
imap_server=os.environ["PAPERLESS_MAIL_TEST_HOST"],
username=os.environ["PAPERLESS_MAIL_TEST_USER"],
password=os.environ["PAPERLESS_MAIL_TEST_PASSWD"],
imap_port=993,
)
return super().setUp()
def tearDown(self) -> None:
self.account.delete()
return super().tearDown()
def test_process_non_gmail_server_flag(self):
try:
rule1 = MailRule.objects.create(
name="testrule",
account=self.account,
action=MailRule.MailAction.FLAG,
)
self.mail_account_handler.handle_mail_account(self.account)
rule1.delete()
except MailError as e:
self.fail(f"Failure: {e}")
except Exception as e:
pass
def test_process_non_gmail_server_tag(self):
try:
rule2 = MailRule.objects.create(
name="testrule",
account=self.account,
action=MailRule.MailAction.TAG,
)
self.mail_account_handler.handle_mail_account(self.account)
rule2.delete()
except MailError as e:
self.fail(f"Failure: {e}")
except Exception as e:
pass

View File

@@ -20,6 +20,7 @@ from imap_tools import MailboxFolderSelectError
from imap_tools import MailboxLoginError
from imap_tools import MailMessage
from imap_tools import MailMessageFlags
from imap_tools import NOT
from paperless_mail import tasks
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
@@ -46,31 +47,66 @@ class BogusFolderManager:
class BogusClient:
def authenticate(self, mechanism, authobject):
# authobject must be a callable object
auth_bytes = authobject(None)
if auth_bytes != b"\x00admin\x00w57\xc3\xa4\xc3\xb6\xc3\xbcw4b6huwb6nhu":
raise MailboxLoginError("BAD", "OK")
def __init__(self, messages):
self.messages: List[MailMessage] = messages
self.capabilities: List[str] = []
class BogusMailBox(ContextManager):
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
def authenticate(self, mechanism, authobject):
# authobject must be a callable object
auth_bytes = authobject(None)
if auth_bytes != b"\x00admin\x00w57\xc3\xa4\xc3\xb6\xc3\xbcw4b6huwb6nhu":
raise MailboxLoginError("BAD", "OK")
def uid(self, command, *args):
if command == "STORE":
for message in self.messages:
if message.uid == args[0]:
flag = args[2]
if flag == "processed":
message._raw_flag_data.append("+FLAGS (processed)".encode())
MailMessage.flags.fget.cache_clear()
class BogusMailBox(ContextManager):
# Common values so tests don't need to remember an accepted login
USERNAME: str = "admin"
ASCII_PASSWORD: str = "secret"
# Note the non-ascii characters here
UTF_PASSWORD: str = "w57äöüw4b6huwb6nhu"
def __init__(self):
self.messages: List[MailMessage] = []
self.messages_spam: List[MailMessage] = []
self.folder = BogusFolderManager()
self.client = BogusClient()
self.client = BogusClient(self.messages)
self._host = ""
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
def updateClient(self):
self.client = BogusClient(self.messages)
def login(self, username, password):
# This will raise a UnicodeEncodeError if the password is not ASCII only
password.encode("ascii")
# Otherwise, check for correct values
if username != "admin" or password not in {"secret"}:
if username != self.USERNAME or password != self.ASCII_PASSWORD:
raise MailboxLoginError("BAD", "OK")
def login_utf8(self, username, password):
# Expected to only be called with the UTF-8 password
if username != self.USERNAME or password != self.UTF_PASSWORD:
raise MailboxLoginError("BAD", "OK")
def fetch(self, criteria, mark_seen, charset=""):
@@ -100,6 +136,9 @@ class BogusMailBox(ContextManager):
tag = criteria[criteria.index("UNKEYWORD") + 1].strip("'")
msg = filter(lambda m: "processed" not in m.flags, msg)
if "(X-GM-LABELS" in criteria: # ['NOT', '(X-GM-LABELS', '"processed"']
msg = filter(lambda m: "processed" not in m.flags, msg)
return list(msg)
def delete(self, uid_list):
@@ -209,7 +248,7 @@ class TestMail(DirectoriesMixin, TestCase):
m.return_value = self.bogus_mailbox
self.addCleanup(patcher.stop)
patcher = mock.patch("paperless_mail.mail.async_task")
patcher = mock.patch("paperless_mail.mail.consume_file.delay")
self.async_task = patcher.start()
self.addCleanup(patcher.stop)
@@ -247,6 +286,7 @@ class TestMail(DirectoriesMixin, TestCase):
seen=False,
),
)
self.bogus_mailbox.updateClient()
def test_get_correspondent(self):
message = namedtuple("MailMessage", [])
@@ -607,6 +647,33 @@ class TestMail(DirectoriesMixin, TestCase):
self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
def test_handle_mail_account_tag_gmail(self):
self.bogus_mailbox._host = "imap.gmail.com"
self.bogus_mailbox.client.capabilities = ["X-GM-EXT-1"]
account = MailAccount.objects.create(
name="test",
imap_server="",
username="admin",
password="secret",
)
_ = MailRule.objects.create(
name="testrule",
account=account,
action=MailRule.MailAction.TAG,
action_parameter="processed",
)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
criteria = NOT(gmail_label="processed")
self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 2)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2)
self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
def test_error_login(self):
account = MailAccount.objects.create(
name="test",
@@ -878,9 +945,9 @@ class TestMail(DirectoriesMixin, TestCase):
account = MailAccount.objects.create(
name="test",
imap_server="",
username="admin",
username=BogusMailBox.USERNAME,
# Note the non-ascii characters here
password="w57äöüw4b6huwb6nhu",
password=BogusMailBox.UTF_PASSWORD,
)
_ = MailRule.objects.create(
@@ -910,7 +977,7 @@ class TestMail(DirectoriesMixin, TestCase):
account = MailAccount.objects.create(
name="test",
imap_server="",
username="admin",
username=BogusMailBox.USERNAME,
# Note the non-ascii characters here
# Passes the check in login, not in authenticate
password="réception",
@@ -965,20 +1032,3 @@ class TestTasks(TestCase):
m.side_effect = lambda account: 0
result = tasks.process_mail_accounts()
self.assertIn("No new", result)
@mock.patch("paperless_mail.tasks.MailAccountHandler.handle_mail_account")
def test_single_accounts(self, m):
MailAccount.objects.create(
name="A",
imap_server="A",
username="A",
password="A",
)
tasks.process_mail_account("A")
m.assert_called_once()
m.reset_mock()
tasks.process_mail_account("B")
m.assert_not_called()

View File

@@ -249,16 +249,22 @@ class RasterisedDocumentParser(DocumentParser):
if mime_type == "application/pdf":
text_original = self.extract_text(None, document_path)
original_has_text = text_original and len(text_original) > 50
original_has_text = text_original is not None and len(text_original) > 50
else:
text_original = None
original_has_text = False
# If the original has text, and the user doesn't want an archive,
# we're done here
if settings.OCR_MODE == "skip_noarchive" and original_has_text:
self.log("debug", "Document has text, skipping OCRmyPDF entirely.")
self.text = text_original
return
# Either no text was in the original or there should be an archive
# file created, so OCR the file and create an archive with any
# text located via OCR
import ocrmypdf
from ocrmypdf import InputFileError, EncryptedPdfError
@@ -277,6 +283,7 @@ class RasterisedDocumentParser(DocumentParser):
ocrmypdf.ocr(**args)
self.archive_path = archive_path
self.text = self.extract_text(sidecar_file, archive_path)
if not self.text:
@@ -323,11 +330,11 @@ class RasterisedDocumentParser(DocumentParser):
except Exception as e:
# If this fails, we have a serious issue at hand.
raise ParseError(f"{e.__class__.__name__}: {str(e)}")
raise ParseError(f"{e.__class__.__name__}: {str(e)}") from e
except Exception as e:
# Anything else is probably serious.
raise ParseError(f"{e.__class__.__name__}: {str(e)}")
raise ParseError(f"{e.__class__.__name__}: {str(e)}") from e
# As a last resort, if we still don't have any text for any reason,
# try to extract the text from the original document.

View File

@@ -341,6 +341,17 @@ class TestParser(DirectoriesMixin, TestCase):
@override_settings(OCR_PAGES=2, OCR_MODE="redo")
def test_multi_page_analog_pages_redo(self):
"""
GIVEN:
- File with text contained in images but no text layer
- OCR of only pages 1 and 2 requested
- OCR mode set to redo
WHEN:
- Document is parsed
THEN:
- Text of page 1 and 2 extracted
- An archive file is created
"""
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
@@ -352,6 +363,17 @@ class TestParser(DirectoriesMixin, TestCase):
@override_settings(OCR_PAGES=1, OCR_MODE="force")
def test_multi_page_analog_pages_force(self):
"""
GIVEN:
- File with text contained in images but no text layer
- OCR of only page 1 requested
- OCR mode set to force
WHEN:
- Document is parsed
THEN:
- Only text of page 1 is extracted
- An archive file is created
"""
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
@@ -364,6 +386,16 @@ class TestParser(DirectoriesMixin, TestCase):
@override_settings(OCR_MODE="skip_noarchive")
def test_skip_noarchive_withtext(self):
"""
GIVEN:
- File with existing text layer
- OCR mode set to skip_noarchive
WHEN:
- Document is parsed
THEN:
- Text from images is extracted
- No archive file is created
"""
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
@@ -377,24 +409,47 @@ class TestParser(DirectoriesMixin, TestCase):
@override_settings(OCR_MODE="skip_noarchive")
def test_skip_noarchive_notext(self):
"""
GIVEN:
- File with text contained in images but no text layer
- OCR mode set to skip_noarchive
WHEN:
- Document is parsed
THEN:
- Text from images is extracted
- An archive file is created with the OCRd text
"""
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)
self.assertIsNotNone(parser.archive_path)
@override_settings(OCR_MODE="skip")
def test_multi_page_mixed(self):
"""
GIVEN:
- File with some text contained in images and some in text layer
- OCR mode set to skip
WHEN:
- Document is parsed
THEN:
- Text from images is extracted
- An archive file is created with the OCRd text and the original text
"""
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
"application/pdf",
)
self.assertIsNotNone(parser.archive_path)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(),
@@ -408,6 +463,16 @@ class TestParser(DirectoriesMixin, TestCase):
@override_settings(OCR_MODE="skip_noarchive")
def test_multi_page_mixed_no_archive(self):
"""
GIVEN:
- File with some text contained in images and some in text layer
- OCR mode set to skip_noarchive
WHEN:
- Document is parsed
THEN:
- Text from images is extracted
- No archive file is created as original file contains text
"""
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),

View File

@@ -11,5 +11,6 @@ def text_consumer_declaration(sender, **kwargs):
"mime_types": {
"text/plain": ".txt",
"text/csv": ".csv",
"application/csv": ".csv",
},
}

View File

@@ -9,6 +9,6 @@ class PaperlessTikaConfig(AppConfig):
def ready(self):
from documents.signals import document_consumer_declaration
if settings.PAPERLESS_TIKA_ENABLED:
if settings.TIKA_ENABLED:
document_consumer_declaration.connect(tika_consumer_declaration)
AppConfig.ready(self)

View File

@@ -1,4 +1,5 @@
import os
from pathlib import Path
import dateutil.parser
import requests
@@ -27,7 +28,12 @@ class TikaDocumentParser(DocumentParser):
)
def extract_metadata(self, document_path, mime_type):
tika_server = settings.PAPERLESS_TIKA_ENDPOINT
tika_server = settings.TIKA_ENDPOINT
# tika does not support a PathLike, only strings
# ensure this is a string
document_path = str(document_path)
try:
parsed = parser.from_file(document_path, tika_server)
except Exception as e:
@@ -47,9 +53,13 @@ class TikaDocumentParser(DocumentParser):
for key in parsed["metadata"]
]
def parse(self, document_path, mime_type, file_name=None):
def parse(self, document_path: Path, mime_type, file_name=None):
self.log("info", f"Sending {document_path} to Tika server")
tika_server = settings.PAPERLESS_TIKA_ENDPOINT
tika_server = settings.TIKA_ENDPOINT
# tika does not support a PathLike, only strings
# ensure this is a string
document_path = str(document_path)
try:
parsed = parser.from_file(document_path, tika_server)
@@ -57,7 +67,7 @@ class TikaDocumentParser(DocumentParser):
raise ParseError(
f"Could not parse {document_path} with tika server at "
f"{tika_server}: {err}",
)
) from err
self.text = parsed["content"].strip()
@@ -73,7 +83,7 @@ class TikaDocumentParser(DocumentParser):
def convert_to_pdf(self, document_path, file_name):
pdf_path = os.path.join(self.tempdir, "convert.pdf")
gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT
gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT
url = gotenberg_server + "/forms/libreoffice/convert"
self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
@@ -90,7 +100,9 @@ class TikaDocumentParser(DocumentParser):
response = requests.post(url, files=files, headers=headers)
response.raise_for_status() # ensure we notice bad responses
except Exception as err:
raise ParseError(f"Error while converting document to PDF: {err}")
raise ParseError(
f"Error while converting document to PDF: {err}",
) from err
with open(pdf_path, "wb") as file:
file.write(response.content)

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,78 @@
import datetime
import os
from pathlib import Path
from typing import Final
import pytest
from django.test import TestCase
from paperless_tika.parsers import TikaDocumentParser
@pytest.mark.skipif("TIKA_LIVE" not in os.environ, reason="No tika server")
class TestTikaParserAgainstServer(TestCase):
"""
This test case tests the Tika parsing against a live tika server,
if the environment contains the correct value indicating such a server
is available.
"""
SAMPLE_DIR: Final[Path] = (Path(__file__).parent / Path("samples")).resolve()
def setUp(self) -> None:
self.parser = TikaDocumentParser(logging_group=None)
def tearDown(self) -> None:
self.parser.cleanup()
def test_basic_parse_odt(self):
"""
GIVEN:
- An input ODT format document
WHEN:
- The document is parsed
THEN:
- Document content is correct
- Document date is correct
"""
test_file = self.SAMPLE_DIR / Path("sample.odt")
self.parser.parse(test_file, "application/vnd.oasis.opendocument.text")
self.assertEqual(
self.parser.text,
"This is an ODT test document, created September 14, 2022",
)
self.assertIsNotNone(self.parser.archive_path)
with open(self.parser.archive_path, "rb") as f:
# PDFs begin with the bytes %PDF-x.y
self.assertTrue(b"PDF-" in f.read()[:10])
# TODO: Unsure what can set the Creation-Date field in a document, enable when possible
# self.assertEqual(self.parser.date, datetime.datetime(2022, 9, 14))
def test_basic_parse_docx(self):
"""
GIVEN:
- An input DOCX format document
WHEN:
- The document is parsed
THEN:
- Document content is correct
- Document date is correct
"""
test_file = self.SAMPLE_DIR / Path("sample.docx")
self.parser.parse(
test_file,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
)
self.assertEqual(
self.parser.text,
"This is an DOCX test document, also made September 14, 2022",
)
self.assertIsNotNone(self.parser.archive_path)
with open(self.parser.archive_path, "rb") as f:
self.assertTrue(b"PDF-" in f.read()[:10])
# self.assertEqual(self.parser.date, datetime.datetime(2022, 9, 14))

View File

@@ -1,5 +1,5 @@
[flake8]
extend-exclude = */migrations/*, paperless/settings.py, */tests/*
extend-exclude = */migrations/*, */tests/*
# E203 - https://www.flake8rules.com/rules/E203.html
# W503 - https://www.flake8rules.com/rules/W503.html
ignore = E203,W503