Feature: Cache metadata and suggestions in Redis (#5638)
Parent: 45e2b7f814
Commit: 25542c56b9
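In short: document metadata, document suggestions, classifier state, and thumbnail timestamps now live in the Django cache (Redis by default) under doc_<id>_* keys with a roughly 50-minute sliding TTL. A minimal sketch of the read-through pattern the endpoints below follow (doc is an illustrative Document; compute_metadata is a hypothetical stand-in for the parser call in views.py):

from documents.caching import get_metadata_cache, refresh_metadata_cache, set_metadata_cache

cached = get_metadata_cache(doc.pk)  # None on a miss or on a checksum mismatch
if cached is not None:
    refresh_metadata_cache(doc.pk)   # a hit also extends the TTL
    original_metadata = cached.original_metadata
else:
    original_metadata = compute_metadata(doc)         # hypothetical expensive parse
    set_metadata_cache(doc, original_metadata, None)  # archive metadata omitted here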
src/documents/caching.py (new file, 197 lines)
@@ -0,0 +1,197 @@
+import logging
+from binascii import hexlify
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+from typing import Final
+from typing import Optional
+
+from django.core.cache import cache
+
+from documents.models import Document
+
+if TYPE_CHECKING:
+    from documents.classifier import DocumentClassifier
+
+logger = logging.getLogger("paperless.caching")
+
+
+@dataclass(frozen=True)
+class MetadataCacheData:
+    original_checksum: str
+    original_metadata: list
+    archive_checksum: Optional[str]
+    archive_metadata: Optional[list]
+
+
+@dataclass(frozen=True)
+class SuggestionCacheData:
+    classifier_version: int
+    classifier_hash: str
+    suggestions: dict
+
+
+CLASSIFIER_VERSION_KEY: Final[str] = "classifier_version"
+CLASSIFIER_HASH_KEY: Final[str] = "classifier_hash"
+CLASSIFIER_MODIFIED_KEY: Final[str] = "classifier_modified"
+
+CACHE_1_MINUTE: Final[int] = 60
+CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
+CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE
+
+
+def get_suggestion_cache_key(document_id: int) -> str:
+    """
+    Returns the basic key for a document's suggestions
+    """
+    return f"doc_{document_id}_suggest"
+
+
+def get_suggestion_cache(document_id: int) -> Optional[SuggestionCacheData]:
+    """
+    If possible, return the cached suggestions for the given document ID.
+    The classifier needs to match in format and hash, and the suggestions
+    need to have been cached once.
+    """
+    from documents.classifier import DocumentClassifier
+
+    doc_key = get_suggestion_cache_key(document_id)
+    cache_hits = cache.get_many([CLASSIFIER_VERSION_KEY, CLASSIFIER_HASH_KEY, doc_key])
+    # The document suggestions are in the cache
+    if doc_key in cache_hits:
+        doc_suggestions: SuggestionCacheData = cache_hits[doc_key]
+        # The classifier format is the same
+        # The classifier hash is the same
+        # Then the suggestions can be used
+        if (
+            CLASSIFIER_VERSION_KEY in cache_hits
+            and cache_hits[CLASSIFIER_VERSION_KEY] == DocumentClassifier.FORMAT_VERSION
+            and cache_hits[CLASSIFIER_VERSION_KEY] == doc_suggestions.classifier_version
+        ) and (
+            CLASSIFIER_HASH_KEY in cache_hits
+            and cache_hits[CLASSIFIER_HASH_KEY] == doc_suggestions.classifier_hash
+        ):
+            return doc_suggestions
+        else:  # pragma: no cover
+            # Remove the key because something didn't match
+            cache.delete(doc_key)
+    return None
+
+
+def set_suggestions_cache(
+    document_id: int,
+    suggestions: dict,
+    classifier: Optional["DocumentClassifier"],
+    *,
+    timeout=CACHE_50_MINUTES,
+) -> None:
+    """
+    Caches the given suggestions, which were generated by the given classifier.
+    If there is no classifier, this function is a no-op (there won't be
+    suggestions then anyway)
+    """
+    if classifier is not None:
+        doc_key = get_suggestion_cache_key(document_id)
+        cache.set(
+            doc_key,
+            SuggestionCacheData(
+                classifier.FORMAT_VERSION,
+                hexlify(classifier.last_auto_type_hash).decode(),
+                suggestions,
+            ),
+            timeout,
+        )
+
+
+def refresh_suggestions_cache(
+    document_id: int,
+    *,
+    timeout: int = CACHE_50_MINUTES,
+) -> None:
+    """
+    Refreshes the expiration of the suggestions for the given document ID
+    to the given timeout
+    """
+    doc_key = get_suggestion_cache_key(document_id)
+    cache.touch(doc_key, timeout)
+
+
+def get_metadata_cache_key(document_id: int) -> str:
+    """
+    Returns the basic key for a document's metadata
+    """
+    return f"doc_{document_id}_metadata"
+
+
+def get_metadata_cache(document_id: int) -> Optional[MetadataCacheData]:
+    """
+    Returns the cached document metadata for the given document ID, as long as
+    the metadata was cached once and the checksums have not changed
+    """
+    doc_key = get_metadata_cache_key(document_id)
+    doc_metadata: Optional[MetadataCacheData] = cache.get(doc_key)
+    # The metadata exists in the cache
+    if doc_metadata is not None:
+        try:
+            doc = Document.objects.get(pk=document_id)
+            # The original checksums match
+            # If it has one, the archive checksums match
+            # Then, we can use the metadata
+            if (
+                doc_metadata.original_checksum == doc.checksum
+                and doc.has_archive_version
+                and doc_metadata.archive_checksum is not None
+                and doc_metadata.archive_checksum == doc.archive_checksum
+            ):
+                # Refresh cache
+                cache.touch(doc_key, CACHE_50_MINUTES)
+                return doc_metadata
+            else:  # pragma: no cover
+                # Something didn't match, delete the key
+                cache.delete(doc_key)
+        except Document.DoesNotExist:  # pragma: no cover
+            # Basically impossible, but the key existed and the Document didn't
+            cache.delete(doc_key)
+    return None
+
+
+def set_metadata_cache(
+    document: Document,
+    original_metadata: list,
+    archive_metadata: Optional[list],
+    *,
+    timeout=CACHE_50_MINUTES,
+) -> None:
+    """
+    Sets the metadata into cache for the given Document
+    """
+    doc_key = get_metadata_cache_key(document.pk)
+    cache.set(
+        doc_key,
+        MetadataCacheData(
+            document.checksum,
+            original_metadata,
+            document.archive_checksum,
+            archive_metadata,
+        ),
+        timeout,
+    )
+
+
+def refresh_metadata_cache(
+    document_id: int,
+    *,
+    timeout: int = CACHE_50_MINUTES,
+) -> None:
+    """
+    Refreshes the expiration of the metadata for the given document ID
+    to the given timeout
+    """
+    doc_key = get_metadata_cache_key(document_id)
+    cache.touch(doc_key, timeout)
+
+
+def get_thumbnail_modified_key(document_id: int) -> str:
+    """
+    Builds the key to store a thumbnail's timestamp
+    """
+    return f"doc_{document_id}_thumbnail_modified"
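A minimal round trip through the suggestion helpers (a sketch: doc_id and the suggestion dict are illustrative, and it assumes a trained classifier has also populated the classifier_* keys, as train() does in the next file):

from documents.caching import get_suggestion_cache, set_suggestions_cache

suggestions = {"correspondents": [88], "tags": [56, 123], "dates": ["2022-04-12"]}
set_suggestions_cache(doc_id, suggestions, classifier)  # stored under doc_<id>_suggest

cached = get_suggestion_cache(doc_id)
# A SuggestionCacheData comes back only while the cached classifier version and
# hash still match; otherwise the entry is dropped and None is returned.
if cached is not None:
    print(cached.suggestions)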
src/documents/classifier.py
@@ -10,8 +10,13 @@ from pathlib import Path
 from typing import Optional

 from django.conf import settings
+from django.core.cache import cache
 from sklearn.exceptions import InconsistentVersionWarning

+from documents.caching import CACHE_50_MINUTES
+from documents.caching import CLASSIFIER_HASH_KEY
+from documents.caching import CLASSIFIER_MODIFIED_KEY
+from documents.caching import CLASSIFIER_VERSION_KEY
 from documents.models import Document
 from documents.models import MatchingModel
@@ -208,6 +213,15 @@ class DocumentClassifier:
             and self.last_doc_change_time >= latest_doc_change
         ) and self.last_auto_type_hash == hasher.digest():
             logger.info("No updates since last training")
+            # Set the classifier information into the cache
+            # Caching for 50 minutes, so slightly less than the normal retrain time
+            cache.set(
+                CLASSIFIER_MODIFIED_KEY,
+                self.last_doc_change_time,
+                CACHE_50_MINUTES,
+            )
+            cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES)
+            cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES)
             return False

         # subtract 1 since -1 (null) is also part of the classes.
@@ -322,6 +336,12 @@ class DocumentClassifier:
         self.last_doc_change_time = latest_doc_change
         self.last_auto_type_hash = hasher.digest()
+
+        # Set the classifier information into the cache
+        # Caching for 50 minutes, so slightly less than the normal retrain time
+        cache.set(CLASSIFIER_MODIFIED_KEY, self.last_doc_change_time, CACHE_50_MINUTES)
+        cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES)
+        cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES)

         return True

     def preprocess_content(self, content: str) -> str:  # pragma: no cover
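Note the two representations of the same hash: train() caches hasher.hexdigest(), while set_suggestions_cache stores hexlify(classifier.last_auto_type_hash).decode(). Those produce the same string, which is what lets get_suggestion_cache compare them directly. A standalone check (sha256 here is only a stand-in for the training hasher):

import hashlib
from binascii import hexlify

hasher = hashlib.sha256(b"auto-matching items")
assert hexlify(hasher.digest()).decode() == hasher.hexdigest()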
src/documents/conditionals.py
@@ -1,9 +1,16 @@
-import pickle
 from datetime import datetime
+from datetime import timezone
 from typing import Optional

 from django.conf import settings
+from django.core.cache import cache
+
+from documents.caching import CACHE_5_MINUTES
+from documents.caching import CACHE_50_MINUTES
+from documents.caching import CLASSIFIER_HASH_KEY
+from documents.caching import CLASSIFIER_MODIFIED_KEY
+from documents.caching import CLASSIFIER_VERSION_KEY
+from documents.caching import get_thumbnail_modified_key
 from documents.classifier import DocumentClassifier
 from documents.models import Document
@@ -14,18 +21,25 @@ def suggestions_etag(request, pk: int) -> Optional[str]:
     suggestions if the classifier has not been changed and the suggested dates
     setting is also unchanged
-
-    TODO: It would be nice to not duplicate the partial loading and the loading
-    between here and the actual classifier
     """
+    # If no model file, no etag at all
     if not settings.MODEL_FILE.exists():
         return None
-    with open(settings.MODEL_FILE, "rb") as f:
-        schema_version = pickle.load(f)
-        if schema_version != DocumentClassifier.FORMAT_VERSION:
-            return None
-        _ = pickle.load(f)
-        last_auto_type_hash: bytes = pickle.load(f)
-        return f"{last_auto_type_hash}:{settings.NUMBER_OF_SUGGESTED_DATES}"
+    # Check cache information
+    cache_hits = cache.get_many(
+        [CLASSIFIER_VERSION_KEY, CLASSIFIER_HASH_KEY],
+    )
+    # If the version differs somehow, no etag
+    if (
+        CLASSIFIER_VERSION_KEY in cache_hits
+        and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
+    ):
+        return None
+    elif CLASSIFIER_HASH_KEY in cache_hits:
+        # Refresh the cache and return the hash digest and the dates setting
+        cache.touch(CLASSIFIER_HASH_KEY, CACHE_5_MINUTES)
+        return f"{cache_hits[CLASSIFIER_HASH_KEY]}:{settings.NUMBER_OF_SUGGESTED_DATES}"
+    return None


 def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
@@ -34,14 +48,23 @@ def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
     as there is no way to track the suggested date setting modification, but it seems
     unlikely that it changes too often
     """
+    # No file, no last modified
     if not settings.MODEL_FILE.exists():
         return None
-    with open(settings.MODEL_FILE, "rb") as f:
-        schema_version = pickle.load(f)
-        if schema_version != DocumentClassifier.FORMAT_VERSION:
-            return None
-        last_doc_change_time = pickle.load(f)
-        return last_doc_change_time
+    cache_hits = cache.get_many(
+        [CLASSIFIER_VERSION_KEY, CLASSIFIER_MODIFIED_KEY],
+    )
+    # If the version differs somehow, no last modified
+    if (
+        CLASSIFIER_VERSION_KEY in cache_hits
+        and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
+    ):
+        return None
+    elif CLASSIFIER_MODIFIED_KEY in cache_hits:
+        # Refresh the cache and return the last modified
+        cache.touch(CLASSIFIER_MODIFIED_KEY, CACHE_5_MINUTES)
+        return cache_hits[CLASSIFIER_MODIFIED_KEY]
+    return None


 def metadata_etag(request, pk: int) -> Optional[str]:
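These function pairs feed Django's condition view decorator, which answers conditional requests (If-None-Match / If-Modified-Since) with 304 Not Modified before the view body runs. Roughly how they plug in, as a sketch; the real wiring in views.py applies them to viewset actions via method_decorator:

from django.views.decorators.http import condition

from documents.conditionals import suggestions_etag, suggestions_last_modified


@condition(etag_func=suggestions_etag, last_modified_func=suggestions_last_modified)
def suggestions_view(request, pk):  # hypothetical function-based stand-in
    ...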
@@ -52,7 +75,7 @@ def metadata_etag(request, pk: int) -> Optional[str]:
     try:
         doc = Document.objects.get(pk=pk)
         return doc.checksum
-    except Document.DoesNotExist:
+    except Document.DoesNotExist:  # pragma: no cover
         return None
     return None
@@ -66,7 +89,7 @@ def metadata_last_modified(request, pk: int) -> Optional[datetime]:
     try:
         doc = Document.objects.get(pk=pk)
         return doc.modified
-    except Document.DoesNotExist:
+    except Document.DoesNotExist:  # pragma: no cover
         return None
     return None
@@ -82,6 +105,46 @@ def preview_etag(request, pk: int) -> Optional[str]:
             and request.query_params["original"] == "true"
         )
         return doc.checksum if use_original else doc.archive_checksum
-    except Document.DoesNotExist:
+    except Document.DoesNotExist:  # pragma: no cover
         return None
     return None
+
+
+def preview_last_modified(request, pk: int) -> Optional[datetime]:
+    """
+    Uses the document's modified time to set the Last-Modified header. Not strictly
+    speaking correct, but close enough and quick
+    """
+    try:
+        doc = Document.objects.get(pk=pk)
+        return doc.modified
+    except Document.DoesNotExist:  # pragma: no cover
+        return None
+    return None
+
+
+def thumbnail_last_modified(request, pk: int) -> Optional[datetime]:
+    """
+    Returns the filesystem last modified, either from cache or from the filesystem.
+    Cache should be (slightly?) faster than filesystem
+    """
+    try:
+        doc = Document.objects.get(pk=pk)
+        if not doc.thumbnail_path.exists():
+            return None
+        doc_key = get_thumbnail_modified_key(pk)
+
+        cache_hit = cache.get(doc_key)
+        if cache_hit is not None:
+            cache.touch(doc_key, CACHE_50_MINUTES)
+            return cache_hit
+
+        # No cache, get the timestamp and cache the datetime
+        last_modified = datetime.fromtimestamp(
+            doc.thumbnail_path.stat().st_mtime,
+            tz=timezone.utc,
+        )
+        cache.set(doc_key, last_modified, CACHE_50_MINUTES)
+        return last_modified
+    except Document.DoesNotExist:  # pragma: no cover
+        return None
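All of the refresh helpers lean on cache.touch(), which only bumps the TTL of an existing key, so hot entries get a sliding expiration. A standalone sketch (key and timeouts illustrative):

from django.core.cache import cache

cache.set("doc_42_metadata", {"pages": 3}, 60)       # expires in one minute
cache.touch("doc_42_metadata", 3000)                 # alive for ~50 minutes again
assert cache.touch("doc_42_missing", 3000) is False  # touch never creates a key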
Document API tests:
@@ -4,6 +4,7 @@ import shutil
 import tempfile
 import uuid
 import zoneinfo
+from binascii import hexlify
 from datetime import timedelta
 from pathlib import Path
 from unittest import mock
@@ -13,12 +14,17 @@ from dateutil import parser
 from django.conf import settings
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
+from django.core.cache import cache
 from django.test import override_settings
 from django.utils import timezone
 from guardian.shortcuts import assign_perm
 from rest_framework import status
 from rest_framework.test import APITestCase

+from documents.caching import CACHE_50_MINUTES
+from documents.caching import CLASSIFIER_HASH_KEY
+from documents.caching import CLASSIFIER_MODIFIED_KEY
+from documents.caching import CLASSIFIER_VERSION_KEY
 from documents.models import Correspondent
 from documents.models import CustomField
 from documents.models import CustomFieldInstance
@@ -40,6 +46,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):

         self.user = User.objects.create_superuser(username="temp_admin")
         self.client.force_authenticate(user=self.user)
+        cache.clear()

     def testDocuments(self):
         response = self.client.get("/api/documents/").data
@@ -1162,6 +1169,9 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         self.assertEqual(meta["original_size"], os.stat(source_file).st_size)
         self.assertEqual(meta["archive_size"], os.stat(archive_file).st_size)

+        response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
     def test_get_metadata_invalid_doc(self):
         response = self.client.get("/api/documents/34576/metadata/")
         self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
@@ -1266,7 +1276,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
             },
         )

-    @mock.patch("documents.conditionals.pickle.load")
+    @mock.patch("documents.views.load_classifier")
     @mock.patch("documents.views.match_storage_paths")
     @mock.patch("documents.views.match_document_types")
     @mock.patch("documents.views.match_tags")
@@ -1278,7 +1288,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         match_tags,
         match_document_types,
         match_storage_paths,
-        mocked_pickle_load,
+        mocked_load,
     ):
         """
         GIVEN:
@@ -1287,23 +1297,43 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
             - Classifier has not been modified
         THEN:
             - Subsequent requests are returned alright
-            - ETag and last modified are called
+            - ETag and last modified headers are set
         """
-        settings.MODEL_FILE.touch()
+
+        # setup the cache how the classifier does it
         from documents.classifier import DocumentClassifier

-        last_modified = timezone.now()
-
-        # ETag first, then modified
-        mock_effect = [
-            DocumentClassifier.FORMAT_VERSION,
-            "dont care",
-            b"thisisachecksum",
-            DocumentClassifier.FORMAT_VERSION,
-            last_modified,
-        ]
-        mocked_pickle_load.side_effect = mock_effect
+        settings.MODEL_FILE.touch()
+
+        classifier_checksum_bytes = b"thisisachecksum"
+        classifier_checksum_hex = hexlify(classifier_checksum_bytes).decode()
+
+        # Two loads, so two side effects
+        mocked_load.side_effect = [
+            mock.Mock(
+                last_auto_type_hash=classifier_checksum_bytes,
+                FORMAT_VERSION=DocumentClassifier.FORMAT_VERSION,
+            ),
+            mock.Mock(
+                last_auto_type_hash=classifier_checksum_bytes,
+                FORMAT_VERSION=DocumentClassifier.FORMAT_VERSION,
+            ),
+        ]
+
+        last_modified = timezone.now()
+        cache.set(CLASSIFIER_MODIFIED_KEY, last_modified, CACHE_50_MINUTES)
+        cache.set(CLASSIFIER_HASH_KEY, classifier_checksum_hex, CACHE_50_MINUTES)
+        cache.set(
+            CLASSIFIER_VERSION_KEY,
+            DocumentClassifier.FORMAT_VERSION,
+            CACHE_50_MINUTES,
+        )
+
+        # Mock the matching
+        match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
+        match_tags.return_value = [Tag(id=56), Tag(id=123)]
+        match_document_types.return_value = [DocumentType(id=23)]
+        match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
+
         doc = Document.objects.create(
             title="test",
@@ -1311,12 +1341,8 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
             content="this is an invoice from 12.04.2022!",
         )

-        match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
-        match_tags.return_value = [Tag(id=56), Tag(id=123)]
-        match_document_types.return_value = [DocumentType(id=23)]
-        match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
-
         response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
         self.assertEqual(
             response.data,
             {
@@ -1327,7 +1353,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
                 "dates": ["2022-04-12"],
             },
         )
-        mocked_pickle_load.assert_called()
         self.assertIn("Last-Modified", response.headers)
         self.assertEqual(
             response.headers["Last-Modified"],
@@ -1336,15 +1361,11 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         self.assertIn("ETag", response.headers)
         self.assertEqual(
             response.headers["ETag"],
-            f"\"b'thisisachecksum':{settings.NUMBER_OF_SUGGESTED_DATES}\"",
+            f'"{classifier_checksum_hex}:{settings.NUMBER_OF_SUGGESTED_DATES}"',
         )

-        mocked_pickle_load.rest_mock()
-        mocked_pickle_load.side_effect = mock_effect
-
         response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
         self.assertEqual(response.status_code, status.HTTP_200_OK)
-        mocked_pickle_load.assert_called()

     @mock.patch("documents.parsers.parse_date_generator")
     @override_settings(NUMBER_OF_SUGGESTED_DATES=0)
src/documents/views.py
@@ -35,6 +35,7 @@ from django.utils.translation import get_language
 from django.views import View
 from django.views.decorators.cache import cache_control
 from django.views.decorators.http import condition
+from django.views.decorators.http import last_modified
 from django.views.generic import TemplateView
 from django_filters.rest_framework import DjangoFilterBackend
 from langdetect import detect
@@ -62,12 +63,21 @@ from documents import bulk_edit
 from documents.bulk_download import ArchiveOnlyStrategy
 from documents.bulk_download import OriginalAndArchiveStrategy
 from documents.bulk_download import OriginalsOnlyStrategy
+from documents.caching import CACHE_50_MINUTES
+from documents.caching import get_metadata_cache
+from documents.caching import get_suggestion_cache
+from documents.caching import refresh_metadata_cache
+from documents.caching import refresh_suggestions_cache
+from documents.caching import set_metadata_cache
+from documents.caching import set_suggestions_cache
 from documents.classifier import load_classifier
 from documents.conditionals import metadata_etag
 from documents.conditionals import metadata_last_modified
 from documents.conditionals import preview_etag
+from documents.conditionals import preview_last_modified
 from documents.conditionals import suggestions_etag
 from documents.conditionals import suggestions_last_modified
+from documents.conditionals import thumbnail_last_modified
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
 from documents.data_models import DocumentSource
@@ -379,10 +389,12 @@ class DocumentViewSet(

         try:
             return parser.extract_metadata(file, mime_type)
-        except Exception:
+        except Exception:  # pragma: no cover
+            logger.exception(f"Issue getting metadata for {file}")
             # TODO: cover GPG errors, remove later.
             return []
-        else:
+        else:  # pragma: no cover
+            logger.warning(f"No parser for {mime_type}")
             return []

     def get_filesize(self, filename):
@@ -407,16 +419,37 @@ class DocumentViewSet(
         except Document.DoesNotExist:
             raise Http404

+        document_cached_metadata = get_metadata_cache(doc.pk)
+
+        archive_metadata = None
+        archive_filesize = None
+        if document_cached_metadata is not None:
+            original_metadata = document_cached_metadata.original_metadata
+            archive_metadata = document_cached_metadata.archive_metadata
+            refresh_metadata_cache(doc.pk)
+        else:
+            original_metadata = self.get_metadata(doc.source_path, doc.mime_type)
+
+            if doc.has_archive_version:
+                archive_filesize = self.get_filesize(doc.archive_path)
+                archive_metadata = self.get_metadata(
+                    doc.archive_path,
+                    "application/pdf",
+                )
+            set_metadata_cache(doc, original_metadata, archive_metadata)
+
         meta = {
             "original_checksum": doc.checksum,
             "original_size": self.get_filesize(doc.source_path),
             "original_mime_type": doc.mime_type,
             "media_filename": doc.filename,
             "has_archive_version": doc.has_archive_version,
-            "original_metadata": self.get_metadata(doc.source_path, doc.mime_type),
+            "original_metadata": original_metadata,
             "archive_checksum": doc.archive_checksum,
             "archive_media_filename": doc.archive_filename,
             "original_filename": doc.original_filename,
+            "archive_size": archive_filesize,
+            "archive_metadata": archive_metadata,
         }

         lang = "en"
@@ -426,16 +459,6 @@ class DocumentViewSet(
             pass
         meta["lang"] = lang

-        if doc.has_archive_version:
-            meta["archive_size"] = self.get_filesize(doc.archive_path)
-            meta["archive_metadata"] = self.get_metadata(
-                doc.archive_path,
-                "application/pdf",
-            )
-        else:
-            meta["archive_size"] = None
-            meta["archive_metadata"] = None
-
         return Response(meta)

     @action(methods=["get"], detail=True)
@@ -454,6 +477,12 @@ class DocumentViewSet(
         ):
             return HttpResponseForbidden("Insufficient permissions")

+        document_suggestions = get_suggestion_cache(doc.pk)
+
+        if document_suggestions is not None:
+            refresh_suggestions_cache(doc.pk)
+            return Response(document_suggestions.suggestions)
+
         classifier = load_classifier()

         dates = []
@@ -463,8 +492,7 @@ class DocumentViewSet(
                 {i for i in itertools.islice(gen, settings.NUMBER_OF_SUGGESTED_DATES)},
             )

-        return Response(
-            {
+        resp_data = {
             "correspondents": [
                 c.id for c in match_correspondents(doc, classifier, request.user)
             ],
@@ -475,15 +503,19 @@ class DocumentViewSet(
             "storage_paths": [
                 dt.id for dt in match_storage_paths(doc, classifier, request.user)
             ],
-            "dates": [
-                date.strftime("%Y-%m-%d") for date in dates if date is not None
-            ],
-            },
-        )
+            "dates": [date.strftime("%Y-%m-%d") for date in dates if date is not None],
+        }
+
+        # Cache the suggestions and the classifier hash for later
+        set_suggestions_cache(doc.pk, resp_data, classifier)
+
+        return Response(resp_data)

     @action(methods=["get"], detail=True)
     @method_decorator(cache_control(public=False, max_age=5 * 60))
-    @method_decorator(condition(etag_func=preview_etag))
+    @method_decorator(
+        condition(etag_func=preview_etag, last_modified_func=preview_last_modified),
+    )
     def preview(self, request, pk=None):
         try:
             response = self.file_response(pk, request, "inline")
@@ -492,7 +524,8 @@ class DocumentViewSet(
             raise Http404

     @action(methods=["get"], detail=True)
-    @method_decorator(cache_control(public=False, max_age=315360000))
+    @method_decorator(cache_control(public=False, max_age=CACHE_50_MINUTES))
+    @method_decorator(last_modified(thumbnail_last_modified))
     def thumb(self, request, pk=None):
         try:
             doc = Document.objects.get(id=pk)
@@ -506,8 +539,6 @@ class DocumentViewSet(
                 handle = GnuPG.decrypted(doc.thumbnail_file)
             else:
                 handle = doc.thumbnail_file
-            # TODO: Send ETag information and use that to send new thumbnails
-            # if available

             return HttpResponse(handle, content_type="image/webp")
         except (FileNotFoundError, Document.DoesNotExist):
src/paperless/settings.py
@@ -762,8 +762,12 @@ CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")
 # django setting.
 CACHES = {
     "default": {
-        "BACKEND": "django.core.cache.backends.redis.RedisCache",
+        "BACKEND": os.environ.get(
+            "PAPERLESS_CACHE_BACKEND",
+            "django.core.cache.backends.redis.RedisCache",
+        ),
         "LOCATION": _CHANNELS_REDIS_URL,
+        "KEY_PREFIX": os.getenv("PAPERLESS_REDIS_PREFIX", ""),
     },
 }
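This makes the backend swappable per environment without code changes. With the new variables unset, the resolved setting is equivalent to (values taken from this hunk):

CACHES = {
    "default": {
        "BACKEND": "django.core.cache.backends.redis.RedisCache",
        "LOCATION": _CHANNELS_REDIS_URL,  # the Redis URL already used for channels
        "KEY_PREFIX": "",  # set PAPERLESS_REDIS_PREFIX to namespace keys in a shared Redis
    },
}

The test suite points PAPERLESS_CACHE_BACKEND at LocMemCache (next hunk) so tests run without a Redis server.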
setup.cfg (pytest configuration):
@@ -3,6 +3,7 @@ DJANGO_SETTINGS_MODULE = paperless.settings
 addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --maxprocesses=16 --quiet --durations=50
 env =
     PAPERLESS_DISABLE_DBHANDLER=true
+    PAPERLESS_CACHE_BACKEND=django.core.cache.backends.locmem.LocMemCache

 [coverage:run]
 source =