From 25542c56b9dd5f4f02edfb67df5c6802cbcd1345 Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Sun, 4 Feb 2024 10:42:21 -0800
Subject: [PATCH] Feature: Cache metadata and suggestions in Redis (#5638)

---
 src/documents/caching.py                  | 197 ++++++++++++++++++++++
 src/documents/classifier.py               |  20 +++
 src/documents/conditionals.py             | 101 ++++++++---
 src/documents/tests/test_api_documents.py |  69 +++++---
 src/documents/views.py                    |  99 +++++++----
 src/paperless/settings.py                 |   6 +-
 src/setup.cfg                             |   1 +
 7 files changed, 415 insertions(+), 78 deletions(-)
 create mode 100644 src/documents/caching.py

diff --git a/src/documents/caching.py b/src/documents/caching.py
new file mode 100644
index 000000000..9b8607dd8
--- /dev/null
+++ b/src/documents/caching.py
@@ -0,0 +1,197 @@
+import logging
+from binascii import hexlify
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+from typing import Final
+from typing import Optional
+
+from django.core.cache import cache
+
+from documents.models import Document
+
+if TYPE_CHECKING:
+    from documents.classifier import DocumentClassifier
+
+logger = logging.getLogger("paperless.caching")
+
+
+@dataclass(frozen=True)
+class MetadataCacheData:
+    original_checksum: str
+    original_metadata: list
+    archive_checksum: Optional[str]
+    archive_metadata: Optional[list]
+
+
+@dataclass(frozen=True)
+class SuggestionCacheData:
+    classifier_version: int
+    classifier_hash: str
+    suggestions: dict
+
+
+CLASSIFIER_VERSION_KEY: Final[str] = "classifier_version"
+CLASSIFIER_HASH_KEY: Final[str] = "classifier_hash"
+CLASSIFIER_MODIFIED_KEY: Final[str] = "classifier_modified"
+
+CACHE_1_MINUTE: Final[int] = 60
+CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
+CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE
+
+
+def get_suggestion_cache_key(document_id: int) -> str:
+    """
+    Returns the basic key for a document's suggestions
+    """
+    return f"doc_{document_id}_suggest"
+
+
+def get_suggestion_cache(document_id: int) -> Optional[SuggestionCacheData]:
+    """
+    If possible, return the cached suggestions for the given document ID.
+    The classifier must match in both format version and hash, and the
+    suggestions must have been cached at least once.
+    """
+    from documents.classifier import DocumentClassifier
+
+    doc_key = get_suggestion_cache_key(document_id)
+    cache_hits = cache.get_many([CLASSIFIER_VERSION_KEY, CLASSIFIER_HASH_KEY, doc_key])
+    # The document suggestions are in the cache
+    if doc_key in cache_hits:
+        doc_suggestions: SuggestionCacheData = cache_hits[doc_key]
+        # The classifier format is the same
+        # The classifier hash is the same
+        # Then the suggestions can be used
+        if (
+            CLASSIFIER_VERSION_KEY in cache_hits
+            and cache_hits[CLASSIFIER_VERSION_KEY] == DocumentClassifier.FORMAT_VERSION
+            and cache_hits[CLASSIFIER_VERSION_KEY] == doc_suggestions.classifier_version
+        ) and (
+            CLASSIFIER_HASH_KEY in cache_hits
+            and cache_hits[CLASSIFIER_HASH_KEY] == doc_suggestions.classifier_hash
+        ):
+            return doc_suggestions
+        else:  # pragma: no cover
+            # Remove the key because something didn't match
+            cache.delete(doc_key)
+    return None
+
+
+def set_suggestions_cache(
+    document_id: int,
+    suggestions: dict,
+    classifier: Optional["DocumentClassifier"],
+    *,
+    timeout=CACHE_50_MINUTES,
+) -> None:
+    """
+    Caches the given suggestions, which were generated by the given classifier.
+    If there is no classifier, this function is a no-op (there won't be suggestions then anyway)
+    """
+    if classifier is not None:
+        doc_key = get_suggestion_cache_key(document_id)
+        cache.set(
+            doc_key,
+            SuggestionCacheData(
+                classifier.FORMAT_VERSION,
+                hexlify(classifier.last_auto_type_hash).decode(),
+                suggestions,
+            ),
+            timeout,
+        )
+
+
+def refresh_suggestions_cache(
+    document_id: int,
+    *,
+    timeout: int = CACHE_50_MINUTES,
+) -> None:
+    """
+    Refreshes the expiration of the suggestions for the given document ID
+    to the given timeout
+    """
+    doc_key = get_suggestion_cache_key(document_id)
+    cache.touch(doc_key, timeout)
+
+
+def get_metadata_cache_key(document_id: int) -> str:
+    """
+    Returns the basic key for a document's metadata
+    """
+    return f"doc_{document_id}_metadata"
+
+
+def get_metadata_cache(document_id: int) -> Optional[MetadataCacheData]:
+    """
+    Returns the cached document metadata for the given document ID, as long as
+    the metadata was cached once and the checksums have not changed
+    """
+    doc_key = get_metadata_cache_key(document_id)
+    doc_metadata: Optional[MetadataCacheData] = cache.get(doc_key)
+    # The metadata exists in the cache
+    if doc_metadata is not None:
+        try:
+            doc = Document.objects.get(pk=document_id)
+            # The original checksums match
+            # If the document has an archive version, its checksums match too
+            # Then, we can use the metadata
+            if doc_metadata.original_checksum == doc.checksum and (
+                not doc.has_archive_version
+                or (
+                    doc_metadata.archive_checksum is not None
+                    and doc_metadata.archive_checksum == doc.archive_checksum
+                )
+            ):
+                # Refresh cache
+                cache.touch(doc_key, CACHE_50_MINUTES)
+                return doc_metadata
+            else:  # pragma: no cover
+                # Something didn't match, delete the key
+                cache.delete(doc_key)
+        except Document.DoesNotExist:  # pragma: no cover
+            # The key existed but the Document didn't; basically impossible, clean up anyway
+            cache.delete(doc_key)
+    return None
+
+
+def set_metadata_cache(
+    document: Document,
+    original_metadata: list,
+    archive_metadata: Optional[list],
+    *,
+    timeout=CACHE_50_MINUTES,
+) -> None:
+    """
+    Sets the metadata into cache for the given Document
+    """
+    doc_key = get_metadata_cache_key(document.pk)
+    cache.set(
+        doc_key,
+        MetadataCacheData(
+            document.checksum,
+            original_metadata,
+            document.archive_checksum,
+            archive_metadata,
+        ),
+        timeout,
+    )
+
+
+def refresh_metadata_cache(
+    document_id: int,
+    *,
+    timeout: int = CACHE_50_MINUTES,
+) -> None:
+    """
+    Refreshes the expiration of the metadata for the given document ID
+    to the given timeout
+    """
+    doc_key = get_metadata_cache_key(document_id)
+    cache.touch(doc_key, timeout)
+
+
+def get_thumbnail_modified_key(document_id: int) -> str:
+    """
+    Builds the key to store a thumbnail's timestamp
+    """
+    return f"doc_{document_id}_thumbnail_modified"
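The module above is a read-through cache: callers try the cache first, refresh the TTL on a hit, and recompute then store on a miss. A minimal sketch of the intended call pattern using the helpers above; `compute` is a hypothetical stand-in for the real matching code and is not part of this patch:

    from documents.caching import get_suggestion_cache
    from documents.caching import refresh_suggestions_cache
    from documents.caching import set_suggestions_cache


    def suggestions_for(document_id: int, classifier, compute) -> dict:
        cached = get_suggestion_cache(document_id)
        if cached is not None:
            # Hit: extend the entry's lifetime and reuse the stored value
            refresh_suggestions_cache(document_id)
            return cached.suggestions
        # Miss or stale classifier: recompute, then store the result tagged
        # with the current classifier's version and hash for later validation
        suggestions = compute(document_id, classifier)
        set_suggestions_cache(document_id, suggestions, classifier)
        return suggestions

The views.py changes later in this patch follow this same shape.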
diff --git a/src/documents/classifier.py b/src/documents/classifier.py
index 5833e373e..6180a8671 100644
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -10,8 +10,13 @@ from pathlib import Path
 from typing import Optional
 
 from django.conf import settings
+from django.core.cache import cache
 from sklearn.exceptions import InconsistentVersionWarning
 
+from documents.caching import CACHE_50_MINUTES
+from documents.caching import CLASSIFIER_HASH_KEY
+from documents.caching import CLASSIFIER_MODIFIED_KEY
+from documents.caching import CLASSIFIER_VERSION_KEY
 from documents.models import Document
 from documents.models import MatchingModel
 
@@ -208,6 +213,15 @@ class DocumentClassifier:
             and self.last_doc_change_time >= latest_doc_change
         ) and self.last_auto_type_hash == hasher.digest():
             logger.info("No updates since last training")
+            # Set the classifier information into the cache
+            # Caching for 50 minutes, so slightly less than the normal retrain time
+            cache.set(
+                CLASSIFIER_MODIFIED_KEY,
+                self.last_doc_change_time,
+                CACHE_50_MINUTES,
+            )
+            cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES)
+            cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES)
             return False
 
         # subtract 1 since -1 (null) is also part of the classes.
@@ -322,6 +336,12 @@ class DocumentClassifier:
         self.last_doc_change_time = latest_doc_change
         self.last_auto_type_hash = hasher.digest()
 
+        # Set the classifier information into the cache
+        # Caching for 50 minutes, so slightly less than the normal retrain time
+        cache.set(CLASSIFIER_MODIFIED_KEY, self.last_doc_change_time, CACHE_50_MINUTES)
+        cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES)
+        cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES)
+
         return True
 
     def preprocess_content(self, content: str) -> str:  # pragma: no cover
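Both the retrained and the no-op training paths publish the same three keys, so any process sharing the cache can ask whether its loaded classifier is still current without re-reading the model file. A sketch of such a check, assuming a loaded classifier exposing the `FORMAT_VERSION` and `last_auto_type_hash` attributes used above (this helper is illustrative, not part of the patch):

    from binascii import hexlify

    from django.core.cache import cache

    from documents.caching import CLASSIFIER_HASH_KEY
    from documents.caching import CLASSIFIER_VERSION_KEY


    def classifier_is_current(classifier) -> bool:
        # Keys that have expired are simply absent from get_many's result,
        # which makes the comparison below fail as intended
        hits = cache.get_many([CLASSIFIER_VERSION_KEY, CLASSIFIER_HASH_KEY])
        return (
            hits.get(CLASSIFIER_VERSION_KEY) == classifier.FORMAT_VERSION
            and hits.get(CLASSIFIER_HASH_KEY)
            == hexlify(classifier.last_auto_type_hash).decode()
        )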
diff --git a/src/documents/conditionals.py b/src/documents/conditionals.py
index 07e6850fb..1b53dfe2b 100644
--- a/src/documents/conditionals.py
+++ b/src/documents/conditionals.py
@@ -1,9 +1,16 @@
-import pickle
 from datetime import datetime
+from datetime import timezone
 from typing import Optional
 
 from django.conf import settings
+from django.core.cache import cache
 
+from documents.caching import CACHE_5_MINUTES
+from documents.caching import CACHE_50_MINUTES
+from documents.caching import CLASSIFIER_HASH_KEY
+from documents.caching import CLASSIFIER_MODIFIED_KEY
+from documents.caching import CLASSIFIER_VERSION_KEY
+from documents.caching import get_thumbnail_modified_key
 from documents.classifier import DocumentClassifier
 from documents.models import Document
 
@@ -14,18 +21,25 @@ def suggestions_etag(request, pk: int) -> Optional[str]:
     suggestions if the classifier has not been changed and the suggested dates
     setting is also unchanged
 
-    TODO: It would be nice to not duplicate the partial loading and the loading
-    between here and the actual classifier
     """
+    # If no model file, no etag at all
     if not settings.MODEL_FILE.exists():
         return None
-    with open(settings.MODEL_FILE, "rb") as f:
-        schema_version = pickle.load(f)
-        if schema_version != DocumentClassifier.FORMAT_VERSION:
-            return None
-        _ = pickle.load(f)
-        last_auto_type_hash: bytes = pickle.load(f)
-        return f"{last_auto_type_hash}:{settings.NUMBER_OF_SUGGESTED_DATES}"
+    # Check cache information
+    cache_hits = cache.get_many(
+        [CLASSIFIER_VERSION_KEY, CLASSIFIER_HASH_KEY],
+    )
+    # If the version differs somehow, no etag
+    if (
+        CLASSIFIER_VERSION_KEY in cache_hits
+        and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
+    ):
+        return None
+    elif CLASSIFIER_HASH_KEY in cache_hits:
+        # Refresh the cache and return the hash digest and the dates setting
+        cache.touch(CLASSIFIER_HASH_KEY, CACHE_5_MINUTES)
+        return f"{cache_hits[CLASSIFIER_HASH_KEY]}:{settings.NUMBER_OF_SUGGESTED_DATES}"
+    return None
 
 
 def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
@@ -34,14 +48,23 @@ def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
     as there is no way to track the suggested date setting modification, but
     it seems unlikely that it changes too often
     """
+    # No file, no last modified
    if not settings.MODEL_FILE.exists():
         return None
-    with open(settings.MODEL_FILE, "rb") as f:
-        schema_version = pickle.load(f)
-        if schema_version != DocumentClassifier.FORMAT_VERSION:
-            return None
-        last_doc_change_time = pickle.load(f)
-        return last_doc_change_time
+    cache_hits = cache.get_many(
+        [CLASSIFIER_VERSION_KEY, CLASSIFIER_MODIFIED_KEY],
+    )
+    # If the version differs somehow, no last modified
+    if (
+        CLASSIFIER_VERSION_KEY in cache_hits
+        and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
+    ):
+        return None
+    elif CLASSIFIER_MODIFIED_KEY in cache_hits:
+        # Refresh the cache and return the last modified
+        cache.touch(CLASSIFIER_MODIFIED_KEY, CACHE_5_MINUTES)
+        return cache_hits[CLASSIFIER_MODIFIED_KEY]
+    return None
 
 
 def metadata_etag(request, pk: int) -> Optional[str]:
@@ -52,7 +75,7 @@ def metadata_etag(request, pk: int) -> Optional[str]:
     try:
         doc = Document.objects.get(pk=pk)
         return doc.checksum
-    except Document.DoesNotExist:
+    except Document.DoesNotExist:  # pragma: no cover
         return None
     return None
 
@@ -66,7 +89,7 @@ def metadata_last_modified(request, pk: int) -> Optional[datetime]:
     try:
         doc = Document.objects.get(pk=pk)
         return doc.modified
-    except Document.DoesNotExist:
+    except Document.DoesNotExist:  # pragma: no cover
         return None
     return None
 
@@ -82,6 +105,46 @@ def preview_etag(request, pk: int) -> Optional[str]:
             and request.query_params["original"] == "true"
         )
         return doc.checksum if use_original else doc.archive_checksum
-    except Document.DoesNotExist:
+    except Document.DoesNotExist:  # pragma: no cover
         return None
     return None
+
+
+def preview_last_modified(request, pk: int) -> Optional[datetime]:
+    """
+    Uses the document's modified time to set the Last-Modified header. Not strictly
+    speaking correct, but close enough and quick
+    """
+    try:
+        doc = Document.objects.get(pk=pk)
+        return doc.modified
+    except Document.DoesNotExist:  # pragma: no cover
+        return None
+    return None
+
+
+def thumbnail_last_modified(request, pk: int) -> Optional[datetime]:
+    """
+    Returns the filesystem last modified time, either from cache or from the
+    filesystem. Cache should be (slightly?) faster than the filesystem
+    """
+    try:
+        doc = Document.objects.get(pk=pk)
+        if not doc.thumbnail_path.exists():
+            return None
+        doc_key = get_thumbnail_modified_key(pk)
+
+        cache_hit = cache.get(doc_key)
+        if cache_hit is not None:
+            cache.touch(doc_key, CACHE_50_MINUTES)
+            return cache_hit
+
+        # No cache, get the timestamp and cache the datetime
+        last_modified = datetime.fromtimestamp(
+            doc.thumbnail_path.stat().st_mtime,
+            tz=timezone.utc,
+        )
+        cache.set(doc_key, last_modified, CACHE_50_MINUTES)
+        return last_modified
+    except Document.DoesNotExist:  # pragma: no cover
+        return None
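All of these helpers use the `(request, *args) -> Optional[...]` shape that Django's `condition` decorator expects, which is what lets the views attach them declaratively. A minimal function-view sketch of the wiring (the patch itself applies the same decorator to viewset actions through `method_decorator`, as shown further down):

    from django.views.decorators.http import condition

    from documents.conditionals import preview_etag
    from documents.conditionals import preview_last_modified


    # Django evaluates both callables before the view body; when the client's
    # If-None-Match / If-Modified-Since headers match, it answers with a
    # 304 Not Modified and the expensive body never runs.
    @condition(etag_func=preview_etag, last_modified_func=preview_last_modified)
    def preview(request, pk: int):
        ...  # build and return the full file response here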
diff --git a/src/documents/tests/test_api_documents.py b/src/documents/tests/test_api_documents.py
index 20dd64d82..d7ae1eeb7 100644
--- a/src/documents/tests/test_api_documents.py
+++ b/src/documents/tests/test_api_documents.py
@@ -4,6 +4,7 @@ import shutil
 import tempfile
 import uuid
 import zoneinfo
+from binascii import hexlify
 from datetime import timedelta
 from pathlib import Path
 from unittest import mock
@@ -13,12 +14,17 @@ from dateutil import parser
 from django.conf import settings
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
+from django.core.cache import cache
 from django.test import override_settings
 from django.utils import timezone
 from guardian.shortcuts import assign_perm
 from rest_framework import status
 from rest_framework.test import APITestCase
 
+from documents.caching import CACHE_50_MINUTES
+from documents.caching import CLASSIFIER_HASH_KEY
+from documents.caching import CLASSIFIER_MODIFIED_KEY
+from documents.caching import CLASSIFIER_VERSION_KEY
 from documents.models import Correspondent
 from documents.models import CustomField
 from documents.models import CustomFieldInstance
@@ -40,6 +46,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
 
         self.user = User.objects.create_superuser(username="temp_admin")
         self.client.force_authenticate(user=self.user)
+        cache.clear()
 
     def testDocuments(self):
         response = self.client.get("/api/documents/").data
@@ -1162,6 +1169,9 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         self.assertEqual(meta["original_size"], os.stat(source_file).st_size)
         self.assertEqual(meta["archive_size"], os.stat(archive_file).st_size)
 
+        response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
     def test_get_metadata_invalid_doc(self):
         response = self.client.get("/api/documents/34576/metadata/")
         self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
@@ -1266,7 +1276,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
             },
         )
 
-    @mock.patch("documents.conditionals.pickle.load")
+    @mock.patch("documents.views.load_classifier")
     @mock.patch("documents.views.match_storage_paths")
     @mock.patch("documents.views.match_document_types")
     @mock.patch("documents.views.match_tags")
@@ -1278,7 +1288,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         match_tags,
         match_document_types,
         match_storage_paths,
-        mocked_pickle_load,
+        mocked_load,
     ):
         """
         GIVEN:
             - Request for suggestions for a document
@@ -1287,23 +1297,43 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         WHEN:
             - Classifier has not been modified
         THEN:
             - Subsequent requests are served successfully
-            - ETag and last modified are called
+            - ETag and last modified headers are set
         """
-        settings.MODEL_FILE.touch()
+        # Set up the cache the way the classifier does it
         from documents.classifier import DocumentClassifier
 
-        last_modified = timezone.now()
+        settings.MODEL_FILE.touch()
 
-        # ETag first, then modified
-        mock_effect = [
-            DocumentClassifier.FORMAT_VERSION,
-            "dont care",
-            b"thisisachecksum",
-            DocumentClassifier.FORMAT_VERSION,
-            last_modified,
+        classifier_checksum_bytes = b"thisisachecksum"
+        classifier_checksum_hex = hexlify(classifier_checksum_bytes).decode()
+
+        # Two loads, so two side effects
+        mocked_load.side_effect = [
+            mock.Mock(
+                last_auto_type_hash=classifier_checksum_bytes,
+                FORMAT_VERSION=DocumentClassifier.FORMAT_VERSION,
+            ),
+            mock.Mock(
+                last_auto_type_hash=classifier_checksum_bytes,
+                FORMAT_VERSION=DocumentClassifier.FORMAT_VERSION,
+            ),
         ]
-        mocked_pickle_load.side_effect = mock_effect
+
+        last_modified = timezone.now()
+        cache.set(CLASSIFIER_MODIFIED_KEY, last_modified, CACHE_50_MINUTES)
+        cache.set(CLASSIFIER_HASH_KEY, classifier_checksum_hex, CACHE_50_MINUTES)
+        cache.set(
+            CLASSIFIER_VERSION_KEY,
+            DocumentClassifier.FORMAT_VERSION,
+            CACHE_50_MINUTES,
+        )
+
+        # Mock the matching
+        match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
+        match_tags.return_value = [Tag(id=56), Tag(id=123)]
+        match_document_types.return_value = [DocumentType(id=23)]
+        match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
 
         doc = Document.objects.create(
             title="test",
             checksum="123",
             content="this is an invoice from 12.04.2022!",
         )
 
-        match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
-        match_tags.return_value = [Tag(id=56), Tag(id=123)]
-        match_document_types.return_value = [DocumentType(id=23)]
-        match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
-
         response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
         self.assertEqual(
             response.data,
             {
                 "correspondents": [88, 2],
                 "tags": [56, 123],
                 "document_types": [23],
                 "storage_paths": [99, 77],
                 "dates": ["2022-04-12"],
             },
         )
-        mocked_pickle_load.assert_called()
         self.assertIn("Last-Modified", response.headers)
         self.assertEqual(
             response.headers["Last-Modified"],
             last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT"),
         )
         self.assertIn("ETag", response.headers)
         self.assertEqual(
             response.headers["ETag"],
-            f"\"b'thisisachecksum':{settings.NUMBER_OF_SUGGESTED_DATES}\"",
+            f'"{classifier_checksum_hex}:{settings.NUMBER_OF_SUGGESTED_DATES}"',
         )
 
-        mocked_pickle_load.rest_mock()
-        mocked_pickle_load.side_effect = mock_effect
-
         response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
         self.assertEqual(response.status_code, status.HTTP_200_OK)
-        mocked_pickle_load.assert_called()
 
     @mock.patch("documents.parsers.parse_date_generator")
     @override_settings(NUMBER_OF_SUGGESTED_DATES=0)
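The suite now clears the default cache in `setUp()`, and the `setup.cfg` change at the end of this patch points the tests at the local-memory backend so parallel test processes never share Redis state. The same isolation can also be pinned onto a single test class; a hedged sketch using standard Django testing APIs (the class name is hypothetical):

    from django.core.cache import cache
    from django.test import TestCase
    from django.test import override_settings


    @override_settings(
        CACHES={
            "default": {
                "BACKEND": "django.core.cache.backends.locmem.LocMemCache",
            },
        },
    )
    class CachedEndpointTest(TestCase):
        def setUp(self):
            super().setUp()
            # Mirror the patch's setUp() hygiene so every test starts cold
            cache.clear()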
diff --git a/src/documents/views.py b/src/documents/views.py
index 11fb5b1f2..0578cdb24 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -35,6 +35,7 @@ from django.utils.translation import get_language
 from django.views import View
 from django.views.decorators.cache import cache_control
 from django.views.decorators.http import condition
+from django.views.decorators.http import last_modified
 from django.views.generic import TemplateView
 from django_filters.rest_framework import DjangoFilterBackend
 from langdetect import detect
@@ -62,12 +63,21 @@ from documents import bulk_edit
 from documents.bulk_download import ArchiveOnlyStrategy
 from documents.bulk_download import OriginalAndArchiveStrategy
 from documents.bulk_download import OriginalsOnlyStrategy
+from documents.caching import CACHE_50_MINUTES
+from documents.caching import get_metadata_cache
+from documents.caching import get_suggestion_cache
+from documents.caching import refresh_metadata_cache
+from documents.caching import refresh_suggestions_cache
+from documents.caching import set_metadata_cache
+from documents.caching import set_suggestions_cache
 from documents.classifier import load_classifier
 from documents.conditionals import metadata_etag
 from documents.conditionals import metadata_last_modified
 from documents.conditionals import preview_etag
+from documents.conditionals import preview_last_modified
 from documents.conditionals import suggestions_etag
 from documents.conditionals import suggestions_last_modified
+from documents.conditionals import thumbnail_last_modified
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
 from documents.data_models import DocumentSource
@@ -379,10 +389,12 @@ class DocumentViewSet(
 
         try:
             return parser.extract_metadata(file, mime_type)
-        except Exception:
+        except Exception:  # pragma: no cover
+            logger.exception(f"Issue getting metadata for {file}")
             # TODO: cover GPG errors, remove later.
             return []
-        else:
+        else:  # pragma: no cover
+            logger.warning(f"No parser for {mime_type}")
             return []
 
     def get_filesize(self, filename):
@@ -407,16 +419,37 @@ class DocumentViewSet(
         except Document.DoesNotExist:
             raise Http404
 
+        document_cached_metadata = get_metadata_cache(doc.pk)
+
+        archive_metadata = None
+        # The size is a cheap stat(), so compute it on both the cached and
+        # uncached paths; otherwise a cache hit would report an archive_size
+        # of None even when an archive version exists
+        archive_filesize = (
+            self.get_filesize(doc.archive_path) if doc.has_archive_version else None
+        )
+        if document_cached_metadata is not None:
+            original_metadata = document_cached_metadata.original_metadata
+            archive_metadata = document_cached_metadata.archive_metadata
+            refresh_metadata_cache(doc.pk)
+        else:
+            original_metadata = self.get_metadata(doc.source_path, doc.mime_type)
+
+            if doc.has_archive_version:
+                archive_metadata = self.get_metadata(
+                    doc.archive_path,
+                    "application/pdf",
+                )
+            set_metadata_cache(doc, original_metadata, archive_metadata)
+
         meta = {
             "original_checksum": doc.checksum,
             "original_size": self.get_filesize(doc.source_path),
             "original_mime_type": doc.mime_type,
             "media_filename": doc.filename,
             "has_archive_version": doc.has_archive_version,
-            "original_metadata": self.get_metadata(doc.source_path, doc.mime_type),
+            "original_metadata": original_metadata,
             "archive_checksum": doc.archive_checksum,
             "archive_media_filename": doc.archive_filename,
             "original_filename": doc.original_filename,
+            "archive_size": archive_filesize,
+            "archive_metadata": archive_metadata,
         }
 
         lang = "en"
@@ -426,16 +459,6 @@ class DocumentViewSet(
             pass
         meta["lang"] = lang
 
-        if doc.has_archive_version:
-            meta["archive_size"] = self.get_filesize(doc.archive_path)
-            meta["archive_metadata"] = self.get_metadata(
-                doc.archive_path,
-                "application/pdf",
-            )
-        else:
-            meta["archive_size"] = None
-            meta["archive_metadata"] = None
-
         return Response(meta)
 
     @action(methods=["get"], detail=True)
@@ -454,6 +477,12 @@ class DocumentViewSet(
         ):
             return HttpResponseForbidden("Insufficient permissions")
 
+        document_suggestions = get_suggestion_cache(doc.pk)
+
+        if document_suggestions is not None:
+            refresh_suggestions_cache(doc.pk)
+            return Response(document_suggestions.suggestions)
+
         classifier = load_classifier()
 
         dates = []
@@ -463,27 +492,30 @@ class DocumentViewSet(
                 {i for i in 
itertools.islice(gen, settings.NUMBER_OF_SUGGESTED_DATES)}, ) - return Response( - { - "correspondents": [ - c.id for c in match_correspondents(doc, classifier, request.user) - ], - "tags": [t.id for t in match_tags(doc, classifier, request.user)], - "document_types": [ - dt.id for dt in match_document_types(doc, classifier, request.user) - ], - "storage_paths": [ - dt.id for dt in match_storage_paths(doc, classifier, request.user) - ], - "dates": [ - date.strftime("%Y-%m-%d") for date in dates if date is not None - ], - }, - ) + resp_data = { + "correspondents": [ + c.id for c in match_correspondents(doc, classifier, request.user) + ], + "tags": [t.id for t in match_tags(doc, classifier, request.user)], + "document_types": [ + dt.id for dt in match_document_types(doc, classifier, request.user) + ], + "storage_paths": [ + dt.id for dt in match_storage_paths(doc, classifier, request.user) + ], + "dates": [date.strftime("%Y-%m-%d") for date in dates if date is not None], + } + + # Cache the suggestions and the classifier hash for later + set_suggestions_cache(doc.pk, resp_data, classifier) + + return Response(resp_data) @action(methods=["get"], detail=True) @method_decorator(cache_control(public=False, max_age=5 * 60)) - @method_decorator(condition(etag_func=preview_etag)) + @method_decorator( + condition(etag_func=preview_etag, last_modified_func=preview_last_modified), + ) def preview(self, request, pk=None): try: response = self.file_response(pk, request, "inline") @@ -492,7 +524,8 @@ class DocumentViewSet( raise Http404 @action(methods=["get"], detail=True) - @method_decorator(cache_control(public=False, max_age=315360000)) + @method_decorator(cache_control(public=False, max_age=CACHE_50_MINUTES)) + @method_decorator(last_modified(thumbnail_last_modified)) def thumb(self, request, pk=None): try: doc = Document.objects.get(id=pk) @@ -506,8 +539,6 @@ class DocumentViewSet( handle = GnuPG.decrypted(doc.thumbnail_file) else: handle = doc.thumbnail_file - # TODO: Send ETag information and use that to send new thumbnails - # if available return HttpResponse(handle, content_type="image/webp") except (FileNotFoundError, Document.DoesNotExist): diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 17ec2765d..7179f0358 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -762,8 +762,12 @@ CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db") # django setting. CACHES = { "default": { - "BACKEND": "django.core.cache.backends.redis.RedisCache", + "BACKEND": os.environ.get( + "PAPERLESS_CACHE_BACKEND", + "django.core.cache.backends.redis.RedisCache", + ), "LOCATION": _CHANNELS_REDIS_URL, + "KEY_PREFIX": os.getenv("PAPERLESS_REDIS_PREFIX", ""), }, } diff --git a/src/setup.cfg b/src/setup.cfg index dc5e9e33a..1877cb16e 100644 --- a/src/setup.cfg +++ b/src/setup.cfg @@ -3,6 +3,7 @@ DJANGO_SETTINGS_MODULE = paperless.settings addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --maxprocesses=16 --quiet --durations=50 env = PAPERLESS_DISABLE_DBHANDLER=true + PAPERLESS_CACHE_BACKEND=django.core.cache.backends.locmem.LocMemCache [coverage:run] source =
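With this change the cache becomes environment-driven: `PAPERLESS_CACHE_BACKEND` swaps the backend class (the test suite uses it to select the local-memory backend, as in the `setup.cfg` line above), and `PAPERLESS_REDIS_PREFIX` namespaces the keys when several instances share one Redis. An illustrative environment snippet; the prefix value here is an arbitrary example:

    # Two paperless instances can share one Redis if their prefixes differ
    PAPERLESS_REDIS_PREFIX=paperless_a

    # Optional: any Django cache backend dotted path; defaults to RedisCache
    PAPERLESS_CACHE_BACKEND=django.core.cache.backends.redis.RedisCache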