mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Feature: Add additional caching support to suggestions and metadata (#5414)
* Adds ETag and Last-Modified headers to suggestions, metadata and previews * Slight update to the suggestions etag * Small user message for why classifier didn't train again
This commit is contained in:
parent
0068f091bb
commit
e16645b146
@ -207,6 +207,7 @@ class DocumentClassifier:
|
|||||||
self.last_doc_change_time is not None
|
self.last_doc_change_time is not None
|
||||||
and self.last_doc_change_time >= latest_doc_change
|
and self.last_doc_change_time >= latest_doc_change
|
||||||
) and self.last_auto_type_hash == hasher.digest():
|
) and self.last_auto_type_hash == hasher.digest():
|
||||||
|
logger.info("No updates since last training")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# subtract 1 since -1 (null) is also part of the classes.
|
# subtract 1 since -1 (null) is also part of the classes.
|
||||||
|
87
src/documents/conditionals.py
Normal file
87
src/documents/conditionals.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
import pickle
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from documents.classifier import DocumentClassifier
|
||||||
|
from documents.models import Document
|
||||||
|
|
||||||
|
|
||||||
|
def suggestions_etag(request, pk: int) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Returns an optional string for the ETag, allowing browser caching of
|
||||||
|
suggestions if the classifier has not been changed and the suggested dates
|
||||||
|
setting is also unchanged
|
||||||
|
|
||||||
|
TODO: It would be nice to not duplicate the partial loading and the loading
|
||||||
|
between here and the actual classifier
|
||||||
|
"""
|
||||||
|
if not settings.MODEL_FILE.exists():
|
||||||
|
return None
|
||||||
|
with open(settings.MODEL_FILE, "rb") as f:
|
||||||
|
schema_version = pickle.load(f)
|
||||||
|
if schema_version != DocumentClassifier.FORMAT_VERSION:
|
||||||
|
return None
|
||||||
|
_ = pickle.load(f)
|
||||||
|
last_auto_type_hash: bytes = pickle.load(f)
|
||||||
|
return f"{last_auto_type_hash}:{settings.NUMBER_OF_SUGGESTED_DATES}"
|
||||||
|
|
||||||
|
|
||||||
|
def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
|
||||||
|
"""
|
||||||
|
Returns the datetime of classifier last modification. This is slightly off,
|
||||||
|
as there is not way to track the suggested date setting modification, but it seems
|
||||||
|
unlikely that changes too often
|
||||||
|
"""
|
||||||
|
if not settings.MODEL_FILE.exists():
|
||||||
|
return None
|
||||||
|
with open(settings.MODEL_FILE, "rb") as f:
|
||||||
|
schema_version = pickle.load(f)
|
||||||
|
if schema_version != DocumentClassifier.FORMAT_VERSION:
|
||||||
|
return None
|
||||||
|
last_doc_change_time = pickle.load(f)
|
||||||
|
return last_doc_change_time
|
||||||
|
|
||||||
|
|
||||||
|
def metadata_etag(request, pk: int) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Metadata is extracted from the original file, so use its checksum as the
|
||||||
|
ETag
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
doc = Document.objects.get(pk=pk)
|
||||||
|
return doc.checksum
|
||||||
|
except Document.DoesNotExist:
|
||||||
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def metadata_last_modified(request, pk: int) -> Optional[datetime]:
|
||||||
|
"""
|
||||||
|
Metadata is extracted from the original file, so use its modified. Strictly speaking, this is
|
||||||
|
not the modification of the original file, but of the database object, but might as well
|
||||||
|
error on the side of more cautious
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
doc = Document.objects.get(pk=pk)
|
||||||
|
return doc.modified
|
||||||
|
except Document.DoesNotExist:
|
||||||
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def preview_etag(request, pk: int) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
ETag for the document preview, using the original or archive checksum, depending on the request
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
doc = Document.objects.get(pk=pk)
|
||||||
|
use_original = (
|
||||||
|
"original" in request.query_params
|
||||||
|
and request.query_params["original"] == "true"
|
||||||
|
)
|
||||||
|
return doc.checksum if use_original else doc.archive_checksum
|
||||||
|
except Document.DoesNotExist:
|
||||||
|
return None
|
||||||
|
return None
|
@ -1266,6 +1266,86 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@mock.patch("documents.conditionals.pickle.load")
|
||||||
|
@mock.patch("documents.views.match_storage_paths")
|
||||||
|
@mock.patch("documents.views.match_document_types")
|
||||||
|
@mock.patch("documents.views.match_tags")
|
||||||
|
@mock.patch("documents.views.match_correspondents")
|
||||||
|
@override_settings(NUMBER_OF_SUGGESTED_DATES=10)
|
||||||
|
def test_get_suggestions_cached(
|
||||||
|
self,
|
||||||
|
match_correspondents,
|
||||||
|
match_tags,
|
||||||
|
match_document_types,
|
||||||
|
match_storage_paths,
|
||||||
|
mocked_pickle_load,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Request for suggestions for a document
|
||||||
|
WHEN:
|
||||||
|
- Classifier has not been modified
|
||||||
|
THEN:
|
||||||
|
- Subsequent requests are returned alright
|
||||||
|
- ETag and last modified are called
|
||||||
|
"""
|
||||||
|
settings.MODEL_FILE.touch()
|
||||||
|
|
||||||
|
from documents.classifier import DocumentClassifier
|
||||||
|
|
||||||
|
last_modified = timezone.now()
|
||||||
|
|
||||||
|
# ETag first, then modified
|
||||||
|
mock_effect = [
|
||||||
|
DocumentClassifier.FORMAT_VERSION,
|
||||||
|
"dont care",
|
||||||
|
b"thisisachecksum",
|
||||||
|
DocumentClassifier.FORMAT_VERSION,
|
||||||
|
last_modified,
|
||||||
|
]
|
||||||
|
mocked_pickle_load.side_effect = mock_effect
|
||||||
|
|
||||||
|
doc = Document.objects.create(
|
||||||
|
title="test",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
content="this is an invoice from 12.04.2022!",
|
||||||
|
)
|
||||||
|
|
||||||
|
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
|
||||||
|
match_tags.return_value = [Tag(id=56), Tag(id=123)]
|
||||||
|
match_document_types.return_value = [DocumentType(id=23)]
|
||||||
|
match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
|
||||||
|
|
||||||
|
response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
|
||||||
|
self.assertEqual(
|
||||||
|
response.data,
|
||||||
|
{
|
||||||
|
"correspondents": [88, 2],
|
||||||
|
"tags": [56, 123],
|
||||||
|
"document_types": [23],
|
||||||
|
"storage_paths": [99, 77],
|
||||||
|
"dates": ["2022-04-12"],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
mocked_pickle_load.assert_called()
|
||||||
|
self.assertIn("Last-Modified", response.headers)
|
||||||
|
self.assertEqual(
|
||||||
|
response.headers["Last-Modified"],
|
||||||
|
last_modified.strftime("%a, %d %b %Y %H:%M:%S %Z").replace("UTC", "GMT"),
|
||||||
|
)
|
||||||
|
self.assertIn("ETag", response.headers)
|
||||||
|
self.assertEqual(
|
||||||
|
response.headers["ETag"],
|
||||||
|
f"\"b'thisisachecksum':{settings.NUMBER_OF_SUGGESTED_DATES}\"",
|
||||||
|
)
|
||||||
|
|
||||||
|
mocked_pickle_load.rest_mock()
|
||||||
|
mocked_pickle_load.side_effect = mock_effect
|
||||||
|
|
||||||
|
response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
|
mocked_pickle_load.assert_called()
|
||||||
|
|
||||||
@mock.patch("documents.parsers.parse_date_generator")
|
@mock.patch("documents.parsers.parse_date_generator")
|
||||||
@override_settings(NUMBER_OF_SUGGESTED_DATES=0)
|
@override_settings(NUMBER_OF_SUGGESTED_DATES=0)
|
||||||
def test_get_suggestions_dates_disabled(
|
def test_get_suggestions_dates_disabled(
|
||||||
|
@ -34,6 +34,7 @@ from django.utils.decorators import method_decorator
|
|||||||
from django.utils.translation import get_language
|
from django.utils.translation import get_language
|
||||||
from django.views import View
|
from django.views import View
|
||||||
from django.views.decorators.cache import cache_control
|
from django.views.decorators.cache import cache_control
|
||||||
|
from django.views.decorators.http import condition
|
||||||
from django.views.generic import TemplateView
|
from django.views.generic import TemplateView
|
||||||
from django_filters.rest_framework import DjangoFilterBackend
|
from django_filters.rest_framework import DjangoFilterBackend
|
||||||
from langdetect import detect
|
from langdetect import detect
|
||||||
@ -62,6 +63,11 @@ from documents.bulk_download import ArchiveOnlyStrategy
|
|||||||
from documents.bulk_download import OriginalAndArchiveStrategy
|
from documents.bulk_download import OriginalAndArchiveStrategy
|
||||||
from documents.bulk_download import OriginalsOnlyStrategy
|
from documents.bulk_download import OriginalsOnlyStrategy
|
||||||
from documents.classifier import load_classifier
|
from documents.classifier import load_classifier
|
||||||
|
from documents.conditionals import metadata_etag
|
||||||
|
from documents.conditionals import metadata_last_modified
|
||||||
|
from documents.conditionals import preview_etag
|
||||||
|
from documents.conditionals import suggestions_etag
|
||||||
|
from documents.conditionals import suggestions_last_modified
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
from documents.data_models import DocumentMetadataOverrides
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
from documents.data_models import DocumentSource
|
from documents.data_models import DocumentSource
|
||||||
@ -386,6 +392,9 @@ class DocumentViewSet(
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
@action(methods=["get"], detail=True)
|
@action(methods=["get"], detail=True)
|
||||||
|
@method_decorator(
|
||||||
|
condition(etag_func=metadata_etag, last_modified_func=metadata_last_modified),
|
||||||
|
)
|
||||||
def metadata(self, request, pk=None):
|
def metadata(self, request, pk=None):
|
||||||
try:
|
try:
|
||||||
doc = Document.objects.get(pk=pk)
|
doc = Document.objects.get(pk=pk)
|
||||||
@ -430,6 +439,12 @@ class DocumentViewSet(
|
|||||||
return Response(meta)
|
return Response(meta)
|
||||||
|
|
||||||
@action(methods=["get"], detail=True)
|
@action(methods=["get"], detail=True)
|
||||||
|
@method_decorator(
|
||||||
|
condition(
|
||||||
|
etag_func=suggestions_etag,
|
||||||
|
last_modified_func=suggestions_last_modified,
|
||||||
|
),
|
||||||
|
)
|
||||||
def suggestions(self, request, pk=None):
|
def suggestions(self, request, pk=None):
|
||||||
doc = get_object_or_404(Document, pk=pk)
|
doc = get_object_or_404(Document, pk=pk)
|
||||||
if request.user is not None and not has_perms_owner_aware(
|
if request.user is not None and not has_perms_owner_aware(
|
||||||
@ -467,6 +482,8 @@ class DocumentViewSet(
|
|||||||
)
|
)
|
||||||
|
|
||||||
@action(methods=["get"], detail=True)
|
@action(methods=["get"], detail=True)
|
||||||
|
@method_decorator(cache_control(public=False, max_age=5 * 60))
|
||||||
|
@method_decorator(condition(etag_func=preview_etag))
|
||||||
def preview(self, request, pk=None):
|
def preview(self, request, pk=None):
|
||||||
try:
|
try:
|
||||||
response = self.file_response(pk, request, "inline")
|
response = self.file_response(pk, request, "inline")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user