mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	Feature: Add additional caching support to suggestions and metadata (#5414)
* Adds ETag and Last-Modified headers to suggestions, metadata and previews * Slight update to the suggestions etag * Small user message for why classifier didn't train again
This commit is contained in:
		@@ -207,6 +207,7 @@ class DocumentClassifier:
 | 
				
			|||||||
            self.last_doc_change_time is not None
 | 
					            self.last_doc_change_time is not None
 | 
				
			||||||
            and self.last_doc_change_time >= latest_doc_change
 | 
					            and self.last_doc_change_time >= latest_doc_change
 | 
				
			||||||
        ) and self.last_auto_type_hash == hasher.digest():
 | 
					        ) and self.last_auto_type_hash == hasher.digest():
 | 
				
			||||||
 | 
					            logger.info("No updates since last training")
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # subtract 1 since -1 (null) is also part of the classes.
 | 
					        # subtract 1 since -1 (null) is also part of the classes.
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										87
									
								
								src/documents/conditionals.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								src/documents/conditionals.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,87 @@
 | 
				
			|||||||
 | 
					import pickle
 | 
				
			||||||
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					from typing import Optional
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from django.conf import settings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from documents.classifier import DocumentClassifier
 | 
				
			||||||
 | 
					from documents.models import Document
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def suggestions_etag(request, pk: int) -> Optional[str]:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Returns an optional string for the ETag, allowing browser caching of
 | 
				
			||||||
 | 
					    suggestions if the classifier has not been changed and the suggested dates
 | 
				
			||||||
 | 
					    setting is also unchanged
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    TODO: It would be nice to not duplicate the partial loading and the loading
 | 
				
			||||||
 | 
					    between here and the actual classifier
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    if not settings.MODEL_FILE.exists():
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					    with open(settings.MODEL_FILE, "rb") as f:
 | 
				
			||||||
 | 
					        schema_version = pickle.load(f)
 | 
				
			||||||
 | 
					        if schema_version != DocumentClassifier.FORMAT_VERSION:
 | 
				
			||||||
 | 
					            return None
 | 
				
			||||||
 | 
					        _ = pickle.load(f)
 | 
				
			||||||
 | 
					        last_auto_type_hash: bytes = pickle.load(f)
 | 
				
			||||||
 | 
					        return f"{last_auto_type_hash}:{settings.NUMBER_OF_SUGGESTED_DATES}"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Returns the datetime of classifier last modification.  This is slightly off,
 | 
				
			||||||
 | 
					    as there is not way to track the suggested date setting modification, but it seems
 | 
				
			||||||
 | 
					    unlikely that changes too often
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    if not settings.MODEL_FILE.exists():
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					    with open(settings.MODEL_FILE, "rb") as f:
 | 
				
			||||||
 | 
					        schema_version = pickle.load(f)
 | 
				
			||||||
 | 
					        if schema_version != DocumentClassifier.FORMAT_VERSION:
 | 
				
			||||||
 | 
					            return None
 | 
				
			||||||
 | 
					        last_doc_change_time = pickle.load(f)
 | 
				
			||||||
 | 
					        return last_doc_change_time
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def metadata_etag(request, pk: int) -> Optional[str]:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Metadata is extracted from the original file, so use its checksum as the
 | 
				
			||||||
 | 
					    ETag
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        doc = Document.objects.get(pk=pk)
 | 
				
			||||||
 | 
					        return doc.checksum
 | 
				
			||||||
 | 
					    except Document.DoesNotExist:
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					    return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def metadata_last_modified(request, pk: int) -> Optional[datetime]:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Metadata is extracted from the original file, so use its modified.  Strictly speaking, this is
 | 
				
			||||||
 | 
					    not the modification of the original file, but of the database object, but might as well
 | 
				
			||||||
 | 
					    error on the side of more cautious
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        doc = Document.objects.get(pk=pk)
 | 
				
			||||||
 | 
					        return doc.modified
 | 
				
			||||||
 | 
					    except Document.DoesNotExist:
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					    return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def preview_etag(request, pk: int) -> Optional[str]:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    ETag for the document preview, using the original or archive checksum, depending on the request
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        doc = Document.objects.get(pk=pk)
 | 
				
			||||||
 | 
					        use_original = (
 | 
				
			||||||
 | 
					            "original" in request.query_params
 | 
				
			||||||
 | 
					            and request.query_params["original"] == "true"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        return doc.checksum if use_original else doc.archive_checksum
 | 
				
			||||||
 | 
					    except Document.DoesNotExist:
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					    return None
 | 
				
			||||||
@@ -1266,6 +1266,86 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
 | 
				
			|||||||
            },
 | 
					            },
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @mock.patch("documents.conditionals.pickle.load")
 | 
				
			||||||
 | 
					    @mock.patch("documents.views.match_storage_paths")
 | 
				
			||||||
 | 
					    @mock.patch("documents.views.match_document_types")
 | 
				
			||||||
 | 
					    @mock.patch("documents.views.match_tags")
 | 
				
			||||||
 | 
					    @mock.patch("documents.views.match_correspondents")
 | 
				
			||||||
 | 
					    @override_settings(NUMBER_OF_SUGGESTED_DATES=10)
 | 
				
			||||||
 | 
					    def test_get_suggestions_cached(
 | 
				
			||||||
 | 
					        self,
 | 
				
			||||||
 | 
					        match_correspondents,
 | 
				
			||||||
 | 
					        match_tags,
 | 
				
			||||||
 | 
					        match_document_types,
 | 
				
			||||||
 | 
					        match_storage_paths,
 | 
				
			||||||
 | 
					        mocked_pickle_load,
 | 
				
			||||||
 | 
					    ):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        GIVEN:
 | 
				
			||||||
 | 
					           - Request for suggestions for a document
 | 
				
			||||||
 | 
					        WHEN:
 | 
				
			||||||
 | 
					          - Classifier has not been modified
 | 
				
			||||||
 | 
					        THEN:
 | 
				
			||||||
 | 
					          - Subsequent requests are returned alright
 | 
				
			||||||
 | 
					          - ETag and last modified are called
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        settings.MODEL_FILE.touch()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        from documents.classifier import DocumentClassifier
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        last_modified = timezone.now()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # ETag first, then modified
 | 
				
			||||||
 | 
					        mock_effect = [
 | 
				
			||||||
 | 
					            DocumentClassifier.FORMAT_VERSION,
 | 
				
			||||||
 | 
					            "dont care",
 | 
				
			||||||
 | 
					            b"thisisachecksum",
 | 
				
			||||||
 | 
					            DocumentClassifier.FORMAT_VERSION,
 | 
				
			||||||
 | 
					            last_modified,
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
 | 
					        mocked_pickle_load.side_effect = mock_effect
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        doc = Document.objects.create(
 | 
				
			||||||
 | 
					            title="test",
 | 
				
			||||||
 | 
					            mime_type="application/pdf",
 | 
				
			||||||
 | 
					            content="this is an invoice from 12.04.2022!",
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
 | 
				
			||||||
 | 
					        match_tags.return_value = [Tag(id=56), Tag(id=123)]
 | 
				
			||||||
 | 
					        match_document_types.return_value = [DocumentType(id=23)]
 | 
				
			||||||
 | 
					        match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
 | 
				
			||||||
 | 
					        self.assertEqual(
 | 
				
			||||||
 | 
					            response.data,
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                "correspondents": [88, 2],
 | 
				
			||||||
 | 
					                "tags": [56, 123],
 | 
				
			||||||
 | 
					                "document_types": [23],
 | 
				
			||||||
 | 
					                "storage_paths": [99, 77],
 | 
				
			||||||
 | 
					                "dates": ["2022-04-12"],
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        mocked_pickle_load.assert_called()
 | 
				
			||||||
 | 
					        self.assertIn("Last-Modified", response.headers)
 | 
				
			||||||
 | 
					        self.assertEqual(
 | 
				
			||||||
 | 
					            response.headers["Last-Modified"],
 | 
				
			||||||
 | 
					            last_modified.strftime("%a, %d %b %Y %H:%M:%S %Z").replace("UTC", "GMT"),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self.assertIn("ETag", response.headers)
 | 
				
			||||||
 | 
					        self.assertEqual(
 | 
				
			||||||
 | 
					            response.headers["ETag"],
 | 
				
			||||||
 | 
					            f"\"b'thisisachecksum':{settings.NUMBER_OF_SUGGESTED_DATES}\"",
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mocked_pickle_load.rest_mock()
 | 
				
			||||||
 | 
					        mocked_pickle_load.side_effect = mock_effect
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
 | 
				
			||||||
 | 
					        self.assertEqual(response.status_code, status.HTTP_200_OK)
 | 
				
			||||||
 | 
					        mocked_pickle_load.assert_called()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @mock.patch("documents.parsers.parse_date_generator")
 | 
					    @mock.patch("documents.parsers.parse_date_generator")
 | 
				
			||||||
    @override_settings(NUMBER_OF_SUGGESTED_DATES=0)
 | 
					    @override_settings(NUMBER_OF_SUGGESTED_DATES=0)
 | 
				
			||||||
    def test_get_suggestions_dates_disabled(
 | 
					    def test_get_suggestions_dates_disabled(
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,6 +34,7 @@ from django.utils.decorators import method_decorator
 | 
				
			|||||||
from django.utils.translation import get_language
 | 
					from django.utils.translation import get_language
 | 
				
			||||||
from django.views import View
 | 
					from django.views import View
 | 
				
			||||||
from django.views.decorators.cache import cache_control
 | 
					from django.views.decorators.cache import cache_control
 | 
				
			||||||
 | 
					from django.views.decorators.http import condition
 | 
				
			||||||
from django.views.generic import TemplateView
 | 
					from django.views.generic import TemplateView
 | 
				
			||||||
from django_filters.rest_framework import DjangoFilterBackend
 | 
					from django_filters.rest_framework import DjangoFilterBackend
 | 
				
			||||||
from langdetect import detect
 | 
					from langdetect import detect
 | 
				
			||||||
@@ -62,6 +63,11 @@ from documents.bulk_download import ArchiveOnlyStrategy
 | 
				
			|||||||
from documents.bulk_download import OriginalAndArchiveStrategy
 | 
					from documents.bulk_download import OriginalAndArchiveStrategy
 | 
				
			||||||
from documents.bulk_download import OriginalsOnlyStrategy
 | 
					from documents.bulk_download import OriginalsOnlyStrategy
 | 
				
			||||||
from documents.classifier import load_classifier
 | 
					from documents.classifier import load_classifier
 | 
				
			||||||
 | 
					from documents.conditionals import metadata_etag
 | 
				
			||||||
 | 
					from documents.conditionals import metadata_last_modified
 | 
				
			||||||
 | 
					from documents.conditionals import preview_etag
 | 
				
			||||||
 | 
					from documents.conditionals import suggestions_etag
 | 
				
			||||||
 | 
					from documents.conditionals import suggestions_last_modified
 | 
				
			||||||
from documents.data_models import ConsumableDocument
 | 
					from documents.data_models import ConsumableDocument
 | 
				
			||||||
from documents.data_models import DocumentMetadataOverrides
 | 
					from documents.data_models import DocumentMetadataOverrides
 | 
				
			||||||
from documents.data_models import DocumentSource
 | 
					from documents.data_models import DocumentSource
 | 
				
			||||||
@@ -386,6 +392,9 @@ class DocumentViewSet(
 | 
				
			|||||||
            return None
 | 
					            return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @action(methods=["get"], detail=True)
 | 
					    @action(methods=["get"], detail=True)
 | 
				
			||||||
 | 
					    @method_decorator(
 | 
				
			||||||
 | 
					        condition(etag_func=metadata_etag, last_modified_func=metadata_last_modified),
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
    def metadata(self, request, pk=None):
 | 
					    def metadata(self, request, pk=None):
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            doc = Document.objects.get(pk=pk)
 | 
					            doc = Document.objects.get(pk=pk)
 | 
				
			||||||
@@ -430,6 +439,12 @@ class DocumentViewSet(
 | 
				
			|||||||
        return Response(meta)
 | 
					        return Response(meta)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @action(methods=["get"], detail=True)
 | 
					    @action(methods=["get"], detail=True)
 | 
				
			||||||
 | 
					    @method_decorator(
 | 
				
			||||||
 | 
					        condition(
 | 
				
			||||||
 | 
					            etag_func=suggestions_etag,
 | 
				
			||||||
 | 
					            last_modified_func=suggestions_last_modified,
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
    def suggestions(self, request, pk=None):
 | 
					    def suggestions(self, request, pk=None):
 | 
				
			||||||
        doc = get_object_or_404(Document, pk=pk)
 | 
					        doc = get_object_or_404(Document, pk=pk)
 | 
				
			||||||
        if request.user is not None and not has_perms_owner_aware(
 | 
					        if request.user is not None and not has_perms_owner_aware(
 | 
				
			||||||
@@ -467,6 +482,8 @@ class DocumentViewSet(
 | 
				
			|||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @action(methods=["get"], detail=True)
 | 
					    @action(methods=["get"], detail=True)
 | 
				
			||||||
 | 
					    @method_decorator(cache_control(public=False, max_age=5 * 60))
 | 
				
			||||||
 | 
					    @method_decorator(condition(etag_func=preview_etag))
 | 
				
			||||||
    def preview(self, request, pk=None):
 | 
					    def preview(self, request, pk=None):
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            response = self.file_response(pk, request, "inline")
 | 
					            response = self.file_response(pk, request, "inline")
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user