mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Feature: Add additional caching support to suggestions and metadata (#5414)
* Adds ETag and Last-Modified headers to suggestions, metadata and previews * Slight update to the suggestions etag * Small user message for why classifier didn't train again
This commit is contained in:
		| @@ -207,6 +207,7 @@ class DocumentClassifier: | ||||
|             self.last_doc_change_time is not None | ||||
|             and self.last_doc_change_time >= latest_doc_change | ||||
|         ) and self.last_auto_type_hash == hasher.digest(): | ||||
|             logger.info("No updates since last training") | ||||
|             return False | ||||
|  | ||||
|         # subtract 1 since -1 (null) is also part of the classes. | ||||
|   | ||||
							
								
								
									
										87
									
								
								src/documents/conditionals.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								src/documents/conditionals.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | ||||
| import pickle | ||||
| from datetime import datetime | ||||
| from typing import Optional | ||||
|  | ||||
| from django.conf import settings | ||||
|  | ||||
| from documents.classifier import DocumentClassifier | ||||
| from documents.models import Document | ||||
|  | ||||
|  | ||||
| def suggestions_etag(request, pk: int) -> Optional[str]: | ||||
|     """ | ||||
|     Returns an optional string for the ETag, allowing browser caching of | ||||
|     suggestions if the classifier has not been changed and the suggested dates | ||||
|     setting is also unchanged | ||||
|  | ||||
|     TODO: It would be nice to not duplicate the partial loading and the loading | ||||
|     between here and the actual classifier | ||||
|     """ | ||||
|     if not settings.MODEL_FILE.exists(): | ||||
|         return None | ||||
|     with open(settings.MODEL_FILE, "rb") as f: | ||||
|         schema_version = pickle.load(f) | ||||
|         if schema_version != DocumentClassifier.FORMAT_VERSION: | ||||
|             return None | ||||
|         _ = pickle.load(f) | ||||
|         last_auto_type_hash: bytes = pickle.load(f) | ||||
|         return f"{last_auto_type_hash}:{settings.NUMBER_OF_SUGGESTED_DATES}" | ||||
|  | ||||
|  | ||||
| def suggestions_last_modified(request, pk: int) -> Optional[datetime]: | ||||
|     """ | ||||
|     Returns the datetime of classifier last modification.  This is slightly off, | ||||
|     as there is not way to track the suggested date setting modification, but it seems | ||||
|     unlikely that changes too often | ||||
|     """ | ||||
|     if not settings.MODEL_FILE.exists(): | ||||
|         return None | ||||
|     with open(settings.MODEL_FILE, "rb") as f: | ||||
|         schema_version = pickle.load(f) | ||||
|         if schema_version != DocumentClassifier.FORMAT_VERSION: | ||||
|             return None | ||||
|         last_doc_change_time = pickle.load(f) | ||||
|         return last_doc_change_time | ||||
|  | ||||
|  | ||||
| def metadata_etag(request, pk: int) -> Optional[str]: | ||||
|     """ | ||||
|     Metadata is extracted from the original file, so use its checksum as the | ||||
|     ETag | ||||
|     """ | ||||
|     try: | ||||
|         doc = Document.objects.get(pk=pk) | ||||
|         return doc.checksum | ||||
|     except Document.DoesNotExist: | ||||
|         return None | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def metadata_last_modified(request, pk: int) -> Optional[datetime]: | ||||
|     """ | ||||
|     Metadata is extracted from the original file, so use its modified.  Strictly speaking, this is | ||||
|     not the modification of the original file, but of the database object, but might as well | ||||
|     error on the side of more cautious | ||||
|     """ | ||||
|     try: | ||||
|         doc = Document.objects.get(pk=pk) | ||||
|         return doc.modified | ||||
|     except Document.DoesNotExist: | ||||
|         return None | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def preview_etag(request, pk: int) -> Optional[str]: | ||||
|     """ | ||||
|     ETag for the document preview, using the original or archive checksum, depending on the request | ||||
|     """ | ||||
|     try: | ||||
|         doc = Document.objects.get(pk=pk) | ||||
|         use_original = ( | ||||
|             "original" in request.query_params | ||||
|             and request.query_params["original"] == "true" | ||||
|         ) | ||||
|         return doc.checksum if use_original else doc.archive_checksum | ||||
|     except Document.DoesNotExist: | ||||
|         return None | ||||
|     return None | ||||
| @@ -1266,6 +1266,86 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): | ||||
|             }, | ||||
|         ) | ||||
|  | ||||
|     @mock.patch("documents.conditionals.pickle.load") | ||||
|     @mock.patch("documents.views.match_storage_paths") | ||||
|     @mock.patch("documents.views.match_document_types") | ||||
|     @mock.patch("documents.views.match_tags") | ||||
|     @mock.patch("documents.views.match_correspondents") | ||||
|     @override_settings(NUMBER_OF_SUGGESTED_DATES=10) | ||||
|     def test_get_suggestions_cached( | ||||
|         self, | ||||
|         match_correspondents, | ||||
|         match_tags, | ||||
|         match_document_types, | ||||
|         match_storage_paths, | ||||
|         mocked_pickle_load, | ||||
|     ): | ||||
|         """ | ||||
|         GIVEN: | ||||
|            - Request for suggestions for a document | ||||
|         WHEN: | ||||
|           - Classifier has not been modified | ||||
|         THEN: | ||||
|           - Subsequent requests are returned alright | ||||
|           - ETag and last modified are called | ||||
|         """ | ||||
|         settings.MODEL_FILE.touch() | ||||
|  | ||||
|         from documents.classifier import DocumentClassifier | ||||
|  | ||||
|         last_modified = timezone.now() | ||||
|  | ||||
|         # ETag first, then modified | ||||
|         mock_effect = [ | ||||
|             DocumentClassifier.FORMAT_VERSION, | ||||
|             "dont care", | ||||
|             b"thisisachecksum", | ||||
|             DocumentClassifier.FORMAT_VERSION, | ||||
|             last_modified, | ||||
|         ] | ||||
|         mocked_pickle_load.side_effect = mock_effect | ||||
|  | ||||
|         doc = Document.objects.create( | ||||
|             title="test", | ||||
|             mime_type="application/pdf", | ||||
|             content="this is an invoice from 12.04.2022!", | ||||
|         ) | ||||
|  | ||||
|         match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)] | ||||
|         match_tags.return_value = [Tag(id=56), Tag(id=123)] | ||||
|         match_document_types.return_value = [DocumentType(id=23)] | ||||
|         match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)] | ||||
|  | ||||
|         response = self.client.get(f"/api/documents/{doc.pk}/suggestions/") | ||||
|         self.assertEqual( | ||||
|             response.data, | ||||
|             { | ||||
|                 "correspondents": [88, 2], | ||||
|                 "tags": [56, 123], | ||||
|                 "document_types": [23], | ||||
|                 "storage_paths": [99, 77], | ||||
|                 "dates": ["2022-04-12"], | ||||
|             }, | ||||
|         ) | ||||
|         mocked_pickle_load.assert_called() | ||||
|         self.assertIn("Last-Modified", response.headers) | ||||
|         self.assertEqual( | ||||
|             response.headers["Last-Modified"], | ||||
|             last_modified.strftime("%a, %d %b %Y %H:%M:%S %Z").replace("UTC", "GMT"), | ||||
|         ) | ||||
|         self.assertIn("ETag", response.headers) | ||||
|         self.assertEqual( | ||||
|             response.headers["ETag"], | ||||
|             f"\"b'thisisachecksum':{settings.NUMBER_OF_SUGGESTED_DATES}\"", | ||||
|         ) | ||||
|  | ||||
|         mocked_pickle_load.rest_mock() | ||||
|         mocked_pickle_load.side_effect = mock_effect | ||||
|  | ||||
|         response = self.client.get(f"/api/documents/{doc.pk}/suggestions/") | ||||
|         self.assertEqual(response.status_code, status.HTTP_200_OK) | ||||
|         mocked_pickle_load.assert_called() | ||||
|  | ||||
|     @mock.patch("documents.parsers.parse_date_generator") | ||||
|     @override_settings(NUMBER_OF_SUGGESTED_DATES=0) | ||||
|     def test_get_suggestions_dates_disabled( | ||||
|   | ||||
| @@ -34,6 +34,7 @@ from django.utils.decorators import method_decorator | ||||
| from django.utils.translation import get_language | ||||
| from django.views import View | ||||
| from django.views.decorators.cache import cache_control | ||||
| from django.views.decorators.http import condition | ||||
| from django.views.generic import TemplateView | ||||
| from django_filters.rest_framework import DjangoFilterBackend | ||||
| from langdetect import detect | ||||
| @@ -62,6 +63,11 @@ from documents.bulk_download import ArchiveOnlyStrategy | ||||
| from documents.bulk_download import OriginalAndArchiveStrategy | ||||
| from documents.bulk_download import OriginalsOnlyStrategy | ||||
| from documents.classifier import load_classifier | ||||
| from documents.conditionals import metadata_etag | ||||
| from documents.conditionals import metadata_last_modified | ||||
| from documents.conditionals import preview_etag | ||||
| from documents.conditionals import suggestions_etag | ||||
| from documents.conditionals import suggestions_last_modified | ||||
| from documents.data_models import ConsumableDocument | ||||
| from documents.data_models import DocumentMetadataOverrides | ||||
| from documents.data_models import DocumentSource | ||||
| @@ -386,6 +392,9 @@ class DocumentViewSet( | ||||
|             return None | ||||
|  | ||||
|     @action(methods=["get"], detail=True) | ||||
|     @method_decorator( | ||||
|         condition(etag_func=metadata_etag, last_modified_func=metadata_last_modified), | ||||
|     ) | ||||
|     def metadata(self, request, pk=None): | ||||
|         try: | ||||
|             doc = Document.objects.get(pk=pk) | ||||
| @@ -430,6 +439,12 @@ class DocumentViewSet( | ||||
|         return Response(meta) | ||||
|  | ||||
|     @action(methods=["get"], detail=True) | ||||
|     @method_decorator( | ||||
|         condition( | ||||
|             etag_func=suggestions_etag, | ||||
|             last_modified_func=suggestions_last_modified, | ||||
|         ), | ||||
|     ) | ||||
|     def suggestions(self, request, pk=None): | ||||
|         doc = get_object_or_404(Document, pk=pk) | ||||
|         if request.user is not None and not has_perms_owner_aware( | ||||
| @@ -467,6 +482,8 @@ class DocumentViewSet( | ||||
|         ) | ||||
|  | ||||
|     @action(methods=["get"], detail=True) | ||||
|     @method_decorator(cache_control(public=False, max_age=5 * 60)) | ||||
|     @method_decorator(condition(etag_func=preview_etag)) | ||||
|     def preview(self, request, pk=None): | ||||
|         try: | ||||
|             response = self.file_response(pk, request, "inline") | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Trenton H
					Trenton H