Merge caching
@@ -1,215 +0,0 @@
-from __future__ import annotations
-
-import logging
-from binascii import hexlify
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
-from typing import Final
-
-from django.core.cache import cache
-
-from paperless.models import Document
-
-if TYPE_CHECKING:
-    from paperless.classifier import DocumentClassifier
-
-logger = logging.getLogger("paperless.caching")
-
-
-@dataclass(frozen=True)
-class MetadataCacheData:
-    original_checksum: str
-    original_metadata: list
-    archive_checksum: str | None
-    archive_metadata: list | None
-
-
-@dataclass(frozen=True)
-class SuggestionCacheData:
-    classifier_version: int
-    classifier_hash: str
-    suggestions: dict
-
-
-CLASSIFIER_VERSION_KEY: Final[str] = "classifier_version"
-CLASSIFIER_HASH_KEY: Final[str] = "classifier_hash"
-CLASSIFIER_MODIFIED_KEY: Final[str] = "classifier_modified"
-
-CACHE_1_MINUTE: Final[int] = 60
-CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
-CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE
-
-
-def get_suggestion_cache_key(document_id: int) -> str:
-    """
-    Returns the basic key for a document's suggestions
-    """
-    return f"doc_{document_id}_suggest"
-
-
-def get_suggestion_cache(document_id: int) -> SuggestionCacheData | None:
-    """
-    If possible, return the cached suggestions for the given document ID.
-    The classifier needs to be matching in format and hash and the suggestions need to
-    have been cached once.
-    """
-    from paperless.classifier import DocumentClassifier
-
-    doc_key = get_suggestion_cache_key(document_id)
-    cache_hits = cache.get_many([CLASSIFIER_VERSION_KEY, CLASSIFIER_HASH_KEY, doc_key])
-    # The document suggestions are in the cache
-    if doc_key in cache_hits:
-        doc_suggestions: SuggestionCacheData = cache_hits[doc_key]
-        # The classifier format is the same
-        # The classifier hash is the same
-        # Then the suggestions can be used
-        if (
-            CLASSIFIER_VERSION_KEY in cache_hits
-            and cache_hits[CLASSIFIER_VERSION_KEY] == DocumentClassifier.FORMAT_VERSION
-            and cache_hits[CLASSIFIER_VERSION_KEY] == doc_suggestions.classifier_version
-        ) and (
-            CLASSIFIER_HASH_KEY in cache_hits
-            and cache_hits[CLASSIFIER_HASH_KEY] == doc_suggestions.classifier_hash
-        ):
-            return doc_suggestions
-        else:  # pragma: no cover
-            # Remove the key because something didn't match
-            cache.delete(doc_key)
-    return None
-
-
-def set_suggestions_cache(
-    document_id: int,
-    suggestions: dict,
-    classifier: DocumentClassifier | None,
-    *,
-    timeout=CACHE_50_MINUTES,
-) -> None:
-    """
-    Caches the given suggestions, which were generated by the given classifier. If there is no classifier,
-    this function is a no-op (there won't be suggestions then anyway)
-    """
-    if classifier is not None:
-        doc_key = get_suggestion_cache_key(document_id)
-        cache.set(
-            doc_key,
-            SuggestionCacheData(
-                classifier.FORMAT_VERSION,
-                hexlify(classifier.last_auto_type_hash).decode(),
-                suggestions,
-            ),
-            timeout,
-        )
-
-
-def refresh_suggestions_cache(
-    document_id: int,
-    *,
-    timeout: int = CACHE_50_MINUTES,
-) -> None:
-    """
-    Refreshes the expiration of the suggestions for the given document ID
-    to the given timeout
-    """
-    doc_key = get_suggestion_cache_key(document_id)
-    cache.touch(doc_key, timeout)
-
-
-def get_metadata_cache_key(document_id: int) -> str:
-    """
-    Returns the basic key for a document's metadata
-    """
-    return f"doc_{document_id}_metadata"
-
-
-def get_metadata_cache(document_id: int) -> MetadataCacheData | None:
-    """
-    Returns the cached document metadata for the given document ID, as long as the metadata
-    was cached once and the checksums have not changed
-    """
-    doc_key = get_metadata_cache_key(document_id)
-    doc_metadata: MetadataCacheData | None = cache.get(doc_key)
-    # The metadata exists in the cache
-    if doc_metadata is not None:
-        try:
-            doc = Document.objects.only(
-                "pk",
-                "checksum",
-                "archive_checksum",
-                "archive_filename",
-            ).get(pk=document_id)
-            # The original checksums match
-            # If it has one, the archive checksums match
-            # Then, we can use the metadata
-            if (
-                doc_metadata.original_checksum == doc.checksum
-                and doc.has_archive_version
-                and doc_metadata.archive_checksum is not None
-                and doc_metadata.archive_checksum == doc.archive_checksum
-            ):
-                # Refresh cache
-                cache.touch(doc_key, CACHE_50_MINUTES)
-                return doc_metadata
-            else:  # pragma: no cover
-                # Something didn't match, delete the key
-                cache.delete(doc_key)
-        except Document.DoesNotExist:  # pragma: no cover
-            # Basically impossible, but the key existed, but the Document didn't
-            cache.delete(doc_key)
-    return None
-
-
-def set_metadata_cache(
-    document: Document,
-    original_metadata: list,
-    archive_metadata: list | None,
-    *,
-    timeout=CACHE_50_MINUTES,
-) -> None:
-    """
-    Sets the metadata into cache for the given Document
-    """
-    doc_key = get_metadata_cache_key(document.pk)
-    cache.set(
-        doc_key,
-        MetadataCacheData(
-            document.checksum,
-            original_metadata,
-            document.archive_checksum,
-            archive_metadata,
-        ),
-        timeout,
-    )
-
-
-def refresh_metadata_cache(
-    document_id: int,
-    *,
-    timeout: int = CACHE_50_MINUTES,
-) -> None:
-    """
-    Refreshes the expiration of the metadata for the given document ID
-    to the given timeout
-    """
-    doc_key = get_metadata_cache_key(document_id)
-    cache.touch(doc_key, timeout)
-
-
-def get_thumbnail_modified_key(document_id: int) -> str:
-    """
-    Builds the key to store a thumbnail's timestamp
-    """
-    return f"doc_{document_id}_thumbnail_modified"
-
-
-def clear_document_caches(document_id: int) -> None:
-    """
-    Removes all cached items for the given document
-    """
-    cache.delete_many(
-        [
-            get_suggestion_cache_key(document_id),
-            get_metadata_cache_key(document_id),
-            get_thumbnail_modified_key(document_id),
-        ],
-    )
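
To make the removed module easier to follow, here is a minimal usage sketch of the suggestion-cache helpers defined above. It assumes the post-merge import path paperless.caching; compute_suggestions is a hypothetical stand-in for whatever produces the suggestions dict, not a function in this diff.

# Minimal usage sketch (hypothetical caller, not part of this commit).
from paperless.caching import get_suggestion_cache
from paperless.caching import refresh_suggestions_cache
from paperless.caching import set_suggestions_cache
from paperless.classifier import load_classifier


def suggestions_for(document_id: int) -> dict:
    cached = get_suggestion_cache(document_id)
    if cached is not None:
        # Hit: the entry matched the current classifier format version and
        # hash, so extend its lifetime and reuse it.
        refresh_suggestions_cache(document_id)
        return cached.suggestions
    classifier = load_classifier()
    suggestions = compute_suggestions(document_id, classifier)  # hypothetical
    # Miss: store the result together with the classifier's version and
    # hash, so the entry is rejected automatically after a retrain.
    set_suggestions_cache(document_id, suggestions, classifier)
    return suggestions
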
@@ -23,10 +23,10 @@ from django.utils import timezone
 from filelock import FileLock
 from guardian.shortcuts import remove_perm
 
-from documents.caching import clear_document_caches
 from documents.mail import send_email
 from documents.templating.workflows import parse_w_workflow_placeholders
 from paperless import matching
+from paperless.caching import clear_document_caches
 from paperless.file_handling import create_source_path_directory
 from paperless.file_handling import delete_empty_directories
 from paperless.file_handling import generate_unique_filename
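
This hunk only retargets the import from documents.caching to paperless.caching; the handler code that calls it is untouched. For context, a hedged sketch of the kind of signal handler that relies on the helper (the receiver below is illustrative, not code from this commit):

# Illustrative only: a deletion handler that evicts a document's cache
# entries; this commit moves the import path, not the handler logic.
from django.db.models.signals import post_delete
from django.dispatch import receiver

from paperless.caching import clear_document_caches
from paperless.models import Document


@receiver(post_delete, sender=Document)
def evict_caches_on_delete(sender, instance: Document, **kwargs) -> None:
    # Drops the suggestion, metadata, and thumbnail-modified keys for this
    # document in a single delete_many() round trip.
    clear_document_caches(instance.pk)
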
@@ -20,7 +20,6 @@ from filelock import FileLock
 from whoosh.writing import AsyncWriter
 
 from documents import sanity_checker
-from documents.caching import clear_document_caches
 from documents.plugins.base import ConsumeTaskPlugin
 from documents.plugins.base import ProgressManager
 from documents.plugins.base import StopConsumeTaskError
@@ -31,6 +30,7 @@ from documents.signals.handlers import cleanup_document_deletion
 from documents.signals.handlers import run_workflows
 from paperless import index
 from paperless.barcodes import BarcodePlugin
+from paperless.caching import clear_document_caches
 from paperless.classifier import DocumentClassifier
 from paperless.classifier import load_classifier
 from paperless.consumer import ConsumerPlugin
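
The two hunks above make the same import swap in the task module. As a sketch of how the classifier-level keys from the caching module can be kept current after a retrain, under stated assumptions: the function below and its use of CLASSIFIER_MODIFIED_KEY are assumptions for illustration, not code from this commit.

# Hypothetical sketch of a training step publishing the classifier's
# identity to the cache; the real task body is not shown in this diff.
from binascii import hexlify

from django.core.cache import cache
from django.utils import timezone

from paperless.caching import CACHE_50_MINUTES
from paperless.caching import CLASSIFIER_HASH_KEY
from paperless.caching import CLASSIFIER_MODIFIED_KEY
from paperless.caching import CLASSIFIER_VERSION_KEY
from paperless.classifier import DocumentClassifier


def publish_classifier_state(classifier: DocumentClassifier) -> None:
    # get_suggestion_cache() compares these keys against the values stored
    # with each document's suggestions, so refreshing them after a retrain
    # invalidates every stale suggestion entry at once.
    cache.set_many(
        {
            CLASSIFIER_VERSION_KEY: DocumentClassifier.FORMAT_VERSION,
            CLASSIFIER_HASH_KEY: hexlify(classifier.last_auto_type_hash).decode(),
            CLASSIFIER_MODIFIED_KEY: timezone.now(),
        },
        CACHE_50_MINUTES,
    )
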
@@ -24,13 +24,13 @@ from guardian.shortcuts import assign_perm
 from rest_framework import status
 from rest_framework.test import APITestCase
 
-from documents.caching import CACHE_50_MINUTES
-from documents.caching import CLASSIFIER_HASH_KEY
-from documents.caching import CLASSIFIER_MODIFIED_KEY
-from documents.caching import CLASSIFIER_VERSION_KEY
 from documents.signals.handlers import run_workflows
 from documents.tests.utils import DirectoriesMixin
 from documents.tests.utils import DocumentConsumeDelayMixin
+from paperless.caching import CACHE_50_MINUTES
+from paperless.caching import CLASSIFIER_HASH_KEY
+from paperless.caching import CLASSIFIER_MODIFIED_KEY
+from paperless.caching import CLASSIFIER_VERSION_KEY
 from paperless.data_models import DocumentSource
 from paperless.models import Correspondent
 from paperless.models import CustomField
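
Finally, the test-suite hunk above swaps the same constants over to paperless.caching. A hedged sketch of the sort of check these imports enable; the test case itself is hypothetical, not part of this commit:

# Hypothetical unit-test sketch; only the import paths changed in this hunk.
from django.core.cache import cache
from django.test import TestCase

from paperless.caching import CLASSIFIER_VERSION_KEY
from paperless.caching import SuggestionCacheData
from paperless.caching import get_suggestion_cache
from paperless.caching import get_suggestion_cache_key


class TestSuggestionCacheGate(TestCase):
    def test_stale_version_is_rejected(self):
        # Store a suggestion entry whose recorded classifier version can
        # never match the running code, then confirm the gate drops it.
        cache.set(
            get_suggestion_cache_key(1),
            SuggestionCacheData(-1, "deadbeef", {}),
        )
        cache.set(CLASSIFIER_VERSION_KEY, -2)
        self.assertIsNone(get_suggestion_cache(1))
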