paperless-ngx/src/documents/conditionals.py
2024-09-26 12:22:24 -07:00

150 lines
5.0 KiB
Python

from datetime import datetime
from datetime import timezone
from django.conf import settings
from django.core.cache import cache
from documents.caching import CACHE_5_MINUTES
from documents.caching import CACHE_50_MINUTES
from documents.caching import CLASSIFIER_HASH_KEY
from documents.caching import CLASSIFIER_MODIFIED_KEY
from documents.caching import CLASSIFIER_VERSION_KEY
from documents.caching import get_thumbnail_modified_key
from documents.classifier import DocumentClassifier
from documents.models import Document
def suggestions_etag(request, pk: int) -> str | None:
"""
Returns an optional string for the ETag, allowing browser caching of
suggestions if the classifier has not been changed and the suggested dates
setting is also unchanged
"""
# If no model file, no etag at all
if not settings.MODEL_FILE.exists():
return None
# Check cache information
cache_hits = cache.get_many(
[CLASSIFIER_VERSION_KEY, CLASSIFIER_HASH_KEY],
)
# If the version differs somehow, no etag
if (
CLASSIFIER_VERSION_KEY in cache_hits
and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
):
return None
elif CLASSIFIER_HASH_KEY in cache_hits:
# Refresh the cache and return the hash digest and the dates setting
cache.touch(CLASSIFIER_HASH_KEY, CACHE_5_MINUTES)
return f"{cache_hits[CLASSIFIER_HASH_KEY]}:{settings.NUMBER_OF_SUGGESTED_DATES}"
return None
def suggestions_last_modified(request, pk: int) -> datetime | None:
"""
Returns the datetime of classifier last modification. This is slightly off,
as there is not way to track the suggested date setting modification, but it seems
unlikely that changes too often
"""
# No file, no last modified
if not settings.MODEL_FILE.exists():
return None
cache_hits = cache.get_many(
[CLASSIFIER_VERSION_KEY, CLASSIFIER_MODIFIED_KEY],
)
# If the version differs somehow, no last modified
if (
CLASSIFIER_VERSION_KEY in cache_hits
and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
):
return None
elif CLASSIFIER_MODIFIED_KEY in cache_hits:
# Refresh the cache and return the last modified
cache.touch(CLASSIFIER_MODIFIED_KEY, CACHE_5_MINUTES)
return cache_hits[CLASSIFIER_MODIFIED_KEY]
return None
def metadata_etag(request, pk: int) -> str | None:
"""
Metadata is extracted from the original file, so use its checksum as the
ETag
"""
try:
doc = Document.objects.only("checksum").get(pk=pk)
return doc.checksum
except Document.DoesNotExist: # pragma: no cover
return None
return None
def metadata_last_modified(request, pk: int) -> datetime | None:
"""
Metadata is extracted from the original file, so use its modified. Strictly speaking, this is
not the modification of the original file, but of the database object, but might as well
error on the side of more cautious
"""
try:
doc = Document.objects.only("modified").get(pk=pk)
return doc.modified
except Document.DoesNotExist: # pragma: no cover
return None
return None
def preview_etag(request, pk: int) -> str | None:
"""
ETag for the document preview, using the original or archive checksum, depending on the request
"""
try:
doc = Document.objects.only("checksum", "archive_checksum").get(pk=pk)
use_original = (
"original" in request.query_params
and request.query_params["original"] == "true"
)
return doc.checksum if use_original else doc.archive_checksum
except Document.DoesNotExist: # pragma: no cover
return None
return None
def preview_last_modified(request, pk: int) -> datetime | None:
"""
Uses the documents modified time to set the Last-Modified header. Not strictly
speaking correct, but close enough and quick
"""
try:
doc = Document.objects.only("modified").get(pk=pk)
return doc.modified
except Document.DoesNotExist: # pragma: no cover
return None
return None
def thumbnail_last_modified(request, pk: int) -> datetime | None:
"""
Returns the filesystem last modified either from cache or from filesystem.
Cache should be (slightly?) faster than filesystem
"""
try:
doc = Document.objects.only("storage_type").get(pk=pk)
if not doc.thumbnail_path.exists():
return None
doc_key = get_thumbnail_modified_key(pk)
cache_hit = cache.get(doc_key)
if cache_hit is not None:
cache.touch(doc_key, CACHE_50_MINUTES)
return cache_hit
# No cache, get the timestamp and cache the datetime
last_modified = datetime.fromtimestamp(
doc.thumbnail_path.stat().st_mtime,
tz=timezone.utc,
)
cache.set(doc_key, last_modified, CACHE_50_MINUTES)
return last_modified
except Document.DoesNotExist: # pragma: no cover
return None