Simpler thumbnail caching

This commit is contained in:
Trenton H 2024-01-29 12:19:36 -08:00
parent 8a4059b9bf
commit 4a2b652a39
5 changed files with 46 additions and 37 deletions

View File

@ -8,7 +8,23 @@ CACHE_1_MINUTE: Final[int] = 60
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE
DOC_SUGGESTIONS_BASE: Final[str] = "doc_{}_suggest"
DOC_METADATA_BASE: Final[str] = "doc_{}_metadata" def get_suggestion_key(document_id: int) -> str:
DOC_THUMBNAIL_ETAG_BASE: Final[str] = "doc_{}_thumbnail_etag" """
DOC_THUMBNAIL_MODIFIED_BASE: Final[str] = "doc_{}_thumbnail_modified" Builds the key to store a document's suggestion data in the cache
"""
return f"doc_{document_id}_suggest"
def get_metadata_key(document_id: int) -> str:
    """
    Builds the cache key under which a document's metadata is stored

    The key embeds the document's id, so every document gets its own
    cache entry.
    """
    cache_key = f"doc_{document_id}_metadata"
    return cache_key
def get_thumbnail_modified_key(document_id: int) -> str:
    """
    Builds the cache key under which a thumbnail's timestamp is stored

    The key embeds the document's id, so every document's thumbnail gets
    its own cache entry.
    """
    cache_key = f"doc_{document_id}_thumbnail_modified"
    return cache_key

View File

@ -213,6 +213,15 @@ class DocumentClassifier:
and self.last_doc_change_time >= latest_doc_change and self.last_doc_change_time >= latest_doc_change
) and self.last_auto_type_hash == hasher.digest(): ) and self.last_auto_type_hash == hasher.digest():
logger.info("No updates since last training") logger.info("No updates since last training")
# Set the classifier information into the cache
# Caching for 50 minutes, so slightly less than the normal retrain time
cache.set(
CLASSIFIER_MODIFIED_KEY,
self.last_doc_change_time,
CACHE_50_MINUTES,
)
cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES)
cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES)
return False return False
# subtract 1 since -1 (null) is also part of the classes. # subtract 1 since -1 (null) is also part of the classes.

View File

@ -1,6 +1,5 @@
from datetime import datetime from datetime import datetime
from datetime import timezone from datetime import timezone
from hashlib import sha256
from typing import Optional from typing import Optional
from django.conf import settings from django.conf import settings
@ -11,8 +10,7 @@ from documents.caching import CACHE_50_MINUTES
from documents.caching import CLASSIFIER_HASH_KEY from documents.caching import CLASSIFIER_HASH_KEY
from documents.caching import CLASSIFIER_MODIFIED_KEY from documents.caching import CLASSIFIER_MODIFIED_KEY
from documents.caching import CLASSIFIER_VERSION_KEY from documents.caching import CLASSIFIER_VERSION_KEY
from documents.caching import DOC_THUMBNAIL_ETAG_BASE from documents.caching import get_thumbnail_modified_key
from documents.caching import DOC_THUMBNAIL_MODIFIED_BASE
from documents.classifier import DocumentClassifier from documents.classifier import DocumentClassifier
from documents.models import Document from documents.models import Document
@ -113,7 +111,10 @@ def preview_etag(request, pk: int) -> Optional[str]:
def preview_last_modified(request, pk: int) -> Optional[str]: def preview_last_modified(request, pk: int) -> Optional[str]:
""" """ """
Uses the document's modified time to set the Last-Modified header. Not strictly
speaking correct, but close enough and quick
"""
try: try:
doc = Document.objects.get(pk=pk) doc = Document.objects.get(pk=pk)
return doc.modified return doc.modified
@ -122,26 +123,6 @@ def preview_last_modified(request, pk: int) -> Optional[str]:
return None return None
def thumbnail_etag(request, pk: int) -> Optional[str]:
    """
    Returns the SHA256 of a thumbnail, either from cache or calculated

    Returns None when the document does not exist or when its thumbnail
    file is not present on disk. Otherwise returns the hex digest of the
    thumbnail's bytes, using the cached value when one is present and
    caching a freshly calculated one for CACHE_50_MINUTES.
    """
    # Only the ORM lookup can raise DoesNotExist, so keep the try minimal
    try:
        doc = Document.objects.get(pk=pk)
    except Document.DoesNotExist:  # pragma: no cover
        return None

    if not doc.thumbnail_path.exists():
        return None

    doc_key = DOC_THUMBNAIL_ETAG_BASE.format(pk)
    cache_hit = cache.get(doc_key)
    if cache_hit is not None:
        return cache_hit

    # No cache, hash the thumbnail file and remember the digest
    hasher = sha256()
    hasher.update(doc.thumbnail_path.read_bytes())
    thumb_checksum = hasher.hexdigest()
    cache.set(doc_key, thumb_checksum, CACHE_50_MINUTES)
    # Previously the function fell off the end here and returned None on
    # every cache miss, so the first request never received an ETag
    return thumb_checksum
def thumbnail_last_modified(request, pk: int) -> Optional[int]: def thumbnail_last_modified(request, pk: int) -> Optional[int]:
""" """
Returns the filesystem last modified either from cache or from filesystem Returns the filesystem last modified either from cache or from filesystem
@ -150,10 +131,14 @@ def thumbnail_last_modified(request, pk: int) -> Optional[int]:
doc = Document.objects.get(pk=pk) doc = Document.objects.get(pk=pk)
if not doc.thumbnail_path.exists(): if not doc.thumbnail_path.exists():
return None return None
doc_key = DOC_THUMBNAIL_MODIFIED_BASE.format(pk) doc_key = get_thumbnail_modified_key(pk)
cache_hit = cache.get(doc_key) cache_hit = cache.get(doc_key)
if cache_hit is not None: if cache_hit is not None:
cache.touch(doc_key, CACHE_50_MINUTES)
return cache_hit return cache_hit
# No cache, get the timestamp and cache the datetime
last_modified = datetime.fromtimestamp( last_modified = datetime.fromtimestamp(
doc.thumbnail_path.stat().st_mtime, doc.thumbnail_path.stat().st_mtime,
tz=timezone.utc, tz=timezone.utc,

View File

@ -36,6 +36,7 @@ from django.utils.translation import get_language
from django.views import View from django.views import View
from django.views.decorators.cache import cache_control from django.views.decorators.cache import cache_control
from django.views.decorators.http import condition from django.views.decorators.http import condition
from django.views.decorators.http import last_modified
from django.views.generic import TemplateView from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend from django_filters.rest_framework import DjangoFilterBackend
from langdetect import detect from langdetect import detect
@ -65,8 +66,8 @@ from documents.bulk_download import OriginalAndArchiveStrategy
from documents.bulk_download import OriginalsOnlyStrategy from documents.bulk_download import OriginalsOnlyStrategy
from documents.caching import CACHE_5_MINUTES from documents.caching import CACHE_5_MINUTES
from documents.caching import CACHE_50_MINUTES from documents.caching import CACHE_50_MINUTES
from documents.caching import DOC_METADATA_BASE from documents.caching import get_metadata_key
from documents.caching import DOC_SUGGESTIONS_BASE from documents.caching import get_suggestion_key
from documents.classifier import load_classifier from documents.classifier import load_classifier
from documents.conditionals import metadata_etag from documents.conditionals import metadata_etag
from documents.conditionals import metadata_last_modified from documents.conditionals import metadata_last_modified
@ -74,7 +75,6 @@ from documents.conditionals import preview_etag
from documents.conditionals import preview_last_modified from documents.conditionals import preview_last_modified
from documents.conditionals import suggestions_etag from documents.conditionals import suggestions_etag
from documents.conditionals import suggestions_last_modified from documents.conditionals import suggestions_last_modified
from documents.conditionals import thumbnail_etag
from documents.conditionals import thumbnail_last_modified from documents.conditionals import thumbnail_last_modified
from documents.data_models import ConsumableDocument from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides from documents.data_models import DocumentMetadataOverrides
@ -415,7 +415,7 @@ class DocumentViewSet(
except Document.DoesNotExist: except Document.DoesNotExist:
raise Http404 raise Http404
doc_key = DOC_METADATA_BASE.format(doc.pk) doc_key = get_metadata_key(doc.pk)
cache_hit = cache.get(doc_key) cache_hit = cache.get(doc_key)
@ -472,7 +472,7 @@ class DocumentViewSet(
): ):
return HttpResponseForbidden("Insufficient permissions") return HttpResponseForbidden("Insufficient permissions")
doc_key = DOC_SUGGESTIONS_BASE.format(doc.pk) doc_key = get_suggestion_key(doc.pk)
cache_hit = cache.get(doc_key) cache_hit = cache.get(doc_key)
@ -521,9 +521,7 @@ class DocumentViewSet(
@action(methods=["get"], detail=True) @action(methods=["get"], detail=True)
@method_decorator(cache_control(public=False, max_age=CACHE_50_MINUTES)) @method_decorator(cache_control(public=False, max_age=CACHE_50_MINUTES))
@method_decorator( @method_decorator(last_modified(thumbnail_last_modified))
condition(etag_func=thumbnail_etag, last_modified_func=thumbnail_last_modified),
)
def thumb(self, request, pk=None): def thumb(self, request, pk=None):
try: try:
doc = Document.objects.get(id=pk) doc = Document.objects.get(id=pk)

View File

@ -767,6 +767,7 @@ CACHES = {
"django.core.cache.backends.redis.RedisCache", "django.core.cache.backends.redis.RedisCache",
), ),
"LOCATION": _CHANNELS_REDIS_URL, "LOCATION": _CHANNELS_REDIS_URL,
"KEY_PREFIX": os.getenv("PAPERLESS_REDIS_PREFIX", ""),
}, },
} }