Simpler thumbnail caching

This commit is contained in:
Trenton H 2024-01-29 12:19:36 -08:00
parent 8a4059b9bf
commit 4a2b652a39
5 changed files with 46 additions and 37 deletions

View File

@ -8,7 +8,23 @@ CACHE_1_MINUTE: Final[int] = 60
# Cache lifetimes derived from CACHE_1_MINUTE (60, i.e. expressed in seconds)
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE
# Cache key templates: the "{}" placeholder is filled in via str.format()
# with a document's primary key to build the per-document cache key
DOC_SUGGESTIONS_BASE: Final[str] = "doc_{}_suggest"
DOC_METADATA_BASE: Final[str] = "doc_{}_metadata"
DOC_THUMBNAIL_ETAG_BASE: Final[str] = "doc_{}_thumbnail_etag"
DOC_THUMBNAIL_MODIFIED_BASE: Final[str] = "doc_{}_thumbnail_modified"
def get_suggestion_key(document_id: int) -> str:
    """
    Returns the cache key under which the suggestion data of the document
    with the given id is stored
    """
    return "doc_{}_suggest".format(document_id)
def get_metadata_key(document_id: int) -> str:
    """
    Returns the cache key under which the metadata of the document with the
    given id is stored
    """
    return "doc_{}_metadata".format(document_id)
def get_thumbnail_modified_key(document_id: int) -> str:
    """
    Returns the cache key under which the timestamp of the thumbnail of the
    document with the given id is stored
    """
    return "doc_{}_thumbnail_modified".format(document_id)

View File

@ -213,6 +213,15 @@ class DocumentClassifier:
and self.last_doc_change_time >= latest_doc_change
) and self.last_auto_type_hash == hasher.digest():
logger.info("No updates since last training")
# Set the classifier information into the cache
# Caching for 50 minutes, so slightly less than the normal retrain time
cache.set(
CLASSIFIER_MODIFIED_KEY,
self.last_doc_change_time,
CACHE_50_MINUTES,
)
cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES)
cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES)
return False
# subtract 1 since -1 (null) is also part of the classes.

View File

@ -1,6 +1,5 @@
from datetime import datetime
from datetime import timezone
from hashlib import sha256
from typing import Optional
from django.conf import settings
@ -11,8 +10,7 @@ from documents.caching import CACHE_50_MINUTES
from documents.caching import CLASSIFIER_HASH_KEY
from documents.caching import CLASSIFIER_MODIFIED_KEY
from documents.caching import CLASSIFIER_VERSION_KEY
from documents.caching import DOC_THUMBNAIL_ETAG_BASE
from documents.caching import DOC_THUMBNAIL_MODIFIED_BASE
from documents.caching import get_thumbnail_modified_key
from documents.classifier import DocumentClassifier
from documents.models import Document
@ -113,7 +111,10 @@ def preview_etag(request, pk: int) -> Optional[str]:
def preview_last_modified(request, pk: int) -> Optional[str]:
""" """
"""
Uses the document's modified time to set the Last-Modified header. Not strictly
speaking correct, but close enough and quick
"""
try:
doc = Document.objects.get(pk=pk)
return doc.modified
@ -122,26 +123,6 @@ def preview_last_modified(request, pk: int) -> Optional[str]:
return None
def thumbnail_etag(request, pk: int) -> Optional[str]:
    """
    Returns the SHA256 of a thumbnail, either from cache or calculated

    A freshly calculated checksum is stored in the cache for CACHE_50_MINUTES
    and then returned. Returns None when the document does not exist or its
    thumbnail file is missing. The request argument is not used by this
    function.
    """
    try:
        doc = Document.objects.get(pk=pk)
        if not doc.thumbnail_path.exists():
            return None
        doc_key = DOC_THUMBNAIL_ETAG_BASE.format(pk)
        cache_hit = cache.get(doc_key)
        if cache_hit is not None:
            return cache_hit
        # Cache miss: hash the thumbnail file contents and remember the result
        thumb_checksum = sha256(doc.thumbnail_path.read_bytes()).hexdigest()
        cache.set(doc_key, thumb_checksum, CACHE_50_MINUTES)
        # Return the fresh checksum; previously the function fell through here
        # and implicitly returned None, so a cache miss produced no ETag
        return thumb_checksum
    except Document.DoesNotExist:  # pragma: no cover
        return None
def thumbnail_last_modified(request, pk: int) -> Optional[int]:
"""
Returns the filesystem last modified either from cache or from filesystem
@ -150,10 +131,14 @@ def thumbnail_last_modified(request, pk: int) -> Optional[int]:
doc = Document.objects.get(pk=pk)
if not doc.thumbnail_path.exists():
return None
doc_key = DOC_THUMBNAIL_MODIFIED_BASE.format(pk)
doc_key = get_thumbnail_modified_key(pk)
cache_hit = cache.get(doc_key)
if cache_hit is not None:
cache.touch(doc_key, CACHE_50_MINUTES)
return cache_hit
# No cache, get the timestamp and cache the datetime
last_modified = datetime.fromtimestamp(
doc.thumbnail_path.stat().st_mtime,
tz=timezone.utc,

View File

@ -36,6 +36,7 @@ from django.utils.translation import get_language
from django.views import View
from django.views.decorators.cache import cache_control
from django.views.decorators.http import condition
from django.views.decorators.http import last_modified
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from langdetect import detect
@ -65,8 +66,8 @@ from documents.bulk_download import OriginalAndArchiveStrategy
from documents.bulk_download import OriginalsOnlyStrategy
from documents.caching import CACHE_5_MINUTES
from documents.caching import CACHE_50_MINUTES
from documents.caching import DOC_METADATA_BASE
from documents.caching import DOC_SUGGESTIONS_BASE
from documents.caching import get_metadata_key
from documents.caching import get_suggestion_key
from documents.classifier import load_classifier
from documents.conditionals import metadata_etag
from documents.conditionals import metadata_last_modified
@ -74,7 +75,6 @@ from documents.conditionals import preview_etag
from documents.conditionals import preview_last_modified
from documents.conditionals import suggestions_etag
from documents.conditionals import suggestions_last_modified
from documents.conditionals import thumbnail_etag
from documents.conditionals import thumbnail_last_modified
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
@ -415,7 +415,7 @@ class DocumentViewSet(
except Document.DoesNotExist:
raise Http404
doc_key = DOC_METADATA_BASE.format(doc.pk)
doc_key = get_metadata_key(doc.pk)
cache_hit = cache.get(doc_key)
@ -472,7 +472,7 @@ class DocumentViewSet(
):
return HttpResponseForbidden("Insufficient permissions")
doc_key = DOC_SUGGESTIONS_BASE.format(doc.pk)
doc_key = get_suggestion_key(doc.pk)
cache_hit = cache.get(doc_key)
@ -521,9 +521,7 @@ class DocumentViewSet(
@action(methods=["get"], detail=True)
@method_decorator(cache_control(public=False, max_age=CACHE_50_MINUTES))
@method_decorator(
condition(etag_func=thumbnail_etag, last_modified_func=thumbnail_last_modified),
)
@method_decorator(last_modified(thumbnail_last_modified))
def thumb(self, request, pk=None):
try:
doc = Document.objects.get(id=pk)

View File

@ -767,6 +767,7 @@ CACHES = {
"django.core.cache.backends.redis.RedisCache",
),
"LOCATION": _CHANNELS_REDIS_URL,
"KEY_PREFIX": os.getenv("PAPERLESS_REDIS_PREFIX", ""),
},
}