From 4a2b652a39b914c820dd3ec083f1109db52147d5 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:19:36 -0800 Subject: [PATCH] Simpler thumbnail caching --- src/documents/caching.py | 24 ++++++++++++++++++++---- src/documents/classifier.py | 9 +++++++++ src/documents/conditionals.py | 35 ++++++++++------------------------- src/documents/views.py | 14 ++++++-------- src/paperless/settings.py | 1 + 5 files changed, 46 insertions(+), 37 deletions(-) diff --git a/src/documents/caching.py b/src/documents/caching.py index fd11a0825..1435fa631 100644 --- a/src/documents/caching.py +++ b/src/documents/caching.py @@ -8,7 +8,23 @@ CACHE_1_MINUTE: Final[int] = 60 CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE -DOC_SUGGESTIONS_BASE: Final[str] = "doc_{}_suggest" -DOC_METADATA_BASE: Final[str] = "doc_{}_metadata" -DOC_THUMBNAIL_ETAG_BASE: Final[str] = "doc_{}_thumbnail_etag" -DOC_THUMBNAIL_MODIFIED_BASE: Final[str] = "doc_{}_thumbnail_modified" + +def get_suggestion_key(document_id: int) -> str: + """ + Builds the key to store a document's suggestion data in the cache + """ + return f"doc_{document_id}_suggest" + + +def get_metadata_key(document_id: int) -> str: + """ + Builds the key to store a document's metadata data in the cache + """ + return f"doc_{document_id}_metadata" + + +def get_thumbnail_modified_key(document_id: int) -> str: + """ + Builds the key to store a thumbnail's timestamp + """ + return f"doc_{document_id}_thumbnail_modified" diff --git a/src/documents/classifier.py b/src/documents/classifier.py index 7daca71b8..6180a8671 100644 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -213,6 +213,15 @@ class DocumentClassifier: and self.last_doc_change_time >= latest_doc_change ) and self.last_auto_type_hash == hasher.digest(): logger.info("No updates since last training") + # Set the classifier information into the cache + # Caching 
for 50 minutes, so slightly less than the normal retrain time + cache.set( + CLASSIFIER_MODIFIED_KEY, + self.last_doc_change_time, + CACHE_50_MINUTES, + ) + cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES) + cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES) return False # subtract 1 since -1 (null) is also part of the classes. diff --git a/src/documents/conditionals.py b/src/documents/conditionals.py index 06d764543..357a2b968 100644 --- a/src/documents/conditionals.py +++ b/src/documents/conditionals.py @@ -1,6 +1,5 @@ from datetime import datetime from datetime import timezone -from hashlib import sha256 from typing import Optional from django.conf import settings @@ -11,8 +10,7 @@ from documents.caching import CACHE_50_MINUTES from documents.caching import CLASSIFIER_HASH_KEY from documents.caching import CLASSIFIER_MODIFIED_KEY from documents.caching import CLASSIFIER_VERSION_KEY -from documents.caching import DOC_THUMBNAIL_ETAG_BASE -from documents.caching import DOC_THUMBNAIL_MODIFIED_BASE +from documents.caching import get_thumbnail_modified_key from documents.classifier import DocumentClassifier from documents.models import Document @@ -113,7 +111,10 @@ def preview_etag(request, pk: int) -> Optional[str]: def preview_last_modified(request, pk: int) -> Optional[str]: - """ """ + """ + Uses the document's modified time to set the Last-Modified header. 
Not strictly + speaking correct, but close enough and quick + """ try: doc = Document.objects.get(pk=pk) return doc.modified @@ -122,26 +123,6 @@ def preview_last_modified(request, pk: int) -> Optional[str]: return None -def thumbnail_etag(request, pk: int) -> Optional[str]: - """ - Returns the SHA256 of a thumbnail, either from cache or calculated - """ - try: - doc = Document.objects.get(pk=pk) - if not doc.thumbnail_path.exists(): - return None - doc_key = DOC_THUMBNAIL_ETAG_BASE.format(pk) - cache_hit = cache.get(doc_key) - if cache_hit is not None: - return cache_hit - hasher = sha256() - hasher.update(doc.thumbnail_path.read_bytes()) - thumb_checksum = hasher.hexdigest() - cache.set(doc_key, thumb_checksum, CACHE_50_MINUTES) - except Document.DoesNotExist: # pragma: no cover - return None - - def thumbnail_last_modified(request, pk: int) -> Optional[int]: """ Returns the filesystem last modified either from cache or from filesystem @@ -150,10 +131,14 @@ def thumbnail_last_modified(request, pk: int) -> Optional[int]: doc = Document.objects.get(pk=pk) if not doc.thumbnail_path.exists(): return None - doc_key = DOC_THUMBNAIL_MODIFIED_BASE.format(pk) + doc_key = get_thumbnail_modified_key(pk) + cache_hit = cache.get(doc_key) if cache_hit is not None: + cache.touch(doc_key, CACHE_50_MINUTES) return cache_hit + + # No cache, get the timestamp and cache the datetime last_modified = datetime.fromtimestamp( doc.thumbnail_path.stat().st_mtime, tz=timezone.utc, diff --git a/src/documents/views.py b/src/documents/views.py index 7e50ffa1c..8809c48b8 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -36,6 +36,7 @@ from django.utils.translation import get_language from django.views import View from django.views.decorators.cache import cache_control from django.views.decorators.http import condition +from django.views.decorators.http import last_modified from django.views.generic import TemplateView from django_filters.rest_framework import 
DjangoFilterBackend from langdetect import detect @@ -65,8 +66,8 @@ from documents.bulk_download import OriginalAndArchiveStrategy from documents.bulk_download import OriginalsOnlyStrategy from documents.caching import CACHE_5_MINUTES from documents.caching import CACHE_50_MINUTES -from documents.caching import DOC_METADATA_BASE -from documents.caching import DOC_SUGGESTIONS_BASE +from documents.caching import get_metadata_key +from documents.caching import get_suggestion_key from documents.classifier import load_classifier from documents.conditionals import metadata_etag from documents.conditionals import metadata_last_modified @@ -74,7 +75,6 @@ from documents.conditionals import preview_etag from documents.conditionals import preview_last_modified from documents.conditionals import suggestions_etag from documents.conditionals import suggestions_last_modified -from documents.conditionals import thumbnail_etag from documents.conditionals import thumbnail_last_modified from documents.data_models import ConsumableDocument from documents.data_models import DocumentMetadataOverrides @@ -415,7 +415,7 @@ class DocumentViewSet( except Document.DoesNotExist: raise Http404 - doc_key = DOC_METADATA_BASE.format(doc.pk) + doc_key = get_metadata_key(doc.pk) cache_hit = cache.get(doc_key) @@ -472,7 +472,7 @@ class DocumentViewSet( ): return HttpResponseForbidden("Insufficient permissions") - doc_key = DOC_SUGGESTIONS_BASE.format(doc.pk) + doc_key = get_suggestion_key(doc.pk) cache_hit = cache.get(doc_key) @@ -521,9 +521,7 @@ class DocumentViewSet( @action(methods=["get"], detail=True) @method_decorator(cache_control(public=False, max_age=CACHE_50_MINUTES)) - @method_decorator( - condition(etag_func=thumbnail_etag, last_modified_func=thumbnail_last_modified), - ) + @method_decorator(last_modified(thumbnail_last_modified)) def thumb(self, request, pk=None): try: doc = Document.objects.get(id=pk) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 
86b2b2524..7179f0358 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -767,6 +767,7 @@ CACHES = { "django.core.cache.backends.redis.RedisCache", ), "LOCATION": _CHANNELS_REDIS_URL, + "KEY_PREFIX": os.getenv("PAPERLESS_REDIS_PREFIX", ""), }, }