Use Redis for more caching
This commit is contained in:
parent
f90248de09
commit
8a4059b9bf
@ -6,6 +6,9 @@ CLASSIFIER_MODIFIED_KEY: Final[str] = "classifier_modified"
|
||||
|
||||
CACHE_1_MINUTE: Final[int] = 60
|
||||
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
|
||||
CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE
|
||||
|
||||
DOC_SUGGESTIONS_BASE: Final[str] = "doc_{}_suggest"
|
||||
DOC_METADATA_BASE: Final[str] = "doc_{}_metadata"
|
||||
DOC_THUMBNAIL_ETAG_BASE: Final[str] = "doc_{}_thumbnail_etag"
|
||||
DOC_THUMBNAIL_MODIFIED_BASE: Final[str] = "doc_{}_thumbnail_modified"
|
||||
|
@ -13,7 +13,7 @@ from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from sklearn.exceptions import InconsistentVersionWarning
|
||||
|
||||
from documents.caching import CACHE_5_MINUTES
|
||||
from documents.caching import CACHE_50_MINUTES
|
||||
from documents.caching import CLASSIFIER_HASH_KEY
|
||||
from documents.caching import CLASSIFIER_MODIFIED_KEY
|
||||
from documents.caching import CLASSIFIER_VERSION_KEY
|
||||
@ -327,9 +327,11 @@ class DocumentClassifier:
|
||||
self.last_doc_change_time = latest_doc_change
|
||||
self.last_auto_type_hash = hasher.digest()
|
||||
|
||||
cache.set(CLASSIFIER_MODIFIED_KEY, self.last_doc_change_time, CACHE_5_MINUTES)
|
||||
cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_5_MINUTES)
|
||||
cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_5_MINUTES)
|
||||
# Set the classifier information into the cache
|
||||
# Caching for 50 minutes, so slightly less than the normal retrain time
|
||||
cache.set(CLASSIFIER_MODIFIED_KEY, self.last_doc_change_time, CACHE_50_MINUTES)
|
||||
cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES)
|
||||
cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES)
|
||||
|
||||
return True
|
||||
|
||||
|
@ -1,13 +1,18 @@
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from hashlib import sha256
|
||||
from typing import Optional
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
|
||||
from documents.caching import CACHE_5_MINUTES
|
||||
from documents.caching import CACHE_50_MINUTES
|
||||
from documents.caching import CLASSIFIER_HASH_KEY
|
||||
from documents.caching import CLASSIFIER_MODIFIED_KEY
|
||||
from documents.caching import CLASSIFIER_VERSION_KEY
|
||||
from documents.caching import DOC_THUMBNAIL_ETAG_BASE
|
||||
from documents.caching import DOC_THUMBNAIL_MODIFIED_BASE
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.models import Document
|
||||
|
||||
@ -18,20 +23,22 @@ def suggestions_etag(request, pk: int) -> Optional[str]:
|
||||
suggestions if the classifier has not been changed and the suggested dates
|
||||
setting is also unchanged
|
||||
|
||||
TODO: It would be nice to not duplicate the partial loading and the loading
|
||||
between here and the actual classifier
|
||||
"""
|
||||
# If no model file, no etag at all
|
||||
if not settings.MODEL_FILE.exists():
|
||||
return None
|
||||
# Check cache information
|
||||
cache_hits = cache.get_many(
|
||||
[CLASSIFIER_VERSION_KEY, CLASSIFIER_HASH_KEY],
|
||||
)
|
||||
# If the version differs somehow, no etag
|
||||
if (
|
||||
CLASSIFIER_VERSION_KEY in cache_hits
|
||||
and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
|
||||
):
|
||||
return None
|
||||
elif CLASSIFIER_HASH_KEY in cache_hits:
|
||||
# Refresh the cache and return the hash digest and the dates setting
|
||||
cache.touch(CLASSIFIER_HASH_KEY, CACHE_5_MINUTES)
|
||||
return f"{cache_hits[CLASSIFIER_HASH_KEY]}:{settings.NUMBER_OF_SUGGESTED_DATES}"
|
||||
return None
|
||||
@ -43,17 +50,20 @@ def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
|
||||
as there is not way to track the suggested date setting modification, but it seems
|
||||
unlikely that changes too often
|
||||
"""
|
||||
# No file, no last modified
|
||||
if not settings.MODEL_FILE.exists():
|
||||
return None
|
||||
cache_hits = cache.get_many(
|
||||
[CLASSIFIER_VERSION_KEY, CLASSIFIER_MODIFIED_KEY],
|
||||
)
|
||||
# If the version differs somehow, no last modified
|
||||
if (
|
||||
CLASSIFIER_VERSION_KEY in cache_hits
|
||||
and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
|
||||
):
|
||||
return None
|
||||
elif CLASSIFIER_MODIFIED_KEY in cache_hits:
|
||||
# Refresh the cache and return the last modified
|
||||
cache.touch(CLASSIFIER_MODIFIED_KEY, CACHE_5_MINUTES)
|
||||
return cache_hits[CLASSIFIER_MODIFIED_KEY]
|
||||
return None
|
||||
@ -67,7 +77,7 @@ def metadata_etag(request, pk: int) -> Optional[str]:
|
||||
try:
|
||||
doc = Document.objects.get(pk=pk)
|
||||
return doc.checksum
|
||||
except Document.DoesNotExist:
|
||||
except Document.DoesNotExist: # pragma: no cover
|
||||
return None
|
||||
return None
|
||||
|
||||
@ -81,7 +91,7 @@ def metadata_last_modified(request, pk: int) -> Optional[datetime]:
|
||||
try:
|
||||
doc = Document.objects.get(pk=pk)
|
||||
return doc.modified
|
||||
except Document.DoesNotExist:
|
||||
except Document.DoesNotExist: # pragma: no cover
|
||||
return None
|
||||
return None
|
||||
|
||||
@ -97,6 +107,58 @@ def preview_etag(request, pk: int) -> Optional[str]:
|
||||
and request.query_params["original"] == "true"
|
||||
)
|
||||
return doc.checksum if use_original else doc.archive_checksum
|
||||
except Document.DoesNotExist:
|
||||
except Document.DoesNotExist: # pragma: no cover
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def preview_last_modified(request, pk: int) -> Optional[str]:
|
||||
""" """
|
||||
try:
|
||||
doc = Document.objects.get(pk=pk)
|
||||
return doc.modified
|
||||
except Document.DoesNotExist: # pragma: no cover
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def thumbnail_etag(request, pk: int) -> Optional[str]:
|
||||
"""
|
||||
Returns the SHA256 of a thumbnail, either from cache or calculated
|
||||
"""
|
||||
try:
|
||||
doc = Document.objects.get(pk=pk)
|
||||
if not doc.thumbnail_path.exists():
|
||||
return None
|
||||
doc_key = DOC_THUMBNAIL_ETAG_BASE.format(pk)
|
||||
cache_hit = cache.get(doc_key)
|
||||
if cache_hit is not None:
|
||||
return cache_hit
|
||||
hasher = sha256()
|
||||
hasher.update(doc.thumbnail_path.read_bytes())
|
||||
thumb_checksum = hasher.hexdigest()
|
||||
cache.set(doc_key, thumb_checksum, CACHE_50_MINUTES)
|
||||
except Document.DoesNotExist: # pragma: no cover
|
||||
return None
|
||||
|
||||
|
||||
def thumbnail_last_modified(request, pk: int) -> Optional[int]:
|
||||
"""
|
||||
Returns the filesystem last modified either from cache or from filesystem
|
||||
"""
|
||||
try:
|
||||
doc = Document.objects.get(pk=pk)
|
||||
if not doc.thumbnail_path.exists():
|
||||
return None
|
||||
doc_key = DOC_THUMBNAIL_MODIFIED_BASE.format(pk)
|
||||
cache_hit = cache.get(doc_key)
|
||||
if cache_hit is not None:
|
||||
return cache_hit
|
||||
last_modified = datetime.fromtimestamp(
|
||||
doc.thumbnail_path.stat().st_mtime,
|
||||
tz=timezone.utc,
|
||||
)
|
||||
cache.set(doc_key, last_modified, CACHE_50_MINUTES)
|
||||
return last_modified
|
||||
except Document.DoesNotExist: # pragma: no cover
|
||||
return None
|
||||
|
@ -13,12 +13,17 @@ from dateutil import parser
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.cache import cache
|
||||
from django.test import override_settings
|
||||
from django.utils import timezone
|
||||
from guardian.shortcuts import assign_perm
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
from documents.caching import CACHE_50_MINUTES
|
||||
from documents.caching import CLASSIFIER_HASH_KEY
|
||||
from documents.caching import CLASSIFIER_MODIFIED_KEY
|
||||
from documents.caching import CLASSIFIER_VERSION_KEY
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
@ -40,6 +45,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
|
||||
self.user = User.objects.create_superuser(username="temp_admin")
|
||||
self.client.force_authenticate(user=self.user)
|
||||
cache.clear()
|
||||
|
||||
def testDocuments(self):
|
||||
response = self.client.get("/api/documents/").data
|
||||
@ -1266,7 +1272,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
},
|
||||
)
|
||||
|
||||
@mock.patch("documents.conditionals.pickle.load")
|
||||
@mock.patch("documents.views.load_classifier")
|
||||
@mock.patch("documents.views.match_storage_paths")
|
||||
@mock.patch("documents.views.match_document_types")
|
||||
@mock.patch("documents.views.match_tags")
|
||||
@ -1278,7 +1284,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
match_tags,
|
||||
match_document_types,
|
||||
match_storage_paths,
|
||||
mocked_pickle_load,
|
||||
mocked_load,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
@ -1287,23 +1293,28 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
- Classifier has not been modified
|
||||
THEN:
|
||||
- Subsequent requests are returned alright
|
||||
- ETag and last modified are called
|
||||
- ETag and last modified headers are set
|
||||
"""
|
||||
settings.MODEL_FILE.touch()
|
||||
|
||||
# setup the cache how the classifier does it
|
||||
from documents.classifier import DocumentClassifier
|
||||
|
||||
last_modified = timezone.now()
|
||||
settings.MODEL_FILE.touch()
|
||||
|
||||
# ETag first, then modified
|
||||
mock_effect = [
|
||||
last_modified = timezone.now()
|
||||
cache.set(CLASSIFIER_MODIFIED_KEY, last_modified, CACHE_50_MINUTES)
|
||||
cache.set(CLASSIFIER_HASH_KEY, "thisisachecksum", CACHE_50_MINUTES)
|
||||
cache.set(
|
||||
CLASSIFIER_VERSION_KEY,
|
||||
DocumentClassifier.FORMAT_VERSION,
|
||||
"dont care",
|
||||
b"thisisachecksum",
|
||||
DocumentClassifier.FORMAT_VERSION,
|
||||
last_modified,
|
||||
]
|
||||
mocked_pickle_load.side_effect = mock_effect
|
||||
CACHE_50_MINUTES,
|
||||
)
|
||||
|
||||
# Mock the matching
|
||||
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
|
||||
match_tags.return_value = [Tag(id=56), Tag(id=123)]
|
||||
match_document_types.return_value = [DocumentType(id=23)]
|
||||
match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="test",
|
||||
@ -1311,12 +1322,8 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
content="this is an invoice from 12.04.2022!",
|
||||
)
|
||||
|
||||
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
|
||||
match_tags.return_value = [Tag(id=56), Tag(id=123)]
|
||||
match_document_types.return_value = [DocumentType(id=23)]
|
||||
match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
|
||||
|
||||
response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(
|
||||
response.data,
|
||||
{
|
||||
@ -1327,7 +1334,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
"dates": ["2022-04-12"],
|
||||
},
|
||||
)
|
||||
mocked_pickle_load.assert_called()
|
||||
self.assertIn("Last-Modified", response.headers)
|
||||
self.assertEqual(
|
||||
response.headers["Last-Modified"],
|
||||
@ -1336,15 +1342,11 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
self.assertIn("ETag", response.headers)
|
||||
self.assertEqual(
|
||||
response.headers["ETag"],
|
||||
f"\"b'thisisachecksum':{settings.NUMBER_OF_SUGGESTED_DATES}\"",
|
||||
f'"thisisachecksum:{settings.NUMBER_OF_SUGGESTED_DATES}"',
|
||||
)
|
||||
|
||||
mocked_pickle_load.rest_mock()
|
||||
mocked_pickle_load.side_effect = mock_effect
|
||||
|
||||
response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
mocked_pickle_load.assert_called()
|
||||
|
||||
@mock.patch("documents.parsers.parse_date_generator")
|
||||
@override_settings(NUMBER_OF_SUGGESTED_DATES=0)
|
||||
|
@ -64,14 +64,18 @@ from documents.bulk_download import ArchiveOnlyStrategy
|
||||
from documents.bulk_download import OriginalAndArchiveStrategy
|
||||
from documents.bulk_download import OriginalsOnlyStrategy
|
||||
from documents.caching import CACHE_5_MINUTES
|
||||
from documents.caching import CACHE_50_MINUTES
|
||||
from documents.caching import DOC_METADATA_BASE
|
||||
from documents.caching import DOC_SUGGESTIONS_BASE
|
||||
from documents.classifier import load_classifier
|
||||
from documents.conditionals import metadata_etag
|
||||
from documents.conditionals import metadata_last_modified
|
||||
from documents.conditionals import preview_etag
|
||||
from documents.conditionals import preview_last_modified
|
||||
from documents.conditionals import suggestions_etag
|
||||
from documents.conditionals import suggestions_last_modified
|
||||
from documents.conditionals import thumbnail_etag
|
||||
from documents.conditionals import thumbnail_last_modified
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.data_models import DocumentSource
|
||||
@ -505,7 +509,9 @@ class DocumentViewSet(
|
||||
|
||||
@action(methods=["get"], detail=True)
|
||||
@method_decorator(cache_control(public=False, max_age=5 * 60))
|
||||
@method_decorator(condition(etag_func=preview_etag))
|
||||
@method_decorator(
|
||||
condition(etag_func=preview_etag, last_modified_func=preview_last_modified),
|
||||
)
|
||||
def preview(self, request, pk=None):
|
||||
try:
|
||||
response = self.file_response(pk, request, "inline")
|
||||
@ -514,7 +520,10 @@ class DocumentViewSet(
|
||||
raise Http404
|
||||
|
||||
@action(methods=["get"], detail=True)
|
||||
@method_decorator(cache_control(public=False, max_age=315360000))
|
||||
@method_decorator(cache_control(public=False, max_age=CACHE_50_MINUTES))
|
||||
@method_decorator(
|
||||
condition(etag_func=thumbnail_etag, last_modified_func=thumbnail_last_modified),
|
||||
)
|
||||
def thumb(self, request, pk=None):
|
||||
try:
|
||||
doc = Document.objects.get(id=pk)
|
||||
@ -528,8 +537,6 @@ class DocumentViewSet(
|
||||
handle = GnuPG.decrypted(doc.thumbnail_file)
|
||||
else:
|
||||
handle = doc.thumbnail_file
|
||||
# TODO: Send ETag information and use that to send new thumbnails
|
||||
# if available
|
||||
|
||||
return HttpResponse(handle, content_type="image/webp")
|
||||
except (FileNotFoundError, Document.DoesNotExist):
|
||||
|
@ -3,7 +3,7 @@ DJANGO_SETTINGS_MODULE = paperless.settings
|
||||
addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --maxprocesses=16 --quiet --durations=50
|
||||
env =
|
||||
PAPERLESS_DISABLE_DBHANDLER=true
|
||||
PAPERLESS_CACHE_BACKEND=django.core.cache.backends.dummy.DummyCache
|
||||
PAPERLESS_CACHE_BACKEND=django.core.cache.backends.locmem.LocMemCache
|
||||
|
||||
[coverage:run]
|
||||
source =
|
||||
|
Loading…
x
Reference in New Issue
Block a user