Playing around with the Redis caching
This commit is contained in:
parent
45e2b7f814
commit
f90248de09
11
src/documents/caching.py
Normal file
11
src/documents/caching.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from typing import Final
|
||||||
|
|
||||||
|
CLASSIFIER_VERSION_KEY: Final[str] = "classifier_version"
|
||||||
|
CLASSIFIER_HASH_KEY: Final[str] = "classifier_hash"
|
||||||
|
CLASSIFIER_MODIFIED_KEY: Final[str] = "classifier_modified"
|
||||||
|
|
||||||
|
CACHE_1_MINUTE: Final[int] = 60
|
||||||
|
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
|
||||||
|
|
||||||
|
DOC_SUGGESTIONS_BASE: Final[str] = "doc_{}_suggest"
|
||||||
|
DOC_METADATA_BASE: Final[str] = "doc_{}_metadata"
|
@@ -10,8 +10,13 @@ from pathlib import Path
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from django.core.cache import cache
|
||||||
from sklearn.exceptions import InconsistentVersionWarning
|
from sklearn.exceptions import InconsistentVersionWarning
|
||||||
|
|
||||||
|
from documents.caching import CACHE_5_MINUTES
|
||||||
|
from documents.caching import CLASSIFIER_HASH_KEY
|
||||||
|
from documents.caching import CLASSIFIER_MODIFIED_KEY
|
||||||
|
from documents.caching import CLASSIFIER_VERSION_KEY
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.models import MatchingModel
|
from documents.models import MatchingModel
|
||||||
|
|
||||||
@@ -322,6 +327,10 @@ class DocumentClassifier:
|
|||||||
self.last_doc_change_time = latest_doc_change
|
self.last_doc_change_time = latest_doc_change
|
||||||
self.last_auto_type_hash = hasher.digest()
|
self.last_auto_type_hash = hasher.digest()
|
||||||
|
|
||||||
|
cache.set(CLASSIFIER_MODIFIED_KEY, self.last_doc_change_time, CACHE_5_MINUTES)
|
||||||
|
cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_5_MINUTES)
|
||||||
|
cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_5_MINUTES)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def preprocess_content(self, content: str) -> str: # pragma: no cover
|
def preprocess_content(self, content: str) -> str: # pragma: no cover
|
||||||
|
@@ -1,9 +1,13 @@
|
|||||||
import pickle
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from django.core.cache import cache
|
||||||
|
|
||||||
|
from documents.caching import CACHE_5_MINUTES
|
||||||
|
from documents.caching import CLASSIFIER_HASH_KEY
|
||||||
|
from documents.caching import CLASSIFIER_MODIFIED_KEY
|
||||||
|
from documents.caching import CLASSIFIER_VERSION_KEY
|
||||||
from documents.classifier import DocumentClassifier
|
from documents.classifier import DocumentClassifier
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
|
||||||
@@ -19,13 +23,18 @@ def suggestions_etag(request, pk: int) -> Optional[str]:
|
|||||||
"""
|
"""
|
||||||
if not settings.MODEL_FILE.exists():
|
if not settings.MODEL_FILE.exists():
|
||||||
return None
|
return None
|
||||||
with open(settings.MODEL_FILE, "rb") as f:
|
cache_hits = cache.get_many(
|
||||||
schema_version = pickle.load(f)
|
[CLASSIFIER_VERSION_KEY, CLASSIFIER_HASH_KEY],
|
||||||
if schema_version != DocumentClassifier.FORMAT_VERSION:
|
)
|
||||||
return None
|
if (
|
||||||
_ = pickle.load(f)
|
CLASSIFIER_VERSION_KEY in cache_hits
|
||||||
last_auto_type_hash: bytes = pickle.load(f)
|
and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
|
||||||
return f"{last_auto_type_hash}:{settings.NUMBER_OF_SUGGESTED_DATES}"
|
):
|
||||||
|
return None
|
||||||
|
elif CLASSIFIER_HASH_KEY in cache_hits:
|
||||||
|
cache.touch(CLASSIFIER_HASH_KEY, CACHE_5_MINUTES)
|
||||||
|
return f"{cache_hits[CLASSIFIER_HASH_KEY]}:{settings.NUMBER_OF_SUGGESTED_DATES}"
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
|
def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
|
||||||
@@ -36,12 +45,18 @@ def suggestions_last_modified(request, pk: int) -> Optional[datetime]:
|
|||||||
"""
|
"""
|
||||||
if not settings.MODEL_FILE.exists():
|
if not settings.MODEL_FILE.exists():
|
||||||
return None
|
return None
|
||||||
with open(settings.MODEL_FILE, "rb") as f:
|
cache_hits = cache.get_many(
|
||||||
schema_version = pickle.load(f)
|
[CLASSIFIER_VERSION_KEY, CLASSIFIER_MODIFIED_KEY],
|
||||||
if schema_version != DocumentClassifier.FORMAT_VERSION:
|
)
|
||||||
return None
|
if (
|
||||||
last_doc_change_time = pickle.load(f)
|
CLASSIFIER_VERSION_KEY in cache_hits
|
||||||
return last_doc_change_time
|
and cache_hits[CLASSIFIER_VERSION_KEY] != DocumentClassifier.FORMAT_VERSION
|
||||||
|
):
|
||||||
|
return None
|
||||||
|
elif CLASSIFIER_MODIFIED_KEY in cache_hits:
|
||||||
|
cache.touch(CLASSIFIER_MODIFIED_KEY, CACHE_5_MINUTES)
|
||||||
|
return cache_hits[CLASSIFIER_MODIFIED_KEY]
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def metadata_etag(request, pk: int) -> Optional[str]:
|
def metadata_etag(request, pk: int) -> Optional[str]:
|
||||||
|
@@ -15,6 +15,7 @@ from urllib.parse import quote
|
|||||||
import pathvalidate
|
import pathvalidate
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
|
from django.core.cache import cache
|
||||||
from django.db.models import Case
|
from django.db.models import Case
|
||||||
from django.db.models import Count
|
from django.db.models import Count
|
||||||
from django.db.models import IntegerField
|
from django.db.models import IntegerField
|
||||||
@@ -62,6 +63,9 @@ from documents import bulk_edit
|
|||||||
from documents.bulk_download import ArchiveOnlyStrategy
|
from documents.bulk_download import ArchiveOnlyStrategy
|
||||||
from documents.bulk_download import OriginalAndArchiveStrategy
|
from documents.bulk_download import OriginalAndArchiveStrategy
|
||||||
from documents.bulk_download import OriginalsOnlyStrategy
|
from documents.bulk_download import OriginalsOnlyStrategy
|
||||||
|
from documents.caching import CACHE_5_MINUTES
|
||||||
|
from documents.caching import DOC_METADATA_BASE
|
||||||
|
from documents.caching import DOC_SUGGESTIONS_BASE
|
||||||
from documents.classifier import load_classifier
|
from documents.classifier import load_classifier
|
||||||
from documents.conditionals import metadata_etag
|
from documents.conditionals import metadata_etag
|
||||||
from documents.conditionals import metadata_last_modified
|
from documents.conditionals import metadata_last_modified
|
||||||
@@ -407,6 +411,14 @@ class DocumentViewSet(
|
|||||||
except Document.DoesNotExist:
|
except Document.DoesNotExist:
|
||||||
raise Http404
|
raise Http404
|
||||||
|
|
||||||
|
doc_key = DOC_METADATA_BASE.format(doc.pk)
|
||||||
|
|
||||||
|
cache_hit = cache.get(doc_key)
|
||||||
|
|
||||||
|
if cache_hit is not None:
|
||||||
|
cache.touch(doc_key, CACHE_5_MINUTES)
|
||||||
|
return Response(cache_hit)
|
||||||
|
|
||||||
meta = {
|
meta = {
|
||||||
"original_checksum": doc.checksum,
|
"original_checksum": doc.checksum,
|
||||||
"original_size": self.get_filesize(doc.source_path),
|
"original_size": self.get_filesize(doc.source_path),
|
||||||
@@ -436,6 +448,8 @@ class DocumentViewSet(
|
|||||||
meta["archive_size"] = None
|
meta["archive_size"] = None
|
||||||
meta["archive_metadata"] = None
|
meta["archive_metadata"] = None
|
||||||
|
|
||||||
|
cache.set(doc_key, meta, CACHE_5_MINUTES)
|
||||||
|
|
||||||
return Response(meta)
|
return Response(meta)
|
||||||
|
|
||||||
@action(methods=["get"], detail=True)
|
@action(methods=["get"], detail=True)
|
||||||
@@ -454,6 +468,14 @@ class DocumentViewSet(
|
|||||||
):
|
):
|
||||||
return HttpResponseForbidden("Insufficient permissions")
|
return HttpResponseForbidden("Insufficient permissions")
|
||||||
|
|
||||||
|
doc_key = DOC_SUGGESTIONS_BASE.format(doc.pk)
|
||||||
|
|
||||||
|
cache_hit = cache.get(doc_key)
|
||||||
|
|
||||||
|
if cache_hit is not None:
|
||||||
|
cache.touch(doc_key, CACHE_5_MINUTES)
|
||||||
|
return Response(cache_hit)
|
||||||
|
|
||||||
classifier = load_classifier()
|
classifier = load_classifier()
|
||||||
|
|
||||||
dates = []
|
dates = []
|
||||||
@@ -463,23 +485,23 @@ class DocumentViewSet(
|
|||||||
{i for i in itertools.islice(gen, settings.NUMBER_OF_SUGGESTED_DATES)},
|
{i for i in itertools.islice(gen, settings.NUMBER_OF_SUGGESTED_DATES)},
|
||||||
)
|
)
|
||||||
|
|
||||||
return Response(
|
resp_data = {
|
||||||
{
|
"correspondents": [
|
||||||
"correspondents": [
|
c.id for c in match_correspondents(doc, classifier, request.user)
|
||||||
c.id for c in match_correspondents(doc, classifier, request.user)
|
],
|
||||||
],
|
"tags": [t.id for t in match_tags(doc, classifier, request.user)],
|
||||||
"tags": [t.id for t in match_tags(doc, classifier, request.user)],
|
"document_types": [
|
||||||
"document_types": [
|
dt.id for dt in match_document_types(doc, classifier, request.user)
|
||||||
dt.id for dt in match_document_types(doc, classifier, request.user)
|
],
|
||||||
],
|
"storage_paths": [
|
||||||
"storage_paths": [
|
dt.id for dt in match_storage_paths(doc, classifier, request.user)
|
||||||
dt.id for dt in match_storage_paths(doc, classifier, request.user)
|
],
|
||||||
],
|
"dates": [date.strftime("%Y-%m-%d") for date in dates if date is not None],
|
||||||
"dates": [
|
}
|
||||||
date.strftime("%Y-%m-%d") for date in dates if date is not None
|
|
||||||
],
|
cache.set(doc_key, resp_data, CACHE_5_MINUTES)
|
||||||
},
|
|
||||||
)
|
return Response(resp_data)
|
||||||
|
|
||||||
@action(methods=["get"], detail=True)
|
@action(methods=["get"], detail=True)
|
||||||
@method_decorator(cache_control(public=False, max_age=5 * 60))
|
@method_decorator(cache_control(public=False, max_age=5 * 60))
|
||||||
|
@@ -762,7 +762,10 @@ CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")
|
|||||||
# django setting.
|
# django setting.
|
||||||
CACHES = {
|
CACHES = {
|
||||||
"default": {
|
"default": {
|
||||||
"BACKEND": "django.core.cache.backends.redis.RedisCache",
|
"BACKEND": os.environ.get(
|
||||||
|
"PAPERLESS_CACHE_BACKEND",
|
||||||
|
"django.core.cache.backends.redis.RedisCache",
|
||||||
|
),
|
||||||
"LOCATION": _CHANNELS_REDIS_URL,
|
"LOCATION": _CHANNELS_REDIS_URL,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@@ -3,6 +3,7 @@ DJANGO_SETTINGS_MODULE = paperless.settings
|
|||||||
addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --maxprocesses=16 --quiet --durations=50
|
addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --maxprocesses=16 --quiet --durations=50
|
||||||
env =
|
env =
|
||||||
PAPERLESS_DISABLE_DBHANDLER=true
|
PAPERLESS_DISABLE_DBHANDLER=true
|
||||||
|
PAPERLESS_CACHE_BACKEND=django.core.cache.backends.dummy.DummyCache
|
||||||
|
|
||||||
[coverage:run]
|
[coverage:run]
|
||||||
source =
|
source =
|
||||||
|
Loading…
x
Reference in New Issue
Block a user