Fixed #28977 -- Changed local-memory cache to use LRU culling.

LRU culling turns every read into a kind of write to the cache: cache keys
are moved to the first position in the OrderedDict when they are retrieved.
The RWLock which permitted multiple readers while prioritizing a single
writer is obsolete since all accesses are now writes.
This commit is contained in:
Grant Jenks 2018-01-24 09:26:19 -08:00 committed by Tim Graham
parent f9844f4841
commit d38a3169a4
6 changed files with 94 additions and 147 deletions

View File

@@ -301,6 +301,7 @@ answer newbie questions, and generally made Django that much better:
Gonzalo Saavedra <gonzalosaavedra@gmail.com> Gonzalo Saavedra <gonzalosaavedra@gmail.com>
Gopal Narayanan <gopastro@gmail.com> Gopal Narayanan <gopastro@gmail.com>
Graham Carlyle <graham.carlyle@maplecroft.net> Graham Carlyle <graham.carlyle@maplecroft.net>
Grant Jenks <contact@grantjenks.com>
Greg Chapple <gregchapple1@gmail.com> Greg Chapple <gregchapple1@gmail.com>
Gregor Müllegger <gregor@muellegger.de> Gregor Müllegger <gregor@muellegger.de>
Grigory Fateyev <greg@dial.com.ru> Grigory Fateyev <greg@dial.com.ru>

View File

@@ -1,10 +1,10 @@
"Thread-safe in-memory cache backend." "Thread-safe in-memory cache backend."
import pickle import pickle
import time import time
from contextlib import contextmanager from collections import OrderedDict
from threading import Lock
from django.core.cache.backends.base import DEFAULT_TIMEOUT, BaseCache from django.core.cache.backends.base import DEFAULT_TIMEOUT, BaseCache
from django.utils.synch import RWLock
# Global in-memory store of cache data. Keyed by name, to provide # Global in-memory store of cache data. Keyed by name, to provide
# multiple named local memory caches. # multiple named local memory caches.
@@ -13,88 +13,71 @@ _expire_info = {}
_locks = {} _locks = {}
@contextmanager
def dummy():
"""A context manager that does nothing special."""
yield
class LocMemCache(BaseCache): class LocMemCache(BaseCache):
def __init__(self, name, params): def __init__(self, name, params):
super().__init__(params) super().__init__(params)
self._cache = _caches.setdefault(name, {}) self._cache = _caches.setdefault(name, OrderedDict())
self._expire_info = _expire_info.setdefault(name, {}) self._expire_info = _expire_info.setdefault(name, {})
self._lock = _locks.setdefault(name, RWLock()) self._lock = _locks.setdefault(name, Lock())
def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None): def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
key = self.make_key(key, version=version) key = self.make_key(key, version=version)
self.validate_key(key) self.validate_key(key)
pickled = pickle.dumps(value, pickle.HIGHEST_PROTOCOL) pickled = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
with self._lock.writer(): with self._lock:
if self._has_expired(key): if self._has_expired(key):
self._set(key, pickled, timeout) self._set(key, pickled, timeout)
return True return True
return False return False
def get(self, key, default=None, version=None, acquire_lock=True): def get(self, key, default=None, version=None):
key = self.make_key(key, version=version) key = self.make_key(key, version=version)
self.validate_key(key) self.validate_key(key)
pickled = None with self._lock:
with (self._lock.reader() if acquire_lock else dummy()): if self._has_expired(key):
if not self._has_expired(key): self._delete(key)
return default
pickled = self._cache[key] pickled = self._cache[key]
if pickled is not None: self._cache.move_to_end(key, last=False)
try:
return pickle.loads(pickled) return pickle.loads(pickled)
except pickle.PickleError:
return default
with (self._lock.writer() if acquire_lock else dummy()):
try:
del self._cache[key]
del self._expire_info[key]
except KeyError:
pass
return default
def _set(self, key, value, timeout=DEFAULT_TIMEOUT): def _set(self, key, value, timeout=DEFAULT_TIMEOUT):
if len(self._cache) >= self._max_entries: if len(self._cache) >= self._max_entries:
self._cull() self._cull()
self._cache[key] = value self._cache[key] = value
self._cache.move_to_end(key, last=False)
self._expire_info[key] = self.get_backend_timeout(timeout) self._expire_info[key] = self.get_backend_timeout(timeout)
def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None): def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
key = self.make_key(key, version=version) key = self.make_key(key, version=version)
self.validate_key(key) self.validate_key(key)
pickled = pickle.dumps(value, pickle.HIGHEST_PROTOCOL) pickled = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
with self._lock.writer(): with self._lock:
self._set(key, pickled, timeout) self._set(key, pickled, timeout)
def incr(self, key, delta=1, version=None): def incr(self, key, delta=1, version=None):
with self._lock.writer():
value = self.get(key, version=version, acquire_lock=False)
if value is None:
raise ValueError("Key '%s' not found" % key)
new_value = value + delta
key = self.make_key(key, version=version) key = self.make_key(key, version=version)
self.validate_key(key)
with self._lock:
if self._has_expired(key):
self._delete(key)
raise ValueError("Key '%s' not found" % key)
pickled = self._cache[key]
value = pickle.loads(pickled)
new_value = value + delta
pickled = pickle.dumps(new_value, pickle.HIGHEST_PROTOCOL) pickled = pickle.dumps(new_value, pickle.HIGHEST_PROTOCOL)
self._cache[key] = pickled self._cache[key] = pickled
self._cache.move_to_end(key, last=False)
return new_value return new_value
def has_key(self, key, version=None): def has_key(self, key, version=None):
key = self.make_key(key, version=version) key = self.make_key(key, version=version)
self.validate_key(key) self.validate_key(key)
with self._lock.reader(): with self._lock:
if not self._has_expired(key): if self._has_expired(key):
return True self._delete(key)
with self._lock.writer():
try:
del self._cache[key]
del self._expire_info[key]
except KeyError:
pass
return False return False
return True
def _has_expired(self, key): def _has_expired(self, key):
exp = self._expire_info.get(key, -1) exp = self._expire_info.get(key, -1)
@@ -102,18 +85,17 @@ class LocMemCache(BaseCache):
def _cull(self): def _cull(self):
if self._cull_frequency == 0: if self._cull_frequency == 0:
self.clear() self._cache.clear()
self._expire_info.clear()
else: else:
doomed = [k for (i, k) in enumerate(self._cache) if i % self._cull_frequency == 0] count = len(self._cache) // self._cull_frequency
for k in doomed: for i in range(count):
self._delete(k) key, _ = self._cache.popitem()
del self._expire_info[key]
def _delete(self, key): def _delete(self, key):
try: try:
del self._cache[key] del self._cache[key]
except KeyError:
pass
try:
del self._expire_info[key] del self._expire_info[key]
except KeyError: except KeyError:
pass pass
@@ -121,9 +103,10 @@ class LocMemCache(BaseCache):
def delete(self, key, version=None): def delete(self, key, version=None):
key = self.make_key(key, version=version) key = self.make_key(key, version=version)
self.validate_key(key) self.validate_key(key)
with self._lock.writer(): with self._lock:
self._delete(key) self._delete(key)
def clear(self): def clear(self):
with self._lock:
self._cache.clear() self._cache.clear()
self._expire_info.clear() self._expire_info.clear()

View File

@@ -1,90 +0,0 @@
"""
Synchronization primitives:
- reader-writer lock (preference to writers)
(Contributed to Django by eugene@lazutkin.com)
"""
import contextlib
import threading
class RWLock:
"""
Classic implementation of reader-writer lock with preference to writers.
Readers can access a resource simultaneously.
Writers get an exclusive access.
API is self-descriptive:
reader_enters()
reader_leaves()
writer_enters()
writer_leaves()
"""
def __init__(self):
self.mutex = threading.RLock()
self.can_read = threading.Semaphore(0)
self.can_write = threading.Semaphore(0)
self.active_readers = 0
self.active_writers = 0
self.waiting_readers = 0
self.waiting_writers = 0
def reader_enters(self):
with self.mutex:
if self.active_writers == 0 and self.waiting_writers == 0:
self.active_readers += 1
self.can_read.release()
else:
self.waiting_readers += 1
self.can_read.acquire()
def reader_leaves(self):
with self.mutex:
self.active_readers -= 1
if self.active_readers == 0 and self.waiting_writers != 0:
self.active_writers += 1
self.waiting_writers -= 1
self.can_write.release()
@contextlib.contextmanager
def reader(self):
self.reader_enters()
try:
yield
finally:
self.reader_leaves()
def writer_enters(self):
with self.mutex:
if self.active_writers == 0 and self.waiting_writers == 0 and self.active_readers == 0:
self.active_writers = 1
self.can_write.release()
else:
self.waiting_writers += 1
self.can_write.acquire()
def writer_leaves(self):
with self.mutex:
self.active_writers -= 1
if self.waiting_writers != 0:
self.active_writers += 1
self.waiting_writers -= 1
self.can_write.release()
elif self.waiting_readers != 0:
t = self.waiting_readers
self.waiting_readers = 0
self.active_readers += t
while t > 0:
self.can_read.release()
t -= 1
@contextlib.contextmanager
def writer(self):
self.writer_enters()
try:
yield
finally:
self.writer_leaves()

View File

@@ -108,7 +108,8 @@ Minor features
Cache Cache
~~~~~ ~~~~~
* ... * The :ref:`local-memory cache backend <local-memory-caching>` now uses a
least-recently-used (LRU) culling strategy rather than a pseudo-random one.
CSRF CSRF
~~~~ ~~~~

View File

@@ -322,11 +322,17 @@ memory stores. If you only have one ``locmem`` cache, you can omit the
memory cache, you will need to assign a name to at least one of them in memory cache, you will need to assign a name to at least one of them in
order to keep them separate. order to keep them separate.
The cache uses a least-recently-used (LRU) culling strategy.
Note that each process will have its own private cache instance, which means no Note that each process will have its own private cache instance, which means no
cross-process caching is possible. This obviously also means the local memory cross-process caching is possible. This obviously also means the local memory
cache isn't particularly memory-efficient, so it's probably not a good choice cache isn't particularly memory-efficient, so it's probably not a good choice
for production environments. It's nice for development. for production environments. It's nice for development.
.. versionchanged:: 2.1
Older versions use a pseudo-random culling strategy rather than LRU.
Dummy caching (for development) Dummy caching (for development)
------------------------------- -------------------------------

50
tests/cache/tests.py vendored
View File

@@ -1084,11 +1084,16 @@ class PicklingSideEffect:
self.locked = False self.locked = False
def __getstate__(self): def __getstate__(self):
if self.cache._lock.active_writers: self.locked = self.cache._lock.locked()
self.locked = True
return {} return {}
limit_locmem_entries = override_settings(CACHES=caches_setting_for_tests(
BACKEND='django.core.cache.backends.locmem.LocMemCache',
OPTIONS={'MAX_ENTRIES': 9},
))
@override_settings(CACHES=caches_setting_for_tests( @override_settings(CACHES=caches_setting_for_tests(
BACKEND='django.core.cache.backends.locmem.LocMemCache', BACKEND='django.core.cache.backends.locmem.LocMemCache',
)) ))
@@ -1144,6 +1149,47 @@ class LocMemCacheTests(BaseCacheTests, TestCase):
cache.decr(key) cache.decr(key)
self.assertEqual(expire, cache._expire_info[_key]) self.assertEqual(expire, cache._expire_info[_key])
@limit_locmem_entries
def test_lru_get(self):
"""get() moves cache keys."""
for key in range(9):
cache.set(key, key, timeout=None)
for key in range(6):
self.assertEqual(cache.get(key), key)
cache.set(9, 9, timeout=None)
for key in range(6):
self.assertEqual(cache.get(key), key)
for key in range(6, 9):
self.assertIsNone(cache.get(key))
self.assertEqual(cache.get(9), 9)
@limit_locmem_entries
def test_lru_set(self):
"""set() moves cache keys."""
for key in range(9):
cache.set(key, key, timeout=None)
for key in range(3, 9):
cache.set(key, key, timeout=None)
cache.set(9, 9, timeout=None)
for key in range(3, 10):
self.assertEqual(cache.get(key), key)
for key in range(3):
self.assertIsNone(cache.get(key))
@limit_locmem_entries
def test_lru_incr(self):
"""incr() moves cache keys."""
for key in range(9):
cache.set(key, key, timeout=None)
for key in range(6):
cache.incr(key)
cache.set(9, 9, timeout=None)
for key in range(6):
self.assertEqual(cache.get(key), key + 1)
for key in range(6, 9):
self.assertIsNone(cache.get(key))
self.assertEqual(cache.get(9), 9)
# memcached backend isn't guaranteed to be available. # memcached backend isn't guaranteed to be available.
# To check the memcached backend, the test settings file will # To check the memcached backend, the test settings file will