Fixed #28977 -- Changed local-memory cache to use LRU culling.

LRU culling turns every read into a kind of write to the cache: cache keys
are moved to the first position in the OrderedDict when they are retrieved.
The RWLock which permitted multiple readers while prioritizing a single
writer is obsolete since all accesses are now writes.
This commit is contained in:
Grant Jenks 2018-01-24 09:26:19 -08:00 committed by Tim Graham
parent f9844f4841
commit d38a3169a4
6 changed files with 94 additions and 147 deletions

View File

@ -301,6 +301,7 @@ answer newbie questions, and generally made Django that much better:
Gonzalo Saavedra <gonzalosaavedra@gmail.com>
Gopal Narayanan <gopastro@gmail.com>
Graham Carlyle <graham.carlyle@maplecroft.net>
Grant Jenks <contact@grantjenks.com>
Greg Chapple <gregchapple1@gmail.com>
Gregor Müllegger <gregor@muellegger.de>
Grigory Fateyev <greg@dial.com.ru>

View File

@ -1,10 +1,10 @@
"Thread-safe in-memory cache backend."
import pickle
import time
from contextlib import contextmanager
from collections import OrderedDict
from threading import Lock
from django.core.cache.backends.base import DEFAULT_TIMEOUT, BaseCache
from django.utils.synch import RWLock
# Global in-memory store of cache data. Keyed by name, to provide
# multiple named local memory caches.
@ -13,88 +13,71 @@ _expire_info = {}
_locks = {}
@contextmanager
def dummy():
"""A context manager that does nothing special."""
yield
class LocMemCache(BaseCache):
def __init__(self, name, params):
super().__init__(params)
self._cache = _caches.setdefault(name, {})
self._cache = _caches.setdefault(name, OrderedDict())
self._expire_info = _expire_info.setdefault(name, {})
self._lock = _locks.setdefault(name, RWLock())
self._lock = _locks.setdefault(name, Lock())
def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
key = self.make_key(key, version=version)
self.validate_key(key)
pickled = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
with self._lock.writer():
with self._lock:
if self._has_expired(key):
self._set(key, pickled, timeout)
return True
return False
def get(self, key, default=None, version=None, acquire_lock=True):
def get(self, key, default=None, version=None):
key = self.make_key(key, version=version)
self.validate_key(key)
pickled = None
with (self._lock.reader() if acquire_lock else dummy()):
if not self._has_expired(key):
with self._lock:
if self._has_expired(key):
self._delete(key)
return default
pickled = self._cache[key]
if pickled is not None:
try:
self._cache.move_to_end(key, last=False)
return pickle.loads(pickled)
except pickle.PickleError:
return default
with (self._lock.writer() if acquire_lock else dummy()):
try:
del self._cache[key]
del self._expire_info[key]
except KeyError:
pass
return default
def _set(self, key, value, timeout=DEFAULT_TIMEOUT):
if len(self._cache) >= self._max_entries:
self._cull()
self._cache[key] = value
self._cache.move_to_end(key, last=False)
self._expire_info[key] = self.get_backend_timeout(timeout)
def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
key = self.make_key(key, version=version)
self.validate_key(key)
pickled = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
with self._lock.writer():
with self._lock:
self._set(key, pickled, timeout)
def incr(self, key, delta=1, version=None):
with self._lock.writer():
value = self.get(key, version=version, acquire_lock=False)
if value is None:
raise ValueError("Key '%s' not found" % key)
new_value = value + delta
key = self.make_key(key, version=version)
self.validate_key(key)
with self._lock:
if self._has_expired(key):
self._delete(key)
raise ValueError("Key '%s' not found" % key)
pickled = self._cache[key]
value = pickle.loads(pickled)
new_value = value + delta
pickled = pickle.dumps(new_value, pickle.HIGHEST_PROTOCOL)
self._cache[key] = pickled
self._cache.move_to_end(key, last=False)
return new_value
def has_key(self, key, version=None):
key = self.make_key(key, version=version)
self.validate_key(key)
with self._lock.reader():
if not self._has_expired(key):
return True
with self._lock.writer():
try:
del self._cache[key]
del self._expire_info[key]
except KeyError:
pass
with self._lock:
if self._has_expired(key):
self._delete(key)
return False
return True
def _has_expired(self, key):
exp = self._expire_info.get(key, -1)
@ -102,18 +85,17 @@ class LocMemCache(BaseCache):
def _cull(self):
if self._cull_frequency == 0:
self.clear()
self._cache.clear()
self._expire_info.clear()
else:
doomed = [k for (i, k) in enumerate(self._cache) if i % self._cull_frequency == 0]
for k in doomed:
self._delete(k)
count = len(self._cache) // self._cull_frequency
for i in range(count):
key, _ = self._cache.popitem()
del self._expire_info[key]
def _delete(self, key):
try:
del self._cache[key]
except KeyError:
pass
try:
del self._expire_info[key]
except KeyError:
pass
@ -121,9 +103,10 @@ class LocMemCache(BaseCache):
def delete(self, key, version=None):
key = self.make_key(key, version=version)
self.validate_key(key)
with self._lock.writer():
with self._lock:
self._delete(key)
def clear(self):
with self._lock:
self._cache.clear()
self._expire_info.clear()

View File

@ -1,90 +0,0 @@
"""
Synchronization primitives:
- reader-writer lock (preference to writers)
(Contributed to Django by eugene@lazutkin.com)
"""
import contextlib
import threading
class RWLock:
"""
Classic implementation of reader-writer lock with preference to writers.
Readers can access a resource simultaneously.
Writers get an exclusive access.
API is self-descriptive:
reader_enters()
reader_leaves()
writer_enters()
writer_leaves()
"""
def __init__(self):
self.mutex = threading.RLock()
self.can_read = threading.Semaphore(0)
self.can_write = threading.Semaphore(0)
self.active_readers = 0
self.active_writers = 0
self.waiting_readers = 0
self.waiting_writers = 0
def reader_enters(self):
with self.mutex:
if self.active_writers == 0 and self.waiting_writers == 0:
self.active_readers += 1
self.can_read.release()
else:
self.waiting_readers += 1
self.can_read.acquire()
def reader_leaves(self):
with self.mutex:
self.active_readers -= 1
if self.active_readers == 0 and self.waiting_writers != 0:
self.active_writers += 1
self.waiting_writers -= 1
self.can_write.release()
@contextlib.contextmanager
def reader(self):
self.reader_enters()
try:
yield
finally:
self.reader_leaves()
def writer_enters(self):
with self.mutex:
if self.active_writers == 0 and self.waiting_writers == 0 and self.active_readers == 0:
self.active_writers = 1
self.can_write.release()
else:
self.waiting_writers += 1
self.can_write.acquire()
def writer_leaves(self):
with self.mutex:
self.active_writers -= 1
if self.waiting_writers != 0:
self.active_writers += 1
self.waiting_writers -= 1
self.can_write.release()
elif self.waiting_readers != 0:
t = self.waiting_readers
self.waiting_readers = 0
self.active_readers += t
while t > 0:
self.can_read.release()
t -= 1
@contextlib.contextmanager
def writer(self):
self.writer_enters()
try:
yield
finally:
self.writer_leaves()

View File

@ -108,7 +108,8 @@ Minor features
Cache
~~~~~
* ...
* The :ref:`local-memory cache backend <local-memory-caching>` now uses a
least-recently-used (LRU) culling strategy rather than a pseudo-random one.
CSRF
~~~~

View File

@ -322,11 +322,17 @@ memory stores. If you only have one ``locmem`` cache, you can omit the
memory cache, you will need to assign a name to at least one of them in
order to keep them separate.
The cache uses a least-recently-used (LRU) culling strategy.
Note that each process will have its own private cache instance, which means no
cross-process caching is possible. This obviously also means the local memory
cache isn't particularly memory-efficient, so it's probably not a good choice
for production environments. It's nice for development.
.. versionchanged:: 2.1
Older versions use a pseudo-random culling strategy rather than LRU.
Dummy caching (for development)
-------------------------------

50
tests/cache/tests.py vendored
View File

@ -1084,11 +1084,16 @@ class PicklingSideEffect:
self.locked = False
def __getstate__(self):
if self.cache._lock.active_writers:
self.locked = True
self.locked = self.cache._lock.locked()
return {}
limit_locmem_entries = override_settings(CACHES=caches_setting_for_tests(
BACKEND='django.core.cache.backends.locmem.LocMemCache',
OPTIONS={'MAX_ENTRIES': 9},
))
@override_settings(CACHES=caches_setting_for_tests(
BACKEND='django.core.cache.backends.locmem.LocMemCache',
))
@ -1144,6 +1149,47 @@ class LocMemCacheTests(BaseCacheTests, TestCase):
cache.decr(key)
self.assertEqual(expire, cache._expire_info[_key])
@limit_locmem_entries
def test_lru_get(self):
"""get() moves cache keys."""
for key in range(9):
cache.set(key, key, timeout=None)
for key in range(6):
self.assertEqual(cache.get(key), key)
cache.set(9, 9, timeout=None)
for key in range(6):
self.assertEqual(cache.get(key), key)
for key in range(6, 9):
self.assertIsNone(cache.get(key))
self.assertEqual(cache.get(9), 9)
@limit_locmem_entries
def test_lru_set(self):
"""set() moves cache keys."""
for key in range(9):
cache.set(key, key, timeout=None)
for key in range(3, 9):
cache.set(key, key, timeout=None)
cache.set(9, 9, timeout=None)
for key in range(3, 10):
self.assertEqual(cache.get(key), key)
for key in range(3):
self.assertIsNone(cache.get(key))
@limit_locmem_entries
def test_lru_incr(self):
"""incr() moves cache keys."""
for key in range(9):
cache.set(key, key, timeout=None)
for key in range(6):
cache.incr(key)
cache.set(9, 9, timeout=None)
for key in range(6):
self.assertEqual(cache.get(key), key + 1)
for key in range(6, 9):
self.assertIsNone(cache.get(key))
self.assertEqual(cache.get(9), 9)
# memcached backend isn't guaranteed to be available.
# To check the memcached backend, the test settings file will