Fixed #20536 -- rewrite of the file-based cache backend

* Safer for use in multiprocess environments
* Better random culling
 * Cache files use less disk space
 * Safer delete behavior

Also fixed #15806, fixed #15825.
This commit is contained in:
Jaap Roes 2013-08-26 16:34:02 +02:00 committed by Anssi Kääriäinen
parent ac2d86f8d3
commit 7be638390e
3 changed files with 143 additions and 145 deletions

View File

@ -1,156 +1,156 @@
"File-based cache backend" "File-based cache backend"
import errno
import glob
import hashlib import hashlib
import io
import os import os
import shutil import random
import tempfile
import time import time
import zlib
from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
from django.core.files.move import file_move_safe
from django.utils.encoding import force_bytes
try: try:
from django.utils.six.moves import cPickle as pickle from django.utils.six.moves import cPickle as pickle
except ImportError: except ImportError:
import pickle import pickle
from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
from django.utils.encoding import force_bytes
class FileBasedCache(BaseCache): class FileBasedCache(BaseCache):
cache_suffix = '.djcache'
def __init__(self, dir, params): def __init__(self, dir, params):
BaseCache.__init__(self, params) super(FileBasedCache, self).__init__(params)
self._dir = dir self._dir = os.path.abspath(dir)
if not os.path.exists(self._dir): self._createdir()
self._createdir()
def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None): def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
if self.has_key(key, version=version): if self.has_key(key, version):
return False return False
self.set(key, value, timeout, version)
self.set(key, value, timeout, version=version)
return True return True
def get(self, key, default=None, version=None): def get(self, key, default=None, version=None):
key = self.make_key(key, version=version) fname = self._key_to_file(key, version)
self.validate_key(key) if os.path.exists(fname):
try:
fname = self._key_to_file(key) with io.open(fname, 'rb') as f:
try: if not self._is_expired(f):
with open(fname, 'rb') as f: return pickle.loads(zlib.decompress(f.read()))
exp = pickle.load(f) except IOError as e:
now = time.time() if e.errno == errno.ENOENT:
if exp is not None and exp < now: pass # Cache file was removed after the exists check
self._delete(fname)
else:
return pickle.load(f)
except (IOError, OSError, EOFError, pickle.PickleError):
pass
return default return default
def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None): def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
key = self.make_key(key, version=version) self._createdir() # Cache dir can be deleted at any time.
self.validate_key(key) fname = self._key_to_file(key, version)
self._cull() # make some room if necessary
fname = self._key_to_file(key) fd, tmp_path = tempfile.mkstemp(dir=self._dir)
dirname = os.path.dirname(fname) renamed = False
self._cull()
try: try:
if not os.path.exists(dirname): with io.open(fd, 'wb') as f:
os.makedirs(dirname)
with open(fname, 'wb') as f:
expiry = self.get_backend_timeout(timeout) expiry = self.get_backend_timeout(timeout)
pickle.dump(expiry, f, pickle.HIGHEST_PROTOCOL) f.write(pickle.dumps(expiry, -1))
pickle.dump(value, f, pickle.HIGHEST_PROTOCOL) f.write(zlib.compress(pickle.dumps(value), -1))
except (IOError, OSError): file_move_safe(tmp_path, fname, allow_overwrite=True)
pass renamed = True
finally:
if not renamed:
os.remove(tmp_path)
def delete(self, key, version=None): def delete(self, key, version=None):
key = self.make_key(key, version=version) self._delete(self._key_to_file(key, version))
self.validate_key(key)
try:
self._delete(self._key_to_file(key))
except (IOError, OSError):
pass
def _delete(self, fname): def _delete(self, fname):
os.remove(fname) if not fname.startswith(self._dir) or not os.path.exists(fname):
return
try: try:
# Remove the 2 subdirs if they're empty os.remove(fname)
dirname = os.path.dirname(fname) except OSError as e:
os.rmdir(dirname) # ENOENT can happen if the cache file is removed (by another
os.rmdir(os.path.dirname(dirname)) # process) after the os.path.exists check.
except (IOError, OSError): if e.errno != errno.ENOENT:
pass raise
def has_key(self, key, version=None): def has_key(self, key, version=None):
key = self.make_key(key, version=version) fname = self._key_to_file(key, version)
self.validate_key(key) if os.path.exists(fname):
fname = self._key_to_file(key) with io.open(fname, 'rb') as f:
try: return not self._is_expired(f)
with open(fname, 'rb') as f: return False
exp = pickle.load(f)
now = time.time()
if exp < now:
self._delete(fname)
return False
else:
return True
except (IOError, OSError, EOFError, pickle.PickleError):
return False
def _cull(self): def _cull(self):
if int(self._num_entries) < self._max_entries: """
return Removes random cache entries if max_entries is reached at a ratio
of num_entries / cull_frequency. A value of 0 for CULL_FREQUENCY means
try: that the entire cache will be purged.
filelist = sorted(os.listdir(self._dir)) """
except (IOError, OSError): filelist = self._list_cache_files()
return num_entries = len(filelist)
if num_entries < self._max_entries:
return # return early if no culling is required
if self._cull_frequency == 0: if self._cull_frequency == 0:
doomed = filelist return self.clear() # Clear the cache when CULL_FREQUENCY = 0
else: # Delete a random selection of entries
doomed = [os.path.join(self._dir, k) for (i, k) in enumerate(filelist) if i % self._cull_frequency == 0] filelist = random.sample(filelist,
int(num_entries / self._cull_frequency))
for topdir in doomed: for fname in filelist:
try: self._delete(fname)
for root, _, files in os.walk(topdir):
for f in files:
self._delete(os.path.join(root, f))
except (IOError, OSError):
pass
def _createdir(self): def _createdir(self):
try: if not os.path.exists(self._dir):
os.makedirs(self._dir) try:
except OSError: os.makedirs(self._dir, 0o700)
raise EnvironmentError("Cache directory '%s' does not exist and could not be created'" % self._dir) except OSError as e:
if e.errno != errno.EEXIST:
raise EnvironmentError(
"Cache directory '%s' does not exist "
"and could not be created'" % self._dir)
def _key_to_file(self, key): def _key_to_file(self, key, version=None):
""" """
Convert the filename into an md5 string. We'll turn the first couple Convert a key into a cache file path. Basically this is the
bits of the path into directory prefixes to be nice to filesystems root cache path joined with the md5sum of the key and a suffix.
that have problems with large numbers of files in a directory.
Thus, a cache key of "foo" gets turned into a file named
``{cache-dir}ac/bd/18db4cc2f85cedef654fccc4a4d8``.
""" """
path = hashlib.md5(force_bytes(key)).hexdigest() key = self.make_key(key, version=version)
path = os.path.join(path[:2], path[2:4], path[4:]) self.validate_key(key)
return os.path.join(self._dir, path) return os.path.join(self._dir, ''.join(
[hashlib.md5(force_bytes(key)).hexdigest(), self.cache_suffix]))
def _get_num_entries(self):
count = 0
for _, _, files in os.walk(self._dir):
count += len(files)
return count
_num_entries = property(_get_num_entries)
def clear(self): def clear(self):
try: """
shutil.rmtree(self._dir) Remove all the cache files.
except (IOError, OSError): """
pass if not os.path.exists(self._dir):
return
for fname in self._list_cache_files():
self._delete(fname)
def _is_expired(self, f):
"""
Takes an open cache file and determines if it has expired,
deletes the file if it has passed its expiry time.
"""
exp = pickle.load(f)
if exp is not None and exp < time.time():
f.close() # On Windows a file has to be closed before deleting
self._delete(f.name)
return True
return False
def _list_cache_files(self):
"""
Get a list of paths to all the cache files. These are all the files
in the root cache dir that end with the cache_suffix.
"""
if not os.path.exists(self._dir):
return []
filelist = [os.path.join(self._dir, fname) for fname
in glob.glob1(self._dir, '*%s' % self.cache_suffix)]
return filelist
# For backwards compatibility # For backwards compatibility

View File

@ -253,10 +253,11 @@ model.
Filesystem caching Filesystem caching
------------------ ------------------
To store cached items on a filesystem, use The file-based backend serializes and stores each cache value as a separate
``"django.core.cache.backends.filebased.FileBasedCache"`` for file. To use this backend set :setting:`BACKEND <CACHES-BACKEND>` to
:setting:`BACKEND <CACHES-BACKEND>`. For example, to store cached data in ``"django.core.cache.backends.filebased.FileBasedCache"`` and
``/var/tmp/django_cache``, use this setting:: :setting:`LOCATION <CACHES-LOCATION>` to a suitable directory. For example,
to store cached data in ``/var/tmp/django_cache``, use this setting::
CACHES = { CACHES = {
'default': { 'default': {
@ -265,7 +266,6 @@ To store cached items on a filesystem, use
} }
} }
If you're on Windows, put the drive letter at the beginning of the path, If you're on Windows, put the drive letter at the beginning of the path,
like this:: like this::
@ -286,10 +286,6 @@ above example, if your server runs as the user ``apache``, make sure the
directory ``/var/tmp/django_cache`` exists and is readable and writable by the directory ``/var/tmp/django_cache`` exists and is readable and writable by the
user ``apache``. user ``apache``.
Each cache value will be stored as a separate file whose contents are the
cache data saved in a serialized ("pickled") format, using Python's ``pickle``
module. Each file's name is the cache key, escaped for safe filesystem use.
Local-memory caching Local-memory caching
-------------------- --------------------

48
tests/cache/tests.py vendored
View File

@ -1076,33 +1076,35 @@ class FileBasedCacheTests(unittest.TestCase, BaseCacheTests):
def tearDown(self): def tearDown(self):
self.cache.clear() self.cache.clear()
os.rmdir(self.dirname)
def test_hashing(self):
"""Test that keys are hashed into subdirectories correctly"""
self.cache.set("foo", "bar")
key = self.cache.make_key("foo")
keyhash = hashlib.md5(key.encode()).hexdigest()
keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
self.assertTrue(os.path.exists(keypath))
def test_subdirectory_removal(self):
"""
Make sure that the created subdirectories are correctly removed when empty.
"""
self.cache.set("foo", "bar")
key = self.cache.make_key("foo")
keyhash = hashlib.md5(key.encode()).hexdigest()
keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
self.assertTrue(os.path.exists(keypath))
self.cache.delete("foo")
self.assertTrue(not os.path.exists(keypath))
self.assertTrue(not os.path.exists(os.path.dirname(keypath)))
self.assertTrue(not os.path.exists(os.path.dirname(os.path.dirname(keypath))))
def test_cull(self): def test_cull(self):
self.perform_cull_test(50, 29) self.perform_cull_test(50, 29)
def test_ignores_non_cache_files(self):
fname = os.path.join(self.dirname, 'not-a-cache-file')
with open(fname, 'w'):
os.utime(fname, None)
self.cache.clear()
self.assertTrue(os.path.exists(fname),
'Expected cache.clear to ignore non cache files')
os.remove(fname)
def test_clear_does_not_remove_cache_dir(self):
self.cache.clear()
self.assertTrue(os.path.exists(self.dirname),
'Expected cache.clear to keep the cache dir')
def test_creates_cache_dir_if_nonexistent(self):
os.rmdir(self.dirname)
self.cache.set('foo', 'bar')
os.path.exists(self.dirname)
def test_zero_cull(self):
# Regression test for #15806
self.cache = get_cache(self.backend_name, LOCATION=self.dirname, OPTIONS={'MAX_ENTRIES': 30, 'CULL_FREQUENCY': 0})
self.perform_cull_test(50, 19)
class CustomCacheKeyValidationTests(unittest.TestCase): class CustomCacheKeyValidationTests(unittest.TestCase):
""" """