Fixed #20536 -- rewrite of the file based cache backend
* Safer for use in multiprocess environments
* Better random culling
* Cache files use less disk space
* Safer delete behavior

Also fixed #15806, fixed #15825.
commit 7be638390e
parent ac2d86f8d3
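The multiprocess safety in the first bullet comes from writing each entry to a
temporary file and then renaming it into place, so concurrent readers never
observe a half-written cache file. A minimal sketch of that pattern, assuming
a hypothetical write_entry helper (the real backend below uses
tempfile.mkstemp plus Django's file_move_safe):

    import os
    import tempfile

    def write_entry(cache_dir, fname, data):
        # Create the temp file inside the cache dir so the rename cannot
        # cross filesystems, which would make it non-atomic.
        fd, tmp_path = tempfile.mkstemp(dir=cache_dir)
        renamed = False
        try:
            with os.fdopen(fd, 'wb') as f:
                f.write(data)
            # Atomic on POSIX; plain os.rename fails on Windows if fname
            # exists, which is why Django uses file_move_safe instead.
            os.rename(tmp_path, fname)
            renamed = True
        finally:
            if not renamed:
                os.remove(tmp_path)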
django/core/cache/backends/filebased.py
@@ -1,156 +1,156 @@
 "File-based cache backend"
+import errno
+import glob
 import hashlib
+import io
 import os
-import shutil
+import random
+import tempfile
 import time
+import zlib
+
+from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
+from django.core.files.move import file_move_safe
+from django.utils.encoding import force_bytes
 try:
     from django.utils.six.moves import cPickle as pickle
 except ImportError:
     import pickle

-from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
-from django.utils.encoding import force_bytes
-

 class FileBasedCache(BaseCache):
+    cache_suffix = '.djcache'
+
     def __init__(self, dir, params):
-        BaseCache.__init__(self, params)
-        self._dir = dir
-        if not os.path.exists(self._dir):
-            self._createdir()
+        super(FileBasedCache, self).__init__(params)
+        self._dir = os.path.abspath(dir)
+        self._createdir()

     def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
-        if self.has_key(key, version=version):
+        if self.has_key(key, version):
             return False
-
-        self.set(key, value, timeout, version=version)
+        self.set(key, value, timeout, version)
         return True

     def get(self, key, default=None, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-
-        fname = self._key_to_file(key)
-        try:
-            with open(fname, 'rb') as f:
-                exp = pickle.load(f)
-                now = time.time()
-                if exp is not None and exp < now:
-                    self._delete(fname)
-                else:
-                    return pickle.load(f)
-        except (IOError, OSError, EOFError, pickle.PickleError):
-            pass
+        fname = self._key_to_file(key, version)
+        if os.path.exists(fname):
+            try:
+                with io.open(fname, 'rb') as f:
+                    if not self._is_expired(f):
+                        return pickle.loads(zlib.decompress(f.read()))
+            except IOError as e:
+                if e.errno == errno.ENOENT:
+                    pass  # Cache file was removed after the exists check
         return default

     def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-
-        fname = self._key_to_file(key)
-        dirname = os.path.dirname(fname)
-
-        self._cull()
-
+        self._createdir()  # Cache dir can be deleted at any time.
+        fname = self._key_to_file(key, version)
+        self._cull()  # make some room if necessary
+        fd, tmp_path = tempfile.mkstemp(dir=self._dir)
+        renamed = False
         try:
-            if not os.path.exists(dirname):
-                os.makedirs(dirname)
-
-            with open(fname, 'wb') as f:
+            with io.open(fd, 'wb') as f:
                 expiry = self.get_backend_timeout(timeout)
-                pickle.dump(expiry, f, pickle.HIGHEST_PROTOCOL)
-                pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
-        except (IOError, OSError):
-            pass
+                f.write(pickle.dumps(expiry, -1))
+                f.write(zlib.compress(pickle.dumps(value), -1))
+            file_move_safe(tmp_path, fname, allow_overwrite=True)
+            renamed = True
+        finally:
+            if not renamed:
+                os.remove(tmp_path)

     def delete(self, key, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-        try:
-            self._delete(self._key_to_file(key))
-        except (IOError, OSError):
-            pass
+        self._delete(self._key_to_file(key, version))

     def _delete(self, fname):
-        os.remove(fname)
+        if not fname.startswith(self._dir) or not os.path.exists(fname):
+            return
         try:
-            # Remove the 2 subdirs if they're empty
-            dirname = os.path.dirname(fname)
-            os.rmdir(dirname)
-            os.rmdir(os.path.dirname(dirname))
-        except (IOError, OSError):
-            pass
+            os.remove(fname)
+        except OSError as e:
+            # ENOENT can happen if the cache file is removed (by another
+            # process) after the os.path.exists check.
+            if e.errno != errno.ENOENT:
+                raise

     def has_key(self, key, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-        fname = self._key_to_file(key)
-        try:
-            with open(fname, 'rb') as f:
-                exp = pickle.load(f)
-                now = time.time()
-                if exp < now:
-                    self._delete(fname)
-                    return False
-                else:
-                    return True
-        except (IOError, OSError, EOFError, pickle.PickleError):
-            return False
+        fname = self._key_to_file(key, version)
+        if os.path.exists(fname):
+            with io.open(fname, 'rb') as f:
+                return not self._is_expired(f)
+        return False

     def _cull(self):
-        if int(self._num_entries) < self._max_entries:
-            return
-
-        try:
-            filelist = sorted(os.listdir(self._dir))
-        except (IOError, OSError):
-            return
-
-        if self._cull_frequency == 0:
-            doomed = filelist
-        else:
-            doomed = [os.path.join(self._dir, k) for (i, k) in enumerate(filelist) if i % self._cull_frequency == 0]
-
-        for topdir in doomed:
-            try:
-                for root, _, files in os.walk(topdir):
-                    for f in files:
-                        self._delete(os.path.join(root, f))
-            except (IOError, OSError):
-                pass
+        """
+        Removes random cache entries if max_entries is reached at a ratio
+        of num_entries / cull_frequency. A value of 0 for CULL_FREQUENCY means
+        that the entire cache will be purged.
+        """
+        filelist = self._list_cache_files()
+        num_entries = len(filelist)
+        if num_entries < self._max_entries:
+            return  # return early if no culling is required
+        if self._cull_frequency == 0:
+            return self.clear()  # Clear the cache when CULL_FREQUENCY = 0
+        # Delete a random selection of entries
+        filelist = random.sample(filelist,
+                                 int(num_entries / self._cull_frequency))
+        for fname in filelist:
+            self._delete(fname)

     def _createdir(self):
         if not os.path.exists(self._dir):
             try:
-                os.makedirs(self._dir)
-            except OSError:
-                raise EnvironmentError("Cache directory '%s' does not exist and could not be created'" % self._dir)
+                os.makedirs(self._dir, 0o700)
+            except OSError as e:
+                if e.errno != errno.EEXIST:
+                    raise EnvironmentError(
+                        "Cache directory '%s' does not exist "
+                        "and could not be created'" % self._dir)

-    def _key_to_file(self, key):
+    def _key_to_file(self, key, version=None):
         """
-        Convert the filename into an md5 string. We'll turn the first couple
-        bits of the path into directory prefixes to be nice to filesystems
-        that have problems with large numbers of files in a directory.
-
-        Thus, a cache key of "foo" gets turnned into a file named
-        ``{cache-dir}ac/bd/18db4cc2f85cedef654fccc4a4d8``.
+        Convert a key into a cache file path. Basically this is the
+        root cache path joined with the md5sum of the key and a suffix.
         """
-        path = hashlib.md5(force_bytes(key)).hexdigest()
-        path = os.path.join(path[:2], path[2:4], path[4:])
-        return os.path.join(self._dir, path)
-
-    def _get_num_entries(self):
-        count = 0
-        for _, _, files in os.walk(self._dir):
-            count += len(files)
-        return count
-    _num_entries = property(_get_num_entries)
+        key = self.make_key(key, version=version)
+        self.validate_key(key)
+        return os.path.join(self._dir, ''.join(
+            [hashlib.md5(force_bytes(key)).hexdigest(), self.cache_suffix]))

     def clear(self):
-        try:
-            shutil.rmtree(self._dir)
-        except (IOError, OSError):
-            pass
+        """
+        Remove all the cache files.
+        """
+        if not os.path.exists(self._dir):
+            return
+        for fname in self._list_cache_files():
+            self._delete(fname)
+
+    def _is_expired(self, f):
+        """
+        Takes an open cache file and determines if it has expired,
+        deletes the file if it is has passed its expiry time.
+        """
+        exp = pickle.load(f)
+        if exp is not None and exp < time.time():
+            f.close()  # On Windows a file has to be closed before deleting
+            self._delete(f.name)
+            return True
+        return False
+
+    def _list_cache_files(self):
+        """
+        Get a list of paths to all the cache files. These are all the files
+        in the root cache dir that end on the cache_suffix.
+        """
+        if not os.path.exists(self._dir):
+            return []
+        filelist = [os.path.join(self._dir, fname) for fname
+                    in glob.glob1(self._dir, '*%s' % self.cache_suffix)]
+        return filelist


 # For backwards compatibility
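As the new set()/get() pair shows, each .djcache file now holds two parts: a
pickled expiry timestamp followed by the zlib-compressed pickled value (the
compression is where the "less disk space" bullet comes from). A minimal
sketch of decoding such a file, assuming a hypothetical read_cache_file
helper that is not part of Django's API:

    import pickle
    import time
    import zlib

    def read_cache_file(path):
        with open(path, 'rb') as f:
            # pickle.load reads exactly one pickle, leaving the file
            # position at the start of the compressed payload.
            expiry = pickle.load(f)
            if expiry is not None and expiry < time.time():
                return None  # expired; the real backend deletes the file
            return pickle.loads(zlib.decompress(f.read()))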
docs/topics/cache.txt
@@ -253,10 +253,11 @@ model.
 Filesystem caching
 ------------------

-To store cached items on a filesystem, use
-``"django.core.cache.backends.filebased.FileBasedCache"`` for
-:setting:`BACKEND <CACHES-BACKEND>`. For example, to store cached data in
-``/var/tmp/django_cache``, use this setting::
+The file-based backend serializes and stores each cache value as a separate
+file. To use this backend set :setting:`BACKEND <CACHES-BACKEND>` to
+``"django.core.cache.backends.filebased.FileBasedCache"`` and
+:setting:`LOCATION <CACHES-LOCATION>` to a suitable directory. For example,
+to store cached data in ``/var/tmp/django_cache``, use this setting::

     CACHES = {
         'default': {
@@ -265,7 +266,6 @@ To store cached items on a filesystem, use
         }
     }

-
 If you're on Windows, put the drive letter at the beginning of the path,
 like this::

@@ -286,10 +286,6 @@ above example, if your server runs as the user ``apache``, make sure the
 directory ``/var/tmp/django_cache`` exists and is readable and writable by the
 user ``apache``.

-Each cache value will be stored as a separate file whose contents are the
-cache data saved in a serialized ("pickled") format, using Python's ``pickle``
-module. Each file's name is the cache key, escaped for safe filesystem use.
-
 Local-memory caching
 --------------------

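Since the diff hunks elide the middle of the settings example, the full block
the documentation describes would read, with both keys filled in as the
surrounding prose specifies:

    CACHES = {
        'default': {
            'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
            'LOCATION': '/var/tmp/django_cache',
        }
    }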
tests/cache/tests.py
@@ -1076,33 +1076,35 @@ class FileBasedCacheTests(unittest.TestCase, BaseCacheTests):

     def tearDown(self):
         self.cache.clear()
-
-    def test_hashing(self):
-        """Test that keys are hashed into subdirectories correctly"""
-        self.cache.set("foo", "bar")
-        key = self.cache.make_key("foo")
-        keyhash = hashlib.md5(key.encode()).hexdigest()
-        keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
-        self.assertTrue(os.path.exists(keypath))
-
-    def test_subdirectory_removal(self):
-        """
-        Make sure that the created subdirectories are correctly removed when empty.
-        """
-        self.cache.set("foo", "bar")
-        key = self.cache.make_key("foo")
-        keyhash = hashlib.md5(key.encode()).hexdigest()
-        keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
-        self.assertTrue(os.path.exists(keypath))
-
-        self.cache.delete("foo")
-        self.assertTrue(not os.path.exists(keypath))
-        self.assertTrue(not os.path.exists(os.path.dirname(keypath)))
-        self.assertTrue(not os.path.exists(os.path.dirname(os.path.dirname(keypath))))
+        os.rmdir(self.dirname)

     def test_cull(self):
         self.perform_cull_test(50, 29)

+    def test_ignores_non_cache_files(self):
+        fname = os.path.join(self.dirname, 'not-a-cache-file')
+        with open(fname, 'w'):
+            os.utime(fname, None)
+        self.cache.clear()
+        self.assertTrue(os.path.exists(fname),
+                        'Expected cache.clear to ignore non cache files')
+        os.remove(fname)
+
+    def test_clear_does_not_remove_cache_dir(self):
+        self.cache.clear()
+        self.assertTrue(os.path.exists(self.dirname),
+                        'Expected cache.clear to keep the cache dir')
+
+    def test_creates_cache_dir_if_nonexistent(self):
+        os.rmdir(self.dirname)
+        self.cache.set('foo', 'bar')
+        os.path.exists(self.dirname)
+
     def test_zero_cull(self):
         # Regression test for #15806
         self.cache = get_cache(self.backend_name, LOCATION=self.dirname, OPTIONS={'MAX_ENTRIES': 30, 'CULL_FREQUENCY': 0})
         self.perform_cull_test(50, 19)


 class CustomCacheKeyValidationTests(unittest.TestCase):
     """
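The cull-related tests above exercise the new random culling rule from
_cull(): once max_entries is reached, a random 1/cull_frequency fraction of
the entries is deleted, and a CULL_FREQUENCY of 0 purges the entire cache
(the #15806 regression that test_zero_cull guards). A rough standalone sketch
of that rule, where cull_victims is an illustrative helper rather than the
backend method itself:

    import random

    def cull_victims(filelist, max_entries, cull_frequency):
        if len(filelist) < max_entries:
            return []  # under the limit; nothing to cull
        if cull_frequency == 0:
            return list(filelist)  # CULL_FREQUENCY = 0 purges everything
        # Delete a random 1/cull_frequency fraction of the entries.
        return random.sample(filelist, int(len(filelist) / cull_frequency))

Because _cull() runs on every set(), the survivor counts the tests assert (29
and 19 out of 50) fall out of applying this rule repeatedly while entries are
being added, not from a single pass.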