Fixed #20536 -- rewrite of the file based cache backend
* Safer for use in multiprocess environments
* Better random culling
* Cache files use less disk space
* Safer delete behavior

Also fixed #15806, fixed #15825.
This commit is contained in:
parent
ac2d86f8d3
commit
7be638390e
|
@ -1,156 +1,156 @@
|
||||||
"File-based cache backend"
|
"File-based cache backend"
|
||||||
|
import errno
|
||||||
|
import glob
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import io
|
||||||
import os
|
import os
|
||||||
import shutil
|
import random
|
||||||
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
import zlib
|
||||||
|
from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
|
||||||
|
from django.core.files.move import file_move_safe
|
||||||
|
from django.utils.encoding import force_bytes
|
||||||
try:
|
try:
|
||||||
from django.utils.six.moves import cPickle as pickle
|
from django.utils.six.moves import cPickle as pickle
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
|
|
||||||
from django.utils.encoding import force_bytes
|
|
||||||
|
|
||||||
|
|
||||||
class FileBasedCache(BaseCache):
|
class FileBasedCache(BaseCache):
|
||||||
|
cache_suffix = '.djcache'
|
||||||
|
|
||||||
def __init__(self, dir, params):
|
def __init__(self, dir, params):
|
||||||
BaseCache.__init__(self, params)
|
super(FileBasedCache, self).__init__(params)
|
||||||
self._dir = dir
|
self._dir = os.path.abspath(dir)
|
||||||
if not os.path.exists(self._dir):
|
self._createdir()
|
||||||
self._createdir()
|
|
||||||
|
|
||||||
def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
|
def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
|
||||||
if self.has_key(key, version=version):
|
if self.has_key(key, version):
|
||||||
return False
|
return False
|
||||||
|
self.set(key, value, timeout, version)
|
||||||
self.set(key, value, timeout, version=version)
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get(self, key, default=None, version=None):
|
def get(self, key, default=None, version=None):
|
||||||
key = self.make_key(key, version=version)
|
fname = self._key_to_file(key, version)
|
||||||
self.validate_key(key)
|
if os.path.exists(fname):
|
||||||
|
try:
|
||||||
fname = self._key_to_file(key)
|
with io.open(fname, 'rb') as f:
|
||||||
try:
|
if not self._is_expired(f):
|
||||||
with open(fname, 'rb') as f:
|
return pickle.loads(zlib.decompress(f.read()))
|
||||||
exp = pickle.load(f)
|
except IOError as e:
|
||||||
now = time.time()
|
if e.errno == errno.ENOENT:
|
||||||
if exp is not None and exp < now:
|
pass # Cache file was removed after the exists check
|
||||||
self._delete(fname)
|
|
||||||
else:
|
|
||||||
return pickle.load(f)
|
|
||||||
except (IOError, OSError, EOFError, pickle.PickleError):
|
|
||||||
pass
|
|
||||||
return default
|
return default
|
||||||
|
|
||||||
def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
|
def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
|
||||||
key = self.make_key(key, version=version)
|
self._createdir() # Cache dir can be deleted at any time.
|
||||||
self.validate_key(key)
|
fname = self._key_to_file(key, version)
|
||||||
|
self._cull() # make some room if necessary
|
||||||
fname = self._key_to_file(key)
|
fd, tmp_path = tempfile.mkstemp(dir=self._dir)
|
||||||
dirname = os.path.dirname(fname)
|
renamed = False
|
||||||
|
|
||||||
self._cull()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not os.path.exists(dirname):
|
with io.open(fd, 'wb') as f:
|
||||||
os.makedirs(dirname)
|
|
||||||
|
|
||||||
with open(fname, 'wb') as f:
|
|
||||||
expiry = self.get_backend_timeout(timeout)
|
expiry = self.get_backend_timeout(timeout)
|
||||||
pickle.dump(expiry, f, pickle.HIGHEST_PROTOCOL)
|
f.write(pickle.dumps(expiry, -1))
|
||||||
pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
|
f.write(zlib.compress(pickle.dumps(value), -1))
|
||||||
except (IOError, OSError):
|
file_move_safe(tmp_path, fname, allow_overwrite=True)
|
||||||
pass
|
renamed = True
|
||||||
|
finally:
|
||||||
|
if not renamed:
|
||||||
|
os.remove(tmp_path)
|
||||||
|
|
||||||
def delete(self, key, version=None):
|
def delete(self, key, version=None):
|
||||||
key = self.make_key(key, version=version)
|
self._delete(self._key_to_file(key, version))
|
||||||
self.validate_key(key)
|
|
||||||
try:
|
|
||||||
self._delete(self._key_to_file(key))
|
|
||||||
except (IOError, OSError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _delete(self, fname):
|
def _delete(self, fname):
|
||||||
os.remove(fname)
|
if not fname.startswith(self._dir) or not os.path.exists(fname):
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
# Remove the 2 subdirs if they're empty
|
os.remove(fname)
|
||||||
dirname = os.path.dirname(fname)
|
except OSError as e:
|
||||||
os.rmdir(dirname)
|
# ENOENT can happen if the cache file is removed (by another
|
||||||
os.rmdir(os.path.dirname(dirname))
|
# process) after the os.path.exists check.
|
||||||
except (IOError, OSError):
|
if e.errno != errno.ENOENT:
|
||||||
pass
|
raise
|
||||||
|
|
||||||
def has_key(self, key, version=None):
|
def has_key(self, key, version=None):
|
||||||
key = self.make_key(key, version=version)
|
fname = self._key_to_file(key, version)
|
||||||
self.validate_key(key)
|
if os.path.exists(fname):
|
||||||
fname = self._key_to_file(key)
|
with io.open(fname, 'rb') as f:
|
||||||
try:
|
return not self._is_expired(f)
|
||||||
with open(fname, 'rb') as f:
|
return False
|
||||||
exp = pickle.load(f)
|
|
||||||
now = time.time()
|
|
||||||
if exp < now:
|
|
||||||
self._delete(fname)
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
except (IOError, OSError, EOFError, pickle.PickleError):
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _cull(self):
|
def _cull(self):
|
||||||
if int(self._num_entries) < self._max_entries:
|
"""
|
||||||
return
|
Removes random cache entries if max_entries is reached at a ratio
|
||||||
|
of num_entries / cull_frequency. A value of 0 for CULL_FREQUENCY means
|
||||||
try:
|
that the entire cache will be purged.
|
||||||
filelist = sorted(os.listdir(self._dir))
|
"""
|
||||||
except (IOError, OSError):
|
filelist = self._list_cache_files()
|
||||||
return
|
num_entries = len(filelist)
|
||||||
|
if num_entries < self._max_entries:
|
||||||
|
return # return early if no culling is required
|
||||||
if self._cull_frequency == 0:
|
if self._cull_frequency == 0:
|
||||||
doomed = filelist
|
return self.clear() # Clear the cache when CULL_FREQUENCY = 0
|
||||||
else:
|
# Delete a random selection of entries
|
||||||
doomed = [os.path.join(self._dir, k) for (i, k) in enumerate(filelist) if i % self._cull_frequency == 0]
|
filelist = random.sample(filelist,
|
||||||
|
int(num_entries / self._cull_frequency))
|
||||||
for topdir in doomed:
|
for fname in filelist:
|
||||||
try:
|
self._delete(fname)
|
||||||
for root, _, files in os.walk(topdir):
|
|
||||||
for f in files:
|
|
||||||
self._delete(os.path.join(root, f))
|
|
||||||
except (IOError, OSError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _createdir(self):
|
def _createdir(self):
|
||||||
try:
|
if not os.path.exists(self._dir):
|
||||||
os.makedirs(self._dir)
|
try:
|
||||||
except OSError:
|
os.makedirs(self._dir, 0o700)
|
||||||
raise EnvironmentError("Cache directory '%s' does not exist and could not be created'" % self._dir)
|
except OSError as e:
|
||||||
|
if e.errno != errno.EEXIST:
|
||||||
|
raise EnvironmentError(
|
||||||
|
"Cache directory '%s' does not exist "
|
||||||
|
"and could not be created'" % self._dir)
|
||||||
|
|
||||||
def _key_to_file(self, key):
|
def _key_to_file(self, key, version=None):
|
||||||
"""
|
"""
|
||||||
Convert the filename into an md5 string. We'll turn the first couple
|
Convert a key into a cache file path. Basically this is the
|
||||||
bits of the path into directory prefixes to be nice to filesystems
|
root cache path joined with the md5sum of the key and a suffix.
|
||||||
that have problems with large numbers of files in a directory.
|
|
||||||
|
|
||||||
Thus, a cache key of "foo" gets turned into a file named
|
|
||||||
``{cache-dir}ac/bd/18db4cc2f85cedef654fccc4a4d8``.
|
|
||||||
"""
|
"""
|
||||||
path = hashlib.md5(force_bytes(key)).hexdigest()
|
key = self.make_key(key, version=version)
|
||||||
path = os.path.join(path[:2], path[2:4], path[4:])
|
self.validate_key(key)
|
||||||
return os.path.join(self._dir, path)
|
return os.path.join(self._dir, ''.join(
|
||||||
|
[hashlib.md5(force_bytes(key)).hexdigest(), self.cache_suffix]))
|
||||||
def _get_num_entries(self):
|
|
||||||
count = 0
|
|
||||||
for _, _, files in os.walk(self._dir):
|
|
||||||
count += len(files)
|
|
||||||
return count
|
|
||||||
_num_entries = property(_get_num_entries)
|
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
try:
|
"""
|
||||||
shutil.rmtree(self._dir)
|
Remove all the cache files.
|
||||||
except (IOError, OSError):
|
"""
|
||||||
pass
|
if not os.path.exists(self._dir):
|
||||||
|
return
|
||||||
|
for fname in self._list_cache_files():
|
||||||
|
self._delete(fname)
|
||||||
|
|
||||||
|
def _is_expired(self, f):
|
||||||
|
"""
|
||||||
|
Takes an open cache file and determines if it has expired,
|
||||||
|
deletes the file if it has passed its expiry time.
|
||||||
|
"""
|
||||||
|
exp = pickle.load(f)
|
||||||
|
if exp is not None and exp < time.time():
|
||||||
|
f.close() # On Windows a file has to be closed before deleting
|
||||||
|
self._delete(f.name)
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _list_cache_files(self):
|
||||||
|
"""
|
||||||
|
Get a list of paths to all the cache files. These are all the files
|
||||||
|
in the root cache dir that end with the cache_suffix.
|
||||||
|
"""
|
||||||
|
if not os.path.exists(self._dir):
|
||||||
|
return []
|
||||||
|
filelist = [os.path.join(self._dir, fname) for fname
|
||||||
|
in glob.glob1(self._dir, '*%s' % self.cache_suffix)]
|
||||||
|
return filelist
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# For backwards compatibility
|
# For backwards compatibility
|
||||||
|
|
|
@ -253,10 +253,11 @@ model.
|
||||||
Filesystem caching
|
Filesystem caching
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
To store cached items on a filesystem, use
|
The file-based backend serializes and stores each cache value as a separate
|
||||||
``"django.core.cache.backends.filebased.FileBasedCache"`` for
|
file. To use this backend set :setting:`BACKEND <CACHES-BACKEND>` to
|
||||||
:setting:`BACKEND <CACHES-BACKEND>`. For example, to store cached data in
|
``"django.core.cache.backends.filebased.FileBasedCache"`` and
|
||||||
``/var/tmp/django_cache``, use this setting::
|
:setting:`LOCATION <CACHES-LOCATION>` to a suitable directory. For example,
|
||||||
|
to store cached data in ``/var/tmp/django_cache``, use this setting::
|
||||||
|
|
||||||
CACHES = {
|
CACHES = {
|
||||||
'default': {
|
'default': {
|
||||||
|
@ -265,7 +266,6 @@ To store cached items on a filesystem, use
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
If you're on Windows, put the drive letter at the beginning of the path,
|
If you're on Windows, put the drive letter at the beginning of the path,
|
||||||
like this::
|
like this::
|
||||||
|
|
||||||
|
@ -286,10 +286,6 @@ above example, if your server runs as the user ``apache``, make sure the
|
||||||
directory ``/var/tmp/django_cache`` exists and is readable and writable by the
|
directory ``/var/tmp/django_cache`` exists and is readable and writable by the
|
||||||
user ``apache``.
|
user ``apache``.
|
||||||
|
|
||||||
Each cache value will be stored as a separate file whose contents are the
|
|
||||||
cache data saved in a serialized ("pickled") format, using Python's ``pickle``
|
|
||||||
module. Each file's name is the cache key, escaped for safe filesystem use.
|
|
||||||
|
|
||||||
Local-memory caching
|
Local-memory caching
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
|
|
|
@ -1076,33 +1076,35 @@ class FileBasedCacheTests(unittest.TestCase, BaseCacheTests):
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
self.cache.clear()
|
self.cache.clear()
|
||||||
|
os.rmdir(self.dirname)
|
||||||
def test_hashing(self):
|
|
||||||
"""Test that keys are hashed into subdirectories correctly"""
|
|
||||||
self.cache.set("foo", "bar")
|
|
||||||
key = self.cache.make_key("foo")
|
|
||||||
keyhash = hashlib.md5(key.encode()).hexdigest()
|
|
||||||
keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
|
|
||||||
self.assertTrue(os.path.exists(keypath))
|
|
||||||
|
|
||||||
def test_subdirectory_removal(self):
|
|
||||||
"""
|
|
||||||
Make sure that the created subdirectories are correctly removed when empty.
|
|
||||||
"""
|
|
||||||
self.cache.set("foo", "bar")
|
|
||||||
key = self.cache.make_key("foo")
|
|
||||||
keyhash = hashlib.md5(key.encode()).hexdigest()
|
|
||||||
keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
|
|
||||||
self.assertTrue(os.path.exists(keypath))
|
|
||||||
|
|
||||||
self.cache.delete("foo")
|
|
||||||
self.assertTrue(not os.path.exists(keypath))
|
|
||||||
self.assertTrue(not os.path.exists(os.path.dirname(keypath)))
|
|
||||||
self.assertTrue(not os.path.exists(os.path.dirname(os.path.dirname(keypath))))
|
|
||||||
|
|
||||||
def test_cull(self):
|
def test_cull(self):
|
||||||
self.perform_cull_test(50, 29)
|
self.perform_cull_test(50, 29)
|
||||||
|
|
||||||
|
def test_ignores_non_cache_files(self):
|
||||||
|
fname = os.path.join(self.dirname, 'not-a-cache-file')
|
||||||
|
with open(fname, 'w'):
|
||||||
|
os.utime(fname, None)
|
||||||
|
self.cache.clear()
|
||||||
|
self.assertTrue(os.path.exists(fname),
|
||||||
|
'Expected cache.clear to ignore non cache files')
|
||||||
|
os.remove(fname)
|
||||||
|
|
||||||
|
def test_clear_does_not_remove_cache_dir(self):
|
||||||
|
self.cache.clear()
|
||||||
|
self.assertTrue(os.path.exists(self.dirname),
|
||||||
|
'Expected cache.clear to keep the cache dir')
|
||||||
|
|
||||||
|
def test_creates_cache_dir_if_nonexistent(self):
|
||||||
|
os.rmdir(self.dirname)
|
||||||
|
self.cache.set('foo', 'bar')
|
||||||
|
os.path.exists(self.dirname)
|
||||||
|
|
||||||
|
def test_zero_cull(self):
|
||||||
|
# Regression test for #15806
|
||||||
|
self.cache = get_cache(self.backend_name, LOCATION=self.dirname, OPTIONS={'MAX_ENTRIES': 30, 'CULL_FREQUENCY': 0})
|
||||||
|
self.perform_cull_test(50, 19)
|
||||||
|
|
||||||
|
|
||||||
class CustomCacheKeyValidationTests(unittest.TestCase):
|
class CustomCacheKeyValidationTests(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue