From 7be638390e18fcbfaaed638f9908673360c280d3 Mon Sep 17 00:00:00 2001
From: Jaap Roes
Date: Mon, 26 Aug 2013 16:34:02 +0200
Subject: [PATCH] Fixed #20536 -- rewrite of the file based cache backend

* Safer for use in multiprocess environments
* Better random culling
* Cache files use less disk space
* Safer delete behavior

Also fixed #15806, fixed #15825.
---
 django/core/cache/backends/filebased.py | 226 ++++++++++++------------
 docs/topics/cache.txt                    |  14 +-
 tests/cache/tests.py                     |  48 ++---
 3 files changed, 143 insertions(+), 145 deletions(-)

diff --git a/django/core/cache/backends/filebased.py b/django/core/cache/backends/filebased.py
index db9b242dd4..57b0f6fba8 100644
--- a/django/core/cache/backends/filebased.py
+++ b/django/core/cache/backends/filebased.py
@@ -1,156 +1,156 @@
 "File-based cache backend"
-
+import errno
+import glob
 import hashlib
+import io
 import os
-import shutil
+import random
+import tempfile
 import time
+import zlib
+
+from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
+from django.core.files.move import file_move_safe
+from django.utils.encoding import force_bytes
 
 try:
     from django.utils.six.moves import cPickle as pickle
 except ImportError:
     import pickle
 
-from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
-from django.utils.encoding import force_bytes
-
 
 class FileBasedCache(BaseCache):
+    cache_suffix = '.djcache'
+
     def __init__(self, dir, params):
-        BaseCache.__init__(self, params)
-        self._dir = dir
-        if not os.path.exists(self._dir):
-            self._createdir()
+        super(FileBasedCache, self).__init__(params)
+        self._dir = os.path.abspath(dir)
+        self._createdir()
 
     def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
-        if self.has_key(key, version=version):
+        if self.has_key(key, version):
             return False
-
-        self.set(key, value, timeout, version=version)
+        self.set(key, value, timeout, version)
         return True
 
     def get(self, key, default=None, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-
-        fname = self._key_to_file(key)
-        try:
-            with open(fname, 'rb') as f:
-                exp = pickle.load(f)
-                now = time.time()
-                if exp is not None and exp < now:
-                    self._delete(fname)
-                else:
-                    return pickle.load(f)
-        except (IOError, OSError, EOFError, pickle.PickleError):
-            pass
+        fname = self._key_to_file(key, version)
+        if os.path.exists(fname):
+            try:
+                with io.open(fname, 'rb') as f:
+                    if not self._is_expired(f):
+                        return pickle.loads(zlib.decompress(f.read()))
+            except IOError as e:
+                if e.errno == errno.ENOENT:
+                    pass  # Cache file was removed after the exists check
         return default
 
     def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-
-        fname = self._key_to_file(key)
-        dirname = os.path.dirname(fname)
-
-        self._cull()
-
+        self._createdir()  # Cache dir can be deleted at any time.
+        fname = self._key_to_file(key, version)
+        self._cull()  # make some room if necessary
+        fd, tmp_path = tempfile.mkstemp(dir=self._dir)
+        renamed = False
         try:
-            if not os.path.exists(dirname):
-                os.makedirs(dirname)
-
-            with open(fname, 'wb') as f:
+            with io.open(fd, 'wb') as f:
                 expiry = self.get_backend_timeout(timeout)
-                pickle.dump(expiry, f, pickle.HIGHEST_PROTOCOL)
-                pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
-        except (IOError, OSError):
-            pass
+                f.write(pickle.dumps(expiry, -1))
+                f.write(zlib.compress(pickle.dumps(value), -1))
+            file_move_safe(tmp_path, fname, allow_overwrite=True)
+            renamed = True
+        finally:
+            if not renamed:
+                os.remove(tmp_path)
 
     def delete(self, key, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-        try:
-            self._delete(self._key_to_file(key))
-        except (IOError, OSError):
-            pass
+        self._delete(self._key_to_file(key, version))
 
     def _delete(self, fname):
-        os.remove(fname)
+        if not fname.startswith(self._dir) or not os.path.exists(fname):
+            return
         try:
-            # Remove the 2 subdirs if they're empty
-            dirname = os.path.dirname(fname)
-            os.rmdir(dirname)
-            os.rmdir(os.path.dirname(dirname))
-        except (IOError, OSError):
-            pass
+            os.remove(fname)
+        except OSError as e:
+            # ENOENT can happen if the cache file is removed (by another
+            # process) after the os.path.exists check.
+            if e.errno != errno.ENOENT:
+                raise
 
     def has_key(self, key, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-        fname = self._key_to_file(key)
-        try:
-            with open(fname, 'rb') as f:
-                exp = pickle.load(f)
-                now = time.time()
-                if exp < now:
-                    self._delete(fname)
-                    return False
-                else:
-                    return True
-        except (IOError, OSError, EOFError, pickle.PickleError):
-            return False
+        fname = self._key_to_file(key, version)
+        if os.path.exists(fname):
+            with io.open(fname, 'rb') as f:
+                return not self._is_expired(f)
+        return False
 
     def _cull(self):
-        if int(self._num_entries) < self._max_entries:
-            return
-
-        try:
-            filelist = sorted(os.listdir(self._dir))
-        except (IOError, OSError):
-            return
-
+        """
+        Removes random cache entries, at a ratio of num_entries /
+        cull_frequency, if max_entries is reached. A value of 0 for
+        CULL_FREQUENCY means that the entire cache will be purged.
+        """
+        filelist = self._list_cache_files()
+        num_entries = len(filelist)
+        if num_entries < self._max_entries:
+            return  # return early if no culling is required
         if self._cull_frequency == 0:
-            doomed = filelist
-        else:
-            doomed = [os.path.join(self._dir, k) for (i, k) in enumerate(filelist) if i % self._cull_frequency == 0]
-
-        for topdir in doomed:
-            try:
-                for root, _, files in os.walk(topdir):
-                    for f in files:
-                        self._delete(os.path.join(root, f))
-            except (IOError, OSError):
-                pass
+            return self.clear()  # Clear the cache when CULL_FREQUENCY = 0
+        # Delete a random selection of entries
+        filelist = random.sample(filelist,
+                                 int(num_entries / self._cull_frequency))
+        for fname in filelist:
+            self._delete(fname)
 
     def _createdir(self):
-        try:
-            os.makedirs(self._dir)
-        except OSError:
-            raise EnvironmentError("Cache directory '%s' does not exist and could not be created'" % self._dir)
+        if not os.path.exists(self._dir):
+            try:
+                os.makedirs(self._dir, 0o700)
+            except OSError as e:
+                if e.errno != errno.EEXIST:
+                    raise EnvironmentError(
+                        "Cache directory '%s' does not exist "
+                        "and could not be created" % self._dir)
 
-    def _key_to_file(self, key):
+    def _key_to_file(self, key, version=None):
         """
-        Convert the filename into an md5 string. We'll turn the first couple
-        bits of the path into directory prefixes to be nice to filesystems
-        that have problems with large numbers of files in a directory.
-
-        Thus, a cache key of "foo" gets turnned into a file named
-        ``{cache-dir}ac/bd/18db4cc2f85cedef654fccc4a4d8``.
+        Convert a key into a cache file path. Basically this is the
+        root cache path joined with the md5sum of the key and a suffix.
         """
-        path = hashlib.md5(force_bytes(key)).hexdigest()
-        path = os.path.join(path[:2], path[2:4], path[4:])
-        return os.path.join(self._dir, path)
-
-    def _get_num_entries(self):
-        count = 0
-        for _, _, files in os.walk(self._dir):
-            count += len(files)
-        return count
-    _num_entries = property(_get_num_entries)
+        key = self.make_key(key, version=version)
+        self.validate_key(key)
+        return os.path.join(self._dir, ''.join(
+            [hashlib.md5(force_bytes(key)).hexdigest(), self.cache_suffix]))
 
     def clear(self):
-        try:
-            shutil.rmtree(self._dir)
-        except (IOError, OSError):
-            pass
+        """
+        Remove all the cache files.
+        """
+        if not os.path.exists(self._dir):
+            return
+        for fname in self._list_cache_files():
+            self._delete(fname)
+
+    def _is_expired(self, f):
+        """
+        Takes an open cache file and determines if it has expired;
+        deletes the file if it has passed its expiry time.
+        """
+        exp = pickle.load(f)
+        if exp is not None and exp < time.time():
+            f.close()  # On Windows a file has to be closed before deleting
+            self._delete(f.name)
+            return True
+        return False
+
+    def _list_cache_files(self):
+        """
+        Get a list of paths to all the cache files. These are all the files
+        in the root cache dir that end with the cache_suffix.
+        """
+        if not os.path.exists(self._dir):
+            return []
+        filelist = [os.path.join(self._dir, fname) for fname
+                    in glob.glob1(self._dir, '*%s' % self.cache_suffix)]
+        return filelist
+
 # For backwards compatibility
diff --git a/docs/topics/cache.txt b/docs/topics/cache.txt
index f61e3ffbd3..7f710613e9 100644
--- a/docs/topics/cache.txt
+++ b/docs/topics/cache.txt
@@ -253,10 +253,11 @@ model.
 Filesystem caching
 ------------------
 
-To store cached items on a filesystem, use
-``"django.core.cache.backends.filebased.FileBasedCache"`` for
-:setting:`BACKEND <CACHES-BACKEND>`. For example, to store cached data in
-``/var/tmp/django_cache``, use this setting::
+The file-based backend serializes and stores each cache value as a separate
+file. To use this backend, set :setting:`BACKEND <CACHES-BACKEND>` to
+``"django.core.cache.backends.filebased.FileBasedCache"`` and
+:setting:`LOCATION <CACHES-LOCATION>` to a suitable directory. For example,
+to store cached data in ``/var/tmp/django_cache``, use this setting::
 
     CACHES = {
         'default': {
@@ -265,7 +266,6 @@ To store cached items on a filesystem, use
         }
     }
 
-
 If you're on Windows, put the drive letter at the beginning of the path,
 like this::
 
@@ -286,10 +286,6 @@ above example, if your server runs as the user ``apache``, make sure the
 directory ``/var/tmp/django_cache`` exists and is readable and writable by
 the user ``apache``.
 
-Each cache value will be stored as a separate file whose contents are the
-cache data saved in a serialized ("pickled") format, using Python's ``pickle``
-module. Each file's name is the cache key, escaped for safe filesystem use.
-
 Local-memory caching
 --------------------
 
diff --git a/tests/cache/tests.py b/tests/cache/tests.py
index 053ee14a58..e4ef951295 100644
--- a/tests/cache/tests.py
+++ b/tests/cache/tests.py
@@ -1076,33 +1076,35 @@ class FileBasedCacheTests(unittest.TestCase, BaseCacheTests):
 
     def tearDown(self):
         self.cache.clear()
-
-    def test_hashing(self):
-        """Test that keys are hashed into subdirectories correctly"""
-        self.cache.set("foo", "bar")
-        key = self.cache.make_key("foo")
-        keyhash = hashlib.md5(key.encode()).hexdigest()
-        keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
-        self.assertTrue(os.path.exists(keypath))
-
-    def test_subdirectory_removal(self):
-        """
-        Make sure that the created subdirectories are correctly removed when empty.
-        """
-        self.cache.set("foo", "bar")
-        key = self.cache.make_key("foo")
-        keyhash = hashlib.md5(key.encode()).hexdigest()
-        keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
-        self.assertTrue(os.path.exists(keypath))
-
-        self.cache.delete("foo")
-        self.assertTrue(not os.path.exists(keypath))
-        self.assertTrue(not os.path.exists(os.path.dirname(keypath)))
-        self.assertTrue(not os.path.exists(os.path.dirname(os.path.dirname(keypath))))
+        os.rmdir(self.dirname)
 
     def test_cull(self):
         self.perform_cull_test(50, 29)
 
+    def test_ignores_non_cache_files(self):
+        fname = os.path.join(self.dirname, 'not-a-cache-file')
+        with open(fname, 'w'):
+            os.utime(fname, None)
+        self.cache.clear()
+        self.assertTrue(os.path.exists(fname),
+                        'Expected cache.clear to ignore non cache files')
+        os.remove(fname)
+
+    def test_clear_does_not_remove_cache_dir(self):
+        self.cache.clear()
+        self.assertTrue(os.path.exists(self.dirname),
+                        'Expected cache.clear to keep the cache dir')
+
+    def test_creates_cache_dir_if_nonexistent(self):
+        os.rmdir(self.dirname)
+        self.cache.set('foo', 'bar')
+        self.assertTrue(os.path.exists(self.dirname))
+
+    def test_zero_cull(self):
+        # Regression test for #15806
+        self.cache = get_cache(self.backend_name, LOCATION=self.dirname, OPTIONS={'MAX_ENTRIES': 30, 'CULL_FREQUENCY': 0})
+        self.perform_cull_test(50, 19)
+
 
 class CustomCacheKeyValidationTests(unittest.TestCase):
     """
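
Note on the new on-disk format and write path: each cache file now holds a
pickled expiry value followed by the zlib-compressed pickle of the cached
value, and set() assembles the file under a temporary name before moving it
over the final one, so a concurrent reader never sees a half-written entry.
A minimal standalone sketch of that cycle (the helper names cache_write and
cache_read are illustrative, and os.replace stands in for Django's
file_move_safe):

    import os
    import pickle
    import tempfile
    import time
    import zlib

    def cache_write(path, value, expiry, cache_dir):
        # Build the entry in a temp file in the same directory, then
        # atomically rename it into place (os.replace needs Python 3.3+).
        fd, tmp_path = tempfile.mkstemp(dir=cache_dir)
        renamed = False
        try:
            with os.fdopen(fd, 'wb') as f:
                f.write(pickle.dumps(expiry, -1))                # expiry header
                f.write(zlib.compress(pickle.dumps(value), -1))  # payload
            os.replace(tmp_path, path)
            renamed = True
        finally:
            if not renamed:
                os.remove(tmp_path)

    def cache_read(path, default=None):
        # pickle.load() stops right after the first pickle, leaving the
        # file positioned at the start of the compressed payload.
        with open(path, 'rb') as f:
            expiry = pickle.load(f)
            if expiry is not None and expiry < time.time():
                return default  # expired; the real backend also deletes it
            return pickle.loads(zlib.decompress(f.read()))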
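
The culling change is easiest to see with the arithmetic spelled out: the old
backend walked a sorted listing and deleted every Nth entry, so the same keys
tended to be culled on every pass, while the new _cull() removes a random
sample of num_entries / cull_frequency files once max_entries is reached, and
CULL_FREQUENCY = 0 purges the whole cache (the behavior pinned down by the
new test_zero_cull, the #15806 regression test). A rough standalone model of
the policy, with cull_candidates as a hypothetical helper rather than
anything in the patch:

    import random

    def cull_candidates(filelist, max_entries, cull_frequency):
        # Below the limit nothing is culled; a frequency of 0 purges
        # everything; otherwise a random 1/cull_frequency of the entries go.
        if len(filelist) < max_entries:
            return []
        if cull_frequency == 0:
            return list(filelist)
        return random.sample(filelist, len(filelist) // cull_frequency)

With the base backend's defaults (MAX_ENTRIES = 300, CULL_FREQUENCY = 3), a
set() that finds 300 entries on disk deletes a random 100 of them before
writing the new one.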
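
The flat directory layout is what makes the rest possible: where the old
backend expanded a key's md5 into two levels of subdirectories
(ac/bd/18db4...), the new _key_to_file() keeps every entry directly in the
cache root as the md5 of the full versioned key plus the .djcache suffix, so
clear() and _list_cache_files() only have to glob one directory and can skip
files they did not create. A one-function sketch, assuming the key has
already been through make_key() and validate_key():

    import hashlib
    import os

    def key_to_file(cache_dir, made_key, cache_suffix='.djcache'):
        # e.g. ':1:foo' -> '<cache_dir>/<32 hex chars>.djcache'
        digest = hashlib.md5(made_key.encode()).hexdigest()
        return os.path.join(cache_dir, digest + cache_suffix)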