2005-10-09 07:19:21 +08:00
|
|
|
"""
|
2005-10-09 08:37:56 +08:00
|
|
|
This module contains helper functions for controlling caching. It does so by
|
|
|
|
managing the "Vary" header of responses. It includes functions to patch the
|
|
|
|
header of response objects directly and decorators that change functions to do
|
|
|
|
that header-patching themselves.
|
2005-10-09 07:19:21 +08:00
|
|
|
|
|
|
|
For information on the Vary header, see:
|
|
|
|
|
|
|
|
http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.44
|
|
|
|
|
|
|
|
Essentially, the "Vary" HTTP header defines which headers a cache should take
|
|
|
|
into account when building its cache key. Requests with the same path but
|
|
|
|
different header content for headers named in "Vary" need to get different
|
|
|
|
cache keys to prevent delivery of wrong content.
|
|
|
|
|
2007-10-29 06:32:25 +08:00
|
|
|
An example: i18n middleware would need to distinguish caches by the
|
2005-10-09 07:19:21 +08:00
|
|
|
"Accept-Language" header.
|
|
|
|
"""
|
2012-08-13 18:09:20 +08:00
|
|
|
from __future__ import unicode_literals
|
2005-10-09 07:19:21 +08:00
|
|
|
|
2011-03-28 10:11:19 +08:00
|
|
|
import hashlib
|
2007-07-16 11:50:22 +08:00
|
|
|
import re
|
|
|
|
import time
|
2007-10-29 06:32:25 +08:00
|
|
|
|
2005-10-09 07:19:21 +08:00
|
|
|
from django.conf import settings
|
2010-12-22 15:52:44 +08:00
|
|
|
from django.core.cache import get_cache
|
2012-08-13 04:22:56 +08:00
|
|
|
from django.utils.encoding import iri_to_uri, force_text, smart_bytes
|
2007-10-31 11:59:40 +08:00
|
|
|
from django.utils.http import http_date
|
2011-11-18 21:01:06 +08:00
|
|
|
from django.utils.timezone import get_current_timezone_name
|
2010-03-01 18:19:01 +08:00
|
|
|
from django.utils.translation import get_language
|
2005-10-09 07:19:21 +08:00
|
|
|
|
2005-10-30 01:00:20 +08:00
|
|
|
cc_delim_re = re.compile(r'\s*,\s*')
|
2005-10-31 11:14:57 +08:00
|
|
|
|
2005-10-30 01:00:20 +08:00
|
|
|
def patch_cache_control(response, **kwargs):
    """
    This function patches the Cache-Control header by adding all
    keyword arguments to it. The transformation is as follows:

    * All keyword parameter names are turned to lowercase, and underscores
      are converted to hyphens.
    * If the value of a parameter is True (exactly True, not just a
      true value), only the parameter name is added to the header.
    * All other parameters are added with their value, after applying
      str() to it.

    Directives already present in the header are kept unless a keyword
    argument replaces them. Empty elements in the existing header (an empty
    header value, or stray leading/trailing commas) are ignored rather than
    being written back as empty directives.

    The header is set on ``response`` in place; nothing is returned.
    """
    def dictitem(s):
        # Parse one "name[=value]" element; a bare name maps to True.
        t = s.split('=', 1)
        if len(t) > 1:
            return (t[0].lower(), t[1])
        else:
            return (t[0].lower(), True)

    def dictvalue(t):
        # Serialise one (name, value) pair back into header syntax.
        if t[1] is True:
            return t[0]
        else:
            return '%s=%s' % (t[0], t[1])

    # Normalise the keyword names first so that every check below compares
    # like with like: names parsed from the header are lowercase and
    # hyphenated as well.
    directives = dict(
        (k.lower().replace('_', '-'), v) for (k, v) in kwargs.items())

    if response.has_header('Cache-Control'):
        elements = [el.strip()
                    for el in cc_delim_re.split(response['Cache-Control'])]
        # Skip empty elements so that '' or 'a,,b' never yields a '' key.
        cc = dict([dictitem(el) for el in elements if el])
    else:
        cc = {}

    # If there's already a max-age header but we're being asked to set a new
    # max-age, use the minimum of the two ages. In practice this happens when
    # a decorator and a piece of middleware both operate on a given view.
    if 'max-age' in cc and 'max-age' in directives:
        directives['max-age'] = min(int(cc['max-age']), directives['max-age'])

    # Allow overriding private caching and vice versa
    if 'private' in cc and 'public' in directives:
        del cc['private']
    elif 'public' in cc and 'private' in directives:
        del cc['public']

    cc.update(directives)
    response['Cache-Control'] = ', '.join([dictvalue(el) for el in cc.items()])
|
|
|
|
|
2007-11-30 00:57:18 +08:00
|
|
|
def get_max_age(response):
    """
    Returns the max-age from the response Cache-Control header as an integer
    (or ``None`` if it wasn't found or wasn't an integer).
    """
    if not response.has_header('Cache-Control'):
        return None
    directives = dict(
        _to_tuple(part)
        for part in cc_delim_re.split(response['Cache-Control']))
    try:
        return int(directives['max-age'])
    except (KeyError, ValueError, TypeError):
        # No max-age directive, or its value isn't usable as an integer.
        return None
|
|
|
|
|
2011-09-08 21:25:31 +08:00
|
|
|
def _set_response_etag(response):
|
|
|
|
response['ETag'] = '"%s"' % hashlib.md5(response.content).hexdigest()
|
|
|
|
return response
|
|
|
|
|
2005-10-09 07:19:21 +08:00
|
|
|
def patch_response_headers(response, cache_timeout=None):
    """
    Adds some useful headers to the given HttpResponse object:
        ETag, Last-Modified, Expires and Cache-Control

    ETag (only when settings.USE_ETAGS is on), Last-Modified and Expires are
    added only if they aren't already set. The max-age directive is merged
    into Cache-Control through patch_cache_control().

    cache_timeout is in seconds. The CACHE_MIDDLEWARE_SECONDS setting is used
    by default. Negative values are treated as 0.
    """
    if cache_timeout is None:
        cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
    # Can't have max-age negative
    cache_timeout = max(cache_timeout, 0)

    wants_etag = settings.USE_ETAGS and not response.has_header('ETag')
    if wants_etag:
        if callable(getattr(response, 'render', None)):
            # The response exposes a callable render(): compute the ETag
            # from a post-render callback instead of right now.
            response.add_post_render_callback(_set_response_etag)
        else:
            response = _set_response_etag(response)

    if not response.has_header('Last-Modified'):
        response['Last-Modified'] = http_date()
    if not response.has_header('Expires'):
        expires_at = time.time() + cache_timeout
        response['Expires'] = http_date(expires_at)
    patch_cache_control(response, max_age=cache_timeout)
|
2005-10-09 07:19:21 +08:00
|
|
|
|
2006-05-02 09:31:56 +08:00
|
|
|
def add_never_cache_headers(response):
    """
    Adds headers to a response to indicate that a page should never be
    cached, by patching the response headers with a negative cache timeout.
    """
    never = -1
    patch_response_headers(response, cache_timeout=never)
|
|
|
|
|
2005-10-09 07:19:21 +08:00
|
|
|
def patch_vary_headers(response, newheaders):
    """
    Adds (or updates) the "Vary" header in the given HttpResponse object.
    newheaders is a list of header names that should be in "Vary". Existing
    headers in "Vary" aren't removed.

    Header names are compared case-insensitively. A name from newheaders is
    appended only if it is neither in the existing header nor earlier in
    newheaders, so each new name is added at most once. Empty elements in
    the existing header are dropped.
    """
    # Note that we need to keep the original order intact, because cache
    # implementations may rely on the order of the Vary contents in, say,
    # computing an MD5 hash.
    if response.has_header('Vary'):
        # Filter out empty elements so that an empty or comma-padded value
        # doesn't turn into a leading ", " in the rebuilt header.
        vary_headers = [header for header
                        in cc_delim_re.split(response['Vary'].strip())
                        if header]
    else:
        vary_headers = []
    # Use .lower() here so we treat headers as case-insensitive.
    seen = set([header.lower() for header in vary_headers])
    for newheader in newheaders:
        key = newheader.lower()
        if key not in seen:
            # Record the name right away so a repeat later in newheaders
            # is skipped too.
            seen.add(key)
            vary_headers.append(newheader)
    response['Vary'] = ', '.join(vary_headers)
|
2005-10-09 07:19:21 +08:00
|
|
|
|
2011-02-01 08:20:31 +08:00
|
|
|
def has_vary_header(response, header_query):
    """
    Checks to see if the response has a given header name in its Vary header.
    The comparison is case-insensitive.
    """
    if not response.has_header('Vary'):
        return False
    wanted = header_query.lower()
    listed = cc_delim_re.split(response['Vary'])
    return any(name.lower() == wanted for name in listed)
|
|
|
|
|
2010-03-01 18:19:01 +08:00
|
|
|
def _i18n_cache_key_suffix(request, cache_key):
    """If necessary, adds the current locale or time zone to the cache key."""
    if settings.USE_I18N or settings.USE_L10N:
        # Prefer a LANGUAGE_CODE attribute set on the request (by
        # LocaleMiddleware or another middleware); otherwise use the active
        # language, which in turn can fall back to settings.LANGUAGE_CODE.
        active_language = get_language()
        language = getattr(request, 'LANGUAGE_CODE', active_language)
        cache_key += '.%s' % language
    if settings.USE_TZ:
        # The datetime module doesn't restrict the output of tzname().
        # Windows is known to use non-standard, locale-dependent names.
        # User-defined tzinfo classes may return absolutely anything.
        # Hence this paranoid conversion to create a valid cache key.
        tz_name = force_text(get_current_timezone_name(), errors='ignore')
        ascii_name = tz_name.encode('ascii', 'ignore').decode('ascii')
        cache_key += '.%s' % ascii_name.replace(' ', '_')
    return cache_key
|
|
|
|
|
2010-10-29 09:31:15 +08:00
|
|
|
def _generate_cache_key(request, method, headerlist, key_prefix):
    """
    Returns a cache key from the headers given in the header list.

    The key combines key_prefix, the request method, an MD5 digest of the
    request's full path and an MD5 digest of the values found in
    request.META for each name in headerlist (names missing from META are
    skipped). The i18n/time zone suffix is appended at the end.
    """
    ctx = hashlib.md5()
    for header in headerlist:
        value = request.META.get(header, None)
        if value is not None:
            # hashlib digests only accept bytes; convert the same way the
            # path is converted below so text values don't raise TypeError
            # on Python 3.
            ctx.update(smart_bytes(value))
    path = hashlib.md5(smart_bytes(iri_to_uri(request.get_full_path())))
    cache_key = 'views.decorators.cache.cache_page.%s.%s.%s.%s' % (
        key_prefix, method, path.hexdigest(), ctx.hexdigest())
    return _i18n_cache_key_suffix(request, cache_key)
|
2005-10-09 07:19:21 +08:00
|
|
|
|
2009-03-31 06:37:59 +08:00
|
|
|
def _generate_cache_header_key(key_prefix, request):
    """
    Returns a cache key for the header cache, built from key_prefix and an
    MD5 digest of the request's full path, plus the i18n/time zone suffix.
    """
    full_uri = iri_to_uri(request.get_full_path())
    path_digest = hashlib.md5(smart_bytes(full_uri)).hexdigest()
    cache_key = 'views.decorators.cache.cache_header.%s.%s' % (
        key_prefix, path_digest)
    return _i18n_cache_key_suffix(request, cache_key)
|
2009-03-31 06:37:59 +08:00
|
|
|
|
2010-12-22 15:52:44 +08:00
|
|
|
def get_cache_key(request, key_prefix=None, method='GET', cache=None):
    """
    Returns a cache key based on the request path and query. It can be used
    in the request phase because it pulls the list of headers to take into
    account from the global path registry and uses those to build a cache key
    to check against.

    key_prefix defaults to settings.CACHE_MIDDLEWARE_KEY_PREFIX and cache to
    the cache named by settings.CACHE_MIDDLEWARE_ALIAS.

    If there is no headerlist stored, the page needs to be rebuilt, so this
    function returns None.
    """
    if key_prefix is None:
        key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
    header_key = _generate_cache_header_key(key_prefix, request)
    if cache is None:
        cache = get_cache(settings.CACHE_MIDDLEWARE_ALIAS)
    headerlist = cache.get(header_key, None)
    if headerlist is None:
        # Nothing learned for this path yet.
        return None
    return _generate_cache_key(request, method, headerlist, key_prefix)
|
|
|
|
|
2010-12-22 15:52:44 +08:00
|
|
|
def learn_cache_key(request, response, cache_timeout=None, key_prefix=None, cache=None):
    """
    Learns what headers to take into account for some request path from the
    response object. It stores those headers in a global path registry so that
    later access to that path will know what headers to take into account
    without building the response object itself. The headers are named in the
    Vary header of the response, but we want to prevent response generation.

    The list of headers to use for cache key generation is stored in the same
    cache as the pages themselves. If the cache ages some data out of the
    cache, this just means that we have to build the response once to get at
    the Vary header and so at the list of headers to use for the cache key.

    Returns the cache key generated for this request and its method.
    """
    if key_prefix is None:
        key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
    if cache_timeout is None:
        cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
    header_key = _generate_cache_header_key(key_prefix, request)
    if cache is None:
        cache = get_cache(settings.CACHE_MIDDLEWARE_ALIAS)

    if response.has_header('Vary'):
        # Translate each Vary entry into its request.META key form,
        # e.g. "Accept-Language" -> "HTTP_ACCEPT_LANGUAGE".
        headerlist = ['HTTP_' + name.upper().replace('-', '_')
                      for name in cc_delim_re.split(response['Vary'])]
    else:
        # if there is no Vary header, we still need a cache key
        # for the request.get_full_path()
        headerlist = []

    cache.set(header_key, headerlist, cache_timeout)
    return _generate_cache_key(request, request.method, headerlist, key_prefix)
|
2007-11-30 00:57:18 +08:00
|
|
|
|
|
|
|
|
|
|
|
def _to_tuple(s):
|
|
|
|
t = s.split('=',1)
|
|
|
|
if len(t) == 2:
|
|
|
|
return t[0].lower(), t[1]
|
|
|
|
return t[0].lower(), True
|