diff --git a/django/middleware/cache.py b/django/middleware/cache.py index 7f4057eec7..8216c40ae1 100644 --- a/django/middleware/cache.py +++ b/django/middleware/cache.py @@ -1,88 +1,70 @@ +import copy from django.conf import settings from django.core.cache import cache +from django.utils.cache import get_cache_key, learn_cache_key, patch_response_headers from django.utils.httpwrappers import HttpResponseNotModified -from django.utils.text import compress_string -import datetime, md5 class CacheMiddleware: """ Cache middleware. If this is enabled, each Django-powered page will be - cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs. Pages - with GET or POST parameters are not cached. + cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs. - If the cache is shared across multiple sites using the same Django - installation, set the CACHE_MIDDLEWARE_KEY_PREFIX to the name of the site, - or some other string that is unique to this Django instance, to prevent key - collisions. + Only parameter-less GET or HEAD-requests with status code 200 are cached. - This middleware will also make the following optimizations: + This middleware expects that a HEAD request is answered with a response + exactly like the corresponding GET request. - * If the CACHE_MIDDLEWARE_GZIP setting is True, the content will be - gzipped. + When a hit occurs, a shallow copy of the original response object is + returned from process_request. - * ETags will be added, using a simple MD5 hash of the page's content. + Pages will be cached based on the contents of the request headers + listed in the response's "Vary" header. This means that pages shouldn't + change their "Vary" header. + + This middleware also sets ETag, Last-Modified, Expires and Cache-Control + headers on the response object. 
""" + def __init__(self, cache_timeout=None, key_prefix=None): + self.cache_timeout = cache_timeout + if cache_timeout is None: + self.cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS + self.key_prefix = key_prefix + if key_prefix is None: + self.key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX + def process_request(self, request): - """ - Checks whether the page is already cached. If it is, returns the cached - version. Also handles ETag stuff. - """ - if request.GET or request.POST: - request._cache_middleware_set_cache = False + "Checks whether the page is already cached and returns the cached version if available." + if not request.META['REQUEST_METHOD'] in ('GET', 'HEAD') or request.GET: + request._cache_update_cache = False return None # Don't bother checking the cache. - accept_encoding = '' - if settings.CACHE_MIDDLEWARE_GZIP: - try: - accept_encoding = request.META['HTTP_ACCEPT_ENCODING'] - except KeyError: - pass - accepts_gzip = 'gzip' in accept_encoding - request._cache_middleware_accepts_gzip = accepts_gzip - - # This uses the same cache_key as views.decorators.cache.cache_page, - # so the cache can be shared. - cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % \ - (settings.CACHE_MIDDLEWARE_KEY_PREFIX, request.path, accepts_gzip) - request._cache_middleware_key = cache_key + cache_key = get_cache_key(request, self.key_prefix) + if cache_key is None: + request._cache_update_cache = True + return None # No cache information available, need to rebuild. 
response = cache.get(cache_key, None) if response is None: - request._cache_middleware_set_cache = True - return None - else: - request._cache_middleware_set_cache = False - # Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet - try: - if_none_match = request.META['HTTP_IF_NONE_MATCH'] - except KeyError: - if_none_match = None - try: - if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE'] - except KeyError: - if_modified_since = None - if if_none_match is None and if_modified_since is None: - pass - elif if_none_match is not None and response['ETag'] != if_none_match: - pass - elif if_modified_since is not None and response['Last-Modified'] != if_modified_since: - pass - else: - return HttpResponseNotModified() - return response + request._cache_update_cache = True + return None # No cache information available, need to rebuild. + + request._cache_update_cache = False + return copy.copy(response) def process_response(self, request, response): - """ - Sets the cache, if needed. - """ - if request._cache_middleware_set_cache: - content = response.get_content_as_string(settings.DEFAULT_CHARSET) - if request._cache_middleware_accepts_gzip: - content = compress_string(content) - response.content = content - response['Content-Encoding'] = 'gzip' - response['ETag'] = md5.new(content).hexdigest() - response['Content-Length'] = '%d' % len(content) - response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT') - cache.set(request._cache_middleware_key, response, settings.CACHE_MIDDLEWARE_SECONDS) + "Sets the cache, if needed." + if not request._cache_update_cache: + # We don't need to update the cache, just return. + return response + if not request.META['REQUEST_METHOD'] == 'GET': + # This is a stronger requirement than above. It is needed + # because of interactions between this middleware and the + # HTTPMiddleware, which throws the body of a HEAD-request + # away before this middleware gets a chance to cache it. 
+ return response + if not response.status_code == 200: + return response + patch_response_headers(response, self.cache_timeout) + cache_key = learn_cache_key(request, response, self.cache_timeout, self.key_prefix) + cache.set(cache_key, response, self.cache_timeout) return response diff --git a/django/middleware/gzip.py b/django/middleware/gzip.py new file mode 100644 index 0000000000..201bec2000 --- /dev/null +++ b/django/middleware/gzip.py @@ -0,0 +1,24 @@ +import re +from django.utils.text import compress_string +from django.utils.cache import patch_vary_headers + +re_accepts_gzip = re.compile(r'\bgzip\b') + +class GZipMiddleware: + """ + This middleware compresses content if the browser allows gzip compression. + It sets the Vary header accordingly, so that caches will base their storage + on the Accept-Encoding header. + """ + def process_response(self, request, response): + patch_vary_headers(response, ('Accept-Encoding',)) + if response.has_header('Content-Encoding'): + return response + + ae = request.META.get('HTTP_ACCEPT_ENCODING', '') + if not re_accepts_gzip.search(ae): + return response + + response.content = compress_string(response.content) + response['Content-Encoding'] = 'gzip' + return response diff --git a/django/middleware/http.py b/django/middleware/http.py new file mode 100644 index 0000000000..2bccd60903 --- /dev/null +++ b/django/middleware/http.py @@ -0,0 +1,37 @@ +import datetime + +class ConditionalGetMiddleware: + """ + Handles conditional GET operations. If the response has a ETag or + Last-Modified header, and the request has If-None-Match or + If-Modified-Since, the response is replaced by an HttpNotModified. + + Removes the content from any response to a HEAD request. + + Also sets the Date and Content-Length response-headers. 
+ """ + def process_response(self, request, response): + now = datetime.datetime.utcnow() + response['Date'] = now.strftime('%a, %d %b %Y %H:%M:%S GMT') + if not response.has_header('Content-Length'): + response['Content-Length'] = str(len(response.content)) + + if response.has_header('ETag'): + if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None) + if if_none_match == response['ETag']: + response.status_code = 304 + response.content = '' + response['Content-Length'] = '0' + + if response.has_header('Last-Modified'): + last_mod = response['Last-Modified'] + if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None) + if if_modified_since == response['Last-Modified']: + response.status_code = 304 + response.content = '' + response['Content-Length'] = '0' + + if request.META['REQUEST_METHOD'] == 'HEAD': + response.content = '' + + return response diff --git a/django/middleware/sessions.py b/django/middleware/sessions.py index a588e3e95b..42b2118410 100644 --- a/django/middleware/sessions.py +++ b/django/middleware/sessions.py @@ -1,5 +1,6 @@ from django.conf.settings import SESSION_COOKIE_NAME, SESSION_COOKIE_AGE, SESSION_COOKIE_DOMAIN from django.models.core import sessions +from django.utils.cache import patch_vary_headers import datetime TEST_COOKIE_NAME = 'testcookie' @@ -61,6 +62,7 @@ class SessionMiddleware: def process_response(self, request, response): # If request.session was modified, or if response.session was set, save # those changes and set a session cookie. 
+ patch_vary_headers(response, ('Cookie',)) try: modified = request.session.modified except AttributeError: diff --git a/django/views/decorators/cache.py b/django/views/decorators/cache.py index de80851363..09f9a0139f 100644 --- a/django/views/decorators/cache.py +++ b/django/views/decorators/cache.py @@ -1,57 +1,17 @@ -from django.core.cache import cache -from django.utils.httpwrappers import HttpResponseNotModified -from django.utils.text import compress_string -from django.conf.settings import DEFAULT_CHARSET -import datetime, md5 +""" +Decorator for views that tries getting the page from the cache and +populates the cache if the page isn't in the cache yet. -def cache_page(view_func, cache_timeout, key_prefix=''): - """ - Decorator for views that tries getting the page from the cache and - populates the cache if the page isn't in the cache yet. Also takes care - of ETags and gzips the page if the client supports it. +The cache is keyed by the URL and some data from the headers. Additionally +there is the key prefix that is used to distinguish different cache areas +in a multi-site setup. You could use the sites.get_current().domain, for +example, as that is unique across a Django project. - The cache is keyed off of the page's URL plus the optional key_prefix - variable. Use key_prefix if your Django setup has multiple sites that - use cache; otherwise the cache for one site would affect the other. A good - example of key_prefix is to use sites.get_current().domain, because that's - unique across all Django instances on a particular server. 
- """ - def _check_cache(request, *args, **kwargs): - try: - accept_encoding = request.META['HTTP_ACCEPT_ENCODING'] - except KeyError: - accept_encoding = '' - accepts_gzip = 'gzip' in accept_encoding - cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % (key_prefix, request.path, accepts_gzip) - response = cache.get(cache_key, None) - if response is None: - response = view_func(request, *args, **kwargs) - content = response.get_content_as_string(DEFAULT_CHARSET) - if accepts_gzip: - content = compress_string(content) - response.content = content - response['Content-Encoding'] = 'gzip' - response['ETag'] = md5.new(content).hexdigest() - response['Content-Length'] = '%d' % len(content) - response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT') - cache.set(cache_key, response, cache_timeout) - else: - # Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet - try: - if_none_match = request.META['HTTP_IF_NONE_MATCH'] - except KeyError: - if_none_match = None - try: - if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE'] - except KeyError: - if_modified_since = None - if if_none_match is None and if_modified_since is None: - pass - elif if_none_match is not None and response['ETag'] != if_none_match: - pass - elif if_modified_since is not None and response['Last-Modified'] != if_modified_since: - pass - else: - return HttpResponseNotModified() - return response - return _check_cache +Additionally, all headers from the response's Vary header will be taken into +account on caching -- just like the middleware does. 
+""" + +from django.utils.decorators import decorator_from_middleware +from django.middleware.cache import CacheMiddleware + +cache_page = decorator_from_middleware(CacheMiddleware) diff --git a/django/views/decorators/gzip.py b/django/views/decorators/gzip.py new file mode 100644 index 0000000000..dc6edad049 --- /dev/null +++ b/django/views/decorators/gzip.py @@ -0,0 +1,6 @@ +"Decorator for views that gzips pages if the client supports it." + +from django.utils.decorators import decorator_from_middleware +from django.middleware.gzip import GZipMiddleware + +gzip_page = decorator_from_middleware(GZipMiddleware) diff --git a/django/views/decorators/http.py b/django/views/decorators/http.py new file mode 100644 index 0000000000..13062b630f --- /dev/null +++ b/django/views/decorators/http.py @@ -0,0 +1,9 @@ +""" +Decorator for views that supports conditional get on ETag and Last-Modified +headers. +""" + +from django.utils.decorators import decorator_from_middleware +from django.middleware.http import ConditionalGetMiddleware + +conditional_page = decorator_from_middleware(ConditionalGetMiddleware) diff --git a/docs/cache.txt b/docs/cache.txt index 0a7ee1c25a..f690e5f904 100644 --- a/docs/cache.txt +++ b/docs/cache.txt @@ -2,25 +2,27 @@ Django's cache framework ======================== -So, you got slashdotted. Now what? +So, you got slashdotted_. Now what? Django's cache framework gives you three methods of caching dynamic pages in memory or in a database. You can cache the output of entire pages, you can cache only the pieces that are difficult to produce, or you can cache your entire site. +.. _slashdotted: http://en.wikipedia.org/wiki/Slashdot_effect + Setting up the cache ==================== -The cache framework is split into a set of "backends" that provide different -methods of caching data. There's a simple single-process memory cache (mostly -useful as a fallback) and a memcached_ backend (the fastest option, by far, if -you've got the RAM). 
+The cache framework allows for different "backends" -- different methods of +caching data. There's a simple single-process memory cache (mostly useful as a +fallback) and a memcached_ backend (the fastest option, by far, if you've got +the RAM). Before using the cache, you'll need to tell Django which cache backend you'd like to use. Do this by setting the ``CACHE_BACKEND`` in your settings file. -The CACHE_BACKEND setting is a "fake" URI (really an unregistered scheme). +The ``CACHE_BACKEND`` setting is a "fake" URI (really an unregistered scheme). Examples: ============================== =========================================== @@ -39,7 +41,7 @@ Examples: simple:/// A simple single-process memory cache; you probably don't want to use this except for testing. Note that this cache backend is - NOT threadsafe! + NOT thread-safe! locmem:/// A more sophisticated local memory cache; this is multi-process- and thread-safe. @@ -72,22 +74,24 @@ For example:: Invalid arguments are silently ignored, as are invalid values of known arguments. +.. _memcached: http://www.danga.com/memcached/ + The per-site cache ================== -Once the cache is set up, the simplest way to use the cache is to simply -cache your entire site. Just add ``django.middleware.cache.CacheMiddleware`` -to your ``MIDDLEWARE_CLASSES`` setting, as in this example:: +Once the cache is set up, the simplest way to use the cache is to cache your +entire site. Just add ``django.middleware.cache.CacheMiddleware`` to your +``MIDDLEWARE_CLASSES`` setting, as in this example:: MIDDLEWARE_CLASSES = ( "django.middleware.cache.CacheMiddleware", "django.middleware.common.CommonMiddleware", ) -Make sure it's the first entry in ``MIDDLEWARE_CLASSES``. (The order of -``MIDDLEWARE_CLASSES`` matters.) +(The order of ``MIDDLEWARE_CLASSES`` matters. See "Order of MIDDLEWARE_CLASSES" +below.) 
-Then, add the following three required settings: +Then, add the following three required settings to your Django settings file: * ``CACHE_MIDDLEWARE_SECONDS`` -- The number of seconds each page should be cached. @@ -102,16 +106,20 @@ Then, add the following three required settings: in the cache. That means subsequent requests won't have the overhead of zipping, and the cache will hold more pages because each one is smaller. -Pages with GET or POST parameters won't be cached. +The cache middleware caches every page that doesn't have GET or POST +parameters. Additionally, ``CacheMiddleware`` automatically sets a few headers +in each ``HttpResponse``: -The cache middleware also makes a few more optimizations: - -* Sets and deals with ``ETag`` headers. -* Sets the ``Content-Length`` header. * Sets the ``Last-Modified`` header to the current date/time when a fresh (uncached) version of the page is requested. +* Sets the ``Expires`` header to the current date/time plus the defined + ``CACHE_MIDDLEWARE_SECONDS``. +* Sets the ``Cache-Control`` header to give a max age for the page -- again, + from the ``CACHE_MIDDLEWARE_SECONDS`` setting. -It doesn't matter where in the middleware stack you put the cache middleware. +See the `middleware documentation`_ for more on middleware. + +.. _`middleware documentation`: http://www.djangoproject.com/documentation/middleware/ The per-page cache ================== @@ -134,25 +142,25 @@ Or, using Python 2.4's decorator syntax:: def slashdot_this(request): ... -This will cache the result of that view for 15 minutes. (The cache timeout is -in seconds.) +``cache_page`` takes a single argument: the cache timeout, in seconds. In the +above example, the result of the ``slashdot_this()`` view will be cached for 15 +minutes. The low-level cache API ======================= -There are times, however, that caching an entire rendered page doesn't gain -you very much. 
The Django developers have found it's only necessary to cache a -list of object IDs from an intensive database query, for example. In cases like -these, you can use the cache API to store objects in the cache with any level -of granularity you like. +Sometimes, however, caching an entire rendered page doesn't gain you very much. +For example, you may find it's only necessary to cache the result of an +intensive database query. In cases like this, you can use the low-level cache API to +store objects in the cache with any level of granularity you like. The cache API is simple:: - # the cache module exports a cache object that's automatically - # created from the CACHE_BACKEND setting + # The cache module exports a cache object that's automatically + # created from the CACHE_BACKEND setting. >>> from django.core.cache import cache - # The basic interface is set(key, value, timeout_seconds) and get(key) + # The basic interface is set(key, value, timeout_seconds) and get(key). >>> cache.set('my_key', 'hello, world!', 30) >>> cache.get('my_key') 'hello, world!' @@ -161,7 +169,7 @@ The cache API is simple:: >>> cache.get('my_key') None - # get() can take a default argument + # get() can take a default argument. >>> cache.get('my_key', 'has_expired') 'has_expired' @@ -183,4 +191,108 @@ The cache API is simple:: That's it. The cache has very few restrictions: You can cache any object that can be pickled safely, although keys must be strings. -.. _memcached: http://www.danga.com/memcached/ +Controlling cache: Using Vary headers +===================================== + +The Django cache framework works with `HTTP Vary headers`_ to allow developers +to instruct caching mechanisms to vary their cache contents depending on +request HTTP headers. + +Essentially, the ``Vary`` response HTTP header defines which request headers a +cache mechanism should take into account when building its cache key. 
+ +By default, Django's cache system creates its cache keys using the requested +path -- e.g., ``"/stories/2005/jun/23/bank_robbed/"``. This means every request +to that URL will use the same cached version, regardless of user-agent +differences such as cookies or language preferences. + +That's where ``Vary`` comes in. + +If your Django-powered page outputs different content based on some difference +in request headers -- such as a cookie, or language, or user-agent -- you'll +need to use the ``Vary`` header to tell caching mechanisms that the page output +depends on those things. + +To do this in Django, use the convenient ``vary_on_headers`` view decorator, +like so:: + + from django.views.decorators.vary import vary_on_headers + + # Python 2.3 syntax. + def my_view(request): + ... + my_view = vary_on_headers(my_view, 'User-Agent') + + # Python 2.4 decorator syntax. + @vary_on_headers('User-Agent') + def my_view(request): + ... + +In this case, a caching mechanism (such as Django's own cache middleware) will +cache a separate version of the page for each unique user-agent. + +The advantage to using the ``vary_on_headers`` decorator rather than manually +setting the ``Vary`` header (using something like +``response['Vary'] = 'user-agent'``) is that the decorator adds to the ``Vary`` +header (which may already exist) rather than setting it from scratch. + +Note that you can pass multiple headers to ``vary_on_headers()``:: + + @vary_on_headers('User-Agent', 'Cookie') + def my_view(request): + ... + +Because varying on cookie is such a common case, there's a ``vary_on_cookie`` +decorator. These two views are equivalent:: + + @vary_on_cookie + def my_view(request): + ... + + @vary_on_headers('Cookie') + def my_view(request): + ... + +Also note that the headers you pass to ``vary_on_headers`` are not case +sensitive. ``"User-Agent"`` is the same thing as ``"user-agent"``. 
+ +You can also use a helper function, ``patch_vary_headers()``, directly:: + + from django.utils.cache import patch_vary_headers + def my_view(request): + ... + response = render_to_response('template_name', context) + patch_vary_headers(response, ['Cookie']) + return response + +``patch_vary_headers`` takes an ``HttpResponse`` instance as its first argument +and a list/tuple of header names as its second argument. + +.. _`HTTP Vary headers`: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.44 + +Other optimizations +=================== + +Django comes with a few other pieces of middleware that can help optimize your +apps' performance: + + * ``django.middleware.http.ConditionalGetMiddleware`` adds support for + conditional GET. This makes use of ``ETag`` and ``Last-Modified`` + headers. + + * ``django.middleware.gzip.GZipMiddleware`` compresses content for browsers + that understand gzip compression (all modern browsers). + +Order of MIDDLEWARE_CLASSES +=========================== + +If you use ``CacheMiddleware``, it's important to put it in the right place +within the ``MIDDLEWARE_CLASSES`` setting, because the cache middleware needs +to know by which headers to vary the cache storage. Middleware always +adds something to the ``Vary`` response header when it can. + +Put the ``CacheMiddleware`` after any middlewares that might add something to +the ``Vary`` header. The following middlewares do so: + + * ``SessionMiddleware`` adds ``Cookie`` + * ``GZipMiddleware`` adds ``Accept-Encoding`` diff --git a/docs/middleware.txt b/docs/middleware.txt index f3901bb693..21e62fa18c 100644 --- a/docs/middleware.txt +++ b/docs/middleware.txt @@ -88,6 +88,18 @@ Available middleware addresses defined in the ``INTERNAL_IPS`` setting. This is used by Django's automatic documentation system. +``django.middleware.gzip.GZipMiddleware`` + Compresses content for browsers that understand gzip compression (all + modern browsers). 
+ +``django.middleware.http.ConditionalGetMiddleware`` + Handles conditional GET operations. If the response has an ``ETag`` or + ``Last-Modified`` header, and the request has ``If-None-Match`` or + ``If-Modified-Since``, the response is replaced by an empty 304 (Not Modified) response. + + Also removes the content from any response to a HEAD request and sets the + ``Date`` and ``Content-Length`` response-headers. + ``django.middleware.sessions.SessionMiddleware`` Enables session support. See the `session documentation`_.