Fixed #580 -- Added mega support for generating Vary headers, including some view decorators, and changed the CacheMiddleware to account for the Vary header. Also added GZipMiddleware and ConditionalGetMiddleware, which are no longer handled by CacheMiddleware itself. Also updated the cache.txt and middleware.txt docs. Thanks to Hugo and Sune for the excellent patches

git-svn-id: http://code.djangoproject.com/svn/django/trunk@810 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Adrian Holovaty 2005-10-09 00:55:08 +00:00
parent a5a89b5a43
commit d65526d688
9 changed files with 297 additions and 153 deletions

View File

@ -1,88 +1,70 @@
import copy
from django.conf import settings
from django.core.cache import cache
from django.utils.cache import get_cache_key, learn_cache_key, patch_response_headers
from django.utils.httpwrappers import HttpResponseNotModified
from django.utils.text import compress_string
import datetime, md5
class CacheMiddleware:
"""
Cache middleware. If this is enabled, each Django-powered page will be
cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs. Pages
with GET or POST parameters are not cached.
cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs.
If the cache is shared across multiple sites using the same Django
installation, set the CACHE_MIDDLEWARE_KEY_PREFIX to the name of the site,
or some other string that is unique to this Django instance, to prevent key
collisions.
Only parameter-less GET or HEAD-requests with status code 200 are cached.
This middleware will also make the following optimizations:
This middleware expects that a HEAD request is answered with a response
exactly like the corresponding GET request.
* If the CACHE_MIDDLEWARE_GZIP setting is True, the content will be
gzipped.
When a hit occurs, a shallow copy of the original response object is
returned from process_request.
* ETags will be added, using a simple MD5 hash of the page's content.
Pages will be cached based on the contents of the request headers
listed in the response's "Vary" header. This means that pages shouldn't
change their "Vary" header.
This middleware also sets ETag, Last-Modified, Expires and Cache-Control
headers on the response object.
"""
def __init__(self, cache_timeout=None, key_prefix=None):
self.cache_timeout = cache_timeout
if cache_timeout is None:
self.cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
self.key_prefix = key_prefix
if key_prefix is None:
self.key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
def process_request(self, request):
"""
Checks whether the page is already cached. If it is, returns the cached
version. Also handles ETag stuff.
"""
if request.GET or request.POST:
request._cache_middleware_set_cache = False
"Checks whether the page is already cached and returns the cached version if available."
if not request.META['REQUEST_METHOD'] in ('GET', 'HEAD') or request.GET:
request._cache_update_cache = False
return None # Don't bother checking the cache.
accept_encoding = ''
if settings.CACHE_MIDDLEWARE_GZIP:
try:
accept_encoding = request.META['HTTP_ACCEPT_ENCODING']
except KeyError:
pass
accepts_gzip = 'gzip' in accept_encoding
request._cache_middleware_accepts_gzip = accepts_gzip
# This uses the same cache_key as views.decorators.cache.cache_page,
# so the cache can be shared.
cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % \
(settings.CACHE_MIDDLEWARE_KEY_PREFIX, request.path, accepts_gzip)
request._cache_middleware_key = cache_key
cache_key = get_cache_key(request, self.key_prefix)
if cache_key is None:
request._cache_update_cache = True
return None # No cache information available, need to rebuild.
response = cache.get(cache_key, None)
if response is None:
request._cache_middleware_set_cache = True
return None
else:
request._cache_middleware_set_cache = False
# Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet
try:
if_none_match = request.META['HTTP_IF_NONE_MATCH']
except KeyError:
if_none_match = None
try:
if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
except KeyError:
if_modified_since = None
if if_none_match is None and if_modified_since is None:
pass
elif if_none_match is not None and response['ETag'] != if_none_match:
pass
elif if_modified_since is not None and response['Last-Modified'] != if_modified_since:
pass
else:
return HttpResponseNotModified()
return response
request._cache_update_cache = True
return None # No cache information available, need to rebuild.
request._cache_update_cache = False
return copy.copy(response)
def process_response(self, request, response):
"""
Sets the cache, if needed.
"""
if request._cache_middleware_set_cache:
content = response.get_content_as_string(settings.DEFAULT_CHARSET)
if request._cache_middleware_accepts_gzip:
content = compress_string(content)
response.content = content
response['Content-Encoding'] = 'gzip'
response['ETag'] = md5.new(content).hexdigest()
response['Content-Length'] = '%d' % len(content)
response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
cache.set(request._cache_middleware_key, response, settings.CACHE_MIDDLEWARE_SECONDS)
"Sets the cache, if needed."
if not request._cache_update_cache:
# We don't need to update the cache, just return.
return response
if not request.META['REQUEST_METHOD'] == 'GET':
# This is a stronger requirement than above. It is needed
# because of interactions between this middleware and the
# HTTPMiddleware, which throws the body of a HEAD-request
# away before this middleware gets a chance to cache it.
return response
if not response.status_code == 200:
return response
patch_response_headers(response, self.cache_timeout)
cache_key = learn_cache_key(request, response, self.cache_timeout, self.key_prefix)
cache.set(cache_key, response, self.cache_timeout)
return response

24
django/middleware/gzip.py Normal file
View File

@ -0,0 +1,24 @@
import re
from django.utils.text import compress_string
from django.utils.cache import patch_vary_headers
re_accepts_gzip = re.compile(r'\bgzip\b')
class GZipMiddleware:
    """
    Compresses the response content with gzip when the client's
    Accept-Encoding request header allows it.

    The Vary header of every response passing through is patched with
    Accept-Encoding, so that caches store separate variants depending on
    that request header.
    """

    def process_response(self, request, response):
        """
        Returns ``response``, gzip-compressed in place when possible.

        The response is returned uncompressed if it already carries a
        Content-Encoding header, or if the request's Accept-Encoding header
        does not mention gzip.
        """
        # Patch Vary on every response, compressed or not: the decision made
        # below depends on the Accept-Encoding request header either way.
        patch_vary_headers(response, ('Accept-Encoding',))

        # Something upstream already encoded the body; don't encode it twice.
        if response.has_header('Content-Encoding'):
            return response

        ae = request.META.get('HTTP_ACCEPT_ENCODING', '')
        if not re_accepts_gzip.search(ae):
            return response

        response.content = compress_string(response.content)
        response['Content-Encoding'] = 'gzip'
        # A Content-Length set before compression now describes the wrong
        # body size, so refresh it. When the header is absent it is left
        # absent, as before.
        if response.has_header('Content-Length'):
            response['Content-Length'] = str(len(response.content))
        return response

37
django/middleware/http.py Normal file
View File

@ -0,0 +1,37 @@
import datetime
class ConditionalGetMiddleware:
    """
    Handles conditional GET operations. If the response has an ETag or
    Last-Modified header, and the request has a matching If-None-Match or
    If-Modified-Since header, the response is turned into a 304 (Not
    Modified) response with empty content.

    Removes the content from any response to a HEAD request.

    Also sets the Date and Content-Length response-headers.
    """

    def process_response(self, request, response):
        """
        Returns ``response`` after applying the conditional-GET and HEAD
        handling described on the class. The response is modified in place.
        """
        now = datetime.datetime.utcnow()
        response['Date'] = now.strftime('%a, %d %b %Y %H:%M:%S GMT')
        if not response.has_header('Content-Length'):
            response['Content-Length'] = str(len(response.content))

        # Headers are compared as exact strings, as before.
        if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None)
        etag_compared = (if_none_match is not None
                         and response.has_header('ETag'))
        if etag_compared and if_none_match == response['ETag']:
            self._make_not_modified(response)

        # Once the entity tags have been compared, If-Modified-Since must be
        # ignored: a failed ETag comparison must never produce a 304 just
        # because the dates happen to be equal.
        if not etag_compared and response.has_header('Last-Modified'):
            if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None)
            if if_modified_since == response['Last-Modified']:
                self._make_not_modified(response)

        if request.META['REQUEST_METHOD'] == 'HEAD':
            # Only the body is dropped; Content-Length keeps the value it
            # was given above.
            response.content = ''

        return response

    def _make_not_modified(self, response):
        "Turns ``response`` into an empty 304 (Not Modified) response."
        response.status_code = 304
        response.content = ''
        response['Content-Length'] = '0'

View File

@ -1,5 +1,6 @@
from django.conf.settings import SESSION_COOKIE_NAME, SESSION_COOKIE_AGE, SESSION_COOKIE_DOMAIN
from django.models.core import sessions
from django.utils.cache import patch_vary_headers
import datetime
TEST_COOKIE_NAME = 'testcookie'
@ -61,6 +62,7 @@ class SessionMiddleware:
def process_response(self, request, response):
# If request.session was modified, or if response.session was set, save
# those changes and set a session cookie.
patch_vary_headers(response, ('Cookie',))
try:
modified = request.session.modified
except AttributeError:

View File

@ -1,57 +1,17 @@
from django.core.cache import cache
from django.utils.httpwrappers import HttpResponseNotModified
from django.utils.text import compress_string
from django.conf.settings import DEFAULT_CHARSET
import datetime, md5
"""
Decorator for views that tries getting the page from the cache and
populates the cache if the page isn't in the cache yet.
def cache_page(view_func, cache_timeout, key_prefix=''):
"""
Decorator for views that tries getting the page from the cache and
populates the cache if the page isn't in the cache yet. Also takes care
of ETags and gzips the page if the client supports it.
The cache is keyed by the URL and some data from the headers. Additionally
there is the key prefix that is used to distinguish different cache areas
in a multi-site setup. You could use the sites.get_current().domain, for
example, as that is unique across a Django project.
The cache is keyed off of the page's URL plus the optional key_prefix
variable. Use key_prefix if your Django setup has multiple sites that
use cache; otherwise the cache for one site would affect the other. A good
example of key_prefix is to use sites.get_current().domain, because that's
unique across all Django instances on a particular server.
"""
def _check_cache(request, *args, **kwargs):
try:
accept_encoding = request.META['HTTP_ACCEPT_ENCODING']
except KeyError:
accept_encoding = ''
accepts_gzip = 'gzip' in accept_encoding
cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % (key_prefix, request.path, accepts_gzip)
response = cache.get(cache_key, None)
if response is None:
response = view_func(request, *args, **kwargs)
content = response.get_content_as_string(DEFAULT_CHARSET)
if accepts_gzip:
content = compress_string(content)
response.content = content
response['Content-Encoding'] = 'gzip'
response['ETag'] = md5.new(content).hexdigest()
response['Content-Length'] = '%d' % len(content)
response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
cache.set(cache_key, response, cache_timeout)
else:
# Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet
try:
if_none_match = request.META['HTTP_IF_NONE_MATCH']
except KeyError:
if_none_match = None
try:
if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
except KeyError:
if_modified_since = None
if if_none_match is None and if_modified_since is None:
pass
elif if_none_match is not None and response['ETag'] != if_none_match:
pass
elif if_modified_since is not None and response['Last-Modified'] != if_modified_since:
pass
else:
return HttpResponseNotModified()
return response
return _check_cache
Additionally, all headers from the response's Vary header will be taken into
account on caching -- just like the middleware does.
"""
from django.utils.decorators import decorator_from_middleware
from django.middleware.cache import CacheMiddleware
cache_page = decorator_from_middleware(CacheMiddleware)

View File

@ -0,0 +1,6 @@
"Decorator for views that gzips pages if the client supports it."
from django.utils.decorators import decorator_from_middleware
from django.middleware.gzip import GZipMiddleware
gzip_page = decorator_from_middleware(GZipMiddleware)

View File

@ -0,0 +1,9 @@
"""
Decorator for views that supports conditional get on ETag and Last-Modified
headers.
"""
from django.utils.decorators import decorator_from_middleware
from django.middleware.http import ConditionalGetMiddleware
conditional_page = decorator_from_middleware(ConditionalGetMiddleware)

View File

@ -2,25 +2,27 @@
Django's cache framework
========================
So, you got slashdotted. Now what?
So, you got slashdotted_. Now what?
Django's cache framework gives you three methods of caching dynamic pages in
memory or in a database. You can cache the output of entire pages, you can
cache only the pieces that are difficult to produce, or you can cache your
entire site.
.. _slashdotted: http://en.wikipedia.org/wiki/Slashdot_effect
Setting up the cache
====================
The cache framework is split into a set of "backends" that provide different
methods of caching data. There's a simple single-process memory cache (mostly
useful as a fallback) and a memcached_ backend (the fastest option, by far, if
you've got the RAM).
The cache framework allows for different "backends" -- different methods of
caching data. There's a simple single-process memory cache (mostly useful as a
fallback) and a memcached_ backend (the fastest option, by far, if you've got
the RAM).
Before using the cache, you'll need to tell Django which cache backend you'd
like to use. Do this by setting the ``CACHE_BACKEND`` in your settings file.
The CACHE_BACKEND setting is a "fake" URI (really an unregistered scheme).
The ``CACHE_BACKEND`` setting is a "fake" URI (really an unregistered scheme).
Examples:
============================== ===========================================
@ -39,7 +41,7 @@ Examples:
simple:/// A simple single-process memory cache; you
probably don't want to use this except for
testing. Note that this cache backend is
NOT threadsafe!
NOT thread-safe!
locmem:/// A more sophisticated local memory cache;
this is multi-process- and thread-safe.
@ -72,22 +74,24 @@ For example::
Invalid arguments are silently ignored, as are invalid values of known
arguments.
.. _memcached: http://www.danga.com/memcached/
The per-site cache
==================
Once the cache is set up, the simplest way to use the cache is to simply
cache your entire site. Just add ``django.middleware.cache.CacheMiddleware``
to your ``MIDDLEWARE_CLASSES`` setting, as in this example::
Once the cache is set up, the simplest way to use the cache is to cache your
entire site. Just add ``django.middleware.cache.CacheMiddleware`` to your
``MIDDLEWARE_CLASSES`` setting, as in this example::
MIDDLEWARE_CLASSES = (
"django.middleware.cache.CacheMiddleware",
"django.middleware.common.CommonMiddleware",
)
Make sure it's the first entry in ``MIDDLEWARE_CLASSES``. (The order of
``MIDDLEWARE_CLASSES`` matters.)
(The order of ``MIDDLEWARE_CLASSES`` matters. See "Order of MIDDLEWARE_CLASSES"
below.)
Then, add the following three required settings:
Then, add the following three required settings to your Django settings file:
* ``CACHE_MIDDLEWARE_SECONDS`` -- The number of seconds each page should be
cached.
@ -102,16 +106,20 @@ Then, add the following three required settings:
in the cache. That means subsequent requests won't have the overhead of
zipping, and the cache will hold more pages because each one is smaller.
Pages with GET or POST parameters won't be cached.
The cache middleware caches every page that doesn't have GET or POST
parameters. Additionally, ``CacheMiddleware`` automatically sets a few headers
in each ``HttpResponse``:
The cache middleware also makes a few more optimizations:
* Sets and deals with ``ETag`` headers.
* Sets the ``Content-Length`` header.
* Sets the ``Last-Modified`` header to the current date/time when a fresh
(uncached) version of the page is requested.
* Sets the ``Expires`` header to the current date/time plus the defined
``CACHE_MIDDLEWARE_SECONDS``.
* Sets the ``Cache-Control`` header to give a max age for the page -- again,
from the ``CACHE_MIDDLEWARE_SECONDS`` setting.
It doesn't matter where in the middleware stack you put the cache middleware.
See the `middleware documentation`_ for more on middleware.
.. _`middleware documentation`: http://www.djangoproject.com/documentation/middleware/
The per-page cache
==================
@ -134,25 +142,25 @@ Or, using Python 2.4's decorator syntax::
def slashdot_this(request):
...
This will cache the result of that view for 15 minutes. (The cache timeout is
in seconds.)
``cache_page`` takes a single argument: the cache timeout, in seconds. In the
above example, the result of the ``slashdot_this()`` view will be cached for 15
minutes.
The low-level cache API
=======================
There are times, however, that caching an entire rendered page doesn't gain
you very much. The Django developers have found it's only necessary to cache a
list of object IDs from an intensive database query, for example. In cases like
these, you can use the cache API to store objects in the cache with any level
of granularity you like.
Sometimes, however, caching an entire rendered page doesn't gain you very much.
For example, you may find it's only necessary to cache the result of an
intensive database query. In cases like this, you can use the low-level cache API to
store objects in the cache with any level of granularity you like.
The cache API is simple::
# the cache module exports a cache object that's automatically
# created from the CACHE_BACKEND setting
# The cache module exports a cache object that's automatically
# created from the CACHE_BACKEND setting.
>>> from django.core.cache import cache
# The basic interface is set(key, value, timeout_seconds) and get(key)
# The basic interface is set(key, value, timeout_seconds) and get(key).
>>> cache.set('my_key', 'hello, world!', 30)
>>> cache.get('my_key')
'hello, world!'
@ -161,7 +169,7 @@ The cache API is simple::
>>> cache.get('my_key')
None
# get() can take a default argument
# get() can take a default argument.
>>> cache.get('my_key', 'has_expired')
'has_expired'
@ -183,4 +191,108 @@ The cache API is simple::
That's it. The cache has very few restrictions: You can cache any object that
can be pickled safely, although keys must be strings.
.. _memcached: http://www.danga.com/memcached/
Controlling cache: Using Vary headers
=====================================
The Django cache framework works with `HTTP Vary headers`_ to allow developers
to instruct caching mechanisms to vary their cache contents depending on
HTTP request headers.
Essentially, the ``Vary`` response HTTP header defines which request headers a
cache mechanism should take into account when building its cache key.
By default, Django's cache system creates its cache keys using the requested
path -- e.g., ``"/stories/2005/jun/23/bank_robbed/"``. This means every request
to that URL will use the same cached version, regardless of user-agent
differences such as cookies or language preferences.
That's where ``Vary`` comes in.
If your Django-powered page outputs different content based on some difference
in request headers -- such as a cookie, or language, or user-agent -- you'll
need to use the ``Vary`` header to tell caching mechanisms that the page output
depends on those things.
To do this in Django, use the convenient ``vary_on_headers`` view decorator,
like so::
from django.views.decorators.vary import vary_on_headers
# Python 2.3 syntax.
def my_view(request):
...
my_view = vary_on_headers(my_view, 'User-Agent')
# Python 2.4 decorator syntax.
@vary_on_headers('User-Agent')
def my_view(request):
...
In this case, a caching mechanism (such as Django's own cache middleware) will
cache a separate version of the page for each unique user-agent.
The advantage to using the ``vary_on_headers`` decorator rather than manually
setting the ``Vary`` header (using something like
``response['Vary'] = 'user-agent'``) is that the decorator adds to the ``Vary``
header (which may already exist) rather than setting it from scratch.
Note that you can pass multiple headers to ``vary_on_headers()``::
@vary_on_headers('User-Agent', 'Cookie')
def my_view(request):
...
Because varying on cookie is such a common case, there's a ``vary_on_cookie``
decorator. These two views are equivalent::
@vary_on_cookie
def my_view(request):
...
@vary_on_headers('Cookie')
def my_view(request):
...
Also note that the headers you pass to ``vary_on_headers`` are not case
sensitive. ``"User-Agent"`` is the same thing as ``"user-agent"``.
You can also use a helper function, ``patch_vary_headers()``, directly::
from django.utils.cache import patch_vary_headers
def my_view(request):
...
response = render_to_response('template_name', context)
patch_vary_headers(response, ['Cookie'])
return response
``patch_vary_headers`` takes an ``HttpResponse`` instance as its first argument
and a list/tuple of header names as its second argument.
.. _`HTTP Vary headers`: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.44
Other optimizations
===================
Django comes with a few other pieces of middleware that can help optimize your
apps' performance:
* ``django.middleware.http.ConditionalGetMiddleware`` adds support for
conditional GET. This makes use of ``ETag`` and ``Last-Modified``
headers.
* ``django.middleware.gzip.GZipMiddleware`` compresses content for browsers
that understand gzip compression (all modern browsers).
Order of MIDDLEWARE_CLASSES
===========================
If you use ``CacheMiddleware``, it's important to put it in the right place
within the ``MIDDLEWARE_CLASSES`` setting, because the cache middleware needs
to know the headers by which to vary the cache storage. Middleware always
adds something to the ``Vary`` response header when it can.
Put the ``CacheMiddleware`` after any middlewares that might add something to
the ``Vary`` header. The following middlewares do so:
* ``SessionMiddleware`` adds ``Cookie``
* ``GZipMiddleware`` adds ``Accept-Encoding``

View File

@ -88,6 +88,18 @@ Available middleware
addresses defined in the ``INTERNAL_IPS`` setting. This is used by Django's
automatic documentation system.
``django.middleware.gzip.GZipMiddleware``
Compresses content for browsers that understand gzip compression (all
modern browsers).
``django.middleware.http.ConditionalGetMiddleware``
Handles conditional GET operations. If the response has an ``ETag`` or
``Last-Modified`` header, and the request has a matching ``If-None-Match`` or
``If-Modified-Since`` header, the response is turned into a 304 (Not Modified) response with no content.
Also removes the content from any response to a HEAD request and sets the
``Date`` and ``Content-Length`` response-headers.
``django.middleware.sessions.SessionMiddleware``
Enables session support. See the `session documentation`_.