From b203db6ec850fee9ad8f2e2c8873be986325572b Mon Sep 17 00:00:00 2001 From: Malcolm Tredinnick Date: Sun, 22 Mar 2009 07:58:29 +0000 Subject: [PATCH] Fixed #5791 -- Added early-bailout support for views (ETags and Last-modified). This provides support for views that can have their ETag and/or Last-modified values computed much more quickly than the view itself. Supports all HTTP verbs (not just GET). Documentation and tests need a little more fleshing out (I'm not happy with the documentation at the moment, since it's a bit backwards), but the functionality is correct. git-svn-id: http://code.djangoproject.com/svn/django/trunk@10114 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/utils/http.py | 23 +++ django/views/decorators/http.py | 100 ++++++++++++- docs/index.txt | 1 + docs/topics/conditional-view-processing.txt | 134 ++++++++++++++++++ docs/topics/index.txt | 1 + .../conditional_processing/__init__.py | 1 + .../conditional_processing/models.py | 100 +++++++++++++ .../conditional_processing/urls.py | 8 ++ .../conditional_processing/views.py | 17 +++ tests/urls.py | 7 +- 10 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 docs/topics/conditional-view-processing.txt create mode 100644 tests/regressiontests/conditional_processing/__init__.py create mode 100644 tests/regressiontests/conditional_processing/models.py create mode 100644 tests/regressiontests/conditional_processing/urls.py create mode 100644 tests/regressiontests/conditional_processing/views.py diff --git a/django/utils/http.py b/django/utils/http.py index 7d2af95c47f..f0b1af9c586 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -1,9 +1,12 @@ +import re import urllib from email.Utils import formatdate from django.utils.encoding import smart_str, force_unicode from django.utils.functional import allow_lazy +ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"') + def urlquote(url, safe='/'): """ A version of Python's urllib.quote() function that can operate on unicode 
@@ -94,3 +97,31 @@ def int_to_base36(i):
         i = i % j
         factor -= 1
     return ''.join(base36)
+
+def parse_etags(etag_str):
+    """
+    Parses a string with one or several etags passed in If-None-Match and
+    If-Match headers by the rules in RFC 2616. Returns a list of etags
+    without surrounding double quotes (") and unescaped from \.
+
+    If etag_str contains no quoted etag at all, it is treated as a single
+    opaque value and returned unchanged as a one-element list.
+    """
+    etags = ETAG_MATCH.findall(etag_str)
+    if not etags:
+        # etag_str has wrong format, treat it as an opaque string then
+        return [etag_str]
+    # Undo RFC 2616 quoted-pairs ("\" CHAR -> CHAR). This deliberately
+    # avoids str.decode('string_escape'), which raises ValueError on input
+    # such as "\x" and turns "\n" into a newline.
+    return [re.sub(r'\\(.)', r'\1', e) for e in etags]
+
+def quote_etag(etag):
+    """
+    Wraps a string in double quotes escaping contents as necessary.
+
+    Backslashes and double quotes are backslash-escaped so the result can be
+    parsed back by parse_etags().
+    """
+    return '"%s"' % etag.replace('\\', '\\\\').replace('"', '\\"')
+
diff --git a/django/views/decorators/http.py b/django/views/decorators/http.py
index dd4f90ea9c4..ec4695367b6 100644
--- a/django/views/decorators/http.py
+++ b/django/views/decorators/http.py
@@ -7,9 +7,15 @@ try:
 except ImportError:
     from django.utils.functional import wraps  # Python 2.3, 2.4 fallback.
 
+from calendar import timegm
+from datetime import timedelta
+from email.Utils import formatdate
+
 from django.utils.decorators import decorator_from_middleware
+from django.utils.http import parse_etags, quote_etag
 from django.middleware.http import ConditionalGetMiddleware
-from django.http import HttpResponseNotAllowed
+from django.http import HttpResponseNotAllowed, HttpResponseNotModified, HttpResponse
+
 
 conditional_page = decorator_from_middleware(ConditionalGetMiddleware)
 
@@ -36,4 +42,94 @@ require_GET = require_http_methods(["GET"])
 require_GET.__doc__ = "Decorator to require that a view only accept the GET method."
 
 require_POST = require_http_methods(["POST"])
-require_POST.__doc__ = "Decorator to require that a view only accept the POST method."
\ No newline at end of file
+require_POST.__doc__ = "Decorator to require that a view only accept the POST method."
+
+def condition(etag_func=None, last_modified_func=None):
+    """
+    Decorator to support conditional retrieval (or change) for a view
+    function.
+
+    The parameters are callables to compute the ETag and last modified time for
+    the requested resource, respectively. The callables are passed the same
+    parameters as the view itself. The ETag function should return a string (or
+    None if the resource doesn't exist), whilst the last_modified function
+    should return a datetime object (or None if the resource doesn't exist).
+
+    If both parameters are provided, all the preconditions must be met before
+    the view is processed.
+
+    This decorator will either pass control to the wrapped view function or
+    return an HTTP 304 response (unmodified) or 412 response (preconditions
+    failed), depending upon the request method.
+
+    Any behavior marked as "undefined" in the HTTP spec (e.g. If-match
+    plus If-modified-since headers) will result in the view function being
+    called.
+    """
+    def decorator(func):
+        def inner(request, *args, **kwargs):
+            # Get HTTP request headers
+            if_modified_since = request.META.get("HTTP_IF_MODIFIED_SINCE")
+            if_none_match = request.META.get("HTTP_IF_NONE_MATCH")
+            if_match = request.META.get("HTTP_IF_MATCH")
+            if if_none_match or if_match:
+                # There can be more than one ETag in the request, so we
+                # consider the list of values. Only one of the two headers is
+                # acted upon below, so parse whichever one was sent.
+                etags = parse_etags(if_none_match or if_match)
+
+            # Compute values (if any) for the requested resource.
+            res_etag = res_last_modified = None
+            if etag_func:
+                res_etag = etag_func(request, *args, **kwargs)
+            if last_modified_func:
+                dt = last_modified_func(request, *args, **kwargs)
+                if dt:
+                    # formatdate() ends in "-0000"; HTTP dates must end in "GMT".
+                    res_last_modified = formatdate(timegm(dt.utctimetuple()))[:26] + 'GMT'
+
+            response = None
+            if not (if_match and (if_modified_since or if_none_match)):
+                # We only get here if no undefined combinations of headers are
+                # specified.
+                if ((if_none_match and (res_etag in etags or
+                        "*" in etags and res_etag)) and
+                        (not if_modified_since or
+                            res_last_modified == if_modified_since)):
+                    if request.method in ("GET", "HEAD"):
+                        response = HttpResponseNotModified()
+                    else:
+                        response = HttpResponse(status=412)
+                elif if_match and ((not res_etag and "*" in etags) or
+                        (res_etag and "*" not in etags and
+                            res_etag not in etags)):
+                    # "If-Match: *" only fails when the resource doesn't exist.
+                    response = HttpResponse(status=412)
+                elif (not if_none_match and if_modified_since and
+                        request.method == "GET" and
+                        res_last_modified == if_modified_since):
+                    response = HttpResponseNotModified()
+
+            if response is None:
+                response = func(request, *args, **kwargs)
+
+            # Set relevant headers on the response if they don't already exist.
+            if res_last_modified and not response.has_header('Last-Modified'):
+                response['Last-Modified'] = res_last_modified
+            if res_etag and not response.has_header('ETag'):
+                response['ETag'] = quote_etag(res_etag)
+
+            return response
+
+        return wraps(func)(inner)
+    return decorator
+
+# Shortcut decorators for common cases based on ETag or Last-Modified only
+def etag(callable):
+    """Shortcut for condition(etag_func=callable)."""
+    return condition(etag_func=callable)
+
+def last_modified(callable):
+    """Shortcut for condition(last_modified_func=callable)."""
+    return condition(last_modified_func=callable)
+
diff --git a/docs/index.txt b/docs/index.txt
index 2e13d2dbab1..9e96422ccbd 100644
--- a/docs/index.txt
+++ b/docs/index.txt
@@ -81,6 +81,7 @@ Other batteries included
     * :ref:`Admin site `
     * :ref:`Authentication `
     * :ref:`Cache system `
+    * :ref:`Conditional content processing <topics-conditional-processing>`
     * :ref:`Comments `
     * :ref:`Content types `
     * :ref:`Cross Site Request Forgery protection `
diff --git a/docs/topics/conditional-view-processing.txt b/docs/topics/conditional-view-processing.txt
new file mode 100644
index 00000000000..95ad52878b8
--- /dev/null
+++ b/docs/topics/conditional-view-processing.txt
@@ -0,0 +1,134 @@
+.. _topics-conditional-processing:
+
+===========================
+Conditional View Processing
+===========================
+
+.. 
versionadded:: 1.1
+
+HTTP clients can send a number of headers to tell the server about copies of a
+resource that they have already seen. This is commonly used when retrieving a
+web page (using an HTTP ``GET`` request) to avoid sending all the data for
+something the client has already retrieved. However, the same headers can be
+used for all HTTP methods (``POST``, ``PUT``, ``DELETE``, etc).
+
+For each page (response) that Django sends back from a view, it might provide
+two HTTP headers: the ``ETag`` header and the ``Last-Modified`` header. These
+headers are optional on HTTP responses. They can be set by your view function,
+or you can rely on the :class:`~django.middleware.common.CommonMiddleware`
+middleware to set the ``ETag`` header.
+
+When the client next requests the same resource, it might send along a header
+such as `If-modified-since`_, containing the date of the last modification
+time it was sent, or `If-none-match`_, containing the ``ETag`` it was sent.
+If the resource still matches that ETag, or if it has not been modified,
+a 304 status code can be sent back, instead of a full response, telling the
+client that nothing has changed.
+
+.. _If-none-match: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
+.. _If-modified-since: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.25
+
+Django allows simple usage of this feature with
+:class:`django.middleware.http.ConditionalGetMiddleware` and
+:class:`~django.middleware.common.CommonMiddleware`. However, whilst being
+easy to use and suitable for many situations, they both have limitations for
+advanced usage:
+
+    * They are applied globally to all views in your project
+    * They don't save you from generating the response itself, which may be
+      expensive
+    * They are only appropriate for HTTP ``GET`` requests.
+
+.. _conditional-decorators:
+
+Decorators
+==========
+
+When you need more fine-grained control you may use per-view conditional
+processing functions.
+
+The decorators ``django.views.decorators.http.etag`` and
+``django.views.decorators.http.last_modified`` each accept a user-defined
+function that takes the same parameters as the view itself. The function
+passed to ``last_modified`` should return a standard datetime value specifying
+the last time the resource was modified, or ``None`` if the resource doesn't
+exist. The function passed to the ``etag`` decorator should return a string
+representing the `ETag`_ for the resource, or ``None`` if it doesn't exist.
+
+.. _ETag: http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.11
+
+For example::
+
+    # Compute the last-modified time from when the object was last saved.
+    @last_modified(lambda r, obj_id: MyObject.objects.get(pk=obj_id).update_time)
+    def my_object_view(request, obj_id):
+        # Expensive generation of response with MyObject instance
+        ...
+
+Of course, you can always use the non-decorator form if you're using Python
+2.3 or don't like the decorator syntax::
+
+    def my_object_view(request, obj_id):
+        ...
+    my_object_view = last_modified(my_func)(my_object_view)
+
+Using the ``etag`` decorator is similar.
+
+In practice, though, you won't know if the client is going to send the
+``If-modified-since`` or the ``If-none-match`` header. If you can quickly compute
+both values and want to short-circuit as often as possible, you'll need to use
+the ``condition`` decorator described below.
+
+HTTP allows you to use both "ETag" and "Last-Modified" headers in your response.
+Then a response is considered not modified only if the client sends both
+headers back and they're both equal to the response headers. This means that
+you can't just chain decorators on your view::
+
+    # Bad code. Don't do this!
+    @etag(etag_func)
+    @last_modified(last_modified_func)
+    def my_view(request):
+        # ...
+
+    # End of bad code.
+
+The first decorator doesn't know anything about the second and might
+answer that the response is not modified even if the second decorator would
+determine otherwise. In this case you should use a more general decorator -
+``django.views.decorators.http.condition`` that accepts two functions at once::
+
+    # The correct way to implement the above example
+    @condition(etag_func, last_modified_func)
+    def my_view(request):
+        # ...
+
+Using the decorators with other HTTP methods
+============================================
+
+The ``condition`` decorator is useful for more than only ``GET`` and
+``HEAD`` requests (``HEAD`` requests are the same as ``GET`` in this
+situation). It can also be used to provide checking for ``POST``,
+``PUT`` and ``DELETE`` requests. In these situations, the idea isn't to return
+a "not modified" response, but to tell the client that the resource they are
+trying to change has been altered in the meantime.
+
+For example, consider the following exchange between the client and server:
+
+    1. Client requests ``/foo/``.
+    2. Server responds with some content with an ETag of ``"abcd1234"``.
+    3. Client sends an HTTP ``PUT`` request to ``/foo/`` to update the
+       resource. It sends an ``If-Match: "abcd1234"`` header to specify the
+       version it is trying to update.
+    4. Server checks to see if the resource has changed, by computing the ETag
+       the same way it does for a ``GET`` request (using the same function).
+       If the resource *has* changed, it will return a 412 status code,
+       meaning "precondition failed".
+    5. Client sends a ``GET`` request to ``/foo/``, after receiving a 412
+       response, to retrieve an updated version of the content before updating
+       it.
+
+The important thing this example shows is that the same functions can be used
+to compute the ETag and last modification values in all situations. In fact,
+you *should* use the same functions, so that the same values are returned
+every time.
+
diff --git a/docs/topics/index.txt b/docs/topics/index.txt
index d4a32ab6cec..20d7aa30610 100644
--- a/docs/topics/index.txt
+++ b/docs/topics/index.txt
@@ -18,6 +18,7 @@ Introductions to all the key parts of Django you'll need to know:
    testing
    auth
    cache
+   conditional-view-processing
    email
    i18n
    pagination
diff --git a/tests/regressiontests/conditional_processing/__init__.py b/tests/regressiontests/conditional_processing/__init__.py
new file mode 100644
index 00000000000..380474e035b
--- /dev/null
+++ b/tests/regressiontests/conditional_processing/__init__.py
@@ -0,0 +1 @@
+# -*- coding:utf-8 -*-
diff --git a/tests/regressiontests/conditional_processing/models.py b/tests/regressiontests/conditional_processing/models.py
new file mode 100644
index 00000000000..5c52acbd52c
--- /dev/null
+++ b/tests/regressiontests/conditional_processing/models.py
@@ -0,0 +1,109 @@
+# -*- coding:utf-8 -*-
+from datetime import datetime, timedelta
+from calendar import timegm
+
+from django.test import TestCase
+from django.utils.http import parse_etags, quote_etag
+
+# Fixtures shared with views.py: the response body plus the validators the
+# test views report, and "expired" variants that must not match them.
+FULL_RESPONSE = 'Test conditional get response'
+LAST_MODIFIED = datetime(2007, 10, 21, 23, 21, 47)
+LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT'
+EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT'
+ETAG = 'b4246ffc4f62314ca13147c9d4f76974'
+EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6'
+
+class ConditionalGet(TestCase):
+    """Exercises the @condition-decorated views mounted under /condition/."""
+    def assertFullResponse(self, response, check_last_modified=True, check_etag=True):
+        # A 200 with the full body and, unless disabled, both validator headers.
+        self.assertEquals(response.status_code, 200)
+        self.assertEquals(response.content, FULL_RESPONSE)
+        if check_last_modified:
+            self.assertEquals(response['Last-Modified'], LAST_MODIFIED_STR)
+        if check_etag:
+            self.assertEquals(response['ETag'], '"%s"' % ETAG)
+
+    def assertNotModified(self, response):
+        # A 304 must carry no body.
+        self.assertEquals(response.status_code, 304)
+        self.assertEquals(response.content, '')
+
+    def testWithoutConditions(self):
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+    def testIfModifiedSince(self):
+        # Matching date gives 304; a stale date gets the full response.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
+        response = self.client.get('/condition/')
+        self.assertNotModified(response)
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+    def testIfNoneMatch(self):
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        response = self.client.get('/condition/')
+        self.assertNotModified(response)
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+        # Several etags in If-None-Match is a bit exotic but why not?
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s", "%s"' % (ETAG, EXPIRED_ETAG)
+        response = self.client.get('/condition/')
+        self.assertNotModified(response)
+
+    def testBothHeaders(self):
+        # Both validators must match for a 304; either one stale gives a 200.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        response = self.client.get('/condition/')
+        self.assertNotModified(response)
+
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+    def testSingleCondition1(self):
+        # Each view only honours the header its decorator was configured for.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
+        response = self.client.get('/condition/last_modified/')
+        self.assertNotModified(response)
+        response = self.client.get('/condition/etag/')
+        self.assertFullResponse(response, check_last_modified=False)
+
+    def testSingleCondition2(self):
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        response = self.client.get('/condition/etag/')
+        self.assertNotModified(response)
+        response = self.client.get('/condition/last_modified/')
+        self.assertFullResponse(response, check_etag=False)
+
+    def testSingleCondition3(self):
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
+        response = self.client.get('/condition/last_modified/')
+        self.assertFullResponse(response, check_etag=False)
+
+    def testSingleCondition4(self):
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        response = self.client.get('/condition/etag/')
+        self.assertFullResponse(response, check_last_modified=False)
+
+class ETagProcesing(TestCase):
+    """Unit tests for the parse_etags/quote_etag helpers."""
+    def testParsing(self):
+        etags = parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"')
+        self.assertEquals(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak'])
+
+    def testQuoting(self):
+        quoted_etag = quote_etag(r'e\t"ag')
+        self.assertEquals(quoted_etag, r'"e\\t\"ag"')
diff --git a/tests/regressiontests/conditional_processing/urls.py b/tests/regressiontests/conditional_processing/urls.py
new file mode 100644
index 00000000000..938a4e48330
--- /dev/null
+++ b/tests/regressiontests/conditional_processing/urls.py
@@ -0,0 +1,8 @@
+from django.conf.urls.defaults import *
+import views
+
+urlpatterns = patterns('',
+    ('^$', views.index),
+    ('^last_modified/$', views.last_modified),
+    ('^etag/$', views.etag),
+)
diff --git a/tests/regressiontests/conditional_processing/views.py b/tests/regressiontests/conditional_processing/views.py
new file mode 100644
index 00000000000..c88236e21b6
--- /dev/null
+++ b/tests/regressiontests/conditional_processing/views.py
@@ -0,0 +1,17 @@
+# -*- coding:utf-8 -*-
+from django.views.decorators.http import condition
+from django.http import HttpResponse
+
+from models import FULL_RESPONSE, LAST_MODIFIED, ETAG
+
+@condition(lambda r: ETAG, lambda r: LAST_MODIFIED)
+def index(request):
+    return HttpResponse(FULL_RESPONSE)
+
+@condition(last_modified_func=lambda r: 
LAST_MODIFIED)
+def last_modified(request):
+    return HttpResponse(FULL_RESPONSE)
+
+@condition(etag_func=lambda r: ETAG)
+def etag(request):
+    return HttpResponse(FULL_RESPONSE)
diff --git a/tests/urls.py b/tests/urls.py
index 43806de9208..67048292318 100644
--- a/tests/urls.py
+++ b/tests/urls.py
@@ -20,11 +20,11 @@ urlpatterns = patterns('',
 
     # test urlconf for middleware tests
     (r'^middleware/', include('regressiontests.middleware.urls')),
-    
+
     # admin view tests
     (r'^test_admin/', include('regressiontests.admin_views.urls')),
     (r'^generic_inline_admin/', include('regressiontests.generic_inline_admin.urls')),
-    
+
     # admin widget tests
     (r'widget_admin/', include('regressiontests.admin_widgets.urls')),
 
@@ -32,4 +32,7 @@ urlpatterns = patterns('',
 
     # test urlconf for syndication tests
     (r'^syndication/', include('regressiontests.syndication.urls')),
+
+    # conditional get views (anchored so it cannot match mid-path)
+    (r'^condition/', include('regressiontests.conditional_processing.urls')),
 )