From dbe6ced0d6911386d731a045e00b0d4c005b8e45 Mon Sep 17 00:00:00 2001 From: Luke Plant Date: Tue, 1 Mar 2011 14:28:06 +0000 Subject: [PATCH] Fixed #717 - If-Modified-Since handling should compare dates according to RFC 2616 Thanks to Maniac for the report, julienb for the initial patch, and especially to aaugustin for the final patch and tests. git-svn-id: http://code.djangoproject.com/svn/django/trunk@15696 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/middleware/http.py | 18 ++-- django/utils/http.py | 55 ++++++++++++ django/views/decorators/http.py | 18 ++-- django/views/static.py | 8 +- .../conditional_processing/models.py | 34 +++++++- tests/regressiontests/middleware/tests.py | 87 +++++++++++++++++++ tests/regressiontests/views/tests/static.py | 2 +- 7 files changed, 196 insertions(+), 26 deletions(-) diff --git a/django/middleware/http.py b/django/middleware/http.py index 13c1b89c41..e98858f772 100644 --- a/django/middleware/http.py +++ b/django/middleware/http.py @@ -1,5 +1,5 @@ from django.core.exceptions import MiddlewareNotUsed -from django.utils.http import http_date +from django.utils.http import http_date, parse_http_date_safe class ConditionalGetMiddleware(object): """ @@ -15,7 +15,7 @@ class ConditionalGetMiddleware(object): response['Content-Length'] = str(len(response.content)) if response.has_header('ETag'): - if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None) + if_none_match = request.META.get('HTTP_IF_NONE_MATCH') if if_none_match == response['ETag']: # Setting the status is enough here. The response handling path # automatically removes content for this status code (in @@ -23,10 +23,14 @@ class ConditionalGetMiddleware(object): response.status_code = 304 if response.has_header('Last-Modified'): - if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None) - if if_modified_since == response['Last-Modified']: - # Setting the status code is enough here (same reasons as - # above). 
- response.status_code = 304 + if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE') + if if_modified_since is not None: + if_modified_since = parse_http_date_safe(if_modified_since) + if if_modified_since is not None: + last_modified = parse_http_date_safe(response['Last-Modified']) + if last_modified is not None and last_modified <= if_modified_since: + # Setting the status code is enough here (same reasons as + # above). + response.status_code = 304 return response diff --git a/django/utils/http.py b/django/utils/http.py index 1384b4294c..bdc367c8f7 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -1,3 +1,5 @@ +import calendar +import datetime import re import sys import urllib @@ -8,6 +10,17 @@ from django.utils.functional import allow_lazy ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"') +MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split() +__D = r'(?P<day>\d{2})' +__D2 = r'(?P<day>[ \d]\d)' +__M = r'(?P<mon>\w{3})' +__Y = r'(?P<year>\d{4})' +__Y2 = r'(?P<year>\d{2})' +__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})' +RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T)) +RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T)) +ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) + def urlquote(url, safe='/'): """ A version of Python's urllib.quote() function that can operate on unicode @@ -70,6 +83,48 @@ def http_date(epoch_seconds=None): rfcdate = formatdate(epoch_seconds) return '%s GMT' % rfcdate[:25] +def parse_http_date(date): + """ + Parses a date format as specified by HTTP RFC2616 section 3.3.1. + + The three formats allowed by the RFC are accepted, even if only the first + one is still in widespread use. + + Returns an integer expressed in seconds since the epoch, in + UTC. + """ + # email.Utils.parsedate does the job for RFC1123 dates; unfortunately + # RFC2616 makes it mandatory to support RFC850 dates too. So we roll + # our own RFC-compliant parsing. 
+ for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE: + m = regex.match(date) + if m is not None: + break + else: + raise ValueError("%r is not in a valid HTTP date format" % date) + try: + year = int(m.group('year')) + if year < 100: + year += 2000 if year < 70 else 1900 + month = MONTHS.index(m.group('mon').lower()) + 1 + day = int(m.group('day')) + hour = int(m.group('hour')) + min = int(m.group('min')) + sec = int(m.group('sec')) + result = datetime.datetime(year, month, day, hour, min, sec) + return calendar.timegm(result.utctimetuple()) + except Exception: + raise ValueError("%r is not a valid date" % date) + +def parse_http_date_safe(date): + """ + Same as parse_http_date, but returns None if the input is invalid. + """ + try: + return parse_http_date(date) + except Exception: + pass + # Base 36 functions: useful for generating compact URLs def base36_to_int(s): diff --git a/django/views/decorators/http.py b/django/views/decorators/http.py index b763d6ee92..fb3181e10e 100644 --- a/django/views/decorators/http.py +++ b/django/views/decorators/http.py @@ -9,10 +9,9 @@ except ImportError: from calendar import timegm from datetime import timedelta -from email.Utils import formatdate from django.utils.decorators import decorator_from_middleware, available_attrs -from django.utils.http import parse_etags, quote_etag +from django.utils.http import http_date, parse_http_date_safe, parse_etags, quote_etag from django.utils.log import getLogger from django.middleware.http import ConditionalGetMiddleware from django.http import HttpResponseNotAllowed, HttpResponseNotModified, HttpResponse @@ -79,6 +78,8 @@ def condition(etag_func=None, last_modified_func=None): def inner(request, *args, **kwargs): # Get HTTP request headers if_modified_since = request.META.get("HTTP_IF_MODIFIED_SINCE") + if if_modified_since: + if_modified_since = parse_http_date_safe(if_modified_since) if_none_match = request.META.get("HTTP_IF_NONE_MATCH") if_match = 
request.META.get("HTTP_IF_MATCH") if if_none_match or if_match: @@ -102,7 +103,7 @@ def condition(etag_func=None, last_modified_func=None): if last_modified_func: dt = last_modified_func(request, *args, **kwargs) if dt: - res_last_modified = formatdate(timegm(dt.utctimetuple()))[:26] + 'GMT' + res_last_modified = timegm(dt.utctimetuple()) else: res_last_modified = None else: @@ -116,7 +117,8 @@ def condition(etag_func=None, last_modified_func=None): if ((if_none_match and (res_etag in etags or "*" in etags and res_etag)) and (not if_modified_since or - res_last_modified == if_modified_since)): + (res_last_modified and if_modified_since and + res_last_modified <= if_modified_since))): if request.method in ("GET", "HEAD"): response = HttpResponseNotModified() else: @@ -136,9 +138,9 @@ def condition(etag_func=None, last_modified_func=None): } ) response = HttpResponse(status=412) - elif (not if_none_match and if_modified_since and - request.method == "GET" and - res_last_modified == if_modified_since): + elif (not if_none_match and request.method == "GET" and + res_last_modified and if_modified_since and + res_last_modified <= if_modified_since): response = HttpResponseNotModified() if response is None: @@ -146,7 +148,7 @@ def condition(etag_func=None, last_modified_func=None): # Set relevant headers on the response if they don't already exist. 
if res_last_modified and not response.has_header('Last-Modified'): - response['Last-Modified'] = res_last_modified + response['Last-Modified'] = http_date(res_last_modified) if res_etag and not response.has_header('ETag'): response['ETag'] = quote_etag(res_etag) diff --git a/django/views/static.py b/django/views/static.py index da1158d9d2..3aeb2ed98e 100644 --- a/django/views/static.py +++ b/django/views/static.py @@ -9,12 +9,11 @@ import posixpath import re import stat import urllib -from email.Utils import parsedate_tz, mktime_tz from django.template import loader from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseNotModified from django.template import Template, Context, TemplateDoesNotExist -from django.utils.http import http_date +from django.utils.http import http_date, parse_http_date def serve(request, path, document_root=None, show_indexes=False): """ @@ -128,10 +127,7 @@ def was_modified_since(header=None, mtime=0, size=0): raise ValueError matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header, re.IGNORECASE) - header_date = parsedate_tz(matches.group(1)) - if header_date is None: - raise ValueError - header_mtime = mktime_tz(header_date) + header_mtime = parse_http_date(matches.group(1)) header_len = matches.group(3) if header_len and int(header_len) != size: raise ValueError diff --git a/tests/regressiontests/conditional_processing/models.py b/tests/regressiontests/conditional_processing/models.py index b291aed337..129d11b07f 100644 --- a/tests/regressiontests/conditional_processing/models.py +++ b/tests/regressiontests/conditional_processing/models.py @@ -1,17 +1,20 @@ # -*- coding:utf-8 -*- -from datetime import datetime, timedelta -from calendar import timegm +from datetime import datetime from django.test import TestCase -from django.utils.http import parse_etags, quote_etag +from django.utils import unittest +from django.utils.http import parse_etags, quote_etag, parse_http_date FULL_RESPONSE = 'Test conditional 
get response' LAST_MODIFIED = datetime(2007, 10, 21, 23, 21, 47) LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT' +LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT' +LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT' EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT' ETAG = 'b4246ffc4f62314ca13147c9d4f76974' EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6' + class ConditionalGet(TestCase): def assertFullResponse(self, response, check_last_modified=True, check_etag=True): self.assertEquals(response.status_code, 200) @@ -33,6 +36,12 @@ class ConditionalGet(TestCase): self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR response = self.client.get('/condition/') self.assertNotModified(response) + self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_NEWER_STR + response = self.client.get('/condition/') + self.assertNotModified(response) + self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_INVALID_STR + response = self.client.get('/condition/') + self.assertFullResponse(response) self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR response = self.client.get('/condition/') self.assertFullResponse(response) @@ -118,7 +127,7 @@ class ConditionalGet(TestCase): self.assertFullResponse(response, check_last_modified=False) -class ETagProcesing(TestCase): +class ETagProcessing(unittest.TestCase): def testParsing(self): etags = parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"') self.assertEquals(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak']) @@ -126,3 +135,20 @@ class ETagProcesing(TestCase): def testQuoting(self): quoted_etag = quote_etag(r'e\t"ag') self.assertEquals(quoted_etag, r'"e\\t\"ag"') + + +class HttpDateProcessing(unittest.TestCase): + def testParsingRfc1123(self): + parsed = parse_http_date('Sun, 06 Nov 1994 08:49:37 GMT') + self.assertEqual(datetime.utcfromtimestamp(parsed), + datetime(1994, 11, 06, 8, 49, 37)) + + def testParsingRfc850(self): + parsed = 
parse_http_date('Sunday, 06-Nov-94 08:49:37 GMT') + self.assertEqual(datetime.utcfromtimestamp(parsed), + datetime(1994, 11, 06, 8, 49, 37)) + + def testParsingAsctime(self): + parsed = parse_http_date('Sun Nov  6 08:49:37 1994') + self.assertEqual(datetime.utcfromtimestamp(parsed), + datetime(1994, 11, 06, 8, 49, 37)) diff --git a/tests/regressiontests/middleware/tests.py b/tests/regressiontests/middleware/tests.py index b77a2a3813..5d90ffc5dc 100644 --- a/tests/regressiontests/middleware/tests.py +++ b/tests/regressiontests/middleware/tests.py @@ -3,6 +3,7 @@ from django.conf import settings from django.http import HttpRequest from django.middleware.common import CommonMiddleware +from django.middleware.http import ConditionalGetMiddleware from django.test import TestCase @@ -247,3 +248,89 @@ class CommonMiddlewareTest(TestCase): self.assertEquals(r.status_code, 301) self.assertEquals(r['Location'], 'http://www.testserver/middleware/customurlconf/slash/') + +class ConditionalGetMiddlewareTest(TestCase): + urls = 'regressiontests.middleware.cond_get_urls' + def setUp(self): + self.req = HttpRequest() + self.req.META = { + 'SERVER_NAME': 'testserver', + 'SERVER_PORT': 80, + } + self.req.path = self.req.path_info = "/" + self.resp = self.client.get(self.req.path) + + # Tests for the Date header + + def test_date_header_added(self): + self.assertFalse('Date' in self.resp) + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertTrue('Date' in self.resp) + + # Tests for the Content-Length header + + def test_content_length_header_added(self): + content_length = len(self.resp.content) + self.assertFalse('Content-Length' in self.resp) + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertTrue('Content-Length' in self.resp) + self.assertEqual(int(self.resp['Content-Length']), content_length) + + def test_content_length_header_not_changed(self): + bad_content_length = len(self.resp.content) + 10 + 
self.resp['Content-Length'] = bad_content_length + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEqual(int(self.resp['Content-Length']), bad_content_length) + + # Tests for the ETag header + + def test_if_none_match_and_no_etag(self): + self.req.META['HTTP_IF_NONE_MATCH'] = 'spam' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + def test_no_if_none_match_and_etag(self): + self.resp['ETag'] = 'eggs' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + def test_if_none_match_and_same_etag(self): + self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = 'spam' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 304) + + def test_if_none_match_and_different_etag(self): + self.req.META['HTTP_IF_NONE_MATCH'] = 'spam' + self.resp['ETag'] = 'eggs' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + # Tests for the Last-Modified header + + def test_if_modified_since_and_no_last_modified(self): + self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + def test_no_if_modified_since_and_last_modified(self): + self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + def test_if_modified_since_and_same_last_modified(self): + self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + 
self.assertEquals(self.resp.status_code, 304) + + def test_if_modified_since_and_last_modified_in_the_past(self): + self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:35:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 304) + + def test_if_modified_since_and_last_modified_in_the_future(self): + self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:41:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) diff --git a/tests/regressiontests/views/tests/static.py b/tests/regressiontests/views/tests/static.py index c0565a17ee..e3bc1643c5 100644 --- a/tests/regressiontests/views/tests/static.py +++ b/tests/regressiontests/views/tests/static.py @@ -51,7 +51,7 @@ class StaticTests(TestCase): file_name = 'file.txt' response = self.client.get( '/views/%s/%s' % (self.prefix, file_name), - HTTP_IF_MODIFIED_SINCE='Mon, 18 Jan 2038 05:14:07 UTC' + HTTP_IF_MODIFIED_SINCE='Mon, 18 Jan 2038 05:14:07 GMT' # This is 24h before max Unix time. Remember to fix Django and # update this test well before 2038 :) )