Fixed #27083 -- Added support for weak ETags.

This commit is contained in:
Kevin Christopher Henry 2016-09-01 09:32:20 -04:00 committed by Tim Graham
parent e7abb5ba86
commit 4ef0e019b7
11 changed files with 130 additions and 68 deletions

View File

@ -10,7 +10,6 @@ from django.utils.cache import (
) )
from django.utils.deprecation import MiddlewareMixin from django.utils.deprecation import MiddlewareMixin
from django.utils.encoding import force_text from django.utils.encoding import force_text
from django.utils.http import unquote_etag
from django.utils.six.moves.urllib.parse import urlparse from django.utils.six.moves.urllib.parse import urlparse
@ -122,7 +121,7 @@ class CommonMiddleware(MiddlewareMixin):
if response.has_header('ETag'): if response.has_header('ETag'):
return get_conditional_response( return get_conditional_response(
request, request,
etag=unquote_etag(response['ETag']), etag=response['ETag'],
response=response, response=response,
) )
# Add the Content-Length header to non-streaming responses if not # Add the Content-Length header to non-streaming responses if not

View File

@ -1,6 +1,6 @@
from django.utils.cache import get_conditional_response from django.utils.cache import get_conditional_response
from django.utils.deprecation import MiddlewareMixin from django.utils.deprecation import MiddlewareMixin
from django.utils.http import http_date, parse_http_date_safe, unquote_etag from django.utils.http import http_date, parse_http_date_safe
class ConditionalGetMiddleware(MiddlewareMixin): class ConditionalGetMiddleware(MiddlewareMixin):
@ -24,7 +24,7 @@ class ConditionalGetMiddleware(MiddlewareMixin):
if etag or last_modified: if etag or last_modified:
return get_conditional_response( return get_conditional_response(
request, request,
etag=unquote_etag(etag), etag=etag,
last_modified=last_modified, last_modified=last_modified,
response=response, response=response,
) )

View File

@ -21,7 +21,15 @@ from django.utils.six.moves.urllib.parse import (
urlparse, urlparse,
) )
ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"') # based on RFC 7232, Appendix C
ETAG_MATCH = re.compile(r'''
\A( # start of string and capture group
(?:W/)? # optional weak indicator
" # opening quote
[^"]* # any sequence of non-quote characters
" # end quote
)\Z # end of string and capture group
''', re.X)
MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split() MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
__D = r'(?P<day>\d{2})' __D = r'(?P<day>\d{2})'
@ -234,30 +242,27 @@ def urlsafe_base64_decode(s):
def parse_etags(etag_str): def parse_etags(etag_str):
""" """
Parses a string with one or several etags passed in If-None-Match and Parse a string of ETags given in an If-None-Match or If-Match header as
If-Match headers by the rules in RFC 2616. Returns a list of etags defined by RFC 7232. Return a list of quoted ETags, or ['*'] if all ETags
without surrounding double quotes (") and unescaped from \<CHAR>. should be matched.
""" """
etags = ETAG_MATCH.findall(etag_str) if etag_str.strip() == '*':
if not etags: return ['*']
# etag_str has wrong format, treat it as an opaque string then else:
return [etag_str] # Parse each ETag individually, and return any that are valid.
etags = [e.encode('ascii').decode('unicode_escape') for e in etags] etag_matches = (ETAG_MATCH.match(etag.strip()) for etag in etag_str.split(','))
return etags return [match.group(1) for match in etag_matches if match]
def quote_etag(etag): def quote_etag(etag_str):
""" """
Wraps a string in double quotes escaping contents as necessary. If the provided string is already a quoted ETag, return it. Otherwise, wrap
the string in quotes, making it a strong ETag.
""" """
return '"%s"' % etag.replace('\\', '\\\\').replace('"', '\\"') if ETAG_MATCH.match(etag_str):
return etag_str
else:
def unquote_etag(etag): return '"%s"' % etag_str
"""
Unquote an ETag string; i.e. revert quote_etag().
"""
return etag.strip('"').replace('\\"', '"').replace('\\\\', '\\') if etag else etag
def is_same_domain(host, pattern): def is_same_domain(host, pattern):

View File

@ -62,16 +62,16 @@ def condition(etag_func=None, last_modified_func=None):
None if the resource doesn't exist), while the last_modified function None if the resource doesn't exist), while the last_modified function
should return a datetime object (or None if the resource doesn't exist). should return a datetime object (or None if the resource doesn't exist).
If both parameters are provided, all the preconditions must be met before The ETag function should return a complete ETag, including quotes (e.g.
the view is processed. '"etag"'), since that's the only way to distinguish between weak and strong
ETags. If an unquoted ETag is returned (e.g. 'etag'), it will be converted
to a strong ETag by adding quotes.
This decorator will either pass control to the wrapped view function or This decorator will either pass control to the wrapped view function or
return an HTTP 304 response (unmodified) or 412 response (preconditions return an HTTP 304 response (unmodified) or 412 response (precondition
failed), depending upon the request method. failed), depending upon the request method. In either case, it will add the
generated ETag and Last-Modified headers to the response if it doesn't
Any behavior marked as "undefined" in the HTTP spec (e.g. If-none-match already have them.
plus If-modified-since headers) will result in the view function being
called.
""" """
def decorator(func): def decorator(func):
@wraps(func, assigned=available_attrs(func)) @wraps(func, assigned=available_attrs(func))
@ -83,7 +83,9 @@ def condition(etag_func=None, last_modified_func=None):
if dt: if dt:
return timegm(dt.utctimetuple()) return timegm(dt.utctimetuple())
# The value from etag_func() could be quoted or unquoted.
res_etag = etag_func(request, *args, **kwargs) if etag_func else None res_etag = etag_func(request, *args, **kwargs) if etag_func else None
res_etag = quote_etag(res_etag) if res_etag is not None else None
res_last_modified = get_last_modified() res_last_modified = get_last_modified()
response = get_conditional_response( response = get_conditional_response(
@ -99,7 +101,7 @@ def condition(etag_func=None, last_modified_func=None):
if res_last_modified and not response.has_header('Last-Modified'): if res_last_modified and not response.has_header('Last-Modified'):
response['Last-Modified'] = http_date(res_last_modified) response['Last-Modified'] = http_date(res_last_modified)
if res_etag and not response.has_header('ETag'): if res_etag and not response.has_header('ETag'):
response['ETag'] = quote_etag(res_etag) response['ETag'] = res_etag
return response return response

View File

@ -491,6 +491,9 @@ Miscellaneous
* The admin's widget for ``IntegerField`` uses ``type="number"`` rather than * The admin's widget for ``IntegerField`` uses ``type="number"`` rather than
``type="text"``. ``type="text"``.
* ETags are now parsed according to the :rfc:`7232` Conditional Requests
specification rather than the syntax from :rfc:`2616`.
.. _deprecated-features-1.11: .. _deprecated-features-1.11:
Features deprecated in 1.11 Features deprecated in 1.11

View File

@ -66,6 +66,14 @@ last time the resource was modified, or ``None`` if the resource doesn't
exist. The function passed to the ``etag`` decorator should return a string exist. The function passed to the ``etag`` decorator should return a string
representing the `ETag`_ for the resource, or ``None`` if it doesn't exist. representing the `ETag`_ for the resource, or ``None`` if it doesn't exist.
.. versionchanged:: 1.11
In older versions, the return value from ``etag_func()`` was interpreted as
the unquoted part of the ETag. That prevented the use of weak ETags, which
have the format ``W/"<string>"``. The return value is now expected to be
an ETag as defined by the specification (including the quotes), although
the unquoted format is also accepted for backwards compatibility.
Using this feature usefully is probably best explained with an example. Using this feature usefully is probably best explained with an example.
Suppose you have this pair of models, representing a simple blog system:: Suppose you have this pair of models, representing a simple blog system::

View File

@ -11,8 +11,8 @@ LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT'
LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT' LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT'
LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT' LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT'
EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT' EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT'
ETAG = 'b4246ffc4f62314ca13147c9d4f76974' ETAG = '"b4246ffc4f62314ca13147c9d4f76974"'
EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6' EXPIRED_ETAG = '"7fae4cd4b0f81e7d2914700043aa8ed6"'
@override_settings(ROOT_URLCONF='conditional_processing.urls') @override_settings(ROOT_URLCONF='conditional_processing.urls')
@ -24,7 +24,7 @@ class ConditionalGet(SimpleTestCase):
if check_last_modified: if check_last_modified:
self.assertEqual(response['Last-Modified'], LAST_MODIFIED_STR) self.assertEqual(response['Last-Modified'], LAST_MODIFIED_STR)
if check_etag: if check_etag:
self.assertEqual(response['ETag'], '"%s"' % ETAG) self.assertEqual(response['ETag'], ETAG)
def assertNotModified(self, response): def assertNotModified(self, response):
self.assertEqual(response.status_code, 304) self.assertEqual(response.status_code, 304)
@ -63,66 +63,66 @@ class ConditionalGet(SimpleTestCase):
self.assertEqual(response.status_code, 412) self.assertEqual(response.status_code, 412)
def test_if_none_match(self): def test_if_none_match(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertNotModified(response) self.assertNotModified(response)
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertFullResponse(response) self.assertFullResponse(response)
# Several etags in If-None-Match is a bit exotic but why not? # Several etags in If-None-Match is a bit exotic but why not?
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s", "%s"' % (ETAG, EXPIRED_ETAG) self.client.defaults['HTTP_IF_NONE_MATCH'] = '%s, %s' % (ETAG, EXPIRED_ETAG)
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertNotModified(response) self.assertNotModified(response)
def test_if_match(self): def test_if_match(self):
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG self.client.defaults['HTTP_IF_MATCH'] = ETAG
response = self.client.put('/condition/etag/') response = self.client.put('/condition/etag/')
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG
response = self.client.put('/condition/etag/') response = self.client.put('/condition/etag/')
self.assertEqual(response.status_code, 412) self.assertEqual(response.status_code, 412)
def test_both_headers(self): def test_both_headers(self):
# see http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.4 # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.4
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertNotModified(response) self.assertNotModified(response)
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertFullResponse(response) self.assertFullResponse(response)
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertFullResponse(response) self.assertFullResponse(response)
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertFullResponse(response) self.assertFullResponse(response)
def test_both_headers_2(self): def test_both_headers_2(self):
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG self.client.defaults['HTTP_IF_MATCH'] = ETAG
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertFullResponse(response) self.assertFullResponse(response)
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG self.client.defaults['HTTP_IF_MATCH'] = ETAG
response = self.client.get('/condition/')
self.assertEqual(response.status_code, 412)
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertEqual(response.status_code, 412) self.assertEqual(response.status_code, 412)
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/')
self.assertEqual(response.status_code, 412)
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertEqual(response.status_code, 412) self.assertEqual(response.status_code, 412)
@ -134,7 +134,7 @@ class ConditionalGet(SimpleTestCase):
self.assertFullResponse(response, check_last_modified=False) self.assertFullResponse(response, check_last_modified=False)
def test_single_condition_2(self): def test_single_condition_2(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/etag/') response = self.client.get('/condition/etag/')
self.assertNotModified(response) self.assertNotModified(response)
response = self.client.get('/condition/last_modified/') response = self.client.get('/condition/last_modified/')
@ -146,7 +146,7 @@ class ConditionalGet(SimpleTestCase):
self.assertFullResponse(response, check_etag=False) self.assertFullResponse(response, check_etag=False)
def test_single_condition_4(self): def test_single_condition_4(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/etag/') response = self.client.get('/condition/etag/')
self.assertFullResponse(response, check_last_modified=False) self.assertFullResponse(response, check_last_modified=False)
@ -158,7 +158,7 @@ class ConditionalGet(SimpleTestCase):
self.assertFullResponse(response, check_last_modified=False) self.assertFullResponse(response, check_last_modified=False)
def test_single_condition_6(self): def test_single_condition_6(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/etag2/') response = self.client.get('/condition/etag2/')
self.assertNotModified(response) self.assertNotModified(response)
response = self.client.get('/condition/last_modified2/') response = self.client.get('/condition/last_modified2/')
@ -188,7 +188,34 @@ class ConditionalGet(SimpleTestCase):
response = self.client.head('/condition/') response = self.client.head('/condition/')
self.assertNotModified(response) self.assertNotModified(response)
def test_unquoted(self):
"""
The same quoted ETag should be set on the header regardless of whether
etag_func() in condition() returns a quoted or an unquoted ETag.
"""
response_quoted = self.client.get('/condition/etag/')
response_unquoted = self.client.get('/condition/unquoted_etag/')
self.assertEqual(response_quoted['ETag'], response_unquoted['ETag'])
# It's possible that the matching algorithm could use the wrong value even
# if the ETag header is set correctly (as tested by
# test_unquoted()), so check that the unquoted value is matched.
def test_unquoted_if_none_match(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/unquoted_etag/')
self.assertNotModified(response)
self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/unquoted_etag/')
self.assertFullResponse(response, check_last_modified=False)
def test_all_if_none_match(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '*'
response = self.client.get('/condition/etag/')
self.assertNotModified(response)
response = self.client.get('/condition/no_etag/')
self.assertFullResponse(response, check_last_modified=False, check_etag=False)
def test_invalid_etag(self): def test_invalid_etag(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = r'"\"' self.client.defaults['HTTP_IF_NONE_MATCH'] = '"""'
response = self.client.get('/condition/etag/') response = self.client.get('/condition/etag/')
self.assertFullResponse(response, check_last_modified=False) self.assertFullResponse(response, check_last_modified=False)

View File

@ -8,4 +8,6 @@ urlpatterns = [
url('^condition/last_modified2/$', views.last_modified_view2), url('^condition/last_modified2/$', views.last_modified_view2),
url('^condition/etag/$', views.etag_view1), url('^condition/etag/$', views.etag_view1),
url('^condition/etag2/$', views.etag_view2), url('^condition/etag2/$', views.etag_view2),
url('^condition/unquoted_etag/$', views.etag_view_unquoted),
url('^condition/no_etag/$', views.etag_view_none),
] ]

View File

@ -27,3 +27,19 @@ etag_view1 = condition(etag_func=lambda r: ETAG)(etag_view1)
def etag_view2(request): def etag_view2(request):
return HttpResponse(FULL_RESPONSE) return HttpResponse(FULL_RESPONSE)
etag_view2 = etag(lambda r: ETAG)(etag_view2) etag_view2 = etag(lambda r: ETAG)(etag_view2)
@condition(etag_func=lambda r: ETAG.strip('"'))
def etag_view_unquoted(request):
"""
Use an etag_func() that returns an unquoted ETag.
"""
return HttpResponse(FULL_RESPONSE)
@condition(etag_func=lambda r: None)
def etag_view_none(request):
"""
Use an etag_func() that returns None, as opposed to setting etag_func=None.
"""
return HttpResponse(FULL_RESPONSE)

View File

@ -513,11 +513,6 @@ class ConditionalGetMiddlewareTest(SimpleTestCase):
self.assertEqual(self.resp.status_code, 200) self.assertEqual(self.resp.status_code, 200)
def test_if_none_match_and_same_etag(self): def test_if_none_match_and_same_etag(self):
self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = 'spam'
self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
self.assertEqual(self.resp.status_code, 304)
def test_if_none_match_and_same_etag_with_quotes(self):
self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = '"spam"' self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = '"spam"'
self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
self.assertEqual(self.resp.status_code, 304) self.assertEqual(self.resp.status_code, 304)

View File

@ -208,14 +208,19 @@ class TestUtilsHttp(unittest.TestCase):
class ETagProcessingTests(unittest.TestCase): class ETagProcessingTests(unittest.TestCase):
def test_parsing(self): def test_parsing(self):
etags = http.parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"') self.assertEqual(
self.assertEqual(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak']) http.parse_etags(r'"" , "etag", "e\\tag", W/"weak"'),
['""', '"etag"', r'"e\\tag"', 'W/"weak"']
)
self.assertEqual(http.parse_etags('*'), ['*'])
# Ignore RFC 2616 ETags that are invalid according to RFC 7232.
self.assertEqual(http.parse_etags(r'"etag", "e\"t\"ag"'), ['"etag"'])
def test_quoting(self): def test_quoting(self):
original_etag = r'e\t"ag' self.assertEqual(http.quote_etag('etag'), '"etag"') # unquoted
quoted_etag = http.quote_etag(original_etag) self.assertEqual(http.quote_etag('"etag"'), '"etag"') # quoted
self.assertEqual(quoted_etag, r'"e\\t\"ag"') self.assertEqual(http.quote_etag('W/"etag"'), 'W/"etag"') # quoted, weak
self.assertEqual(http.unquote_etag(quoted_etag), original_etag)
class HttpDateProcessingTests(unittest.TestCase): class HttpDateProcessingTests(unittest.TestCase):