Fixed #27083 -- Added support for weak ETags.

This commit is contained in:
Kevin Christopher Henry 2016-09-01 09:32:20 -04:00 committed by Tim Graham
parent e7abb5ba86
commit 4ef0e019b7
11 changed files with 130 additions and 68 deletions

View File

@ -10,7 +10,6 @@ from django.utils.cache import (
)
from django.utils.deprecation import MiddlewareMixin
from django.utils.encoding import force_text
from django.utils.http import unquote_etag
from django.utils.six.moves.urllib.parse import urlparse
@ -122,7 +121,7 @@ class CommonMiddleware(MiddlewareMixin):
if response.has_header('ETag'):
return get_conditional_response(
request,
etag=unquote_etag(response['ETag']),
etag=response['ETag'],
response=response,
)
# Add the Content-Length header to non-streaming responses if not

View File

@ -1,6 +1,6 @@
from django.utils.cache import get_conditional_response
from django.utils.deprecation import MiddlewareMixin
from django.utils.http import http_date, parse_http_date_safe, unquote_etag
from django.utils.http import http_date, parse_http_date_safe
class ConditionalGetMiddleware(MiddlewareMixin):
@ -24,7 +24,7 @@ class ConditionalGetMiddleware(MiddlewareMixin):
if etag or last_modified:
return get_conditional_response(
request,
etag=unquote_etag(etag),
etag=etag,
last_modified=last_modified,
response=response,
)

View File

@ -21,7 +21,15 @@ from django.utils.six.moves.urllib.parse import (
urlparse,
)
ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"')
# based on RFC 7232, Appendix C
ETAG_MATCH = re.compile(r'''
\A( # start of string and capture group
(?:W/)? # optional weak indicator
" # opening quote
[^"]* # any sequence of non-quote characters
" # end quote
)\Z # end of string and capture group
''', re.X)
MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
__D = r'(?P<day>\d{2})'
@ -234,30 +242,27 @@ def urlsafe_base64_decode(s):
def parse_etags(etag_str):
"""
Parses a string with one or several etags passed in If-None-Match and
If-Match headers by the rules in RFC 2616. Returns a list of etags
without surrounding double quotes (") and unescaped from \<CHAR>.
Parse a string of ETags given in an If-None-Match or If-Match header as
defined by RFC 7232. Return a list of quoted ETags, or ['*'] if all ETags
should be matched.
"""
etags = ETAG_MATCH.findall(etag_str)
if not etags:
# etag_str has wrong format, treat it as an opaque string then
return [etag_str]
etags = [e.encode('ascii').decode('unicode_escape') for e in etags]
return etags
if etag_str.strip() == '*':
return ['*']
else:
# Parse each ETag individually, and return any that are valid.
etag_matches = (ETAG_MATCH.match(etag.strip()) for etag in etag_str.split(','))
return [match.group(1) for match in etag_matches if match]
def quote_etag(etag):
def quote_etag(etag_str):
"""
Wraps a string in double quotes escaping contents as necessary.
If the provided string is already a quoted ETag, return it. Otherwise, wrap
the string in quotes, making it a strong ETag.
"""
return '"%s"' % etag.replace('\\', '\\\\').replace('"', '\\"')
def unquote_etag(etag):
"""
Unquote an ETag string; i.e. revert quote_etag().
"""
return etag.strip('"').replace('\\"', '"').replace('\\\\', '\\') if etag else etag
if ETAG_MATCH.match(etag_str):
return etag_str
else:
return '"%s"' % etag_str
def is_same_domain(host, pattern):

View File

@ -62,16 +62,16 @@ def condition(etag_func=None, last_modified_func=None):
None if the resource doesn't exist), while the last_modified function
should return a datetime object (or None if the resource doesn't exist).
If both parameters are provided, all the preconditions must be met before
the view is processed.
The ETag function should return a complete ETag, including quotes (e.g.
'"etag"'), since that's the only way to distinguish between weak and strong
ETags. If an unquoted ETag is returned (e.g. 'etag'), it will be converted
to a strong ETag by adding quotes.
This decorator will either pass control to the wrapped view function or
return an HTTP 304 response (unmodified) or 412 response (preconditions
failed), depending upon the request method.
Any behavior marked as "undefined" in the HTTP spec (e.g. If-none-match
plus If-modified-since headers) will result in the view function being
called.
return an HTTP 304 response (unmodified) or 412 response (precondition
failed), depending upon the request method. In either case, it will add the
generated ETag and Last-Modified headers to the response if it doesn't
already have them.
"""
def decorator(func):
@wraps(func, assigned=available_attrs(func))
@ -83,7 +83,9 @@ def condition(etag_func=None, last_modified_func=None):
if dt:
return timegm(dt.utctimetuple())
# The value from etag_func() could be quoted or unquoted.
res_etag = etag_func(request, *args, **kwargs) if etag_func else None
res_etag = quote_etag(res_etag) if res_etag is not None else None
res_last_modified = get_last_modified()
response = get_conditional_response(
@ -99,7 +101,7 @@ def condition(etag_func=None, last_modified_func=None):
if res_last_modified and not response.has_header('Last-Modified'):
response['Last-Modified'] = http_date(res_last_modified)
if res_etag and not response.has_header('ETag'):
response['ETag'] = quote_etag(res_etag)
response['ETag'] = res_etag
return response

View File

@ -491,6 +491,9 @@ Miscellaneous
* The admin's widget for ``IntegerField`` uses ``type="number"`` rather than
``type="text"``.
* ETags are now parsed according to the :rfc:`7232` Conditional Requests
specification rather than the syntax from :rfc:`2616`.
.. _deprecated-features-1.11:
Features deprecated in 1.11

View File

@ -66,6 +66,14 @@ last time the resource was modified, or ``None`` if the resource doesn't
exist. The function passed to the ``etag`` decorator should return a string
representing the `ETag`_ for the resource, or ``None`` if it doesn't exist.
.. versionchanged:: 1.11
In older versions, the return value from ``etag_func()`` was interpreted as
the unquoted part of the ETag. That prevented the use of weak ETags, which
have the format ``W/"<string>"``. The return value is now expected to be
an ETag as defined by the specification (including the quotes), although
the unquoted format is also accepted for backwards compatibility.
Using this feature usefully is probably best explained with an example.
Suppose you have this pair of models, representing a simple blog system::

View File

@ -11,8 +11,8 @@ LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT'
LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT'
LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT'
EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT'
ETAG = 'b4246ffc4f62314ca13147c9d4f76974'
EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6'
ETAG = '"b4246ffc4f62314ca13147c9d4f76974"'
EXPIRED_ETAG = '"7fae4cd4b0f81e7d2914700043aa8ed6"'
@override_settings(ROOT_URLCONF='conditional_processing.urls')
@ -24,7 +24,7 @@ class ConditionalGet(SimpleTestCase):
if check_last_modified:
self.assertEqual(response['Last-Modified'], LAST_MODIFIED_STR)
if check_etag:
self.assertEqual(response['ETag'], '"%s"' % ETAG)
self.assertEqual(response['ETag'], ETAG)
def assertNotModified(self, response):
self.assertEqual(response.status_code, 304)
@ -63,66 +63,66 @@ class ConditionalGet(SimpleTestCase):
self.assertEqual(response.status_code, 412)
def test_if_none_match(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/')
self.assertNotModified(response)
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/')
self.assertFullResponse(response)
# Several etags in If-None-Match is a bit exotic but why not?
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s", "%s"' % (ETAG, EXPIRED_ETAG)
self.client.defaults['HTTP_IF_NONE_MATCH'] = '%s, %s' % (ETAG, EXPIRED_ETAG)
response = self.client.get('/condition/')
self.assertNotModified(response)
def test_if_match(self):
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG
self.client.defaults['HTTP_IF_MATCH'] = ETAG
response = self.client.put('/condition/etag/')
self.assertEqual(response.status_code, 200)
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG
self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG
response = self.client.put('/condition/etag/')
self.assertEqual(response.status_code, 412)
def test_both_headers(self):
# see http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.4
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/')
self.assertNotModified(response)
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/')
self.assertFullResponse(response)
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/')
self.assertFullResponse(response)
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/')
self.assertFullResponse(response)
def test_both_headers_2(self):
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG
self.client.defaults['HTTP_IF_MATCH'] = ETAG
response = self.client.get('/condition/')
self.assertFullResponse(response)
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG
response = self.client.get('/condition/')
self.assertEqual(response.status_code, 412)
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG
self.client.defaults['HTTP_IF_MATCH'] = ETAG
response = self.client.get('/condition/')
self.assertEqual(response.status_code, 412)
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG
self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/')
self.assertEqual(response.status_code, 412)
self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR
self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/')
self.assertEqual(response.status_code, 412)
@ -134,7 +134,7 @@ class ConditionalGet(SimpleTestCase):
self.assertFullResponse(response, check_last_modified=False)
def test_single_condition_2(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/etag/')
self.assertNotModified(response)
response = self.client.get('/condition/last_modified/')
@ -146,7 +146,7 @@ class ConditionalGet(SimpleTestCase):
self.assertFullResponse(response, check_etag=False)
def test_single_condition_4(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/etag/')
self.assertFullResponse(response, check_last_modified=False)
@ -158,7 +158,7 @@ class ConditionalGet(SimpleTestCase):
self.assertFullResponse(response, check_last_modified=False)
def test_single_condition_6(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/etag2/')
self.assertNotModified(response)
response = self.client.get('/condition/last_modified2/')
@ -188,7 +188,34 @@ class ConditionalGet(SimpleTestCase):
response = self.client.head('/condition/')
self.assertNotModified(response)
def test_unquoted(self):
"""
The same quoted ETag should be set on the header regardless of whether
etag_func() in condition() returns a quoted or an unquoted ETag.
"""
response_quoted = self.client.get('/condition/etag/')
response_unquoted = self.client.get('/condition/unquoted_etag/')
self.assertEqual(response_quoted['ETag'], response_unquoted['ETag'])
# It's possible that the matching algorithm could use the wrong value even
# if the ETag header is set correctly correctly (as tested by
# test_unquoted()), so check that the unquoted value is matched.
def test_unquoted_if_none_match(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
response = self.client.get('/condition/unquoted_etag/')
self.assertNotModified(response)
self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
response = self.client.get('/condition/unquoted_etag/')
self.assertFullResponse(response, check_last_modified=False)
def test_all_if_none_match(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = '*'
response = self.client.get('/condition/etag/')
self.assertNotModified(response)
response = self.client.get('/condition/no_etag/')
self.assertFullResponse(response, check_last_modified=False, check_etag=False)
def test_invalid_etag(self):
self.client.defaults['HTTP_IF_NONE_MATCH'] = r'"\"'
self.client.defaults['HTTP_IF_NONE_MATCH'] = '"""'
response = self.client.get('/condition/etag/')
self.assertFullResponse(response, check_last_modified=False)

View File

@ -8,4 +8,6 @@ urlpatterns = [
url('^condition/last_modified2/$', views.last_modified_view2),
url('^condition/etag/$', views.etag_view1),
url('^condition/etag2/$', views.etag_view2),
url('^condition/unquoted_etag/$', views.etag_view_unquoted),
url('^condition/no_etag/$', views.etag_view_none),
]

View File

@ -27,3 +27,19 @@ etag_view1 = condition(etag_func=lambda r: ETAG)(etag_view1)
def etag_view2(request):
return HttpResponse(FULL_RESPONSE)
etag_view2 = etag(lambda r: ETAG)(etag_view2)
@condition(etag_func=lambda r: ETAG.strip('"'))
def etag_view_unquoted(request):
"""
Use an etag_func() that returns an unquoted ETag.
"""
return HttpResponse(FULL_RESPONSE)
@condition(etag_func=lambda r: None)
def etag_view_none(request):
"""
Use an etag_func() that returns None, as opposed to setting etag_func=None.
"""
return HttpResponse(FULL_RESPONSE)

View File

@ -513,11 +513,6 @@ class ConditionalGetMiddlewareTest(SimpleTestCase):
self.assertEqual(self.resp.status_code, 200)
def test_if_none_match_and_same_etag(self):
self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = 'spam'
self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
self.assertEqual(self.resp.status_code, 304)
def test_if_none_match_and_same_etag_with_quotes(self):
self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = '"spam"'
self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
self.assertEqual(self.resp.status_code, 304)

View File

@ -208,14 +208,19 @@ class TestUtilsHttp(unittest.TestCase):
class ETagProcessingTests(unittest.TestCase):
def test_parsing(self):
etags = http.parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"')
self.assertEqual(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak'])
self.assertEqual(
http.parse_etags(r'"" , "etag", "e\\tag", W/"weak"'),
['""', '"etag"', r'"e\\tag"', 'W/"weak"']
)
self.assertEqual(http.parse_etags('*'), ['*'])
# Ignore RFC 2616 ETags that are invalid according to RFC 7232.
self.assertEqual(http.parse_etags(r'"etag", "e\"t\"ag"'), ['"etag"'])
def test_quoting(self):
original_etag = r'e\t"ag'
quoted_etag = http.quote_etag(original_etag)
self.assertEqual(quoted_etag, r'"e\\t\"ag"')
self.assertEqual(http.unquote_etag(quoted_etag), original_etag)
self.assertEqual(http.quote_etag('etag'), '"etag"') # unquoted
self.assertEqual(http.quote_etag('"etag"'), '"etag"') # quoted
self.assertEqual(http.quote_etag('W/"etag"'), 'W/"etag"') # quoted, weak
class HttpDateProcessingTests(unittest.TestCase):