From 4ef0e019b7dd3d2bf93b5c705b3b7df9cdb77561 Mon Sep 17 00:00:00 2001 From: Kevin Christopher Henry Date: Thu, 1 Sep 2016 09:32:20 -0400 Subject: [PATCH] Fixed #27083 -- Added support for weak ETags. --- django/middleware/common.py | 3 +- django/middleware/http.py | 4 +- django/utils/http.py | 45 +++++++------ django/views/decorators/http.py | 20 +++--- docs/releases/1.11.txt | 3 + docs/topics/conditional-view-processing.txt | 8 +++ tests/conditional_processing/tests.py | 75 ++++++++++++++------- tests/conditional_processing/urls.py | 2 + tests/conditional_processing/views.py | 16 +++++ tests/middleware/tests.py | 5 -- tests/utils_tests/test_http.py | 17 +++-- 11 files changed, 130 insertions(+), 68 deletions(-) diff --git a/django/middleware/common.py b/django/middleware/common.py index 3d55baebde..dc89bcb9fa 100644 --- a/django/middleware/common.py +++ b/django/middleware/common.py @@ -10,7 +10,6 @@ from django.utils.cache import ( ) from django.utils.deprecation import MiddlewareMixin from django.utils.encoding import force_text -from django.utils.http import unquote_etag from django.utils.six.moves.urllib.parse import urlparse @@ -122,7 +121,7 @@ class CommonMiddleware(MiddlewareMixin): if response.has_header('ETag'): return get_conditional_response( request, - etag=unquote_etag(response['ETag']), + etag=response['ETag'], response=response, ) # Add the Content-Length header to non-streaming responses if not diff --git a/django/middleware/http.py b/django/middleware/http.py index a3030463c4..83c0f866e4 100644 --- a/django/middleware/http.py +++ b/django/middleware/http.py @@ -1,6 +1,6 @@ from django.utils.cache import get_conditional_response from django.utils.deprecation import MiddlewareMixin -from django.utils.http import http_date, parse_http_date_safe, unquote_etag +from django.utils.http import http_date, parse_http_date_safe class ConditionalGetMiddleware(MiddlewareMixin): @@ -24,7 +24,7 @@ class ConditionalGetMiddleware(MiddlewareMixin): if etag or last_modified: return get_conditional_response( request, - etag=unquote_etag(etag), + etag=etag, last_modified=last_modified, response=response, ) diff --git a/django/utils/http.py b/django/utils/http.py index c6285e6f40..cba2a08095 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -21,7 +21,15 @@ from django.utils.six.moves.urllib.parse import ( urlparse, ) -ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"') +# based on RFC 7232, Appendix C +ETAG_MATCH = re.compile(r''' + \A( # start of string and capture group + (?:W/)? # optional weak indicator + " # opening quote + [^"]* # any sequence of non-quote characters + " # end quote + )\Z # end of string and capture group +''', re.X) MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split() __D = r'(?P\d{2})' @@ -234,30 +242,27 @@ def urlsafe_base64_decode(s): def parse_etags(etag_str): """ - Parses a string with one or several etags passed in If-None-Match and - If-Match headers by the rules in RFC 2616. Returns a list of etags - without surrounding double quotes (") and unescaped from \. + Parse a string of ETags given in an If-None-Match or If-Match header as + defined by RFC 7232. Return a list of quoted ETags, or ['*'] if all ETags + should be matched. """ - etags = ETAG_MATCH.findall(etag_str) - if not etags: - # etag_str has wrong format, treat it as an opaque string then - return [etag_str] - etags = [e.encode('ascii').decode('unicode_escape') for e in etags] - return etags + if etag_str.strip() == '*': + return ['*'] + else: + # Parse each ETag individually, and return any that are valid. + etag_matches = (ETAG_MATCH.match(etag.strip()) for etag in etag_str.split(',')) + return [match.group(1) for match in etag_matches if match] -def quote_etag(etag): +def quote_etag(etag_str): """ - Wraps a string in double quotes escaping contents as necessary. + If the provided string is already a quoted ETag, return it. Otherwise, wrap + the string in quotes, making it a strong ETag. """ - return '"%s"' % etag.replace('\\', '\\\\').replace('"', '\\"') - - -def unquote_etag(etag): - """ - Unquote an ETag string; i.e. revert quote_etag(). - """ - return etag.strip('"').replace('\\"', '"').replace('\\\\', '\\') if etag else etag + if ETAG_MATCH.match(etag_str): + return etag_str + else: + return '"%s"' % etag_str def is_same_domain(host, pattern): diff --git a/django/views/decorators/http.py b/django/views/decorators/http.py index a5144421d5..846d3921cf 100644 --- a/django/views/decorators/http.py +++ b/django/views/decorators/http.py @@ -62,16 +62,16 @@ def condition(etag_func=None, last_modified_func=None): None if the resource doesn't exist), while the last_modified function should return a datetime object (or None if the resource doesn't exist). - If both parameters are provided, all the preconditions must be met before - the view is processed. + The ETag function should return a complete ETag, including quotes (e.g. + '"etag"'), since that's the only way to distinguish between weak and strong + ETags. If an unquoted ETag is returned (e.g. 'etag'), it will be converted + to a strong ETag by adding quotes. This decorator will either pass control to the wrapped view function or - return an HTTP 304 response (unmodified) or 412 response (preconditions - failed), depending upon the request method. - - Any behavior marked as "undefined" in the HTTP spec (e.g. If-none-match - plus If-modified-since headers) will result in the view function being - called. + return an HTTP 304 response (unmodified) or 412 response (precondition + failed), depending upon the request method. In either case, it will add the + generated ETag and Last-Modified headers to the response if it doesn't + already have them. """ def decorator(func): @wraps(func, assigned=available_attrs(func)) @@ -83,7 +83,9 @@ def condition(etag_func=None, last_modified_func=None): if dt: return timegm(dt.utctimetuple()) + # The value from etag_func() could be quoted or unquoted. res_etag = etag_func(request, *args, **kwargs) if etag_func else None + res_etag = quote_etag(res_etag) if res_etag is not None else None res_last_modified = get_last_modified() response = get_conditional_response( @@ -99,7 +101,7 @@ def condition(etag_func=None, last_modified_func=None): if res_last_modified and not response.has_header('Last-Modified'): response['Last-Modified'] = http_date(res_last_modified) if res_etag and not response.has_header('ETag'): - response['ETag'] = quote_etag(res_etag) + response['ETag'] = res_etag return response diff --git a/docs/releases/1.11.txt b/docs/releases/1.11.txt index afd24e6613..49d0516b99 100644 --- a/docs/releases/1.11.txt +++ b/docs/releases/1.11.txt @@ -491,6 +491,9 @@ Miscellaneous * The admin's widget for ``IntegerField`` uses ``type="number"`` rather than ``type="text"``. +* ETags are now parsed according to the :rfc:`7232` Conditional Requests + specification rather than the syntax from :rfc:`2616`. + .. _deprecated-features-1.11: Features deprecated in 1.11 diff --git a/docs/topics/conditional-view-processing.txt b/docs/topics/conditional-view-processing.txt index 7efd8ac636..7398b3fd05 100644 --- a/docs/topics/conditional-view-processing.txt +++ b/docs/topics/conditional-view-processing.txt @@ -66,6 +66,14 @@ last time the resource was modified, or ``None`` if the resource doesn't exist. The function passed to the ``etag`` decorator should return a string representing the `ETag`_ for the resource, or ``None`` if it doesn't exist. +.. versionchanged:: 1.11 + + In older versions, the return value from ``etag_func()`` was interpreted as + the unquoted part of the ETag. That prevented the use of weak ETags, which + have the format ``W/""``. The return value is now expected to be + an ETag as defined by the specification (including the quotes), although + the unquoted format is also accepted for backwards compatibility. + Using this feature usefully is probably best explained with an example. Suppose you have this pair of models, representing a simple blog system:: diff --git a/tests/conditional_processing/tests.py b/tests/conditional_processing/tests.py index 3a67817683..2faacbf6e2 100644 --- a/tests/conditional_processing/tests.py +++ b/tests/conditional_processing/tests.py @@ -11,8 +11,8 @@ LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT' LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT' LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT' EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT' -ETAG = 'b4246ffc4f62314ca13147c9d4f76974' -EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6' +ETAG = '"b4246ffc4f62314ca13147c9d4f76974"' +EXPIRED_ETAG = '"7fae4cd4b0f81e7d2914700043aa8ed6"' @override_settings(ROOT_URLCONF='conditional_processing.urls') @@ -24,7 +24,7 @@ class ConditionalGet(SimpleTestCase): if check_last_modified: self.assertEqual(response['Last-Modified'], LAST_MODIFIED_STR) if check_etag: - self.assertEqual(response['ETag'], '"%s"' % ETAG) + self.assertEqual(response['ETag'], ETAG) def assertNotModified(self, response): self.assertEqual(response.status_code, 304) @@ -63,66 +63,66 @@ class ConditionalGet(SimpleTestCase): self.assertEqual(response.status_code, 412) def test_if_none_match(self): - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG + self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG response = self.client.get('/condition/') self.assertNotModified(response) - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG + self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG response = self.client.get('/condition/') self.assertFullResponse(response) # Several etags in If-None-Match is a bit exotic but why not? - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s", "%s"' % (ETAG, EXPIRED_ETAG) + self.client.defaults['HTTP_IF_NONE_MATCH'] = '%s, %s' % (ETAG, EXPIRED_ETAG) response = self.client.get('/condition/') self.assertNotModified(response) def test_if_match(self): - self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG + self.client.defaults['HTTP_IF_MATCH'] = ETAG response = self.client.put('/condition/etag/') self.assertEqual(response.status_code, 200) - self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG + self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG response = self.client.put('/condition/etag/') self.assertEqual(response.status_code, 412) def test_both_headers(self): # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.4 self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG + self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG response = self.client.get('/condition/') self.assertNotModified(response) self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG + self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG response = self.client.get('/condition/') self.assertFullResponse(response) self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG + self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG response = self.client.get('/condition/') self.assertFullResponse(response) self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG + self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG response = self.client.get('/condition/') self.assertFullResponse(response) def test_both_headers_2(self): self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR - self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG + self.client.defaults['HTTP_IF_MATCH'] = ETAG response = self.client.get('/condition/') self.assertFullResponse(response) self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR - self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG - response = self.client.get('/condition/') - self.assertEqual(response.status_code, 412) - - self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR - self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG + self.client.defaults['HTTP_IF_MATCH'] = ETAG response = self.client.get('/condition/') self.assertEqual(response.status_code, 412) self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR - self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG + self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG + response = self.client.get('/condition/') + self.assertEqual(response.status_code, 412) + + self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR + self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG response = self.client.get('/condition/') self.assertEqual(response.status_code, 412) @@ -134,7 +134,7 @@ class ConditionalGet(SimpleTestCase): self.assertFullResponse(response, check_last_modified=False) def test_single_condition_2(self): - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG + self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG response = self.client.get('/condition/etag/') self.assertNotModified(response) response = self.client.get('/condition/last_modified/') @@ -146,7 +146,7 @@ class ConditionalGet(SimpleTestCase): self.assertFullResponse(response, check_etag=False) def test_single_condition_4(self): - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG + self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG response = self.client.get('/condition/etag/') self.assertFullResponse(response, check_last_modified=False) @@ -158,7 +158,7 @@ class ConditionalGet(SimpleTestCase): self.assertFullResponse(response, check_last_modified=False) def test_single_condition_6(self): - self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG + self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG response = self.client.get('/condition/etag2/') self.assertNotModified(response) response = self.client.get('/condition/last_modified2/') @@ -188,7 +188,34 @@ class ConditionalGet(SimpleTestCase): response = self.client.head('/condition/') self.assertNotModified(response) + def test_unquoted(self): + """ + The same quoted ETag should be set on the header regardless of whether + etag_func() in condition() returns a quoted or an unquoted ETag. + """ + response_quoted = self.client.get('/condition/etag/') + response_unquoted = self.client.get('/condition/unquoted_etag/') + self.assertEqual(response_quoted['ETag'], response_unquoted['ETag']) + + # It's possible that the matching algorithm could use the wrong value even + # if the ETag header is set correctly correctly (as tested by + # test_unquoted()), so check that the unquoted value is matched. + def test_unquoted_if_none_match(self): + self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG + response = self.client.get('/condition/unquoted_etag/') + self.assertNotModified(response) + self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG + response = self.client.get('/condition/unquoted_etag/') + self.assertFullResponse(response, check_last_modified=False) + + def test_all_if_none_match(self): + self.client.defaults['HTTP_IF_NONE_MATCH'] = '*' + response = self.client.get('/condition/etag/') + self.assertNotModified(response) + response = self.client.get('/condition/no_etag/') + self.assertFullResponse(response, check_last_modified=False, check_etag=False) + def test_invalid_etag(self): - self.client.defaults['HTTP_IF_NONE_MATCH'] = r'"\"' + self.client.defaults['HTTP_IF_NONE_MATCH'] = '"""' response = self.client.get('/condition/etag/') self.assertFullResponse(response, check_last_modified=False) diff --git a/tests/conditional_processing/urls.py b/tests/conditional_processing/urls.py index 2492a950a4..c49835264e 100644 --- a/tests/conditional_processing/urls.py +++ b/tests/conditional_processing/urls.py @@ -8,4 +8,6 @@ urlpatterns = [ url('^condition/last_modified2/$', views.last_modified_view2), url('^condition/etag/$', views.etag_view1), url('^condition/etag2/$', views.etag_view2), + url('^condition/unquoted_etag/$', views.etag_view_unquoted), + url('^condition/no_etag/$', views.etag_view_none), ] diff --git a/tests/conditional_processing/views.py b/tests/conditional_processing/views.py index 3542d2bfc1..de0810c0d3 100644 --- a/tests/conditional_processing/views.py +++ b/tests/conditional_processing/views.py @@ -27,3 +27,19 @@ etag_view1 = condition(etag_func=lambda r: ETAG)(etag_view1) def etag_view2(request): return HttpResponse(FULL_RESPONSE) etag_view2 = etag(lambda r: ETAG)(etag_view2) + + +@condition(etag_func=lambda r: ETAG.strip('"')) +def etag_view_unquoted(request): + """ + Use an etag_func() that returns an unquoted ETag. + """ + return HttpResponse(FULL_RESPONSE) + + +@condition(etag_func=lambda r: None) +def etag_view_none(request): + """ + Use an etag_func() that returns None, as opposed to setting etag_func=None. + """ + return HttpResponse(FULL_RESPONSE) diff --git a/tests/middleware/tests.py b/tests/middleware/tests.py index 07e1d4ea5a..29de46841e 100644 --- a/tests/middleware/tests.py +++ b/tests/middleware/tests.py @@ -513,11 +513,6 @@ class ConditionalGetMiddlewareTest(SimpleTestCase): self.assertEqual(self.resp.status_code, 200) def test_if_none_match_and_same_etag(self): - self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = 'spam' - self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) - self.assertEqual(self.resp.status_code, 304) - - def test_if_none_match_and_same_etag_with_quotes(self): self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = '"spam"' self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) self.assertEqual(self.resp.status_code, 304) diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py index e8b4e51eb3..ef4cb90ac4 100644 --- a/tests/utils_tests/test_http.py +++ b/tests/utils_tests/test_http.py @@ -208,14 +208,19 @@ class TestUtilsHttp(unittest.TestCase): class ETagProcessingTests(unittest.TestCase): def test_parsing(self): - etags = http.parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"') - self.assertEqual(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak']) + self.assertEqual( + http.parse_etags(r'"" , "etag", "e\\tag", W/"weak"'), + ['""', '"etag"', r'"e\\tag"', 'W/"weak"'] + ) + self.assertEqual(http.parse_etags('*'), ['*']) + + # Ignore RFC 2616 ETags that are invalid according to RFC 7232. + self.assertEqual(http.parse_etags(r'"etag", "e\"t\"ag"'), ['"etag"']) def test_quoting(self): - original_etag = r'e\t"ag' - quoted_etag = http.quote_etag(original_etag) - self.assertEqual(quoted_etag, r'"e\\t\"ag"') - self.assertEqual(http.unquote_etag(quoted_etag), original_etag) + self.assertEqual(http.quote_etag('etag'), '"etag"') # unquoted + self.assertEqual(http.quote_etag('"etag"'), '"etag"') # quoted + self.assertEqual(http.quote_etag('W/"etag"'), 'W/"etag"') # quoted, weak class HttpDateProcessingTests(unittest.TestCase):