[1.6.x] Fixed #22996 -- Prevented crash with unencoded query string

Thanks Jorge Carleitao for the report and Aymeric Augustin, Tim Graham
for the reviews.
Backport of fa02120d36 from master.
This commit is contained in:
Claude Paroz 2014-08-19 22:38:45 +02:00
parent c0e49ef767
commit 9f9fdc4b0a
4 changed files with 33 additions and 10 deletions

View File

@ -134,7 +134,7 @@ class WSGIRequest(http.HttpRequest):
# The WSGI spec says 'QUERY_STRING' may be absent.
raw_query_string = self.environ.get('QUERY_STRING', str(''))
if six.PY3:
raw_query_string = raw_query_string.encode('iso-8859-1').decode('utf-8')
raw_query_string = raw_query_string.encode('iso-8859-1')
self._get = http.QueryDict(raw_query_string, encoding=self._encoding)
return self._get

View File

@ -290,8 +290,12 @@ class QueryDict(MultiValueDict):
self.encoding = encoding
if six.PY3:
if isinstance(query_string, bytes):
# query_string contains URL-encoded data, a subset of ASCII.
query_string = query_string.decode()
# query_string normally contains URL-encoded data, a subset of ASCII.
try:
query_string = query_string.decode(encoding)
except UnicodeDecodeError:
# ... but some user agents are misbehaving :-(
query_string = query_string.decode('iso-8859-1')
for key, value in parse_qsl(query_string or '',
keep_blank_values=True,
encoding=encoding):

View File

@ -40,3 +40,6 @@ Bugfixes
* Fixed JavaScript errors while editing multi-geometry objects in the OpenLayers
widget (`#23137 <https://code.djangoproject.com/ticket/23137>`_,
`#23293 <https://code.djangoproject.com/ticket/23293>`_).
* Prevented a crash on Python 3 with query strings containing unencoded
non-ASCII characters (`#22996 <https://code.djangoproject.com/ticket/22996>`_).

View File

@ -42,14 +42,30 @@ class HandlerTests(TestCase):
self.assertEqual(response.status_code, 400)
def test_non_ascii_query_string(self):
"""Test that non-ASCII query strings are properly decoded (#20530)."""
"""
Test that non-ASCII query strings are properly decoded (#20530, #22996).
"""
environ = RequestFactory().get('/').environ
raw_query_string = 'want=café'
raw_query_strings = [
b'want=caf%C3%A9', # This is the proper way to encode 'café'
b'want=caf\xc3\xa9', # UA forgot to quote bytes
b'want=caf%E9', # UA quoted, but not in UTF-8
b'want=caf\xe9', # UA forgot to convert Latin-1 to UTF-8 and to quote (typical of MSIE)
]
got = []
for raw_query_string in raw_query_strings:
if six.PY3:
raw_query_string = raw_query_string.encode('utf-8').decode('iso-8859-1')
# Simulate http.server.BaseHTTPRequestHandler.parse_request handling of raw request
environ['QUERY_STRING'] = str(raw_query_string, 'iso-8859-1')
else:
environ['QUERY_STRING'] = raw_query_string
request = WSGIRequest(environ)
self.assertEqual(request.GET['want'], "café")
got.append(request.GET['want'])
if six.PY2:
self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'caf\ufffd'])
else:
# On Python 3, %E9 is converted to the unicode replacement character by parse_qsl
self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'café'])
def test_non_ascii_cookie(self):
"""Test that non-ASCII cookies set in JavaScript are properly decoded (#20557)."""