Fixed #22996 -- Prevented crash with unencoded query string

Thanks Jorge Carleitao for the report and Aymeric Augustin, Tim Graham
for the reviews.
This commit is contained in:
Claude Paroz 2014-07-12 19:37:59 +02:00
parent 11d9cbe2f4
commit fa02120d36
5 changed files with 43 additions and 13 deletions

View File

@ -329,8 +329,12 @@ class QueryDict(MultiValueDict):
self.encoding = encoding
if six.PY3:
if isinstance(query_string, bytes):
# query_string contains URL-encoded data, a subset of ASCII.
query_string = query_string.decode()
# query_string normally contains URL-encoded data, a subset of ASCII.
try:
query_string = query_string.decode(encoding)
except UnicodeDecodeError:
# ... but some user agents are misbehaving :-(
query_string = query_string.decode('iso-8859-1')
for key, value in parse_qsl(query_string or '',
keep_blank_values=True,
encoding=encoding):
@ -338,8 +342,12 @@ class QueryDict(MultiValueDict):
else:
for key, value in parse_qsl(query_string or '',
keep_blank_values=True):
try:
value = value.decode(encoding)
except UnicodeDecodeError:
value = value.decode('iso-8859-1')
self.appendlist(force_text(key, encoding, errors='replace'),
force_text(value, encoding, errors='replace'))
value)
self._mutable = mutable
@property

View File

@ -40,3 +40,6 @@ Bugfixes
* Fixed JavaScript errors while editing multi-geometry objects in the OpenLayers
widget (`#23137 <https://code.djangoproject.com/ticket/23137>`_,
`#23293 <https://code.djangoproject.com/ticket/23293>`_).
* Prevented a crash on Python 3 with query strings containing unencoded
non-ASCII characters (`#22996 <https://code.djangoproject.com/ticket/22996>`_).

View File

@ -1425,6 +1425,9 @@ Miscellaneous
databases, use the :djadminopt:`--database` flag to get SQL for those
models (previously they would always be included in the output).
* Decoding the query string from URLs now falls back to the ISO-8859-1 encoding
when the input is not valid UTF-8.
.. _deprecated-features-1.7:
Features deprecated in 1.7

View File

@ -42,14 +42,30 @@ class HandlerTests(TestCase):
self.assertEqual(response.status_code, 400)
def test_non_ascii_query_string(self):
"""Test that non-ASCII query strings are properly decoded (#20530)."""
"""
Test that non-ASCII query strings are properly decoded (#20530, #22996).
"""
environ = RequestFactory().get('/').environ
raw_query_string = 'want=café'
raw_query_strings = [
b'want=caf%C3%A9', # This is the proper way to encode 'café'
b'want=caf\xc3\xa9', # UA forgot to quote bytes
b'want=caf%E9', # UA quoted, but not in UTF-8
b'want=caf\xe9', # UA forgot to convert Latin-1 to UTF-8 and to quote (typical of MSIE)
]
got = []
for raw_query_string in raw_query_strings:
if six.PY3:
raw_query_string = raw_query_string.encode('utf-8').decode('iso-8859-1')
# Simulate http.server.BaseHTTPRequestHandler.parse_request handling of raw request
environ['QUERY_STRING'] = str(raw_query_string, 'iso-8859-1')
else:
environ['QUERY_STRING'] = raw_query_string
request = WSGIRequest(environ)
self.assertEqual(request.GET['want'], "café")
got.append(request.GET['want'])
if six.PY2:
self.assertListEqual(got, ['café', 'café', 'café', 'café'])
else:
# On Python 3, %E9 is converted to the unicode replacement character by parse_qsl
self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'café'])
def test_non_ascii_cookie(self):
"""Test that non-ASCII cookies set in JavaScript are properly decoded (#20557)."""

View File

@ -203,14 +203,14 @@ class QueryDictTests(unittest.TestCase):
def test_invalid_input_encoding(self):
"""
QueryDicts must be able to handle invalid input encoding (in this
case, bad UTF-8 encoding).
case, bad UTF-8 encoding), falling back to ISO-8859-1 decoding.
This test doesn't apply under Python 3 because the URL is a string
and not a bytestring.
"""
q = QueryDict(str(b'foo=bar&foo=\xff'))
self.assertEqual(q['foo'], '\ufffd')
self.assertEqual(q.getlist('foo'), ['bar', '\ufffd'])
self.assertEqual(q['foo'], '\xff')
self.assertEqual(q.getlist('foo'), ['bar', '\xff'])
def test_pickle(self):
q = QueryDict()