[1.7.x] Fixed #22996 -- Prevented crash with unencoded query string
Thanks Jorge Carleitao for the report and Aymeric Augustin, Tim Graham
for the reviews.
Backport of fa02120d36
from master.
This commit is contained in:
parent
6e5e2b0e28
commit
72ad014b6a
|
@ -313,8 +313,12 @@ class QueryDict(MultiValueDict):
|
|||
self.encoding = encoding
|
||||
if six.PY3:
|
||||
if isinstance(query_string, bytes):
|
||||
# query_string contains URL-encoded data, a subset of ASCII.
|
||||
query_string = query_string.decode()
|
||||
# query_string normally contains URL-encoded data, a subset of ASCII.
|
||||
try:
|
||||
query_string = query_string.decode(encoding)
|
||||
except UnicodeDecodeError:
|
||||
# ... but some user agents are misbehaving :-(
|
||||
query_string = query_string.decode('iso-8859-1')
|
||||
for key, value in parse_qsl(query_string or '',
|
||||
keep_blank_values=True,
|
||||
encoding=encoding):
|
||||
|
@ -322,8 +326,12 @@ class QueryDict(MultiValueDict):
|
|||
else:
|
||||
for key, value in parse_qsl(query_string or '',
|
||||
keep_blank_values=True):
|
||||
try:
|
||||
value = value.decode(encoding)
|
||||
except UnicodeDecodeError:
|
||||
value = value.decode('iso-8859-1')
|
||||
self.appendlist(force_text(key, encoding, errors='replace'),
|
||||
force_text(value, encoding, errors='replace'))
|
||||
value)
|
||||
self._mutable = mutable
|
||||
|
||||
@property
|
||||
|
|
|
@ -40,3 +40,6 @@ Bugfixes
|
|||
* Fixed JavaScript errors while editing multi-geometry objects in the OpenLayers
|
||||
widget (`#23137 <https://code.djangoproject.com/ticket/23137>`_,
|
||||
`#23293 <https://code.djangoproject.com/ticket/23293>`_).
|
||||
|
||||
* Prevented a crash on Python 3 with query strings containing unencoded
|
||||
non-ASCII characters (`#22996 <https://code.djangoproject.com/ticket/22996>`_).
|
||||
|
|
|
@ -1425,6 +1425,9 @@ Miscellaneous
|
|||
databases, use the :djadminopt:`--database` flag to get SQL for those
|
||||
models (previously they would always be included in the output).
|
||||
|
||||
* Decoding the query string from URLs now falls back to the ISO-8859-1 encoding
|
||||
when the input is not valid UTF-8.
|
||||
|
||||
.. _deprecated-features-1.7:
|
||||
|
||||
Features deprecated in 1.7
|
||||
|
|
|
@ -42,14 +42,30 @@ class HandlerTests(TestCase):
|
|||
self.assertEqual(response.status_code, 400)
|
||||
|
||||
def test_non_ascii_query_string(self):
|
||||
"""Test that non-ASCII query strings are properly decoded (#20530)."""
|
||||
"""
|
||||
Test that non-ASCII query strings are properly decoded (#20530, #22996).
|
||||
"""
|
||||
environ = RequestFactory().get('/').environ
|
||||
raw_query_string = 'want=café'
|
||||
raw_query_strings = [
|
||||
b'want=caf%C3%A9', # This is the proper way to encode 'café'
|
||||
b'want=caf\xc3\xa9', # UA forgot to quote bytes
|
||||
b'want=caf%E9', # UA quoted, but not in UTF-8
|
||||
b'want=caf\xe9', # UA forgot to convert Latin-1 to UTF-8 and to quote (typical of MSIE)
|
||||
]
|
||||
got = []
|
||||
for raw_query_string in raw_query_strings:
|
||||
if six.PY3:
|
||||
raw_query_string = raw_query_string.encode('utf-8').decode('iso-8859-1')
|
||||
# Simulate http.server.BaseHTTPRequestHandler.parse_request handling of raw request
|
||||
environ['QUERY_STRING'] = str(raw_query_string, 'iso-8859-1')
|
||||
else:
|
||||
environ['QUERY_STRING'] = raw_query_string
|
||||
request = WSGIRequest(environ)
|
||||
self.assertEqual(request.GET['want'], "café")
|
||||
got.append(request.GET['want'])
|
||||
if six.PY2:
|
||||
self.assertListEqual(got, ['café', 'café', 'café', 'café'])
|
||||
else:
|
||||
# On Python 3, %E9 is converted to the unicode replacement character by parse_qsl
|
||||
self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'café'])
|
||||
|
||||
def test_non_ascii_cookie(self):
|
||||
"""Test that non-ASCII cookies set in JavaScript are properly decoded (#20557)."""
|
||||
|
|
|
@ -202,14 +202,14 @@ class QueryDictTests(unittest.TestCase):
|
|||
def test_invalid_input_encoding(self):
|
||||
"""
|
||||
QueryDicts must be able to handle invalid input encoding (in this
|
||||
case, bad UTF-8 encoding).
|
||||
case, bad UTF-8 encoding), falling back to ISO-8859-1 decoding.
|
||||
|
||||
This test doesn't apply under Python 3 because the URL is a string
|
||||
and not a bytestring.
|
||||
"""
|
||||
q = QueryDict(str(b'foo=bar&foo=\xff'))
|
||||
self.assertEqual(q['foo'], '\ufffd')
|
||||
self.assertEqual(q.getlist('foo'), ['bar', '\ufffd'])
|
||||
self.assertEqual(q['foo'], '\xff')
|
||||
self.assertEqual(q.getlist('foo'), ['bar', '\xff'])
|
||||
|
||||
def test_pickle(self):
|
||||
q = QueryDict(str(''))
|
||||
|
|
Loading…
Reference in New Issue