diff --git a/django/http/request.py b/django/http/request.py index 2897df7c11..aee8a0d282 100644 --- a/django/http/request.py +++ b/django/http/request.py @@ -313,8 +313,12 @@ class QueryDict(MultiValueDict): self.encoding = encoding if six.PY3: if isinstance(query_string, bytes): - # query_string contains URL-encoded data, a subset of ASCII. - query_string = query_string.decode() + # query_string normally contains URL-encoded data, a subset of ASCII. + try: + query_string = query_string.decode(encoding) + except UnicodeDecodeError: + # ... but some user agents are misbehaving :-( + query_string = query_string.decode('iso-8859-1') for key, value in parse_qsl(query_string or '', keep_blank_values=True, encoding=encoding): @@ -322,8 +326,12 @@ class QueryDict(MultiValueDict): else: for key, value in parse_qsl(query_string or '', keep_blank_values=True): + try: + value = value.decode(encoding) + except UnicodeDecodeError: + value = value.decode('iso-8859-1') self.appendlist(force_text(key, encoding, errors='replace'), - force_text(value, encoding, errors='replace')) + value) self._mutable = mutable @property diff --git a/docs/releases/1.6.6.txt b/docs/releases/1.6.6.txt index f3ff77dca0..f98aaa4176 100644 --- a/docs/releases/1.6.6.txt +++ b/docs/releases/1.6.6.txt @@ -40,3 +40,6 @@ Bugfixes * Fixed JavaScript errors while editing multi-geometry objects in the OpenLayers widget (`#23137 `_, `#23293 `_). + +* Prevented a crash on Python 3 with query strings containing unencoded + non-ASCII characters (`#22996 `_). diff --git a/docs/releases/1.7.txt b/docs/releases/1.7.txt index 7e0ce2921b..18884307a8 100644 --- a/docs/releases/1.7.txt +++ b/docs/releases/1.7.txt @@ -1425,6 +1425,9 @@ Miscellaneous databases, use the :djadminopt:`--database` flag to get SQL for those models (previously they would always be included in the output). +* Decoding the query string from URLs now falls back to the ISO-8859-1 encoding + when the input is not valid UTF-8. + .. 
_deprecated-features-1.7: Features deprecated in 1.7 diff --git a/tests/handlers/tests.py b/tests/handlers/tests.py index 8789f392b4..9d33e928a6 100644 --- a/tests/handlers/tests.py +++ b/tests/handlers/tests.py @@ -42,14 +42,30 @@ class HandlerTests(TestCase): self.assertEqual(response.status_code, 400) def test_non_ascii_query_string(self): - """Test that non-ASCII query strings are properly decoded (#20530).""" + """ + Test that non-ASCII query strings are properly decoded (#20530, #22996). + """ environ = RequestFactory().get('/').environ - raw_query_string = 'want=café' - if six.PY3: - raw_query_string = raw_query_string.encode('utf-8').decode('iso-8859-1') - environ['QUERY_STRING'] = raw_query_string - request = WSGIRequest(environ) - self.assertEqual(request.GET['want'], "café") + raw_query_strings = [ + b'want=caf%C3%A9', # This is the proper way to encode 'café' + b'want=caf\xc3\xa9', # UA forgot to quote bytes + b'want=caf%E9', # UA quoted, but not in UTF-8 + b'want=caf\xe9', # UA forgot to convert Latin-1 to UTF-8 and to quote (typical of MSIE) + ] + got = [] + for raw_query_string in raw_query_strings: + if six.PY3: + # Simulate http.server.BaseHTTPRequestHandler.parse_request handling of raw request + environ['QUERY_STRING'] = str(raw_query_string, 'iso-8859-1') + else: + environ['QUERY_STRING'] = raw_query_string + request = WSGIRequest(environ) + got.append(request.GET['want']) + if six.PY2: + self.assertListEqual(got, ['café', 'café', 'café', 'café']) + else: + # On Python 3, %E9 is converted to the unicode replacement character by parse_qsl + self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'café']) def test_non_ascii_cookie(self): """Test that non-ASCII cookies set in JavaScript are properly decoded (#20557).""" diff --git a/tests/httpwrappers/tests.py b/tests/httpwrappers/tests.py index 9dcc054a64..7881f2f5ea 100644 --- a/tests/httpwrappers/tests.py +++ b/tests/httpwrappers/tests.py @@ -202,14 +202,14 @@ class 
QueryDictTests(unittest.TestCase): def test_invalid_input_encoding(self): """ QueryDicts must be able to handle invalid input encoding (in this - case, bad UTF-8 encoding). + case, bad UTF-8 encoding), falling back to ISO-8859-1 decoding. This test doesn't apply under Python 3 because the URL is a string and not a bytestring. """ q = QueryDict(str(b'foo=bar&foo=\xff')) - self.assertEqual(q['foo'], '\ufffd') - self.assertEqual(q.getlist('foo'), ['bar', '\ufffd']) + self.assertEqual(q['foo'], '\xff') + self.assertEqual(q.getlist('foo'), ['bar', '\xff']) def test_pickle(self): q = QueryDict(str(''))