diff --git a/django/core/handlers/wsgi.py b/django/core/handlers/wsgi.py
index adc8804b80b..f9b2633bb6a 100644
--- a/django/core/handlers/wsgi.py
+++ b/django/core/handlers/wsgi.py
@@ -134,7 +134,7 @@ class WSGIRequest(http.HttpRequest):
             # The WSGI spec says 'QUERY_STRING' may be absent.
             raw_query_string = self.environ.get('QUERY_STRING', str(''))
             if six.PY3:
-                raw_query_string = raw_query_string.encode('iso-8859-1').decode('utf-8')
+                raw_query_string = raw_query_string.encode('iso-8859-1')
             self._get = http.QueryDict(raw_query_string, encoding=self._encoding)
         return self._get
 
diff --git a/django/http/request.py b/django/http/request.py
index deb0724fc77..1a9a038b7e2 100644
--- a/django/http/request.py
+++ b/django/http/request.py
@@ -290,8 +290,12 @@ class QueryDict(MultiValueDict):
         self.encoding = encoding
         if six.PY3:
             if isinstance(query_string, bytes):
-                # query_string contains URL-encoded data, a subset of ASCII.
-                query_string = query_string.decode()
+                # query_string normally contains URL-encoded data, a subset of ASCII.
+                try:
+                    query_string = query_string.decode(encoding)
+                except UnicodeDecodeError:
+                    # ... but some user agents are misbehaving :-(
+                    query_string = query_string.decode('iso-8859-1')
             for key, value in parse_qsl(query_string or '',
                                         keep_blank_values=True,
                                         encoding=encoding):
diff --git a/docs/releases/1.6.6.txt b/docs/releases/1.6.6.txt
index f3ff77dca0e..f98aaa41763 100644
--- a/docs/releases/1.6.6.txt
+++ b/docs/releases/1.6.6.txt
@@ -40,3 +40,6 @@ Bugfixes
 * Fixed JavaScript errors while editing multi-geometry objects in the OpenLayers
   widget (`#23137 <http://code.djangoproject.com/ticket/23137>`_,
   `#23293 <http://code.djangoproject.com/ticket/23293>`_).
+
+* Prevented a crash on Python 3 with query strings containing unencoded
+  non-ASCII characters (`#22996 <http://code.djangoproject.com/ticket/22996>`_).
diff --git a/tests/handlers/tests.py b/tests/handlers/tests.py
index f5dc7c8531b..572192efe96 100644
--- a/tests/handlers/tests.py
+++ b/tests/handlers/tests.py
@@ -42,14 +42,30 @@ class HandlerTests(TestCase):
         self.assertEqual(response.status_code, 400)
 
     def test_non_ascii_query_string(self):
-        """Test that non-ASCII query strings are properly decoded (#20530)."""
+        """
+        Test that non-ASCII query strings are properly decoded (#20530, #22996).
+        """
         environ = RequestFactory().get('/').environ
-        raw_query_string = 'want=café'
-        if six.PY3:
-            raw_query_string = raw_query_string.encode('utf-8').decode('iso-8859-1')
-        environ['QUERY_STRING'] = raw_query_string
-        request = WSGIRequest(environ)
-        self.assertEqual(request.GET['want'], "café")
+        raw_query_strings = [
+            b'want=caf%C3%A9',  # This is the proper way to encode 'café'
+            b'want=caf\xc3\xa9',  # UA forgot to quote bytes
+            b'want=caf%E9',  # UA quoted, but not in UTF-8
+            b'want=caf\xe9',  # UA forgot to convert Latin-1 to UTF-8 and to quote (typical of MSIE)
+        ]
+        got = []
+        for raw_query_string in raw_query_strings:
+            if six.PY3:
+                # Simulate http.server.BaseHTTPRequestHandler.parse_request handling of raw request
+                environ['QUERY_STRING'] = str(raw_query_string, 'iso-8859-1')
+            else:
+                environ['QUERY_STRING'] = raw_query_string
+            request = WSGIRequest(environ)
+            got.append(request.GET['want'])
+        if six.PY2:
+            self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'caf\ufffd'])
+        else:
+            # On Python 3, %E9 is converted to the unicode replacement character by parse_qsl
+            self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'café'])
 
     def test_non_ascii_cookie(self):
         """Test that non-ASCII cookies set in JavaScript are properly decoded (#20557)."""