From 48c34f3336cbbc906066636a7aa35270a7b44895 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Fri, 12 Aug 2016 15:31:18 +0200 Subject: [PATCH] Fixed #26971 -- Prevented crash with non-UTF-8 incoming PATH_INFO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks Tim Graham and Loïc Bistuer for the reviews. --- django/core/handlers/wsgi.py | 26 +++++++------------------- django/core/servers/basehttp.py | 16 +--------------- tests/handlers/tests.py | 8 ++++++-- tests/requests/tests.py | 14 ++++++++++++-- 4 files changed, 26 insertions(+), 38 deletions(-) diff --git a/django/core/handlers/wsgi.py b/django/core/handlers/wsgi.py index 3099cfb64f..37cd028f52 100644 --- a/django/core/handlers/wsgi.py +++ b/django/core/handlers/wsgi.py @@ -2,9 +2,7 @@ from __future__ import unicode_literals import cgi import codecs -import logging import re -import sys from io import BytesIO from django import http @@ -13,10 +11,11 @@ from django.core import signals from django.core.handlers import base from django.urls import set_script_prefix from django.utils import six -from django.utils.encoding import force_str, force_text -from django.utils.functional import cached_property +from django.utils.encoding import ( + force_str, force_text, repercent_broken_unicode, +) -logger = logging.getLogger('django.request') +from django.utils.functional import cached_property # encode() and decode() expect the charset to be a native string. 
ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8') @@ -155,19 +154,8 @@ class WSGIHandler(base.BaseHandler): def __call__(self, environ, start_response): set_script_prefix(get_script_name(environ)) signals.request_started.send(sender=self.__class__, environ=environ) - try: - request = self.request_class(environ) - except UnicodeDecodeError: - logger.warning( - 'Bad Request (UnicodeDecodeError)', - exc_info=sys.exc_info(), - extra={ - 'status_code': 400, - } - ) - response = http.HttpResponseBadRequest() - else: - response = self.get_response(request) + request = self.request_class(environ) + response = self.get_response(request) response._handler_class = self.__class__ @@ -187,7 +175,7 @@ def get_path_info(environ): """ path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/') - return path_info.decode(UTF_8) + return repercent_broken_unicode(path_info).decode(UTF_8) def get_script_name(environ): diff --git a/django/core/servers/basehttp.py b/django/core/servers/basehttp.py index 60fc09a122..b6fd7b17f6 100644 --- a/django/core/servers/basehttp.py +++ b/django/core/servers/basehttp.py @@ -15,10 +15,8 @@ import sys from wsgiref import simple_server from django.core.exceptions import ImproperlyConfigured -from django.core.handlers.wsgi import ISO_8859_1, UTF_8 from django.core.wsgi import get_wsgi_application from django.utils import six -from django.utils.encoding import uri_to_iri from django.utils.module_loading import import_string from django.utils.six.moves import socketserver @@ -139,19 +137,7 @@ class WSGIRequestHandler(simple_server.WSGIRequestHandler, object): if '_' in k: del self.headers[k] - env = super(WSGIRequestHandler, self).get_environ() - - path = self.path - if '?' in path: - path = path.partition('?')[0] - - path = uri_to_iri(path).encode(UTF_8) - # Under Python 3, non-ASCII values in the WSGI environ are arbitrarily - # decoded with ISO-8859-1. We replicate this behavior here. - # Refs comment in `get_bytes_from_wsgi()`. 
- env['PATH_INFO'] = path.decode(ISO_8859_1) if six.PY3 else path - - return env + return super(WSGIRequestHandler, self).get_environ() def handle(self): """Copy of WSGIRequestHandler, but with different ServerHandler""" diff --git a/tests/handlers/tests.py b/tests/handlers/tests.py index 9f01cb201a..ea7a5ba130 100644 --- a/tests/handlers/tests.py +++ b/tests/handlers/tests.py @@ -33,12 +33,16 @@ class HandlerTests(SimpleTestCase): self.assertIsNotNone(handler._request_middleware) def test_bad_path_info(self): - """Tests for bug #15672 ('request' referenced before assignment)""" + """ + A non-UTF-8 path populates PATH_INFO with a URL-encoded path and + produces a 404. + """ environ = RequestFactory().get('/').environ environ['PATH_INFO'] = b'\xed' if six.PY2 else '\xed' handler = WSGIHandler() response = handler(environ, lambda *a, **k: None) - self.assertEqual(response.status_code, 400) + # The path of the request will be encoded to '/%ED'. + self.assertEqual(response.status_code, 404) def test_non_ascii_query_string(self): """ diff --git a/tests/requests/tests.py b/tests/requests/tests.py index 0465f0a1e8..425f93a5be 100644 --- a/tests/requests/tests.py +++ b/tests/requests/tests.py @@ -173,8 +173,8 @@ class RequestsTests(SimpleTestCase): self.assertEqual(repr(request), str_prefix("")) def test_wsgirequest_path_info(self): - def wsgi_str(path_info): - path_info = path_info.encode('utf-8') # Actual URL sent by the browser (bytestring) + def wsgi_str(path_info, encoding='utf-8'): + path_info = path_info.encode(encoding) # Actual URL sent by the browser (bytestring) if six.PY3: path_info = path_info.decode('iso-8859-1') # Value in the WSGI environ dict (native string) return path_info @@ -182,6 +182,16 @@ class RequestsTests(SimpleTestCase): request = WSGIRequest({'PATH_INFO': wsgi_str("/سلام/"), 'REQUEST_METHOD': 'get', 'wsgi.input': BytesIO(b'')}) self.assertEqual(request.path, "/سلام/") + # The URL may be incorrectly encoded in a non-UTF-8 encoding (#26971) + 
request = WSGIRequest({ + 'PATH_INFO': wsgi_str("/café/", encoding='iso-8859-1'), + 'REQUEST_METHOD': 'get', + 'wsgi.input': BytesIO(b''), + }) + # Since it's impossible to decide the (wrong) encoding of the URL, it's + # left percent-encoded in the path. + self.assertEqual(request.path, "/caf%E9/") + def test_httprequest_location(self): request = HttpRequest() self.assertEqual(