Fixed #26971 -- Prevented crash with non-UTF-8 incoming PATH_INFO

Thanks Tim Graham and Loïc Bistuer for the reviews.
This commit is contained in:
Claude Paroz 2016-08-12 15:31:18 +02:00
parent 384f89f8f8
commit 48c34f3336
4 changed files with 26 additions and 38 deletions

View File

@ -2,9 +2,7 @@ from __future__ import unicode_literals
import cgi import cgi
import codecs import codecs
import logging
import re import re
import sys
from io import BytesIO from io import BytesIO
from django import http from django import http
@ -13,10 +11,11 @@ from django.core import signals
from django.core.handlers import base from django.core.handlers import base
from django.urls import set_script_prefix from django.urls import set_script_prefix
from django.utils import six from django.utils import six
from django.utils.encoding import force_str, force_text from django.utils.encoding import (
from django.utils.functional import cached_property force_str, force_text, repercent_broken_unicode,
)
logger = logging.getLogger('django.request') from django.utils.functional import cached_property
# encode() and decode() expect the charset to be a native string. # encode() and decode() expect the charset to be a native string.
ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8') ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8')
@ -155,19 +154,8 @@ class WSGIHandler(base.BaseHandler):
def __call__(self, environ, start_response): def __call__(self, environ, start_response):
set_script_prefix(get_script_name(environ)) set_script_prefix(get_script_name(environ))
signals.request_started.send(sender=self.__class__, environ=environ) signals.request_started.send(sender=self.__class__, environ=environ)
try: request = self.request_class(environ)
request = self.request_class(environ) response = self.get_response(request)
except UnicodeDecodeError:
logger.warning(
'Bad Request (UnicodeDecodeError)',
exc_info=sys.exc_info(),
extra={
'status_code': 400,
}
)
response = http.HttpResponseBadRequest()
else:
response = self.get_response(request)
response._handler_class = self.__class__ response._handler_class = self.__class__
@ -187,7 +175,7 @@ def get_path_info(environ):
""" """
path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/') path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
return path_info.decode(UTF_8) return repercent_broken_unicode(path_info).decode(UTF_8)
def get_script_name(environ): def get_script_name(environ):

View File

@ -15,10 +15,8 @@ import sys
from wsgiref import simple_server from wsgiref import simple_server
from django.core.exceptions import ImproperlyConfigured from django.core.exceptions import ImproperlyConfigured
from django.core.handlers.wsgi import ISO_8859_1, UTF_8
from django.core.wsgi import get_wsgi_application from django.core.wsgi import get_wsgi_application
from django.utils import six from django.utils import six
from django.utils.encoding import uri_to_iri
from django.utils.module_loading import import_string from django.utils.module_loading import import_string
from django.utils.six.moves import socketserver from django.utils.six.moves import socketserver
@ -139,19 +137,7 @@ class WSGIRequestHandler(simple_server.WSGIRequestHandler, object):
if '_' in k: if '_' in k:
del self.headers[k] del self.headers[k]
env = super(WSGIRequestHandler, self).get_environ() return super(WSGIRequestHandler, self).get_environ()
path = self.path
if '?' in path:
path = path.partition('?')[0]
path = uri_to_iri(path).encode(UTF_8)
# Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
# decoded with ISO-8859-1. We replicate this behavior here.
# Refs comment in `get_bytes_from_wsgi()`.
env['PATH_INFO'] = path.decode(ISO_8859_1) if six.PY3 else path
return env
def handle(self): def handle(self):
"""Copy of WSGIRequestHandler, but with different ServerHandler""" """Copy of WSGIRequestHandler, but with different ServerHandler"""

View File

@ -33,12 +33,16 @@ class HandlerTests(SimpleTestCase):
self.assertIsNotNone(handler._request_middleware) self.assertIsNotNone(handler._request_middleware)
def test_bad_path_info(self): def test_bad_path_info(self):
"""Tests for bug #15672 ('request' referenced before assignment)""" """
A non-UTF-8 path populates PATH_INFO with a URL-encoded path and
produces a 404.
"""
environ = RequestFactory().get('/').environ environ = RequestFactory().get('/').environ
environ['PATH_INFO'] = b'\xed' if six.PY2 else '\xed' environ['PATH_INFO'] = b'\xed' if six.PY2 else '\xed'
handler = WSGIHandler() handler = WSGIHandler()
response = handler(environ, lambda *a, **k: None) response = handler(environ, lambda *a, **k: None)
self.assertEqual(response.status_code, 400) # The path of the request will be encoded to '/%ED'.
self.assertEqual(response.status_code, 404)
def test_non_ascii_query_string(self): def test_non_ascii_query_string(self):
""" """

View File

@ -173,8 +173,8 @@ class RequestsTests(SimpleTestCase):
self.assertEqual(repr(request), str_prefix("<WSGIRequest: GET '/somepath/'>")) self.assertEqual(repr(request), str_prefix("<WSGIRequest: GET '/somepath/'>"))
def test_wsgirequest_path_info(self): def test_wsgirequest_path_info(self):
def wsgi_str(path_info): def wsgi_str(path_info, encoding='utf-8'):
path_info = path_info.encode('utf-8') # Actual URL sent by the browser (bytestring) path_info = path_info.encode(encoding) # Actual URL sent by the browser (bytestring)
if six.PY3: if six.PY3:
path_info = path_info.decode('iso-8859-1') # Value in the WSGI environ dict (native string) path_info = path_info.decode('iso-8859-1') # Value in the WSGI environ dict (native string)
return path_info return path_info
@ -182,6 +182,16 @@ class RequestsTests(SimpleTestCase):
request = WSGIRequest({'PATH_INFO': wsgi_str("/سلام/"), 'REQUEST_METHOD': 'get', 'wsgi.input': BytesIO(b'')}) request = WSGIRequest({'PATH_INFO': wsgi_str("/سلام/"), 'REQUEST_METHOD': 'get', 'wsgi.input': BytesIO(b'')})
self.assertEqual(request.path, "/سلام/") self.assertEqual(request.path, "/سلام/")
# The URL may be incorrectly encoded in a non-UTF-8 encoding (#26971)
request = WSGIRequest({
'PATH_INFO': wsgi_str("/café/", encoding='iso-8859-1'),
'REQUEST_METHOD': 'get',
'wsgi.input': BytesIO(b''),
})
# Since it's impossible to determine the (wrong) encoding of the URL, it's
# left percent-encoded in the path.
self.assertEqual(request.path, "/caf%E9/")
def test_httprequest_location(self): def test_httprequest_location(self):
request = HttpRequest() request = HttpRequest()
self.assertEqual( self.assertEqual(