Fixed #26971 -- Prevented crash with non-UTF-8 incoming PATH_INFO

Thanks Tim Graham and Loïc Bistuer for the reviews.
This commit is contained in:
Claude Paroz 2016-08-12 15:31:18 +02:00
parent 384f89f8f8
commit 48c34f3336
4 changed files with 26 additions and 38 deletions

View File

@ -2,9 +2,7 @@ from __future__ import unicode_literals
import cgi
import codecs
import logging
import re
import sys
from io import BytesIO
from django import http
@ -13,10 +11,11 @@ from django.core import signals
from django.core.handlers import base
from django.urls import set_script_prefix
from django.utils import six
from django.utils.encoding import force_str, force_text
from django.utils.functional import cached_property
from django.utils.encoding import (
force_str, force_text, repercent_broken_unicode,
)
logger = logging.getLogger('django.request')
from django.utils.functional import cached_property
# encode() and decode() expect the charset to be a native string.
ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8')
@ -155,18 +154,7 @@ class WSGIHandler(base.BaseHandler):
def __call__(self, environ, start_response):
set_script_prefix(get_script_name(environ))
signals.request_started.send(sender=self.__class__, environ=environ)
try:
request = self.request_class(environ)
except UnicodeDecodeError:
logger.warning(
'Bad Request (UnicodeDecodeError)',
exc_info=sys.exc_info(),
extra={
'status_code': 400,
}
)
response = http.HttpResponseBadRequest()
else:
response = self.get_response(request)
response._handler_class = self.__class__
@ -187,7 +175,7 @@ def get_path_info(environ):
"""
path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
return path_info.decode(UTF_8)
return repercent_broken_unicode(path_info).decode(UTF_8)
def get_script_name(environ):

View File

@ -15,10 +15,8 @@ import sys
from wsgiref import simple_server
from django.core.exceptions import ImproperlyConfigured
from django.core.handlers.wsgi import ISO_8859_1, UTF_8
from django.core.wsgi import get_wsgi_application
from django.utils import six
from django.utils.encoding import uri_to_iri
from django.utils.module_loading import import_string
from django.utils.six.moves import socketserver
@ -139,19 +137,7 @@ class WSGIRequestHandler(simple_server.WSGIRequestHandler, object):
if '_' in k:
del self.headers[k]
env = super(WSGIRequestHandler, self).get_environ()
path = self.path
if '?' in path:
path = path.partition('?')[0]
path = uri_to_iri(path).encode(UTF_8)
# Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
# decoded with ISO-8859-1. We replicate this behavior here.
# Refs comment in `get_bytes_from_wsgi()`.
env['PATH_INFO'] = path.decode(ISO_8859_1) if six.PY3 else path
return env
return super(WSGIRequestHandler, self).get_environ()
def handle(self):
"""Copy of WSGIRequestHandler, but with different ServerHandler"""

View File

@ -33,12 +33,16 @@ class HandlerTests(SimpleTestCase):
self.assertIsNotNone(handler._request_middleware)
def test_bad_path_info(self):
"""Tests for bug #15672 ('request' referenced before assignment)"""
"""
A non-UTF-8 path populates PATH_INFO with a URL-encoded path and
produces a 404.
"""
environ = RequestFactory().get('/').environ
environ['PATH_INFO'] = b'\xed' if six.PY2 else '\xed'
handler = WSGIHandler()
response = handler(environ, lambda *a, **k: None)
self.assertEqual(response.status_code, 400)
# The path of the request will be encoded to '/%ED'.
self.assertEqual(response.status_code, 404)
def test_non_ascii_query_string(self):
"""

View File

@ -173,8 +173,8 @@ class RequestsTests(SimpleTestCase):
self.assertEqual(repr(request), str_prefix("<WSGIRequest: GET '/somepath/'>"))
def test_wsgirequest_path_info(self):
def wsgi_str(path_info):
path_info = path_info.encode('utf-8') # Actual URL sent by the browser (bytestring)
def wsgi_str(path_info, encoding='utf-8'):
path_info = path_info.encode(encoding) # Actual URL sent by the browser (bytestring)
if six.PY3:
path_info = path_info.decode('iso-8859-1') # Value in the WSGI environ dict (native string)
return path_info
@ -182,6 +182,16 @@ class RequestsTests(SimpleTestCase):
request = WSGIRequest({'PATH_INFO': wsgi_str("/سلام/"), 'REQUEST_METHOD': 'get', 'wsgi.input': BytesIO(b'')})
self.assertEqual(request.path, "/سلام/")
# The URL may be incorrectly encoded in a non-UTF-8 encoding (#26971)
request = WSGIRequest({
'PATH_INFO': wsgi_str("/café/", encoding='iso-8859-1'),
'REQUEST_METHOD': 'get',
'wsgi.input': BytesIO(b''),
})
# Since it's impossible to determine the (wrong) encoding of the URL, it's
# left percent-encoded in the path.
self.assertEqual(request.path, "/caf%E9/")
def test_httprequest_location(self):
request = HttpRequest()
self.assertEqual(