Fixed #26971 -- Prevented crash with non-UTF-8 incoming PATH_INFO
Thanks Tim Graham and Loïc Bistuer for the reviews.
This commit is contained in:
parent
384f89f8f8
commit
48c34f3336
|
@ -2,9 +2,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import cgi
|
import cgi
|
||||||
import codecs
|
import codecs
|
||||||
import logging
|
|
||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
from django import http
|
from django import http
|
||||||
|
@ -13,10 +11,11 @@ from django.core import signals
|
||||||
from django.core.handlers import base
|
from django.core.handlers import base
|
||||||
from django.urls import set_script_prefix
|
from django.urls import set_script_prefix
|
||||||
from django.utils import six
|
from django.utils import six
|
||||||
from django.utils.encoding import force_str, force_text
|
from django.utils.encoding import (
|
||||||
from django.utils.functional import cached_property
|
force_str, force_text, repercent_broken_unicode,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger('django.request')
|
from django.utils.functional import cached_property
|
||||||
|
|
||||||
# encode() and decode() expect the charset to be a native string.
|
# encode() and decode() expect the charset to be a native string.
|
||||||
ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8')
|
ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8')
|
||||||
|
@ -155,19 +154,8 @@ class WSGIHandler(base.BaseHandler):
|
||||||
def __call__(self, environ, start_response):
|
def __call__(self, environ, start_response):
|
||||||
set_script_prefix(get_script_name(environ))
|
set_script_prefix(get_script_name(environ))
|
||||||
signals.request_started.send(sender=self.__class__, environ=environ)
|
signals.request_started.send(sender=self.__class__, environ=environ)
|
||||||
try:
|
request = self.request_class(environ)
|
||||||
request = self.request_class(environ)
|
response = self.get_response(request)
|
||||||
except UnicodeDecodeError:
|
|
||||||
logger.warning(
|
|
||||||
'Bad Request (UnicodeDecodeError)',
|
|
||||||
exc_info=sys.exc_info(),
|
|
||||||
extra={
|
|
||||||
'status_code': 400,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
response = http.HttpResponseBadRequest()
|
|
||||||
else:
|
|
||||||
response = self.get_response(request)
|
|
||||||
|
|
||||||
response._handler_class = self.__class__
|
response._handler_class = self.__class__
|
||||||
|
|
||||||
|
@ -187,7 +175,7 @@ def get_path_info(environ):
|
||||||
"""
|
"""
|
||||||
path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
|
path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
|
||||||
|
|
||||||
return path_info.decode(UTF_8)
|
return repercent_broken_unicode(path_info).decode(UTF_8)
|
||||||
|
|
||||||
|
|
||||||
def get_script_name(environ):
|
def get_script_name(environ):
|
||||||
|
|
|
@ -15,10 +15,8 @@ import sys
|
||||||
from wsgiref import simple_server
|
from wsgiref import simple_server
|
||||||
|
|
||||||
from django.core.exceptions import ImproperlyConfigured
|
from django.core.exceptions import ImproperlyConfigured
|
||||||
from django.core.handlers.wsgi import ISO_8859_1, UTF_8
|
|
||||||
from django.core.wsgi import get_wsgi_application
|
from django.core.wsgi import get_wsgi_application
|
||||||
from django.utils import six
|
from django.utils import six
|
||||||
from django.utils.encoding import uri_to_iri
|
|
||||||
from django.utils.module_loading import import_string
|
from django.utils.module_loading import import_string
|
||||||
from django.utils.six.moves import socketserver
|
from django.utils.six.moves import socketserver
|
||||||
|
|
||||||
|
@ -139,19 +137,7 @@ class WSGIRequestHandler(simple_server.WSGIRequestHandler, object):
|
||||||
if '_' in k:
|
if '_' in k:
|
||||||
del self.headers[k]
|
del self.headers[k]
|
||||||
|
|
||||||
env = super(WSGIRequestHandler, self).get_environ()
|
return super(WSGIRequestHandler, self).get_environ()
|
||||||
|
|
||||||
path = self.path
|
|
||||||
if '?' in path:
|
|
||||||
path = path.partition('?')[0]
|
|
||||||
|
|
||||||
path = uri_to_iri(path).encode(UTF_8)
|
|
||||||
# Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
|
|
||||||
# decoded with ISO-8859-1. We replicate this behavior here.
|
|
||||||
# Refs comment in `get_bytes_from_wsgi()`.
|
|
||||||
env['PATH_INFO'] = path.decode(ISO_8859_1) if six.PY3 else path
|
|
||||||
|
|
||||||
return env
|
|
||||||
|
|
||||||
def handle(self):
|
def handle(self):
|
||||||
"""Copy of WSGIRequestHandler, but with different ServerHandler"""
|
"""Copy of WSGIRequestHandler, but with different ServerHandler"""
|
||||||
|
|
|
@ -33,12 +33,16 @@ class HandlerTests(SimpleTestCase):
|
||||||
self.assertIsNotNone(handler._request_middleware)
|
self.assertIsNotNone(handler._request_middleware)
|
||||||
|
|
||||||
def test_bad_path_info(self):
|
def test_bad_path_info(self):
|
||||||
"""Tests for bug #15672 ('request' referenced before assignment)"""
|
"""
|
||||||
|
A non-UTF-8 path populates PATH_INFO with a URL-encoded path and
|
||||||
|
produces a 404.
|
||||||
|
"""
|
||||||
environ = RequestFactory().get('/').environ
|
environ = RequestFactory().get('/').environ
|
||||||
environ['PATH_INFO'] = b'\xed' if six.PY2 else '\xed'
|
environ['PATH_INFO'] = b'\xed' if six.PY2 else '\xed'
|
||||||
handler = WSGIHandler()
|
handler = WSGIHandler()
|
||||||
response = handler(environ, lambda *a, **k: None)
|
response = handler(environ, lambda *a, **k: None)
|
||||||
self.assertEqual(response.status_code, 400)
|
# The path of the request will be encoded to '/%ED'.
|
||||||
|
self.assertEqual(response.status_code, 404)
|
||||||
|
|
||||||
def test_non_ascii_query_string(self):
|
def test_non_ascii_query_string(self):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -173,8 +173,8 @@ class RequestsTests(SimpleTestCase):
|
||||||
self.assertEqual(repr(request), str_prefix("<WSGIRequest: GET '/somepath/'>"))
|
self.assertEqual(repr(request), str_prefix("<WSGIRequest: GET '/somepath/'>"))
|
||||||
|
|
||||||
def test_wsgirequest_path_info(self):
|
def test_wsgirequest_path_info(self):
|
||||||
def wsgi_str(path_info):
|
def wsgi_str(path_info, encoding='utf-8'):
|
||||||
path_info = path_info.encode('utf-8') # Actual URL sent by the browser (bytestring)
|
path_info = path_info.encode(encoding) # Actual URL sent by the browser (bytestring)
|
||||||
if six.PY3:
|
if six.PY3:
|
||||||
path_info = path_info.decode('iso-8859-1') # Value in the WSGI environ dict (native string)
|
path_info = path_info.decode('iso-8859-1') # Value in the WSGI environ dict (native string)
|
||||||
return path_info
|
return path_info
|
||||||
|
@ -182,6 +182,16 @@ class RequestsTests(SimpleTestCase):
|
||||||
request = WSGIRequest({'PATH_INFO': wsgi_str("/سلام/"), 'REQUEST_METHOD': 'get', 'wsgi.input': BytesIO(b'')})
|
request = WSGIRequest({'PATH_INFO': wsgi_str("/سلام/"), 'REQUEST_METHOD': 'get', 'wsgi.input': BytesIO(b'')})
|
||||||
self.assertEqual(request.path, "/سلام/")
|
self.assertEqual(request.path, "/سلام/")
|
||||||
|
|
||||||
|
# The URL may be incorrectly encoded in a non-UTF-8 encoding (#26971)
|
||||||
|
request = WSGIRequest({
|
||||||
|
'PATH_INFO': wsgi_str("/café/", encoding='iso-8859-1'),
|
||||||
|
'REQUEST_METHOD': 'get',
|
||||||
|
'wsgi.input': BytesIO(b''),
|
||||||
|
})
|
||||||
|
# Since it's impossible to decide the (wrong) encoding of the URL, it's
|
||||||
|
# left percent-encoded in the path.
|
||||||
|
self.assertEqual(request.path, "/caf%E9/")
|
||||||
|
|
||||||
def test_httprequest_location(self):
|
def test_httprequest_location(self):
|
||||||
request = HttpRequest()
|
request = HttpRequest()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
|
Loading…
Reference in New Issue