Fixed #26971 -- Prevented crash with non-UTF-8 incoming PATH_INFO
Thanks Tim Graham and Loïc Bistuer for the reviews.
This commit is contained in:
parent
384f89f8f8
commit
48c34f3336
|
@ -2,9 +2,7 @@ from __future__ import unicode_literals
|
|||
|
||||
import cgi
|
||||
import codecs
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from io import BytesIO
|
||||
|
||||
from django import http
|
||||
|
@ -13,10 +11,11 @@ from django.core import signals
|
|||
from django.core.handlers import base
|
||||
from django.urls import set_script_prefix
|
||||
from django.utils import six
|
||||
from django.utils.encoding import force_str, force_text
|
||||
from django.utils.functional import cached_property
|
||||
from django.utils.encoding import (
|
||||
force_str, force_text, repercent_broken_unicode,
|
||||
)
|
||||
|
||||
logger = logging.getLogger('django.request')
|
||||
from django.utils.functional import cached_property
|
||||
|
||||
# encode() and decode() expect the charset to be a native string.
|
||||
ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8')
|
||||
|
@ -155,18 +154,7 @@ class WSGIHandler(base.BaseHandler):
|
|||
def __call__(self, environ, start_response):
|
||||
set_script_prefix(get_script_name(environ))
|
||||
signals.request_started.send(sender=self.__class__, environ=environ)
|
||||
try:
|
||||
request = self.request_class(environ)
|
||||
except UnicodeDecodeError:
|
||||
logger.warning(
|
||||
'Bad Request (UnicodeDecodeError)',
|
||||
exc_info=sys.exc_info(),
|
||||
extra={
|
||||
'status_code': 400,
|
||||
}
|
||||
)
|
||||
response = http.HttpResponseBadRequest()
|
||||
else:
|
||||
response = self.get_response(request)
|
||||
|
||||
response._handler_class = self.__class__
|
||||
|
@ -187,7 +175,7 @@ def get_path_info(environ):
|
|||
"""
|
||||
path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
|
||||
|
||||
return path_info.decode(UTF_8)
|
||||
return repercent_broken_unicode(path_info).decode(UTF_8)
|
||||
|
||||
|
||||
def get_script_name(environ):
|
||||
|
|
|
@ -15,10 +15,8 @@ import sys
|
|||
from wsgiref import simple_server
|
||||
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
from django.core.handlers.wsgi import ISO_8859_1, UTF_8
|
||||
from django.core.wsgi import get_wsgi_application
|
||||
from django.utils import six
|
||||
from django.utils.encoding import uri_to_iri
|
||||
from django.utils.module_loading import import_string
|
||||
from django.utils.six.moves import socketserver
|
||||
|
||||
|
@ -139,19 +137,7 @@ class WSGIRequestHandler(simple_server.WSGIRequestHandler, object):
|
|||
if '_' in k:
|
||||
del self.headers[k]
|
||||
|
||||
env = super(WSGIRequestHandler, self).get_environ()
|
||||
|
||||
path = self.path
|
||||
if '?' in path:
|
||||
path = path.partition('?')[0]
|
||||
|
||||
path = uri_to_iri(path).encode(UTF_8)
|
||||
# Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
|
||||
# decoded with ISO-8859-1. We replicate this behavior here.
|
||||
# Refs comment in `get_bytes_from_wsgi()`.
|
||||
env['PATH_INFO'] = path.decode(ISO_8859_1) if six.PY3 else path
|
||||
|
||||
return env
|
||||
return super(WSGIRequestHandler, self).get_environ()
|
||||
|
||||
def handle(self):
|
||||
"""Copy of WSGIRequestHandler, but with different ServerHandler"""
|
||||
|
|
|
@ -33,12 +33,16 @@ class HandlerTests(SimpleTestCase):
|
|||
self.assertIsNotNone(handler._request_middleware)
|
||||
|
||||
def test_bad_path_info(self):
|
||||
"""Tests for bug #15672 ('request' referenced before assignment)"""
|
||||
"""
|
||||
A non-UTF-8 path populates PATH_INFO with a URL-encoded path and
|
||||
produces a 404.
|
||||
"""
|
||||
environ = RequestFactory().get('/').environ
|
||||
environ['PATH_INFO'] = b'\xed' if six.PY2 else '\xed'
|
||||
handler = WSGIHandler()
|
||||
response = handler(environ, lambda *a, **k: None)
|
||||
self.assertEqual(response.status_code, 400)
|
||||
# The path of the request will be encoded to '/%ED'.
|
||||
self.assertEqual(response.status_code, 404)
|
||||
|
||||
def test_non_ascii_query_string(self):
|
||||
"""
|
||||
|
|
|
@ -173,8 +173,8 @@ class RequestsTests(SimpleTestCase):
|
|||
self.assertEqual(repr(request), str_prefix("<WSGIRequest: GET '/somepath/'>"))
|
||||
|
||||
def test_wsgirequest_path_info(self):
|
||||
def wsgi_str(path_info):
|
||||
path_info = path_info.encode('utf-8') # Actual URL sent by the browser (bytestring)
|
||||
def wsgi_str(path_info, encoding='utf-8'):
|
||||
path_info = path_info.encode(encoding) # Actual URL sent by the browser (bytestring)
|
||||
if six.PY3:
|
||||
path_info = path_info.decode('iso-8859-1') # Value in the WSGI environ dict (native string)
|
||||
return path_info
|
||||
|
@ -182,6 +182,16 @@ class RequestsTests(SimpleTestCase):
|
|||
request = WSGIRequest({'PATH_INFO': wsgi_str("/سلام/"), 'REQUEST_METHOD': 'get', 'wsgi.input': BytesIO(b'')})
|
||||
self.assertEqual(request.path, "/سلام/")
|
||||
|
||||
# The URL may be incorrectly encoded in a non-UTF-8 encoding (#26971)
|
||||
request = WSGIRequest({
|
||||
'PATH_INFO': wsgi_str("/café/", encoding='iso-8859-1'),
|
||||
'REQUEST_METHOD': 'get',
|
||||
'wsgi.input': BytesIO(b''),
|
||||
})
|
||||
# Since it's impossible to determine the (wrong) encoding of the URL, it's
|
||||
# left percent-encoded in the path.
|
||||
self.assertEqual(request.path, "/caf%E9/")
|
||||
|
||||
def test_httprequest_location(self):
|
||||
request = HttpRequest()
|
||||
self.assertEqual(
|
||||
|
|
Loading…
Reference in New Issue