Fixed #19468 -- Decoded request.path correctly on Python 3.

Thanks aliva for the report and claudep for the feedback.
This commit is contained in:
Aymeric Augustin 2012-12-17 10:49:26 +01:00
parent d9a0b6ab36
commit 1e4a27d087
6 changed files with 49 additions and 19 deletions

View File

@ -6,6 +6,7 @@ except ImportError: # Python 2
from urlparse import urlparse
from django.conf import settings
from django.core.handlers.base import get_path_info
from django.core.handlers.wsgi import WSGIHandler
from django.contrib.staticfiles import utils
@ -67,6 +68,6 @@ class StaticFilesHandler(WSGIHandler):
return super(StaticFilesHandler, self).get_response(request)
def __call__(self, environ, start_response):
if not self._should_handle(environ['PATH_INFO']):
if not self._should_handle(get_path_info(environ)):
return self.application(environ, start_response)
return super(StaticFilesHandler, self).__call__(environ, start_response)

View File

@ -5,10 +5,14 @@ import sys
import types
from django import http
from django.conf import settings
from django.core import exceptions
from django.core import urlresolvers
from django.core import signals
from django.utils.encoding import force_text
from django.utils.importlib import import_module
from django.utils import six
from django.views import debug
logger = logging.getLogger('django.request')
@ -32,8 +36,6 @@ class BaseHandler(object):
Must be called after the environment is fixed (see __call__ in subclasses).
"""
from django.conf import settings
from django.core import exceptions
self._view_middleware = []
self._template_response_middleware = []
self._response_middleware = []
@ -75,9 +77,6 @@ class BaseHandler(object):
def get_response(self, request):
"Returns an HttpResponse object for the given HttpRequest"
from django.core import exceptions, urlresolvers
from django.conf import settings
try:
# Setup default url resolver for this thread, this code is outside
# the try/except so we don't get a spurious "unbound local
@ -147,7 +146,6 @@ class BaseHandler(object):
'request': request
})
if settings.DEBUG:
from django.views import debug
response = debug.technical_404_response(request, e)
else:
try:
@ -204,8 +202,6 @@ class BaseHandler(object):
caused by anything, so assuming something like the database is always
available would be an error.
"""
from django.conf import settings
if settings.DEBUG_PROPAGATE_EXCEPTIONS:
raise
@ -218,7 +214,6 @@ class BaseHandler(object):
)
if settings.DEBUG:
from django.views import debug
return debug.technical_500_response(request, *exc_info)
# If Http500 handler is not installed, re-raise last exception
@ -238,6 +233,20 @@ class BaseHandler(object):
response = func(request, response)
return response
def get_path_info(environ):
    """
    Returns the HTTP request's PATH_INFO as a unicode string.

    Falls back to '/' when the environ has no PATH_INFO key. The value is
    decoded as UTF-8, so a path that is not valid UTF-8 raises
    UnicodeDecodeError.
    """
    raw_path = environ.get('PATH_INFO', str('/'))
    # Under Python 3 the strings in environ were decoded with ISO-8859-1, so
    # encoding them back recovers the original bytestring provided by the
    # webserver. Otherwise the value is used as-is.
    path_bytes = raw_path.encode('iso-8859-1') if six.PY3 else raw_path
    # It'd be better to implement URI-to-IRI decoding, see #19508.
    return path_bytes.decode('utf-8')
def get_script_name(environ):
"""
Returns the equivalent of the HTTP request's SCRIPT_NAME environment
@ -246,7 +255,6 @@ def get_script_name(environ):
from the client's perspective), unless the FORCE_SCRIPT_NAME setting is
set (to anything).
"""
from django.conf import settings
if settings.FORCE_SCRIPT_NAME is not None:
return force_text(settings.FORCE_SCRIPT_NAME)
@ -255,9 +263,14 @@ def get_script_name(environ):
# rewrites. Unfortunately not every Web server (lighttpd!) passes this
# information through all the time, so FORCE_SCRIPT_NAME, above, is still
# needed.
script_url = environ.get('SCRIPT_URL', '')
if not script_url:
script_url = environ.get('REDIRECT_URL', '')
script_url = environ.get('SCRIPT_URL', environ.get('REDIRECT_URL', str('')))
if script_url:
return force_text(script_url[:-len(environ.get('PATH_INFO', ''))])
return force_text(environ.get('SCRIPT_NAME', ''))
script_name = script_url[:-len(environ.get('PATH_INFO', str('')))]
else:
script_name = environ.get('SCRIPT_NAME', str(''))
# Under Python 3, strings in environ are decoded with ISO-8859-1;
# re-encode to recover the original bytestring provided by the webserver.
if six.PY3:
script_name = script_name.encode('iso-8859-1')
# It'd be better to implement URI-to-IRI decoding, see #19508.
return script_name.decode('utf-8')

View File

@ -128,7 +128,7 @@ class LimitedStream(object):
class WSGIRequest(http.HttpRequest):
def __init__(self, environ):
script_name = base.get_script_name(environ)
path_info = force_text(environ.get('PATH_INFO', '/'))
path_info = base.get_path_info(environ)
if not path_info or path_info == script_name:
# Sometimes PATH_INFO exists, but is empty (e.g. accessing
# the SCRIPT_NAME URL without a trailing slash). We really need to

View File

@ -245,7 +245,11 @@ class RequestFactory(object):
# If there are parameters, add them
if parsed[3]:
path += str(";") + force_str(parsed[3])
return unquote(path)
path = unquote(path)
# WSGI requires latin-1 encoded strings. See get_path_info().
if six.PY3:
path = path.encode('utf-8').decode('iso-8859-1')
return path
def get(self, path, data={}, **extra):
"Construct a GET request."

View File

@ -1,6 +1,7 @@
from django.core.handlers.wsgi import WSGIHandler
from django.test import RequestFactory
from django.test.utils import override_settings
from django.utils import six
from django.utils import unittest
class HandlerTests(unittest.TestCase):
@ -22,7 +23,7 @@ class HandlerTests(unittest.TestCase):
def test_bad_path_info(self):
"""Tests for bug #15672 ('request' referenced before assignment)"""
environ = RequestFactory().get('/').environ
environ['PATH_INFO'] = b'\xed'
environ['PATH_INFO'] = '\xed'
handler = WSGIHandler()
response = handler(environ, lambda *a, **k: None)
self.assertEqual(response.status_code, 400)

View File

@ -11,6 +11,7 @@ from django.core.handlers.wsgi import WSGIRequest, LimitedStream
from django.http import HttpRequest, HttpResponse, parse_cookie, build_request_repr, UnreadablePostError
from django.test.client import FakePayload
from django.test.utils import override_settings, str_prefix
from django.utils import six
from django.utils import unittest
from django.utils.http import cookie_date, urlencode
from django.utils.timezone import utc
@ -57,6 +58,16 @@ class RequestsTests(unittest.TestCase):
self.assertEqual(build_request_repr(request, path_override='/otherpath/', GET_override={'a': 'b'}, POST_override={'c': 'd'}, COOKIES_override={'e': 'f'}, META_override={'g': 'h'}),
str_prefix("<WSGIRequest\npath:/otherpath/,\nGET:{%(_)s'a': %(_)s'b'},\nPOST:{%(_)s'c': %(_)s'd'},\nCOOKIES:{%(_)s'e': %(_)s'f'},\nMETA:{%(_)s'g': %(_)s'h'}>"))
def test_wsgirequest_path_info(self):
    """
    Regression for #19468: a non-ASCII PATH_INFO is exposed on
    request.path as the decoded text.
    """
    expected_path = "/سلام/"
    # Actual URL sent by the browser (bytestring).
    environ_path = expected_path.encode('utf-8')
    if six.PY3:
        # Value in the WSGI environ dict (native string).
        environ_path = environ_path.decode('iso-8859-1')
    environ = {
        'PATH_INFO': environ_path,
        'REQUEST_METHOD': 'get',
        'wsgi.input': BytesIO(b''),
    }
    request = WSGIRequest(environ)
    self.assertEqual(request.path, expected_path)
def test_parse_cookie(self):
self.assertEqual(parse_cookie('invalid@key=true'), {})