From 99196381374c5f88b7138236aa5d6a969e281590 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Mon, 17 Dec 2012 10:49:26 +0100 Subject: [PATCH] [1.5.x] Fixed #19468 -- Decoded request.path correctly on Python 3. Thanks aliva for the report and claudep for the feedback. Backport of 1e4a27d from master. --- django/contrib/staticfiles/handlers.py | 3 +- django/core/handlers/base.py | 43 ++++++++++++++++--------- django/core/handlers/wsgi.py | 2 +- django/test/client.py | 6 +++- tests/regressiontests/handlers/tests.py | 3 +- tests/regressiontests/requests/tests.py | 11 +++++++ 6 files changed, 49 insertions(+), 19 deletions(-) diff --git a/django/contrib/staticfiles/handlers.py b/django/contrib/staticfiles/handlers.py index 9067a0e75e..5174586ba1 100644 --- a/django/contrib/staticfiles/handlers.py +++ b/django/contrib/staticfiles/handlers.py @@ -6,6 +6,7 @@ except ImportError: # Python 2 from urlparse import urlparse from django.conf import settings +from django.core.handlers.base import get_path_info from django.core.handlers.wsgi import WSGIHandler from django.contrib.staticfiles import utils @@ -67,6 +68,6 @@ class StaticFilesHandler(WSGIHandler): return super(StaticFilesHandler, self).get_response(request) def __call__(self, environ, start_response): - if not self._should_handle(environ['PATH_INFO']): + if not self._should_handle(get_path_info(environ)): return self.application(environ, start_response) return super(StaticFilesHandler, self).__call__(environ, start_response) diff --git a/django/core/handlers/base.py b/django/core/handlers/base.py index 23572465cf..7fff71af0d 100644 --- a/django/core/handlers/base.py +++ b/django/core/handlers/base.py @@ -5,10 +5,14 @@ import sys import types from django import http +from django.conf import settings +from django.core import exceptions +from django.core import urlresolvers from django.core import signals from django.utils.encoding import force_text from django.utils.importlib import import_module from django.utils import six +from django.views import debug logger = logging.getLogger('django.request') @@ -32,8 +36,6 @@ class BaseHandler(object): Must be called after the environment is fixed (see __call__ in subclasses). """ - from django.conf import settings - from django.core import exceptions self._view_middleware = [] self._template_response_middleware = [] self._response_middleware = [] @@ -75,9 +77,6 @@ class BaseHandler(object): def get_response(self, request): "Returns an HttpResponse object for the given HttpRequest" - from django.core import exceptions, urlresolvers - from django.conf import settings - try: # Setup default url resolver for this thread, this code is outside # the try/except so we don't get a spurious "unbound local @@ -147,7 +146,6 @@ class BaseHandler(object): 'request': request }) if settings.DEBUG: - from django.views import debug response = debug.technical_404_response(request, e) else: try: @@ -204,8 +202,6 @@ class BaseHandler(object): caused by anything, so assuming something like the database is always available would be an error. """ - from django.conf import settings - if settings.DEBUG_PROPAGATE_EXCEPTIONS: raise @@ -218,7 +214,6 @@ class BaseHandler(object): ) if settings.DEBUG: - from django.views import debug return debug.technical_500_response(request, *exc_info) # If Http500 handler is not installed, re-raise last exception @@ -238,6 +233,20 @@ class BaseHandler(object): response = func(request, response) return response + +def get_path_info(environ): + """ + Returns the HTTP request's PATH_INFO as a unicode string. + """ + path_info = environ.get('PATH_INFO', str('/')) + # Under Python 3, strings in environ are decoded with ISO-8859-1; + # re-encode to recover the original bytestring provided by the webserver. + if six.PY3: + path_info = path_info.encode('iso-8859-1') + # It'd be better to implement URI-to-IRI decoding, see #19508. + return path_info.decode('utf-8') + + def get_script_name(environ): """ Returns the equivalent of the HTTP request's SCRIPT_NAME environment @@ -246,7 +255,6 @@ def get_script_name(environ): from the client's perspective), unless the FORCE_SCRIPT_NAME setting is set (to anything). """ - from django.conf import settings if settings.FORCE_SCRIPT_NAME is not None: return force_text(settings.FORCE_SCRIPT_NAME) @@ -255,9 +263,14 @@ def get_script_name(environ): # rewrites. Unfortunately not every Web server (lighttpd!) passes this # information through all the time, so FORCE_SCRIPT_NAME, above, is still # needed. - script_url = environ.get('SCRIPT_URL', '') - if not script_url: - script_url = environ.get('REDIRECT_URL', '') + script_url = environ.get('SCRIPT_URL', environ.get('REDIRECT_URL', str(''))) if script_url: - return force_text(script_url[:-len(environ.get('PATH_INFO', ''))]) - return force_text(environ.get('SCRIPT_NAME', '')) + script_name = script_url[:-len(environ.get('PATH_INFO', str('')))] + else: + script_name = environ.get('SCRIPT_NAME', str('')) + # Under Python 3, strings in environ are decoded with ISO-8859-1; + # re-encode to recover the original bytestring provided by the webserver. + if six.PY3: + script_name = script_name.encode('iso-8859-1') + # It'd be better to implement URI-to-IRI decoding, see #19508. + return script_name.decode('utf-8') diff --git a/django/core/handlers/wsgi.py b/django/core/handlers/wsgi.py index 4c0710549a..426679ca7b 100644 --- a/django/core/handlers/wsgi.py +++ b/django/core/handlers/wsgi.py @@ -128,7 +128,7 @@ class LimitedStream(object): class WSGIRequest(http.HttpRequest): def __init__(self, environ): script_name = base.get_script_name(environ) - path_info = force_text(environ.get('PATH_INFO', '/')) + path_info = base.get_path_info(environ) if not path_info or path_info == script_name: # Sometimes PATH_INFO exists, but is empty (e.g. accessing # the SCRIPT_NAME URL without a trailing slash). We really need to diff --git a/django/test/client.py b/django/test/client.py index a3c04bb20d..015ee1309a 100644 --- a/django/test/client.py +++ b/django/test/client.py @@ -245,7 +245,11 @@ class RequestFactory(object): # If there are parameters, add them if parsed[3]: path += str(";") + force_str(parsed[3]) - return unquote(path) + path = unquote(path) + # WSGI requires latin-1 encoded strings. See get_path_info(). + if six.PY3: + path = path.encode('utf-8').decode('iso-8859-1') + return path def get(self, path, data={}, **extra): "Construct a GET request." diff --git a/tests/regressiontests/handlers/tests.py b/tests/regressiontests/handlers/tests.py index 8676a448d9..9cd5816219 100644 --- a/tests/regressiontests/handlers/tests.py +++ b/tests/regressiontests/handlers/tests.py @@ -1,6 +1,7 @@ from django.core.handlers.wsgi import WSGIHandler from django.test import RequestFactory from django.test.utils import override_settings +from django.utils import six from django.utils import unittest class HandlerTests(unittest.TestCase): @@ -22,7 +23,7 @@ class HandlerTests(unittest.TestCase): def test_bad_path_info(self): """Tests for bug #15672 ('request' referenced before assignment)""" environ = RequestFactory().get('/').environ - environ['PATH_INFO'] = b'\xed' + environ['PATH_INFO'] = '\xed' handler = WSGIHandler() response = handler(environ, lambda *a, **k: None) self.assertEqual(response.status_code, 400) diff --git a/tests/regressiontests/requests/tests.py b/tests/regressiontests/requests/tests.py index adf824dff7..bb7f925e87 100644 --- a/tests/regressiontests/requests/tests.py +++ b/tests/regressiontests/requests/tests.py @@ -11,6 +11,7 @@ from django.core.handlers.wsgi import WSGIRequest, LimitedStream from django.http import HttpRequest, HttpResponse, parse_cookie, build_request_repr, UnreadablePostError from django.test.client import FakePayload from django.test.utils import override_settings, str_prefix +from django.utils import six from django.utils import unittest from django.utils.http import cookie_date, urlencode from django.utils.timezone import utc @@ -57,6 +58,16 @@ class RequestsTests(unittest.TestCase): self.assertEqual(build_request_repr(request, path_override='/otherpath/', GET_override={'a': 'b'}, POST_override={'c': 'd'}, COOKIES_override={'e': 'f'}, META_override={'g': 'h'}), str_prefix("")) + def test_wsgirequest_path_info(self): + def wsgi_str(path_info): + path_info = path_info.encode('utf-8') # Actual URL sent by the browser (bytestring) + if six.PY3: + path_info = path_info.decode('iso-8859-1') # Value in the WSGI environ dict (native string) + return path_info + # Regression for #19468 + request = WSGIRequest({'PATH_INFO': wsgi_str("/سلام/"), 'REQUEST_METHOD': 'get', 'wsgi.input': BytesIO(b'')}) + self.assertEqual(request.path, "/سلام/") + def test_parse_cookie(self): self.assertEqual(parse_cookie('invalid@key=true'), {})