diff --git a/django/http/request.py b/django/http/request.py index 4b9c7a77d56..486eeb54104 100644 --- a/django/http/request.py +++ b/django/http/request.py @@ -15,7 +15,9 @@ from django.core.files import uploadhandler from django.http.multipartparser import MultiPartParser, MultiPartParserError from django.utils import six from django.utils.datastructures import MultiValueDict, ImmutableList -from django.utils.encoding import force_bytes, force_text, force_str, iri_to_uri +from django.utils.encoding import ( + force_bytes, force_text, force_str, escape_uri_path, iri_to_uri, +) from django.utils.six.moves.urllib.parse import parse_qsl, urlencode, quote, urljoin, urlsplit @@ -98,7 +100,7 @@ class HttpRequest(object): # RFC 3986 requires query string arguments to be in the ASCII range. # Rather than crash if this doesn't happen, we encode defensively. return '%s%s' % ( - self.path, + escape_uri_path(self.path), ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else '' ) diff --git a/django/utils/encoding.py b/django/utils/encoding.py index 3abee09c529..39d548a9cef 100644 --- a/django/utils/encoding.py +++ b/django/utils/encoding.py @@ -226,6 +226,23 @@ def uri_to_iri(uri): return repercent_broken_unicode(iri).decode('utf-8') +def escape_uri_path(path): + """ + Escape the unsafe characters from the path portion of a Uniform Resource + Identifier (URI). + """ + # These are the "reserved" and "unreserved" characters specified in + # sections 2.2 and 2.3 of RFC 2396: + # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," + # unreserved = alphanum | mark + # mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + # The list of safe characters here is constructed by subtracting ";", "=", + # and "?" according to section 3.3 of RFC 2396. + # The reason for not subtracting and escaping "/" is that we are escaping + # the entire path, not a path segment. 
+ return quote(force_bytes(path), safe=b"/:@&+$,-_.!~*'()") + + def repercent_broken_unicode(path): """ As per section 3.2 of RFC 3987, step three of converting a URI into an IRI, diff --git a/docs/ref/utils.txt b/docs/ref/utils.txt index 4c7368f687b..b32f0a838b8 100644 --- a/docs/ref/utils.txt +++ b/docs/ref/utils.txt @@ -298,6 +298,13 @@ The functions defined in this module share the following properties: Returns an ASCII string containing the encoded result. +.. function:: escape_uri_path(path) + + .. versionadded:: 1.8 + + Escapes the unsafe characters from the path portion of a Uniform Resource + Identifier (URI). + ``django.utils.feedgenerator`` ============================== diff --git a/docs/releases/1.8.txt b/docs/releases/1.8.txt index f84294d4592..492fad478f7 100644 --- a/docs/releases/1.8.txt +++ b/docs/releases/1.8.txt @@ -381,6 +381,10 @@ Requests and Responses * ``WSGIRequestHandler`` now follows RFC in converting URI to IRI, using ``uri_to_iri()``. +* The :meth:`HttpRequest.get_full_path() + <django.http.HttpRequest.get_full_path>` method now escapes unsafe characters + from the path portion of a Uniform Resource Identifier (URI) properly. 
+ Tests ^^^^^ diff --git a/tests/requests/tests.py b/tests/requests/tests.py index 36d1d80a699..22b3021d1fa 100644 --- a/tests/requests/tests.py +++ b/tests/requests/tests.py @@ -35,6 +35,19 @@ class RequestsTests(SimpleTestCase): # and FILES should be MultiValueDict self.assertEqual(request.FILES.getlist('foo'), []) + def test_httprequest_full_path(self): + request = HttpRequest() + request.path = request.path_info = '/;some/?awful/=path/foo:bar/' + request.META['QUERY_STRING'] = ';some=query&+query=string' + expected = '/%3Bsome/%3Fawful/%3Dpath/foo:bar/?;some=query&+query=string' + self.assertEqual(request.get_full_path(), expected) + + def test_httprequest_full_path_with_query_string_and_fragment(self): + request = HttpRequest() + request.path = request.path_info = '/foo#bar' + request.META['QUERY_STRING'] = 'baz#quux' + self.assertEqual(request.get_full_path(), '/foo%23bar?baz#quux') + def test_httprequest_repr(self): request = HttpRequest() request.path = '/somepath/' diff --git a/tests/utils_tests/test_encoding.py b/tests/utils_tests/test_encoding.py index 1685c82def0..3119b6467a5 100644 --- a/tests/utils_tests/test_encoding.py +++ b/tests/utils_tests/test_encoding.py @@ -5,8 +5,10 @@ import unittest import datetime from django.utils import six -from django.utils.encoding import (filepath_to_uri, force_bytes, force_text, - iri_to_uri, uri_to_iri) +from django.utils.encoding import ( + filepath_to_uri, force_bytes, force_text, escape_uri_path, + iri_to_uri, uri_to_iri, +) from django.utils.http import urlquote_plus @@ -40,6 +42,14 @@ class TestEncodingUtils(unittest.TestCase): today = datetime.date.today() self.assertEqual(force_bytes(today, strings_only=True), today) + def test_escape_uri_path(self): + self.assertEqual( + escape_uri_path('/;some/=awful/?path/:with/@lots/&of/+awful/chars'), + '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars' + ) + self.assertEqual(escape_uri_path('/foo#bar'), '/foo%23bar') + 
self.assertEqual(escape_uri_path('/foo?bar'), '/foo%3Fbar') + class TestRFC3987IEncodingUtils(unittest.TestCase):