Fixed #18456 -- Added path escaping to HttpRequest.get_full_path().

This commit is contained in:
Unai Zalakain 2014-10-31 17:43:34 +02:00 committed by Tim Graham
parent d3db878e4b
commit c548c8d0d1
6 changed files with 57 additions and 4 deletions

View File

@ -15,7 +15,9 @@ from django.core.files import uploadhandler
from django.http.multipartparser import MultiPartParser, MultiPartParserError from django.http.multipartparser import MultiPartParser, MultiPartParserError
from django.utils import six from django.utils import six
from django.utils.datastructures import MultiValueDict, ImmutableList from django.utils.datastructures import MultiValueDict, ImmutableList
from django.utils.encoding import force_bytes, force_text, force_str, iri_to_uri from django.utils.encoding import (
force_bytes, force_text, force_str, escape_uri_path, iri_to_uri,
)
from django.utils.six.moves.urllib.parse import parse_qsl, urlencode, quote, urljoin, urlsplit from django.utils.six.moves.urllib.parse import parse_qsl, urlencode, quote, urljoin, urlsplit
@ -98,7 +100,7 @@ class HttpRequest(object):
# RFC 3986 requires query string arguments to be in the ASCII range. # RFC 3986 requires query string arguments to be in the ASCII range.
# Rather than crash if this doesn't happen, we encode defensively. # Rather than crash if this doesn't happen, we encode defensively.
return '%s%s' % ( return '%s%s' % (
self.path, escape_uri_path(self.path),
('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else '' ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else ''
) )

View File

@ -226,6 +226,23 @@ def uri_to_iri(uri):
return repercent_broken_unicode(iri).decode('utf-8') return repercent_broken_unicode(iri).decode('utf-8')
def escape_uri_path(path):
    """
    Percent-encode the characters of ``path`` that are unsafe in the path
    portion of a Uniform Resource Identifier (URI) and return the result.
    """
    # RFC 2396, sections 2.2 and 2.3, define these character classes:
    #   reserved   = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
    #   unreserved = alphanum | mark
    #   mark       = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
    # The safe set below is those characters minus ";", "=" and "?", which
    # are subtracted according to section 3.3 of RFC 2396.
    # "/" is deliberately kept safe: the whole path is being escaped here,
    # not a single path segment.
    safe_chars = b"/:@&+$,-_.!~*'()"
    path_bytes = force_bytes(path)
    return quote(path_bytes, safe=safe_chars)
def repercent_broken_unicode(path): def repercent_broken_unicode(path):
""" """
As per section 3.2 of RFC 3987, step three of converting a URI into an IRI, As per section 3.2 of RFC 3987, step three of converting a URI into an IRI,

View File

@ -298,6 +298,13 @@ The functions defined in this module share the following properties:
Returns an ASCII string containing the encoded result. Returns an ASCII string containing the encoded result.
.. function:: escape_uri_path(path)
.. versionadded:: 1.8
Escapes the unsafe characters from the path portion of a Uniform Resource
Identifier (URI).
``django.utils.feedgenerator`` ``django.utils.feedgenerator``
============================== ==============================

View File

@ -381,6 +381,10 @@ Requests and Responses
* ``WSGIRequestHandler`` now follows RFC in converting URI to IRI, using * ``WSGIRequestHandler`` now follows RFC in converting URI to IRI, using
``uri_to_iri()``. ``uri_to_iri()``.
* The :meth:`HttpRequest.get_full_path()
<django.http.HttpRequest.get_full_path>` method now escapes unsafe characters
from the path portion of a Uniform Resource Identifier (URI) properly.
Tests Tests
^^^^^ ^^^^^

View File

@ -35,6 +35,19 @@ class RequestsTests(SimpleTestCase):
# and FILES should be MultiValueDict # and FILES should be MultiValueDict
self.assertEqual(request.FILES.getlist('foo'), []) self.assertEqual(request.FILES.getlist('foo'), [])
def test_httprequest_full_path(self):
    # ";", "?" and "=" in the path must come back percent-encoded, while the
    # query string is expected to be appended unchanged after the "?".
    raw_path = '/;some/?awful/=path/foo:bar/'
    req = HttpRequest()
    req.path = raw_path
    req.path_info = raw_path
    req.META['QUERY_STRING'] = ';some=query&+query=string'
    self.assertEqual(
        req.get_full_path(),
        '/%3Bsome/%3Fawful/%3Dpath/foo:bar/?;some=query&+query=string',
    )
def test_httprequest_full_path_with_query_string_and_fragment(self):
    # A "#" inside the path is expected to be escaped as %23; the "#" in
    # the query string is expected to be left as-is.
    raw_path = '/foo#bar'
    req = HttpRequest()
    req.path = raw_path
    req.path_info = raw_path
    req.META['QUERY_STRING'] = 'baz#quux'
    full_path = req.get_full_path()
    self.assertEqual(full_path, '/foo%23bar?baz#quux')
def test_httprequest_repr(self): def test_httprequest_repr(self):
request = HttpRequest() request = HttpRequest()
request.path = '/somepath/' request.path = '/somepath/'

View File

@ -5,8 +5,10 @@ import unittest
import datetime import datetime
from django.utils import six from django.utils import six
from django.utils.encoding import (filepath_to_uri, force_bytes, force_text, from django.utils.encoding import (
iri_to_uri, uri_to_iri) filepath_to_uri, force_bytes, force_text, escape_uri_path,
iri_to_uri, uri_to_iri,
)
from django.utils.http import urlquote_plus from django.utils.http import urlquote_plus
@ -40,6 +42,14 @@ class TestEncodingUtils(unittest.TestCase):
today = datetime.date.today() today = datetime.date.today()
self.assertEqual(force_bytes(today, strings_only=True), today) self.assertEqual(force_bytes(today, strings_only=True), today)
def test_escape_uri_path(self):
    # Each pair is (raw path, expected escaped path). ";", "=", "?" and "#"
    # are expected to be percent-encoded; ":", "@", "&", "+" and "/" are
    # expected to pass through untouched.
    cases = (
        (
            '/;some/=awful/?path/:with/@lots/&of/+awful/chars',
            '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars',
        ),
        ('/foo#bar', '/foo%23bar'),
        ('/foo?bar', '/foo%3Fbar'),
    )
    for raw_path, escaped_path in cases:
        self.assertEqual(escape_uri_path(raw_path), escaped_path)
class TestRFC3987IEncodingUtils(unittest.TestCase): class TestRFC3987IEncodingUtils(unittest.TestCase):