From e167e96cfea670422ca75d0b35fe7c4195f25b63 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 26 Jun 2014 16:55:36 +0200 Subject: [PATCH] Fixed #22223 -- Prevented over-escaping URLs in reverse() And follow more closely the class of characters defined in the RFC 3986. Thanks Erik van Zijst for the report and the initial patch, and Tim Graham for the review. --- django/contrib/admin/utils.py | 2 +- django/core/urlresolvers.py | 6 ++++-- django/utils/html.py | 3 ++- django/utils/http.py | 3 +++ tests/admin_views/tests.py | 12 ++++++------ tests/template_tests/tests.py | 6 +++--- tests/urlpatterns_reverse/tests.py | 2 +- 7 files changed, 20 insertions(+), 14 deletions(-) diff --git a/django/contrib/admin/utils.py b/django/contrib/admin/utils.py index 0c8ac1c6fc..3c3ef3bbd9 100644 --- a/django/contrib/admin/utils.py +++ b/django/contrib/admin/utils.py @@ -57,7 +57,7 @@ def quote(s): res = list(s) for i in range(len(res)): c = res[i] - if c in """:/_#?;@&=+$,"<>%\\""": + if c in """:/_#?;@&=+$,"[]<>%\\""": res[i] = '_%02X' % ord(c) return ''.join(res) diff --git a/django/core/urlresolvers.py b/django/core/urlresolvers.py index e8564ecc76..d3bfef423b 100644 --- a/django/core/urlresolvers.py +++ b/django/core/urlresolvers.py @@ -20,7 +20,7 @@ from django.utils.datastructures import MultiValueDict from django.utils.deprecation import RemovedInDjango20Warning from django.utils.encoding import force_str, force_text, iri_to_uri from django.utils.functional import lazy -from django.utils.http import urlquote +from django.utils.http import RFC3986_SUBDELIMS, urlquote from django.utils.module_loading import module_has_submodule from django.utils.regex_helper import normalize from django.utils import six, lru_cache @@ -453,7 +453,9 @@ class RegexURLResolver(LocaleRegexProvider): # arguments in order to return a properly encoded URL. candidate_pat = prefix_norm.replace('%', '%%') + result if re.search('^%s%s' % (prefix_norm, pattern), candidate_pat % candidate_subs, re.UNICODE): - candidate_subs = dict((k, urlquote(v)) for (k, v) in candidate_subs.items()) + # safe characters from `pchar` definition of RFC 3986 + candidate_subs = dict((k, urlquote(v, safe=RFC3986_SUBDELIMS + str('/~:@'))) + for (k, v) in candidate_subs.items()) return candidate_pat % candidate_subs # lookup_view can be URL label, or dotted path, or callable, Any of # these can be passed in at the top, but callables are not friendly in diff --git a/django/utils/html.py b/django/utils/html.py index 38fe90d1a9..d5fe2f4a6b 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -7,6 +7,7 @@ import sys from django.utils.encoding import force_text, force_str from django.utils.functional import allow_lazy +from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS from django.utils.safestring import SafeData, mark_safe from django.utils import six from django.utils.six.moves.urllib.parse import quote, unquote, urlsplit, urlunsplit @@ -215,7 +216,7 @@ def smart_urlquote(url): url = unquote(force_str(url)) # See http://bugs.python.org/issue2637 - url = quote(url, safe=b'!*\'();:@&=+$,/?#[]~') + url = quote(url, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~')) return force_text(url) diff --git a/django/utils/http.py b/django/utils/http.py index 27d445d46f..2375f95b5b 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -30,6 +30,9 @@ RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T)) RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T)) ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) +RFC3986_GENDELIMS = str(":/?#[]@") +RFC3986_SUBDELIMS = str("!$&'()*+,;=") + def urlquote(url, safe='/'): """ diff --git a/tests/admin_views/tests.py b/tests/admin_views/tests.py index f3e060fd92..6006a0e504 100644 --- a/tests/admin_views/tests.py +++ b/tests/admin_views/tests.py @@ -36,7 +36,7 @@ from django.utils import translation from django.utils.cache import get_max_age from django.utils.encoding import iri_to_uri, force_bytes, force_text from django.utils.html import escape -from django.utils.http import urlencode, urlquote +from django.utils.http import urlencode from django.utils.six.moves.urllib.parse import parse_qsl, urljoin, urlparse from django.utils._os import upath from django.utils import six @@ -1748,7 +1748,7 @@ class AdminViewStringPrimaryKeyTest(TestCase): prefix = '/test_admin/admin/admin_views/modelwithstringprimarykey/' response = self.client.get(prefix) # this URL now comes through reverse(), thus url quoting and iri_to_uri encoding - pk_final_url = escape(iri_to_uri(urlquote(quote(self.pk)))) + pk_final_url = escape(iri_to_uri(quote(self.pk))) should_contain = """%s""" % (prefix, pk_final_url, escape(self.pk)) self.assertContains(response, should_contain) @@ -1756,14 +1756,14 @@ class AdminViewStringPrimaryKeyTest(TestCase): "The link from the recent actions list referring to the changeform of the object should be quoted" response = self.client.get('/test_admin/admin/') link = reverse('admin:admin_views_modelwithstringprimarykey_change', args=(quote(self.pk),)) - should_contain = """%s""" % (link, escape(self.pk)) + should_contain = """%s""" % (escape(link), escape(self.pk)) self.assertContains(response, should_contain) def test_recentactions_without_content_type(self): "If a LogEntry is missing content_type it will not display it in span tag under the hyperlink." response = self.client.get('/test_admin/admin/') link = reverse('admin:admin_views_modelwithstringprimarykey_change', args=(quote(self.pk),)) - should_contain = """%s""" % (link, escape(self.pk)) + should_contain = """%s""" % (escape(link), escape(self.pk)) self.assertContains(response, should_contain) should_contain = "Model with string primary key" # capitalized in Recent Actions self.assertContains(response, should_contain) @@ -1785,7 +1785,7 @@ class AdminViewStringPrimaryKeyTest(TestCase): log_entry_name = "Model with string primary key" # capitalized in Recent Actions logentry = LogEntry.objects.get(content_type__name__iexact=log_entry_name) model = "modelwithstringprimarykey" - desired_admin_url = "/test_admin/admin/admin_views/%s/%s/" % (model, escape(iri_to_uri(urlquote(quote(self.pk))))) + desired_admin_url = "/test_admin/admin/admin_views/%s/%s/" % (model, iri_to_uri(quote(self.pk))) self.assertEqual(logentry.get_admin_url(), desired_admin_url) logentry.content_type.model = "non-existent" @@ -1795,7 +1795,7 @@ class AdminViewStringPrimaryKeyTest(TestCase): "The link from the delete confirmation page referring back to the changeform of the object should be quoted" response = self.client.get('/test_admin/admin/admin_views/modelwithstringprimarykey/%s/delete/' % quote(self.pk)) # this URL now comes through reverse(), thus url quoting and iri_to_uri encoding - should_contain = """/%s/">%s""" % (escape(iri_to_uri(urlquote(quote(self.pk)))), escape(self.pk)) + should_contain = """/%s/">%s""" % (escape(iri_to_uri(quote(self.pk))), escape(self.pk)) self.assertContains(response, should_contain) def test_url_conflicts_with_add(self): diff --git a/tests/template_tests/tests.py b/tests/template_tests/tests.py index b418cdac11..4ec6815710 100644 --- a/tests/template_tests/tests.py +++ b/tests/template_tests/tests.py @@ -1673,12 +1673,12 @@ class TemplateTests(TestCase): 'url08': ('{% url "метка_оператора" v %}', {'v': 'Ω'}, '/%D0%AE%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4/%CE%A9/'), 'url09': ('{% url "метка_оператора_2" tag=v %}', {'v': 'Ω'}, '/%D0%AE%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4/%CE%A9/'), 'url10': ('{% url "template_tests.views.client_action" id=client.id action="two words" %}', {'client': {'id': 1}}, '/client/1/two%20words/'), - 'url11': ('{% url "template_tests.views.client_action" id=client.id action="==" %}', {'client': {'id': 1}}, '/client/1/%3D%3D/'), - 'url12': ('{% url "template_tests.views.client_action" id=client.id action="," %}', {'client': {'id': 1}}, '/client/1/%2C/'), + 'url11': ('{% url "template_tests.views.client_action" id=client.id action="==" %}', {'client': {'id': 1}}, '/client/1/==/'), + 'url12': ('{% url "template_tests.views.client_action" id=client.id action="!$&\'()*+,;=~:@," %}', {'client': {'id': 1}}, '/client/1/!$&\'()*+,;=~:@,/'), 'url13': ('{% url "template_tests.views.client_action" id=client.id action=arg|join:"-" %}', {'client': {'id': 1}, 'arg': ['a', 'b']}, '/client/1/a-b/'), 'url14': ('{% url "template_tests.views.client_action" client.id arg|join:"-" %}', {'client': {'id': 1}, 'arg': ['a', 'b']}, '/client/1/a-b/'), 'url15': ('{% url "template_tests.views.client_action" 12 "test" %}', {}, '/client/12/test/'), - 'url18': ('{% url "template_tests.views.client" "1,2" %}', {}, '/client/1%2C2/'), + 'url18': ('{% url "template_tests.views.client" "1,2" %}', {}, '/client/1,2/'), 'url19': ('{% url named_url client.id %}', {'named_url': 'template_tests.views.client', 'client': {'id': 1}}, '/client/1/'), 'url20': ('{% url url_name_in_var client.id %}', {'url_name_in_var': 'named.client', 'client': {'id': 1}}, '/named-client/1/'), diff --git a/tests/urlpatterns_reverse/tests.py b/tests/urlpatterns_reverse/tests.py index 6964065f30..5603ea5f15 100644 --- a/tests/urlpatterns_reverse/tests.py +++ b/tests/urlpatterns_reverse/tests.py @@ -106,7 +106,7 @@ test_data = ( ('product', '/product/chocolate+($2.00)/', [], {'price': '2.00', 'product': 'chocolate'}), ('headlines', '/headlines/2007.5.21/', [], dict(year=2007, month=5, day=21)), ('windows', r'/windows_path/C:%5CDocuments%20and%20Settings%5Cspam/', [], dict(drive_name='C', path=r'Documents and Settings\spam')), - ('special', r'/special_chars/%2B%5C%24%2A/', [r'+\$*'], {}), + ('special', r'/special_chars/~@+%5C$*%7C/', [r'~@+\$*|'], {}), ('special', r'/special_chars/some%20resource/', [r'some resource'], {}), ('special', r'/special_chars/10%25%20complete/', [r'10% complete'], {}), ('special', r'/special_chars/some%20resource/', [], {'chars': r'some resource'}),