From b9d9287f59eb5c33dd8bc81179b4cf197fd54456 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 9 Aug 2014 12:44:48 +0200 Subject: [PATCH] Fixed urlize after smart_urlquote rewrite Refs #22267. --- django/utils/html.py | 31 +++++++++++++++++++++++++++---- tests/template_tests/filters.py | 24 ++++++++++++++++++++---- tests/utils_tests/test_html.py | 1 + 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/django/utils/html.py b/django/utils/html.py index 6eed0b0192..3974bbbc22 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -251,6 +251,7 @@ def smart_urlquote(url): return urlunsplit((scheme, netloc, path, query, fragment)) + def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): """ Converts any URLs in text into clickable links. @@ -268,11 +269,31 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): If autoescape is True, the link text and URLs will be autoescaped. """ + safe_input = isinstance(text, SafeData) + def trim_url(x, limit=trim_url_limit): if limit is None or len(x) <= limit: return x return '%s...' % x[:max(0, limit - 3)] - safe_input = isinstance(text, SafeData) + + def unescape(text, trail): + """ + If input URL is HTML-escaped, unescape it so as we can safely feed it to + smart_urlquote. For example: + http://example.com?x=1&amp;y=&lt;2&gt; => http://example.com?x=1&y=<2> + """ + if not safe_input: + return text, text, trail + unescaped = (text + trail).replace('&amp;', '&').replace('&lt;', '<' + ).replace('&gt;', '>').replace('&quot;', '"' + ).replace('&#39;', "'") + # ';' in trail can be either trailing punctuation or end-of-entity marker + if unescaped.endswith(';'): + return text, unescaped[:-1], trail + else: + text += trail + return text, unescaped, '' + words = word_split_re.split(force_text(text)) for i, word in enumerate(words): if '.' 
in word or '@' in word or ':' in word: @@ -296,9 +317,11 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): url = None nofollow_attr = ' rel="nofollow"' if nofollow else '' if simple_url_re.match(middle): - url = smart_urlquote(middle) + middle, middle_unescaped, trail = unescape(middle, trail) + url = smart_urlquote(middle_unescaped) elif simple_url_2_re.match(middle): - url = smart_urlquote('http://%s' % middle) + middle, middle_unescaped, trail = unescape(middle, trail) + url = smart_urlquote('http://%s' % middle_unescaped) elif ':' not in middle and simple_email_re.match(middle): local, domain = middle.rsplit('@', 1) try: @@ -313,7 +336,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): trimmed = trim_url(middle) if autoescape and not safe_input: lead, trail = escape(lead), escape(trail) - url, trimmed = escape(url), escape(trimmed) + trimmed = escape(trimmed) middle = '%s' % (url, nofollow_attr, trimmed) words[i] = mark_safe('%s%s%s' % (lead, middle, trail)) else: diff --git a/tests/template_tests/filters.py b/tests/template_tests/filters.py index ad7f79d888..3668f91c23 100644 --- a/tests/template_tests/filters.py +++ b/tests/template_tests/filters.py @@ -151,8 +151,16 @@ def get_filter_tests(): 'filter-upper01': ('{% autoescape off %}{{ a|upper }} {{ b|upper }}{% endautoescape %}', {"a": "a & b", "b": mark_safe("a & b")}, "A & B A & B"), 'filter-upper02': ('{{ a|upper }} {{ b|upper }}', {"a": "a & b", "b": mark_safe("a & b")}, "A & B A &AMP; B"), - 'filter-urlize01': ('{% autoescape off %}{{ a|urlize }} {{ b|urlize }}{% endautoescape %}', {"a": "http://example.com/?x=&y=", "b": mark_safe("http://example.com?x=&y=")}, 'http://example.com/?x=&y= http://example.com?x=&y='), - 'filter-urlize02': ('{{ a|urlize }} {{ b|urlize }}', {"a": "http://example.com/?x=&y=", "b": mark_safe("http://example.com?x=&y=")}, 'http://example.com/?x=&y= http://example.com?x=&y='), + 'filter-urlize01': ( + '{% autoescape off %}{{ 
a|urlize }} {{ b|urlize }}{% endautoescape %}', + {"a": "http://example.com/?x=&y=", "b": mark_safe("http://example.com?x=&y=<2>")}, + 'http://example.com/?x=&y= ' + 'http://example.com?x=&y=<2>'), + 'filter-urlize02': ( + '{{ a|urlize }} {{ b|urlize }}', + {"a": "http://example.com/?x=&y=", "b": mark_safe("http://example.com?x=&y=")}, + 'http://example.com/?x=&y= ' + 'http://example.com?x=&y='), 'filter-urlize03': ('{% autoescape off %}{{ a|urlize }}{% endautoescape %}', {"a": mark_safe("a & b")}, 'a & b'), 'filter-urlize04': ('{{ a|urlize }}', {"a": mark_safe("a & b")}, 'a & b'), @@ -165,8 +173,16 @@ def get_filter_tests(): 'filter-urlize07': ('{{ a|urlize }}', {"a": "Email me at me@example.com"}, 'Email me at me@example.com'), 'filter-urlize08': ('{{ a|urlize }}', {"a": "Email me at "}, 'Email me at <me@example.com>'), - 'filter-urlizetrunc01': ('{% autoescape off %}{{ a|urlizetrunc:"8" }} {{ b|urlizetrunc:"8" }}{% endautoescape %}', {"a": '"Unsafe" http://example.com/x=&y=', "b": mark_safe('"Safe" http://example.com?x=&y=')}, '"Unsafe" http:... "Safe" http:...'), - 'filter-urlizetrunc02': ('{{ a|urlizetrunc:"8" }} {{ b|urlizetrunc:"8" }}', {"a": '"Unsafe" http://example.com/x=&y=', "b": mark_safe('"Safe" http://example.com?x=&y=')}, '"Unsafe" http:... "Safe" http:...'), + 'filter-urlizetrunc01': ( + '{% autoescape off %}{{ a|urlizetrunc:"8" }} {{ b|urlizetrunc:"8" }}{% endautoescape %}', + {"a": '"Unsafe" http://example.com/x=&y=', "b": mark_safe('"Safe" http://example.com?x=&y=')}, + '"Unsafe" http:... ' + '"Safe" http:...'), + 'filter-urlizetrunc02': ( + '{{ a|urlizetrunc:"8" }} {{ b|urlizetrunc:"8" }}', + {"a": '"Unsafe" http://example.com/x=&y=', "b": mark_safe('"Safe" http://example.com?x=&y=')}, + '"Unsafe" http:... 
' + '"Safe" http:...'), 'filter-wordcount01': ('{% autoescape off %}{{ a|wordcount }} {{ b|wordcount }}{% endautoescape %}', {"a": "a & b", "b": mark_safe("a & b")}, "3 3"), 'filter-wordcount02': ('{{ a|wordcount }} {{ b|wordcount }}', {"a": "a & b", "b": mark_safe("a & b")}, "3 3"), diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py index 374628dc0c..ff9d7f4ac0 100644 --- a/tests/utils_tests/test_html.py +++ b/tests/utils_tests/test_html.py @@ -174,6 +174,7 @@ class TestUtilsHtml(TestCase): self.assertEqual(quote('http://example.com/path/öäü/'), 'http://example.com/path/%C3%B6%C3%A4%C3%BC/') self.assertEqual(quote('http://example.com/%C3%B6/ä/'), 'http://example.com/%C3%B6/%C3%A4/') self.assertEqual(quote('http://example.com/?x=1&y=2+3&z='), 'http://example.com/?x=1&y=2+3&z=') + self.assertEqual(quote('http://example.com/?x=<>"\''), 'http://example.com/?x=%3C%3E%22%27') self.assertEqual(quote('http://example.com/?q=http://example.com/?x=1%26q=django'), 'http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango') self.assertEqual(quote('http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango'),