From 081a27c3585a109b0e8b62e43f57273c69890bd3 Mon Sep 17 00:00:00 2001 From: Florian Apolloner Date: Sun, 28 Jul 2013 10:05:39 +0200 Subject: [PATCH] [1.6.x] Simplified smart_urlquote and added some basic tests. Backport of b70c371fc1f18ea0c43b503122df3f311afc7105 from master. --- django/utils/html.py | 15 ++++++--------- tests/defaultfilters/tests.py | 7 ++++--- tests/utils_tests/test_html.py | 11 +++++++++++ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/django/utils/html.py b/django/utils/html.py index 4893b6b18a..89e790d96f 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -4,13 +4,13 @@ from __future__ import unicode_literals import re try: - from urllib.parse import quote, urlsplit, urlunsplit + from urllib.parse import quote, unquote, urlsplit, urlunsplit except ImportError: # Python 2 - from urllib import quote + from urllib import quote, unquote from urlparse import urlsplit, urlunsplit from django.utils.safestring import SafeData, mark_safe -from django.utils.encoding import force_bytes, force_text +from django.utils.encoding import force_text, force_str from django.utils.functional import allow_lazy from django.utils import six from django.utils.text import normalize_newlines @@ -26,7 +26,6 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;')] DOTS = ['&middot;', '*', '\u2022', '&#149;', '&bull;', '&#8226;'] unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)') -unquoted_percents_re = re.compile(r'%(?![0-9A-Fa-f]{2})') word_split_re = re.compile(r'(\s+)') simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE) simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE) @@ -185,11 +184,9 @@ def smart_urlquote(url): # invalid IPv6 URL (normally square brackets in hostname part). pass - # An URL is considered unquoted if it contains no % characters or - # contains a % not followed by two hexadecimal digits. See #9655.
- if '%' not in url or unquoted_percents_re.search(url): - # See http://bugs.python.org/issue2637 - url = quote(force_bytes(url), safe=b'!*\'();:@&=+$,/?#[]~') + url = unquote(force_str(url)) + # See http://bugs.python.org/issue2637 + url = quote(url, safe=b'!*\'();:@&=+$,/?#[]~') return force_text(url) diff --git a/tests/defaultfilters/tests.py b/tests/defaultfilters/tests.py index d0009c6e66..56b32ae623 100644 --- a/tests/defaultfilters/tests.py +++ b/tests/defaultfilters/tests.py @@ -248,9 +248,10 @@ class DefaultFiltersTests(TestCase): '<a href="https://google.com" rel="nofollow">https://google.com</a>') # Check urlize doesn't overquote already quoted urls - see #9655 - self.assertEqual(urlize('http://hi.baidu.com/%D6%D8%D0%C2%BF'), - '<a href="http://hi.baidu.com/%D6%D8%D0%C2%BF" rel="nofollow">' - 'http://hi.baidu.com/%D6%D8%D0%C2%BF</a>') + # The teststring is the urlquoted version of 'http://hi.baidu.com/重新开始' + self.assertEqual(urlize('http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B'), + '<a href="http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B" rel="nofollow">' + 'http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B</a>') self.assertEqual(urlize('www.mystore.com/30%OffCoupons!'), '<a href="http://www.mystore.com/30%25OffCoupons!" rel="nofollow">' 'www.mystore.com/30%OffCoupons!</a>') diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py index b973f1c64f..ba8f29e3ae 100644 --- a/tests/utils_tests/test_html.py +++ b/tests/utils_tests/test_html.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals from datetime import datetime @@ -181,3 +182,13 @@ class TestUtilsHtml(TestCase): ) for value, tags, output in items: self.assertEqual(f(value, tags), output) + + def test_smart_urlquote(self): + quote = html.smart_urlquote + # Ensure that IDNs are properly quoted + self.assertEqual(quote('http://öäü.com/'), 'http://xn--4ca9at.com/') + self.assertEqual(quote('http://öäü.com/öäü/'), 'http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/') + # Ensure that everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered safe as per RFC + self.assertEqual(quote('http://example.com/path/öäü/'), 'http://example.com/path/%C3%B6%C3%A4%C3%BC/') +
self.assertEqual(quote('http://example.com/%C3%B6/ä/'), 'http://example.com/%C3%B6/%C3%A4/') + self.assertEqual(quote('http://example.com/?x=1&y=2'), 'http://example.com/?x=1&y=2')