Simplified smart_urlquote and added some basic tests.
This commit is contained in:
parent
0d0ccf81a0
commit
b70c371fc1
|
@ -4,13 +4,13 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
try:
|
try:
|
||||||
from urllib.parse import quote, urlsplit, urlunsplit
|
from urllib.parse import quote, unquote, urlsplit, urlunsplit
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urllib import quote
|
from urllib import quote, unquote
|
||||||
from urlparse import urlsplit, urlunsplit
|
from urlparse import urlsplit, urlunsplit
|
||||||
|
|
||||||
from django.utils.safestring import SafeData, mark_safe
|
from django.utils.safestring import SafeData, mark_safe
|
||||||
from django.utils.encoding import force_bytes, force_text
|
from django.utils.encoding import force_text, force_str
|
||||||
from django.utils.functional import allow_lazy
|
from django.utils.functional import allow_lazy
|
||||||
from django.utils import six
|
from django.utils import six
|
||||||
from django.utils.text import normalize_newlines
|
from django.utils.text import normalize_newlines
|
||||||
|
@ -26,7 +26,6 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;')]
|
||||||
DOTS = ['&middot;', '*', '\u2022', '&#149;', '&bull;', '&#8226;']
|
DOTS = ['&middot;', '*', '\u2022', '&#149;', '&bull;', '&#8226;']
|
||||||
|
|
||||||
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
|
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
|
||||||
unquoted_percents_re = re.compile(r'%(?![0-9A-Fa-f]{2})')
|
|
||||||
word_split_re = re.compile(r'(\s+)')
|
word_split_re = re.compile(r'(\s+)')
|
||||||
simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE)
|
simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE)
|
||||||
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE)
|
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE)
|
||||||
|
@ -185,11 +184,9 @@ def smart_urlquote(url):
|
||||||
# invalid IPv6 URL (normally square brackets in hostname part).
|
# invalid IPv6 URL (normally square brackets in hostname part).
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# An URL is considered unquoted if it contains no % characters or
|
url = unquote(force_str(url))
|
||||||
# contains a % not followed by two hexadecimal digits. See #9655.
|
# See http://bugs.python.org/issue2637
|
||||||
if '%' not in url or unquoted_percents_re.search(url):
|
url = quote(url, safe=b'!*\'();:@&=+$,/?#[]~')
|
||||||
# See http://bugs.python.org/issue2637
|
|
||||||
url = quote(force_bytes(url), safe=b'!*\'();:@&=+$,/?#[]~')
|
|
||||||
|
|
||||||
return force_text(url)
|
return force_text(url)
|
||||||
|
|
||||||
|
|
|
@ -249,9 +249,10 @@ class DefaultFiltersTests(TestCase):
|
||||||
'<a href="https://google.com" rel="nofollow">https://google.com</a>')
|
'<a href="https://google.com" rel="nofollow">https://google.com</a>')
|
||||||
|
|
||||||
# Check urlize doesn't overquote already quoted urls - see #9655
|
# Check urlize doesn't overquote already quoted urls - see #9655
|
||||||
self.assertEqual(urlize('http://hi.baidu.com/%D6%D8%D0%C2%BF'),
|
# The teststring is the urlquoted version of 'http://hi.baidu.com/重新开始'
|
||||||
'<a href="http://hi.baidu.com/%D6%D8%D0%C2%BF" rel="nofollow">'
|
self.assertEqual(urlize('http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B'),
|
||||||
'http://hi.baidu.com/%D6%D8%D0%C2%BF</a>')
|
'<a href="http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B" rel="nofollow">'
|
||||||
|
'http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B</a>')
|
||||||
self.assertEqual(urlize('www.mystore.com/30%OffCoupons!'),
|
self.assertEqual(urlize('www.mystore.com/30%OffCoupons!'),
|
||||||
'<a href="http://www.mystore.com/30%25OffCoupons!" rel="nofollow">'
|
'<a href="http://www.mystore.com/30%25OffCoupons!" rel="nofollow">'
|
||||||
'www.mystore.com/30%OffCoupons!</a>')
|
'www.mystore.com/30%OffCoupons!</a>')
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
@ -181,3 +182,13 @@ class TestUtilsHtml(TestCase):
|
||||||
)
|
)
|
||||||
for value, tags, output in items:
|
for value, tags, output in items:
|
||||||
self.assertEqual(f(value, tags), output)
|
self.assertEqual(f(value, tags), output)
|
||||||
|
|
||||||
|
def test_smart_urlquote(self):
|
||||||
|
quote = html.smart_urlquote
|
||||||
|
# Ensure that IDNs are properly quoted
|
||||||
|
self.assertEqual(quote('http://öäü.com/'), 'http://xn--4ca9at.com/')
|
||||||
|
self.assertEqual(quote('http://öäü.com/öäü/'), 'http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/')
|
||||||
|
# Ensure that everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered safe as per RFC
|
||||||
|
self.assertEqual(quote('http://example.com/path/öäü/'), 'http://example.com/path/%C3%B6%C3%A4%C3%BC/')
|
||||||
|
self.assertEqual(quote('http://example.com/%C3%B6/ä/'), 'http://example.com/%C3%B6/%C3%A4/')
|
||||||
|
self.assertEqual(quote('http://example.com/?x=1&y=2'), 'http://example.com/?x=1&y=2')
|
||||||
|
|
Loading…
Reference in New Issue