Fixed #24471 -- Enhanced urlize regex to exclude quotes and angle brackets.

This commit is contained in:
Tim Graham 2015-03-04 09:36:34 -05:00
parent 7b1a67cce5
commit 3ed9c7bdfe
2 changed files with 21 additions and 1 deletions

View File

@ -26,7 +26,7 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('
DOTS = ['&middot;', '*', '\u2022', '&#149;', '&bull;', '&#8226;']
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
word_split_re = re.compile(r'(\s+)')
word_split_re = re.compile(r'''([\s<>"']+)''')
simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE)
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$', re.IGNORECASE)
simple_email_re = re.compile(r'^\S+@\S+\.\S+$')

View File

@ -106,6 +106,26 @@ class FunctionTests(SimpleTestCase):
'<a href="http://djangoproject.org/" rel="nofollow">djangoproject.org/</a>',
)
def test_url_split_chars(self):
# Quotes (single and double) and angle brackets shouldn't be considered
# part of URLs.
self.assertEqual(
urlize('www.server.com"abc'),
'<a href="http://www.server.com" rel="nofollow">www.server.com</a>&quot;abc',
)
self.assertEqual(
urlize('www.server.com\'abc'),
'<a href="http://www.server.com" rel="nofollow">www.server.com</a>&#39;abc',
)
self.assertEqual(
urlize('www.server.com<abc'),
'<a href="http://www.server.com" rel="nofollow">www.server.com</a>&lt;abc',
)
self.assertEqual(
urlize('www.server.com>abc'),
'<a href="http://www.server.com" rel="nofollow">www.server.com</a>&gt;abc',
)
def test_email(self):
self.assertEqual(
urlize('info@djangoproject.org'),