Fixed #20364 -- Changed urlize regexes to include quotation marks as punctuation.

Thanks to EmilStenstrom for raising this, and to Chris Piwoński for all of the fixes and most of the tests.
This commit is contained in:
Giles Richard Greenway 2013-09-23 12:07:26 +01:00 committed by Florian Apolloner
parent 99c87f1410
commit 6c06adad1d
2 changed files with 20 additions and 2 deletions

View File

@ -15,8 +15,8 @@ from .html_parser import HTMLParser, HTMLParseError
# Configuration for urlize() function. # Configuration for urlize() function.
TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)'] TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)', '"', '\'']
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;')] WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')]
# List of possible strings used for bullets in bulleted lists. # List of possible strings used for bullets in bulleted lists.
DOTS = ['&middot;', '*', '\u2022', '&#149;', '&bull;', '&#8226;'] DOTS = ['&middot;', '*', '\u2022', '&#149;', '&bull;', '&#8226;']

View File

@ -324,6 +324,24 @@ class DefaultFiltersTests(TestCase):
self.assertEqual(urlize('http://[2001:db8:cafe::2]/api/9'), self.assertEqual(urlize('http://[2001:db8:cafe::2]/api/9'),
'<a href="http://[2001:db8:cafe::2]/api/9" rel="nofollow">http://[2001:db8:cafe::2]/api/9</a>') '<a href="http://[2001:db8:cafe::2]/api/9" rel="nofollow">http://[2001:db8:cafe::2]/api/9</a>')
# Check urlize correctly handles quotation marks around links - #20364
self.assertEqual(urlize('before "hi@example.com" afterwards'),
u'before "<a href="mailto:hi@example.com">hi@example.com</a>" afterwards')
self.assertEqual(urlize('before hi@example.com" afterwards'),
u'before <a href="mailto:hi@example.com">hi@example.com</a>" afterwards')
self.assertEqual(urlize('before "hi@example.com afterwards'),
u'before "<a href="mailto:hi@example.com">hi@example.com</a> afterwards')
self.assertEqual(urlize('before \'hi@example.com\' afterwards'),
u'before \'<a href="mailto:hi@example.com">hi@example.com</a>\' afterwards')
self.assertEqual(urlize('before hi@example.com\' afterwards'),
u'before <a href="mailto:hi@example.com">hi@example.com</a>\' afterwards')
self.assertEqual(urlize('before \'hi@example.com afterwards'),
u'before \'<a href="mailto:hi@example.com">hi@example.com</a> afterwards')
# Check urlize copes with commas following URLs in quotes - see #20364
self.assertEqual(urlize('Email us at "hi@example.com", or phone us at +xx.yy'),
'Email us at "<a href="mailto:hi@example.com">hi@example.com</a>", or phone us at +xx.yy')
def test_wordcount(self): def test_wordcount(self):
self.assertEqual(wordcount(''), 0) self.assertEqual(wordcount(''), 0)
self.assertEqual(wordcount('oneword'), 1) self.assertEqual(wordcount('oneword'), 1)