Fixed #26193 -- Made urlize() trim multiple trailing punctuation.

This commit is contained in:
Jon Dufresne 2016-02-11 20:37:34 -08:00 committed by Tim Graham
parent fcd08c1757
commit dec334cb66
2 changed files with 58 additions and 13 deletions

View File

@ -17,7 +17,12 @@ from django.utils.text import normalize_newlines
from .html_parser import HTMLParseError, HTMLParser from .html_parser import HTMLParseError, HTMLParser
# Configuration for urlize() function. # Configuration for urlize() function.
# Matches a word that ends in a run of trailing punctuation. Group 1 is the
# word without that run, group 2 is the run itself.
TRAILING_PUNCTUATION_RE = re.compile(
    '^'             # Beginning of word
    '(.*?)'         # The URL in word
    # Only let the punctuation group begin at the first character of a run.
    # Without this guard the lazy group above retries the greedy group below
    # at every offset inside a run that doesn't reach the end of the word,
    # which makes matching quadratic on input such as '.....a'.
    '(?<![.,:;!])'
    '([.,:;!]+)'    # Allowed non-wrapping, trailing punctuation
    '$'             # End of word
)
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')] WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')]
# List of possible strings used for bullets in bulleted lists. # List of possible strings used for bullets in bulleted lists.
@ -268,24 +273,46 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
trail = '' trail = ''
return text, unescaped, trail return text, unescaped, trail
def trim_punctuation(lead, middle, trail):
    """
    Strip trailing and wrapping punctuation off `middle`, moving whatever is
    removed onto the end of `lead` or the front of `trail`. Return the
    updated (lead, middle, trail) tuple.
    """
    # Removing one layer can expose another, so keep making passes until a
    # whole pass removes nothing.
    while True:
        changed = False

        # Trailing punctuation: group 1 is what remains of the word, group 2
        # is the punctuation that was at its end.
        found = TRAILING_PUNCTUATION_RE.match(middle)
        if found:
            middle, punctuation = found.group(1, 2)
            trail = punctuation + trail
            changed = True

        # Wrapping punctuation.
        for opening, closing in WRAPPING_PUNCTUATION:
            if middle.startswith(opening):
                lead = lead + opening
                middle = middle[len(opening):]
                changed = True
            # A closing mark at the end is only removed when it has no
            # matching opening mark left in the word, i.e. there is exactly
            # one more closing than opening.
            # NOTE: when opening == closing the two counts are always equal,
            # so this test cannot succeed for such pairs.
            if middle.endswith(closing):
                if middle.count(closing) == middle.count(opening) + 1:
                    middle = middle[:-len(closing)]
                    trail = closing + trail
                    changed = True

        if not changed:
            return lead, middle, trail
words = word_split_re.split(force_text(text))
for i, word in enumerate(words):
if '.' in word or '@' in word or ':' in word:
# lead: Current punctuation trimmed from the beginning of the word.
# middle: Current state of the word.
# trail: Current punctuation trimmed from the end of the word.
lead, middle, trail = '', word, ''
# Deal with punctuation.
lead, middle, trail = trim_punctuation(lead, middle, trail)
# Make URL we want to point to. # Make URL we want to point to.
url = None url = None

View File

@ -246,6 +246,24 @@ class FunctionTests(SimpleTestCase):
'(Go to <a href="http://www.example.com/foo" rel="nofollow">http://www.example.com/foo</a>.)', '(Go to <a href="http://www.example.com/foo" rel="nofollow">http://www.example.com/foo</a>.)',
) )
def test_trailing_multiple_punctuation(self):
    """
    #26193 - Check urlize keeps repeated trailing punctuation outside the link
    """
    # (input, expected output) pairs, checked in order.
    cases = (
        (
            'A test http://testing.com/example..',
            'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>..',
        ),
        (
            'A test http://testing.com/example!!',
            'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>!!',
        ),
        (
            'A test http://testing.com/example!!!',
            'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>!!!',
        ),
        (
            'A test http://testing.com/example.,:;)"!',
            'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>.,:;)&quot;!',
        ),
    )
    for value, expected in cases:
        self.assertEqual(urlize(value), expected)
def test_brackets(self): def test_brackets(self):
""" """
#19070 - Check urlize handles brackets properly #19070 - Check urlize handles brackets properly