From dec334cb66b3ee59cb82e1bb99a584aa0b9fbbd5 Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Thu, 11 Feb 2016 20:37:34 -0800 Subject: [PATCH] Fixed #26193 -- Made urlize() trim multiple trailing punctuation. --- django/utils/html.py | 53 ++++++++++++++----- .../filter_tests/test_urlize.py | 18 +++++++ 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/django/utils/html.py b/django/utils/html.py index 89d6a00eb2..de515ef8e9 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -17,7 +17,12 @@ from django.utils.text import normalize_newlines from .html_parser import HTMLParseError, HTMLParser # Configuration for urlize() function. -TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)', '"', '\'', '!'] +TRAILING_PUNCTUATION_RE = re.compile( + '^' # Beginning of word + '(.*?)' # The URL in word + '([.,:;!]+)' # Allowed non-wrapping, trailing punctuation + '$' # End of word +) WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('<', '>'), ('"', '"'), ('\'', '\'')] # List of possible strings used for bullets in bulleted lists. @@ -268,24 +273,46 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): trail = '' return text, unescaped, trail - words = word_split_re.split(force_text(text)) - for i, word in enumerate(words): - if '.' in word or '@' in word or ':' in word: - # Deal with punctuation. - lead, middle, trail = '', word, '' - for punctuation in TRAILING_PUNCTUATION: - if middle.endswith(punctuation): - middle = middle[:-len(punctuation)] - trail = punctuation + trail + def trim_punctuation(lead, middle, trail): + """ + Trim trailing and wrapping punctuation from `middle`. Return the items + of the new state. + """ + # Continue trimming until middle remains unchanged. + trimmed_something = True + while trimmed_something: + trimmed_something = False + + # Trim trailing punctuation. + match = TRAILING_PUNCTUATION_RE.match(middle) + if match: + middle = match.group(1) + trail = match.group(2) + trail + trimmed_something = True + + # Trim wrapping punctuation. for opening, closing in WRAPPING_PUNCTUATION: if middle.startswith(opening): middle = middle[len(opening):] - lead = lead + opening + lead += opening + trimmed_something = True # Keep parentheses at the end only if they're balanced. - if (middle.endswith(closing) - and middle.count(closing) == middle.count(opening) + 1): + if (middle.endswith(closing) and + middle.count(closing) == middle.count(opening) + 1): middle = middle[:-len(closing)] trail = closing + trail + trimmed_something = True + return lead, middle, trail + + words = word_split_re.split(force_text(text)) + for i, word in enumerate(words): + if '.' in word or '@' in word or ':' in word: + # lead: Current punctuation trimmed from the beginning of the word. + # middle: Current state of the word. + # trail: Current punctuation trimmed from the end of the word. + lead, middle, trail = '', word, '' + # Deal with punctuation. + lead, middle, trail = trim_punctuation(lead, middle, trail) # Make URL we want to point to. url = None diff --git a/tests/template_tests/filter_tests/test_urlize.py b/tests/template_tests/filter_tests/test_urlize.py index 9cf3f982a8..6822092943 100644 --- a/tests/template_tests/filter_tests/test_urlize.py +++ b/tests/template_tests/filter_tests/test_urlize.py @@ -246,6 +246,24 @@ class FunctionTests(SimpleTestCase): '(Go to http://www.example.com/foo.)', ) + def test_trailing_multiple_punctuation(self): + self.assertEqual( + urlize('A test http://testing.com/example..'), + 'A test http://testing.com/example..' + ) + self.assertEqual( + urlize('A test http://testing.com/example!!'), + 'A test http://testing.com/example!!' + ) + self.assertEqual( + urlize('A test http://testing.com/example!!!'), + 'A test http://testing.com/example!!!' + ) + self.assertEqual( + urlize('A test http://testing.com/example.,:;)"!'), + 'A test http://testing.com/example.,:;)"!' + ) + def test_brackets(self): """ #19070 - Check urlize handles brackets properly