From dec334cb66b3ee59cb82e1bb99a584aa0b9fbbd5 Mon Sep 17 00:00:00 2001
From: Jon Dufresne <jon.dufresne@gmail.com>
Date: Thu, 11 Feb 2016 20:37:34 -0800
Subject: [PATCH] Fixed #26193 -- Made urlize() trim multiple trailing
 punctuation.

---
 django/utils/html.py                          | 53 ++++++++++++++-----
 .../filter_tests/test_urlize.py               | 18 +++++++
 2 files changed, 58 insertions(+), 13 deletions(-)

diff --git a/django/utils/html.py b/django/utils/html.py
index 89d6a00eb2..de515ef8e9 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -17,7 +17,12 @@ from django.utils.text import normalize_newlines
 from .html_parser import HTMLParseError, HTMLParser
 
 # Configuration for urlize() function.
-TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)', '"', '\'', '!']
+TRAILING_PUNCTUATION_RE = re.compile(
+    '^'           # Anchor at the beginning of the word
+    '(.*?)'       # Group 1: the rest of the word (the URL), matched lazily
+    '([.,:;!]+)'  # Group 2: one or more trailing punctuation characters
+    '$'           # Anchor at the end of the word
+)
 WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')]
 
 # List of possible strings used for bullets in bulleted lists.
@@ -268,24 +273,46 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
             trail = ''
         return text, unescaped, trail
 
-    words = word_split_re.split(force_text(text))
-    for i, word in enumerate(words):
-        if '.' in word or '@' in word or ':' in word:
-            # Deal with punctuation.
-            lead, middle, trail = '', word, ''
-            for punctuation in TRAILING_PUNCTUATION:
-                if middle.endswith(punctuation):
-                    middle = middle[:-len(punctuation)]
-                    trail = punctuation + trail
+    def trim_punctuation(lead, middle, trail):
+        """
+        Move trailing and wrapping punctuation out of `middle` into `lead`
+        and `trail`, repeating until stable. Return (lead, middle, trail).
+        """
+        # Continue trimming until middle remains unchanged.
+        trimmed_something = True
+        while trimmed_something:
+            trimmed_something = False
+
+            # Trim trailing punctuation; rstrip() can't backtrack like a regex.
+            stripped = middle.rstrip('.,:;!')
+            if middle != stripped:
+                trail = middle[len(stripped):] + trail
+                middle = stripped
+                trimmed_something = True
+
+            # Trim wrapping punctuation.
             for opening, closing in WRAPPING_PUNCTUATION:
                 if middle.startswith(opening):
                     middle = middle[len(opening):]
-                    lead = lead + opening
+                    lead += opening
+                    trimmed_something = True
                 # Keep parentheses at the end only if they're balanced.
-                if (middle.endswith(closing)
-                        and middle.count(closing) == middle.count(opening) + 1):
+                if (middle.endswith(closing) and
+                        middle.count(closing) == middle.count(opening) + 1):
                     middle = middle[:-len(closing)]
                     trail = closing + trail
+                    trimmed_something = True
+        return lead, middle, trail
+
+    words = word_split_re.split(force_text(text))
+    for i, word in enumerate(words):
+        if '.' in word or '@' in word or ':' in word:
+            # lead: Current punctuation trimmed from the beginning of the word.
+            # middle: Current state of the word.
+            # trail: Current punctuation trimmed from the end of the word.
+            lead, middle, trail = '', word, ''
+            # Deal with punctuation.
+            lead, middle, trail = trim_punctuation(lead, middle, trail)
 
             # Make URL we want to point to.
             url = None
diff --git a/tests/template_tests/filter_tests/test_urlize.py b/tests/template_tests/filter_tests/test_urlize.py
index 9cf3f982a8..6822092943 100644
--- a/tests/template_tests/filter_tests/test_urlize.py
+++ b/tests/template_tests/filter_tests/test_urlize.py
@@ -246,6 +246,24 @@ class FunctionTests(SimpleTestCase):
             '(Go to <a href="http://www.example.com/foo" rel="nofollow">http://www.example.com/foo</a>.)',
         )
 
+    def test_trailing_multiple_punctuation(self):
+        """
+        #26193 - Check urlize keeps runs of trailing punctuation out of links
+        """
+        link = '<a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>'
+        # Pairs of (raw trailing text, trailing text expected in the output).
+        cases = (
+            ('..', '..'),
+            ('!!', '!!'),
+            ('!!!', '!!!'),
+            ('.,:;)"!', '.,:;)&quot;!'),
+        )
+        for raw_tail, expected_tail in cases:
+            self.assertEqual(
+                urlize('A test http://testing.com/example' + raw_tail),
+                'A test ' + link + expected_tail,
+            )
+
     def test_brackets(self):
         """
         #19070 - Check urlize handles brackets properly