From 7f65974f8219729c047fbbf8cd5cc9d80faefe77 Mon Sep 17 00:00:00 2001
From: Florian Apolloner <florian@apolloner.eu>
Date: Mon, 15 Jul 2019 11:46:09 +0200
Subject: [PATCH] Fixed CVE-2019-14232 -- Adjusted regex to avoid backtracking
 issues when truncating HTML.

Thanks to Guido Vranken for the initial report.
---
 django/utils/text.py                          |  4 +--
 docs/releases/1.11.23.txt                     | 14 +++++++++++
 docs/releases/2.1.11.txt                      | 14 +++++++++++
 docs/releases/2.2.4.txt                       | 14 +++++++++++
 .../filter_tests/test_truncatewords_html.py   |  4 +--
 tests/utils_tests/test_text.py                | 25 ++++++++++++++++---
 6 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/django/utils/text.py b/django/utils/text.py
index e9b7dcc72b..c2576b012a 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -17,8 +17,8 @@ def capfirst(x):
 
 
 # Set up regular expressions
-re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.S)
-re_chars = re.compile(r'<.*?>|(.)', re.S)
+re_words = re.compile(r'<[^>]+?>|([^<>\s]+)', re.S)
+re_chars = re.compile(r'<[^>]+?>|(.)', re.S)
 re_tag = re.compile(r'<(/)?(\S+?)(?:(\s*/)|\s.*?)?>', re.S)
 re_newlines = re.compile(r'\r\n|\r')  # Used in normalize_newlines
 re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))')
diff --git a/docs/releases/1.11.23.txt b/docs/releases/1.11.23.txt
index 9a3ab7cbc9..6058bb8a81 100644
--- a/docs/releases/1.11.23.txt
+++ b/docs/releases/1.11.23.txt
@@ -5,3 +5,17 @@ Django 1.11.23 release notes
 *August 1, 2019*
 
 Django 1.11.23 fixes security issues in 1.11.22.
+
+CVE-2019-14232: Denial-of-service possibility in ``django.utils.text.Truncator``
+================================================================================
+
+If ``django.utils.text.Truncator``'s ``chars()`` and ``words()`` methods
+were passed the ``html=True`` argument, they were extremely slow to evaluate
+certain inputs due to a catastrophic backtracking vulnerability in a regular
+expression. The ``chars()`` and ``words()`` methods are used to implement the
+:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
+filters, which were thus vulnerable.
+
+The regular expressions used by ``Truncator`` have been simplified in order to
+avoid potential backtracking issues. As a consequence, trailing punctuation may
+now at times be included in the truncated output.
diff --git a/docs/releases/2.1.11.txt b/docs/releases/2.1.11.txt
index b8098334e1..f4ee3dbd30 100644
--- a/docs/releases/2.1.11.txt
+++ b/docs/releases/2.1.11.txt
@@ -5,3 +5,17 @@ Django 2.1.11 release notes
 *August 1, 2019*
 
 Django 2.1.11 fixes security issues in 2.1.10.
+
+CVE-2019-14232: Denial-of-service possibility in ``django.utils.text.Truncator``
+================================================================================
+
+If ``django.utils.text.Truncator``'s ``chars()`` and ``words()`` methods
+were passed the ``html=True`` argument, they were extremely slow to evaluate
+certain inputs due to a catastrophic backtracking vulnerability in a regular
+expression. The ``chars()`` and ``words()`` methods are used to implement the
+:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
+filters, which were thus vulnerable.
+
+The regular expressions used by ``Truncator`` have been simplified in order to
+avoid potential backtracking issues. As a consequence, trailing punctuation may
+now at times be included in the truncated output.
diff --git a/docs/releases/2.2.4.txt b/docs/releases/2.2.4.txt
index 59c05bf0e2..b22aa42482 100644
--- a/docs/releases/2.2.4.txt
+++ b/docs/releases/2.2.4.txt
@@ -6,6 +6,20 @@ Django 2.2.4 release notes
 
 Django 2.2.4 fixes security issues and several bugs in 2.2.3.
 
+CVE-2019-14232: Denial-of-service possibility in ``django.utils.text.Truncator``
+================================================================================
+
+If ``django.utils.text.Truncator``'s ``chars()`` and ``words()`` methods
+were passed the ``html=True`` argument, they were extremely slow to evaluate
+certain inputs due to a catastrophic backtracking vulnerability in a regular
+expression. The ``chars()`` and ``words()`` methods are used to implement the
+:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
+filters, which were thus vulnerable.
+
+The regular expressions used by ``Truncator`` have been simplified in order to
+avoid potential backtracking issues. As a consequence, trailing punctuation may
+now at times be included in the truncated output.
+
 Bugfixes
 ========
 
diff --git a/tests/template_tests/filter_tests/test_truncatewords_html.py b/tests/template_tests/filter_tests/test_truncatewords_html.py
index 5daeef6cf3..6177fc875d 100644
--- a/tests/template_tests/filter_tests/test_truncatewords_html.py
+++ b/tests/template_tests/filter_tests/test_truncatewords_html.py
@@ -16,13 +16,13 @@ class FunctionTests(SimpleTestCase):
     def test_truncate2(self):
         self.assertEqual(
             truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4),
-            '<p>one <a href="#">two - three <br>four …</a></p>',
+            '<p>one <a href="#">two - three …</a></p>',
         )
 
     def test_truncate3(self):
         self.assertEqual(
             truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5),
-            '<p>one <a href="#">two - three <br>four</a> five</p>',
+            '<p>one <a href="#">two - three <br>four …</a></p>',
         )
 
     def test_truncate4(self):
diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py
index f1a7db383c..77d7e73259 100644
--- a/tests/utils_tests/test_text.py
+++ b/tests/utils_tests/test_text.py
@@ -88,6 +88,17 @@ class TestUtilsText(SimpleTestCase):
         # lazy strings are handled correctly
         self.assertEqual(text.Truncator(lazystr('The quick brown fox')).chars(10), 'The quick…')
 
+    def test_truncate_chars_html(self):
+        perf_test_values = [
+            (('</a' + '\t' * 50000) + '//>', None),
+            ('&' * 50000, '&' * 9 + '…'),
+            ('_X<<<<<<<<<<<>', None),
+        ]
+        for value, expected in perf_test_values:
+            with self.subTest(value=value):
+                truncator = text.Truncator(value)
+                self.assertEqual(expected if expected else value, truncator.chars(10, html=True))
+
     def test_truncate_words(self):
         truncator = text.Truncator('The quick brown fox jumped over the lazy dog.')
         self.assertEqual('The quick brown fox jumped over the lazy dog.', truncator.words(10))
@@ -137,11 +148,17 @@ class TestUtilsText(SimpleTestCase):
         truncator = text.Truncator('<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo est&aacute;?</i>')
         self.assertEqual('<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo…</i>', truncator.words(3, html=True))
         truncator = text.Truncator('<p>I &lt;3 python, what about you?</p>')
-        self.assertEqual('<p>I &lt;3 python…</p>', truncator.words(3, html=True))
+        self.assertEqual('<p>I &lt;3 python,…</p>', truncator.words(3, html=True))
 
-        re_tag_catastrophic_test = ('</a' + '\t' * 50000) + '//>'
-        truncator = text.Truncator(re_tag_catastrophic_test)
-        self.assertEqual(re_tag_catastrophic_test, truncator.words(500, html=True))
+        perf_test_values = [
+            ('</a' + '\t' * 50000) + '//>',
+            '&' * 50000,
+            '_X<<<<<<<<<<<>',
+        ]
+        for value in perf_test_values:
+            with self.subTest(value=value):
+                truncator = text.Truncator(value)
+                self.assertEqual(value, truncator.words(50, html=True))
 
     def test_wrap(self):
         digits = '1234 67 9'