Fixed CVE-2018-7536 -- Fixed catastrophic backtracking in urlize and urlizetrunc template filters.

Thanks Florian Apolloner for assisting with the patch.
This commit is contained in:
Tim Graham 2018-02-24 11:30:11 -05:00
parent 4d2a2c83c7
commit 8618271caa
5 changed files with 63 additions and 12 deletions

View File

@ -13,12 +13,7 @@ from django.utils.safestring import SafeData, SafeText, mark_safe
from django.utils.text import normalize_newlines from django.utils.text import normalize_newlines
# Configuration for urlize() function. # Configuration for urlize() function.
TRAILING_PUNCTUATION_RE = re.compile( TRAILING_PUNCTUATION_CHARS = '.,:;!'
'^' # Beginning of word
'(.*?)' # The URL in word
'([.,:;!]+)' # Allowed non-wrapping, trailing punctuation
'$' # End of word
)
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')] WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')]
# List of possible strings used for bullets in bulleted lists. # List of possible strings used for bullets in bulleted lists.
@ -28,7 +23,6 @@ unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
word_split_re = re.compile(r'''([\s<>"']+)''') word_split_re = re.compile(r'''([\s<>"']+)''')
simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE) simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE)
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$', re.IGNORECASE) simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$', re.IGNORECASE)
simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
_html_escapes = { _html_escapes = {
ord('&'): '&amp;', ord('&'): '&amp;',
@ -293,10 +287,10 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
trimmed_something = False trimmed_something = False
# Trim trailing punctuation. # Trim trailing punctuation.
match = TRAILING_PUNCTUATION_RE.match(middle) stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
if match: if middle != stripped:
middle = match.group(1) trail = middle[len(stripped):] + trail
trail = match.group(2) + trail middle = stripped
trimmed_something = True trimmed_something = True
# Trim wrapping punctuation. # Trim wrapping punctuation.
@ -313,6 +307,21 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
trimmed_something = True trimmed_something = True
return lead, middle, trail return lead, middle, trail
def is_email_simple(value):
    """
    Return True if value has the rough shape of an email address.

    The check is deliberately loose and uses only plain string operations
    (no regular expression): value must contain exactly one "@" with text on
    both sides of it, and the part after the "@" must contain a "." that is
    not its first character (e.g. "example.com").
    """
    # Zero "@" characters, or more than one, can never qualify.
    if value.count('@') != 1:
        return False
    local_part, _, domain_part = value.partition('@')
    # The single "@" must sit in the middle, not at either end.
    if not local_part or not domain_part:
        return False
    # The domain needs a dot, but may not begin with one.
    return '.' in domain_part and not domain_part.startswith('.')
words = word_split_re.split(str(text)) words = word_split_re.split(str(text))
for i, word in enumerate(words): for i, word in enumerate(words):
if '.' in word or '@' in word or ':' in word: if '.' in word or '@' in word or ':' in word:
@ -332,7 +341,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
elif simple_url_2_re.match(middle): elif simple_url_2_re.match(middle):
middle, middle_unescaped, trail = unescape(middle, trail) middle, middle_unescaped, trail = unescape(middle, trail)
url = smart_urlquote('http://%s' % middle_unescaped) url = smart_urlquote('http://%s' % middle_unescaped)
elif ':' not in middle and simple_email_re.match(middle): elif ':' not in middle and is_email_simple(middle):
local, domain = middle.rsplit('@', 1) local, domain = middle.rsplit('@', 1)
try: try:
domain = domain.encode('idna').decode('ascii') domain = domain.encode('idna').decode('ascii')

View File

@ -5,3 +5,14 @@ Django 1.11.11 release notes
*March 6, 2018* *March 6, 2018*
Django 1.11.11 fixes two security issues in 1.11.10. Django 1.11.11 fixes two security issues in 1.11.10.
CVE-2018-7536: Denial-of-service possibility in ``urlize`` and ``urlizetrunc`` template filters
===============================================================================================
The ``django.utils.html.urlize()`` function was extremely slow to evaluate
certain inputs due to catastrophic backtracking vulnerabilities in two regular
expressions. The ``urlize()`` function is used to implement the ``urlize`` and
``urlizetrunc`` template filters, which were thus vulnerable.
The problematic regular expressions are replaced with parsing logic that
behaves similarly.

View File

@ -5,3 +5,14 @@ Django 1.8.19 release notes
*March 6, 2018* *March 6, 2018*
Django 1.8.19 fixes two security issues in 1.8.18. Django 1.8.19 fixes two security issues in 1.8.18.
CVE-2018-7536: Denial-of-service possibility in ``urlize`` and ``urlizetrunc`` template filters
===============================================================================================
The ``django.utils.html.urlize()`` function was extremely slow to evaluate
certain inputs due to a catastrophic backtracking vulnerability in a regular
expression. The ``urlize()`` function is used to implement the ``urlize`` and
``urlizetrunc`` template filters, which were thus vulnerable.
The problematic regular expression is replaced with parsing logic that behaves
similarly.

View File

@ -7,6 +7,17 @@ Django 2.0.3 release notes
Django 2.0.3 fixes two security issues and several bugs in 2.0.2. Also, the Django 2.0.3 fixes two security issues and several bugs in 2.0.2. Also, the
latest string translations from Transifex are incorporated. latest string translations from Transifex are incorporated.
CVE-2018-7536: Denial-of-service possibility in ``urlize`` and ``urlizetrunc`` template filters
===============================================================================================
The ``django.utils.html.urlize()`` function was extremely slow to evaluate
certain inputs due to catastrophic backtracking vulnerabilities in two regular
expressions. The ``urlize()`` function is used to implement the ``urlize`` and
``urlizetrunc`` template filters, which were thus vulnerable.
The problematic regular expressions are replaced with parsing logic that
behaves similarly.
Bugfixes Bugfixes
======== ========

View File

@ -253,3 +253,12 @@ class TestUtilsHtml(SimpleTestCase):
for value, output in tests: for value, output in tests:
with self.subTest(value=value): with self.subTest(value=value):
self.assertEqual(urlize(value), output) self.assertEqual(urlize(value), output)
def test_urlize_unchanged_inputs(self):
    """urlize() must hand these regression inputs back unmodified."""
    unchanged_inputs = (
        # Long run of "@" separators: the simple_email_re catastrophic case.
        'a' + '@a' * 50000 + 'a',
        # Long run of dots: the trailing punctuation catastrophic case.
        'a' + '.' * 1000000 + 'a',
    )
    for text in unchanged_inputs:
        with self.subTest(value=text):
            self.assertEqual(urlize(text), text)