From 5f287f75f2277ba821dcf5c444ab12d8eff6cce3 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Sat, 10 Sep 2011 00:47:00 +0000 Subject: [PATCH] Altered the behavior of URLField to avoid a potential DOS vector, and to avoid potential leakage of local filesystem data. A security announcement will be made shortly. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16760 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/core/validators.py | 57 ++++++++++++++------- django/db/models/fields/__init__.py | 2 +- docs/internals/deprecation.txt | 5 ++ docs/ref/forms/fields.txt | 5 ++ docs/ref/models/fields.txt | 13 +++-- docs/ref/settings.txt | 6 ++- docs/releases/1.4.txt | 8 +++ tests/modeltests/validation/__init__.py | 4 +- tests/modeltests/validation/models.py | 1 + tests/modeltests/validation/tests.py | 33 +++++++----- tests/regressiontests/forms/tests/fields.py | 16 ++++-- 11 files changed, 107 insertions(+), 43 deletions(-) diff --git a/django/core/validators.py b/django/core/validators.py index 458f4195e0..d51949308f 100644 --- a/django/core/validators.py +++ b/django/core/validators.py @@ -1,3 +1,4 @@ +import platform import re import urllib2 import urlparse @@ -41,10 +42,6 @@ class RegexValidator(object): if not self.regex.search(smart_unicode(value)): raise ValidationError(self.message, code=self.code) -class HeadRequest(urllib2.Request): - def get_method(self): - return "HEAD" - class URLValidator(RegexValidator): regex = re.compile( r'^(?:http|ftp)s?://' # http:// or https:// @@ -54,7 +51,8 @@ class URLValidator(RegexValidator): r'(?::\d+)?' # optional port r'(?:/?|[/?]\S+)$', re.IGNORECASE) - def __init__(self, verify_exists=False, validator_user_agent=URL_VALIDATOR_USER_AGENT): + def __init__(self, verify_exists=False, + validator_user_agent=URL_VALIDATOR_USER_AGENT): super(URLValidator, self).__init__() self.verify_exists = verify_exists self.user_agent = validator_user_agent @@ -79,6 +77,13 @@ class URLValidator(RegexValidator): url = value if self.verify_exists: + import warnings + warnings.warn( + "The URLField verify_exists argument has intractable security " + "and performance issues. Accordingly, it has been deprecated.", + DeprecationWarning + ) + headers = { "Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5", "Accept-Language": "en-us,en;q=0.5", @@ -90,21 +95,37 @@ class URLValidator(RegexValidator): broken_error = ValidationError( _(u'This URL appears to be a broken link.'), code='invalid_link') try: - req = HeadRequest(url, None, headers) - u = urllib2.urlopen(req) + req = urllib2.Request(url, None, headers) + req.get_method = lambda: 'HEAD' + #Create an opener that does not support local file access + opener = urllib2.OpenerDirector() + + #Don't follow redirects, but don't treat them as errors either + error_nop = lambda *args, **kwargs: True + http_error_processor = urllib2.HTTPErrorProcessor() + http_error_processor.http_error_301 = error_nop + http_error_processor.http_error_302 = error_nop + http_error_processor.http_error_307 = error_nop + + handlers = [urllib2.UnknownHandler(), + urllib2.HTTPHandler(), + urllib2.HTTPDefaultErrorHandler(), + urllib2.FTPHandler(), + http_error_processor] + try: + import ssl + handlers.append(urllib2.HTTPSHandler()) + except: + #Python isn't compiled with SSL support + pass + map(opener.add_handler, handlers) + opener.http_error_301 = lambda: True + if platform.python_version_tuple() >= (2, 6): + opener.open(req, timeout=10) + else: + opener.open(req) except ValueError: raise ValidationError(_(u'Enter a valid URL.'), code='invalid') - except urllib2.HTTPError, e: - if e.code in (405, 501): - # Try a GET request (HEAD refused) - # See also: http://www.w3.org/Protocols/rfc2616/rfc2616.html - try: - req = urllib2.Request(url, None, headers) - u = urllib2.urlopen(req) - except: - raise broken_error - else: - raise broken_error except: # urllib2.URLError, httplib.InvalidURL, etc. raise broken_error diff --git a/django/db/models/fields/__init__.py b/django/db/models/fields/__init__.py index 47735a8414..0c684285e3 100644 --- a/django/db/models/fields/__init__.py +++ b/django/db/models/fields/__init__.py @@ -1178,7 +1178,7 @@ class TimeField(Field): class URLField(CharField): description = _("URL") - def __init__(self, verbose_name=None, name=None, verify_exists=True, **kwargs): + def __init__(self, verbose_name=None, name=None, verify_exists=False, **kwargs): kwargs['max_length'] = kwargs.get('max_length', 200) CharField.__init__(self, verbose_name, name, **kwargs) self.validators.append(validators.URLValidator(verify_exists=verify_exists)) diff --git a/docs/internals/deprecation.txt b/docs/internals/deprecation.txt index 105b39e937..8d5baaf407 100644 --- a/docs/internals/deprecation.txt +++ b/docs/internals/deprecation.txt @@ -115,6 +115,11 @@ their deprecation, as per the :ref:`deprecation policy beyond that of a simple ``TextField`` since the removal of oldforms. All uses of ``XMLField`` can be replaced with ``TextField``. + * ``django.db.models.fields.URLField.verify_exists`` has been + deprecated due to intractable security and performance + issues. Validation behavior has been removed in 1.4, and the + argument will be removed in 1.5. + 1.5 --- diff --git a/docs/ref/forms/fields.txt b/docs/ref/forms/fields.txt index be1019c16e..437610384e 100644 --- a/docs/ref/forms/fields.txt +++ b/docs/ref/forms/fields.txt @@ -799,6 +799,11 @@ Takes the following optional arguments: If ``True``, the validator will attempt to load the given URL, raising ``ValidationError`` if the page gives a 404. Defaults to ``False``. +.. deprecated:: 1.3.1 + + ``verify_exists`` was deprecated for security reasons and will be + removed in 1.4. This deprecation also removes ``validator_user_agent``. + .. attribute:: URLField.validator_user_agent String used as the user-agent used when checking for a URL's existence. diff --git a/docs/ref/models/fields.txt b/docs/ref/models/fields.txt index 9a40a67439..8b5c0db7f5 100644 --- a/docs/ref/models/fields.txt +++ b/docs/ref/models/fields.txt @@ -872,14 +872,21 @@ shortcuts. ``URLField`` ------------ -.. class:: URLField([verify_exists=True, max_length=200, **options]) +.. class:: URLField([verify_exists=False, max_length=200, **options]) A :class:`CharField` for a URL. Has one extra optional argument: +.. deprecated:: 1.3.1 + + ``verify_exists`` is deprecated for security reasons as of 1.3.1 + and will be removed in 1.4. Prior to 1.3.1, the default value was + ``True``. + .. attribute:: URLField.verify_exists - If ``True`` (the default), the URL given will be checked for existence - (i.e., the URL actually loads and doesn't give a 404 response). + If ``True``, the URL given will be checked for existence (i.e., + the URL actually loads and doesn't give a 404 response) using a + ``HEAD`` request. Redirects are allowed, but will not be followed. Note that when you're using the single-threaded development server, validating a URL being served by the same server will hang. This should not diff --git a/docs/ref/settings.txt b/docs/ref/settings.txt index 756799114d..f23a187bf2 100644 --- a/docs/ref/settings.txt +++ b/docs/ref/settings.txt @@ -2012,8 +2012,10 @@ URL_VALIDATOR_USER_AGENT Default: ``Django/ (http://www.djangoproject.com/)`` -The string to use as the ``User-Agent`` header when checking to see if URLs -exist (see the ``verify_exists`` option on :class:`~django.db.models.URLField`). +The string to use as the ``User-Agent`` header when checking to see if +URLs exist (see the ``verify_exists`` option on +:class:`~django.db.models.URLField`). This setting was deprecated in +1.3.1 along with ``verify_exists`` and will be removed in 1.4. .. setting:: USE_ETAGS diff --git a/docs/releases/1.4.txt b/docs/releases/1.4.txt index ba05f2805f..f078ee0dfd 100644 --- a/docs/releases/1.4.txt +++ b/docs/releases/1.4.txt @@ -519,6 +519,14 @@ This was an alias to ``django.template.loader`` since 2005, it has been removed without emitting a warning due to the length of the deprecation. If your code still referenced this please use ``django.template.loader`` instead. +``django.db.models.fields.URLField.verify_exists`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This functionality has been removed due to intractable performance and +security issues. Any existing usage of ``verify_exists`` should be +removed. + + .. _deprecated-features-1.4: Features deprecated in 1.4 diff --git a/tests/modeltests/validation/__init__.py b/tests/modeltests/validation/__init__.py index c8a89cd36f..31c5000ba6 100644 --- a/tests/modeltests/validation/__init__.py +++ b/tests/modeltests/validation/__init__.py @@ -1,8 +1,8 @@ -from django.utils import unittest +from django.test import TestCase from django.core.exceptions import ValidationError -class ValidationTestCase(unittest.TestCase): +class ValidationTestCase(TestCase): def assertFailsValidation(self, clean, failed_fields): self.assertRaises(ValidationError, clean) try: diff --git a/tests/modeltests/validation/models.py b/tests/modeltests/validation/models.py index bf0430f79e..fca2cef261 100644 --- a/tests/modeltests/validation/models.py +++ b/tests/modeltests/validation/models.py @@ -14,6 +14,7 @@ class ModelToValidate(models.Model): parent = models.ForeignKey('self', blank=True, null=True, limit_choices_to={'number': 10}) email = models.EmailField(blank=True) url = models.URLField(blank=True) + url_verify = models.URLField(blank=True, verify_exists=True) f_with_custom_validator = models.IntegerField(blank=True, null=True, validators=[validate_answer_to_universe]) def clean(self): diff --git a/tests/modeltests/validation/tests.py b/tests/modeltests/validation/tests.py index 1dc9917915..e6e5d97971 100644 --- a/tests/modeltests/validation/tests.py +++ b/tests/modeltests/validation/tests.py @@ -1,3 +1,5 @@ +import warnings + from django import forms from django.test import TestCase from django.core.exceptions import NON_FIELD_ERRORS @@ -14,6 +16,14 @@ from modeltests.validation.test_custom_messages import CustomMessagesTest class BaseModelValidationTests(ValidationTestCase): + def setUp(self): + self.save_warnings_state() + warnings.filterwarnings('ignore', category=DeprecationWarning, + module='django.core.validators') + + def tearDown(self): + self.restore_warnings_state() + def test_missing_required_field_raises_error(self): mtv = ModelToValidate(f_with_custom_validator=42) self.assertFailsValidation(mtv.full_clean, ['name', 'number']) @@ -54,25 +64,22 @@ class BaseModelValidationTests(ValidationTestCase): mtv = ModelToValidate(number=10, name='Some Name', url='not a url') self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'Enter a valid value.']) + #The tests below which use url_verify are deprecated def test_correct_url_but_nonexisting_gives_404(self): - mtv = ModelToValidate(number=10, name='Some Name', url='http://google.com/we-love-microsoft.html') - self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'This URL appears to be a broken link.']) + mtv = ModelToValidate(number=10, name='Some Name', url_verify='http://qa-dev.w3.org/link-testsuite/http.php?code=404') + self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url_verify', [u'This URL appears to be a broken link.']) def test_correct_url_value_passes(self): - mtv = ModelToValidate(number=10, name='Some Name', url='http://www.example.com/') + mtv = ModelToValidate(number=10, name='Some Name', url_verify='http://www.google.com/') + self.assertEqual(None, mtv.full_clean()) # This will fail if there's no Internet connection + + def test_correct_url_with_redirect(self): + mtv = ModelToValidate(number=10, name='Some Name', url_verify='http://qa-dev.w3.org/link-testsuite/http.php?code=301') #example.com is a redirect to iana.org now self.assertEqual(None, mtv.full_clean()) # This will fail if there's no Internet connection def test_correct_https_url_but_nonexisting(self): - mtv = ModelToValidate(number=10, name='Some Name', url='https://www.example.com/') - self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'This URL appears to be a broken link.']) - - def test_correct_ftp_url_but_nonexisting(self): - mtv = ModelToValidate(number=10, name='Some Name', url='ftp://ftp.google.com/we-love-microsoft.html') - self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'This URL appears to be a broken link.']) - - def test_correct_ftps_url_but_nonexisting(self): - mtv = ModelToValidate(number=10, name='Some Name', url='ftps://ftp.google.com/we-love-microsoft.html') - self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'This URL appears to be a broken link.']) + mtv = ModelToValidate(number=10, name='Some Name', url_verify='https://www.example.com/') + self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url_verify', [u'This URL appears to be a broken link.']) def test_text_greater_that_charfields_max_length_raises_erros(self): mtv = ModelToValidate(number=10, name='Some Name'*100) diff --git a/tests/regressiontests/forms/tests/fields.py b/tests/regressiontests/forms/tests/fields.py index 5755501e89..d37ad5ee58 100644 --- a/tests/regressiontests/forms/tests/fields.py +++ b/tests/regressiontests/forms/tests/fields.py @@ -28,6 +28,7 @@ import datetime import re import os import urllib2 +import warnings from decimal import Decimal from functools import wraps @@ -71,6 +72,14 @@ def verify_exists_urls(existing_urls=()): class FieldsTests(SimpleTestCase): + def setUp(self): + self.save_warnings_state() + warnings.filterwarnings('ignore', category=DeprecationWarning, + module='django.core.validators') + + def tearDown(self): + self.restore_warnings_state() + def test_field_sets_widget_is_required(self): self.assertTrue(Field(required=True).widget.is_required) self.assertFalse(Field(required=False).widget.is_required) @@ -622,7 +631,7 @@ class FieldsTests(SimpleTestCase): f.clean('http://www.broken.djangoproject.com') # bad domain except ValidationError, e: self.assertEqual("[u'This URL appears to be a broken link.']", str(e)) - self.assertRaises(ValidationError, f.clean, 'http://google.com/we-love-microsoft.html') # good domain, bad page + self.assertRaises(ValidationError, f.clean, 'http://qa-dev.w3.org/link-testsuite/http.php?code=400') # good domain, bad page try: f.clean('http://google.com/we-love-microsoft.html') # good domain, bad page except ValidationError, e: @@ -681,11 +690,10 @@ class FieldsTests(SimpleTestCase): except ValidationError, e: self.assertEqual("[u'This URL appears to be a broken link.']", str(e)) - @verify_exists_urls(('http://xn--tr-xka.djangoproject.com/',)) def test_urlfield_10(self): - # UTF-8 char in path + # UTF-8 in the domain. f = URLField(verify_exists=True) - url = u'http://t\xfcr.djangoproject.com/' + url = u'http://\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac.idn.icann.org/\u0391\u03c1\u03c7\u03b9\u03ba\u03ae_\u03c3\u03b5\u03bb\u03af\u03b4\u03b1' self.assertEqual(url, f.clean(url)) def test_urlfield_not_string(self):