From e258d9a10b92ef581556d9f265d7798d28b3330d Mon Sep 17 00:00:00 2001 From: Jannis Leidel Date: Sat, 12 Feb 2011 19:11:35 +0000 Subject: [PATCH] Fixed #14955 -- Made the URLValidator use a HEAD request when verifying a URL. Thanks, Claude Paroz. git-svn-id: http://code.djangoproject.com/svn/django/trunk@15500 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/core/validators.py | 23 ++++++++++++++++++--- tests/regressiontests/forms/tests/fields.py | 1 + 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/django/core/validators.py b/django/core/validators.py index b1b82dbf0d..7562330dcd 100644 --- a/django/core/validators.py +++ b/django/core/validators.py @@ -1,4 +1,5 @@ import re +import urllib2 import urlparse from django.core.exceptions import ValidationError @@ -38,6 +39,10 @@ class RegexValidator(object): if not self.regex.search(smart_unicode(value)): raise ValidationError(self.message, code=self.code) +class HeadRequest(urllib2.Request): + def get_method(self): + return "HEAD" + class URLValidator(RegexValidator): regex = re.compile( r'^https?://' # http:// or https:// @@ -72,7 +77,6 @@ class URLValidator(RegexValidator): url = value if self.verify_exists: - import urllib2 headers = { "Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5", "Accept-Language": "en-us,en;q=0.5", @@ -80,13 +84,26 @@ class URLValidator(RegexValidator): "Connection": "close", "User-Agent": self.user_agent, } + broken_error = ValidationError( + _(u'This URL appears to be a broken link.'), code='invalid_link') try: - req = urllib2.Request(url, None, headers) + req = HeadRequest(url, None, headers) u = urllib2.urlopen(req) except ValueError: raise ValidationError(_(u'Enter a valid URL.'), code='invalid') + except urllib2.HTTPError, e: + if e.code in (405, 501): + # Try a GET request (HEAD refused) + # See also: http://www.w3.org/Protocols/rfc2616/rfc2616.html + try: + req = urllib2.Request(url, None, headers) + u = urllib2.urlopen(req) + except: + raise broken_error + else: + raise broken_error except: # urllib2.URLError, httplib.InvalidURL, etc. - raise ValidationError(_(u'This URL appears to be a broken link.'), code='invalid_link') + raise broken_error def validate_integer(value): diff --git a/tests/regressiontests/forms/tests/fields.py b/tests/regressiontests/forms/tests/fields.py index 576a9c3d7b..65c1172bd2 100644 --- a/tests/regressiontests/forms/tests/fields.py +++ b/tests/regressiontests/forms/tests/fields.py @@ -561,6 +561,7 @@ class FieldsTests(TestCase): self.assertEqual(u'http://www.google.com/', f.clean('http://www.google.com')) # This will fail if there's no Internet connection self.assertRaisesErrorWithMessage(ValidationError, "[u'Enter a valid URL.']", f.clean, 'http://example') self.assertRaises(ValidationError, f.clean, 'http://www.broken.djangoproject.com') # bad domain + self.assertRaises(ValidationError, f.clean, 'http://qa-dev.w3.org/link-testsuite/http.php?code=405') # Method not allowed try: f.clean('http://www.broken.djangoproject.com') # bad domain except ValidationError, e: