Fixed #14955 -- Made the URLValidator use a HEAD request when verifying a URL. Thanks, Claude Paroz.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@15500 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jannis Leidel 2011-02-12 19:11:35 +00:00
parent 492b8a0821
commit e258d9a10b
2 changed files with 21 additions and 3 deletions

View File

@ -1,4 +1,5 @@
import re import re
import urllib2
import urlparse import urlparse
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
@ -38,6 +39,10 @@ class RegexValidator(object):
if not self.regex.search(smart_unicode(value)): if not self.regex.search(smart_unicode(value)):
raise ValidationError(self.message, code=self.code) raise ValidationError(self.message, code=self.code)
class HeadRequest(urllib2.Request):
    """A ``urllib2.Request`` that is always sent with the HTTP HEAD method.

    ``URLValidator`` uses this when ``verify_exists`` is set, so that
    checking a link does not require downloading the response body.
    """

    def get_method(self):
        """Return the HTTP method name for this request: always ``"HEAD"``."""
        return "HEAD"
class URLValidator(RegexValidator): class URLValidator(RegexValidator):
regex = re.compile( regex = re.compile(
r'^https?://' # http:// or https:// r'^https?://' # http:// or https://
@ -72,7 +77,6 @@ class URLValidator(RegexValidator):
url = value url = value
if self.verify_exists: if self.verify_exists:
import urllib2
headers = { headers = {
"Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5", "Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
"Accept-Language": "en-us,en;q=0.5", "Accept-Language": "en-us,en;q=0.5",
@ -80,13 +84,26 @@ class URLValidator(RegexValidator):
"Connection": "close", "Connection": "close",
"User-Agent": self.user_agent, "User-Agent": self.user_agent,
} }
broken_error = ValidationError(
_(u'This URL appears to be a broken link.'), code='invalid_link')
try: try:
req = urllib2.Request(url, None, headers) req = HeadRequest(url, None, headers)
u = urllib2.urlopen(req) u = urllib2.urlopen(req)
except ValueError: except ValueError:
raise ValidationError(_(u'Enter a valid URL.'), code='invalid') raise ValidationError(_(u'Enter a valid URL.'), code='invalid')
except urllib2.HTTPError, e:
if e.code in (405, 501):
# Try a GET request (HEAD refused)
# See also: http://www.w3.org/Protocols/rfc2616/rfc2616.html
try:
req = urllib2.Request(url, None, headers)
u = urllib2.urlopen(req)
except:
raise broken_error
else:
raise broken_error
except: # urllib2.URLError, httplib.InvalidURL, etc. except: # urllib2.URLError, httplib.InvalidURL, etc.
raise ValidationError(_(u'This URL appears to be a broken link.'), code='invalid_link') raise broken_error
def validate_integer(value): def validate_integer(value):

View File

@ -561,6 +561,7 @@ class FieldsTests(TestCase):
self.assertEqual(u'http://www.google.com/', f.clean('http://www.google.com')) # This will fail if there's no Internet connection self.assertEqual(u'http://www.google.com/', f.clean('http://www.google.com')) # This will fail if there's no Internet connection
self.assertRaisesErrorWithMessage(ValidationError, "[u'Enter a valid URL.']", f.clean, 'http://example') self.assertRaisesErrorWithMessage(ValidationError, "[u'Enter a valid URL.']", f.clean, 'http://example')
self.assertRaises(ValidationError, f.clean, 'http://www.broken.djangoproject.com') # bad domain self.assertRaises(ValidationError, f.clean, 'http://www.broken.djangoproject.com') # bad domain
self.assertRaises(ValidationError, f.clean, 'http://qa-dev.w3.org/link-testsuite/http.php?code=405') # Method not allowed
try: try:
f.clean('http://www.broken.djangoproject.com') # bad domain f.clean('http://www.broken.djangoproject.com') # bad domain
except ValidationError, e: except ValidationError, e: