Fixed #2934: greatly improved the accuracy if the isExistingURL check. Also introduced a new setting, URL_VALIDATOR_USER_AGENT, which is the User-Agent that the validator will use to check for URL existance. Thanks, Jeremy.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@4035 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss 2006-11-07 02:20:08 +00:00
parent 212cb21baa
commit 4d59730fad
3 changed files with 30 additions and 7 deletions

View File

@ -228,6 +228,10 @@ MONTH_DAY_FORMAT = 'F j'
# Hint: you really don't!
TRANSACTIONS_MANAGED = False
# The User-Agent string to use when checking for URL validity through the
# isExistingURL validator.
URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
##############
# MIDDLEWARE #
##############

View File

@ -8,6 +8,7 @@ validator will *always* be run, regardless of whether its associated
form field is required.
"""
import urllib2
from django.conf import settings
from django.utils.translation import gettext, gettext_lazy, ngettext
from django.utils.functional import Promise, lazy
@ -223,17 +224,25 @@ def isWellFormedXmlFragment(field_data, all_data):
isWellFormedXml('<root>%s</root>' % field_data, all_data)
def isExistingURL(field_data, all_data):
import urllib2
try:
u = urllib2.urlopen(field_data)
headers = {
"Accept" : "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
"Accept-Language" : "en-us,en;q=0.5",
"Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
"Connection" : "close",
"User-Agent": settings.URL_VALIDATOR_USER_AGENT
}
req = urllib2.Request(field_data,None, headers)
u = urllib2.urlopen(req)
except ValueError:
raise ValidationError, gettext("Invalid URL: %s") % field_data
raise ValidationError, _("Invalid URL: %s") % field_data
except urllib2.HTTPError, e:
# 401s are valid; they just mean authorization is required.
if e.code not in ('401',):
raise ValidationError, gettext("The URL %s is a broken link.") % field_data
# 301 and 302 are redirects; they just mean look somewhere else.
if str(e.code) not in ('401','301','302'):
raise ValidationError, _("The URL %s is a broken link.") % field_data
except: # urllib2.URLError, httplib.InvalidURL, etc.
raise ValidationError, gettext("The URL %s is a broken link.") % field_data
raise ValidationError, _("The URL %s is a broken link.") % field_data
def isValidUSState(field_data, all_data):
"Checks that the given string is a valid two-letter U.S. state abbreviation"

View File

@ -814,6 +814,16 @@ manual configuration option (see below), Django will *not* touch the ``TZ``
environment variable, and it'll be up to you to ensure your processes are
running in the correct environment.
URL_VALIDATOR_USER_AGENT
------------------------
Default: ``Django/<version> (http://www.djangoproject.com/)``
The string to use as the ``User-Agent`` header when checking to see if URLs
exist (see the ``verify_exists`` option on URLField_).
.. URLField: ../model_api/#urlfield
USE_ETAGS
---------