Fixed #2934: greatly improved the accuracy of the isExistingURL check. Also introduced a new setting, URL_VALIDATOR_USER_AGENT, which is the User-Agent that the validator will use to check for URL existence. Thanks, Jeremy.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@4035 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss 2006-11-07 02:20:08 +00:00
parent 212cb21baa
commit 4d59730fad
3 changed files with 30 additions and 7 deletions

View File

@ -228,6 +228,10 @@ MONTH_DAY_FORMAT = 'F j'
# Hint: you really don't! # Hint: you really don't!
TRANSACTIONS_MANAGED = False TRANSACTIONS_MANAGED = False
# The User-Agent string to use when checking for URL validity through the
# isExistingURL validator.
URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
############## ##############
# MIDDLEWARE # # MIDDLEWARE #
############## ##############

View File

@ -8,6 +8,7 @@ validator will *always* be run, regardless of whether its associated
form field is required. form field is required.
""" """
import urllib2
from django.conf import settings from django.conf import settings
from django.utils.translation import gettext, gettext_lazy, ngettext from django.utils.translation import gettext, gettext_lazy, ngettext
from django.utils.functional import Promise, lazy from django.utils.functional import Promise, lazy
@ -223,17 +224,25 @@ def isWellFormedXmlFragment(field_data, all_data):
isWellFormedXml('<root>%s</root>' % field_data, all_data) isWellFormedXml('<root>%s</root>' % field_data, all_data)
def isExistingURL(field_data, all_data): def isExistingURL(field_data, all_data):
import urllib2
try: try:
u = urllib2.urlopen(field_data) headers = {
"Accept" : "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
"Accept-Language" : "en-us,en;q=0.5",
"Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
"Connection" : "close",
"User-Agent": settings.URL_VALIDATOR_USER_AGENT
}
req = urllib2.Request(field_data,None, headers)
u = urllib2.urlopen(req)
except ValueError: except ValueError:
raise ValidationError, gettext("Invalid URL: %s") % field_data raise ValidationError, _("Invalid URL: %s") % field_data
except urllib2.HTTPError, e: except urllib2.HTTPError, e:
# 401s are valid; they just mean authorization is required. # 401s are valid; they just mean authorization is required.
if e.code not in ('401',): # 301 and 302 are redirects; they just mean look somewhere else.
raise ValidationError, gettext("The URL %s is a broken link.") % field_data if str(e.code) not in ('401','301','302'):
raise ValidationError, _("The URL %s is a broken link.") % field_data
except: # urllib2.URLError, httplib.InvalidURL, etc. except: # urllib2.URLError, httplib.InvalidURL, etc.
raise ValidationError, gettext("The URL %s is a broken link.") % field_data raise ValidationError, _("The URL %s is a broken link.") % field_data
def isValidUSState(field_data, all_data): def isValidUSState(field_data, all_data):
"Checks that the given string is a valid two-letter U.S. state abbreviation" "Checks that the given string is a valid two-letter U.S. state abbreviation"

View File

@ -814,6 +814,16 @@ manual configuration option (see below), Django will *not* touch the ``TZ``
environment variable, and it'll be up to you to ensure your processes are environment variable, and it'll be up to you to ensure your processes are
running in the correct environment. running in the correct environment.
URL_VALIDATOR_USER_AGENT
------------------------
Default: ``Django/<version> (http://www.djangoproject.com)``
The string to use as the ``User-Agent`` header when checking to see if URLs
exist (see the ``verify_exists`` option on URLField_).
.. _URLField: ../model_api/#urlfield
USE_ETAGS USE_ETAGS
--------- ---------