Fixed #2934: greatly improved the accuracy of the isExistingURL check. Also introduced a new setting, URL_VALIDATOR_USER_AGENT, which is the User-Agent that the validator will use to check for URL existence. Thanks, Jeremy.
git-svn-id: http://code.djangoproject.com/svn/django/trunk@4035 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
212cb21baa
commit
4d59730fad
|
@ -228,6 +228,10 @@ MONTH_DAY_FORMAT = 'F j'
|
||||||
# Hint: you really don't!
|
# Hint: you really don't!
|
||||||
TRANSACTIONS_MANAGED = False
|
TRANSACTIONS_MANAGED = False
|
||||||
|
|
||||||
|
# The User-Agent string to use when checking for URL validity through the
|
||||||
|
# isExistingURL validator.
|
||||||
|
URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
|
||||||
|
|
||||||
##############
|
##############
|
||||||
# MIDDLEWARE #
|
# MIDDLEWARE #
|
||||||
##############
|
##############
|
||||||
|
|
|
@ -8,6 +8,7 @@ validator will *always* be run, regardless of whether its associated
|
||||||
form field is required.
|
form field is required.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import urllib2
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils.translation import gettext, gettext_lazy, ngettext
|
from django.utils.translation import gettext, gettext_lazy, ngettext
|
||||||
from django.utils.functional import Promise, lazy
|
from django.utils.functional import Promise, lazy
|
||||||
|
@ -223,17 +224,25 @@ def isWellFormedXmlFragment(field_data, all_data):
|
||||||
isWellFormedXml('<root>%s</root>' % field_data, all_data)
|
isWellFormedXml('<root>%s</root>' % field_data, all_data)
|
||||||
|
|
||||||
def isExistingURL(field_data, all_data):
|
def isExistingURL(field_data, all_data):
|
||||||
import urllib2
|
|
||||||
try:
|
try:
|
||||||
u = urllib2.urlopen(field_data)
|
headers = {
|
||||||
|
"Accept" : "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
|
||||||
|
"Accept-Language" : "en-us,en;q=0.5",
|
||||||
|
"Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
|
||||||
|
"Connection" : "close",
|
||||||
|
"User-Agent": settings.URL_VALIDATOR_USER_AGENT
|
||||||
|
}
|
||||||
|
req = urllib2.Request(field_data,None, headers)
|
||||||
|
u = urllib2.urlopen(req)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise ValidationError, gettext("Invalid URL: %s") % field_data
|
raise ValidationError, _("Invalid URL: %s") % field_data
|
||||||
except urllib2.HTTPError, e:
|
except urllib2.HTTPError, e:
|
||||||
# 401s are valid; they just mean authorization is required.
|
# 401s are valid; they just mean authorization is required.
|
||||||
if e.code not in ('401',):
|
# 301 and 302 are redirects; they just mean look somewhere else.
|
||||||
raise ValidationError, gettext("The URL %s is a broken link.") % field_data
|
if str(e.code) not in ('401','301','302'):
|
||||||
|
raise ValidationError, _("The URL %s is a broken link.") % field_data
|
||||||
except: # urllib2.URLError, httplib.InvalidURL, etc.
|
except: # urllib2.URLError, httplib.InvalidURL, etc.
|
||||||
raise ValidationError, gettext("The URL %s is a broken link.") % field_data
|
raise ValidationError, _("The URL %s is a broken link.") % field_data
|
||||||
|
|
||||||
def isValidUSState(field_data, all_data):
|
def isValidUSState(field_data, all_data):
|
||||||
"Checks that the given string is a valid two-letter U.S. state abbreviation"
|
"Checks that the given string is a valid two-letter U.S. state abbreviation"
|
||||||
|
|
|
@ -814,6 +814,16 @@ manual configuration option (see below), Django will *not* touch the ``TZ``
|
||||||
environment variable, and it'll be up to you to ensure your processes are
|
environment variable, and it'll be up to you to ensure your processes are
|
||||||
running in the correct environment.
|
running in the correct environment.
|
||||||
|
|
||||||
|
URL_VALIDATOR_USER_AGENT
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
Default: ``Django/<version> (http://www.djangoproject.com/)``
|
||||||
|
|
||||||
|
The string to use as the ``User-Agent`` header when checking to see if URLs
|
||||||
|
exist (see the ``verify_exists`` option on URLField_).
|
||||||
|
|
||||||
|
.. _URLField: ../model_api/#urlfield
|
||||||
|
|
||||||
USE_ETAGS
|
USE_ETAGS
|
||||||
---------
|
---------
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue