From 4d59730fad1fe882ad957b7ad463398329d2c8ea Mon Sep 17 00:00:00 2001 From: Jacob Kaplan-Moss Date: Tue, 7 Nov 2006 02:20:08 +0000 Subject: [PATCH] Fixed #2934: greatly improved the accuracy if the isExistingURL check. Also introduced a new setting, URL_VALIDATOR_USER_AGENT, which is the User-Agent that the validator will use to check for URL existance. Thanks, Jeremy. git-svn-id: http://code.djangoproject.com/svn/django/trunk@4035 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/conf/global_settings.py | 4 ++++ django/core/validators.py | 23 ++++++++++++++++------- docs/settings.txt | 10 ++++++++++ 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/django/conf/global_settings.py b/django/conf/global_settings.py index 69c775cdec..a50287f537 100644 --- a/django/conf/global_settings.py +++ b/django/conf/global_settings.py @@ -228,6 +228,10 @@ MONTH_DAY_FORMAT = 'F j' # Hint: you really don't! TRANSACTIONS_MANAGED = False +# The User-Agent string to use when checking for URL validity through the +# isExistingURL validator. +URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)" + ############## # MIDDLEWARE # ############## diff --git a/django/core/validators.py b/django/core/validators.py index 4c3f59143e..816b8af8a3 100644 --- a/django/core/validators.py +++ b/django/core/validators.py @@ -8,6 +8,7 @@ validator will *always* be run, regardless of whether its associated form field is required. """ +import urllib2 from django.conf import settings from django.utils.translation import gettext, gettext_lazy, ngettext from django.utils.functional import Promise, lazy @@ -223,18 +224,26 @@ def isWellFormedXmlFragment(field_data, all_data): isWellFormedXml('%s' % field_data, all_data) def isExistingURL(field_data, all_data): - import urllib2 try: - u = urllib2.urlopen(field_data) + headers = { + "Accept" : "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5", + "Accept-Language" : "en-us,en;q=0.5", + "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7", + "Connection" : "close", + "User-Agent": settings.URL_VALIDATOR_USER_AGENT + } + req = urllib2.Request(field_data,None, headers) + u = urllib2.urlopen(req) except ValueError: - raise ValidationError, gettext("Invalid URL: %s") % field_data + raise ValidationError, _("Invalid URL: %s") % field_data except urllib2.HTTPError, e: # 401s are valid; they just mean authorization is required. - if e.code not in ('401',): - raise ValidationError, gettext("The URL %s is a broken link.") % field_data + # 301 and 302 are redirects; they just mean look somewhere else. + if str(e.code) not in ('401','301','302'): + raise ValidationError, _("The URL %s is a broken link.") % field_data except: # urllib2.URLError, httplib.InvalidURL, etc. - raise ValidationError, gettext("The URL %s is a broken link.") % field_data - + raise ValidationError, _("The URL %s is a broken link.") % field_data + def isValidUSState(field_data, all_data): "Checks that the given string is a valid two-letter U.S. state abbreviation" states = ['AA', 'AE', 'AK', 'AL', 'AP', 'AR', 'AS', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'FM', 'GA', 'GU', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MH', 'MI', 'MN', 'MO', 'MP', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'PR', 'PW', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VI', 'VT', 'WA', 'WI', 'WV', 'WY'] diff --git a/docs/settings.txt b/docs/settings.txt index 272b20f753..976a174abb 100644 --- a/docs/settings.txt +++ b/docs/settings.txt @@ -814,6 +814,16 @@ manual configuration option (see below), Django will *not* touch the ``TZ`` environment variable, and it'll be up to you to ensure your processes are running in the correct environment. +URL_VALIDATOR_USER_AGENT +------------------------ + +Default: ``Django/ (http://www.djangoproject.com/)`` + +The string to use as the ``User-Agent`` header when checking to see if URLs +exist (see the ``verify_exists`` option on URLField_). + +.. URLField: ../model_api/#urlfield + USE_ETAGS ---------