Fixed #20003 -- Improved and extended URLValidator
This adds support for authentication data (`user:password`) in URLs, IPv6 addresses, and unicode domains. The test suite has been improved by adding test URLs from http://mathiasbynens.be/demo/url-regex (with a few adjustments, like allowing local and reserved IPs). The previous URL validation regex failed this test suite on 13 occasions; the validator was updated based on https://gist.github.com/dperini/729294.
This commit is contained in:
parent
6288fccfda
commit
2e65d56156
|
@ -66,14 +66,25 @@ class RegexValidator(object):
|
|||
|
||||
@deconstructible
|
||||
class URLValidator(RegexValidator):
|
||||
ul = '\u00a1-\uffff' # unicode letters range (must be a unicode string, not a raw string)
|
||||
|
||||
# IP patterns
|
||||
ipv4_re = r'(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}'
|
||||
ipv6_re = r'\[[0-9a-f:\.]+\]' # (simple regex, validated later)
|
||||
|
||||
# Host patterns
|
||||
hostname_re = r'[a-z' + ul + r'0-9](?:[a-z' + ul + r'0-9-]*[a-z' + ul + r'0-9])?'
|
||||
domain_re = r'(?:\.[a-z' + ul + r'0-9]+(?:[a-z' + ul + r'0-9-]*[a-z' + ul + r'0-9]+)*)*'
|
||||
tld_re = r'\.[a-z' + ul + r']{2,}\.?'
|
||||
host_re = '(' + hostname_re + domain_re + tld_re + '|localhost)'
|
||||
|
||||
regex = re.compile(
|
||||
r'^(?:[a-z0-9\.\-]*)://' # scheme is validated separately
|
||||
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|' # domain...
|
||||
r'localhost|' # localhost...
|
||||
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|' # ...or ipv4
|
||||
r'\[?[A-F0-9]*:[A-F0-9:]+\]?)' # ...or ipv6
|
||||
r'(?::\d+)?' # optional port
|
||||
r'(?:/?|[/?]\S+)$', re.IGNORECASE)
|
||||
r'(?:\S+(?::\S*)?@)?' # user:pass authentication
|
||||
r'(?:' + ipv4_re + '|' + ipv6_re + '|' + host_re + ')'
|
||||
r'(?::\d{2,5})?' # port
|
||||
r'(?:[/?#][^\s]*)?' # resource path
|
||||
r'$', re.IGNORECASE)
|
||||
message = _('Enter a valid URL.')
|
||||
schemes = ['http', 'https', 'ftp', 'ftps']
|
||||
|
||||
|
@ -105,6 +116,14 @@ class URLValidator(RegexValidator):
|
|||
else:
|
||||
raise
|
||||
else:
|
||||
# Now verify IPv6 in the netloc part
|
||||
host_match = re.search(r'^\[(.+)\](?::\d{2,5})?$', urlsplit(value).netloc)
|
||||
if host_match:
|
||||
potential_ip = host_match.groups()[0]
|
||||
try:
|
||||
validate_ipv6_address(potential_ip)
|
||||
except ValidationError:
|
||||
raise ValidationError(self.message, code=self.code)
|
||||
url = value
|
||||
|
||||
|
||||
|
|
|
@ -149,9 +149,13 @@ to, or in lieu of custom ``field.clean()`` methods.
|
|||
.. class:: URLValidator([schemes=None, regex=None, message=None, code=None])
|
||||
|
||||
A :class:`RegexValidator` that ensures a value looks like a URL, and raises
|
||||
an error code of ``'invalid'`` if it doesn't. In addition to the optional
|
||||
arguments of its parent :class:`RegexValidator` class, ``URLValidator``
|
||||
accepts an extra optional attribute:
|
||||
an error code of ``'invalid'`` if it doesn't.
|
||||
|
||||
Loopback addresses and reserved IP spaces are considered valid. Literal
|
||||
IPv6 addresses (:rfc:`2732`) and unicode domains are both supported.
|
||||
|
||||
In addition to the optional arguments of its parent :class:`RegexValidator`
|
||||
class, ``URLValidator`` accepts an extra optional attribute:
|
||||
|
||||
.. attribute:: schemes
|
||||
|
||||
|
@ -165,6 +169,11 @@ to, or in lieu of custom ``field.clean()`` methods.
|
|||
|
||||
The optional ``schemes`` attribute was added.
|
||||
|
||||
.. versionchanged:: 1.8
|
||||
|
||||
Support for IPv6 addresses, unicode domains, and URLs containing
|
||||
authentication data was added.
|
||||
|
||||
``validate_email``
|
||||
------------------
|
||||
.. data:: validate_email
|
||||
|
|
|
@ -606,7 +606,8 @@ Tests
|
|||
Validators
|
||||
^^^^^^^^^^
|
||||
|
||||
* ...
|
||||
* :class:`~django.core.validators.URLValidator` now supports IPv6 addresses,
|
||||
unicode domains, and URLs containing authentication data.
|
||||
|
||||
Backwards incompatible changes in 1.8
|
||||
=====================================
|
||||
|
|
|
@ -896,9 +896,7 @@ class FieldsTests(SimpleTestCase):
|
|||
"""Test URLField correctly validates IPv6 (#18779)."""
|
||||
f = URLField()
|
||||
urls = (
|
||||
'http://::/',
|
||||
'http://6:21b4:92/',
|
||||
'http://[12:34:3a53]/',
|
||||
'http://[12:34::3a53]/',
|
||||
'http://[a34:9238::]:8080/',
|
||||
)
|
||||
for url in urls:
|
||||
|
|
|
@ -10,3 +10,39 @@ http://inv-.alid-.com
|
|||
http://inv-.-alid.com
|
||||
file://localhost/path
|
||||
git://example.com/
|
||||
http://.
|
||||
http://..
|
||||
http://../
|
||||
http://?
|
||||
http://??
|
||||
http://??/
|
||||
http://#
|
||||
http://##
|
||||
http://##/
|
||||
http://foo.bar?q=Spaces should be encoded
|
||||
//
|
||||
//a
|
||||
///a
|
||||
///
|
||||
http:///a
|
||||
foo.com
|
||||
rdar://1234
|
||||
h://test
|
||||
http:// shouldfail.com
|
||||
:// should fail
|
||||
http://foo.bar/foo(bar)baz quux
|
||||
http://-error-.invalid/
|
||||
http://-a.b.co
|
||||
http://a.b-.co
|
||||
http:/
|
||||
http://
|
||||
http://
|
||||
http://1.1.1.1.1
|
||||
http://123.123.123
|
||||
http://3628126748
|
||||
http://123
|
||||
http://.www.foo.bar/
|
||||
http://.www.foo.bar./
|
||||
http://[::1:2::3]:8080/
|
||||
http://[]
|
||||
http://[]:8080
|
||||
|
|
|
@ -2,6 +2,7 @@ http://www.djangoproject.com/
|
|||
HTTP://WWW.DJANGOPROJECT.COM/
|
||||
http://localhost/
|
||||
http://example.com/
|
||||
http://example.com./
|
||||
http://www.example.com/
|
||||
http://www.example.com:8000/test
|
||||
http://valid-with-hyphens.com/
|
||||
|
@ -14,3 +15,49 @@ http://example.com/index.php?something=value&another=value2
|
|||
https://example.com/
|
||||
ftp://example.com/
|
||||
ftps://example.com/
|
||||
http://foo.com/blah_blah
|
||||
http://foo.com/blah_blah/
|
||||
http://foo.com/blah_blah_(wikipedia)
|
||||
http://foo.com/blah_blah_(wikipedia)_(again)
|
||||
http://www.example.com/wpstyle/?p=364
|
||||
https://www.example.com/foo/?bar=baz&inga=42&quux
|
||||
http://✪df.ws/123
|
||||
http://userid:password@example.com:8080
|
||||
http://userid:password@example.com:8080/
|
||||
http://userid@example.com
|
||||
http://userid@example.com/
|
||||
http://userid@example.com:8080
|
||||
http://userid@example.com:8080/
|
||||
http://userid:password@example.com
|
||||
http://userid:password@example.com/
|
||||
http://142.42.1.1/
|
||||
http://142.42.1.1:8080/
|
||||
http://➡.ws/䨹
|
||||
http://⌘.ws
|
||||
http://⌘.ws/
|
||||
http://foo.com/blah_(wikipedia)#cite-1
|
||||
http://foo.com/blah_(wikipedia)_blah#cite-1
|
||||
http://foo.com/unicode_(✪)_in_parens
|
||||
http://foo.com/(something)?after=parens
|
||||
http://☺.damowmow.com/
|
||||
http://code.google.com/events/#&product=browser
|
||||
http://j.mp
|
||||
ftp://foo.bar/baz
|
||||
http://foo.bar/?q=Test%20URL-encoded%20stuff
|
||||
http://مثال.إختبار
|
||||
http://例子.测试
|
||||
http://उदाहरण.परीक्षा
|
||||
http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com
|
||||
http://1337.net
|
||||
http://a.b-c.de
|
||||
http://223.255.255.254
|
||||
ftps://foo.bar/
|
||||
http://10.1.1.254
|
||||
http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
|
||||
http://[::192.9.5.5]/ipng
|
||||
http://[::ffff:192.9.5.5]/ipng
|
||||
http://[::1]:8080/
|
||||
http://0.0.0.0/
|
||||
http://255.255.255.255
|
||||
http://224.0.0.0
|
||||
http://224.1.1.1
|
||||
|
|
Loading…
Reference in New Issue