From 2e65d56156b622e2393dee1af66e9c799a51924f Mon Sep 17 00:00:00 2001 From: Danilo Bargen Date: Mon, 3 Nov 2014 18:01:31 +0100 Subject: [PATCH] Fixed #20003 -- Improved and extended URLValidator This adds support for authentication data (`user:password`) in URLs, IPv6 addresses, and unicode domains. The test suite has been improved by adding test URLs from http://mathiasbynens.be/demo/url-regex (with a few adjustments, like allowing local and reserved IPs). The previous URL validation regex failed this test suite on 13 occasions, the validator was updated based on https://gist.github.com/dperini/729294. --- django/core/validators.py | 31 +++++++++++++---- docs/ref/validators.txt | 15 ++++++-- docs/releases/1.8.txt | 3 +- tests/forms_tests/tests/test_fields.py | 4 +-- tests/validators/invalid_urls.txt | 36 ++++++++++++++++++++ tests/validators/valid_urls.txt | 47 ++++++++++++++++++++++++++ 6 files changed, 123 insertions(+), 13 deletions(-) diff --git a/django/core/validators.py b/django/core/validators.py index e0fd995227..8d87cef412 100644 --- a/django/core/validators.py +++ b/django/core/validators.py @@ -66,14 +66,25 @@ class RegexValidator(object): @deconstructible class URLValidator(RegexValidator): + ul = '\u00a1-\uffff' # unicode letters range (must be a unicode string, not a raw string) + + # IP patterns + ipv4_re = r'(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}' + ipv6_re = r'\[[0-9a-f:\.]+\]' # (simple regex, validated later) + + # Host patterns + hostname_re = r'[a-z' + ul + r'0-9](?:[a-z' + ul + r'0-9-]*[a-z' + ul + r'0-9])?' + domain_re = r'(?:\.[a-z' + ul + r'0-9]+(?:[a-z' + ul + r'0-9-]*[a-z' + ul + r'0-9]+)*)*' + tld_re = r'\.[a-z' + ul + r']{2,}\.?' + host_re = '(' + hostname_re + domain_re + tld_re + '|localhost)' + regex = re.compile( r'^(?:[a-z0-9\.\-]*)://' # scheme is validated separately - r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?