2012-08-15 03:17:25 +08:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2013-06-22 04:59:33 +08:00
|
|
|
import base64
|
2011-03-01 22:28:06 +08:00
|
|
|
import calendar
|
|
|
|
import datetime
|
2009-03-22 15:58:29 +08:00
|
|
|
import re
|
2011-01-24 16:02:40 +08:00
|
|
|
import sys
|
2015-03-10 08:05:13 +08:00
|
|
|
import unicodedata
|
2013-06-22 04:59:33 +08:00
|
|
|
from binascii import Error as BinasciiError
|
2011-09-10 00:18:38 +08:00
|
|
|
from email.utils import formatdate
|
2007-10-31 11:59:40 +08:00
|
|
|
|
2015-01-28 20:35:27 +08:00
|
|
|
from django.utils import six
|
2011-04-22 20:01:41 +08:00
|
|
|
from django.utils.datastructures import MultiValueDict
|
2014-08-21 19:53:22 +08:00
|
|
|
from django.utils.encoding import force_bytes, force_str, force_text
|
2015-11-07 21:30:20 +08:00
|
|
|
from django.utils.functional import keep_lazy_text
|
2013-09-06 03:38:59 +08:00
|
|
|
from django.utils.six.moves.urllib.parse import (
|
2015-01-28 20:35:27 +08:00
|
|
|
quote, quote_plus, unquote, unquote_plus, urlencode as original_urlencode,
|
|
|
|
urlparse,
|
|
|
|
)
|
Merged Unicode branch into trunk (r4952:5608). This should be fully
backwards compatible for all practical purposes.
Fixed #2391, #2489, #2996, #3322, #3344, #3370, #3406, #3432, #3454, #3492, #3582, #3690, #3878, #3891, #3937, #4039, #4141, #4227, #4286, #4291, #4300, #4452, #4702
git-svn-id: http://code.djangoproject.com/svn/django/trunk@5609 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2007-07-04 20:11:04 +08:00
|
|
|
|
2009-03-22 15:58:29 +08:00
|
|
|
# Matches one double-quoted ETag, optionally preceded by the ``W/`` prefix.
# The single capture group is the text between the quotes, with any
# backslash escape sequences left in place.
ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"')
|
|
|
|
|
2011-03-01 22:28:06 +08:00
|
|
|
# Lower-case three-letter month abbreviations in calendar order, so that
# ``MONTHS.index(abbr) + 1`` is the month number.
MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
|
|
|
|
# Regex fragment: two-digit day of month, captured as ``day``.
__D = r'(?P<day>\d{2})'
|
|
|
|
# Regex fragment: day of month whose first character may be a space instead
# of a digit, captured as ``day``.
__D2 = r'(?P<day>[ \d]\d)'
|
|
|
|
# Regex fragment: three word characters for the month name, captured as ``mon``.
__M = r'(?P<mon>\w{3})'
|
|
|
|
# Regex fragment: four-digit year, captured as ``year``.
__Y = r'(?P<year>\d{4})'
|
|
|
|
# Regex fragment: two-digit year, captured as ``year``.
__Y2 = r'(?P<year>\d{2})'
|
|
|
|
# Regex fragment: ``HH:MM:SS`` time, captured as ``hour``, ``min`` and ``sec``.
__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})'
|
|
|
|
# Whole-string pattern of the form ``Wdy, DD Mon YYYY HH:MM:SS GMT``.
RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T))
|
|
|
|
# Whole-string pattern of the form ``Weekday, DD-Mon-YY HH:MM:SS GMT`` where
# the weekday name is six to nine word characters long.
RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T))
|
|
|
|
# Whole-string pattern of the form ``Wdy Mon DD HH:MM:SS YYYY`` where the day
# may be space-padded; there is no timezone suffix.
ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))
|
|
|
|
|
2014-06-26 22:55:36 +08:00
|
|
|
# Delimiter characters named after the "gen-delims" set of RFC 3986. Wrapped
# in str() so the value is the interpreter's native string type even though
# the module enables unicode_literals.
RFC3986_GENDELIMS = str(":/?#[]@")
|
|
|
|
# Delimiter characters named after the "sub-delims" set of RFC 3986. Wrapped
# in str() so the value is the interpreter's native string type even though
# the module enables unicode_literals.
RFC3986_SUBDELIMS = str("!$&'()*+,;=")
|
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2015-11-07 21:30:20 +08:00
|
|
|
@keep_lazy_text
def urlquote(url, safe='/'):
    """
    Percent-encode ``url`` like urllib's quote(), accepting unicode input.

    The url is UTF-8 encoded before quoting. ``safe`` lists the characters
    that must be left unquoted. The result can be used as part of an argument
    to a later iri_to_uri() call without being quoted a second time.
    """
    native_url = force_str(url)
    native_safe = force_str(safe)
    quoted = quote(native_url, native_safe)
    return force_text(quoted)
|
Merged Unicode branch into trunk (r4952:5608). This should be fully
backwards compatible for all practical purposes.
Fixed #2391, #2489, #2996, #3322, #3344, #3370, #3406, #3432, #3454, #3492, #3582, #3690, #3878, #3891, #3937, #4039, #4141, #4227, #4286, #4291, #4300, #4452, #4702
git-svn-id: http://code.djangoproject.com/svn/django/trunk@5609 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2007-07-04 20:11:04 +08:00
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2015-11-07 21:30:20 +08:00
|
|
|
@keep_lazy_text
def urlquote_plus(url, safe=''):
    """
    Percent-encode ``url`` like urllib's quote_plus(), accepting unicode input.

    The url is UTF-8 encoded before quoting. ``safe`` lists the characters
    that must be left unquoted. The result can be used as part of an argument
    to a later iri_to_uri() call without being quoted a second time.
    """
    native_url = force_str(url)
    native_safe = force_str(safe)
    quoted = quote_plus(native_url, native_safe)
    return force_text(quoted)
|
Merged Unicode branch into trunk (r4952:5608). This should be fully
backwards compatible for all practical purposes.
Fixed #2391, #2489, #2996, #3322, #3344, #3370, #3406, #3432, #3454, #3492, #3582, #3690, #3878, #3891, #3937, #4039, #4141, #4227, #4286, #4291, #4300, #4452, #4702
git-svn-id: http://code.djangoproject.com/svn/django/trunk@5609 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2007-07-04 20:11:04 +08:00
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2015-11-07 21:30:20 +08:00
|
|
|
@keep_lazy_text
def urlunquote(quoted_url):
    """
    Undo django.utils.http.urlquote() by delegating to urllib's unquote().

    The input is converted with force_str() first and the unquoted result is
    returned as text.
    """
    native_url = force_str(quoted_url)
    return force_text(unquote(native_url))
|
2012-01-29 17:00:12 +08:00
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2015-11-07 21:30:20 +08:00
|
|
|
@keep_lazy_text
def urlunquote_plus(quoted_url):
    """
    Undo django.utils.http.urlquote_plus() by delegating to urllib's
    unquote_plus().

    The input is converted with force_str() first and the unquoted result is
    returned as text.
    """
    native_url = force_str(quoted_url)
    return force_text(unquote_plus(native_url))
|
2012-01-29 17:00:12 +08:00
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
Merged Unicode branch into trunk (r4952:5608). This should be fully
backwards compatible for all practical purposes.
Fixed #2391, #2489, #2996, #3322, #3344, #3370, #3406, #3432, #3454, #3492, #3582, #3690, #3878, #3891, #3937, #4039, #4141, #4227, #4286, #4291, #4300, #4452, #4702
git-svn-id: http://code.djangoproject.com/svn/django/trunk@5609 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2007-07-04 20:11:04 +08:00
|
|
|
def urlencode(query, doseq=0):
    """
    Encode ``query`` like urllib's urlencode(), accepting unicode strings.

    ``query`` may be a MultiValueDict (its lists() are used), any object with
    an items() method, or an iterable of ``(key, value)`` pairs. Keys and
    values are converted with force_str() before encoding; list and tuple
    values are converted element by element. ``doseq`` is forwarded unchanged
    to urllib's urlencode().
    """
    if isinstance(query, MultiValueDict):
        query = query.lists()
    elif hasattr(query, 'items'):
        query = query.items()

    pairs = []
    for key, value in query:
        # Convert the key before the value, as a single tuple expression would.
        native_key = force_str(key)
        if isinstance(value, (list, tuple)):
            native_value = [force_str(item) for item in value]
        else:
            native_value = force_str(value)
        pairs.append((native_key, native_value))
    return original_urlencode(pairs, doseq)
|
Merged Unicode branch into trunk (r4952:5608). This should be fully
backwards compatible for all practical purposes.
Fixed #2391, #2489, #2996, #3322, #3344, #3370, #3406, #3432, #3454, #3492, #3582, #3690, #3878, #3891, #3937, #4039, #4141, #4227, #4286, #4291, #4300, #4452, #4702
git-svn-id: http://code.djangoproject.com/svn/django/trunk@5609 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2007-07-04 20:11:04 +08:00
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2007-10-31 11:59:40 +08:00
|
|
|
def cookie_date(epoch_seconds=None):
    """
    Format a time for compatibility with Netscape's cookie standard.

    ``epoch_seconds`` is a floating point number of seconds since the epoch,
    in UTC, such as the value returned by time.time(). When it is None the
    current time is used.

    Returns a string in the format 'Wdy, DD-Mon-YYYY HH:MM:SS GMT'.
    """
    stamp = formatdate(epoch_seconds)
    # Cut the 'Wdy, DD Mon YYYY HH:MM:SS' prefix into the three pieces that
    # the cookie format joins with dashes; the trailing zone is dropped.
    weekday_and_day = stamp[:7]
    month = stamp[8:11]
    year_and_time = stamp[12:25]
    return '%s-%s-%s GMT' % (weekday_and_day, month, year_and_time)
|
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2007-10-31 11:59:40 +08:00
|
|
|
def http_date(epoch_seconds=None):
    """
    Format a time in the RFC1123 date format specified by HTTP RFC2616
    section 3.3.1.

    ``epoch_seconds`` is a floating point number of seconds since the epoch,
    in UTC, such as the value returned by time.time(). When it is None the
    current time is used.

    Returns a string in the format 'Wdy, DD Mon YYYY HH:MM:SS GMT'.
    """
    rfc1123_stamp = formatdate(epoch_seconds, usegmt=True)
    return rfc1123_stamp
|
2008-08-01 04:47:53 +08:00
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2011-03-01 22:28:06 +08:00
|
|
|
def parse_http_date(date):
    """
    Parse a date in one of the formats specified by HTTP RFC2616 section
    3.3.1.

    All three formats allowed by the RFC are accepted, even though only the
    first one is still in widespread use.

    Returns an integer number of seconds since the epoch, in UTC. Raises
    ValueError when ``date`` matches none of the formats or when its fields
    do not form a valid date.
    """
    # RFC2616 makes RFC850 dates mandatory as well as RFC1123 ones, so the
    # parsing is done here against each of the three patterns in turn.
    match = None
    for pattern in (RFC1123_DATE, RFC850_DATE, ASCTIME_DATE):
        match = pattern.match(date)
        if match is not None:
            break
    if match is None:
        raise ValueError("%r is not in a valid HTTP date format" % date)

    try:
        year = int(match.group('year'))
        # Years below 100 are pivoted: below 70 means 20xx, otherwise 19xx.
        if year < 70:
            year += 2000
        elif year < 100:
            year += 1900
        month = MONTHS.index(match.group('mon').lower()) + 1
        day = int(match.group('day'))
        hour = int(match.group('hour'))
        minute = int(match.group('min'))
        second = int(match.group('sec'))
        moment = datetime.datetime(year, month, day, hour, minute, second)
        return calendar.timegm(moment.utctimetuple())
    except Exception:
        # Re-raise as ValueError while keeping the original traceback.
        six.reraise(ValueError, ValueError("%r is not a valid date" % date), sys.exc_info()[2])
|
2011-03-01 22:28:06 +08:00
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2011-03-01 22:28:06 +08:00
|
|
|
def parse_http_date_safe(date):
    """
    Like parse_http_date(), but return None instead of raising when the
    input is invalid.
    """
    try:
        parsed = parse_http_date(date)
    except Exception:
        # Any failure is deliberately reported as "no date".
        return None
    return parsed
|
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2008-08-01 04:47:53 +08:00
|
|
|
# Base 36 functions: useful for generating compact URLs
|
|
|
|
|
|
|
|
def base36_to_int(s):
    """
    Convert a base 36 string to an ``int``. Raises ``ValueError`` if the
    input won't fit into an int.
    """
    too_large = "Base36 input too large"
    # To prevent overconsumption of server resources, reject any base36
    # string that is longer than 13 base36 digits (13 digits is sufficient
    # to base36-encode any 64-bit integer).
    if len(s) > 13:
        raise ValueError(too_large)
    number = int(s, 36)
    # Final check that the value fits into an int, to avoid returning a long
    # (#15067). The long type was removed in Python 3, hence the PY2 guard.
    if six.PY2 and number > sys.maxint:
        raise ValueError(too_large)
    return number
|
2008-08-01 04:47:53 +08:00
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2008-08-01 04:47:53 +08:00
|
|
|
def int_to_base36(i):
    """
    Convert a non-negative integer to a base36 string.

    Digits are ``0-9`` followed by ``a-z``. Raises ``ValueError`` for
    negative input. On Python 2 only, also raises ``TypeError`` for
    non-integer input and ``ValueError`` for values above ``sys.maxint``.
    """
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    if i < 0:
        raise ValueError("Negative base36 conversion input.")
    if six.PY2:
        if not isinstance(i, six.integer_types):
            raise TypeError("Non-integer base36 conversion input.")
        if i > sys.maxint:
            raise ValueError("Base36 conversion input too large.")
    # Single-digit values need no division loop.
    if i < 36:
        return alphabet[i]
    # Collect digits least-significant first, then flip them into order.
    digits = []
    while i != 0:
        i, remainder = divmod(i, 36)
        digits.append(alphabet[remainder])
    digits.reverse()
    return ''.join(digits)
|
2009-03-22 15:58:29 +08:00
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2013-06-22 04:59:33 +08:00
|
|
|
def urlsafe_base64_encode(s):
    """
    Encode a bytestring in URL-safe base64, dropping any trailing equal
    signs (and newlines) from the result.
    """
    encoded = base64.urlsafe_b64encode(s)
    return encoded.rstrip(b'\n=')
|
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2013-06-22 04:59:33 +08:00
|
|
|
def urlsafe_base64_decode(s):
    """
    Decode a URL-safe base64 string, first adding back trailing equal signs
    that might have been stripped.

    The input is converted with force_bytes(). Raises ValueError when the
    padded value cannot be decoded.
    """
    s = force_bytes(s)
    # NOTE(review): this appends len(s) % 4 equal signs, which is more than
    # strictly required when len(s) % 4 == 3; presumably the decoder ignores
    # the surplus padding -- confirm before changing.
    padded = s.ljust(len(s) + len(s) % 4, b'=')
    try:
        return base64.urlsafe_b64decode(padded)
    except (LookupError, BinasciiError) as e:
        raise ValueError(e)
|
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2009-03-22 15:58:29 +08:00
|
|
|
def parse_etags(etag_str):
    """
    Parse a string holding one or several etags, as passed in If-None-Match
    and If-Match headers, by the rules in RFC 2616.

    Returns a list of etags without their surrounding double quotes (") and
    with \\<CHAR> escapes resolved. When no quoted etag is found, the input
    is returned unchanged as the only list element.
    """
    quoted_values = ETAG_MATCH.findall(etag_str)
    if not quoted_values:
        # etag_str has wrong format, treat it as an opaque string then
        return [etag_str]
    unescaped = []
    for raw in quoted_values:
        unescaped.append(raw.encode('ascii').decode('unicode_escape'))
    return unescaped
|
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2009-03-22 15:58:29 +08:00
|
|
|
def quote_etag(etag):
    """
    Wrap a string in double quotes, escaping its contents as necessary.

    Backslashes are doubled first, then each double quote is prefixed with a
    backslash.
    """
    escaped = etag.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return '"%s"' % escaped
|
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2016-01-05 15:09:10 +08:00
|
|
|
def unquote_etag(etag):
    """
    Unquote an ETag string; i.e. revert quote_etag().

    Exactly one pair of surrounding double quotes is removed, so an ETag
    whose content begins or ends with an escaped quote survives the round
    trip. Escaped quotes and backslashes are then restored. Input that is
    not wrapped in a pair of quotes has any leading or trailing quotes
    stripped instead. Falsy input (``None`` or an empty string) is returned
    unchanged.
    """
    if not etag:
        return etag
    if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
        # Remove only the delimiters; a blanket strip('"') would also eat an
        # escaped quote sitting at the very end of the content.
        inner = etag[1:-1]
    else:
        inner = etag.strip('"')
    return inner.replace('\\"', '"').replace('\\\\', '\\')
|
|
|
|
|
|
|
|
|
2015-03-17 17:52:55 +08:00
|
|
|
def is_same_domain(host, pattern):
    """
    Return ``True`` if the host is either an exact match or a match
    to the wildcard pattern.

    Any pattern beginning with a period matches a domain and all of its
    subdomains. (e.g. ``.example.com`` matches ``example.com`` and
    ``foo.example.com``). Anything else is an exact string match.

    Only the pattern is lower-cased; the host is compared as given. An empty
    pattern never matches.
    """
    if not pattern:
        return False

    pattern = pattern.lower()
    if pattern == host:
        return True
    if pattern[0] != '.':
        return False
    # Wildcard: accept any subdomain, or the bare domain without the dot.
    return host.endswith(pattern) or host == pattern[1:]
|
|
|
|
|
2013-11-03 07:53:29 +08:00
|
|
|
|
2012-11-18 05:00:53 +08:00
|
|
|
def is_safe_url(url, host=None):
    """
    Return ``True`` if the url is a safe redirection (i.e. it doesn't point to
    a different host and uses a safe scheme).

    Always returns ``False`` on an empty url.
    """
    if url is None:
        return False
    url = url.strip()
    if not url:
        return False
    if six.PY2:
        try:
            url = force_text(url)
        except UnicodeDecodeError:
            return False
    # Chrome treats \ completely as / in paths but it could be part of some
    # basic auth credentials so we need to check both URLs.
    if not _is_safe_url(url, host):
        return False
    return _is_safe_url(url.replace('\\', '/'), host)
|
|
|
|
|
|
|
|
|
|
|
|
def _is_safe_url(url, host):
|
2014-05-12 19:38:39 +08:00
|
|
|
# Chrome considers any URL with more than two slashes to be absolute, but
|
2014-05-15 03:00:57 +08:00
|
|
|
# urlparse is not so flexible. Treat any url with three slashes as unsafe.
|
2014-05-12 19:38:39 +08:00
|
|
|
if url.startswith('///'):
|
|
|
|
return False
|
2013-09-06 03:38:59 +08:00
|
|
|
url_info = urlparse(url)
|
2014-05-12 19:38:39 +08:00
|
|
|
# Forbid URLs like http:///example.com - with a scheme, but without a hostname.
|
|
|
|
# In that URL, example.com is not the hostname but, a path component. However,
|
|
|
|
# Chrome will still consider example.com to be the hostname, so we must not
|
|
|
|
# allow this syntax.
|
|
|
|
if not url_info.netloc and url_info.scheme:
|
|
|
|
return False
|
2015-03-10 08:05:13 +08:00
|
|
|
# Forbid URLs that start with control characters. Some browsers (like
|
|
|
|
# Chrome) ignore quite a few control characters at the start of a
|
|
|
|
# URL and might consider the URL as scheme relative.
|
|
|
|
if unicodedata.category(url[0])[0] == 'C':
|
|
|
|
return False
|
2013-09-22 20:01:57 +08:00
|
|
|
return ((not url_info.netloc or url_info.netloc == host) and
|
|
|
|
(not url_info.scheme or url_info.scheme in ['http', 'https']))
|