mirror of https://github.com/django/django.git
Fixed #19508 -- Implemented uri_to_iri as per RFC.
Thanks Loic Bistuer for helping in shaping the patch and Claude Paroz for the review.
This commit is contained in:
parent
3af5af1a61
commit
10b17a22be
|
@ -206,7 +206,6 @@ def get_path_info(environ):
|
||||||
"""
|
"""
|
||||||
path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
|
path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
|
||||||
|
|
||||||
# It'd be better to implement URI-to-IRI decoding, see #19508.
|
|
||||||
return path_info.decode(UTF_8)
|
return path_info.decode(UTF_8)
|
||||||
|
|
||||||
|
|
||||||
|
@ -236,7 +235,6 @@ def get_script_name(environ):
|
||||||
else:
|
else:
|
||||||
script_name = get_bytes_from_wsgi(environ, 'SCRIPT_NAME', '')
|
script_name = get_bytes_from_wsgi(environ, 'SCRIPT_NAME', '')
|
||||||
|
|
||||||
# It'd be better to implement URI-to-IRI decoding, see #19508.
|
|
||||||
return script_name.decode(UTF_8)
|
return script_name.decode(UTF_8)
|
||||||
|
|
||||||
|
|
||||||
|
@ -251,16 +249,15 @@ def get_bytes_from_wsgi(environ, key, default):
|
||||||
# Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
|
# Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
|
||||||
# decoded with ISO-8859-1. This is wrong for Django websites where UTF-8
|
# decoded with ISO-8859-1. This is wrong for Django websites where UTF-8
|
||||||
# is the default. Re-encode to recover the original bytestring.
|
# is the default. Re-encode to recover the original bytestring.
|
||||||
return value if six.PY2 else value.encode(ISO_8859_1)
|
return value.encode(ISO_8859_1) if six.PY3 else value
|
||||||
|
|
||||||
|
|
||||||
def get_str_from_wsgi(environ, key, default):
|
def get_str_from_wsgi(environ, key, default):
|
||||||
"""
|
"""
|
||||||
Get a value from the WSGI environ dictionary as bytes.
|
Get a value from the WSGI environ dictionary as str.
|
||||||
|
|
||||||
key and default should be str objects. Under Python 2 they may also be
|
key and default should be str objects. Under Python 2 they may also be
|
||||||
unicode objects provided they only contain ASCII characters.
|
unicode objects provided they only contain ASCII characters.
|
||||||
"""
|
"""
|
||||||
value = environ.get(str(key), str(default))
|
value = get_bytes_from_wsgi(environ, key, default)
|
||||||
# Same comment as above
|
return value.decode(UTF_8, errors='replace') if six.PY3 else value
|
||||||
return value if six.PY2 else value.encode(ISO_8859_1).decode(UTF_8, errors='replace')
|
|
||||||
|
|
|
@ -15,9 +15,11 @@ from wsgiref import simple_server
|
||||||
from wsgiref.util import FileWrapper # NOQA: for backwards compatibility
|
from wsgiref.util import FileWrapper # NOQA: for backwards compatibility
|
||||||
|
|
||||||
from django.core.exceptions import ImproperlyConfigured
|
from django.core.exceptions import ImproperlyConfigured
|
||||||
|
from django.core.handlers.wsgi import ISO_8859_1, UTF_8
|
||||||
from django.core.management.color import color_style
|
from django.core.management.color import color_style
|
||||||
from django.core.wsgi import get_wsgi_application
|
from django.core.wsgi import get_wsgi_application
|
||||||
from django.utils import six
|
from django.utils import six
|
||||||
|
from django.utils.encoding import uri_to_iri
|
||||||
from django.utils.module_loading import import_string
|
from django.utils.module_loading import import_string
|
||||||
from django.utils.six.moves import socketserver
|
from django.utils.six.moves import socketserver
|
||||||
|
|
||||||
|
@ -117,6 +119,21 @@ class WSGIRequestHandler(simple_server.WSGIRequestHandler, object):
|
||||||
|
|
||||||
sys.stderr.write(msg)
|
sys.stderr.write(msg)
|
||||||
|
|
||||||
|
def get_environ(self):
|
||||||
|
env = super(WSGIRequestHandler, self).get_environ()
|
||||||
|
|
||||||
|
path = self.path
|
||||||
|
if '?' in path:
|
||||||
|
path = path.partition('?')[0]
|
||||||
|
|
||||||
|
path = uri_to_iri(path).encode(UTF_8)
|
||||||
|
# Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
|
||||||
|
# decoded with ISO-8859-1. We replicate this behavior here.
|
||||||
|
# Refs comment in `get_bytes_from_wsgi()`.
|
||||||
|
env['PATH_INFO'] = path.decode(ISO_8859_1) if six.PY3 else path
|
||||||
|
|
||||||
|
return env
|
||||||
|
|
||||||
|
|
||||||
def run(addr, port, wsgi_handler, ipv6=False, threading=False):
|
def run(addr, port, wsgi_handler, ipv6=False, threading=False):
|
||||||
server_address = (addr, port)
|
server_address = (addr, port)
|
||||||
|
|
|
@ -12,7 +12,7 @@ from django.apps import apps
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core import urlresolvers
|
from django.core import urlresolvers
|
||||||
from django.core.handlers.base import BaseHandler
|
from django.core.handlers.base import BaseHandler
|
||||||
from django.core.handlers.wsgi import WSGIRequest
|
from django.core.handlers.wsgi import WSGIRequest, ISO_8859_1, UTF_8
|
||||||
from django.core.signals import (request_started, request_finished,
|
from django.core.signals import (request_started, request_finished,
|
||||||
got_request_exception)
|
got_request_exception)
|
||||||
from django.db import close_old_connections
|
from django.db import close_old_connections
|
||||||
|
@ -20,11 +20,11 @@ from django.http import SimpleCookie, HttpRequest, QueryDict
|
||||||
from django.template import TemplateDoesNotExist
|
from django.template import TemplateDoesNotExist
|
||||||
from django.test import signals
|
from django.test import signals
|
||||||
from django.utils.functional import curry, SimpleLazyObject
|
from django.utils.functional import curry, SimpleLazyObject
|
||||||
from django.utils.encoding import force_bytes, force_str
|
from django.utils.encoding import force_bytes, force_str, uri_to_iri
|
||||||
from django.utils.http import urlencode
|
from django.utils.http import urlencode
|
||||||
from django.utils.itercompat import is_iterable
|
from django.utils.itercompat import is_iterable
|
||||||
from django.utils import six
|
from django.utils import six
|
||||||
from django.utils.six.moves.urllib.parse import unquote, urlparse, urlsplit
|
from django.utils.six.moves.urllib.parse import urlparse, urlsplit
|
||||||
from django.test.utils import ContextList
|
from django.test.utils import ContextList
|
||||||
|
|
||||||
__all__ = ('Client', 'RequestFactory', 'encode_file', 'encode_multipart')
|
__all__ = ('Client', 'RequestFactory', 'encode_file', 'encode_multipart')
|
||||||
|
@ -270,11 +270,11 @@ class RequestFactory(object):
|
||||||
# If there are parameters, add them
|
# If there are parameters, add them
|
||||||
if parsed[3]:
|
if parsed[3]:
|
||||||
path += str(";") + force_str(parsed[3])
|
path += str(";") + force_str(parsed[3])
|
||||||
path = unquote(path)
|
path = uri_to_iri(path).encode(UTF_8)
|
||||||
# WSGI requires latin-1 encoded strings. See get_path_info().
|
# Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
|
||||||
if six.PY3:
|
# decoded with ISO-8859-1. We replicate this behavior here.
|
||||||
path = path.encode('utf-8').decode('iso-8859-1')
|
# Refs comment in `get_bytes_from_wsgi()`.
|
||||||
return path
|
return path.decode(ISO_8859_1) if six.PY3 else path
|
||||||
|
|
||||||
def get(self, path, data=None, secure=False, **extra):
|
def get(self, path, data=None, secure=False, **extra):
|
||||||
"Construct a GET request."
|
"Construct a GET request."
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# -*- encoding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
|
@ -7,7 +8,9 @@ import locale
|
||||||
|
|
||||||
from django.utils.functional import Promise
|
from django.utils.functional import Promise
|
||||||
from django.utils import six
|
from django.utils import six
|
||||||
from django.utils.six.moves.urllib.parse import quote
|
from django.utils.six.moves.urllib.parse import quote, unquote
|
||||||
|
if six.PY3:
|
||||||
|
from urllib.parse import unquote_to_bytes
|
||||||
|
|
||||||
|
|
||||||
class DjangoUnicodeDecodeError(UnicodeDecodeError):
|
class DjangoUnicodeDecodeError(UnicodeDecodeError):
|
||||||
|
@ -185,7 +188,9 @@ def iri_to_uri(iri):
|
||||||
assuming input is either UTF-8 or unicode already, we can simplify things a
|
assuming input is either UTF-8 or unicode already, we can simplify things a
|
||||||
little from the full method.
|
little from the full method.
|
||||||
|
|
||||||
Returns an ASCII string containing the encoded result.
|
Takes an IRI in UTF-8 bytes (e.g. '/I \xe2\x99\xa5 Django/') or unicode
|
||||||
|
(e.g. '/I ♥ Django/') and returns ASCII bytes containing the encoded result
|
||||||
|
(e.g. '/I%20%E2%99%A5%20Django/').
|
||||||
"""
|
"""
|
||||||
# The list of safe characters here is constructed from the "reserved" and
|
# The list of safe characters here is constructed from the "reserved" and
|
||||||
# "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
|
# "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
|
||||||
|
@ -204,6 +209,38 @@ def iri_to_uri(iri):
|
||||||
return quote(force_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
|
return quote(force_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
|
||||||
|
|
||||||
|
|
||||||
|
def uri_to_iri(uri):
|
||||||
|
"""
|
||||||
|
Converts a Uniform Resource Identifier (URI) into an Internationalized
|
||||||
|
Resource Identifier (IRI).
|
||||||
|
|
||||||
|
This is the algorithm from section 3.2 of RFC 3987.
|
||||||
|
|
||||||
|
Takes a URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
|
||||||
|
unicode containing the decoded result (e.g. '/I ♥ Django/').
|
||||||
|
"""
|
||||||
|
if uri is None:
|
||||||
|
return uri
|
||||||
|
uri = force_bytes(uri)
|
||||||
|
iri = unquote_to_bytes(uri) if six.PY3 else unquote(uri)
|
||||||
|
return repercent_broken_unicode(iri).decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
def repercent_broken_unicode(path):
|
||||||
|
"""
|
||||||
|
As per section 3.2 of RFC 3987, step three of converting a URI into an IRI,
|
||||||
|
we need to re-percent-encode any octet produced that is not part of a
|
||||||
|
strictly legal UTF-8 octet sequence.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
path.decode('utf-8')
|
||||||
|
except UnicodeDecodeError as e:
|
||||||
|
repercent = quote(path[e.start:e.end], safe=b"/#%[]=:;$&()+,!?*@'~")
|
||||||
|
path = repercent_broken_unicode(
|
||||||
|
path[:e.start] + force_bytes(repercent) + path[e.end:])
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
def filepath_to_uri(path):
|
def filepath_to_uri(path):
|
||||||
"""Convert a file system path to a URI portion that is suitable for
|
"""Convert a file system path to a URI portion that is suitable for
|
||||||
inclusion in a URL.
|
inclusion in a URL.
|
||||||
|
|
|
@ -173,11 +173,11 @@ URL from an IRI_ -- very loosely speaking, a URI_ that can contain Unicode
|
||||||
characters. Quoting and converting an IRI to URI can be a little tricky, so
|
characters. Quoting and converting an IRI to URI can be a little tricky, so
|
||||||
Django provides some assistance.
|
Django provides some assistance.
|
||||||
|
|
||||||
* The function ``django.utils.encoding.iri_to_uri()`` implements the
|
* The function :func:`django.utils.encoding.iri_to_uri()` implements the
|
||||||
conversion from IRI to URI as required by the specification (:rfc:`3987`).
|
conversion from IRI to URI as required by the specification (:rfc:`3987#section-3.1`).
|
||||||
|
|
||||||
* The functions ``django.utils.http.urlquote()`` and
|
* The functions :func:`django.utils.http.urlquote()` and
|
||||||
``django.utils.http.urlquote_plus()`` are versions of Python's standard
|
:func:`django.utils.http.urlquote_plus()` are versions of Python's standard
|
||||||
``urllib.quote()`` and ``urllib.quote_plus()`` that work with non-ASCII
|
``urllib.quote()`` and ``urllib.quote_plus()`` that work with non-ASCII
|
||||||
characters. (The data is converted to UTF-8 prior to encoding.)
|
characters. (The data is converted to UTF-8 prior to encoding.)
|
||||||
|
|
||||||
|
@ -213,12 +213,29 @@ you can construct your IRI without worrying about whether it contains
|
||||||
non-ASCII characters and then, right at the end, call ``iri_to_uri()`` on the
|
non-ASCII characters and then, right at the end, call ``iri_to_uri()`` on the
|
||||||
result.
|
result.
|
||||||
|
|
||||||
The ``iri_to_uri()`` function is also idempotent, which means the following is
|
Similarly, Django provides :func:`django.utils.encoding.uri_to_iri()` which
|
||||||
always true::
|
implements the conversion from URI to IRI as per :rfc:`3987#section-3.2`.
|
||||||
|
It decodes all percent-encodings except those that don't represent a valid
|
||||||
|
UTF-8 sequence.
|
||||||
|
|
||||||
|
An example to demonstrate::
|
||||||
|
|
||||||
|
>>> uri_to_iri('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93')
|
||||||
|
'/♥♥/?utf8=✓'
|
||||||
|
>>> uri_to_iri('%A9helloworld')
|
||||||
|
'%A9helloworld'
|
||||||
|
|
||||||
|
In the first example, the UTF-8 characters and reserved characters are
|
||||||
|
unquoted. In the second, the percent-encoding remains unchanged because it
|
||||||
|
lies outside the valid UTF-8 range.
|
||||||
|
|
||||||
|
Both the ``iri_to_uri()`` and ``uri_to_iri()`` functions are idempotent, which means the
|
||||||
|
following is always true::
|
||||||
|
|
||||||
iri_to_uri(iri_to_uri(some_string)) = iri_to_uri(some_string)
|
iri_to_uri(iri_to_uri(some_string)) = iri_to_uri(some_string)
|
||||||
|
uri_to_iri(uri_to_iri(some_string)) = uri_to_iri(some_string)
|
||||||
|
|
||||||
So you can safely call it multiple times on the same IRI without risking
|
So you can safely call it multiple times on the same URI/IRI without risking
|
||||||
double-quoting problems.
|
double-quoting problems.
|
||||||
|
|
||||||
.. _URI: http://www.ietf.org/rfc/rfc2396.txt
|
.. _URI: http://www.ietf.org/rfc/rfc2396.txt
|
||||||
|
|
|
@ -271,7 +271,20 @@ The functions defined in this module share the following properties:
|
||||||
since we are assuming input is either UTF-8 or unicode already, we can
|
since we are assuming input is either UTF-8 or unicode already, we can
|
||||||
simplify things a little from the full method.
|
simplify things a little from the full method.
|
||||||
|
|
||||||
Returns an ASCII string containing the encoded result.
|
Takes an IRI in UTF-8 bytes and returns ASCII bytes containing the encoded
|
||||||
|
result.
|
||||||
|
|
||||||
|
.. function:: uri_to_iri(uri)
|
||||||
|
|
||||||
|
.. versionadded:: 1.8
|
||||||
|
|
||||||
|
Converts a Uniform Resource Identifier into an Internationalized Resource
|
||||||
|
Identifier.
|
||||||
|
|
||||||
|
This is the algorithm from :rfc:`3987#section-3.2`.
|
||||||
|
|
||||||
|
Takes a URI in ASCII bytes and returns a unicode string containing the
|
||||||
|
decoded result.
|
||||||
|
|
||||||
.. function:: filepath_to_uri(path)
|
.. function:: filepath_to_uri(path)
|
||||||
|
|
||||||
|
|
|
@ -348,6 +348,9 @@ Requests and Responses
|
||||||
* The :attr:`HttpResponse.charset <django.http.HttpResponse.charset>` attribute
|
* The :attr:`HttpResponse.charset <django.http.HttpResponse.charset>` attribute
|
||||||
was added.
|
was added.
|
||||||
|
|
||||||
|
* ``WSGIRequestHandler`` now follows :rfc:`3987#section-3.2` when converting a URI to an IRI, using
|
||||||
|
``uri_to_iri()``.
|
||||||
|
|
||||||
Tests
|
Tests
|
||||||
^^^^^
|
^^^^^
|
||||||
|
|
||||||
|
|
|
@ -161,3 +161,28 @@ class HandlerSuspiciousOpsTest(TestCase):
|
||||||
def test_suspiciousop_in_view_returns_400(self):
|
def test_suspiciousop_in_view_returns_400(self):
|
||||||
response = self.client.get('/suspicious/')
|
response = self.client.get('/suspicious/')
|
||||||
self.assertEqual(response.status_code, 400)
|
self.assertEqual(response.status_code, 400)
|
||||||
|
|
||||||
|
|
||||||
|
@override_settings(ROOT_URLCONF='handlers.urls')
|
||||||
|
class HandlerNotFoundTest(TestCase):
|
||||||
|
|
||||||
|
def test_invalid_urls(self):
|
||||||
|
response = self.client.get('~%A9helloworld')
|
||||||
|
self.assertEqual(response.status_code, 404)
|
||||||
|
self.assertContains(response, '~%A9helloworld', status_code=404)
|
||||||
|
|
||||||
|
response = self.client.get('d%aao%aaw%aan%aal%aao%aaa%aad%aa/')
|
||||||
|
self.assertEqual(response.status_code, 404)
|
||||||
|
self.assertContains(response, 'd%AAo%AAw%AAn%AAl%AAo%AAa%AAd%AA', status_code=404)
|
||||||
|
|
||||||
|
response = self.client.get('/%E2%99%E2%99%A5/')
|
||||||
|
self.assertEqual(response.status_code, 404)
|
||||||
|
self.assertContains(response, '%E2%99\u2665', status_code=404)
|
||||||
|
|
||||||
|
response = self.client.get('/%E2%98%8E%E2%A9%E2%99%A5/')
|
||||||
|
self.assertEqual(response.status_code, 404)
|
||||||
|
self.assertContains(response, '\u260e%E2%A9\u2665', status_code=404)
|
||||||
|
|
||||||
|
def test_environ_path_info_type(self):
|
||||||
|
environ = RequestFactory().get('/%E2%A8%87%87%A5%E2%A8%A0').environ
|
||||||
|
self.assertIsInstance(environ['PATH_INFO'], six.text_type)
|
||||||
|
|
|
@ -5,8 +5,8 @@ import unittest
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
from django.utils import six
|
from django.utils import six
|
||||||
from django.utils.encoding import (filepath_to_uri, force_bytes,
|
from django.utils.encoding import (filepath_to_uri, force_bytes, force_text,
|
||||||
force_text, iri_to_uri, python_2_unicode_compatible)
|
iri_to_uri, uri_to_iri)
|
||||||
from django.utils.http import urlquote_plus
|
from django.utils.http import urlquote_plus
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,6 +40,9 @@ class TestEncodingUtils(unittest.TestCase):
|
||||||
today = datetime.date.today()
|
today = datetime.date.today()
|
||||||
self.assertEqual(force_bytes(today, strings_only=True), today)
|
self.assertEqual(force_bytes(today, strings_only=True), today)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRFC3987IEncodingUtils(unittest.TestCase):
|
||||||
|
|
||||||
def test_filepath_to_uri(self):
|
def test_filepath_to_uri(self):
|
||||||
self.assertEqual(filepath_to_uri('upload\\чубака.mp4'),
|
self.assertEqual(filepath_to_uri('upload\\чубака.mp4'),
|
||||||
'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
|
'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
|
||||||
|
@ -47,22 +50,57 @@ class TestEncodingUtils(unittest.TestCase):
|
||||||
'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
|
'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
|
||||||
|
|
||||||
def test_iri_to_uri(self):
|
def test_iri_to_uri(self):
|
||||||
self.assertEqual(iri_to_uri('red%09ros\xe9#red'),
|
cases = [
|
||||||
'red%09ros%C3%A9#red')
|
# Valid UTF-8 sequences are encoded.
|
||||||
|
('red%09rosé#red', 'red%09ros%C3%A9#red'),
|
||||||
|
('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'),
|
||||||
|
('locations/%s' % urlquote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'),
|
||||||
|
|
||||||
self.assertEqual(iri_to_uri('/blog/for/J\xfcrgen M\xfcnster/'),
|
# Reserved chars remain unescaped.
|
||||||
'/blog/for/J%C3%BCrgen%20M%C3%BCnster/')
|
('%&', '%&'),
|
||||||
|
('red&♥ros%#red', 'red&%E2%99%A5ros%#red'),
|
||||||
|
]
|
||||||
|
|
||||||
self.assertEqual(iri_to_uri('locations/%s' % urlquote_plus('Paris & Orl\xe9ans')),
|
for iri, uri in cases:
|
||||||
'locations/Paris+%26+Orl%C3%A9ans')
|
self.assertEqual(iri_to_uri(iri), uri)
|
||||||
|
|
||||||
def test_iri_to_uri_idempotent(self):
|
# Test idempotency.
|
||||||
self.assertEqual(iri_to_uri(iri_to_uri('red%09ros\xe9#red')),
|
self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri)
|
||||||
'red%09ros%C3%A9#red')
|
|
||||||
|
|
||||||
@unittest.skipIf(six.PY3, "tests a class not defining __str__ under Python 2")
|
def test_uri_to_iri(self):
|
||||||
def test_decorated_class_without_str(self):
|
cases = [
|
||||||
with self.assertRaises(ValueError):
|
# Valid UTF-8 sequences are decoded.
|
||||||
@python_2_unicode_compatible
|
('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
|
||||||
class NoStr(object):
|
('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
|
||||||
pass
|
|
||||||
|
# Broken UTF-8 sequences remain escaped.
|
||||||
|
('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
|
||||||
|
('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
|
||||||
|
('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
|
||||||
|
('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
|
||||||
|
('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
|
||||||
|
]
|
||||||
|
|
||||||
|
for uri, iri in cases:
|
||||||
|
self.assertEqual(uri_to_iri(uri), iri)
|
||||||
|
|
||||||
|
# Test idempotency.
|
||||||
|
self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri)
|
||||||
|
|
||||||
|
def test_complementarity(self):
|
||||||
|
cases = [
|
||||||
|
('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen M\xfcnster/'),
|
||||||
|
('%&', '%&'),
|
||||||
|
('red&%E2%99%A5ros%#red', 'red&♥ros%#red'),
|
||||||
|
('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
|
||||||
|
('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
|
||||||
|
('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
|
||||||
|
('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
|
||||||
|
('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
|
||||||
|
('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
|
||||||
|
('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
|
||||||
|
]
|
||||||
|
|
||||||
|
for uri, iri in cases:
|
||||||
|
self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri)
|
||||||
|
self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri)
|
||||||
|
|
Loading…
Reference in New Issue