django/tests/utils_tests/test_encoding.py

import datetime
import unittest
from urllib.parse import quote_plus

from django.test import SimpleTestCase
from django.utils.encoding import (
    DjangoUnicodeDecodeError, escape_uri_path, filepath_to_uri, force_bytes,
    force_text, iri_to_uri, smart_text, uri_to_iri,
)
from django.utils.functional import SimpleLazyObject


class TestEncodingUtils(SimpleTestCase):
    """Tests for force_text(), force_bytes() and smart_text()."""

    def test_force_text_exception(self):
        """
        Broken __str__ actually raises an error.
        """
        class MyString:
            def __str__(self):
                return b'\xc3\xb6\xc3\xa4\xc3\xbc'

        # str(s) raises a TypeError if the result is not a text type.
        with self.assertRaises(TypeError):
            force_text(MyString())

    def test_force_text_lazy(self):
        """
        force_text() on a SimpleLazyObject returns a genuine str instance.
        """
        s = SimpleLazyObject(lambda: 'x')
        # assertIs compares the resulting type against str. The previous
        # assertTrue(type(...), str) passed str as the failure *message* and
        # only checked that the type object was truthy, so it always passed.
        self.assertIs(type(force_text(s)), str)

    def test_force_text_DjangoUnicodeDecodeError(self):
        """
        Undecodable bytes raise DjangoUnicodeDecodeError with a message that
        includes the offending input and its type.
        """
        msg = (
            "'utf-8' codec can't decode byte 0xff in position 0: invalid "
            "start byte. You passed in b'\\xff' (<class 'bytes'>)"
        )
        with self.assertRaisesMessage(DjangoUnicodeDecodeError, msg):
            force_text(b'\xff')

    def test_force_bytes_exception(self):
        """
        force_bytes knows how to convert to bytes an exception
        containing non-ASCII characters in its args.
        """
        error_msg = "This is an exception, voilà"
        exc = ValueError(error_msg)
        self.assertEqual(force_bytes(exc), error_msg.encode('utf-8'))
        # With errors='ignore' the non-ASCII trailing character is dropped.
        self.assertEqual(force_bytes(exc, encoding='ascii', errors='ignore'), b'This is an exception, voil')

    def test_force_bytes_strings_only(self):
        """
        With strings_only=True, force_bytes() returns a date object as is.
        """
        today = datetime.date.today()
        self.assertEqual(force_bytes(today, strings_only=True), today)

    def test_smart_text(self):
        """
        smart_text() uses __str__() for objects and converts plain values
        (int, str) to their text form.
        """
        class Test:
            def __str__(self):
                return 'ŠĐĆŽćžšđ'

        class TestU:
            def __str__(self):
                return 'ŠĐĆŽćžšđ'

            # The assertion below expects the __str__() value, not this one.
            def __bytes__(self):
                return b'Foo'

        self.assertEqual(smart_text(Test()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')
        self.assertEqual(smart_text(TestU()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')
        self.assertEqual(smart_text(1), '1')
        self.assertEqual(smart_text('foo'), 'foo')


class TestRFC3987IEncodingUtils(unittest.TestCase):
    """
    Tests for the IRI/URI conversion helpers: filepath_to_uri(), iri_to_uri(),
    uri_to_iri() and escape_uri_path().

    Table-driven tests run every case inside subTest() so that one failing
    case neither hides the remaining ones nor leaves the failing input
    unidentified in the report.
    """

    def test_filepath_to_uri(self):
        """
        Backslashes become forward slashes and non-ASCII characters are
        percent-encoded as UTF-8.
        """
        self.assertEqual(filepath_to_uri('upload\\чубака.mp4'), 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')

    def test_iri_to_uri(self):
        """
        iri_to_uri() percent-encodes non-ASCII characters, leaves reserved
        characters alone, and is idempotent.
        """
        cases = [
            # Valid UTF-8 sequences are encoded.
            ('red%09rosé#red', 'red%09ros%C3%A9#red'),
            ('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'),
            ('locations/%s' % quote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'),

            # Reserved chars remain unescaped.
            ('%&', '%&'),
            ('red&♥ros%#red', 'red&%E2%99%A5ros%#red'),
        ]

        for iri, uri in cases:
            with self.subTest(iri=iri):
                self.assertEqual(iri_to_uri(iri), uri)

                # Test idempotency.
                self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri)

    def test_uri_to_iri(self):
        """
        uri_to_iri() decodes valid percent-encoded UTF-8 sequences, keeps
        broken ones escaped, and is idempotent.
        """
        cases = [
            # Valid UTF-8 sequences are decoded.
            ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),

            # Broken UTF-8 sequences remain escaped.
            ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
            ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
            ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
            ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
        ]

        for uri, iri in cases:
            with self.subTest(uri=uri):
                self.assertEqual(uri_to_iri(uri), iri)

                # Test idempotency.
                self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri)

    def test_complementarity(self):
        """
        iri_to_uri() and uri_to_iri() round-trip each (uri, iri) pair in
        both directions.
        """
        cases = [
            ('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen M\xfcnster/'),
            ('%&', '%&'),
            ('red&%E2%99%A5ros%#red', 'red&♥ros%#red'),
            ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
            ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
            ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
            ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
            ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
        ]

        for uri, iri in cases:
            with self.subTest(uri=uri, iri=iri):
                self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri)
                self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri)

    def test_escape_uri_path(self):
        """
        escape_uri_path() escapes ';', '=', '?' and '#' in a path while
        leaving ':', '@', '&' and '+' untouched.
        """
        cases = [
            (
                '/;some/=awful/?path/:with/@lots/&of/+awful/chars',
                '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars',
            ),
            ('/foo#bar', '/foo%23bar'),
            ('/foo?bar', '/foo%3Fbar'),
        ]

        for path, escaped in cases:
            with self.subTest(path=path):
                self.assertEqual(escape_uri_path(path), escaped)
Fixed #18719 -- Made force_bytes more consistent with force_text. 2013-09-06 23:28:28 +08:00			`import datetime`
Sorted imports with isort; refs #23860. 2015-01-28 20:35:27 +08:00			`import unittest`
Refs #23919 -- Replaced usage of django.utils.http utilities with Python equivalents Thanks Tim Graham for the review. 2017-01-26 21:25:15 +08:00			`from urllib.parse import quote_plus`
Stopped using django.utils.unittest in the test suite. Refs #20680. 2013-07-01 20:22:27 +08:00
Added a test for force_text()'s DjangoUnicodeDecodeError path. 2017-02-02 04:48:53 +08:00			`from django.test import SimpleTestCase`
Fixed #18456 -- Added path escaping to HttpRequest.get_full_path(). 2014-10-31 23:43:34 +08:00			`from django.utils.encoding import (`
Added a test for force_text()'s DjangoUnicodeDecodeError path. 2017-02-02 04:48:53 +08:00			`DjangoUnicodeDecodeError, escape_uri_path, filepath_to_uri, force_bytes,`
			`force_text, iri_to_uri, smart_text, uri_to_iri,`
Fixed #18456 -- Added path escaping to HttpRequest.get_full_path(). 2014-10-31 23:43:34 +08:00			`)`
Fixed #24836 -- Made force_text() resolve lazy objects. 2015-05-27 04:46:13 +08:00			`from django.utils.functional import SimpleLazyObject`
Fixed #18902 -- Made force_bytes properly handle exception input Thanks Aymeric Augustin for the report and the initial patch. 2012-09-04 15:24:39 +08:00

Added a test for force_text()'s DjangoUnicodeDecodeError path. 2017-02-02 04:48:53 +08:00			`class TestEncodingUtils(SimpleTestCase):`
Fixed #20812 -- Error out if __unicode__/__str__ doesn't return a text type. 2013-09-07 01:16:06 +08:00			`def test_force_text_exception(self):`
			`"""`
Refs #23919 -- Removed unneeded str() calls 2017-01-20 17:20:53 +08:00			`Broken __str__ actually raises an error.`
Fixed #20812 -- Error out if __unicode__/__str__ doesn't return a text type. 2013-09-07 01:16:06 +08:00			`"""`
Refs #23919 -- Stopped inheriting from object to define new style classes. 2017-01-19 15:39:46 +08:00			`class MyString:`
Fixed #20812 -- Error out if __unicode__/__str__ doesn't return a text type. 2013-09-07 01:16:06 +08:00			`def __str__(self):`
			`return b'\xc3\xb6\xc3\xa4\xc3\xbc'`

Refs #23919 -- Removed six.PY2/PY3 usage Thanks Tim Graham for the review. 2016-12-01 18:38:01 +08:00			`# str(s) raises a TypeError if the result is not a text type.`
			`with self.assertRaises(TypeError):`
Refs #26022 -- Used context manager version of assertRaises in tests. 2016-01-17 19:26:39 +08:00			`force_text(MyString())`
Fixed #18719 -- Made force_bytes more consistent with force_text. 2013-09-06 23:28:28 +08:00
Fixed #24836 -- Made force_text() resolve lazy objects. 2015-05-27 04:46:13 +08:00			`def test_force_text_lazy(self):`
			`s = SimpleLazyObject(lambda: 'x')`
Removed unneeded force_text calls in the test suite 2017-01-24 19:22:42 +08:00			`self.assertTrue(type(force_text(s)), str)`
Fixed #24836 -- Made force_text() resolve lazy objects. 2015-05-27 04:46:13 +08:00
Added a test for force_text()'s DjangoUnicodeDecodeError path. 2017-02-02 04:48:53 +08:00			`def test_force_text_DjangoUnicodeDecodeError(self):`
			`msg = (`
			`"'utf-8' codec can't decode byte 0xff in position 0: invalid "`
			`"start byte. You passed in b'\\xff' (<class 'bytes'>)"`
			`)`
			`with self.assertRaisesMessage(DjangoUnicodeDecodeError, msg):`
			`force_text(b'\xff')`

Fixed #18902 -- Made force_bytes properly handle exception input Thanks Aymeric Augustin for the report and the initial patch. 2012-09-04 15:24:39 +08:00			`def test_force_bytes_exception(self):`
			`"""`
Refs #27392 -- Removed "Tests that", "Ensures that", etc. from test docstrings. 2016-10-27 15:53:39 +08:00			`force_bytes knows how to convert to bytes an exception`
Fixed #18902 -- Made force_bytes properly handle exception input Thanks Aymeric Augustin for the report and the initial patch. 2012-09-04 15:24:39 +08:00			`containing non-ASCII characters in its args.`
			`"""`
			`error_msg = "This is an exception, voilà"`
			`exc = ValueError(error_msg)`
Removed an untested and broken branch in force_bytes() (refs #6353). The new test crashed in the removed branch. It's unclear if the branch has value since c6a2bd9b962af1cdf46f964589e6023046cfa8ec didn't include tests. 2017-02-04 08:36:53 +08:00			`self.assertEqual(force_bytes(exc), error_msg.encode('utf-8'))`
			`self.assertEqual(force_bytes(exc, encoding='ascii', errors='ignore'), b'This is an exception, voil')`
Fixed #20108 -- Fixed filepath_to_uri decoding error This was a regression due to unicode_literals usage. Thanks Ivan Virabyan for the report and the initial patch. 2013-03-23 00:55:12 +08:00
Fixed #18719 -- Made force_bytes more consistent with force_text. 2013-09-06 23:28:28 +08:00			`def test_force_bytes_strings_only(self):`
			`today = datetime.date.today()`
			`self.assertEqual(force_bytes(today, strings_only=True), today)`

Cleaned up some forms tests. Thanks Berker Peksag and Tim Graham for the reviews. Refs #24219. 2015-01-25 23:45:54 +08:00			`def test_smart_text(self):`
			`class Test:`
Refs #23919 -- Removed six.PY2/PY3 usage Thanks Tim Graham for the review. 2016-12-01 18:38:01 +08:00			`def __str__(self):`
			`return 'ŠĐĆŽćžšđ'`
Cleaned up some forms tests. Thanks Berker Peksag and Tim Graham for the reviews. Refs #24219. 2015-01-25 23:45:54 +08:00
			`class TestU:`
Refs #23919 -- Removed six.PY2/PY3 usage Thanks Tim Graham for the review. 2016-12-01 18:38:01 +08:00			`def __str__(self):`
			`return 'ŠĐĆŽćžšđ'`
Cleaned up some forms tests. Thanks Berker Peksag and Tim Graham for the reviews. Refs #24219. 2015-01-25 23:45:54 +08:00
Refs #23919 -- Removed six.PY2/PY3 usage Thanks Tim Graham for the review. 2016-12-01 18:38:01 +08:00			`def __bytes__(self):`
			`return b'Foo'`
Cleaned up some forms tests. Thanks Berker Peksag and Tim Graham for the reviews. Refs #24219. 2015-01-25 23:45:54 +08:00
			`self.assertEqual(smart_text(Test()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')`
			`self.assertEqual(smart_text(TestU()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')`
			`self.assertEqual(smart_text(1), '1')`
			`self.assertEqual(smart_text('foo'), 'foo')`
Fixed #18456 -- Added path escaping to HttpRequest.get_full_path(). 2014-10-31 23:43:34 +08:00
Fixed #19508 -- Implemented uri_to_iri as per RFC. Thanks Loic Bistuer for helping in shaping the patch and Claude Paroz for the review. 2014-07-22 20:25:22 +08:00
			`class TestRFC3987IEncodingUtils(unittest.TestCase):`

Fixed #20108 -- Fixed filepath_to_uri decoding error This was a regression due to unicode_literals usage. Thanks Ivan Virabyan for the report and the initial patch. 2013-03-23 00:55:12 +08:00			`def test_filepath_to_uri(self):`
Fixed E128 flake8 warnings in tests/. 2016-04-08 10:04:45 +08:00			`self.assertEqual(filepath_to_uri('upload\\чубака.mp4'), 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')`
Fixed #21198 -- Prevented invalid use of @python_2_unicode_compatible. Thanks jpic for the report and chmodas for working on a patch. Reverts 2ea80b94. Refs #19362. 2013-10-14 00:06:58 +08:00
Consolidated some text utils into the utils_tests test package. 2014-09-23 20:45:59 +08:00			`def test_iri_to_uri(self):`
Fixed #19508 -- Implemented uri_to_iri as per RFC. Thanks Loic Bistuer for helping in shaping the patch and Claude Paroz for the review. 2014-07-22 20:25:22 +08:00			`cases = [`
			`# Valid UTF-8 sequences are encoded.`
			`('red%09rosé#red', 'red%09ros%C3%A9#red'),`
			`('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'),`
Refs #23919 -- Replaced usage of django.utils.http utilities with Python equivalents Thanks Tim Graham for the review. 2017-01-26 21:25:15 +08:00			`('locations/%s' % quote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'),`
Fixed #19508 -- Implemented uri_to_iri as per RFC. Thanks Loic Bistuer for helping in shaping the patch and Claude Paroz for the review. 2014-07-22 20:25:22 +08:00
			`# Reserved chars remain unescaped.`
			`('%&', '%&'),`
			`('red&♥ros%#red', 'red&%E2%99%A5ros%#red'),`
			`]`

			`for iri, uri in cases:`
			`self.assertEqual(iri_to_uri(iri), uri)`

			`# Test idempotency.`
			`self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri)`

			`def test_uri_to_iri(self):`
			`cases = [`
			`# Valid UTF-8 sequences are decoded.`
			`('/%E2%99%A5%E2%99%A5/', '/♥♥/'),`
			`('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),`

			`# Broken UTF-8 sequences remain escaped.`
			`('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),`
			`('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),`
			`('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),`
			`('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),`
			`('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),`
			`]`
Consolidated some text utils into the utils_tests test package. 2014-09-23 20:45:59 +08:00
Fixed #19508 -- Implemented uri_to_iri as per RFC. Thanks Loic Bistuer for helping in shaping the patch and Claude Paroz for the review. 2014-07-22 20:25:22 +08:00			`for uri, iri in cases:`
			`self.assertEqual(uri_to_iri(uri), iri)`
Consolidated some text utils into the utils_tests test package. 2014-09-23 20:45:59 +08:00
Fixed #19508 -- Implemented uri_to_iri as per RFC. Thanks Loic Bistuer for helping in shaping the patch and Claude Paroz for the review. 2014-07-22 20:25:22 +08:00			`# Test idempotency.`
			`self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri)`
Consolidated some text utils into the utils_tests test package. 2014-09-23 20:45:59 +08:00
Fixed #19508 -- Implemented uri_to_iri as per RFC. Thanks Loic Bistuer for helping in shaping the patch and Claude Paroz for the review. 2014-07-22 20:25:22 +08:00			`def test_complementarity(self):`
			`cases = [`
			`('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen M\xfcnster/'),`
			`('%&', '%&'),`
			`('red&%E2%99%A5ros%#red', 'red&♥ros%#red'),`
			`('/%E2%99%A5%E2%99%A5/', '/♥♥/'),`
			`('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),`
			`('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),`
			`('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),`
			`('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),`
			`('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),`
			`('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),`
			`]`
Consolidated some text utils into the utils_tests test package. 2014-09-23 20:45:59 +08:00
Fixed #19508 -- Implemented uri_to_iri as per RFC. Thanks Loic Bistuer for helping in shaping the patch and Claude Paroz for the review. 2014-07-22 20:25:22 +08:00			`for uri, iri in cases:`
			`self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri)`
			`self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri)`
Cleaned up some forms tests. Thanks Berker Peksag and Tim Graham for the reviews. Refs #24219. 2015-01-25 23:45:54 +08:00
			`def test_escape_uri_path(self):`
			`self.assertEqual(`
			`escape_uri_path('/;some/=awful/?path/:with/@lots/&of/+awful/chars'),`
			`'/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars'`
			`)`
			`self.assertEqual(escape_uri_path('/foo#bar'), '/foo%23bar')`
			`self.assertEqual(escape_uri_path('/foo?bar'), '/foo%3Fbar')`