Fixed #18916 -- Allowed non-ASCII headers.

Thanks Malcolm Tredinnick for the review.
2012-09-07 19:08:57 -04:00 · 2012-09-07 19:08:57 -04:00 · 9b07b5edeb
parent 1d235b3914
commit 9b07b5edeb
2 changed files with 74 additions and 40 deletions
--- a/django/http/__init__.py
+++ b/django/http/__init__.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import, unicode_literals

 import copy
 import datetime
+from email.header import Header
 import os
 import re
 import sys
@@ -560,31 +561,44 @@ class HttpResponse(object):
    else:
        __str__ = serialize

-    def _convert_to_ascii(self, *values):
-        """Converts all values to ascii strings."""
-        for value in values:
-            if not isinstance(value, six.string_types):
-                value = str(value)
-            try:
-                if six.PY3:
-                    # Ensure string only contains ASCII
-                    value.encode('us-ascii')
+    def _convert_to_charset(self, value, charset, mime_encode=False):
+        """Converts headers key/value to ascii/latin1 native strings.
+
+        `charset` must be 'ascii' or 'latin-1'. If `mime_encode` is True and
+        `value` can't be represented in the given charset, MIME-encoding
+        is applied.
+        """
+        if not isinstance(value, (bytes, six.text_type)):
+            value = str(value)
+        try:
+            if six.PY3:
+                if isinstance(value, str):
+                    # Ensure string is valid in given charset
+                    value.encode(charset)
                else:
-                    if isinstance(value, str):
-                        # Ensure string only contains ASCII
-                        value.decode('us-ascii')
-                    else:
-                        # Convert unicode to an ASCII string
-                        value = value.encode('us-ascii')
-            except UnicodeError as e:
-                e.reason += ', HTTP response headers must be in US-ASCII format'
+                    # Convert bytestring using given charset
+                    value = value.decode(charset)
+            else:
+                if isinstance(value, str):
+                    # Ensure string is valid in given charset
+                    value.decode(charset)
+                else:
+                    # Convert unicode string to given charset
+                    value = value.encode(charset)
+        except UnicodeError as e:
+            if mime_encode:
+                # Wrapping in str() is a workaround for #12422 under Python 2.
+                value = str(Header(value, 'utf-8').encode())
+            else:
+                e.reason += ', HTTP response headers must be in %s format' % charset
                raise
-            if '\n' in value or '\r' in value:
-                raise BadHeaderError("Header values can't contain newlines (got %r)" % value)
-            yield value
+        if str('\n') in value or str('\r') in value:
+            raise BadHeaderError("Header values can't contain newlines (got %r)" % value)
+        return value

    def __setitem__(self, header, value):
-        header, value = self._convert_to_ascii(header, value)
+        header = self._convert_to_charset(header, 'ascii')
+        value = self._convert_to_charset(value, 'latin1', mime_encode=True)
        self._headers[header.lower()] = (header, value)

    def __delitem__(self, header):
--- a/tests/regressiontests/httpwrappers/tests.py
+++ b/tests/regressiontests/httpwrappers/tests.py
@@ -11,6 +11,7 @@ from django.http import (QueryDict, HttpResponse, HttpResponseRedirect,
                         SimpleCookie, BadHeaderError,
                         parse_cookie)
 from django.test import TestCase
+from django.utils.encoding import smart_str
 from django.utils import six
 from django.utils import unittest

@@ -228,33 +229,52 @@ class QueryDictTests(unittest.TestCase):
        self.assertEqual(copy.deepcopy(q).encoding, 'iso-8859-15')

 class HttpResponseTests(unittest.TestCase):
-    def test_unicode_headers(self):
+
+    def test_headers_type(self):
        r = HttpResponse()

-        # If we insert a unicode value it will be converted to an ascii
-        r['value'] = 'test value'
-        self.assertTrue(isinstance(r['value'], str))
+        # The following tests explicitly test types in addition to values
+        # because in Python 2 u'foo' == b'foo'.

-        # An error is raised when a unicode object with non-ascii is assigned.
-        self.assertRaises(UnicodeEncodeError, r.__setitem__, 'value', 't\xebst value')
+        # ASCII unicode or bytes values are converted to native strings.
+        r['key'] = 'test'
+        self.assertEqual(r['key'], str('test'))
+        self.assertIsInstance(r['key'], str)
+        r['key'] = 'test'.encode('ascii')
+        self.assertEqual(r['key'], str('test'))
+        self.assertIsInstance(r['key'], str)

-        # An error is raised when  a unicode object with non-ASCII format is
-        # passed as initial mimetype or content_type.
-        self.assertRaises(UnicodeEncodeError, HttpResponse,
-                content_type='t\xebst value')
+        # Latin-1 unicode or bytes values are also converted to native strings.
+        r['key'] = 'café'
+        self.assertEqual(r['key'], smart_str('café', 'latin-1'))
+        self.assertIsInstance(r['key'], str)
+        r['key'] = 'café'.encode('latin-1')
+        self.assertEqual(r['key'], smart_str('café', 'latin-1'))
+        self.assertIsInstance(r['key'], str)

-        # HttpResponse headers must be convertible to ASCII.
-        self.assertRaises(UnicodeEncodeError, HttpResponse,
-                content_type='t\xebst value')
+        # Other unicode values are MIME-encoded (there's no way to pass them as bytes).
+        r['key'] = '†'
+        self.assertEqual(r['key'], str('=?utf-8?b?4oCg?='))
+        self.assertIsInstance(r['key'], str)

-        # The response also converts unicode keys to strings.)
-        r['test'] = 'testing key'
+        # The response also converts unicode or bytes keys to strings, but requires
+        # them to contain only ASCII.
+        r = HttpResponse()
+        r['foo'] = 'bar'
        l = list(r.items())
-        l.sort()
-        self.assertEqual(l[1], ('test', 'testing key'))
+        self.assertEqual(l[0], ('foo', 'bar'))
+        self.assertIsInstance(l[0][0], str)
+
+        r = HttpResponse()
+        r[b'foo'] = 'bar'
+        l = list(r.items())
+        self.assertEqual(l[0], ('foo', 'bar'))
+        self.assertIsInstance(l[0][0], str)
+
+        r = HttpResponse()
+        self.assertRaises(UnicodeError, r.__setitem__, 'føø', 'bar')
+        self.assertRaises(UnicodeError, r.__setitem__, 'føø'.encode('utf-8'), 'bar')

-        # It will also raise errors for keys with non-ascii data.
-        self.assertRaises(UnicodeEncodeError, r.__setitem__, 't\xebst key', 'value')

    def test_newlines_in_headers(self):
        # Bug #10188: Do not allow newlines in headers (CR or LF)