Fixed #18916 -- Allowed non-ASCII headers.

Thanks Malcolm Tredinnick for the review.
This commit is contained in:
Aymeric Augustin 2012-09-07 19:08:57 -04:00
parent 1d235b3914
commit 9b07b5edeb
2 changed files with 74 additions and 40 deletions

View File

@ -2,6 +2,7 @@ from __future__ import absolute_import, unicode_literals
import copy import copy
import datetime import datetime
from email.header import Header
import os import os
import re import re
import sys import sys
@ -560,31 +561,44 @@ class HttpResponse(object):
else: else:
__str__ = serialize __str__ = serialize
def _convert_to_ascii(self, *values): def _convert_to_charset(self, value, charset, mime_encode=False):
"""Converts all values to ascii strings.""" """Converts headers key/value to ascii/latin1 native strings.
for value in values:
if not isinstance(value, six.string_types): `charset` must be 'ascii' or 'latin-1'. If `mime_encode` is True and
value = str(value) `value` can't be represented in the given charset, MIME-encoding
try: is applied.
if six.PY3: """
# Ensure string only contains ASCII if not isinstance(value, (bytes, six.text_type)):
value.encode('us-ascii') value = str(value)
try:
if six.PY3:
if isinstance(value, str):
# Ensure string is valid in given charset
value.encode(charset)
else: else:
if isinstance(value, str): # Convert bytestring using given charset
# Ensure string only contains ASCII value = value.decode(charset)
value.decode('us-ascii') else:
else: if isinstance(value, str):
# Convert unicode to an ASCII string # Ensure string is valid in given charset
value = value.encode('us-ascii') value.decode(charset)
except UnicodeError as e: else:
e.reason += ', HTTP response headers must be in US-ASCII format' # Convert unicode string to given charset
value = value.encode(charset)
except UnicodeError as e:
if mime_encode:
# Wrapping in str() is a workaround for #12422 under Python 2.
value = str(Header(value, 'utf-8').encode())
else:
e.reason += ', HTTP response headers must be in %s format' % charset
raise raise
if '\n' in value or '\r' in value: if str('\n') in value or str('\r') in value:
raise BadHeaderError("Header values can't contain newlines (got %r)" % value) raise BadHeaderError("Header values can't contain newlines (got %r)" % value)
yield value return value
def __setitem__(self, header, value): def __setitem__(self, header, value):
header, value = self._convert_to_ascii(header, value) header = self._convert_to_charset(header, 'ascii')
value = self._convert_to_charset(value, 'latin1', mime_encode=True)
self._headers[header.lower()] = (header, value) self._headers[header.lower()] = (header, value)
def __delitem__(self, header): def __delitem__(self, header):

View File

@ -11,6 +11,7 @@ from django.http import (QueryDict, HttpResponse, HttpResponseRedirect,
SimpleCookie, BadHeaderError, SimpleCookie, BadHeaderError,
parse_cookie) parse_cookie)
from django.test import TestCase from django.test import TestCase
from django.utils.encoding import smart_str
from django.utils import six from django.utils import six
from django.utils import unittest from django.utils import unittest
@ -228,33 +229,52 @@ class QueryDictTests(unittest.TestCase):
self.assertEqual(copy.deepcopy(q).encoding, 'iso-8859-15') self.assertEqual(copy.deepcopy(q).encoding, 'iso-8859-15')
class HttpResponseTests(unittest.TestCase): class HttpResponseTests(unittest.TestCase):
def test_unicode_headers(self):
def test_headers_type(self):
r = HttpResponse() r = HttpResponse()
# If we insert a unicode value it will be converted to an ascii # The following tests explicitly test types in addition to values
r['value'] = 'test value' # because in Python 2 u'foo' == b'foo'.
self.assertTrue(isinstance(r['value'], str))
# An error is raised when a unicode object with non-ascii is assigned. # ASCII unicode or bytes values are converted to native strings.
self.assertRaises(UnicodeEncodeError, r.__setitem__, 'value', 't\xebst value') r['key'] = 'test'
self.assertEqual(r['key'], str('test'))
self.assertIsInstance(r['key'], str)
r['key'] = 'test'.encode('ascii')
self.assertEqual(r['key'], str('test'))
self.assertIsInstance(r['key'], str)
# An error is raised when a unicode object with non-ASCII format is # Latin-1 unicode or bytes values are also converted to native strings.
# passed as initial mimetype or content_type. r['key'] = 'café'
self.assertRaises(UnicodeEncodeError, HttpResponse, self.assertEqual(r['key'], smart_str('café', 'latin-1'))
content_type='t\xebst value') self.assertIsInstance(r['key'], str)
r['key'] = 'café'.encode('latin-1')
self.assertEqual(r['key'], smart_str('café', 'latin-1'))
self.assertIsInstance(r['key'], str)
# HttpResponse headers must be convertible to ASCII. # Other unicode values are MIME-encoded (there's no way to pass them as bytes).
self.assertRaises(UnicodeEncodeError, HttpResponse, r['key'] = '†'
content_type='t\xebst value') self.assertEqual(r['key'], str('=?utf-8?b?4oCg?='))
self.assertIsInstance(r['key'], str)
# The response also converts unicode keys to strings. # The response also converts unicode or bytes keys to strings, but requires
r['test'] = 'testing key' # them to contain ASCII
r = HttpResponse()
r['foo'] = 'bar'
l = list(r.items()) l = list(r.items())
l.sort() self.assertEqual(l[0], ('foo', 'bar'))
self.assertEqual(l[1], ('test', 'testing key')) self.assertIsInstance(l[0][0], str)
r = HttpResponse()
r[b'foo'] = 'bar'
l = list(r.items())
self.assertEqual(l[0], ('foo', 'bar'))
self.assertIsInstance(l[0][0], str)
r = HttpResponse()
self.assertRaises(UnicodeError, r.__setitem__, 'føø', 'bar')
self.assertRaises(UnicodeError, r.__setitem__, 'føø'.encode('utf-8'), 'bar')
# It will also raise errors for keys with non-ascii data.
self.assertRaises(UnicodeEncodeError, r.__setitem__, 't\xebst key', 'value')
def test_newlines_in_headers(self): def test_newlines_in_headers(self):
# Bug #10188: Do not allow newlines in headers (CR or LF) # Bug #10188: Do not allow newlines in headers (CR or LF)