Fixed #24240 -- Allowed GZipping a Unicode StreamingHttpResponse
make_bytes() assumed that if the Content-Encoding header is set, then everything had already been dealt with bytes-wise, but in a streaming situation this was not necessarily the case. When working with a StreamingHttpResponse iterable, make_bytes() is only called when necessary, but by that point the middleware has added the Content-Encoding header and thus make_bytes() tried to call bytes(value) (and failed). If it had been a normal HttpResponse, make_bytes() would have been called when the content was set, well before the middleware set the Content-Encoding header. This commit removes the special casing when Content-Encoding is set, allowing Unicode strings to be encoded during the iteration before they are e.g. gzipped. This behaviour was added a long time ago for #4969 and it doesn't appear to be necessary any more, as everything is correctly made into bytes at the appropriate places. Adds two new tests to show that supplying non-ASCII characters to a StreamingHttpResponse works fine both normally and when passed through the GZip middleware (the latter fails without the change to make_bytes()). Removes the test with a nonsense Content-Encoding and Unicode input - if this were to happen, the input can still be encoded as bytes fine.
This commit is contained in:
parent
cd4282816d
commit
250aa7c39b
1
AUTHORS
1
AUTHORS
|
@ -452,6 +452,7 @@ answer newbie questions, and generally made Django that much better:
|
||||||
Matt Deacalion Stevens <matt@dirtymonkey.co.uk>
|
Matt Deacalion Stevens <matt@dirtymonkey.co.uk>
|
||||||
Matt Dennenbaum
|
Matt Dennenbaum
|
||||||
Matthew Flanagan <http://wadofstuff.blogspot.com>
|
Matthew Flanagan <http://wadofstuff.blogspot.com>
|
||||||
|
Matthew Somerville <matthew-github@dracos.co.uk>
|
||||||
Matthew Tretter <m@tthewwithanm.com>
|
Matthew Tretter <m@tthewwithanm.com>
|
||||||
Matthias Kestenholz <mk@406.ch>
|
Matthias Kestenholz <mk@406.ch>
|
||||||
Matthias Pronk <django@masida.nl>
|
Matthias Pronk <django@masida.nl>
|
||||||
|
|
|
@ -218,10 +218,6 @@ class HttpResponseBase(six.Iterator):
|
||||||
# an instance of a subclass, this function returns `bytes(value)`.
|
# an instance of a subclass, this function returns `bytes(value)`.
|
||||||
# This doesn't make a copy when `value` already contains bytes.
|
# This doesn't make a copy when `value` already contains bytes.
|
||||||
|
|
||||||
# If content is already encoded (eg. gzip), assume bytes.
|
|
||||||
if self.has_header('Content-Encoding'):
|
|
||||||
return bytes(value)
|
|
||||||
|
|
||||||
# Handle string types -- we can't rely on force_bytes here because:
|
# Handle string types -- we can't rely on force_bytes here because:
|
||||||
# - under Python 3 it attempts str conversion first
|
# - under Python 3 it attempts str conversion first
|
||||||
# - when self._charset != 'utf-8' it re-encodes the content
|
# - when self._charset != 'utf-8' it re-encodes the content
|
||||||
|
|
|
@ -348,14 +348,6 @@ class HttpResponseTests(unittest.TestCase):
|
||||||
#'\xde\x9e' == unichr(1950).encode('utf-8')
|
#'\xde\x9e' == unichr(1950).encode('utf-8')
|
||||||
self.assertEqual(r.content, b'123\xde\x9e')
|
self.assertEqual(r.content, b'123\xde\x9e')
|
||||||
|
|
||||||
# with Content-Encoding header
|
|
||||||
r = HttpResponse()
|
|
||||||
r['Content-Encoding'] = 'winning'
|
|
||||||
r.content = [b'abc', b'def']
|
|
||||||
self.assertEqual(r.content, b'abcdef')
|
|
||||||
self.assertRaises(TypeError if six.PY3 else UnicodeEncodeError,
|
|
||||||
setattr, r, 'content', ['\u079e'])
|
|
||||||
|
|
||||||
# .content can safely be accessed multiple times.
|
# .content can safely be accessed multiple times.
|
||||||
r = HttpResponse(iter(['hello', 'world']))
|
r = HttpResponse(iter(['hello', 'world']))
|
||||||
self.assertEqual(r.content, r.content)
|
self.assertEqual(r.content, r.content)
|
||||||
|
@ -512,6 +504,14 @@ class StreamingHttpResponseTests(TestCase):
|
||||||
self.assertEqual(list(r), [b'abc', b'def'])
|
self.assertEqual(list(r), [b'abc', b'def'])
|
||||||
self.assertEqual(list(r), [])
|
self.assertEqual(list(r), [])
|
||||||
|
|
||||||
|
# iterating over Unicode strings still yields bytestring chunks.
|
||||||
|
r.streaming_content = iter(['hello', 'café'])
|
||||||
|
chunks = list(r)
|
||||||
|
# '\xc3\xa9' == unichr(233).encode('utf-8')
|
||||||
|
self.assertEqual(chunks, [b'hello', b'caf\xc3\xa9'])
|
||||||
|
for chunk in chunks:
|
||||||
|
self.assertIsInstance(chunk, six.binary_type)
|
||||||
|
|
||||||
# streaming responses don't have a `content` attribute.
|
# streaming responses don't have a `content` attribute.
|
||||||
self.assertFalse(hasattr(r, 'content'))
|
self.assertFalse(hasattr(r, 'content'))
|
||||||
|
|
||||||
|
|
|
@ -599,6 +599,7 @@ class GZipMiddlewareTest(TestCase):
|
||||||
compressible_string = b'a' * 500
|
compressible_string = b'a' * 500
|
||||||
uncompressible_string = b''.join(six.int2byte(random.randint(0, 255)) for _ in range(500))
|
uncompressible_string = b''.join(six.int2byte(random.randint(0, 255)) for _ in range(500))
|
||||||
sequence = [b'a' * 500, b'b' * 200, b'a' * 300]
|
sequence = [b'a' * 500, b'b' * 200, b'a' * 300]
|
||||||
|
sequence_unicode = ['a' * 500, 'é' * 200, 'a' * 300]
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.req = RequestFactory().get('/')
|
self.req = RequestFactory().get('/')
|
||||||
|
@ -610,6 +611,8 @@ class GZipMiddlewareTest(TestCase):
|
||||||
self.resp['Content-Type'] = 'text/html; charset=UTF-8'
|
self.resp['Content-Type'] = 'text/html; charset=UTF-8'
|
||||||
self.stream_resp = StreamingHttpResponse(self.sequence)
|
self.stream_resp = StreamingHttpResponse(self.sequence)
|
||||||
self.stream_resp['Content-Type'] = 'text/html; charset=UTF-8'
|
self.stream_resp['Content-Type'] = 'text/html; charset=UTF-8'
|
||||||
|
self.stream_resp_unicode = StreamingHttpResponse(self.sequence_unicode)
|
||||||
|
self.stream_resp_unicode['Content-Type'] = 'text/html; charset=UTF-8'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def decompress(gzipped_string):
|
def decompress(gzipped_string):
|
||||||
|
@ -633,6 +636,15 @@ class GZipMiddlewareTest(TestCase):
|
||||||
self.assertEqual(r.get('Content-Encoding'), 'gzip')
|
self.assertEqual(r.get('Content-Encoding'), 'gzip')
|
||||||
self.assertFalse(r.has_header('Content-Length'))
|
self.assertFalse(r.has_header('Content-Length'))
|
||||||
|
|
||||||
|
def test_compress_streaming_response_unicode(self):
|
||||||
|
"""
|
||||||
|
Tests that compression is performed on responses with streaming Unicode content.
|
||||||
|
"""
|
||||||
|
r = GZipMiddleware().process_response(self.req, self.stream_resp_unicode)
|
||||||
|
self.assertEqual(self.decompress(b''.join(r)), b''.join(x.encode('utf-8') for x in self.sequence_unicode))
|
||||||
|
self.assertEqual(r.get('Content-Encoding'), 'gzip')
|
||||||
|
self.assertFalse(r.has_header('Content-Length'))
|
||||||
|
|
||||||
def test_compress_file_response(self):
|
def test_compress_file_response(self):
|
||||||
"""
|
"""
|
||||||
Tests that compression is performed on FileResponse.
|
Tests that compression is performed on FileResponse.
|
||||||
|
|
Loading…
Reference in New Issue