From d88c24f436ae23ff5aec4ea06c0c27499a71e074 Mon Sep 17 00:00:00 2001 From: Matthew Somerville Date: Wed, 28 Jan 2015 21:43:23 +0000 Subject: [PATCH] [1.8.x] Fixed #24240 -- Allowed GZipping a Unicode StreamingHttpResponse make_bytes() assumed that if the Content-Encoding header is set, then everything had already been dealt with bytes-wise, but in a streaming situation this was not necessarily the case. make_bytes() is only called when necessary when working with a StreamingHttpResponse iterable, but by that point the middleware has added the Content-Encoding header and thus make_bytes() tried to call bytes(value) (and dies). If it had been a normal HttpResponse, make_bytes() would have been called when the content was set, well before the middleware set the Content-Encoding header. This commit removes the special casing when Content-Encoding is set, allowing unicode strings to be encoded during the iteration before they are e.g. gzipped. This behaviour was added a long time ago for #4969 and it doesn't appear to be necessary any more, as everything is correctly made into bytes at the appropriate places. Two new tests, to show that supplying non-ASCII characters to a StreamingHttpResponse works fine normally, and when passed through the GZip middleware (the latter dies without the change to make_bytes()). Removes the test with a nonsense Content-Encoding and Unicode input - if this were to happen, it can still be encoded as bytes fine. Backport of 250aa7c39b from master. --- AUTHORS | 1 + django/http/response.py | 4 ---- tests/httpwrappers/tests.py | 16 ++++++++-------- tests/middleware/tests.py | 12 ++++++++++++ 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/AUTHORS b/AUTHORS index b4863849dc..bfc0107f64 100644 --- a/AUTHORS +++ b/AUTHORS @@ -452,6 +452,7 @@ answer newbie questions, and generally made Django that much better: Matt Deacalion Stevens Matt Dennenbaum Matthew Flanagan + Matthew Somerville Matthew Tretter Matthias Kestenholz Matthias Pronk diff --git a/django/http/response.py b/django/http/response.py index 560e2ac7a5..eedd6c2c72 100644 --- a/django/http/response.py +++ b/django/http/response.py @@ -282,10 +282,6 @@ class HttpResponseBase(six.Iterator): # an instance of a subclass, this function returns `bytes(value)`. # This doesn't make a copy when `value` already contains bytes. - # If content is already encoded (eg. gzip), assume bytes. - if self.has_header('Content-Encoding'): - return bytes(value) - # Handle string types -- we can't rely on force_bytes here because: # - under Python 3 it attempts str conversion first # - when self._charset != 'utf-8' it re-encodes the content diff --git a/tests/httpwrappers/tests.py b/tests/httpwrappers/tests.py index cc324b76cd..14ad306c65 100644 --- a/tests/httpwrappers/tests.py +++ b/tests/httpwrappers/tests.py @@ -348,14 +348,6 @@ class HttpResponseTests(unittest.TestCase): #'\xde\x9e' == unichr(1950).encode('utf-8') self.assertEqual(r.content, b'123\xde\x9e') - # with Content-Encoding header - r = HttpResponse() - r['Content-Encoding'] = 'winning' - r.content = [b'abc', b'def'] - self.assertEqual(r.content, b'abcdef') - self.assertRaises(TypeError if six.PY3 else UnicodeEncodeError, - setattr, r, 'content', ['\u079e']) - # .content can safely be accessed multiple times. r = HttpResponse(iter(['hello', 'world'])) self.assertEqual(r.content, r.content) @@ -512,6 +504,14 @@ class StreamingHttpResponseTests(TestCase): self.assertEqual(list(r), [b'abc', b'def']) self.assertEqual(list(r), []) + # iterating over Unicode strings still yields bytestring chunks. + r.streaming_content = iter(['hello', 'café']) + chunks = list(r) + # '\xc3\xa9' == unichr(233).encode('utf-8') + self.assertEqual(chunks, [b'hello', b'caf\xc3\xa9']) + for chunk in chunks: + self.assertIsInstance(chunk, six.binary_type) + # streaming responses don't have a `content` attribute. self.assertFalse(hasattr(r, 'content')) diff --git a/tests/middleware/tests.py b/tests/middleware/tests.py index 2b5b50f9e5..492f7a055a 100644 --- a/tests/middleware/tests.py +++ b/tests/middleware/tests.py @@ -590,6 +590,7 @@ class GZipMiddlewareTest(TestCase): compressible_string = b'a' * 500 uncompressible_string = b''.join(six.int2byte(random.randint(0, 255)) for _ in range(500)) sequence = [b'a' * 500, b'b' * 200, b'a' * 300] + sequence_unicode = ['a' * 500, 'é' * 200, 'a' * 300] def setUp(self): self.req = RequestFactory().get('/') @@ -601,6 +602,8 @@ class GZipMiddlewareTest(TestCase): self.resp['Content-Type'] = 'text/html; charset=UTF-8' self.stream_resp = StreamingHttpResponse(self.sequence) self.stream_resp['Content-Type'] = 'text/html; charset=UTF-8' + self.stream_resp_unicode = StreamingHttpResponse(self.sequence_unicode) + self.stream_resp_unicode['Content-Type'] = 'text/html; charset=UTF-8' @staticmethod def decompress(gzipped_string): @@ -624,6 +627,15 @@ class GZipMiddlewareTest(TestCase): self.assertEqual(r.get('Content-Encoding'), 'gzip') self.assertFalse(r.has_header('Content-Length')) + def test_compress_streaming_response_unicode(self): + """ + Tests that compression is performed on responses with streaming Unicode content. + """ + r = GZipMiddleware().process_response(self.req, self.stream_resp_unicode) + self.assertEqual(self.decompress(b''.join(r)), b''.join(x.encode('utf-8') for x in self.sequence_unicode)) + self.assertEqual(r.get('Content-Encoding'), 'gzip') + self.assertFalse(r.has_header('Content-Length')) + def test_compress_file_response(self): """ Tests that compression is performed on FileResponse.