From d88c24f436ae23ff5aec4ea06c0c27499a71e074 Mon Sep 17 00:00:00 2001
From: Matthew Somerville <matthew-github@dracos.co.uk>
Date: Wed, 28 Jan 2015 21:43:23 +0000
Subject: [PATCH] [1.8.x] Fixed #24240 -- Allowed GZipping a Unicode
 StreamingHttpResponse

make_bytes() assumed that if the Content-Encoding header is set, then
everything had already been dealt with bytes-wise, but in a streaming
situation this was not necessarily the case.

make_bytes() is only called when necessary when working with a
StreamingHttpResponse iterable, but by that point the middleware has
added the Content-Encoding header and thus make_bytes() tried to call
bytes(value) (and dies). If it had been a normal HttpResponse,
make_bytes() would have been called when the content was set, well
before the middleware set the Content-Encoding header.

This commit removes the special casing when Content-Encoding is set,
allowing unicode strings to be encoded during the iteration before they
are e.g. gzipped. This behaviour was added a long time ago for #4969 and
it doesn't appear to be necessary any more, as everything is correctly
made into bytes at the appropriate places.

Two new tests, to show that supplying non-ASCII characters to a
StreamingHttpResponse works fine normally, and when passed through the
GZip middleware (the latter dies without the change to make_bytes()).
Removes the test with a nonsense Content-Encoding and Unicode input - if
this were to happen, it can still be encoded as bytes fine.

Backport of 250aa7c39b from master.
---
 AUTHORS                     |  1 +
 django/http/response.py     |  4 ----
 tests/httpwrappers/tests.py | 16 ++++++++--------
 tests/middleware/tests.py   | 12 ++++++++++++
 4 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index b4863849dc..bfc0107f64 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -452,6 +452,7 @@ answer newbie questions, and generally made Django that much better:
     Matt Deacalion Stevens <matt@dirtymonkey.co.uk>
     Matt Dennenbaum
     Matthew Flanagan <http://wadofstuff.blogspot.com>
+    Matthew Somerville <matthew-github@dracos.co.uk>
     Matthew Tretter <m@tthewwithanm.com>
     Matthias Kestenholz <mk@406.ch>
     Matthias Pronk <django@masida.nl>
diff --git a/django/http/response.py b/django/http/response.py
index 560e2ac7a5..eedd6c2c72 100644
--- a/django/http/response.py
+++ b/django/http/response.py
@@ -282,10 +282,6 @@ class HttpResponseBase(six.Iterator):
         # an instance of a subclass, this function returns `bytes(value)`.
         # This doesn't make a copy when `value` already contains bytes.
 
-        # If content is already encoded (eg. gzip), assume bytes.
-        if self.has_header('Content-Encoding'):
-            return bytes(value)
-
         # Handle string types -- we can't rely on force_bytes here because:
         # - under Python 3 it attempts str conversion first
         # - when self._charset != 'utf-8' it re-encodes the content
diff --git a/tests/httpwrappers/tests.py b/tests/httpwrappers/tests.py
index cc324b76cd..14ad306c65 100644
--- a/tests/httpwrappers/tests.py
+++ b/tests/httpwrappers/tests.py
@@ -348,14 +348,6 @@ class HttpResponseTests(unittest.TestCase):
         #'\xde\x9e' == unichr(1950).encode('utf-8')
         self.assertEqual(r.content, b'123\xde\x9e')
 
-        # with Content-Encoding header
-        r = HttpResponse()
-        r['Content-Encoding'] = 'winning'
-        r.content = [b'abc', b'def']
-        self.assertEqual(r.content, b'abcdef')
-        self.assertRaises(TypeError if six.PY3 else UnicodeEncodeError,
-                          setattr, r, 'content', ['\u079e'])
-
         # .content can safely be accessed multiple times.
         r = HttpResponse(iter(['hello', 'world']))
         self.assertEqual(r.content, r.content)
@@ -512,6 +504,14 @@ class StreamingHttpResponseTests(TestCase):
         self.assertEqual(list(r), [b'abc', b'def'])
         self.assertEqual(list(r), [])
 
+        # iterating over Unicode strings still yields bytestring chunks.
+        r.streaming_content = iter(['hello', 'café'])
+        chunks = list(r)
+        # '\xc3\xa9' == unichr(233).encode('utf-8')
+        self.assertEqual(chunks, [b'hello', b'caf\xc3\xa9'])
+        for chunk in chunks:
+            self.assertIsInstance(chunk, six.binary_type)
+
         # streaming responses don't have a `content` attribute.
         self.assertFalse(hasattr(r, 'content'))
 
diff --git a/tests/middleware/tests.py b/tests/middleware/tests.py
index 2b5b50f9e5..492f7a055a 100644
--- a/tests/middleware/tests.py
+++ b/tests/middleware/tests.py
@@ -590,6 +590,7 @@ class GZipMiddlewareTest(TestCase):
     compressible_string = b'a' * 500
     uncompressible_string = b''.join(six.int2byte(random.randint(0, 255)) for _ in range(500))
     sequence = [b'a' * 500, b'b' * 200, b'a' * 300]
+    sequence_unicode = ['a' * 500, 'é' * 200, 'a' * 300]
 
     def setUp(self):
         self.req = RequestFactory().get('/')
@@ -601,6 +602,8 @@ class GZipMiddlewareTest(TestCase):
         self.resp['Content-Type'] = 'text/html; charset=UTF-8'
         self.stream_resp = StreamingHttpResponse(self.sequence)
         self.stream_resp['Content-Type'] = 'text/html; charset=UTF-8'
+        self.stream_resp_unicode = StreamingHttpResponse(self.sequence_unicode)
+        self.stream_resp_unicode['Content-Type'] = 'text/html; charset=UTF-8'
 
     @staticmethod
     def decompress(gzipped_string):
@@ -624,6 +627,15 @@ class GZipMiddlewareTest(TestCase):
         self.assertEqual(r.get('Content-Encoding'), 'gzip')
         self.assertFalse(r.has_header('Content-Length'))
 
+    def test_compress_streaming_response_unicode(self):
+        """
+        Tests that compression is performed on responses with streaming Unicode content.
+        """
+        r = GZipMiddleware().process_response(self.req, self.stream_resp_unicode)
+        self.assertEqual(self.decompress(b''.join(r)), b''.join(x.encode('utf-8') for x in self.sequence_unicode))
+        self.assertEqual(r.get('Content-Encoding'), 'gzip')
+        self.assertFalse(r.has_header('Content-Length'))
+
     def test_compress_file_response(self):
         """
         Tests that compression is performed on FileResponse.