Fixed #10190 -- Made HttpResponse charset customizable.

Thanks to Simon Charette, Aymeric Augustin, and Tim Graham
for reviews and contributions.
This commit is contained in:
Unai Zalakain 2013-11-16 18:54:12 +01:00 committed by Tim Graham
parent 9d6551204e
commit 5f2542f12a
7 changed files with 125 additions and 18 deletions

View File

@ -2,6 +2,7 @@ from __future__ import unicode_literals
import datetime
import json
import re
import sys
import time
from email.header import Header
@ -83,6 +84,9 @@ REASON_PHRASES = {
}
_charset_from_content_type_re = re.compile(r';\s*charset=(?P<charset>[^\s;]+)', re.I)
class BadHeaderError(ValueError):
pass
@ -98,19 +102,15 @@ class HttpResponseBase(six.Iterator):
status_code = 200
reason_phrase = None # Use default reason phrase for status code.
def __init__(self, content_type=None, status=None, reason=None):
def __init__(self, content_type=None, status=None, reason=None, charset=None):
# _headers is a mapping of the lower-case name to the original case of
# the header (required for working with legacy systems) and the header
# value. Both the name of the header and its value are ASCII strings.
self._headers = {}
self._charset = settings.DEFAULT_CHARSET
self._closable_objects = []
# This parameter is set by the handler. It's necessary to preserve the
# historical behavior of request_finished.
self._handler_class = None
if not content_type:
content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE,
self._charset)
self.cookies = SimpleCookie()
if status is not None:
self.status_code = status
@ -119,8 +119,27 @@ class HttpResponseBase(six.Iterator):
elif self.reason_phrase is None:
self.reason_phrase = REASON_PHRASES.get(self.status_code,
'UNKNOWN STATUS CODE')
self._charset = charset
if content_type is None:
content_type = '%s; charset=%s' % (settings.DEFAULT_CONTENT_TYPE,
self.charset)
self['Content-Type'] = content_type
@property
def charset(self):
    """Return the charset used to encode this response.

    Resolution order:

    1. the value stored in ``self._charset`` (set by ``__init__`` or by
       the setter below), when it is not ``None``;
    2. the ``charset`` parameter of the ``Content-Type`` header;
    3. ``settings.DEFAULT_CHARSET``.
    """
    if self._charset is not None:
        return self._charset
    content_type = self.get('Content-Type', '')
    matched = _charset_from_content_type_re.search(content_type)
    if matched:
        # The parameter may be written as a quoted string
        # (charset="utf-8"); drop the double quotes to get the bare name.
        name = matched.group('charset').replace('"', '')
        # A quoted but empty value (charset="") leaves nothing usable.
        # Treat it like a missing parameter rather than returning '',
        # which is not a valid codec name when content gets encoded.
        if name:
            return name
    return settings.DEFAULT_CHARSET

@charset.setter
def charset(self, value):
    """Store an explicit charset; ``None`` re-enables the lookup above."""
    self._charset = value
def serialize_headers(self):
"""HTTP headers as a bytestring."""
def to_bytes(val, encoding):
@ -278,10 +297,10 @@ class HttpResponseBase(six.Iterator):
if isinstance(value, bytes):
return bytes(value)
if isinstance(value, six.text_type):
return bytes(value.encode(self._charset))
return bytes(value.encode(self.charset))
# Handle non-string types (#16494)
return force_bytes(value, self._charset)
return force_bytes(value, self.charset)
# These methods partially implement the file-like object interface.
# See http://docs.python.org/lib/bltin-file-objects.html

View File

@ -10,7 +10,8 @@ class ContentNotRenderedError(Exception):
class SimpleTemplateResponse(HttpResponse):
rendering_attrs = ['template_name', 'context_data', '_post_render_callbacks']
def __init__(self, template, context=None, content_type=None, status=None):
def __init__(self, template, context=None, content_type=None, status=None,
charset=None):
# It would seem obvious to call these next two members 'template' and
# 'context', but those names are reserved as part of the test Client
# API. To avoid the name collision, which would cause tricky-to-debug
# problems, we use different attribute names.
@ -22,7 +23,7 @@ class SimpleTemplateResponse(HttpResponse):
# content argument doesn't make sense here because it will be replaced
# with rendered template so we always pass empty string in order to
# prevent errors and provide shorter signature.
super(SimpleTemplateResponse, self).__init__('', content_type, status)
super(SimpleTemplateResponse, self).__init__('', content_type, status, charset)
# _is_rendered tracks whether the template and context has been baked
# into a final response.
@ -136,7 +137,7 @@ class TemplateResponse(SimpleTemplateResponse):
rendering_attrs = SimpleTemplateResponse.rendering_attrs + ['_request', '_current_app']
def __init__(self, request, template, context=None, content_type=None,
status=None, current_app=None):
status=None, current_app=None, charset=None):
# self.request gets over-written by django.test.client.Client - and
# unlike context_data and template_name the _request should not
# be considered part of the public API.
@ -145,7 +146,7 @@ class TemplateResponse(SimpleTemplateResponse):
# having to avoid needing to create the RequestContext directly
self._current_app = current_app
super(TemplateResponse, self).__init__(
template, context, content_type, status)
template, context, content_type, status, charset)
def resolve_context(self, context):
"""Convert context data into a full RequestContext object

View File

@ -327,8 +327,8 @@ class SimpleTestCase(unittest.TestCase):
else:
content = response.content
if not isinstance(text, bytes) or html:
text = force_text(text, encoding=response._charset)
content = content.decode(response._charset)
text = force_text(text, encoding=response.charset)
content = content.decode(response.charset)
text_repr = "'%s'" % text
else:
text_repr = repr(text)

View File

@ -627,6 +627,15 @@ Attributes
A bytestring representing the content, encoded from a Unicode
object if necessary.
.. attribute:: HttpResponse.charset
.. versionadded:: 1.8
A string denoting the charset in which the response will be encoded. If not
given at ``HttpResponse`` instantiation time, it will be extracted from
``content_type``, and if that is unsuccessful, the
:setting:`DEFAULT_CHARSET` setting will be used.
.. attribute:: HttpResponse.status_code
The `HTTP status code`_ for the response.
@ -645,7 +654,7 @@ Attributes
Methods
-------
.. method:: HttpResponse.__init__(content='', content_type=None, status=200, reason=None)
.. method:: HttpResponse.__init__(content='', content_type=None, status=200, reason=None, charset=None)
Instantiates an ``HttpResponse`` object with the given page content and
content type.
@ -666,6 +675,14 @@ Methods
``reason`` is the HTTP response phrase. If not provided, a default phrase
will be used.
``charset`` is the charset in which the response will be encoded. If not
given it will be extracted from ``content_type``, and if that
is unsuccessful, the :setting:`DEFAULT_CHARSET` setting will be used.
.. versionadded:: 1.8
The ``charset`` parameter was added.
.. method:: HttpResponse.__setitem__(header, value)
Sets the given header name to the given value. Both ``header`` and

View File

@ -56,7 +56,7 @@ Attributes
Methods
-------
.. method:: SimpleTemplateResponse.__init__(template, context=None, content_type=None, status=None)
.. method:: SimpleTemplateResponse.__init__(template, context=None, content_type=None, status=None, charset=None)
Instantiates a
:class:`~django.template.response.SimpleTemplateResponse` object
@ -80,6 +80,15 @@ Methods
``content_type`` is specified, then its value is used. Otherwise,
:setting:`DEFAULT_CONTENT_TYPE` is used.
``charset``
The charset in which the response will be encoded. If not given it will
be extracted from ``content_type``, and if that is unsuccessful, the
:setting:`DEFAULT_CHARSET` setting will be used.
.. versionadded:: 1.8
The ``charset`` parameter was added.
.. method:: SimpleTemplateResponse.resolve_context(context)
Converts context data into a context instance that can be used for
@ -140,7 +149,7 @@ TemplateResponse objects
Methods
-------
.. method:: TemplateResponse.__init__(request, template, context=None, content_type=None, status=None, current_app=None)
.. method:: TemplateResponse.__init__(request, template, context=None, content_type=None, status=None, current_app=None, charset=None)
Instantiates a ``TemplateResponse`` object with the given
template, context, MIME type and HTTP status.
@ -173,6 +182,14 @@ Methods
:ref:`namespaced URL resolution strategy <topics-http-reversing-url-namespaces>`
for more information.
``charset``
The charset in which the response will be encoded. If not given it will
be extracted from ``content_type``, and if that is unsuccessful, the
:setting:`DEFAULT_CHARSET` setting will be used.
.. versionadded:: 1.8
The ``charset`` parameter was added.
The rendering process
=====================

View File

@ -278,6 +278,9 @@ Requests and Responses
This brings this class into line with the documentation and with
``WSGIRequest``.
* The :attr:`HttpResponse.charset <django.http.HttpResponse.charset>` attribute
was added.
Tests
^^^^^

View File

@ -1,8 +1,16 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.conf import settings
from django.http import HttpResponse
import unittest
from django.test import SimpleTestCase
UTF8 = 'utf-8'
ISO88591 = 'iso-8859-1'
class HttpResponseTests(unittest.TestCase):
class HttpResponseTests(SimpleTestCase):
def test_status_code(self):
resp = HttpResponse(status=418)
@ -14,3 +22,45 @@ class HttpResponseTests(unittest.TestCase):
resp = HttpResponse(status=814, reason=reason)
self.assertEqual(resp.status_code, 814)
self.assertEqual(resp.reason_phrase, reason)
def test_charset_detection(self):
    """HttpResponse should parse charset from content_type."""
    default_charset = settings.DEFAULT_CHARSET

    # Nothing specified anywhere: the setting is reported.
    self.assertEqual(HttpResponse('ok').charset, default_charset)

    # An explicit charset argument is reported and ends up in the header.
    explicit = HttpResponse(charset=ISO88591)
    self.assertEqual(explicit.charset, ISO88591)
    self.assertEqual(explicit['Content-Type'], 'text/html; charset=%s' % ISO88591)

    # The charset argument takes precedence over the one in content_type.
    overridden = HttpResponse(content_type='text/plain; charset=%s' % UTF8, charset=ISO88591)
    self.assertEqual(overridden.charset, ISO88591)

    # Bare and double-quoted charset parameters are both detected.
    detected_from = (
        'text/plain; charset=%s' % ISO88591,
        'text/plain; charset="%s"' % ISO88591,
    )
    for content_type in detected_from:
        self.assertEqual(HttpResponse(content_type=content_type).charset, ISO88591)

    # An empty or absent charset parameter falls back to the setting.
    for content_type in ('text/plain; charset=', 'text/plain'):
        self.assertEqual(HttpResponse(content_type=content_type).charset, default_charset)
def test_response_content_charset(self):
    """HttpResponse should encode based on charset."""
    text = "Café :)"
    utf8_bytes = text.encode(UTF8)
    latin1_bytes = text.encode(ISO88591)

    # (payload, extra HttpResponse keyword arguments), checked in order.
    cases = [
        (utf8_bytes, {}),
        (latin1_bytes, {'content_type': 'text/plain; charset=%s' % ISO88591}),
        (latin1_bytes, {}),
        (latin1_bytes, {'content_type': 'text/plain'}),
    ]
    for payload, extra in cases:
        # Byte payloads must be found in the response body unchanged.
        self.assertContains(HttpResponse(payload, **extra), payload)