From 5f2542f12a90cfcfb7be776424ef2f7b200df006 Mon Sep 17 00:00:00 2001 From: Unai Zalakain Date: Sat, 16 Nov 2013 18:54:12 +0100 Subject: [PATCH] Fixed #10190 -- Made HttpResponse charset customizable. Thanks to Simon Charette, Aymeric Augustin, and Tim Graham for reviews and contributions. --- django/http/response.py | 33 ++++++++++++++++----- django/template/response.py | 9 +++--- django/test/testcases.py | 4 +-- docs/ref/request-response.txt | 19 +++++++++++- docs/ref/template-response.txt | 21 +++++++++++-- docs/releases/1.8.txt | 3 ++ tests/responses/tests.py | 54 ++++++++++++++++++++++++++++++++-- 7 files changed, 125 insertions(+), 18 deletions(-) diff --git a/django/http/response.py b/django/http/response.py index 866b3480e7..0cc14d1346 100644 --- a/django/http/response.py +++ b/django/http/response.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import datetime import json +import re import sys import time from email.header import Header @@ -83,6 +84,9 @@ REASON_PHRASES = { } +_charset_from_content_type_re = re.compile(r';\s*charset=(?P<charset>[^\s;]+)', re.I) + + class BadHeaderError(ValueError): pass @@ -98,19 +102,15 @@ class HttpResponseBase(six.Iterator): status_code = 200 reason_phrase = None # Use default reason phrase for status code. - def __init__(self, content_type=None, status=None, reason=None): + def __init__(self, content_type=None, status=None, reason=None, charset=None): # _headers is a mapping of the lower-case name to the original case of # the header (required for working with legacy systems) and the header # value. Both the name of the header and its value are ASCII strings. self._headers = {} - self._charset = settings.DEFAULT_CHARSET self._closable_objects = [] # This parameter is set by the handler. It's necessary to preserve the # historical behavior of request_finished. 
self._handler_class = None - if not content_type: - content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE, - self._charset) self.cookies = SimpleCookie() if status is not None: self.status_code = status @@ -119,8 +119,27 @@ class HttpResponseBase(six.Iterator): elif self.reason_phrase is None: self.reason_phrase = REASON_PHRASES.get(self.status_code, 'UNKNOWN STATUS CODE') + self._charset = charset + if content_type is None: + content_type = '%s; charset=%s' % (settings.DEFAULT_CONTENT_TYPE, + self.charset) self['Content-Type'] = content_type + @property + def charset(self): + if self._charset is not None: + return self._charset + content_type = self.get('Content-Type', '') + matched = _charset_from_content_type_re.search(content_type) + if matched: + # Extract the charset and strip its double quotes + return matched.group('charset').replace('"', '') + return settings.DEFAULT_CHARSET + + @charset.setter + def charset(self, value): + self._charset = value + def serialize_headers(self): """HTTP headers as a bytestring.""" def to_bytes(val, encoding): @@ -278,10 +297,10 @@ class HttpResponseBase(six.Iterator): if isinstance(value, bytes): return bytes(value) if isinstance(value, six.text_type): - return bytes(value.encode(self._charset)) + return bytes(value.encode(self.charset)) # Handle non-string types (#16494) - return force_bytes(value, self._charset) + return force_bytes(value, self.charset) # These methods partially implement the file-like object interface. 
# See http://docs.python.org/lib/bltin-file-objects.html diff --git a/django/template/response.py b/django/template/response.py index f673c5f46a..f65c3adb8b 100644 --- a/django/template/response.py +++ b/django/template/response.py @@ -10,7 +10,8 @@ class ContentNotRenderedError(Exception): class SimpleTemplateResponse(HttpResponse): rendering_attrs = ['template_name', 'context_data', '_post_render_callbacks'] - def __init__(self, template, context=None, content_type=None, status=None): + def __init__(self, template, context=None, content_type=None, status=None, + charset=None): # It would seem obvious to call these next two members 'template' and # 'context', but those names are reserved as part of the test Client # API. To avoid the name collision, we use tricky-to-debug problems @@ -22,7 +23,7 @@ class SimpleTemplateResponse(HttpResponse): # content argument doesn't make sense here because it will be replaced # with rendered template so we always pass empty string in order to # prevent errors and provide shorter signature. - super(SimpleTemplateResponse, self).__init__('', content_type, status) + super(SimpleTemplateResponse, self).__init__('', content_type, status, charset) # _is_rendered tracks whether the template and context has been baked # into a final response. @@ -136,7 +137,7 @@ class TemplateResponse(SimpleTemplateResponse): rendering_attrs = SimpleTemplateResponse.rendering_attrs + ['_request', '_current_app'] def __init__(self, request, template, context=None, content_type=None, - status=None, current_app=None): + status=None, current_app=None, charset=None): # self.request gets over-written by django.test.client.Client - and # unlike context_data and template_name the _request should not # be considered part of the public API. 
@@ -145,7 +146,7 @@ class TemplateResponse(SimpleTemplateResponse): # having to avoid needing to create the RequestContext directly self._current_app = current_app super(TemplateResponse, self).__init__( - template, context, content_type, status) + template, context, content_type, status, charset) def resolve_context(self, context): """Convert context data into a full RequestContext object diff --git a/django/test/testcases.py b/django/test/testcases.py index d46f672391..e982706784 100644 --- a/django/test/testcases.py +++ b/django/test/testcases.py @@ -327,8 +327,8 @@ class SimpleTestCase(unittest.TestCase): else: content = response.content if not isinstance(text, bytes) or html: - text = force_text(text, encoding=response._charset) - content = content.decode(response._charset) + text = force_text(text, encoding=response.charset) + content = content.decode(response.charset) text_repr = "'%s'" % text else: text_repr = repr(text) diff --git a/docs/ref/request-response.txt b/docs/ref/request-response.txt index ba6481107d..339789b2b0 100644 --- a/docs/ref/request-response.txt +++ b/docs/ref/request-response.txt @@ -627,6 +627,15 @@ Attributes A bytestring representing the content, encoded from a Unicode object if necessary. +.. attribute:: HttpResponse.charset + + .. versionadded:: 1.8 + + A string denoting the charset in which the response will be encoded. If not + given at ``HttpResponse`` instantiation time, it will be extracted from + ``content_type`` and if that is unsuccessful, the + :setting:`DEFAULT_CHARSET` setting will be used. + .. attribute:: HttpResponse.status_code The `HTTP status code`_ for the response. @@ -645,7 +654,7 @@ Attributes Methods ------- -.. method:: HttpResponse.__init__(content='', content_type=None, status=200, reason=None) +.. method:: HttpResponse.__init__(content='', content_type=None, status=200, reason=None, charset=None) Instantiates an ``HttpResponse`` object with the given page content and content type. 
@@ -666,6 +675,14 @@ Methods ``reason`` is the HTTP response phrase. If not provided, a default phrase will be used. + ``charset`` is the charset in which the response will be encoded. If not + given it will be extracted from ``content_type``, and if that + is unsuccessful, the :setting:`DEFAULT_CHARSET` setting will be used. + + .. versionadded:: 1.8 + + The ``charset`` parameter was added. + .. method:: HttpResponse.__setitem__(header, value) Sets the given header name to the given value. Both ``header`` and diff --git a/docs/ref/template-response.txt b/docs/ref/template-response.txt index 36b710426c..7f0ddcf58d 100644 --- a/docs/ref/template-response.txt +++ b/docs/ref/template-response.txt @@ -56,7 +56,7 @@ Attributes Methods ------- -.. method:: SimpleTemplateResponse.__init__(template, context=None, content_type=None, status=None) +.. method:: SimpleTemplateResponse.__init__(template, context=None, content_type=None, status=None, charset=None) Instantiates a :class:`~django.template.response.SimpleTemplateResponse` object @@ -80,6 +80,15 @@ Methods ``content_type`` is specified, then its value is used. Otherwise, :setting:`DEFAULT_CONTENT_TYPE` is used. + ``charset`` + The charset in which the response will be encoded. If not given it will + be extracted from ``content_type``, and if that is unsuccessful, the + :setting:`DEFAULT_CHARSET` setting will be used. + + .. versionadded:: 1.8 + + The ``charset`` parameter was added. + .. method:: SimpleTemplateResponse.resolve_context(context) Converts context data into a context instance that can be used for @@ -140,7 +149,7 @@ TemplateResponse objects Methods ------- -.. method:: TemplateResponse.__init__(request, template, context=None, content_type=None, status=None, current_app=None) +.. 
method:: TemplateResponse.__init__(request, template, context=None, content_type=None, status=None, current_app=None, charset=None) Instantiates an ``TemplateResponse`` object with the given template, context, MIME type and HTTP status. @@ -173,6 +182,14 @@ Methods :ref:`namespaced URL resolution strategy <topics-http-reversing-url-namespaces>` for more information. + ``charset`` + The charset in which the response will be encoded. If not given it will + be extracted from ``content_type``, and if that is unsuccessful, the + :setting:`DEFAULT_CHARSET` setting will be used. + + .. versionadded:: 1.8 + + The ``charset`` parameter was added. The rendering process ===================== diff --git a/docs/releases/1.8.txt b/docs/releases/1.8.txt index 1f34623811..935893265b 100644 --- a/docs/releases/1.8.txt +++ b/docs/releases/1.8.txt @@ -278,6 +278,9 @@ Requests and Responses This brings this class into line with the documentation and with ``WSGIRequest``. +* The :attr:`HttpResponse.charset <django.http.HttpResponse.charset>` attribute + was added. + Tests ^^^^^ diff --git a/tests/responses/tests.py b/tests/responses/tests.py index afdd4220fb..e80e466a56 100644 --- a/tests/responses/tests.py +++ b/tests/responses/tests.py @@ -1,8 +1,16 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + +from django.conf import settings from django.http import HttpResponse -import unittest +from django.test import SimpleTestCase + +UTF8 = 'utf-8' +ISO88591 = 'iso-8859-1' -class HttpResponseTests(unittest.TestCase): +class HttpResponseTests(SimpleTestCase): def test_status_code(self): resp = HttpResponse(status=418) @@ -14,3 +22,45 @@ class HttpResponseTests(unittest.TestCase): resp = HttpResponse(status=814, reason=reason) self.assertEqual(resp.status_code, 814) self.assertEqual(resp.reason_phrase, reason) + + def test_charset_detection(self): + """ HttpResponse should parse charset from content_type.""" + response = HttpResponse('ok') + self.assertEqual(response.charset, settings.DEFAULT_CHARSET) + + response = 
HttpResponse(charset=ISO88591) + self.assertEqual(response.charset, ISO88591) + self.assertEqual(response['Content-Type'], 'text/html; charset=%s' % ISO88591) + + response = HttpResponse(content_type='text/plain; charset=%s' % UTF8, charset=ISO88591) + self.assertEqual(response.charset, ISO88591) + + response = HttpResponse(content_type='text/plain; charset=%s' % ISO88591) + self.assertEqual(response.charset, ISO88591) + + response = HttpResponse(content_type='text/plain; charset="%s"' % ISO88591) + self.assertEqual(response.charset, ISO88591) + + response = HttpResponse(content_type='text/plain; charset=') + self.assertEqual(response.charset, settings.DEFAULT_CHARSET) + + response = HttpResponse(content_type='text/plain') + self.assertEqual(response.charset, settings.DEFAULT_CHARSET) + + def test_response_content_charset(self): + """HttpResponse should encode based on charset.""" + content = "Café :)" + utf8_content = content.encode(UTF8) + iso_content = content.encode(ISO88591) + + response = HttpResponse(utf8_content) + self.assertContains(response, utf8_content) + + response = HttpResponse(iso_content, content_type='text/plain; charset=%s' % ISO88591) + self.assertContains(response, iso_content) + + response = HttpResponse(iso_content) + self.assertContains(response, iso_content) + + response = HttpResponse(iso_content, content_type='text/plain') + self.assertContains(response, iso_content)