From 68fc21b3784aa34c7ba5515ab02ef0c7b6ee856d Mon Sep 17 00:00:00 2001 From: Hasan Ramezani Date: Thu, 23 Apr 2020 22:14:32 +0200 Subject: [PATCH] Fixed #29249 -- Made JSON and YAML serializers use Unicode by default. --- django/core/serializers/json.py | 1 + django/core/serializers/pyyaml.py | 1 + docs/releases/3.1.txt | 5 +++++ docs/topics/serialization.txt | 10 ++++++++++ tests/serializers/test_xml.py | 8 -------- tests/serializers/tests.py | 9 ++++++++- 6 files changed, 25 insertions(+), 9 deletions(-) diff --git a/django/core/serializers/json.py b/django/core/serializers/json.py index 5d35440a6c..886e8f894c 100644 --- a/django/core/serializers/json.py +++ b/django/core/serializers/json.py @@ -29,6 +29,7 @@ class Serializer(PythonSerializer): # Prevent trailing spaces self.json_kwargs['separators'] = (',', ': ') self.json_kwargs.setdefault('cls', DjangoJSONEncoder) + self.json_kwargs.setdefault('ensure_ascii', False) def start_serialization(self): self._init_options() diff --git a/django/core/serializers/pyyaml.py b/django/core/serializers/pyyaml.py index 778c933584..8f89a633c8 100644 --- a/django/core/serializers/pyyaml.py +++ b/django/core/serializers/pyyaml.py @@ -57,6 +57,7 @@ class Serializer(PythonSerializer): super().handle_field(obj, field) def end_serialization(self): + self.options.setdefault('allow_unicode', True) yaml.dump(self.objects, self.stream, Dumper=DjangoSafeDumper, **self.options) def getvalue(self): diff --git a/docs/releases/3.1.txt b/docs/releases/3.1.txt index fbefef589f..16cb9c4e6e 100644 --- a/docs/releases/3.1.txt +++ b/docs/releases/3.1.txt @@ -668,6 +668,11 @@ Miscellaneous * The undocumented ``version`` parameter to the :class:`~django.contrib.gis.db.models.functions.AsKML` function is removed. +* :ref:`JSON and YAML serializers <serialization-formats>`, used by + :djadmin:`dumpdata`, now dump all data with Unicode by default.
If you need + the previous behavior, pass ``ensure_ascii=True`` to the JSON serializer, or + ``allow_unicode=False`` to the YAML serializer. + .. _deprecated-features-3.1: Features deprecated in 3.1 diff --git a/docs/topics/serialization.txt b/docs/topics/serialization.txt index 93af47ef28..00322ac64b 100644 --- a/docs/topics/serialization.txt +++ b/docs/topics/serialization.txt @@ -274,6 +274,11 @@ function:: Also note that GeoDjango provides a :doc:`customized GeoJSON serializer </ref/contrib/gis/serializers>`. +.. versionchanged:: 3.1 + + All data is now dumped with Unicode. If you need the previous behavior, + pass ``ensure_ascii=True`` to the ``serializers.serialize()`` function. + ``DjangoJSONEncoder`` ~~~~~~~~~~~~~~~~~~~~~ @@ -315,6 +320,11 @@ again a mapping with the key being name of the field and the value the value:: Referential fields are again represented by the PK or sequence of PKs. +.. versionchanged:: 3.1 + + All data is now dumped with Unicode. If you need the previous behavior, + pass ``allow_unicode=False`` to the ``serializers.serialize()`` function. + ..
_topics-serialization-natural-keys: Natural keys diff --git a/tests/serializers/test_xml.py b/tests/serializers/test_xml.py index 4bbfcf18b2..b11cfdd864 100644 --- a/tests/serializers/test_xml.py +++ b/tests/serializers/test_xml.py @@ -4,7 +4,6 @@ from django.core import serializers from django.core.serializers.xml_serializer import DTDForbidden from django.test import TestCase, TransactionTestCase -from .models import Author from .tests import SerializersTestBase, SerializersTransactionTestBase @@ -87,13 +86,6 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase): with self.assertRaises(DTDForbidden): next(serializers.deserialize('xml', xml)) - def test_unicode_serialization(self): - unicode_name = 'יוניקוד' - data = serializers.serialize('xml', [Author(name=unicode_name)]) - self.assertIn(unicode_name, data) - objs = list(serializers.deserialize('xml', data)) - self.assertEqual(objs[0].object.name, unicode_name) - class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase): serializer_name = "xml" diff --git a/tests/serializers/tests.py b/tests/serializers/tests.py index 863ee00e89..9d1be81146 100644 --- a/tests/serializers/tests.py +++ b/tests/serializers/tests.py @@ -202,7 +202,7 @@ class SerializersTestBase: for field_name in valid_fields: self.assertTrue(self._get_field_values(serial_str, field_name)) - def test_serialize_unicode(self): + def test_serialize_unicode_roundtrip(self): """Unicode makes the roundtrip intact""" actor_name = "Za\u017c\u00f3\u0142\u0107" movie_title = 'G\u0119\u015bl\u0105 ja\u017a\u0144' @@ -219,6 +219,13 @@ class SerializersTestBase: mv_obj = obj_list[0].object self.assertEqual(mv_obj.title, movie_title) + def test_unicode_serialization(self): + unicode_name = 'יוניקוד' + data = serializers.serialize(self.serializer_name, [Author(name=unicode_name)]) + self.assertIn(unicode_name, data) + objs = list(serializers.deserialize(self.serializer_name, data)) + self.assertEqual(objs[0].object.name, 
unicode_name) + def test_serialize_progressbar(self): fake_stdout = StringIO() serializers.serialize(