diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py index e8415230d6..9a4bd77d44 100644 --- a/django/core/serializers/xml_serializer.py +++ b/django/core/serializers/xml_serializer.py @@ -14,7 +14,9 @@ from django.conf import settings from django.core.serializers import base from django.db import DEFAULT_DB_ALIAS, models from django.utils.encoding import smart_text -from django.utils.xmlutils import SimplerXMLGenerator +from django.utils.xmlutils import ( + SimplerXMLGenerator, UnserializableContentError, +) class Serializer(base.Serializer): @@ -78,7 +80,11 @@ class Serializer(base.Serializer): # Get a "string version" of the object's data. if getattr(obj, field.name) is not None: - self.xml.characters(field.value_to_string(obj)) + try: + self.xml.characters(field.value_to_string(obj)) + except UnserializableContentError: + raise ValueError("%s.%s (pk:%s) contains unserializable characters" % ( + obj.__class__.__name__, field.name, obj._get_pk_val())) else: self.xml.addQuickElement("None") diff --git a/django/utils/xmlutils.py b/django/utils/xmlutils.py index b3f7e4defb..f1edfb2ac9 100644 --- a/django/utils/xmlutils.py +++ b/django/utils/xmlutils.py @@ -2,9 +2,14 @@ Utilities for XML generation/parsing. 
""" +import re from xml.sax.saxutils import XMLGenerator +class UnserializableContentError(ValueError): + pass + + class SimplerXMLGenerator(XMLGenerator): def addQuickElement(self, name, contents=None, attrs=None): "Convenience method for adding an element with no children" @@ -14,3 +19,10 @@ class SimplerXMLGenerator(XMLGenerator): if contents is not None: self.characters(contents) self.endElement(name) + + def characters(self, content): + if content and re.search(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]', content): + # Fail loudly when content has control chars (unsupported in XML 1.0) + # See http://www.w3.org/International/questions/qa-controls + raise UnserializableContentError("Control characters are not supported in XML 1.0") + XMLGenerator.characters(self, content) diff --git a/docs/releases/1.9.txt b/docs/releases/1.9.txt index 9641ab2307..a8c0b960c7 100644 --- a/docs/releases/1.9.txt +++ b/docs/releases/1.9.txt @@ -720,6 +720,10 @@ Miscellaneous * Private function ``django.utils.functional.total_ordering()`` has been removed. It contained a workaround for a ``functools.total_ordering()`` bug in Python versions older than 2.7.3. +* XML serialization (either through :djadmin:`dumpdata` or the syndication + framework) used to output any characters it received. Now if the content to + be serialized contains any control characters not allowed in the XML 1.0 + standard, the serialization will fail with a :exc:`ValueError`. .. _deprecated-features-1.9: diff --git a/docs/topics/serialization.txt b/docs/topics/serialization.txt index 6828eb0f62..b48c625fe8 100644 --- a/docs/topics/serialization.txt +++ b/docs/topics/serialization.txt @@ -213,6 +213,16 @@ the auth.User model has such a relation to the auth.Permission model:: This example links the given user with the permission models with PKs 46 and 47. +.. admonition:: Control characters + + .. 
versionchanged:: 1.9 + + If the content to be serialized contains control characters that are not + accepted in the XML 1.0 standard, the serialization will fail with a + :exc:`ValueError` exception. Read also the W3C's explanation of `HTML, + XHTML, XML and Control Codes + <http://www.w3.org/International/questions/qa-controls>`_. + .. _serialization-formats-json: JSON diff --git a/tests/serializers/tests.py b/tests/serializers/tests.py index f9b65cba82..505e599f5e 100644 --- a/tests/serializers/tests.py +++ b/tests/serializers/tests.py @@ -371,6 +371,21 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase): ret_list.append("".join(temp)) return ret_list + def test_control_char_failure(self): + """ + Serializing control characters with XML should fail as those characters + are not supported in the XML 1.0 standard (except HT, LF, CR). + """ + self.a1.headline = "This contains \u0001 control \u0011 chars" + msg = "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk + with self.assertRaisesMessage(ValueError, msg): + serializers.serialize(self.serializer_name, [self.a1]) + self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed" + self.assertIn( + "HT \t, LF \n, and CR \r are allowed", + serializers.serialize(self.serializer_name, [self.a1]) + ) + class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase): serializer_name = "xml"