Fixed #20197 -- Made XML serializer fail loudly when outputting unserializable chars

Thanks Tim Graham for the review.
This commit is contained in:
Claude Paroz 2015-06-19 08:42:48 +02:00
parent b769bbd4f6
commit 9368f51e12
5 changed files with 49 additions and 2 deletions

View File

@ -14,7 +14,9 @@ from django.conf import settings
from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models
from django.utils.encoding import smart_text
from django.utils.xmlutils import SimplerXMLGenerator
from django.utils.xmlutils import (
SimplerXMLGenerator, UnserializableContentError,
)
class Serializer(base.Serializer):
@ -78,7 +80,11 @@ class Serializer(base.Serializer):
# Get a "string version" of the object's data.
if getattr(obj, field.name) is not None:
self.xml.characters(field.value_to_string(obj))
try:
self.xml.characters(field.value_to_string(obj))
except UnserializableContentError:
raise ValueError("%s.%s (pk:%s) contains unserializable characters" % (
obj.__class__.__name__, field.name, obj._get_pk_val()))
else:
self.xml.addQuickElement("None")

View File

@ -2,9 +2,14 @@
Utilities for XML generation/parsing.
"""
import re
from xml.sax.saxutils import XMLGenerator
class UnserializableContentError(ValueError):
pass
class SimplerXMLGenerator(XMLGenerator):
def addQuickElement(self, name, contents=None, attrs=None):
"Convenience method for adding an element with no children"
@ -14,3 +19,10 @@ class SimplerXMLGenerator(XMLGenerator):
if contents is not None:
self.characters(contents)
self.endElement(name)
def characters(self, content):
if content and re.search(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]', content):
# Fail loudly when content has control chars (unsupported in XML 1.0)
# See http://www.w3.org/International/questions/qa-controls
raise UnserializableContentError("Control characters are not supported in XML 1.0")
XMLGenerator.characters(self, content)

View File

@ -720,6 +720,10 @@ Miscellaneous
* Private function ``django.utils.functional.total_ordering()`` has been
removed. It contained a workaround for a ``functools.total_ordering()`` bug
in Python versions older than 2.7.3.
* XML serialization (either through :djadmin:`dumpdata` or the syndication
framework) used to output any characters it received. Now if the content to
be serialized contains any control characters not allowed in the XML 1.0
standard, the serialization will fail with a :exc:`ValueError`.
.. _deprecated-features-1.9:

View File

@ -213,6 +213,16 @@ the auth.User model has such a relation to the auth.Permission model::
This example links the given user with the permission models with PKs 46 and 47.
.. admonition:: Control characters
.. versionchanged:: 1.9
If the content to be serialized contains control characters that are not
accepted in the XML 1.0 standard, the serialization will fail with a
:exc:`ValueError` exception. Read also the W3C's explanation of `HTML,
XHTML, XML and Control Codes
<http://www.w3.org/International/questions/qa-controls>`_.
.. _serialization-formats-json:
JSON

View File

@ -371,6 +371,21 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase):
ret_list.append("".join(temp))
return ret_list
def test_control_char_failure(self):
"""
Serializing control characters with XML should fail as those characters
are not supported in the XML 1.0 standard (except HT, LF, CR).
"""
self.a1.headline = "This contains \u0001 control \u0011 chars"
msg = "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk
with self.assertRaisesMessage(ValueError, msg):
serializers.serialize(self.serializer_name, [self.a1])
self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed"
self.assertIn(
"HT \t, LF \n, and CR \r are allowed",
serializers.serialize(self.serializer_name, [self.a1])
)
class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase):
serializer_name = "xml"