Added initial cut at serialization framework, along with some basic tests and a stab at some docs. This is all a bit rough right now, so expect some bumps.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@3225 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss 2006-06-28 16:00:37 +00:00
parent 414bc24e81
commit 4ea7a11659
6 changed files with 632 additions and 0 deletions

View File

@ -0,0 +1,76 @@
"""
Interfaces for serializing Django objects.
Usage::
>>> from django.core import serializers
>>> json = serializers.serialize("json", some_query_set)
>>> objects = list(serializers.deserialize("json", json))
To add your own serializers, use the SERIALIZATION_MODULES setting::
SERIALIZATION_MODULES = {
"csv" : "path.to.csv.serializer",
"txt" : "path.to.txt.serializer",
}
"""
from django.conf import settings
# Built-in serializers
BUILTIN_SERIALIZERS = {
"xml" : "django.core.serializers.xml_serializer",
}
_serializers = {}
def register_serializer(format, serializer_module):
"""Register a new serializer by passing in a module name."""
module = __import__(serializer_module, '', '', [''])
_serializers[format] = module
def unregister_serializer(format):
"""Unregister a given serializer"""
del _serializers[format]
def get_serializer(format):
if not _serializers:
_load_serializers()
return _serializers[format].Serializer
def get_deserializer(format):
if not _serializers:
_load_serializers()
return _serializers[format].Deserializer
def serialize(format, queryset, **options):
"""
Serialize a queryset (or any iterator that returns database objects) using
a certain serializer.
"""
s = get_serializer(format)()
s.serialize(queryset, **options)
return s.getvalue()
def deserialize(format, stream_or_string):
"""
Deserialize a stream or a string. Returns an iterator that yields ``(obj,
m2m_relation_dict)``, where ``obj`` is a instantiated -- but *unsaved* --
object, and ``m2m_relation_dict`` is a dictionary of ``{m2m_field_name :
list_of_related_objects}``.
"""
d = get_deserializer(format)
return d(stream_or_string)
def _load_serializers():
"""
Register built-in and settings-defined serializers. This is done lazily so
that user code has a chance to (e.g.) set up custom settings without
needing to be careful of import order.
"""
for format in BUILTIN_SERIALIZERS:
register_serializer(format, BUILTIN_SERIALIZERS[format])
if hasattr(settings, "SERIALIZATION_MODULES"):
for format in settings.SERIALIZATION_MODULES:
register_serializer(format, settings.SERIALIZATION_MODULES[format])

View File

@ -0,0 +1,159 @@
"""
Module for abstract serializer/unserializer base classes.
"""
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from django.db import models
class SerializationError(Exception):
"""Something bad happened during serialization."""
pass
class DeserializationError(Exception):
"""Something bad happened during deserialization."""
pass
class Serializer(object):
"""
Abstract serializer base class.
"""
def serialize(self, queryset, **options):
"""
Serialize a queryset.
"""
self.options = options
self.stream = options.get("stream", StringIO())
self.start_serialization()
for obj in queryset:
self.start_object(obj)
for field in obj._meta.fields:
if field.rel is None:
self.handle_field(obj, field)
else:
self.handle_fk_field(obj, field)
for field in obj._meta.many_to_many:
self.handle_m2m_field(obj, field)
self.end_object(obj)
self.end_serialization()
return self.getvalue()
def get_string_value(self, obj, field):
"""
Convert a field's value to a string.
"""
if isinstance(field, models.DateTimeField):
value = getattr(obj, field.name).strftime("%Y-%m-%d %H:%M:%S")
elif isinstance(field, models.FileField):
value = getattr(obj, "get_%s_url" % field.name, lambda: None)()
else:
value = field.flatten_data(follow=None, obj=obj).get(field.name, "")
return str(value)
def start_serialization(self):
"""
Called when serializing of the queryset starts.
"""
raise NotImplementedError
def end_serialization(self):
"""
Called when serializing of the queryset ends.
"""
pass
def start_object(self, obj):
"""
Called when serializing of an object starts.
"""
raise NotImplementedError
def end_object(self, obj):
"""
Called when serializing of an object ends.
"""
pass
def handle_field(self, obj, field):
"""
Called to handle each individual (non-relational) field on an object.
"""
raise NotImplementedError
def handle_fk_field(self, obj, field):
"""
Called to handle a ForeignKey field.
"""
raise NotImplementedError
def handle_m2m_field(self, obj, field):
"""
Called to handle a ManyToManyField.
"""
raise NotImplementedError
def getvalue(self):
"""
Return the fully serialized queryset.
"""
return self.stream.getvalue()
class Deserializer(object):
"""
Abstract base deserializer class.
"""
def __init__(self, stream_or_string, **options):
"""
Init this serializer given a stream or a string
"""
self.options = options
if isinstance(stream_or_string, basestring):
self.stream = StringIO(stream_or_string)
else:
self.stream = stream_or_string
# hack to make sure that the models have all been loaded before
# deserialization starts (otherwise subclass calls to get_model()
# and friends might fail...)
models.get_apps()
def __iter__(self):
return self
def next(self):
"""Iteration iterface -- return the next item in the stream"""
raise NotImplementedError
class DeserializedObject(object):
"""
A deserialzed model.
Basically a container for holding the pre-saved deserialized data along
with the many-to-many data saved with the object.
Call ``save()`` to save the object (with the many-to-many data) to the
database; call ``save(save_m2m=False)`` to save just the object fields
(and not touch the many-to-many stuff.)
"""
def __init__(self, obj, m2m_data=None):
self.object = obj
self.m2m_data = m2m_data
def __repr__(self):
return "<DeserializedObject: %s>" % str(self.object)
def save(self, save_m2m=True):
self.object.save()
if self.m2m_data and save_m2m:
for accessor_name, object_list in self.m2m_data.items():
setattr(self.object, accessor_name, object_list)
# prevent a second (possibly accidental) call to save() from saving
# the m2m data twice.
self.m2m_data = None

View File

@ -0,0 +1,218 @@
"""
XML serializer.
"""
from xml.dom import pulldom
from django.utils.xmlutils import SimplerXMLGenerator
from django.core.serializers import base
from django.db import models
class Serializer(base.Serializer):
"""
Serializes a QuerySet to XML.
"""
def start_serialization(self):
"""
Start serialization -- open the XML document and the root element.
"""
self.xml = SimplerXMLGenerator(self.stream, self.options.get("encoding", "utf-8"))
self.xml.startDocument()
self.xml.startElement("django-objects", {"version" : "1.0"})
def end_serialization(self):
"""
End serialization -- end the document.
"""
self.xml.endElement("django-objects")
self.xml.endDocument()
def start_object(self, obj):
"""
Called as each object is handled.
"""
if not hasattr(obj, "_meta"):
raise base.SerializationError("Non-model object (%s) encountered during serialization" % type(obj))
self.xml.startElement("object", {
"pk" : str(obj._get_pk_val()),
"model" : str(obj._meta),
})
def end_object(self, obj):
"""
Called after handling all fields for an object.
"""
self.xml.endElement("object")
def handle_field(self, obj, field):
"""
Called to handle each field on an object (except for ForeignKeys and
ManyToManyFields)
"""
self.xml.startElement("field", {
"name" : field.name,
"type" : field.get_internal_type()
})
# Get a "string version" of the object's data (this is handled by the
# serializer base class). None is handled specially.
value = self.get_string_value(obj, field)
if value is None:
self.xml.addQuickElement("None")
else:
self.xml.characters(str(value))
self.xml.endElement("field")
def handle_fk_field(self, obj, field):
"""
Called to handle a ForeignKey (we need to treat them slightly
differently from regular fields).
"""
self._start_relational_field(field)
related = getattr(obj, field.name)
if related is not None:
self.xml.characters(str(related._get_pk_val()))
else:
self.xml.addQuickElement("None")
self.xml.endElement("field")
def handle_m2m_field(self, obj, field):
"""
Called to handle a ManyToManyField. Related objects are only
serialized as references to the object's PK (i.e. the related *data*
is not dumped, just the relation).
"""
self._start_relational_field(field)
for relobj in getattr(obj, field.name).iterator():
self.xml.addQuickElement("object", attrs={"pk" : str(relobj._get_pk_val())})
self.xml.endElement("field")
def _start_relational_field(self, field):
"""
Helper to output the <field> element for relational fields
"""
self.xml.startElement("field", {
"name" : field.name,
"rel" : field.rel.__class__.__name__,
"to" : str(field.rel.to._meta),
})
class Deserializer(base.Deserializer):
"""
Deserialize XML.
"""
def __init__(self, stream_or_string, **options):
super(Deserializer, self).__init__(stream_or_string, **options)
self.encoding = self.options.get("encoding", "utf-8")
self.event_stream = pulldom.parse(self.stream)
def next(self):
for event, node in self.event_stream:
if event == "START_ELEMENT" and node.nodeName == "object":
self.event_stream.expandNode(node)
return self._handle_object(node)
raise StopIteration
def _handle_object(self, node):
"""
Convert an <object> node to a DeserializedObject.
"""
# Look up the model using the model loading mechanism. If this fails, bail.
Model = self._get_model_from_node(node, "model")
# Start building a data dictionary from the object. If the node is
# missing the pk attribute, bail.
pk = node.getAttribute("pk")
if not pk:
raise base.DeserializationError("<object> node is missing the 'pk' attribute")
data = {Model._meta.pk.name : pk}
# Also start building a dict of m2m data (this is saved as
# {m2m_accessor_attribute : [list_of_related_objects]})
m2m_data = {}
# Deseralize each field.
for field_node in node.getElementsByTagName("field"):
# If the field is missing the name attribute, bail (are you
# sensing a pattern here?)
field_name = field_node.getAttribute("name")
if not field_name:
raise base.DeserializationError("<field> node is missing the 'name' attribute")
# Get the field from the Model. This will raise a
# FieldDoesNotExist if, well, the field doesn't exist, which will
# be propagated correctly.
field = Model._meta.get_field(field_name)
# As is usually the case, relation fields get the special treatment.
if field.rel and isinstance(field.rel, models.ManyToManyRel):
m2m_data[field.name] = self._handle_m2m_field_node(field_node)
elif field.rel and isinstance(field.rel, models.ManyToOneRel):
data[field.name] = self._handle_fk_field_node(field_node)
else:
value = field.to_python(getInnerText(field_node).strip().encode(self.encoding))
data[field.name] = value
# Return a DeserializedObject so that the m2m data has a place to live.
return base.DeserializedObject(Model(**data), m2m_data)
def _handle_fk_field_node(self, node):
"""
Handle a <field> node for a ForeignKey
"""
# Try to set the foreign key by looking up the foreign related object.
# If it doesn't exist, set the field to None (which might trigger
# validation error, but that's expected).
RelatedModel = self._get_model_from_node(node, "to")
return RelatedModel.objects.get(pk=getInnerText(node).strip().encode(self.encoding))
def _handle_m2m_field_node(self, node):
"""
Handle a <field> node for a ManyToManyField
"""
# Load the related model
RelatedModel = self._get_model_from_node(node, "to")
# Look up all the related objects. Using the in_bulk() lookup ensures
# that missing related objects don't cause an exception
related_ids = [c.getAttribute("pk").encode(self.encoding) for c in node.getElementsByTagName("object")]
return RelatedModel._default_manager.in_bulk(related_ids).values()
def _get_model_from_node(self, node, attr):
"""
Helper to look up a model from a <object model=...> or a <field
rel=... to=...> node.
"""
model_identifier = node.getAttribute(attr)
if not model_identifier:
raise base.DeserializationError(
"<%s> node is missing the required '%s' attribute" \
% (node.nodeName, attr))
try:
Model = models.get_model(*model_identifier.split("."))
except TypeError:
Model = None
if Model is None:
raise base.DeserializationError(
"<%s> node has invalid model identifier: '%s'" % \
(node.nodeName, model_identifier))
return Model
def getInnerText(node):
"""
Get all the inner text of a DOM node (recursively).
"""
# inspired by http://mail.python.org/pipermail/xml-sig/2005-March/011022.html
inner_text = []
for child in node.childNodes:
if child.nodeType == child.TEXT_NODE or child.nodeType == child.CDATA_SECTION_NODE:
inner_text.append(child.data)
elif child.nodeType == child.ELEMENT_NODE:
inner_text.extend(getInnerText(child))
else:
pass
return "".join(inner_text)

85
docs/serialization.txt Normal file
View File

@ -0,0 +1,85 @@
==========================
Serializing Django objects
==========================
.. note::
This API is currently under heavy development and may change --
perhaps drastically -- in the future.
You have been warned.
Django's serialization framework provides a mechanism for "translating" Django
objects into other formats. Usually these other formats will be text-based and
used for sending Django objects over a wire, but it's possible for a
serializer to handle any format (text-based or not).
Serializing data
----------------
At the highest level, serializing data is a very simple operation::
from django.core import serializers
data = serializers.serialize("xml", SomeModel.objects.all())
The arguments to the ``serialize`` function are the format to serialize the
data to (see `Serialization formats`_) and a QuerySet_ to serialize.
(Actually, the second argument can be any iterator that yields Django objects,
but it'll almost always be a QuerySet).
.. _QuerySet: ../db_api/#retrieving-objects
You can also use a serializer object directly::
xml_serializer = serializers.get_serializer("xml")
xml_serializer.serialize(queryset)
data = xml_serializer.getvalue()
This is useful if you want to serialize data directly to a file-like object
(which includes a HTTPResponse_)::
out = open("file.xml", "w")
xml_serializer.serialize(SomeModel.objects.all(), stream=out)
.. _HTTPResponse: ../request_response/#httpresponse-objects
Deserializing data
------------------
Deserializing data is also a fairly simple operation::
for obj in serializers.deserialize("xml", data):
do_something_with(obj)
As you can see, the ``deserialize`` function takes the same format argument as
``serialize``, a string or stream of data, and returns an iterator.
However, here it gets slightly complicated. The objects returned by the
``deserialize`` iterator *aren't* simple Django objects. Instead, they are
special ``DeserializedObject`` instances that wrap a created -- but unsaved --
object and any associated relationship data.
Calling ``DeserializedObject.save()`` saves the object to the database.
This ensures that deserializing is a non-destructive operation even if the
data in your serialized representation doesn't match what's currently in the
database. Usually, working with these ``DeserializedObject`` instances looks
something like::
for deserialized_object in serializers.deserialize("xml", data):
if object_should_be_saved(deserialized_object):
obj.save()
In other words, the usual use is to examine the deserialized objects to make
sure that they are "appropriate" for saving before doing so. Of course, if you trust your data source you could just save the object and move on.
The Django object itself can be inspected as ``deserialized_object.object``.
Serialization formats
---------------------
Django "ships" with a few included serializers, and there's a simple API for creating and registering your own...
.. note::
... which will be documented once the API is stable :)

View File

View File

@ -0,0 +1,94 @@
"""
XXX. Serialization
``django.core.serializers`` provides interfaces to converting Django querysets
to and from "flat" data (i.e. strings).
"""
from django.db import models
class Category(models.Model):
name = models.CharField(maxlength=20)
class Meta:
ordering = ('name',)
def __str__(self):
return self.name
class Author(models.Model):
name = models.CharField(maxlength=20)
class Meta:
ordering = ('name',)
def __str__(self):
return self.name
class Article(models.Model):
author = models.ForeignKey(Author)
headline = models.CharField(maxlength=50)
pub_date = models.DateTimeField()
categories = models.ManyToManyField(Category)
class Meta:
ordering = ('pub_date',)
def __str__(self):
return self.headline
API_TESTS = """
# Create some data:
>>> from datetime import datetime
>>> sports = Category(name="Sports")
>>> music = Category(name="Music")
>>> op_ed = Category(name="Op-Ed")
>>> sports.save(); music.save(); op_ed.save()
>>> joe = Author(name="Joe")
>>> jane = Author(name="Jane")
>>> joe.save(); jane.save()
>>> a1 = Article(
... author = jane,
... headline = "Poker has no place on ESPN",
... pub_date = datetime(2006, 6, 16, 11, 00))
>>> a2 = Article(
... author = joe,
... headline = "Time to reform copyright",
... pub_date = datetime(2006, 6, 16, 13, 00))
>>> a1.save(); a2.save()
>>> a1.categories = [sports, op_ed]
>>> a2.categories = [music, op_ed]
# Serialize a queryset to XML
>>> from django.core import serializers
>>> xml = serializers.serialize("xml", Article.objects.all())
# The output is valid XML
>>> from xml.dom import minidom
>>> dom = minidom.parseString(xml)
# Deserializing has a similar interface, except that special DeserializedObject
# instances are returned. This is because data might have changed in the
# database since the data was serialized (we'll simulate that below).
>>> for obj in serializers.deserialize("xml", xml):
... print obj
<DeserializedObject: Poker has no place on ESPN>
<DeserializedObject: Time to reform copyright>
# Deserializing data with different field values doesn't change anything in the
# database until we call save():
>>> xml = xml.replace("Poker has no place on ESPN", "Poker has no place on television")
>>> objs = list(serializers.deserialize("xml", xml))
# Even those I deserialized, the database hasn't been touched
>>> Article.objects.all()
[<Article: Poker has no place on ESPN>, <Article: Time to reform copyright>]
# But when I save, the data changes as you might except.
>>> objs[0].save()
>>> Article.objects.all()
[<Article: Poker has no place on television>, <Article: Time to reform copyright>]
"""