""" XML serializer. """ from django.conf import settings from django.core.serializers import base from django.db import models, DEFAULT_DB_ALIAS from django.utils.xmlutils import SimplerXMLGenerator from django.utils.encoding import smart_unicode from xml.dom import pulldom class Serializer(base.Serializer): """ Serializes a QuerySet to XML. """ def indent(self, level): if self.options.get('indent', None) is not None: self.xml.ignorableWhitespace('\n' + ' ' * self.options.get('indent', None) * level) def start_serialization(self): """ Start serialization -- open the XML document and the root element. """ self.xml = SimplerXMLGenerator(self.stream, self.options.get("encoding", settings.DEFAULT_CHARSET)) self.xml.startDocument() self.xml.startElement("django-objects", {"version" : "1.0"}) def end_serialization(self): """ End serialization -- end the document. """ self.indent(0) self.xml.endElement("django-objects") self.xml.endDocument() def start_object(self, obj): """ Called as each object is handled. """ if not hasattr(obj, "_meta"): raise base.SerializationError("Non-model object (%s) encountered during serialization" % type(obj)) self.indent(1) self.xml.startElement("object", { "pk" : smart_unicode(obj._get_pk_val()), "model" : smart_unicode(obj._meta), }) def end_object(self, obj): """ Called after handling all fields for an object. """ self.indent(1) self.xml.endElement("object") def handle_field(self, obj, field): """ Called to handle each field on an object (except for ForeignKeys and ManyToManyFields) """ self.indent(2) self.xml.startElement("field", { "name" : field.name, "type" : field.get_internal_type() }) # Get a "string version" of the object's data. if getattr(obj, field.name) is not None: self.xml.characters(field.value_to_string(obj)) else: self.xml.addQuickElement("None") self.xml.endElement("field") def handle_fk_field(self, obj, field): """ Called to handle a ForeignKey (we need to treat them slightly differently from regular fields). """ self._start_relational_field(field) related = getattr(obj, field.name) if related is not None: if self.use_natural_keys and hasattr(related, 'natural_key'): # If related object has a natural key, use it related = related.natural_key() # Iterable natural keys are rolled out as subelements for key_value in related: self.xml.startElement("natural", {}) self.xml.characters(smart_unicode(key_value)) self.xml.endElement("natural") else: if field.rel.field_name == related._meta.pk.name: # Related to remote object via primary key related = related._get_pk_val() else: # Related to remote object via other field related = getattr(related, field.rel.field_name) self.xml.characters(smart_unicode(related)) else: self.xml.addQuickElement("None") self.xml.endElement("field") def handle_m2m_field(self, obj, field): """ Called to handle a ManyToManyField. Related objects are only serialized as references to the object's PK (i.e. the related *data* is not dumped, just the relation). """ if field.rel.through._meta.auto_created: self._start_relational_field(field) if self.use_natural_keys and hasattr(field.rel.to, 'natural_key'): # If the objects in the m2m have a natural key, use it def handle_m2m(value): natural = value.natural_key() # Iterable natural keys are rolled out as subelements self.xml.startElement("object", {}) for key_value in natural: self.xml.startElement("natural", {}) self.xml.characters(smart_unicode(key_value)) self.xml.endElement("natural") self.xml.endElement("object") else: def handle_m2m(value): self.xml.addQuickElement("object", attrs={ 'pk' : smart_unicode(value._get_pk_val()) }) for relobj in getattr(obj, field.name).iterator(): handle_m2m(relobj) self.xml.endElement("field") def _start_relational_field(self, field): """ Helper to output the element for relational fields """ self.indent(2) self.xml.startElement("field", { "name" : field.name, "rel" : field.rel.__class__.__name__, "to" : smart_unicode(field.rel.to._meta), }) class Deserializer(base.Deserializer): """ Deserialize XML. """ def __init__(self, stream_or_string, **options): super(Deserializer, self).__init__(stream_or_string, **options) self.event_stream = pulldom.parse(self.stream) self.db = options.pop('using', DEFAULT_DB_ALIAS) def next(self): for event, node in self.event_stream: if event == "START_ELEMENT" and node.nodeName == "object": self.event_stream.expandNode(node) return self._handle_object(node) raise StopIteration def _handle_object(self, node): """ Convert an node to a DeserializedObject. """ # Look up the model using the model loading mechanism. If this fails, # bail. Model = self._get_model_from_node(node, "model") # Start building a data dictionary from the object. If the node is # missing the pk attribute, bail. pk = node.getAttribute("pk") if not pk: raise base.DeserializationError(" node is missing the 'pk' attribute") data = {Model._meta.pk.attname : Model._meta.pk.to_python(pk)} # Also start building a dict of m2m data (this is saved as # {m2m_accessor_attribute : [list_of_related_objects]}) m2m_data = {} # Deseralize each field. for field_node in node.getElementsByTagName("field"): # If the field is missing the name attribute, bail (are you # sensing a pattern here?) field_name = field_node.getAttribute("name") if not field_name: raise base.DeserializationError(" node is missing the 'name' attribute") # Get the field from the Model. This will raise a # FieldDoesNotExist if, well, the field doesn't exist, which will # be propagated correctly. field = Model._meta.get_field(field_name) # As is usually the case, relation fields get the special treatment. if field.rel and isinstance(field.rel, models.ManyToManyRel): m2m_data[field.name] = self._handle_m2m_field_node(field_node, field) elif field.rel and isinstance(field.rel, models.ManyToOneRel): data[field.attname] = self._handle_fk_field_node(field_node, field) else: if field_node.getElementsByTagName('None'): value = None else: value = field.to_python(getInnerText(field_node).strip()) data[field.name] = value # Return a DeserializedObject so that the m2m data has a place to live. return base.DeserializedObject(Model(**data), m2m_data) def _handle_fk_field_node(self, node, field): """ Handle a node for a ForeignKey """ # Check if there is a child node named 'None', returning None if so. if node.getElementsByTagName('None'): return None else: if hasattr(field.rel.to._default_manager, 'get_by_natural_key'): keys = node.getElementsByTagName('natural') if keys: # If there are 'natural' subelements, it must be a natural key field_value = [getInnerText(k).strip() for k in keys] obj = field.rel.to._default_manager.db_manager(self.db).get_by_natural_key(*field_value) obj_pk = getattr(obj, field.rel.field_name) else: # Otherwise, treat like a normal PK field_value = getInnerText(node).strip() obj_pk = field.rel.to._meta.get_field(field.rel.field_name).to_python(field_value) return obj_pk else: field_value = getInnerText(node).strip() return field.rel.to._meta.get_field(field.rel.field_name).to_python(field_value) def _handle_m2m_field_node(self, node, field): """ Handle a node for a ManyToManyField. """ if hasattr(field.rel.to._default_manager, 'get_by_natural_key'): def m2m_convert(n): keys = n.getElementsByTagName('natural') if keys: # If there are 'natural' subelements, it must be a natural key field_value = [getInnerText(k).strip() for k in keys] obj_pk = field.rel.to._default_manager.db_manager(self.db).get_by_natural_key(*field_value).pk else: # Otherwise, treat like a normal PK value. obj_pk = field.rel.to._meta.pk.to_python(n.getAttribute('pk')) return obj_pk else: m2m_convert = lambda n: field.rel.to._meta.pk.to_python(n.getAttribute('pk')) return [m2m_convert(c) for c in node.getElementsByTagName("object")] def _get_model_from_node(self, node, attr): """ Helper to look up a model from a or a node. """ model_identifier = node.getAttribute(attr) if not model_identifier: raise base.DeserializationError( "<%s> node is missing the required '%s' attribute" \ % (node.nodeName, attr)) try: Model = models.get_model(*model_identifier.split(".")) except TypeError: Model = None if Model is None: raise base.DeserializationError( "<%s> node has invalid model identifier: '%s'" % \ (node.nodeName, model_identifier)) return Model def getInnerText(node): """ Get all the inner text of a DOM node (recursively). """ # inspired by http://mail.python.org/pipermail/xml-sig/2005-March/011022.html inner_text = [] for child in node.childNodes: if child.nodeType == child.TEXT_NODE or child.nodeType == child.CDATA_SECTION_NODE: inner_text.append(child.data) elif child.nodeType == child.ELEMENT_NODE: inner_text.extend(getInnerText(child)) else: pass return u"".join(inner_text)