From b1851cca3ee1494cc020f1676e2029ef138250da Mon Sep 17 00:00:00 2001 From: Luke Plant Date: Sat, 21 Jun 2008 20:57:05 +0000 Subject: [PATCH] Fixed bug with Model.delete() which did not always delete objects in the right order. git-svn-id: http://code.djangoproject.com/svn/django/trunk@7722 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/db/models/base.py | 19 +++-- django/db/models/query.py | 116 +++++++++++++++++++++++++++--- tests/modeltests/delete/models.py | 103 ++++++++++++++++++++++---- 3 files changed, 205 insertions(+), 33 deletions(-) diff --git a/django/db/models/base.py b/django/db/models/base.py index 3b6a5fe1ea..14a48044df 100644 --- a/django/db/models/base.py +++ b/django/db/models/base.py @@ -10,7 +10,7 @@ from django.core import validators from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError from django.db.models.fields import AutoField, ImageField, FieldDoesNotExist from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField -from django.db.models.query import delete_objects, Q +from django.db.models.query import delete_objects, Q, CollectedObjects from django.db.models.options import Options, AdminOptions from django.db import connection, transaction from django.db.models import signals @@ -368,17 +368,16 @@ class Model(object): error_dict[f.name] = errors return error_dict - def _collect_sub_objects(self, seen_objs): + def _collect_sub_objects(self, seen_objs, parent=None, nullable=False): """ Recursively populates seen_objs with all objects related to this object. - When done, seen_objs will be in the format: - {model_class: {pk_val: obj, pk_val: obj, ...}, - model_class: {pk_val: obj, pk_val: obj, ...}, ...} + When done, seen_objs.items() will be in the format: + [(model_class, {pk_val: obj, pk_val: obj, ...}), + (model_class, {pk_val: obj, pk_val: obj, ...}),...] """ pk_val = self._get_pk_val() - if pk_val in seen_objs.setdefault(self.__class__, {}): + if seen_objs.add(self.__class__, pk_val, self, parent, nullable): return - seen_objs[self.__class__][pk_val] = self for related in self._meta.get_all_related_objects(): rel_opts_name = related.get_accessor_name() @@ -388,16 +387,16 @@ class Model(object): except ObjectDoesNotExist: pass else: - sub_obj._collect_sub_objects(seen_objs) + sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null) else: for sub_obj in getattr(self, rel_opts_name).all(): - sub_obj._collect_sub_objects(seen_objs) + sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null) def delete(self): assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname) # Find all the objects than need to be deleted - seen_objs = SortedDict() + seen_objs = CollectedObjects() self._collect_sub_objects(seen_objs) # Actually delete the objects diff --git a/django/db/models/query.py b/django/db/models/query.py index fb6d116a6e..8714cffb7f 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -16,6 +16,92 @@ ITER_CHUNK_SIZE = CHUNK_SIZE # Pull into this namespace for backwards compatibility EmptyResultSet = sql.EmptyResultSet +class CyclicDependency(Exception): + pass + +class CollectedObjects(object): + """ + A container that stores keys and lists of values along with + remembering the parent objects for all the keys. + + This is used for the database object deletion routines so that we + can calculate the 'leaf' objects which should be deleted first. + """ + + def __init__(self): + self.data = {} + self.children = {} + + def add(self, model, pk, obj, parent_model, nullable=False): + """ + Adds an item. + model is the class of the object being added, + pk is the primary key, obj is the object itself, + parent_model is the model of the parent object + that this object was reached through, nullable should + be True if this relation is nullable. + + If the item already existed in the structure, + returns true, otherwise false. + """ + d = self.data.setdefault(model, SortedDict()) + retval = pk in d + d[pk] = obj + # Nullable relationships can be ignored -- they + # are nulled out before deleting, and therefore + # do not affect the order in which objects have + # to be deleted. + if parent_model is not None and not nullable: + self.children.setdefault(parent_model, []).append(model) + + return retval + + def __contains__(self, key): + return self.data.__contains__(key) + + def __getitem__(self, key): + return self.data[key] + + def __nonzero__(self): + return bool(self.data) + + def iteritems(self): + for k in self.ordered_keys(): + yield k, self[k] + + def items(self): + return list(self.iteritems()) + + def keys(self): + return self.ordered_keys() + + def ordered_keys(self): + """ + Returns the models in the order that they should be + dealth with i.e. models with no dependencies first. + """ + dealt_with = SortedDict() + # Start with items that have no children + models = self.data.keys() + while len(dealt_with) < len(models): + found = False + for model in models: + children = self.children.setdefault(model, []) + if len([c for c in children if c not in dealt_with]) == 0: + dealt_with[model] = None + found = True + if not found: + raise CyclicDependency("There is a cyclic dependency of items to be processed.") + + return dealt_with.keys() + + def unordered_keys(self): + """ + Fallback for the case where is a cyclic dependency but we + don't care. + """ + return self.data.keys() + class QuerySet(object): "Represents a lazy database lookup for a set of objects" def __init__(self, model=None, query=None): @@ -275,7 +361,7 @@ class QuerySet(object): while 1: # Collect all the objects to be deleted in this chunk, and all the # objects that are related to the objects that are to be deleted. - seen_objs = SortedDict() + seen_objs = CollectedObjects() for object in del_query[:CHUNK_SIZE]: object._collect_sub_objects(seen_objs) @@ -682,19 +768,27 @@ def delete_objects(seen_objs): Iterate through a list of seen classes, and remove any instances that are referred to. """ - ordered_classes = seen_objs.keys() - ordered_classes.reverse() + try: + ordered_classes = seen_objs.keys() + except CyclicDependency: + # if there is a cyclic dependency, we cannot in general delete + # the objects. However, if an appropriate transaction is set + # up, or if the database is lax enough, it will succeed. + # So for now, we go ahead and try anway. + ordered_classes = seen_objs.unordered_keys() + obj_pairs = {} for cls in ordered_classes: - seen_objs[cls] = seen_objs[cls].items() - seen_objs[cls].sort() + items = seen_objs[cls].items() + items.sort() + obj_pairs[cls] = items # Pre notify all instances to be deleted - for pk_val, instance in seen_objs[cls]: + for pk_val, instance in items: dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance) - pk_list = [pk for pk,instance in seen_objs[cls]] + pk_list = [pk for pk,instance in items] del_query = sql.DeleteQuery(cls, connection) del_query.delete_batch_related(pk_list) @@ -705,15 +799,17 @@ def delete_objects(seen_objs): # Now delete the actual data for cls in ordered_classes: - seen_objs[cls].reverse() - pk_list = [pk for pk,instance in seen_objs[cls]] + items = obj_pairs[cls] + items.reverse() + + pk_list = [pk for pk,instance in items] del_query = sql.DeleteQuery(cls, connection) del_query.delete_batch(pk_list) # Last cleanup; set NULLs where there once was a reference to the # object, NULL the primary key of the found objects, and perform # post-notification. - for pk_val, instance in seen_objs[cls]: + for pk_val, instance in items: for field in cls._meta.fields: if field.rel and field.null and field.rel.to in seen_objs: setattr(instance, field.attname, None) diff --git a/tests/modeltests/delete/models.py b/tests/modeltests/delete/models.py index d4f7dd1d1f..f5b423e9ff 100644 --- a/tests/modeltests/delete/models.py +++ b/tests/modeltests/delete/models.py @@ -33,8 +33,46 @@ class D(DefaultRepr, models.Model): # However, if we start at As, we might find Bs first (in which # case things will be nice), or find Ds first. +# Some mutually dependent models, but nullable +class E(DefaultRepr, models.Model): + f = models.ForeignKey('F', null=True, related_name='e_rel') + +class F(DefaultRepr, models.Model): + e = models.ForeignKey(E, related_name='f_rel') + __test__ = {'API_TESTS': """ +# First, some tests for the datastructure we use + +>>> from django.db.models.query import CollectedObjects + +>>> g = CollectedObjects() +>>> g.add("key1", 1, "item1", None) +False +>>> g["key1"] +{1: 'item1'} +>>> g.add("key2", 1, "item1", "key1") +False +>>> g.add("key2", 2, "item2", "key1") +False +>>> g["key2"] +{1: 'item1', 2: 'item2'} +>>> g.add("key3", 1, "item1", "key1") +False +>>> g.add("key3", 1, "item1", "key2") +True +>>> g.ordered_keys() +['key3', 'key2', 'key1'] + +>>> g.add("key2", 1, "item1", "key3") +True +>>> g.ordered_keys() +Traceback (most recent call last): + ... +CyclicDependency: There is a cyclic dependency of items to be processed. + + + # Due to the way that transactions work in the test harness, # doing m.delete() here can work but fail in a real situation, # since it may delete all objects, but not in the right order. @@ -42,11 +80,10 @@ __test__ = {'API_TESTS': """ # Also, it is possible that the order is correct 'accidentally', due # solely to order of imports etc. To check this, we set the order -# that 'get_models()' will retrieve to a known 'tricky' order, and -# then try again with the reverse and try again. Slightly naughty -# access to internals here. +# that 'get_models()' will retrieve to a known 'nice' order, and +# then try again with a known 'tricky' order. Slightly naughty +# access to internals here :-) ->>> from django.utils.datastructures import SortedDict >>> from django.db.models.loading import cache # Nice order @@ -56,8 +93,6 @@ __test__ = {'API_TESTS': """ >>> del C._meta._related_objects_cache >>> del D._meta._related_objects_cache - - >>> a1 = A() >>> a1.save() >>> b1 = B(a=a1) @@ -67,9 +102,9 @@ __test__ = {'API_TESTS': """ >>> d1 = D(c=c1, a=a1) >>> d1.save() ->>> sd = SortedDict() ->>> a1._collect_sub_objects(sd) ->>> list(reversed(sd.keys())) +>>> o = CollectedObjects() +>>> a1._collect_sub_objects(o) +>>> o.keys() [, , , ] >>> a1.delete() @@ -80,7 +115,6 @@ __test__ = {'API_TESTS': """ >>> del C._meta._related_objects_cache >>> del D._meta._related_objects_cache - >>> a2 = A() >>> a2.save() >>> b2 = B(a=a2) @@ -90,13 +124,56 @@ __test__ = {'API_TESTS': """ >>> d2 = D(c=c2, a=a2) >>> d2.save() ->>> sd2 = SortedDict() ->>> a2._collect_sub_objects(sd2) ->>> list(reversed(sd2.keys())) +>>> o = CollectedObjects() +>>> a2._collect_sub_objects(o) +>>> o.keys() [, , , ] >>> a2.delete() +# Tests for nullable related fields +>>> g = CollectedObjects() +>>> g.add("key1", 1, "item1", None) +False +>>> g.add("key2", 1, "item1", "key1", nullable=True) +False +>>> g.add("key1", 1, "item1", "key2") +True +>>> g.ordered_keys() +['key1', 'key2'] + +>>> e1 = E() +>>> e1.save() +>>> f1 = F(e=e1) +>>> f1.save() +>>> e1.f = f1 +>>> e1.save() + +# Since E.f is nullable, we should delete F first (after nulling out +# the E.f field), then E. + +>>> o = CollectedObjects() +>>> e1._collect_sub_objects(o) +>>> o.keys() +[, ] + +>>> e1.delete() + +>>> e2 = E() +>>> e2.save() +>>> f2 = F(e=e2) +>>> f2.save() +>>> e2.f = f2 +>>> e2.save() + +# Same deal as before, though we are starting from the other object. + +>>> o = CollectedObjects() +>>> f2._collect_sub_objects(o) +>>> o.keys() +[, ] + +>>> f2.delete() """ }