Fixed bug with Model.delete() which did not always delete objects in the right order.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@7722 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Luke Plant 2008-06-21 20:57:05 +00:00
parent 7c621535a2
commit b1851cca3e
3 changed files with 205 additions and 33 deletions

View File

@ -10,7 +10,7 @@ from django.core import validators
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
from django.db.models.fields import AutoField, ImageField, FieldDoesNotExist
from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField
from django.db.models.query import delete_objects, Q
from django.db.models.query import delete_objects, Q, CollectedObjects
from django.db.models.options import Options, AdminOptions
from django.db import connection, transaction
from django.db.models import signals
@ -368,17 +368,16 @@ class Model(object):
error_dict[f.name] = errors
return error_dict
def _collect_sub_objects(self, seen_objs):
def _collect_sub_objects(self, seen_objs, parent=None, nullable=False):
"""
Recursively populates seen_objs with all objects related to this object.
When done, seen_objs will be in the format:
{model_class: {pk_val: obj, pk_val: obj, ...},
model_class: {pk_val: obj, pk_val: obj, ...}, ...}
When done, seen_objs.items() will be in the format:
[(model_class, {pk_val: obj, pk_val: obj, ...}),
(model_class, {pk_val: obj, pk_val: obj, ...}),...]
"""
pk_val = self._get_pk_val()
if pk_val in seen_objs.setdefault(self.__class__, {}):
if seen_objs.add(self.__class__, pk_val, self, parent, nullable):
return
seen_objs[self.__class__][pk_val] = self
for related in self._meta.get_all_related_objects():
rel_opts_name = related.get_accessor_name()
@ -388,16 +387,16 @@ class Model(object):
except ObjectDoesNotExist:
pass
else:
sub_obj._collect_sub_objects(seen_objs)
sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null)
else:
for sub_obj in getattr(self, rel_opts_name).all():
sub_obj._collect_sub_objects(seen_objs)
sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null)
def delete(self):
assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)
# Find all the objects than need to be deleted
seen_objs = SortedDict()
seen_objs = CollectedObjects()
self._collect_sub_objects(seen_objs)
# Actually delete the objects

View File

@ -16,6 +16,92 @@ ITER_CHUNK_SIZE = CHUNK_SIZE
# Pull into this namespace for backwards compatibility
EmptyResultSet = sql.EmptyResultSet
class CyclicDependency(Exception):
pass
class CollectedObjects(object):
"""
A container that stores keys and lists of values along with
remembering the parent objects for all the keys.
This is used for the database object deletion routines so that we
can calculate the 'leaf' objects which should be deleted first.
"""
def __init__(self):
self.data = {}
self.children = {}
def add(self, model, pk, obj, parent_model, nullable=False):
"""
Adds an item.
model is the class of the object being added,
pk is the primary key, obj is the object itself,
parent_model is the model of the parent object
that this object was reached through, nullable should
be True if this relation is nullable.
If the item already existed in the structure,
returns true, otherwise false.
"""
d = self.data.setdefault(model, SortedDict())
retval = pk in d
d[pk] = obj
# Nullable relationships can be ignored -- they
# are nulled out before deleting, and therefore
# do not affect the order in which objects have
# to be deleted.
if parent_model is not None and not nullable:
self.children.setdefault(parent_model, []).append(model)
return retval
def __contains__(self, key):
return self.data.__contains__(key)
def __getitem__(self, key):
return self.data[key]
def __nonzero__(self):
return bool(self.data)
def iteritems(self):
for k in self.ordered_keys():
yield k, self[k]
def items(self):
return list(self.iteritems())
def keys(self):
return self.ordered_keys()
def ordered_keys(self):
"""
Returns the models in the order that they should be
dealth with i.e. models with no dependencies first.
"""
dealt_with = SortedDict()
# Start with items that have no children
models = self.data.keys()
while len(dealt_with) < len(models):
found = False
for model in models:
children = self.children.setdefault(model, [])
if len([c for c in children if c not in dealt_with]) == 0:
dealt_with[model] = None
found = True
if not found:
raise CyclicDependency("There is a cyclic dependency of items to be processed.")
return dealt_with.keys()
def unordered_keys(self):
"""
Fallback for the case where is a cyclic dependency but we
don't care.
"""
return self.data.keys()
class QuerySet(object):
"Represents a lazy database lookup for a set of objects"
def __init__(self, model=None, query=None):
@ -275,7 +361,7 @@ class QuerySet(object):
while 1:
# Collect all the objects to be deleted in this chunk, and all the
# objects that are related to the objects that are to be deleted.
seen_objs = SortedDict()
seen_objs = CollectedObjects()
for object in del_query[:CHUNK_SIZE]:
object._collect_sub_objects(seen_objs)
@ -682,19 +768,27 @@ def delete_objects(seen_objs):
Iterate through a list of seen classes, and remove any instances that are
referred to.
"""
ordered_classes = seen_objs.keys()
ordered_classes.reverse()
try:
ordered_classes = seen_objs.keys()
except CyclicDependency:
# if there is a cyclic dependency, we cannot in general delete
# the objects. However, if an appropriate transaction is set
# up, or if the database is lax enough, it will succeed.
# So for now, we go ahead and try anway.
ordered_classes = seen_objs.unordered_keys()
obj_pairs = {}
for cls in ordered_classes:
seen_objs[cls] = seen_objs[cls].items()
seen_objs[cls].sort()
items = seen_objs[cls].items()
items.sort()
obj_pairs[cls] = items
# Pre notify all instances to be deleted
for pk_val, instance in seen_objs[cls]:
for pk_val, instance in items:
dispatcher.send(signal=signals.pre_delete, sender=cls,
instance=instance)
pk_list = [pk for pk,instance in seen_objs[cls]]
pk_list = [pk for pk,instance in items]
del_query = sql.DeleteQuery(cls, connection)
del_query.delete_batch_related(pk_list)
@ -705,15 +799,17 @@ def delete_objects(seen_objs):
# Now delete the actual data
for cls in ordered_classes:
seen_objs[cls].reverse()
pk_list = [pk for pk,instance in seen_objs[cls]]
items = obj_pairs[cls]
items.reverse()
pk_list = [pk for pk,instance in items]
del_query = sql.DeleteQuery(cls, connection)
del_query.delete_batch(pk_list)
# Last cleanup; set NULLs where there once was a reference to the
# object, NULL the primary key of the found objects, and perform
# post-notification.
for pk_val, instance in seen_objs[cls]:
for pk_val, instance in items:
for field in cls._meta.fields:
if field.rel and field.null and field.rel.to in seen_objs:
setattr(instance, field.attname, None)

View File

@ -33,8 +33,46 @@ class D(DefaultRepr, models.Model):
# However, if we start at As, we might find Bs first (in which
# case things will be nice), or find Ds first.
# Some mutually dependent models, but nullable
class E(DefaultRepr, models.Model):
f = models.ForeignKey('F', null=True, related_name='e_rel')
class F(DefaultRepr, models.Model):
e = models.ForeignKey(E, related_name='f_rel')
__test__ = {'API_TESTS': """
# First, some tests for the datastructure we use
>>> from django.db.models.query import CollectedObjects
>>> g = CollectedObjects()
>>> g.add("key1", 1, "item1", None)
False
>>> g["key1"]
{1: 'item1'}
>>> g.add("key2", 1, "item1", "key1")
False
>>> g.add("key2", 2, "item2", "key1")
False
>>> g["key2"]
{1: 'item1', 2: 'item2'}
>>> g.add("key3", 1, "item1", "key1")
False
>>> g.add("key3", 1, "item1", "key2")
True
>>> g.ordered_keys()
['key3', 'key2', 'key1']
>>> g.add("key2", 1, "item1", "key3")
True
>>> g.ordered_keys()
Traceback (most recent call last):
...
CyclicDependency: There is a cyclic dependency of items to be processed.
# Due to the way that transactions work in the test harness,
# doing m.delete() here can work but fail in a real situation,
# since it may delete all objects, but not in the right order.
@ -42,11 +80,10 @@ __test__ = {'API_TESTS': """
# Also, it is possible that the order is correct 'accidentally', due
# solely to order of imports etc. To check this, we set the order
# that 'get_models()' will retrieve to a known 'tricky' order, and
# then try again with the reverse and try again. Slightly naughty
# access to internals here.
# that 'get_models()' will retrieve to a known 'nice' order, and
# then try again with a known 'tricky' order. Slightly naughty
# access to internals here :-)
>>> from django.utils.datastructures import SortedDict
>>> from django.db.models.loading import cache
# Nice order
@ -56,8 +93,6 @@ __test__ = {'API_TESTS': """
>>> del C._meta._related_objects_cache
>>> del D._meta._related_objects_cache
>>> a1 = A()
>>> a1.save()
>>> b1 = B(a=a1)
@ -67,9 +102,9 @@ __test__ = {'API_TESTS': """
>>> d1 = D(c=c1, a=a1)
>>> d1.save()
>>> sd = SortedDict()
>>> a1._collect_sub_objects(sd)
>>> list(reversed(sd.keys()))
>>> o = CollectedObjects()
>>> a1._collect_sub_objects(o)
>>> o.keys()
[<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>]
>>> a1.delete()
@ -80,7 +115,6 @@ __test__ = {'API_TESTS': """
>>> del C._meta._related_objects_cache
>>> del D._meta._related_objects_cache
>>> a2 = A()
>>> a2.save()
>>> b2 = B(a=a2)
@ -90,13 +124,56 @@ __test__ = {'API_TESTS': """
>>> d2 = D(c=c2, a=a2)
>>> d2.save()
>>> sd2 = SortedDict()
>>> a2._collect_sub_objects(sd2)
>>> list(reversed(sd2.keys()))
>>> o = CollectedObjects()
>>> a2._collect_sub_objects(o)
>>> o.keys()
[<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>]
>>> a2.delete()
# Tests for nullable related fields
>>> g = CollectedObjects()
>>> g.add("key1", 1, "item1", None)
False
>>> g.add("key2", 1, "item1", "key1", nullable=True)
False
>>> g.add("key1", 1, "item1", "key2")
True
>>> g.ordered_keys()
['key1', 'key2']
>>> e1 = E()
>>> e1.save()
>>> f1 = F(e=e1)
>>> f1.save()
>>> e1.f = f1
>>> e1.save()
# Since E.f is nullable, we should delete F first (after nulling out
# the E.f field), then E.
>>> o = CollectedObjects()
>>> e1._collect_sub_objects(o)
>>> o.keys()
[<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>]
>>> e1.delete()
>>> e2 = E()
>>> e2.save()
>>> f2 = F(e=e2)
>>> f2.save()
>>> e2.f = f2
>>> e2.save()
# Same deal as before, though we are starting from the other object.
>>> o = CollectedObjects()
>>> f2._collect_sub_objects(o)
>>> o.keys()
[<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>]
>>> f2.delete()
"""
}