Fixed #18676 -- Allow fast-path deletion of objects

Objects can be fast-path deleted if there are no signals, and there are
no further cascades. If fast-path is taken, the objects do not need to
be loaded into memory before deletion.
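A rough sketch of the effect (the model below is hypothetical, not part of
this patch): with no delete signal receivers connected and nothing cascading
to the model, a queryset delete now runs as a single query.

    from django.db import models

    class Tag(models.Model):
        # No signal receivers, no foreign keys point at Tag, no parent
        # models: the collector can take the fast path for Tag querysets.
        name = models.CharField(max_length=50)

    # Executes roughly: DELETE FROM app_tag WHERE name LIKE 'tmp%'
    # without loading any Tag instances into memory.
    Tag.objects.filter(name__startswith='tmp').delete()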

Thanks to Jeremy Dunck, Simon Charette and Alex Gaynor for reviewing
the patch.
Anssi Kääriäinen 2012-09-20 18:51:30 +03:00
parent 3fcca0e947
commit 1cd6e04cd4
13 changed files with 275 additions and 19 deletions


@ -191,6 +191,13 @@ class NestedObjects(Collector):
roots.extend(self._nested(root, seen, format_callback))
return roots
def can_fast_delete(self, *args, **kwargs):
"""
We always want to load the objects into memory so that we can display
them to the user on the confirmation page.
"""
return False
def model_format_dict(obj):
"""


@ -77,6 +77,9 @@ class Collector(object):
self.data = {}
self.batches = {} # {model: {field: set([instances])}}
self.field_updates = {} # {model: {(field, value): set([instances])}}
# fast_deletes is a list of queryset-likes that can be deleted without
# fetching the objects into memory.
self.fast_deletes = []
# Tracks deletion-order dependency for databases without transactions
# or ability to defer constraint checks. Only concrete model classes
@ -131,6 +134,43 @@ class Collector(object):
model, {}).setdefault(
(field, value), set()).update(objs)
def can_fast_delete(self, objs, from_field=None):
"""
Determines if the objects in the given queryset-like can be
fast-deleted. This can be done if there are no cascades, no
parents and no signal listeners for the object class.
The 'from_field' tells where we are coming from - we need this to
determine if the objects are in fact to be deleted. It also allows
skipping the parent -> child -> parent chain that would otherwise
prevent fast deletion of the child.
"""
if from_field and from_field.rel.on_delete is not CASCADE:
return False
if not (hasattr(objs, 'model') and hasattr(objs, '_raw_delete')):
return False
model = objs.model
if (signals.pre_delete.has_listeners(model)
or signals.post_delete.has_listeners(model)
or signals.m2m_changed.has_listeners(model)):
return False
# The use of from_field comes from the need to avoid cascading back to
# the parent when the parent's delete is cascading to the child.
opts = model._meta
if any(link != from_field for link in opts.concrete_model._meta.parents.values()):
return False
# Foreign keys pointing to this model, both from m2m and other
# models.
for related in opts.get_all_related_objects(
include_hidden=True, include_proxy_eq=True):
if related.field.rel.on_delete is not DO_NOTHING:
return False
# GFK deletes
for relation in opts.many_to_many:
if not relation.rel.through:
return False
return True
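# Illustrative sketch, not part of the patch: with the test models added
# further below, a User queryset can be fast-deleted (no delete signals
# connected, nothing cascades to User), while an Avatar queryset cannot,
# because User.avatar cascades to Avatar and those users must be collected
# first:
#
#   collector = Collector(using='default')
#   collector.can_fast_delete(User.objects.all())    # True
#   collector.can_fast_delete(Avatar.objects.all())  # False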
def collect(self, objs, source=None, nullable=False, collect_related=True,
source_attr=None, reverse_dependency=False):
"""
@ -148,6 +188,9 @@ class Collector(object):
models, the one case in which the cascade follows the forwards
direction of an FK rather than the reverse direction.)
"""
if self.can_fast_delete(objs):
self.fast_deletes.append(objs)
return
new_objs = self.add(objs, source, nullable,
reverse_dependency=reverse_dependency)
if not new_objs:
@ -160,6 +203,10 @@ class Collector(object):
concrete_model = model._meta.concrete_model
for ptr in six.itervalues(concrete_model._meta.parents):
if ptr:
# FIXME: This seems to be buggy and executes a query for each
# parent object fetched. We have the parent data in the obj,
# but we don't have a nice way to turn that data into a parent
# object instance.
parent_objs = [getattr(obj, ptr.name) for obj in new_objs]
self.collect(parent_objs, source=model,
source_attr=ptr.rel.related_name,
@ -170,12 +217,12 @@ class Collector(object):
for related in model._meta.get_all_related_objects(
include_hidden=True, include_proxy_eq=True):
field = related.field
if related.model._meta.auto_created:
self.add_batch(related.model, field, new_objs)
else:
sub_objs = self.related_objects(related, new_objs)
if not sub_objs:
continue
if field.rel.on_delete == DO_NOTHING:
continue
sub_objs = self.related_objects(related, new_objs)
if self.can_fast_delete(sub_objs, from_field=field):
self.fast_deletes.append(sub_objs)
elif sub_objs:
field.rel.on_delete(self, field, sub_objs, self.using)
# TODO This entire block is only needed as a special case to
@ -241,6 +288,10 @@ class Collector(object):
sender=model, instance=obj, using=self.using
)
# fast deletes
for qs in self.fast_deletes:
qs._raw_delete(using=self.using)
# update fields
for model, instances_for_fieldvalues in six.iteritems(self.field_updates):
query = sql.UpdateQuery(model)
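Taken together, a minimal sketch of the new flow (assuming a fast-deletable
model such as the hypothetical Tag from the commit message, and the default
database alias):

    from django.db.models.deletion import Collector

    qs = Tag.objects.filter(name__startswith='tmp')
    collector = Collector(using='default')
    collector.collect(qs)

    # Nothing was fetched: collector.data stays empty and the queryset was
    # routed into collector.fast_deletes instead.
    collector.delete()   # ends up calling qs._raw_delete(using='default'),
                         # the new QuerySet helper added below.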


@ -529,6 +529,14 @@ class QuerySet(object):
self._result_cache = None
delete.alters_data = True
def _raw_delete(self, using):
"""
Deletes objects found from the given queryset in a single direct SQL
query. No signals are sent, and there is no protection for cascades.
"""
sql.DeleteQuery(self.model).delete_qs(self, using)
_raw_delete.alters_data = True
def update(self, **kwargs):
"""
Updates all elements in the current QuerySet, setting all the given


@ -934,7 +934,8 @@ class SQLDeleteCompiler(SQLCompiler):
qn = self.quote_name_unless_alias
result = ['DELETE FROM %s' % qn(self.query.tables[0])]
where, params = self.query.where.as_sql(qn=qn, connection=self.connection)
result.append('WHERE %s' % where)
if where:
result.append('WHERE %s' % where)
return ' '.join(result), tuple(params)
class SQLUpdateCompiler(SQLCompiler):


@ -3,6 +3,7 @@ Query subclasses which provide extra functionality beyond simple data retrieval.
"""
from django.core.exceptions import FieldError
from django.db import connections
from django.db.models.constants import LOOKUP_SEP
from django.db.models.fields import DateField, FieldDoesNotExist
from django.db.models.sql.constants import *
@ -46,6 +47,37 @@ class DeleteQuery(Query):
pk_list[offset:offset + GET_ITERATOR_CHUNK_SIZE]), AND)
self.do_query(self.model._meta.db_table, where, using=using)
def delete_qs(self, query, using):
innerq = query.query
# Make sure the inner query has at least one table in use.
innerq.get_initial_alias()
# The same for our new query.
self.get_initial_alias()
innerq_used_tables = [t for t in innerq.tables
if innerq.alias_refcount[t]]
if ((not innerq_used_tables or innerq_used_tables == self.tables)
and not len(innerq.having)):
# There is only the base table in use in the query, and there is
# no aggregate filtering going on.
self.where = innerq.where
else:
pk = query.model._meta.pk
if not connections[using].features.update_can_self_select:
# We can't do the delete using a subquery.
values = list(query.values_list('pk', flat=True))
if not values:
return
self.delete_batch(values, using)
return
else:
values = innerq
innerq.select = [(self.get_initial_alias(), pk.column)]
where = self.where_class()
where.add((Constraint(None, pk.column, pk), 'in', values), AND)
self.where = where
self.get_compiler(using).execute_sql(None)
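# Illustrative sketch, not part of the patch. When the queryset filters only
# on its own table, its WHERE clause is reused directly, roughly:
#   DELETE FROM "app_tag" WHERE "app_tag"."name" LIKE 'tmp%'
# When the queryset joins other tables, the delete is rewritten around a
# primary-key subquery, roughly:
#   DELETE FROM "app_tag" WHERE "app_tag"."id" IN
#       (SELECT "app_tag"."id" FROM "app_tag" INNER JOIN ... WHERE ...)
# On backends where features.update_can_self_select is False (e.g. MySQL),
# the primary keys are fetched into Python first and removed in batches via
# delete_batch() instead of using a subquery.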
class UpdateQuery(Query):
"""
Represents an "update" SQL query.


@ -1667,6 +1667,21 @@ methods on your models. It does, however, emit the
:data:`~django.db.models.signals.post_delete` signals for all deleted objects
(including cascaded deletions).
.. versionadded:: 1.5
    Allow fast-path deletion of objects

Django needs to fetch objects into memory to send signals and handle
cascades. However, if there are no cascades and no signals, then Django may
take a fast-path and delete objects without fetching them into memory. For
large deletes this can result in significantly reduced memory usage. The
number of executed queries can be reduced, too.

ForeignKeys which are set to :attr:`~django.db.models.ForeignKey.on_delete`
DO_NOTHING do not prevent taking the fast-path in deletion.

Note that the queries generated in object deletion are an implementation
detail subject to change.
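A minimal sketch (hypothetical ``Tag`` model and receiver; any connected
receiver has the same effect) of how signals disable the fast-path::

    from django.db.models.signals import pre_delete

    # No receivers and no cascades: one DELETE query, rows are not fetched.
    Tag.objects.filter(name__startswith='tmp').delete()

    def log_delete(sender, instance, **kwargs):
        # Hypothetical receiver used only for illustration.
        print('deleting %s' % instance)

    pre_delete.connect(log_delete, sender=Tag)

    # A receiver is now connected, so every matching Tag is fetched and the
    # signal is sent for each object before it is deleted.
    Tag.objects.filter(name__startswith='tmp').delete()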
.. _field-lookups:
Field lookups


@ -149,6 +149,12 @@ Django 1.5 also includes several smaller improvements worth noting:
* Django now provides a mod_wsgi :doc:`auth handler
</howto/deployment/wsgi/apache-auth>`
* The :meth:`QuerySet.delete() <django.db.models.query.QuerySet.delete>`
and :meth:`Model.delete() <django.db.models.Model.delete>` can now take a
fast-path in some cases. The fast-path allows for fewer queries and fewer
objects fetched into memory. See :meth:`QuerySet.delete()
<django.db.models.query.QuerySet.delete>` for details.
Backwards incompatible changes in 1.5
=====================================


@ -95,7 +95,7 @@ class MRNull(models.Model):
class Avatar(models.Model):
pass
desc = models.TextField(null=True)
class User(models.Model):
@ -108,3 +108,21 @@ class HiddenUser(models.Model):
class HiddenUserProfile(models.Model):
user = models.ForeignKey(HiddenUser)
class M2MTo(models.Model):
pass
class M2MFrom(models.Model):
m2m = models.ManyToManyField(M2MTo)
class Parent(models.Model):
pass
class Child(Parent):
pass
class Base(models.Model):
pass
class RelToBase(models.Model):
base = models.ForeignKey(Base, on_delete=models.DO_NOTHING)


@ -1,11 +1,12 @@
from __future__ import absolute_import
from django.db import models, IntegrityError
from django.db import models, IntegrityError, connection
from django.test import TestCase, skipUnlessDBFeature, skipIfDBFeature
from django.utils.six.moves import xrange
from .models import (R, RChild, S, T, U, A, M, MR, MRNull,
create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile)
create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile,
M2MTo, M2MFrom, Parent, Child, Base)
class OnDeleteTests(TestCase):
@ -74,6 +75,16 @@ class OnDeleteTests(TestCase):
self.assertEqual(replacement_r, a.donothing)
models.signals.pre_delete.disconnect(check_do_nothing)
def test_do_nothing_qscount(self):
"""
Test that a models.DO_NOTHING relation doesn't trigger a query.
"""
b = Base.objects.create()
with self.assertNumQueries(1):
# RelToBase should not be queried.
b.delete()
self.assertEqual(Base.objects.count(), 0)
def test_inheritance_cascade_up(self):
child = RChild.objects.create()
child.delete()
@ -229,16 +240,34 @@ class DeletionTests(TestCase):
# 1 query to delete the avatar
# The important thing is that when we can defer constraint checks there
# is no need to do an UPDATE on User.avatar to null it out.
# Attach a signal to make sure we will not do fast_deletes.
calls = []
def noop(*args, **kwargs):
calls.append('')
models.signals.post_delete.connect(noop, sender=User)
self.assertNumQueries(3, a.delete)
self.assertFalse(User.objects.exists())
self.assertFalse(Avatar.objects.exists())
self.assertEquals(len(calls), 1)
models.signals.post_delete.disconnect(noop, sender=User)
@skipIfDBFeature("can_defer_constraint_checks")
def test_cannot_defer_constraint_checks(self):
u = User.objects.create(
avatar=Avatar.objects.create()
)
# Attach a signal to make sure we will not do fast_deletes.
calls = []
def noop(*args, **kwargs):
calls.append('')
models.signals.post_delete.connect(noop, sender=User)
a = Avatar.objects.get(pk=u.avatar_id)
# The below doesn't make sense... Why do we need to null out
# user.avatar if we are going to delete the user immediately after it,
# and there are no more cascades.
# 1 query to find the users for the avatar.
# 1 query to delete the user
# 1 query to null out user.avatar, because we can't defer the constraint
@ -246,6 +275,8 @@ class DeletionTests(TestCase):
self.assertNumQueries(4, a.delete)
self.assertFalse(User.objects.exists())
self.assertFalse(Avatar.objects.exists())
self.assertEquals(len(calls), 1)
models.signals.post_delete.disconnect(noop, sender=User)
def test_hidden_related(self):
r = R.objects.create()
@ -254,3 +285,69 @@ class DeletionTests(TestCase):
r.delete()
self.assertEqual(HiddenUserProfile.objects.count(), 0)
class FastDeleteTests(TestCase):
def test_fast_delete_fk(self):
u = User.objects.create(
avatar=Avatar.objects.create()
)
a = Avatar.objects.get(pk=u.avatar_id)
# 1 query to fast-delete the user
# 1 query to delete the avatar
self.assertNumQueries(2, a.delete)
self.assertFalse(User.objects.exists())
self.assertFalse(Avatar.objects.exists())
def test_fast_delete_m2m(self):
t = M2MTo.objects.create()
f = M2MFrom.objects.create()
f.m2m.add(t)
# 1 to delete f, 1 to fast-delete m2m for f
self.assertNumQueries(2, f.delete)
def test_fast_delete_revm2m(self):
t = M2MTo.objects.create()
f = M2MFrom.objects.create()
f.m2m.add(t)
# 1 to delete t, 1 to fast-delete t's m_set
self.assertNumQueries(2, f.delete)
def test_fast_delete_qs(self):
u1 = User.objects.create()
u2 = User.objects.create()
self.assertNumQueries(1, User.objects.filter(pk=u1.pk).delete)
self.assertEquals(User.objects.count(), 1)
self.assertTrue(User.objects.filter(pk=u2.pk).exists())
def test_fast_delete_joined_qs(self):
a = Avatar.objects.create(desc='a')
User.objects.create(avatar=a)
u2 = User.objects.create()
expected_queries = 1 if connection.features.update_can_self_select else 2
self.assertNumQueries(expected_queries,
User.objects.filter(avatar__desc='a').delete)
self.assertEquals(User.objects.count(), 1)
self.assertTrue(User.objects.filter(pk=u2.pk).exists())
def test_fast_delete_inheritance(self):
c = Child.objects.create()
p = Parent.objects.create()
# 1 for self, 1 for parent
# However, this doesn't work as child.parent access creates a query,
# and this means we will be generating extra queries (a lot for large
# querysets). This is not a fast-delete problem.
# self.assertNumQueries(2, c.delete)
c.delete()
self.assertFalse(Child.objects.exists())
self.assertEquals(Parent.objects.count(), 1)
self.assertEquals(Parent.objects.filter(pk=p.pk).count(), 1)
# 1 for self delete, 1 for fast delete of empty "child" qs.
self.assertNumQueries(2, p.delete)
self.assertFalse(Parent.objects.exists())
# 1 for self delete, 1 for fast delete of empty "child" qs.
c = Child.objects.create()
p = c.parent_ptr
self.assertNumQueries(2, p.delete)
self.assertFalse(Parent.objects.exists())
self.assertFalse(Child.objects.exists())


@ -39,3 +39,6 @@ class Guest(models.Model):
class Meta:
verbose_name = "awesome guest"
class EventGuide(models.Model):
event = models.ForeignKey(Event, on_delete=models.DO_NOTHING)


@ -17,7 +17,7 @@ from django.utils.formats import localize
from django.utils.safestring import mark_safe
from django.utils import six
from .models import Article, Count, Event, Location
from .models import Article, Count, Event, Location, EventGuide
class NestedObjectsTests(TestCase):
@ -71,6 +71,17 @@ class NestedObjectsTests(TestCase):
# Should not require additional queries to populate the nested graph.
self.assertNumQueries(2, self._collect, 0)
def test_on_delete_do_nothing(self):
"""
Check that the nested collector doesn't query for DO_NOTHING objects.
"""
n = NestedObjects(using=DEFAULT_DB_ALIAS)
objs = [Event.objects.create()]
EventGuide.objects.create(event=objs[0])
with self.assertNumQueries(2):
# One for Location, one for Guest, and no query for EventGuide
n.collect(objs)
class UtilTests(unittest.TestCase):
def test_values_from_lookup_field(self):
"""


@ -3,7 +3,7 @@ from __future__ import absolute_import
import datetime
from django.conf import settings
from django.db import backend, transaction, DEFAULT_DB_ALIAS
from django.db import backend, transaction, DEFAULT_DB_ALIAS, models
from django.test import TestCase, TransactionTestCase, skipUnlessDBFeature
from .models import (Book, Award, AwardNote, Person, Child, Toy, PlayedWith,
@ -139,17 +139,24 @@ class DeleteCascadeTransactionTests(TransactionTestCase):
eaten = Eaten.objects.create(food=apple, meal="lunch")
apple.delete()
self.assertFalse(Food.objects.exists())
self.assertFalse(Eaten.objects.exists())
class LargeDeleteTests(TestCase):
def test_large_deletes(self):
"Regression for #13309 -- if the number of objects > chunk size, deletion still occurs"
for x in range(300):
track = Book.objects.create(pagecount=x+100)
# attach a signal to make sure we will not fast-delete
def noop(*args, **kwargs):
pass
models.signals.post_delete.connect(noop, sender=Book)
Book.objects.all().delete()
models.signals.post_delete.disconnect(noop, sender=Book)
self.assertEqual(Book.objects.count(), 0)
class ProxyDeleteTest(TestCase):
"""
Tests on_delete behavior for proxy models.


@ -127,15 +127,15 @@ class DispatcherTests(unittest.TestCase):
self._testIsClean(a_signal)
def test_has_listeners(self):
self.assertIs(a_signal.has_listeners(), False)
self.assertIs(a_signal.has_listeners(sender=object()), False)
self.assertFalse(a_signal.has_listeners())
self.assertFalse(a_signal.has_listeners(sender=object()))
receiver_1 = Callable()
a_signal.connect(receiver_1)
self.assertIs(a_signal.has_listeners(), True)
self.assertIs(a_signal.has_listeners(sender=object()), True)
self.assertTrue(a_signal.has_listeners())
self.assertTrue(a_signal.has_listeners(sender=object()))
a_signal.disconnect(receiver_1)
self.assertIs(a_signal.has_listeners(), False)
self.assertIs(a_signal.has_listeners(sender=object()), False)
self.assertFalse(a_signal.has_listeners())
self.assertFalse(a_signal.has_listeners(sender=object()))
class ReceiverTestCase(unittest.TestCase):