Fixed #18676 -- Allow fast-path deletion of objects
Objects can be fast-path deleted if there are no signals and no further cascades. If the fast path is taken, the objects do not need to be loaded into memory before deletion. Thanks to Jeremy Dunck, Simon Charette and Alex Gaynor for reviewing the patch.
parent 3fcca0e947
commit 1cd6e04cd4
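As a rough usage-level sketch of the behaviour described above (not part of this commit; the Note model and the receiver are hypothetical and assume a configured app), a delete with no signal receivers and nothing cascading to the model runs as a single DELETE, while connecting a receiver brings back the fetch-then-delete behaviour:

from django.db import models
from django.db.models.signals import post_delete

class Note(models.Model):
    # Hypothetical model: no other model points at Note.
    text = models.TextField()

# No listeners, no cascades: eligible for the fast path, so this runs as one
# DELETE statement without loading the matching rows into memory.
Note.objects.filter(text='obsolete').delete()

deleted_pks = []

def log_delete(sender, instance, **kwargs):
    deleted_pks.append(instance.pk)

# A post_delete receiver disables the fast path for Note: every deleted
# instance now has to be fetched so the signal can be sent for it.
post_delete.connect(log_delete, sender=Note)
Note.objects.filter(text='obsolete').delete()
post_delete.disconnect(log_delete, sender=Note)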
@@ -191,6 +191,13 @@ class NestedObjects(Collector):
            roots.extend(self._nested(root, seen, format_callback))
        return roots

    def can_fast_delete(self, *args, **kwargs):
        """
        We always want to load the objects into memory so that we can display
        them to the user on the confirmation page.
        """
        return False


def model_format_dict(obj):
    """
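For context on the override above, a small sketch (not in the diff; SomeModel is a placeholder) of why the admin keeps the slow path: NestedObjects must collect every object so the delete-confirmation page can list them, so it simply refuses the fast path:

from django.contrib.admin.util import NestedObjects
from django.db import DEFAULT_DB_ALIAS

collector = NestedObjects(using=DEFAULT_DB_ALIAS)
collector.collect(SomeModel.objects.filter(pk=1))
# can_fast_delete() always returns False here, so every related object has
# been fetched and can be rendered on the confirmation page.
nested = collector.nested()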
@@ -77,6 +77,9 @@ class Collector(object):
        self.data = {}
        self.batches = {} # {model: {field: set([instances])}}
        self.field_updates = {} # {model: {(field, value): set([instances])}}
        # fast_deletes is a list of queryset-likes that can be deleted without
        # fetching the objects into memory.
        self.fast_deletes = []

        # Tracks deletion-order dependency for databases without transactions
        # or ability to defer constraint checks. Only concrete model classes
@@ -131,6 +134,43 @@ class Collector(object):
            model, {}).setdefault(
            (field, value), set()).update(objs)

    def can_fast_delete(self, objs, from_field=None):
        """
        Determines if the objects in the given queryset-like can be
        fast-deleted. This can be done if there are no cascades, no
        parents and no signal listeners for the object class.

        The 'from_field' tells where we are coming from - we need this to
        determine if the objects are in fact to be deleted. It also allows
        skipping the parent -> child -> parent chain that would otherwise
        prevent fast deletion of the child.
        """
        if from_field and from_field.rel.on_delete is not CASCADE:
            return False
        if not (hasattr(objs, 'model') and hasattr(objs, '_raw_delete')):
            return False
        model = objs.model
        if (signals.pre_delete.has_listeners(model)
                or signals.post_delete.has_listeners(model)
                or signals.m2m_changed.has_listeners(model)):
            return False
        # The use of from_field comes from the need to avoid cascade back to
        # parent when parent delete is cascading to child.
        opts = model._meta
        if any(link != from_field for link in opts.concrete_model._meta.parents.values()):
            return False
        # Foreign keys pointing to this model, both from m2m and other
        # models.
        for related in opts.get_all_related_objects(
                include_hidden=True, include_proxy_eq=True):
            if related.field.rel.on_delete is not DO_NOTHING:
                return False
        # GFK deletes
        for relation in opts.many_to_many:
            if not relation.rel.through:
                return False
        return True

    def collect(self, objs, source=None, nullable=False, collect_related=True,
                source_attr=None, reverse_dependency=False):
        """
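A minimal sketch (private API, illustrative only; it reuses the hypothetical Note model from above, with no receivers connected, and the 'default' alias) of the conditions the new method checks:

from django.db.models.deletion import Collector

collector = Collector(using='default')
qs = Note.objects.all()

# A queryset-like object with no cascades, parents or signal listeners
# qualifies for fast deletion.
assert collector.can_fast_delete(qs) is True

# A plain list of instances has no _raw_delete() method, so it never does.
assert collector.can_fast_delete(list(qs)) is False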
@@ -148,6 +188,9 @@ class Collector(object):
        models, the one case in which the cascade follows the forwards
        direction of an FK rather than the reverse direction.)
        """
        if self.can_fast_delete(objs):
            self.fast_deletes.append(objs)
            return
        new_objs = self.add(objs, source, nullable,
                            reverse_dependency=reverse_dependency)
        if not new_objs:
@@ -160,6 +203,10 @@ class Collector(object):
        concrete_model = model._meta.concrete_model
        for ptr in six.itervalues(concrete_model._meta.parents):
            if ptr:
                # FIXME: This seems to be buggy and executes a query for each
                # parent object fetch. We have the parent data in the obj,
                # but we don't have a nice way to turn that data into a parent
                # object instance.
                parent_objs = [getattr(obj, ptr.name) for obj in new_objs]
                self.collect(parent_objs, source=model,
                             source_attr=ptr.rel.related_name,
@@ -170,12 +217,12 @@ class Collector(object):
            for related in model._meta.get_all_related_objects(
                    include_hidden=True, include_proxy_eq=True):
                field = related.field
                if related.model._meta.auto_created:
                    self.add_batch(related.model, field, new_objs)
                else:
                    sub_objs = self.related_objects(related, new_objs)
                    if not sub_objs:
                        continue
                if field.rel.on_delete == DO_NOTHING:
                    continue
                sub_objs = self.related_objects(related, new_objs)
                if self.can_fast_delete(sub_objs, from_field=field):
                    self.fast_deletes.append(sub_objs)
                elif sub_objs:
                    field.rel.on_delete(self, field, sub_objs, self.using)

            # TODO This entire block is only needed as a special case to
@@ -241,6 +288,10 @@ class Collector(object):
                        sender=model, instance=obj, using=self.using
                    )

            # fast deletes
            for qs in self.fast_deletes:
                qs._raw_delete(using=self.using)

            # update fields
            for model, instances_for_fieldvalues in six.iteritems(self.field_updates):
                query = sql.UpdateQuery(model)
@@ -529,6 +529,14 @@ class QuerySet(object):
        self._result_cache = None
    delete.alters_data = True

    def _raw_delete(self, using):
        """
        Deletes objects found from the given queryset in a single direct SQL
        query. No signals are sent, and there is no protection for cascades.
        """
        sql.DeleteQuery(self.model).delete_qs(self, using)
    _raw_delete.alters_data = True

    def update(self, **kwargs):
        """
        Updates all elements in the current QuerySet, setting all the given
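A hedged example (not in the patch) of calling the new private helper directly; QuerySet.delete() normally makes this decision itself, and _raw_delete() skips signals and cascade handling entirely:

qs = Note.objects.filter(text='obsolete')  # hypothetical model, as above
qs._raw_delete(using=qs.db)                # one DELETE; no signals, no cascades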
@@ -934,7 +934,8 @@ class SQLDeleteCompiler(SQLCompiler):
        qn = self.quote_name_unless_alias
        result = ['DELETE FROM %s' % qn(self.query.tables[0])]
        where, params = self.query.where.as_sql(qn=qn, connection=self.connection)
        result.append('WHERE %s' % where)
        if where:
            result.append('WHERE %s' % where)
        return ' '.join(result), tuple(params)


class SQLUpdateCompiler(SQLCompiler):
@@ -3,6 +3,7 @@ Query subclasses which provide extra functionality beyond simple data retrieval.
"""

from django.core.exceptions import FieldError
from django.db import connections
from django.db.models.constants import LOOKUP_SEP
from django.db.models.fields import DateField, FieldDoesNotExist
from django.db.models.sql.constants import *
@@ -46,6 +47,37 @@ class DeleteQuery(Query):
                pk_list[offset:offset + GET_ITERATOR_CHUNK_SIZE]), AND)
            self.do_query(self.model._meta.db_table, where, using=using)

    def delete_qs(self, query, using):
        innerq = query.query
        # Make sure the inner query has at least one table in use.
        innerq.get_initial_alias()
        # The same for our new query.
        self.get_initial_alias()
        innerq_used_tables = [t for t in innerq.tables
                              if innerq.alias_refcount[t]]
        if ((not innerq_used_tables or innerq_used_tables == self.tables)
                and not len(innerq.having)):
            # There is only the base table in use in the query, and there is
            # no aggregate filtering going on.
            self.where = innerq.where
        else:
            pk = query.model._meta.pk
            if not connections[using].features.update_can_self_select:
                # We can't do the delete using subquery.
                values = list(query.values_list('pk', flat=True))
                if not values:
                    return
                self.delete_batch(values, using)
                return
            else:
                values = innerq
                innerq.select = [(self.get_initial_alias(), pk.column)]
            where = self.where_class()
            where.add((Constraint(None, pk.column, pk), 'in', values), AND)
            self.where = where
        self.get_compiler(using).execute_sql(None)


class UpdateQuery(Query):
    """
    Represents an "update" SQL query.
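The branches above amount to three strategies. A rough sketch of the resulting SQL for a hypothetical note table (the exact SQL is an implementation detail, as the documentation below notes):

# Only the base table is referenced: the inner WHERE clause is reused.
#   Note.objects.filter(archived=True).delete()
#   -> DELETE FROM note WHERE archived = true
#
# Other tables are joined and the backend can self-select
# (connection.features.update_can_self_select): delete via a pk subquery.
#   Note.objects.filter(author__active=False).delete()
#   -> DELETE FROM note WHERE id IN (SELECT id FROM note INNER JOIN ...)
#
# Backends that cannot self-select (e.g. MySQL) fetch the pk list first and
# then delete it in batches with delete_batch().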
@@ -1667,6 +1667,21 @@ methods on your models. It does, however, emit the
:data:`~django.db.models.signals.post_delete` signals for all deleted objects
(including cascaded deletions).

.. versionadded:: 1.5
    Allow fast-path deletion of objects

Django needs to fetch objects into memory to send signals and handle cascades.
However, if there are no cascades and no signals, then Django may take a
fast path and delete objects without fetching them into memory. For large
deletes this can result in significantly reduced memory usage. The number of
executed queries can be reduced, too.

ForeignKeys which are set to :attr:`~django.db.models.ForeignKey.on_delete`
DO_NOTHING do not prevent taking the fast path in deletion.

Note that the queries generated in object deletion are an implementation
detail subject to change.

.. _field-lookups:

Field lookups
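An illustrative example of the documented behaviour (hypothetical models, not part of the docs patch): nothing cascades to Entry, so Entry querysets are fast-deleted, while the CASCADE foreign key pointing at Tag forces the collector to fetch the tags and their entries:

from django.db import models

class Tag(models.Model):
    name = models.CharField(max_length=50)

class Entry(models.Model):
    tag = models.ForeignKey(Tag)  # on_delete defaults to CASCADE

# Fast path: no signal receivers and no relations point at Entry.
Entry.objects.all().delete()

# No fast path for Tag: Entry.tag cascades, so the tags are loaded in order
# to collect (and then delete) their related entries.
Tag.objects.all().delete()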
@@ -149,6 +149,12 @@ Django 1.5 also includes several smaller improvements worth noting:
* Django now provides a mod_wsgi :doc:`auth handler
  </howto/deployment/wsgi/apache-auth>`

* The :meth:`QuerySet.delete() <django.db.models.query.QuerySet.delete>`
  and :meth:`Model.delete() <django.db.models.Model.delete()>` methods can now
  take a fast path in some cases. The fast path allows for fewer queries and
  fewer objects fetched into memory. See :meth:`QuerySet.delete()
  <django.db.models.query.QuerySet.delete>` for details.

Backwards incompatible changes in 1.5
=====================================
@@ -95,7 +95,7 @@ class MRNull(models.Model):


class Avatar(models.Model):
    pass
    desc = models.TextField(null=True)


class User(models.Model):
@@ -108,3 +108,21 @@ class HiddenUser(models.Model):

class HiddenUserProfile(models.Model):
    user = models.ForeignKey(HiddenUser)

class M2MTo(models.Model):
    pass

class M2MFrom(models.Model):
    m2m = models.ManyToManyField(M2MTo)

class Parent(models.Model):
    pass

class Child(Parent):
    pass

class Base(models.Model):
    pass

class RelToBase(models.Model):
    base = models.ForeignKey(Base, on_delete=models.DO_NOTHING)
@@ -1,11 +1,12 @@
from __future__ import absolute_import

from django.db import models, IntegrityError
from django.db import models, IntegrityError, connection
from django.test import TestCase, skipUnlessDBFeature, skipIfDBFeature
from django.utils.six.moves import xrange

from .models import (R, RChild, S, T, U, A, M, MR, MRNull,
    create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile)
    create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile,
    M2MTo, M2MFrom, Parent, Child, Base)


class OnDeleteTests(TestCase):
@@ -74,6 +75,16 @@ class OnDeleteTests(TestCase):
        self.assertEqual(replacement_r, a.donothing)
        models.signals.pre_delete.disconnect(check_do_nothing)

    def test_do_nothing_qscount(self):
        """
        Test that a models.DO_NOTHING relation doesn't trigger a query.
        """
        b = Base.objects.create()
        with self.assertNumQueries(1):
            # RelToBase should not be queried.
            b.delete()
        self.assertEqual(Base.objects.count(), 0)

    def test_inheritance_cascade_up(self):
        child = RChild.objects.create()
        child.delete()
@@ -229,16 +240,34 @@ class DeletionTests(TestCase):
        # 1 query to delete the avatar
        # The important thing is that when we can defer constraint checks there
        # is no need to do an UPDATE on User.avatar to null it out.

        # Attach a signal to make sure we will not do fast_deletes.
        calls = []
        def noop(*args, **kwargs):
            calls.append('')
        models.signals.post_delete.connect(noop, sender=User)

        self.assertNumQueries(3, a.delete)
        self.assertFalse(User.objects.exists())
        self.assertFalse(Avatar.objects.exists())
        self.assertEquals(len(calls), 1)
        models.signals.post_delete.disconnect(noop, sender=User)

    @skipIfDBFeature("can_defer_constraint_checks")
    def test_cannot_defer_constraint_checks(self):
        u = User.objects.create(
            avatar=Avatar.objects.create()
        )
        # Attach a signal to make sure we will not do fast_deletes.
        calls = []
        def noop(*args, **kwargs):
            calls.append('')
        models.signals.post_delete.connect(noop, sender=User)

        a = Avatar.objects.get(pk=u.avatar_id)
        # The below doesn't make sense... Why do we need to null out
        # user.avatar if we are going to delete the user immediately after it,
        # and there are no more cascades.
        # 1 query to find the users for the avatar.
        # 1 query to delete the user
        # 1 query to null out user.avatar, because we can't defer the constraint
@@ -246,6 +275,8 @@ class DeletionTests(TestCase):
        self.assertNumQueries(4, a.delete)
        self.assertFalse(User.objects.exists())
        self.assertFalse(Avatar.objects.exists())
        self.assertEquals(len(calls), 1)
        models.signals.post_delete.disconnect(noop, sender=User)

    def test_hidden_related(self):
        r = R.objects.create()
@@ -254,3 +285,69 @@ class DeletionTests(TestCase):

        r.delete()
        self.assertEqual(HiddenUserProfile.objects.count(), 0)


class FastDeleteTests(TestCase):

    def test_fast_delete_fk(self):
        u = User.objects.create(
            avatar=Avatar.objects.create()
        )
        a = Avatar.objects.get(pk=u.avatar_id)
        # 1 query to fast-delete the user
        # 1 query to delete the avatar
        self.assertNumQueries(2, a.delete)
        self.assertFalse(User.objects.exists())
        self.assertFalse(Avatar.objects.exists())

    def test_fast_delete_m2m(self):
        t = M2MTo.objects.create()
        f = M2MFrom.objects.create()
        f.m2m.add(t)
        # 1 to delete f, 1 to fast-delete m2m for f
        self.assertNumQueries(2, f.delete)

    def test_fast_delete_revm2m(self):
        t = M2MTo.objects.create()
        f = M2MFrom.objects.create()
        f.m2m.add(t)
        # 1 to delete t, 1 to fast-delete t's m_set
        self.assertNumQueries(2, f.delete)

    def test_fast_delete_qs(self):
        u1 = User.objects.create()
        u2 = User.objects.create()
        self.assertNumQueries(1, User.objects.filter(pk=u1.pk).delete)
        self.assertEquals(User.objects.count(), 1)
        self.assertTrue(User.objects.filter(pk=u2.pk).exists())

    def test_fast_delete_joined_qs(self):
        a = Avatar.objects.create(desc='a')
        User.objects.create(avatar=a)
        u2 = User.objects.create()
        expected_queries = 1 if connection.features.update_can_self_select else 2
        self.assertNumQueries(expected_queries,
                              User.objects.filter(avatar__desc='a').delete)
        self.assertEquals(User.objects.count(), 1)
        self.assertTrue(User.objects.filter(pk=u2.pk).exists())

    def test_fast_delete_inheritance(self):
        c = Child.objects.create()
        p = Parent.objects.create()
        # 1 for self, 1 for parent
        # However, this doesn't work as child.parent access creates a query,
        # and this means we will be generating extra queries (a lot for large
        # querysets). This is not a fast-delete problem.
        # self.assertNumQueries(2, c.delete)
        c.delete()
        self.assertFalse(Child.objects.exists())
        self.assertEquals(Parent.objects.count(), 1)
        self.assertEquals(Parent.objects.filter(pk=p.pk).count(), 1)
        # 1 for self delete, 1 for fast delete of empty "child" qs.
        self.assertNumQueries(2, p.delete)
        self.assertFalse(Parent.objects.exists())
        # 1 for self delete, 1 for fast delete of empty "child" qs.
        c = Child.objects.create()
        p = c.parent_ptr
        self.assertNumQueries(2, p.delete)
        self.assertFalse(Parent.objects.exists())
        self.assertFalse(Child.objects.exists())
@@ -39,3 +39,6 @@ class Guest(models.Model):

    class Meta:
        verbose_name = "awesome guest"

class EventGuide(models.Model):
    event = models.ForeignKey(Event, on_delete=models.DO_NOTHING)
@@ -17,7 +17,7 @@ from django.utils.formats import localize
from django.utils.safestring import mark_safe
from django.utils import six

from .models import Article, Count, Event, Location
from .models import Article, Count, Event, Location, EventGuide


class NestedObjectsTests(TestCase):
@@ -71,6 +71,17 @@ class NestedObjectsTests(TestCase):
        # Should not require additional queries to populate the nested graph.
        self.assertNumQueries(2, self._collect, 0)

    def test_on_delete_do_nothing(self):
        """
        Check that the nested collector doesn't query for DO_NOTHING objects.
        """
        n = NestedObjects(using=DEFAULT_DB_ALIAS)
        objs = [Event.objects.create()]
        EventGuide.objects.create(event=objs[0])
        with self.assertNumQueries(2):
            # One for Location, one for Guest, and no query for EventGuide
            n.collect(objs)


class UtilTests(unittest.TestCase):
    def test_values_from_lookup_field(self):
        """
@@ -3,7 +3,7 @@ from __future__ import absolute_import
import datetime

from django.conf import settings
from django.db import backend, transaction, DEFAULT_DB_ALIAS
from django.db import backend, transaction, DEFAULT_DB_ALIAS, models
from django.test import TestCase, TransactionTestCase, skipUnlessDBFeature

from .models import (Book, Award, AwardNote, Person, Child, Toy, PlayedWith,
@@ -139,17 +139,24 @@ class DeleteCascadeTransactionTests(TransactionTestCase):
        eaten = Eaten.objects.create(food=apple, meal="lunch")

        apple.delete()
        self.assertFalse(Food.objects.exists())
        self.assertFalse(Eaten.objects.exists())


class LargeDeleteTests(TestCase):
    def test_large_deletes(self):
        "Regression for #13309 -- if the number of objects > chunk size, deletion still occurs"
        for x in range(300):
            track = Book.objects.create(pagecount=x+100)
        # attach a signal to make sure we will not fast-delete
        def noop(*args, **kwargs):
            pass
        models.signals.post_delete.connect(noop, sender=Book)
        Book.objects.all().delete()
        models.signals.post_delete.disconnect(noop, sender=Book)
        self.assertEqual(Book.objects.count(), 0)


class ProxyDeleteTest(TestCase):
    """
    Tests on_delete behavior for proxy models.
@@ -127,15 +127,15 @@ class DispatcherTests(unittest.TestCase):
        self._testIsClean(a_signal)

    def test_has_listeners(self):
        self.assertIs(a_signal.has_listeners(), False)
        self.assertIs(a_signal.has_listeners(sender=object()), False)
        self.assertFalse(a_signal.has_listeners())
        self.assertFalse(a_signal.has_listeners(sender=object()))
        receiver_1 = Callable()
        a_signal.connect(receiver_1)
        self.assertIs(a_signal.has_listeners(), True)
        self.assertIs(a_signal.has_listeners(sender=object()), True)
        self.assertTrue(a_signal.has_listeners())
        self.assertTrue(a_signal.has_listeners(sender=object()))
        a_signal.disconnect(receiver_1)
        self.assertIs(a_signal.has_listeners(), False)
        self.assertIs(a_signal.has_listeners(sender=object()), False)
        self.assertFalse(a_signal.has_listeners())
        self.assertFalse(a_signal.has_listeners(sender=object()))


class ReceiverTestCase(unittest.TestCase):