From 1cd6e04cd4f768bcd4385b75de433d497d938f82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anssi=20K=C3=A4=C3=A4ri=C3=A4inen?= Date: Thu, 20 Sep 2012 18:51:30 +0300 Subject: [PATCH] Fixed #18676 -- Allow fast-path deletion of objects Objects can be fast-path deleted if there are no signals, and there are no further cascades. If fast-path is taken, the objects do not need to be loaded into memory before deletion. Thanks to Jeremy Dunck, Simon Charette and Alex Gaynor for reviewing the patch. --- django/contrib/admin/util.py | 7 ++ django/db/models/deletion.py | 63 +++++++++-- django/db/models/query.py | 8 ++ django/db/models/sql/compiler.py | 3 +- django/db/models/sql/subqueries.py | 32 ++++++ docs/ref/models/querysets.txt | 15 +++ docs/releases/1.5.txt | 6 ++ tests/modeltests/delete/models.py | 20 +++- tests/modeltests/delete/tests.py | 101 +++++++++++++++++- tests/regressiontests/admin_util/models.py | 3 + tests/regressiontests/admin_util/tests.py | 13 ++- tests/regressiontests/delete_regress/tests.py | 11 +- .../dispatch/tests/test_dispatcher.py | 12 +-- 13 files changed, 275 insertions(+), 19 deletions(-) diff --git a/django/contrib/admin/util.py b/django/contrib/admin/util.py index 74eef2e733..a85045c515 100644 --- a/django/contrib/admin/util.py +++ b/django/contrib/admin/util.py @@ -191,6 +191,13 @@ class NestedObjects(Collector): roots.extend(self._nested(root, seen, format_callback)) return roots + def can_fast_delete(self, *args, **kwargs): + """ + We always want to load the objects into memory so that we can display + them to the user in confirm page. + """ + return False + def model_format_dict(obj): """ diff --git a/django/db/models/deletion.py b/django/db/models/deletion.py index 4449b75a81..6dff4a2882 100644 --- a/django/db/models/deletion.py +++ b/django/db/models/deletion.py @@ -77,6 +77,9 @@ class Collector(object): self.data = {} self.batches = {} # {model: {field: set([instances])}} self.field_updates = {} # {model: {(field, value): set([instances])}} + # fast_deletes is a list of queryset-likes that can be deleted without + # fetching the objects into memory. + self.fast_deletes = [] # Tracks deletion-order dependency for databases without transactions # or ability to defer constraint checks. Only concrete model classes @@ -131,6 +134,43 @@ class Collector(object): model, {}).setdefault( (field, value), set()).update(objs) + def can_fast_delete(self, objs, from_field=None): + """ + Determines if the objects in the given queryset-like can be + fast-deleted. This can be done if there are no cascades, no + parents and no signal listeners for the object class. + + The 'from_field' tells where we are coming from - we need this to + determine if the objects are in fact to be deleted. Allows also + skipping parent -> child -> parent chain preventing fast delete of + the child. + """ + if from_field and from_field.rel.on_delete is not CASCADE: + return False + if not (hasattr(objs, 'model') and hasattr(objs, '_raw_delete')): + return False + model = objs.model + if (signals.pre_delete.has_listeners(model) + or signals.post_delete.has_listeners(model) + or signals.m2m_changed.has_listeners(model)): + return False + # The use of from_field comes from the need to avoid cascade back to + # parent when parent delete is cascading to child. + opts = model._meta + if any(link != from_field for link in opts.concrete_model._meta.parents.values()): + return False + # Foreign keys pointing to this model, both from m2m and other + # models. + for related in opts.get_all_related_objects( + include_hidden=True, include_proxy_eq=True): + if related.field.rel.on_delete is not DO_NOTHING: + return False + # GFK deletes + for relation in opts.many_to_many: + if not relation.rel.through: + return False + return True + def collect(self, objs, source=None, nullable=False, collect_related=True, source_attr=None, reverse_dependency=False): """ @@ -148,6 +188,9 @@ class Collector(object): models, the one case in which the cascade follows the forwards direction of an FK rather than the reverse direction.) """ + if self.can_fast_delete(objs): + self.fast_deletes.append(objs) + return new_objs = self.add(objs, source, nullable, reverse_dependency=reverse_dependency) if not new_objs: @@ -160,6 +203,10 @@ class Collector(object): concrete_model = model._meta.concrete_model for ptr in six.itervalues(concrete_model._meta.parents): if ptr: + # FIXME: This seems to be buggy and execute a query for each + # parent object fetch. We have the parent data in the obj, + # but we don't have a nice way to turn that data into parent + # object instance. parent_objs = [getattr(obj, ptr.name) for obj in new_objs] self.collect(parent_objs, source=model, source_attr=ptr.rel.related_name, @@ -170,12 +217,12 @@ class Collector(object): for related in model._meta.get_all_related_objects( include_hidden=True, include_proxy_eq=True): field = related.field - if related.model._meta.auto_created: - self.add_batch(related.model, field, new_objs) - else: - sub_objs = self.related_objects(related, new_objs) - if not sub_objs: - continue + if field.rel.on_delete == DO_NOTHING: + continue + sub_objs = self.related_objects(related, new_objs) + if self.can_fast_delete(sub_objs, from_field=field): + self.fast_deletes.append(sub_objs) + elif sub_objs: field.rel.on_delete(self, field, sub_objs, self.using) # TODO This entire block is only needed as a special case to @@ -241,6 +288,10 @@ class Collector(object): sender=model, instance=obj, using=self.using ) + # fast deletes + for qs in self.fast_deletes: + qs._raw_delete(using=self.using) + # update fields for model, instances_for_fieldvalues in six.iteritems(self.field_updates): query = sql.UpdateQuery(model) diff --git a/django/db/models/query.py b/django/db/models/query.py index 8bf08b7a93..0210a7914d 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -529,6 +529,14 @@ class QuerySet(object): self._result_cache = None delete.alters_data = True + def _raw_delete(self, using): + """ + Deletes objects found from the given queryset in single direct SQL + query. No signals are sent, and there is no protection for cascades. + """ + sql.DeleteQuery(self.model).delete_qs(self, using) + _raw_delete.alters_data = True + def update(self, **kwargs): """ Updates all elements in the current QuerySet, setting all the given diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py index f06d6b11a4..f6b6bba1d9 100644 --- a/django/db/models/sql/compiler.py +++ b/django/db/models/sql/compiler.py @@ -934,7 +934,8 @@ class SQLDeleteCompiler(SQLCompiler): qn = self.quote_name_unless_alias result = ['DELETE FROM %s' % qn(self.query.tables[0])] where, params = self.query.where.as_sql(qn=qn, connection=self.connection) - result.append('WHERE %s' % where) + if where: + result.append('WHERE %s' % where) return ' '.join(result), tuple(params) class SQLUpdateCompiler(SQLCompiler): diff --git a/django/db/models/sql/subqueries.py b/django/db/models/sql/subqueries.py index c6995c6abb..9f3fb8ac22 100644 --- a/django/db/models/sql/subqueries.py +++ b/django/db/models/sql/subqueries.py @@ -3,6 +3,7 @@ Query subclasses which provide extra functionality beyond simple data retrieval. """ from django.core.exceptions import FieldError +from django.db import connections from django.db.models.constants import LOOKUP_SEP from django.db.models.fields import DateField, FieldDoesNotExist from django.db.models.sql.constants import * @@ -46,6 +47,37 @@ class DeleteQuery(Query): pk_list[offset:offset + GET_ITERATOR_CHUNK_SIZE]), AND) self.do_query(self.model._meta.db_table, where, using=using) + def delete_qs(self, query, using): + innerq = query.query + # Make sure the inner query has at least one table in use. + innerq.get_initial_alias() + # The same for our new query. + self.get_initial_alias() + innerq_used_tables = [t for t in innerq.tables + if innerq.alias_refcount[t]] + if ((not innerq_used_tables or innerq_used_tables == self.tables) + and not len(innerq.having)): + # There is only the base table in use in the query, and there are + # no aggregate filtering going on. + self.where = innerq.where + else: + pk = query.model._meta.pk + if not connections[using].features.update_can_self_select: + # We can't do the delete using subquery. + values = list(query.values_list('pk', flat=True)) + if not values: + return + self.delete_batch(values, using) + return + else: + values = innerq + innerq.select = [(self.get_initial_alias(), pk.column)] + where = self.where_class() + where.add((Constraint(None, pk.column, pk), 'in', values), AND) + self.where = where + self.get_compiler(using).execute_sql(None) + + class UpdateQuery(Query): """ Represents an "update" SQL query. diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt index 749a979db6..d17d869164 100644 --- a/docs/ref/models/querysets.txt +++ b/docs/ref/models/querysets.txt @@ -1667,6 +1667,21 @@ methods on your models. It does, however, emit the :data:`~django.db.models.signals.post_delete` signals for all deleted objects (including cascaded deletions). +.. versionadded:: 1.5 + Allow fast-path deletion of objects + +Django needs to fetch objects into memory to send signals and handle cascades. +However, if there are no cascades and no signals, then Django may take a +fast-path and delete objects without fetching into memory. For large +deletes this can result in significantly reduced memory usage. The amount of +executed queries can be reduced, too. + +ForeignKeys which are set to :attr:`~django.db.models.ForeignKey.on_delete` +DO_NOTHING do not prevent taking the fast-path in deletion. + +Note that the queries generated in object deletion is an implementation +detail subject to change. + .. _field-lookups: Field lookups diff --git a/docs/releases/1.5.txt b/docs/releases/1.5.txt index fddd03d421..5636a2b34b 100644 --- a/docs/releases/1.5.txt +++ b/docs/releases/1.5.txt @@ -149,6 +149,12 @@ Django 1.5 also includes several smaller improvements worth noting: * Django now provides a mod_wsgi :doc:`auth handler ` +* The :meth:`QuerySet.delete() ` + and :meth:`Model.delete() ` can now take + fast-path in some cases. The fast-path allows for less queries and less + objects fetched into memory. See :meth:`QuerySet.delete() + ` for details. + Backwards incompatible changes in 1.5 ===================================== diff --git a/tests/modeltests/delete/models.py b/tests/modeltests/delete/models.py index e0cec426ea..65d4e6f725 100644 --- a/tests/modeltests/delete/models.py +++ b/tests/modeltests/delete/models.py @@ -95,7 +95,7 @@ class MRNull(models.Model): class Avatar(models.Model): - pass + desc = models.TextField(null=True) class User(models.Model): @@ -108,3 +108,21 @@ class HiddenUser(models.Model): class HiddenUserProfile(models.Model): user = models.ForeignKey(HiddenUser) + +class M2MTo(models.Model): + pass + +class M2MFrom(models.Model): + m2m = models.ManyToManyField(M2MTo) + +class Parent(models.Model): + pass + +class Child(Parent): + pass + +class Base(models.Model): + pass + +class RelToBase(models.Model): + base = models.ForeignKey(Base, on_delete=models.DO_NOTHING) diff --git a/tests/modeltests/delete/tests.py b/tests/modeltests/delete/tests.py index 26f2fd52c1..2610cb4b39 100644 --- a/tests/modeltests/delete/tests.py +++ b/tests/modeltests/delete/tests.py @@ -1,11 +1,12 @@ from __future__ import absolute_import -from django.db import models, IntegrityError +from django.db import models, IntegrityError, connection from django.test import TestCase, skipUnlessDBFeature, skipIfDBFeature from django.utils.six.moves import xrange from .models import (R, RChild, S, T, U, A, M, MR, MRNull, - create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile) + create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile, + M2MTo, M2MFrom, Parent, Child, Base) class OnDeleteTests(TestCase): @@ -74,6 +75,16 @@ class OnDeleteTests(TestCase): self.assertEqual(replacement_r, a.donothing) models.signals.pre_delete.disconnect(check_do_nothing) + def test_do_nothing_qscount(self): + """ + Test that a models.DO_NOTHING relation doesn't trigger a query. + """ + b = Base.objects.create() + with self.assertNumQueries(1): + # RelToBase should not be queried. + b.delete() + self.assertEqual(Base.objects.count(), 0) + def test_inheritance_cascade_up(self): child = RChild.objects.create() child.delete() @@ -229,16 +240,34 @@ class DeletionTests(TestCase): # 1 query to delete the avatar # The important thing is that when we can defer constraint checks there # is no need to do an UPDATE on User.avatar to null it out. + + # Attach a signal to make sure we will not do fast_deletes. + calls = [] + def noop(*args, **kwargs): + calls.append('') + models.signals.post_delete.connect(noop, sender=User) + self.assertNumQueries(3, a.delete) self.assertFalse(User.objects.exists()) self.assertFalse(Avatar.objects.exists()) + self.assertEquals(len(calls), 1) + models.signals.post_delete.disconnect(noop, sender=User) @skipIfDBFeature("can_defer_constraint_checks") def test_cannot_defer_constraint_checks(self): u = User.objects.create( avatar=Avatar.objects.create() ) + # Attach a signal to make sure we will not do fast_deletes. + calls = [] + def noop(*args, **kwargs): + calls.append('') + models.signals.post_delete.connect(noop, sender=User) + a = Avatar.objects.get(pk=u.avatar_id) + # The below doesn't make sense... Why do we need to null out + # user.avatar if we are going to delete the user immediately after it, + # and there are no more cascades. # 1 query to find the users for the avatar. # 1 query to delete the user # 1 query to null out user.avatar, because we can't defer the constraint @@ -246,6 +275,8 @@ class DeletionTests(TestCase): self.assertNumQueries(4, a.delete) self.assertFalse(User.objects.exists()) self.assertFalse(Avatar.objects.exists()) + self.assertEquals(len(calls), 1) + models.signals.post_delete.disconnect(noop, sender=User) def test_hidden_related(self): r = R.objects.create() @@ -254,3 +285,69 @@ class DeletionTests(TestCase): r.delete() self.assertEqual(HiddenUserProfile.objects.count(), 0) + +class FastDeleteTests(TestCase): + + def test_fast_delete_fk(self): + u = User.objects.create( + avatar=Avatar.objects.create() + ) + a = Avatar.objects.get(pk=u.avatar_id) + # 1 query to fast-delete the user + # 1 query to delete the avatar + self.assertNumQueries(2, a.delete) + self.assertFalse(User.objects.exists()) + self.assertFalse(Avatar.objects.exists()) + + def test_fast_delete_m2m(self): + t = M2MTo.objects.create() + f = M2MFrom.objects.create() + f.m2m.add(t) + # 1 to delete f, 1 to fast-delete m2m for f + self.assertNumQueries(2, f.delete) + + def test_fast_delete_revm2m(self): + t = M2MTo.objects.create() + f = M2MFrom.objects.create() + f.m2m.add(t) + # 1 to delete t, 1 to fast-delete t's m_set + self.assertNumQueries(2, f.delete) + + def test_fast_delete_qs(self): + u1 = User.objects.create() + u2 = User.objects.create() + self.assertNumQueries(1, User.objects.filter(pk=u1.pk).delete) + self.assertEquals(User.objects.count(), 1) + self.assertTrue(User.objects.filter(pk=u2.pk).exists()) + + def test_fast_delete_joined_qs(self): + a = Avatar.objects.create(desc='a') + User.objects.create(avatar=a) + u2 = User.objects.create() + expected_queries = 1 if connection.features.update_can_self_select else 2 + self.assertNumQueries(expected_queries, + User.objects.filter(avatar__desc='a').delete) + self.assertEquals(User.objects.count(), 1) + self.assertTrue(User.objects.filter(pk=u2.pk).exists()) + + def test_fast_delete_inheritance(self): + c = Child.objects.create() + p = Parent.objects.create() + # 1 for self, 1 for parent + # However, this doesn't work as child.parent access creates a query, + # and this means we will be generating extra queries (a lot for large + # querysets). This is not a fast-delete problem. + # self.assertNumQueries(2, c.delete) + c.delete() + self.assertFalse(Child.objects.exists()) + self.assertEquals(Parent.objects.count(), 1) + self.assertEquals(Parent.objects.filter(pk=p.pk).count(), 1) + # 1 for self delete, 1 for fast delete of empty "child" qs. + self.assertNumQueries(2, p.delete) + self.assertFalse(Parent.objects.exists()) + # 1 for self delete, 1 for fast delete of empty "child" qs. + c = Child.objects.create() + p = c.parent_ptr + self.assertNumQueries(2, p.delete) + self.assertFalse(Parent.objects.exists()) + self.assertFalse(Child.objects.exists()) diff --git a/tests/regressiontests/admin_util/models.py b/tests/regressiontests/admin_util/models.py index b3504a1fa4..32a6cd6291 100644 --- a/tests/regressiontests/admin_util/models.py +++ b/tests/regressiontests/admin_util/models.py @@ -39,3 +39,6 @@ class Guest(models.Model): class Meta: verbose_name = "awesome guest" + +class EventGuide(models.Model): + event = models.ForeignKey(Event, on_delete=models.DO_NOTHING) diff --git a/tests/regressiontests/admin_util/tests.py b/tests/regressiontests/admin_util/tests.py index d04740ce95..ef8a91d1db 100644 --- a/tests/regressiontests/admin_util/tests.py +++ b/tests/regressiontests/admin_util/tests.py @@ -17,7 +17,7 @@ from django.utils.formats import localize from django.utils.safestring import mark_safe from django.utils import six -from .models import Article, Count, Event, Location +from .models import Article, Count, Event, Location, EventGuide class NestedObjectsTests(TestCase): @@ -71,6 +71,17 @@ class NestedObjectsTests(TestCase): # Should not require additional queries to populate the nested graph. self.assertNumQueries(2, self._collect, 0) + def test_on_delete_do_nothing(self): + """ + Check that the nested collector doesn't query for DO_NOTHING objects. + """ + n = NestedObjects(using=DEFAULT_DB_ALIAS) + objs = [Event.objects.create()] + EventGuide.objects.create(event=objs[0]) + with self.assertNumQueries(2): + # One for Location, one for Guest, and no query for EventGuide + n.collect(objs) + class UtilTests(unittest.TestCase): def test_values_from_lookup_field(self): """ diff --git a/tests/regressiontests/delete_regress/tests.py b/tests/regressiontests/delete_regress/tests.py index 32feae2ded..ebe59bffd7 100644 --- a/tests/regressiontests/delete_regress/tests.py +++ b/tests/regressiontests/delete_regress/tests.py @@ -3,7 +3,7 @@ from __future__ import absolute_import import datetime from django.conf import settings -from django.db import backend, transaction, DEFAULT_DB_ALIAS +from django.db import backend, transaction, DEFAULT_DB_ALIAS, models from django.test import TestCase, TransactionTestCase, skipUnlessDBFeature from .models import (Book, Award, AwardNote, Person, Child, Toy, PlayedWith, @@ -139,17 +139,24 @@ class DeleteCascadeTransactionTests(TransactionTestCase): eaten = Eaten.objects.create(food=apple, meal="lunch") apple.delete() + self.assertFalse(Food.objects.exists()) + self.assertFalse(Eaten.objects.exists()) + class LargeDeleteTests(TestCase): def test_large_deletes(self): "Regression for #13309 -- if the number of objects > chunk size, deletion still occurs" for x in range(300): track = Book.objects.create(pagecount=x+100) + # attach a signal to make sure we will not fast-delete + def noop(*args, **kwargs): + pass + models.signals.post_delete.connect(noop, sender=Book) Book.objects.all().delete() + models.signals.post_delete.disconnect(noop, sender=Book) self.assertEqual(Book.objects.count(), 0) - class ProxyDeleteTest(TestCase): """ Tests on_delete behavior for proxy models. diff --git a/tests/regressiontests/dispatch/tests/test_dispatcher.py b/tests/regressiontests/dispatch/tests/test_dispatcher.py index 4e4669d34c..5f8f92acaf 100644 --- a/tests/regressiontests/dispatch/tests/test_dispatcher.py +++ b/tests/regressiontests/dispatch/tests/test_dispatcher.py @@ -127,15 +127,15 @@ class DispatcherTests(unittest.TestCase): self._testIsClean(a_signal) def test_has_listeners(self): - self.assertIs(a_signal.has_listeners(), False) - self.assertIs(a_signal.has_listeners(sender=object()), False) + self.assertFalse(a_signal.has_listeners()) + self.assertFalse(a_signal.has_listeners(sender=object())) receiver_1 = Callable() a_signal.connect(receiver_1) - self.assertIs(a_signal.has_listeners(), True) - self.assertIs(a_signal.has_listeners(sender=object()), True) + self.assertTrue(a_signal.has_listeners()) + self.assertTrue(a_signal.has_listeners(sender=object())) a_signal.disconnect(receiver_1) - self.assertIs(a_signal.has_listeners(), False) - self.assertIs(a_signal.has_listeners(sender=object()), False) + self.assertFalse(a_signal.has_listeners()) + self.assertFalse(a_signal.has_listeners(sender=object())) class ReceiverTestCase(unittest.TestCase):