magic-removal: Fixed #1219 -- Added implementation of bulk delete, and factored common deletion code out of individual models.

git-svn-id: http://code.djangoproject.com/svn/django/branches/magic-removal@2307 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Russell Keith-Magee 2006-02-14 09:51:27 +00:00
parent c9839b85e5
commit a1d9de4dc7
6 changed files with 167 additions and 128 deletions

View File

@ -3,7 +3,7 @@ import django.db.models.manager
from django.db.models.fields import AutoField, ImageField from django.db.models.fields import AutoField, ImageField
from django.db.models.fields.related import OneToOne, ManyToOne from django.db.models.fields.related import OneToOne, ManyToOne
from django.db.models.related import RelatedObject from django.db.models.related import RelatedObject
from django.db.models.query import orderlist2sql from django.db.models.query import orderlist2sql, delete_objects
from django.db.models.options import Options, AdminOptions from django.db.models.options import Options, AdminOptions
from django.db import connection, backend from django.db import connection, backend
from django.db.models import signals from django.db.models import signals
@ -49,15 +49,6 @@ class ModelBase(type):
register_models(new_class._meta.app_label, new_class) register_models(new_class._meta.app_label, new_class)
return new_class return new_class
def cmp_cls(x, y):
for field in x._meta.fields:
if field.rel and not field.null and field.rel.to == y:
return -1
for field in y._meta.fields:
if field.rel and not field.null and field.rel.to == x:
return 1
return 0
class Model(object): class Model(object):
__metaclass__ = ModelBase __metaclass__ = ModelBase
@ -187,7 +178,7 @@ class Model(object):
save.alters_data = True save.alters_data = True
def __collect_sub_objects(self, seen_objs): def _collect_sub_objects(self, seen_objs):
""" """
Recursively populates seen_objs with all objects related to this object. Recursively populates seen_objs with all objects related to this object.
When done, seen_objs will be in the format: When done, seen_objs will be in the format:
@ -207,56 +198,21 @@ class Model(object):
except ObjectDoesNotExist: except ObjectDoesNotExist:
pass pass
else: else:
sub_obj.__collect_sub_objects(seen_objs) sub_obj._collect_sub_objects(seen_objs)
else: else:
for sub_obj in getattr(self, rel_opts_name).all(): for sub_obj in getattr(self, rel_opts_name).all():
sub_obj.__collect_sub_objects(seen_objs) sub_obj._collect_sub_objects(seen_objs)
def delete(self): def delete(self):
assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname) assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)
# Find all the objects than need to be deleted
seen_objs = {} seen_objs = {}
self.__collect_sub_objects(seen_objs) self._collect_sub_objects(seen_objs)
seen_classes = set(seen_objs.keys()) # Actually delete the objects
ordered_classes = list(seen_classes) delete_objects(seen_objs)
ordered_classes.sort(cmp_cls)
cursor = connection.cursor()
for cls in ordered_classes:
seen_objs[cls] = seen_objs[cls].items()
seen_objs[cls].sort()
for pk_val, instance in seen_objs[cls]:
dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
for related in cls._meta.get_all_related_many_to_many_objects():
cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
(backend.quote_name(related.field.get_m2m_db_table(related.opts)),
backend.quote_name(cls._meta.object_name.lower() + '_id')),
[pk_val])
for f in cls._meta.many_to_many:
cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
(backend.quote_name(f.get_m2m_db_table(cls._meta)),
backend.quote_name(cls._meta.object_name.lower() + '_id')),
[pk_val])
for field in cls._meta.fields:
if field.rel and field.null and field.rel.to in seen_classes:
cursor.execute("UPDATE %s SET %s=NULL WHERE %s=%%s" % \
(backend.quote_name(cls._meta.db_table), backend.quote_name(field.column),
backend.quote_name(cls._meta.pk.column)), [pk_val])
setattr(instance, field.attname, None)
for cls in ordered_classes:
seen_objs[cls].reverse()
for pk_val, instance in seen_objs[cls]:
cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
(backend.quote_name(cls._meta.db_table), backend.quote_name(cls._meta.pk.column)),
[pk_val])
setattr(instance, cls._meta.pk.attname, None)
dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
connection.commit()
delete.alters_data = True delete.alters_data = True
def _get_FIELD_display(self, field): def _get_FIELD_display(self, field):

View File

@ -57,9 +57,6 @@ class Manager(object):
def dates(self, *args, **kwargs): def dates(self, *args, **kwargs):
return self.get_query_set().dates(*args, **kwargs) return self.get_query_set().dates(*args, **kwargs)
def delete(self, *args, **kwargs):
return self.get_query_set().delete(*args, **kwargs)
def distinct(self, *args, **kwargs): def distinct(self, *args, **kwargs):
return self.get_query_set().distinct(*args, **kwargs) return self.get_query_set().distinct(*args, **kwargs)

View File

@ -1,6 +1,9 @@
from django.db import backend, connection from django.db import backend, connection
from django.db.models.fields import DateField, FieldDoesNotExist from django.db.models.fields import DateField, FieldDoesNotExist
from django.db.models import signals
from django.dispatch import dispatcher
from django.utils.datastructures import SortedDict from django.utils.datastructures import SortedDict
import operator import operator
LOOKUP_SEPARATOR = '__' LOOKUP_SEPARATOR = '__'
@ -125,7 +128,7 @@ class QuerySet(object):
extra_select = self._select.items() extra_select = self._select.items()
cursor = connection.cursor() cursor = connection.cursor()
select, sql, params = self._get_sql_clause(True) select, sql, params = self._get_sql_clause()
cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params) cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
fill_cache = self._select_related fill_cache = self._select_related
index_end = len(self.model._meta.fields) index_end = len(self.model._meta.fields)
@ -149,7 +152,7 @@ class QuerySet(object):
counter._offset = None counter._offset = None
counter._limit = None counter._limit = None
counter._select_related = False counter._select_related = False
select, sql, params = counter._get_sql_clause(True) select, sql, params = counter._get_sql_clause()
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute("SELECT COUNT(*)" + sql, params) cursor.execute("SELECT COUNT(*)" + sql, params)
return cursor.fetchone()[0] return cursor.fetchone()[0]
@ -171,34 +174,31 @@ class QuerySet(object):
assert bool(latest_by), "latest() requires either a field_name parameter or 'get_latest_by' in the model" assert bool(latest_by), "latest() requires either a field_name parameter or 'get_latest_by' in the model"
return self._clone(_limit=1, _order_by=('-'+latest_by,)).get() return self._clone(_limit=1, _order_by=('-'+latest_by,)).get()
def delete(self, *args, **kwargs): def delete(self):
""" """
Deletes the records with the given kwargs. If no kwargs are given, Deletes the records in the current QuerySet.
deletes records in the current QuerySet.
""" """
# Remove the DELETE_ALL argument, if it exists. del_query = self._clone()
delete_all = kwargs.pop('DELETE_ALL', False)
# Check for at least one query argument.
if not kwargs and not delete_all:
raise TypeError, "SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data."
if kwargs:
del_query = self.filter(*args, **kwargs)
else:
del_query = self._clone()
# disable non-supported fields # disable non-supported fields
del_query._select_related = False del_query._select_related = False
del_query._select = {}
del_query._order_by = [] del_query._order_by = []
del_query._offset = None del_query._offset = None
del_query._limit = None del_query._limit = None
# Perform the SQL delete # Collect all the objects to be deleted, and all the objects that are related to
cursor = connection.cursor() # the objects that are to be deleted
_, sql, params = del_query._get_sql_clause(False) seen_objs = {}
cursor.execute("DELETE " + sql, params) for object in del_query:
object._collect_sub_objects(seen_objs)
# Delete the objects
delete_objects(seen_objs)
# Clear the result cache, in case this QuerySet gets reused.
self._result_cache = None
delete.alters_data = True
################################################## ##################################################
# PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS # # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
################################################## ##################################################
@ -297,7 +297,7 @@ class QuerySet(object):
self._result_cache = list(self.iterator()) self._result_cache = list(self.iterator())
return self._result_cache return self._result_cache
def _get_sql_clause(self, allow_joins): def _get_sql_clause(self):
opts = self.model._meta opts = self.model._meta
# Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z. # Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z.
@ -325,10 +325,6 @@ class QuerySet(object):
# Start composing the body of the SQL statement. # Start composing the body of the SQL statement.
sql = [" FROM", backend.quote_name(opts.db_table)] sql = [" FROM", backend.quote_name(opts.db_table)]
# Check if extra tables are allowed. If not, throw an error
if (tables or joins) and not allow_joins:
raise TypeError, "Joins are not allowed in this type of query"
# Compose the join dictionary into SQL describing the joins. # Compose the join dictionary into SQL describing the joins.
if joins: if joins:
sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition) sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition)
@ -407,7 +403,7 @@ class ValuesQuerySet(QuerySet):
field_names = [f.attname for f in self.model._meta.fields] field_names = [f.attname for f in self.model._meta.fields]
cursor = connection.cursor() cursor = connection.cursor()
select, sql, params = self._get_sql_clause(True) select, sql, params = self._get_sql_clause()
select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns] select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns]
cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params) cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
while 1: while 1:
@ -429,7 +425,7 @@ class DateQuerySet(QuerySet):
if self._field.null: if self._field.null:
date_query._where.append('%s.%s IS NOT NULL' % \ date_query._where.append('%s.%s IS NOT NULL' % \
(backend.quote_name(self.model._meta.db_table), backend.quote_name(self._field.column))) (backend.quote_name(self.model._meta.db_table), backend.quote_name(self._field.column)))
select, sql, params = self._get_sql_clause(True) select, sql, params = self._get_sql_clause()
sql = 'SELECT %s %s GROUP BY 1 ORDER BY 1 %s' % \ sql = 'SELECT %s %s GROUP BY 1 ORDER BY 1 %s' % \
(backend.get_date_trunc_sql(self._kind, '%s.%s' % (backend.quote_name(self.model._meta.db_table), (backend.get_date_trunc_sql(self._kind, '%s.%s' % (backend.quote_name(self.model._meta.db_table),
backend.quote_name(self._field.column))), sql, self._order) backend.quote_name(self._field.column))), sql, self._order)
@ -762,3 +758,74 @@ def lookup_inner(path, clause, value, opts, table, column):
params.extend(field.get_db_prep_lookup(clause, value)) params.extend(field.get_db_prep_lookup(clause, value))
return tables, joins, where, params return tables, joins, where, params
def compare_models(x, y):
"Comparator for Models that puts models in an order where dependencies are easily resolved."
for field in x._meta.fields:
if field.rel and not field.null and field.rel.to == y:
return -1
for field in y._meta.fields:
if field.rel and not field.null and field.rel.to == x:
return 1
return 0
def delete_objects(seen_objs):
"Iterate through a list of seen classes, and remove any instances that are referred to"
seen_classes = set(seen_objs.keys())
ordered_classes = list(seen_classes)
ordered_classes.sort(compare_models)
cursor = connection.cursor()
for cls in ordered_classes:
seen_objs[cls] = seen_objs[cls].items()
seen_objs[cls].sort()
# Pre notify all instances to be deleted
for pk_val, instance in seen_objs[cls]:
dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
pk_list = [pk for pk,instance in seen_objs[cls]]
for related in cls._meta.get_all_related_many_to_many_objects():
cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
(backend.quote_name(related.field.get_m2m_db_table(related.opts)),
backend.quote_name(cls._meta.object_name.lower() + '_id'),
','.join('%s' for pk in pk_list)),
pk_list)
for f in cls._meta.many_to_many:
cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
(backend.quote_name(f.get_m2m_db_table(cls._meta)),
backend.quote_name(cls._meta.object_name.lower() + '_id'),
','.join(['%s' for pk in pk_list])),
pk_list)
for field in cls._meta.fields:
if field.rel and field.null and field.rel.to in seen_classes:
cursor.execute("UPDATE %s SET %s=NULL WHERE %s IN (%s)" % \
(backend.quote_name(cls._meta.db_table),
backend.quote_name(field.column),
backend.quote_name(cls._meta.pk.column),
','.join(['%s' for pk in pk_list])),
pk_list)
# Now delete the actual data
for cls in ordered_classes:
seen_objs[cls].reverse()
pk_list = [pk for pk,instance in seen_objs[cls]]
cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
(backend.quote_name(cls._meta.db_table),
backend.quote_name(cls._meta.pk.column),
','.join(['%s' for pk in pk_list])),
pk_list)
# Last cleanup; set NULLs where there once was a reference to the object,
# NULL the primary key of the found objects, and perform post-notification.
for pk_val, instance in seen_objs[cls]:
for field in cls._meta.fields:
if field.rel and field.null and field.rel.to in seen_classes:
setattr(instance, field.attname, None)
setattr(instance, cls._meta.pk.attname, None)
dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
connection.commit()

View File

@ -9,7 +9,9 @@ from django.db import models
class Article(models.Model): class Article(models.Model):
headline = models.CharField(maxlength=100, default='Default headline') headline = models.CharField(maxlength=100, default='Default headline')
pub_date = models.DateTimeField() pub_date = models.DateTimeField()
def __repr__(self):
return self.headline
API_TESTS = """ API_TESTS = """
# No articles are in the system yet. # No articles are in the system yet.
@ -37,36 +39,34 @@ datetime.datetime(2005, 7, 28, 0, 0)
>>> a.headline = 'Area woman programs in Python' >>> a.headline = 'Area woman programs in Python'
>>> a.save() >>> a.save()
# Article.objects.all() returns all the articles in the database. Note that # Article.objects.all() returns all the articles in the database.
# the article is represented by "<Article object>", because we haven't given
# the Article model a __repr__() method.
>>> Article.objects.all() >>> Article.objects.all()
[<Article object>] [Area woman programs in Python]
# Django provides a rich database lookup API. # Django provides a rich database lookup API.
>>> Article.objects.get(id__exact=1) >>> Article.objects.get(id__exact=1)
<Article object> Area woman programs in Python
>>> Article.objects.get(headline__startswith='Area woman') >>> Article.objects.get(headline__startswith='Area woman')
<Article object> Area woman programs in Python
>>> Article.objects.get(pub_date__year=2005) >>> Article.objects.get(pub_date__year=2005)
<Article object> Area woman programs in Python
>>> Article.objects.get(pub_date__year=2005, pub_date__month=7) >>> Article.objects.get(pub_date__year=2005, pub_date__month=7)
<Article object> Area woman programs in Python
>>> Article.objects.get(pub_date__year=2005, pub_date__month=7, pub_date__day=28) >>> Article.objects.get(pub_date__year=2005, pub_date__month=7, pub_date__day=28)
<Article object> Area woman programs in Python
# The "__exact" lookup type can be omitted, as a shortcut. # The "__exact" lookup type can be omitted, as a shortcut.
>>> Article.objects.get(id=1) >>> Article.objects.get(id=1)
<Article object> Area woman programs in Python
>>> Article.objects.get(headline='Area woman programs in Python') >>> Article.objects.get(headline='Area woman programs in Python')
<Article object> Area woman programs in Python
>>> Article.objects.filter(pub_date__year=2005) >>> Article.objects.filter(pub_date__year=2005)
[<Article object>] [Area woman programs in Python]
>>> Article.objects.filter(pub_date__year=2004) >>> Article.objects.filter(pub_date__year=2004)
[] []
>>> Article.objects.filter(pub_date__year=2005, pub_date__month=7) >>> Article.objects.filter(pub_date__year=2005, pub_date__month=7)
[<Article object>] [Area woman programs in Python]
# Django raises an Article.DoesNotExist exception for get() if the parameters # Django raises an Article.DoesNotExist exception for get() if the parameters
# don't match any object. # don't match any object.
@ -84,7 +84,7 @@ DoesNotExist: Article does not exist for ...
# shortcut for primary-key exact lookups. # shortcut for primary-key exact lookups.
# The following is identical to articles.get(id=1). # The following is identical to articles.get(id=1).
>>> Article.objects.get(pk=1) >>> Article.objects.get(pk=1)
<Article object> Area woman programs in Python
# Model instances of the same type and same ID are considered equal. # Model instances of the same type and same ID are considered equal.
>>> a = Article.objects.get(pk=1) >>> a = Article.objects.get(pk=1)
@ -234,12 +234,12 @@ datetime.datetime(2005, 7, 28, 0, 0)
# You can get items using index and slice notation. # You can get items using index and slice notation.
>>> Article.objects.all()[0] >>> Article.objects.all()[0]
<Article object> Area woman programs in Python
>>> Article.objects.all()[1:3] >>> Article.objects.all()[1:3]
[<Article object>, <Article object>] [Second article, Third article]
>>> s3 = Article.objects.filter(id__exact=3) >>> s3 = Article.objects.filter(id__exact=3)
>>> (s1 | s2 | s3)[::2] >>> (s1 | s2 | s3)[::2]
[<Article object>, <Article object>] [Area woman programs in Python, Third article]
# An Article instance doesn't have access to the "objects" attribute. # An Article instance doesn't have access to the "objects" attribute.
# That's only available on the class. # That's only available on the class.
@ -254,20 +254,11 @@ Traceback (most recent call last):
AttributeError: Manager isn't accessible via Article instances AttributeError: Manager isn't accessible via Article instances
# Bulk delete test: How many objects before and after the delete? # Bulk delete test: How many objects before and after the delete?
>>> Article.objects.count() >>> Article.objects.all()
8L [Area woman programs in Python, Second article, Third article, Fourth article, Article 6, Default headline, Article 7, Updated article 8]
>>> Article.objects.delete(id__lte=4) >>> Article.objects.filter(id__lte=4).delete()
>>> Article.objects.count() >>> Article.objects.all()
4L [Article 6, Default headline, Article 7, Updated article 8]
>>> Article.objects.delete()
Traceback (most recent call last):
...
TypeError: SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data.
>>> Article.objects.delete(DELETE_ALL=True)
>>> Article.objects.count()
0L
""" """

View File

@ -162,6 +162,33 @@ API_TESTS = """
>>> p2.article_set.all().order_by('headline') >>> p2.article_set.all().order_by('headline')
[Oxygen-free diet works wonders] [Oxygen-free diet works wonders]
# Recreate the article and Publication we just deleted.
>>> p1 = Publication(id=None, title='The Python Journal')
>>> p1.save()
>>> a2 = Article(id=None, headline='NASA uses Python')
>>> a2.save()
>>> a2.publications.add(p1, p2, p3)
# Bulk delete some Publications - references to deleted publications should go
>>> Publication.objects.filter(title__startswith='Science').delete()
>>> Publication.objects.all()
[Highlights for Children, The Python Journal]
>>> Article.objects.all()
[Django lets you build Web apps easily, NASA finds intelligent life on Earth, Oxygen-free diet works wonders, NASA uses Python]
>>> a2.publications.all()
[The Python Journal]
# Bulk delete some articles - references to deleted objects should go
>>> q = Article.objects.filter(headline__startswith='Django')
>>> print q
[Django lets you build Web apps easily]
>>> q.delete()
# After the delete, the QuerySet cache needs to be cleared, and the referenced objects should be gone
>>> print q
[]
>>> p1.article_set.all()
[NASA uses Python]
""" """

View File

@ -94,7 +94,7 @@ John's second story
# The underlying query only makes one join when a related table is referenced twice. # The underlying query only makes one join when a related table is referenced twice.
>>> query = Article.objects.filter(reporter__first_name__exact='John', reporter__last_name__exact='Smith') >>> query = Article.objects.filter(reporter__first_name__exact='John', reporter__last_name__exact='Smith')
>>> null, sql, null = query._get_sql_clause(True) >>> null, sql, null = query._get_sql_clause()
>>> sql.count('INNER JOIN') >>> sql.count('INNER JOIN')
1 1
@ -163,21 +163,22 @@ John Smith
>>> Reporter.objects.filter(article__reporter__first_name__startswith='John').distinct() >>> Reporter.objects.filter(article__reporter__first_name__startswith='John').distinct()
[John Smith] [John Smith]
# Deletes that require joins are prohibited.
>>> Article.objects.delete(reporter__first_name__startswith='Jo')
Traceback (most recent call last):
...
TypeError: Joins are not allowed in this type of query
# If you delete a reporter, his articles will be deleted. # If you delete a reporter, his articles will be deleted.
>>> Article.objects.order_by('headline') >>> Article.objects.order_by('headline')
[John's second story, Paul's story, This is a test, This is a test, This is a test] [John's second story, Paul's story, This is a test, This is a test, This is a test]
>>> Reporter.objects.order_by('first_name') >>> Reporter.objects.order_by('first_name')
[John Smith, Paul Jones] [John Smith, Paul Jones]
>>> r.delete() >>> r2.delete()
>>> Article.objects.order_by('headline') >>> Article.objects.order_by('headline')
[Paul's story] [John's second story, This is a test, This is a test, This is a test]
>>> Reporter.objects.order_by('first_name') >>> Reporter.objects.order_by('first_name')
[Paul Jones] [John Smith]
# Deletes using a join in the query
>>> Reporter.objects.filter(article__headline__startswith='This').delete()
>>> Reporter.objects.all()
[]
>>> Article.objects.all()
[]
""" """