magic-removal: Fixed #1219 -- Added implementation of bulk delete, and factored common deletion code out of individual models.

git-svn-id: http://code.djangoproject.com/svn/django/branches/magic-removal@2307 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Russell Keith-Magee 2006-02-14 09:51:27 +00:00
parent c9839b85e5
commit a1d9de4dc7
6 changed files with 167 additions and 128 deletions

View File

@ -3,7 +3,7 @@ import django.db.models.manager
from django.db.models.fields import AutoField, ImageField
from django.db.models.fields.related import OneToOne, ManyToOne
from django.db.models.related import RelatedObject
from django.db.models.query import orderlist2sql
from django.db.models.query import orderlist2sql, delete_objects
from django.db.models.options import Options, AdminOptions
from django.db import connection, backend
from django.db.models import signals
@ -49,15 +49,6 @@ class ModelBase(type):
register_models(new_class._meta.app_label, new_class)
return new_class
def cmp_cls(x, y):
for field in x._meta.fields:
if field.rel and not field.null and field.rel.to == y:
return -1
for field in y._meta.fields:
if field.rel and not field.null and field.rel.to == x:
return 1
return 0
class Model(object):
__metaclass__ = ModelBase
@ -187,7 +178,7 @@ class Model(object):
save.alters_data = True
def __collect_sub_objects(self, seen_objs):
def _collect_sub_objects(self, seen_objs):
"""
Recursively populates seen_objs with all objects related to this object.
When done, seen_objs will be in the format:
@ -207,55 +198,20 @@ class Model(object):
except ObjectDoesNotExist:
pass
else:
sub_obj.__collect_sub_objects(seen_objs)
sub_obj._collect_sub_objects(seen_objs)
else:
for sub_obj in getattr(self, rel_opts_name).all():
sub_obj.__collect_sub_objects(seen_objs)
sub_obj._collect_sub_objects(seen_objs)
def delete(self):
assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)
# Find all the objects than need to be deleted
seen_objs = {}
self.__collect_sub_objects(seen_objs)
self._collect_sub_objects(seen_objs)
seen_classes = set(seen_objs.keys())
ordered_classes = list(seen_classes)
ordered_classes.sort(cmp_cls)
cursor = connection.cursor()
for cls in ordered_classes:
seen_objs[cls] = seen_objs[cls].items()
seen_objs[cls].sort()
for pk_val, instance in seen_objs[cls]:
dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
for related in cls._meta.get_all_related_many_to_many_objects():
cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
(backend.quote_name(related.field.get_m2m_db_table(related.opts)),
backend.quote_name(cls._meta.object_name.lower() + '_id')),
[pk_val])
for f in cls._meta.many_to_many:
cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
(backend.quote_name(f.get_m2m_db_table(cls._meta)),
backend.quote_name(cls._meta.object_name.lower() + '_id')),
[pk_val])
for field in cls._meta.fields:
if field.rel and field.null and field.rel.to in seen_classes:
cursor.execute("UPDATE %s SET %s=NULL WHERE %s=%%s" % \
(backend.quote_name(cls._meta.db_table), backend.quote_name(field.column),
backend.quote_name(cls._meta.pk.column)), [pk_val])
setattr(instance, field.attname, None)
for cls in ordered_classes:
seen_objs[cls].reverse()
for pk_val, instance in seen_objs[cls]:
cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
(backend.quote_name(cls._meta.db_table), backend.quote_name(cls._meta.pk.column)),
[pk_val])
setattr(instance, cls._meta.pk.attname, None)
dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
connection.commit()
# Actually delete the objects
delete_objects(seen_objs)
delete.alters_data = True

View File

@ -57,9 +57,6 @@ class Manager(object):
def dates(self, *args, **kwargs):
return self.get_query_set().dates(*args, **kwargs)
def delete(self, *args, **kwargs):
return self.get_query_set().delete(*args, **kwargs)
def distinct(self, *args, **kwargs):
return self.get_query_set().distinct(*args, **kwargs)

View File

@ -1,6 +1,9 @@
from django.db import backend, connection
from django.db.models.fields import DateField, FieldDoesNotExist
from django.db.models import signals
from django.dispatch import dispatcher
from django.utils.datastructures import SortedDict
import operator
LOOKUP_SEPARATOR = '__'
@ -125,7 +128,7 @@ class QuerySet(object):
extra_select = self._select.items()
cursor = connection.cursor()
select, sql, params = self._get_sql_clause(True)
select, sql, params = self._get_sql_clause()
cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
fill_cache = self._select_related
index_end = len(self.model._meta.fields)
@ -149,7 +152,7 @@ class QuerySet(object):
counter._offset = None
counter._limit = None
counter._select_related = False
select, sql, params = counter._get_sql_clause(True)
select, sql, params = counter._get_sql_clause()
cursor = connection.cursor()
cursor.execute("SELECT COUNT(*)" + sql, params)
return cursor.fetchone()[0]
@ -171,33 +174,30 @@ class QuerySet(object):
assert bool(latest_by), "latest() requires either a field_name parameter or 'get_latest_by' in the model"
return self._clone(_limit=1, _order_by=('-'+latest_by,)).get()
def delete(self, *args, **kwargs):
def delete(self):
"""
Deletes the records with the given kwargs. If no kwargs are given,
deletes records in the current QuerySet.
Deletes the records in the current QuerySet.
"""
# Remove the DELETE_ALL argument, if it exists.
delete_all = kwargs.pop('DELETE_ALL', False)
del_query = self._clone()
# Check for at least one query argument.
if not kwargs and not delete_all:
raise TypeError, "SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data."
if kwargs:
del_query = self.filter(*args, **kwargs)
else:
del_query = self._clone()
# disable non-supported fields
del_query._select_related = False
del_query._select = {}
del_query._order_by = []
del_query._offset = None
del_query._limit = None
# Perform the SQL delete
cursor = connection.cursor()
_, sql, params = del_query._get_sql_clause(False)
cursor.execute("DELETE " + sql, params)
# Collect all the objects to be deleted, and all the objects that are related to
# the objects that are to be deleted
seen_objs = {}
for object in del_query:
object._collect_sub_objects(seen_objs)
# Delete the objects
delete_objects(seen_objs)
# Clear the result cache, in case this QuerySet gets reused.
self._result_cache = None
delete.alters_data = True
##################################################
# PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
@ -297,7 +297,7 @@ class QuerySet(object):
self._result_cache = list(self.iterator())
return self._result_cache
def _get_sql_clause(self, allow_joins):
def _get_sql_clause(self):
opts = self.model._meta
# Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z.
@ -325,10 +325,6 @@ class QuerySet(object):
# Start composing the body of the SQL statement.
sql = [" FROM", backend.quote_name(opts.db_table)]
# Check if extra tables are allowed. If not, throw an error
if (tables or joins) and not allow_joins:
raise TypeError, "Joins are not allowed in this type of query"
# Compose the join dictionary into SQL describing the joins.
if joins:
sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition)
@ -407,7 +403,7 @@ class ValuesQuerySet(QuerySet):
field_names = [f.attname for f in self.model._meta.fields]
cursor = connection.cursor()
select, sql, params = self._get_sql_clause(True)
select, sql, params = self._get_sql_clause()
select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns]
cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
while 1:
@ -429,7 +425,7 @@ class DateQuerySet(QuerySet):
if self._field.null:
date_query._where.append('%s.%s IS NOT NULL' % \
(backend.quote_name(self.model._meta.db_table), backend.quote_name(self._field.column)))
select, sql, params = self._get_sql_clause(True)
select, sql, params = self._get_sql_clause()
sql = 'SELECT %s %s GROUP BY 1 ORDER BY 1 %s' % \
(backend.get_date_trunc_sql(self._kind, '%s.%s' % (backend.quote_name(self.model._meta.db_table),
backend.quote_name(self._field.column))), sql, self._order)
@ -762,3 +758,74 @@ def lookup_inner(path, clause, value, opts, table, column):
params.extend(field.get_db_prep_lookup(clause, value))
return tables, joins, where, params
def compare_models(x, y):
"Comparator for Models that puts models in an order where dependencies are easily resolved."
for field in x._meta.fields:
if field.rel and not field.null and field.rel.to == y:
return -1
for field in y._meta.fields:
if field.rel and not field.null and field.rel.to == x:
return 1
return 0
def delete_objects(seen_objs):
"Iterate through a list of seen classes, and remove any instances that are referred to"
seen_classes = set(seen_objs.keys())
ordered_classes = list(seen_classes)
ordered_classes.sort(compare_models)
cursor = connection.cursor()
for cls in ordered_classes:
seen_objs[cls] = seen_objs[cls].items()
seen_objs[cls].sort()
# Pre notify all instances to be deleted
for pk_val, instance in seen_objs[cls]:
dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
pk_list = [pk for pk,instance in seen_objs[cls]]
for related in cls._meta.get_all_related_many_to_many_objects():
cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
(backend.quote_name(related.field.get_m2m_db_table(related.opts)),
backend.quote_name(cls._meta.object_name.lower() + '_id'),
','.join('%s' for pk in pk_list)),
pk_list)
for f in cls._meta.many_to_many:
cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
(backend.quote_name(f.get_m2m_db_table(cls._meta)),
backend.quote_name(cls._meta.object_name.lower() + '_id'),
','.join(['%s' for pk in pk_list])),
pk_list)
for field in cls._meta.fields:
if field.rel and field.null and field.rel.to in seen_classes:
cursor.execute("UPDATE %s SET %s=NULL WHERE %s IN (%s)" % \
(backend.quote_name(cls._meta.db_table),
backend.quote_name(field.column),
backend.quote_name(cls._meta.pk.column),
','.join(['%s' for pk in pk_list])),
pk_list)
# Now delete the actual data
for cls in ordered_classes:
seen_objs[cls].reverse()
pk_list = [pk for pk,instance in seen_objs[cls]]
cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
(backend.quote_name(cls._meta.db_table),
backend.quote_name(cls._meta.pk.column),
','.join(['%s' for pk in pk_list])),
pk_list)
# Last cleanup; set NULLs where there once was a reference to the object,
# NULL the primary key of the found objects, and perform post-notification.
for pk_val, instance in seen_objs[cls]:
for field in cls._meta.fields:
if field.rel and field.null and field.rel.to in seen_classes:
setattr(instance, field.attname, None)
setattr(instance, cls._meta.pk.attname, None)
dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
connection.commit()

View File

@ -10,6 +10,8 @@ class Article(models.Model):
headline = models.CharField(maxlength=100, default='Default headline')
pub_date = models.DateTimeField()
def __repr__(self):
return self.headline
API_TESTS = """
# No articles are in the system yet.
@ -37,36 +39,34 @@ datetime.datetime(2005, 7, 28, 0, 0)
>>> a.headline = 'Area woman programs in Python'
>>> a.save()
# Article.objects.all() returns all the articles in the database. Note that
# the article is represented by "<Article object>", because we haven't given
# the Article model a __repr__() method.
# Article.objects.all() returns all the articles in the database.
>>> Article.objects.all()
[<Article object>]
[Area woman programs in Python]
# Django provides a rich database lookup API.
>>> Article.objects.get(id__exact=1)
<Article object>
Area woman programs in Python
>>> Article.objects.get(headline__startswith='Area woman')
<Article object>
Area woman programs in Python
>>> Article.objects.get(pub_date__year=2005)
<Article object>
Area woman programs in Python
>>> Article.objects.get(pub_date__year=2005, pub_date__month=7)
<Article object>
Area woman programs in Python
>>> Article.objects.get(pub_date__year=2005, pub_date__month=7, pub_date__day=28)
<Article object>
Area woman programs in Python
# The "__exact" lookup type can be omitted, as a shortcut.
>>> Article.objects.get(id=1)
<Article object>
Area woman programs in Python
>>> Article.objects.get(headline='Area woman programs in Python')
<Article object>
Area woman programs in Python
>>> Article.objects.filter(pub_date__year=2005)
[<Article object>]
[Area woman programs in Python]
>>> Article.objects.filter(pub_date__year=2004)
[]
>>> Article.objects.filter(pub_date__year=2005, pub_date__month=7)
[<Article object>]
[Area woman programs in Python]
# Django raises an Article.DoesNotExist exception for get() if the parameters
# don't match any object.
@ -84,7 +84,7 @@ DoesNotExist: Article does not exist for ...
# shortcut for primary-key exact lookups.
# The following is identical to articles.get(id=1).
>>> Article.objects.get(pk=1)
<Article object>
Area woman programs in Python
# Model instances of the same type and same ID are considered equal.
>>> a = Article.objects.get(pk=1)
@ -234,12 +234,12 @@ datetime.datetime(2005, 7, 28, 0, 0)
# You can get items using index and slice notation.
>>> Article.objects.all()[0]
<Article object>
Area woman programs in Python
>>> Article.objects.all()[1:3]
[<Article object>, <Article object>]
[Second article, Third article]
>>> s3 = Article.objects.filter(id__exact=3)
>>> (s1 | s2 | s3)[::2]
[<Article object>, <Article object>]
[Area woman programs in Python, Third article]
# An Article instance doesn't have access to the "objects" attribute.
# That's only available on the class.
@ -254,20 +254,11 @@ Traceback (most recent call last):
AttributeError: Manager isn't accessible via Article instances
# Bulk delete test: How many objects before and after the delete?
>>> Article.objects.count()
8L
>>> Article.objects.delete(id__lte=4)
>>> Article.objects.count()
4L
>>> Article.objects.delete()
Traceback (most recent call last):
...
TypeError: SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data.
>>> Article.objects.delete(DELETE_ALL=True)
>>> Article.objects.count()
0L
>>> Article.objects.all()
[Area woman programs in Python, Second article, Third article, Fourth article, Article 6, Default headline, Article 7, Updated article 8]
>>> Article.objects.filter(id__lte=4).delete()
>>> Article.objects.all()
[Article 6, Default headline, Article 7, Updated article 8]
"""

View File

@ -162,6 +162,33 @@ API_TESTS = """
>>> p2.article_set.all().order_by('headline')
[Oxygen-free diet works wonders]
# Recreate the article and Publication we just deleted.
>>> p1 = Publication(id=None, title='The Python Journal')
>>> p1.save()
>>> a2 = Article(id=None, headline='NASA uses Python')
>>> a2.save()
>>> a2.publications.add(p1, p2, p3)
# Bulk delete some Publications - references to deleted publications should go
>>> Publication.objects.filter(title__startswith='Science').delete()
>>> Publication.objects.all()
[Highlights for Children, The Python Journal]
>>> Article.objects.all()
[Django lets you build Web apps easily, NASA finds intelligent life on Earth, Oxygen-free diet works wonders, NASA uses Python]
>>> a2.publications.all()
[The Python Journal]
# Bulk delete some articles - references to deleted objects should go
>>> q = Article.objects.filter(headline__startswith='Django')
>>> print q
[Django lets you build Web apps easily]
>>> q.delete()
# After the delete, the QuerySet cache needs to be cleared, and the referenced objects should be gone
>>> print q
[]
>>> p1.article_set.all()
[NASA uses Python]
"""

View File

@ -94,7 +94,7 @@ John's second story
# The underlying query only makes one join when a related table is referenced twice.
>>> query = Article.objects.filter(reporter__first_name__exact='John', reporter__last_name__exact='Smith')
>>> null, sql, null = query._get_sql_clause(True)
>>> null, sql, null = query._get_sql_clause()
>>> sql.count('INNER JOIN')
1
@ -163,21 +163,22 @@ John Smith
>>> Reporter.objects.filter(article__reporter__first_name__startswith='John').distinct()
[John Smith]
# Deletes that require joins are prohibited.
>>> Article.objects.delete(reporter__first_name__startswith='Jo')
Traceback (most recent call last):
...
TypeError: Joins are not allowed in this type of query
# If you delete a reporter, his articles will be deleted.
>>> Article.objects.order_by('headline')
[John's second story, Paul's story, This is a test, This is a test, This is a test]
>>> Reporter.objects.order_by('first_name')
[John Smith, Paul Jones]
>>> r.delete()
>>> r2.delete()
>>> Article.objects.order_by('headline')
[Paul's story]
[John's second story, This is a test, This is a test, This is a test]
>>> Reporter.objects.order_by('first_name')
[Paul Jones]
[John Smith]
# Deletes using a join in the query
>>> Reporter.objects.filter(article__headline__startswith='This').delete()
>>> Reporter.objects.all()
[]
>>> Article.objects.all()
[]
"""