Fixed #16759 -- Remove use of __deepcopy__ in qs.clone()

The original problem was that queryset cloning was really expensive
when filtering with F() clauses. The __deepcopy__ went too deep, copying
the _meta attributes of the models used. To fix this, the use of
__deepcopy__ in qs cloning was removed.

This commit results in some speed improvements across the djangobench
benchmark suite. Most query_* tests are 20-30% faster, save() is 50%
faster and finally complex filtering situations can see 2x to an order
of magnitude improvements.

Thanks to Suor, Alex and lrekucki for valuable feedback.
This commit is contained in:
Anssi Kääriäinen 2012-06-02 04:13:36 +03:00
parent bb7f34d619
commit 23ca3a0194
4 changed files with 81 additions and 7 deletions

View File

@ -1,6 +1,7 @@
""" """
Classes to represent the default SQL aggregate functions Classes to represent the default SQL aggregate functions
""" """
import copy
from django.db.models.fields import IntegerField, FloatField from django.db.models.fields import IntegerField, FloatField
@ -62,6 +63,11 @@ class Aggregate(object):
self.field = tmp self.field = tmp
def clone(self):
    """
    Return a shallow copy of this aggregate.
    """
    # The aggregate classes do not share a common __init__ signature, so the
    # copy is made with copy.copy() instead of re-instantiating the class.
    # A deep copy is unnecessary: self.col is the only attribute that
    # changes afterwards.
    duplicate = copy.copy(self)
    return duplicate
def relabel_aliases(self, change_map): def relabel_aliases(self, change_map):
if isinstance(self.col, (list, tuple)): if isinstance(self.col, (list, tuple)):
self.col = (change_map.get(self.col[0], self.col[0]), self.col[1]) self.col = (change_map.get(self.col[0], self.col[0]), self.col[1])

View File

@ -279,13 +279,13 @@ class Query(object):
obj.select = self.select[:] obj.select = self.select[:]
obj.related_select_cols = [] obj.related_select_cols = []
obj.tables = self.tables[:] obj.tables = self.tables[:]
obj.where = copy.deepcopy(self.where, memo=memo) obj.where = self.where.clone()
obj.where_class = self.where_class obj.where_class = self.where_class
if self.group_by is None: if self.group_by is None:
obj.group_by = None obj.group_by = None
else: else:
obj.group_by = self.group_by[:] obj.group_by = self.group_by[:]
obj.having = copy.deepcopy(self.having, memo=memo) obj.having = self.having.clone()
obj.order_by = self.order_by[:] obj.order_by = self.order_by[:]
obj.low_mark, obj.high_mark = self.low_mark, self.high_mark obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
obj.distinct = self.distinct obj.distinct = self.distinct
@ -293,7 +293,9 @@ class Query(object):
obj.select_for_update = self.select_for_update obj.select_for_update = self.select_for_update
obj.select_for_update_nowait = self.select_for_update_nowait obj.select_for_update_nowait = self.select_for_update_nowait
obj.select_related = self.select_related obj.select_related = self.select_related
obj.aggregates = copy.deepcopy(self.aggregates, memo=memo) obj.related_select_cols = []
obj.aggregates = SortedDict((k, v.clone())
for k, v in self.aggregates.items())
if self.aggregate_select_mask is None: if self.aggregate_select_mask is None:
obj.aggregate_select_mask = None obj.aggregate_select_mask = None
else: else:
@ -316,7 +318,7 @@ class Query(object):
obj._extra_select_cache = self._extra_select_cache.copy() obj._extra_select_cache = self._extra_select_cache.copy()
obj.extra_tables = self.extra_tables obj.extra_tables = self.extra_tables
obj.extra_order_by = self.extra_order_by obj.extra_order_by = self.extra_order_by
obj.deferred_loading = copy.deepcopy(self.deferred_loading, memo=memo) obj.deferred_loading = copy.copy(self.deferred_loading[0]), self.deferred_loading[1]
if self.filter_is_sticky and self.used_aliases: if self.filter_is_sticky and self.used_aliases:
obj.used_aliases = self.used_aliases.copy() obj.used_aliases = self.used_aliases.copy()
else: else:
@ -549,7 +551,7 @@ class Query(object):
# Now relabel a copy of the rhs where-clause and add it to the current # Now relabel a copy of the rhs where-clause and add it to the current
# one. # one.
if rhs.where: if rhs.where:
w = copy.deepcopy(rhs.where) w = rhs.where.clone()
w.relabel_aliases(change_map) w.relabel_aliases(change_map)
if not self.where: if not self.where:
# Since 'self' matches everything, add an explicit "include # Since 'self' matches everything, add an explicit "include
@ -571,7 +573,7 @@ class Query(object):
new_col = change_map.get(col[0], col[0]), col[1] new_col = change_map.get(col[0], col[0]), col[1]
self.select.append(SelectInfo(new_col, field)) self.select.append(SelectInfo(new_col, field))
else: else:
item = copy.deepcopy(col) item = col.clone()
item.relabel_aliases(change_map) item.relabel_aliases(change_map)
self.select.append(SelectInfo(item, field)) self.select.append(SelectInfo(item, field))

View File

@ -10,7 +10,7 @@ from itertools import repeat
from django.utils import tree from django.utils import tree
from django.db.models.fields import Field from django.db.models.fields import Field
from django.db.models.sql.datastructures import EmptyResultSet from django.db.models.sql.datastructures import EmptyResultSet, Empty
from django.db.models.sql.aggregates import Aggregate from django.db.models.sql.aggregates import Aggregate
from django.utils.six.moves import xrange from django.utils.six.moves import xrange
@ -272,6 +272,23 @@ class WhereNode(tree.Node):
if hasattr(child[3], 'relabel_aliases'): if hasattr(child[3], 'relabel_aliases'):
child[3].relabel_aliases(change_map) child[3].relabel_aliases(change_map)
def clone(self):
    """
    Create a clone of the tree. Must only be called on root nodes (nodes
    with empty subtree_parents).

    Children must be either 4-tuples whose first item is a Constraint and
    whose last item is the lookup value, or objects supporting .clone().
    Tuple children get a cloned Constraint; the two middle items are
    carried over unchanged. The value is cloned as well when it provides a
    callable clone(); any other value is shared with the original tree.
    """
    assert not self.subtree_parents, "clone() must be called on a root node"
    clone = self.__class__._new_instance(
        children=[], connector=self.connector, negated=self.negated)
    for child in self.children:
        if isinstance(child, tuple):
            value = child[3]
            # relabel_aliases() calls value.relabel_aliases() in place when
            # the value supports it, so a value shared between the original
            # and the clone would let relabeling one tree leak into the
            # other. Values that know how to clone themselves are therefore
            # cloned here.
            # NOTE(review): values that have relabel_aliases() but no
            # clone() are still shared -- confirm whether any such value
            # type needs its own clone().
            if callable(getattr(value, 'clone', None)):
                value = value.clone()
            clone.children.append(
                (child[0].clone(), child[1], child[2], value))
        else:
            clone.children.append(child.clone())
    return clone
class EmptyWhere(WhereNode): class EmptyWhere(WhereNode):
def add(self, data, connector): def add(self, data, connector):
@ -291,6 +308,9 @@ class EverythingNode(object):
def relabel_aliases(self, change_map, node=None): def relabel_aliases(self, change_map, node=None):
return return
def clone(self):
    """
    Return this node itself rather than a copy.

    relabel_aliases() on this class does nothing, so sharing one instance
    between query trees cannot leak alias changes from one to the other.
    """
    return self
class NothingNode(object): class NothingNode(object):
""" """
A node that matches nothing. A node that matches nothing.
@ -301,6 +321,9 @@ class NothingNode(object):
def relabel_aliases(self, change_map, node=None): def relabel_aliases(self, change_map, node=None):
return return
def clone(self):
    """
    Return this node itself rather than a copy.

    relabel_aliases() on this class does nothing, so sharing one instance
    between query trees cannot leak alias changes from one to the other.
    """
    return self
class ExtraWhere(object): class ExtraWhere(object):
def __init__(self, sqls, params): def __init__(self, sqls, params):
self.sqls = sqls self.sqls = sqls
@ -310,6 +333,9 @@ class ExtraWhere(object):
sqls = ["(%s)" % sql for sql in self.sqls] sqls = ["(%s)" % sql for sql in self.sqls]
return " AND ".join(sqls), tuple(self.params or ()) return " AND ".join(sqls), tuple(self.params or ())
def clone(self):
    """
    Return this node itself rather than a copy.
    """
    # NOTE(review): presumably safe because sqls/params are not modified
    # once the node has been built -- confirm against callers.
    return self
class Constraint(object): class Constraint(object):
""" """
An object that can be passed to WhereNode.add() and knows how to An object that can be passed to WhereNode.add() and knows how to
@ -374,3 +400,9 @@ class Constraint(object):
def relabel_aliases(self, change_map): def relabel_aliases(self, change_map):
if self.alias in change_map: if self.alias in change_map:
self.alias = change_map[self.alias] self.alias = change_map[self.alias]
def clone(self):
    """
    Return a new object of the same class carrying this constraint's
    alias, col and field.

    __init__ is not run: a blank Empty instance is created and then
    re-typed to this constraint's class. The three attribute values are
    shared with the original, not copied.
    """
    duplicate = Empty()
    duplicate.__class__ = self.__class__
    duplicate.alias = self.alias
    duplicate.col = self.col
    duplicate.field = self.field
    return duplicate

View File

@ -1919,6 +1919,7 @@ class SubqueryTests(TestCase):
class CloneTests(TestCase): class CloneTests(TestCase):
def test_evaluated_queryset_as_argument(self): def test_evaluated_queryset_as_argument(self):
"#13227 -- If a queryset is already evaluated, it can still be used as a query arg" "#13227 -- If a queryset is already evaluated, it can still be used as a query arg"
n = Note(note='Test1', misc='misc') n = Note(note='Test1', misc='misc')
@ -1933,6 +1934,39 @@ class CloneTests(TestCase):
# that query in a way that involves cloning. # that query in a way that involves cloning.
self.assertEqual(ExtraInfo.objects.filter(note__in=n_list)[0].info, 'good') self.assertEqual(ExtraInfo.objects.filter(note__in=n_list)[0].info, 'good')
def test_no_model_options_cloning(self):
    """
    Test that cloning a queryset does not get out of hand. While complete
    testing is impossible, this is a sanity check against invalid use of
    deepcopy. refs #16759.

    The model options class is given a __deepcopy__ that fails the test,
    then a queryset filtered with an F() expression is cloned via .all().
    """
    opts_class = type(Note._meta)
    # Look only at the class's own namespace: getattr() would also return
    # an inherited __deepcopy__, and restoring that would pin a copy of the
    # inherited attribute onto this class after the test.
    own_attrs = vars(opts_class)
    had_own_deepcopy = "__deepcopy__" in own_attrs
    own_deepcopy = own_attrs.get("__deepcopy__")
    opts_class.__deepcopy__ = lambda obj, memo: self.fail("Model options shouldn't be cloned.")
    try:
        Note.objects.filter(pk__lte=F('pk') + 1).all()
    finally:
        # Put the class back exactly as it was found.
        if had_own_deepcopy:
            opts_class.__deepcopy__ = own_deepcopy
        else:
            del opts_class.__deepcopy__
def test_no_fields_cloning(self):
    """
    Test that cloning a queryset does not deep-copy model fields. While
    complete testing is impossible, this is a sanity check against invalid
    use of deepcopy. refs #16759.

    The class of Note's "misc" field is given a __deepcopy__ that fails the
    test, then a queryset filtered with an F() expression is cloned via
    .all().
    """
    field_class = type(Note._meta.get_field_by_name("misc")[0])
    # Look only at the class's own namespace: getattr() would also return a
    # __deepcopy__ inherited from a base class, and restoring that would
    # pin a copy of the inherited attribute onto this class after the test.
    own_attrs = vars(field_class)
    had_own_deepcopy = "__deepcopy__" in own_attrs
    own_deepcopy = own_attrs.get("__deepcopy__")
    field_class.__deepcopy__ = lambda obj, memo: self.fail("Model fields shouldn't be cloned")
    try:
        Note.objects.filter(note=F('misc')).all()
    finally:
        # Put the class back exactly as it was found.
        if had_own_deepcopy:
            field_class.__deepcopy__ = own_deepcopy
        else:
            del field_class.__deepcopy__
class EmptyQuerySetTests(TestCase): class EmptyQuerySetTests(TestCase):
def test_emptyqueryset_values(self): def test_emptyqueryset_values(self):