Made IN clauses for prefetch_related queries more efficient by using sets to eliminate duplicate values

This simply reduces the size of the IN clause in the generated SQL; the
reduction can be significant when many instances share the same key value.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16944 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Luke Plant 2011-10-08 13:50:29 +00:00
parent 095dbe8804
commit d5803bb5fb
2 changed files with 7 additions and 7 deletions

View File

@ -65,7 +65,7 @@ class GenericForeignKey(object):
def get_prefetch_query_set(self, instances): def get_prefetch_query_set(self, instances):
# For efficiency, group the instances by content type and then do one # For efficiency, group the instances by content type and then do one
# query per model # query per model
fk_dict = defaultdict(list) fk_dict = defaultdict(set)
# We need one instance for each group in order to get the right db: # We need one instance for each group in order to get the right db:
instance_dict = {} instance_dict = {}
ct_attname = self.model._meta.get_field(self.ct_field).get_attname() ct_attname = self.model._meta.get_field(self.ct_field).get_attname()
@ -75,7 +75,7 @@ class GenericForeignKey(object):
if ct_id is not None: if ct_id is not None:
fk_val = getattr(instance, self.fk_field) fk_val = getattr(instance, self.fk_field)
if fk_val is not None: if fk_val is not None:
fk_dict[ct_id].append(fk_val) fk_dict[ct_id].add(fk_val)
instance_dict[ct_id] = instance instance_dict[ct_id] = instance
ret_val = [] ret_val = []
@ -325,7 +325,7 @@ def create_generic_related_manager(superclass):
query = { query = {
'%s__pk' % self.content_type_field_name: self.content_type.id, '%s__pk' % self.content_type_field_name: self.content_type.id,
'%s__in' % self.object_id_field_name: '%s__in' % self.object_id_field_name:
[obj._get_pk_val() for obj in instances] set(obj._get_pk_val() for obj in instances)
} }
qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query) qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query)
return (qs, return (qs,

View File

@ -237,7 +237,7 @@ class SingleRelatedObjectDescriptor(object):
return self.related.model._base_manager.using(db) return self.related.model._base_manager.using(db)
def get_prefetch_query_set(self, instances): def get_prefetch_query_set(self, instances):
vals = [instance._get_pk_val() for instance in instances] vals = set(instance._get_pk_val() for instance in instances)
params = {'%s__pk__in' % self.related.field.name: vals} params = {'%s__pk__in' % self.related.field.name: vals}
return (self.get_query_set(), return (self.get_query_set(),
attrgetter(self.related.field.attname), attrgetter(self.related.field.attname),
@ -316,7 +316,7 @@ class ReverseSingleRelatedObjectDescriptor(object):
return QuerySet(self.field.rel.to).using(db) return QuerySet(self.field.rel.to).using(db)
def get_prefetch_query_set(self, instances): def get_prefetch_query_set(self, instances):
vals = [getattr(instance, self.field.attname) for instance in instances] vals = set(getattr(instance, self.field.attname) for instance in instances)
other_field = self.field.rel.get_related_field() other_field = self.field.rel.get_related_field()
if other_field.rel: if other_field.rel:
params = {'%s__pk__in' % self.field.rel.field_name: vals} params = {'%s__pk__in' % self.field.rel.field_name: vals}
@ -463,7 +463,7 @@ class ForeignRelatedObjectsDescriptor(object):
def get_prefetch_query_set(self, instances): def get_prefetch_query_set(self, instances):
db = self._db or router.db_for_read(self.model) db = self._db or router.db_for_read(self.model)
query = {'%s__%s__in' % (rel_field.name, attname): query = {'%s__%s__in' % (rel_field.name, attname):
[getattr(obj, attname) for obj in instances]} set(getattr(obj, attname) for obj in instances)}
qs = super(RelatedManager, self).get_query_set().using(db).filter(**query) qs = super(RelatedManager, self).get_query_set().using(db).filter(**query)
return (qs, return (qs,
attrgetter(rel_field.get_attname()), attrgetter(rel_field.get_attname()),
@ -546,7 +546,7 @@ def create_many_related_manager(superclass, rel):
from django.db import connections from django.db import connections
db = self._db or router.db_for_read(self.model) db = self._db or router.db_for_read(self.model)
query = {'%s__pk__in' % self.query_field_name: query = {'%s__pk__in' % self.query_field_name:
[obj._get_pk_val() for obj in instances]} set(obj._get_pk_val() for obj in instances)}
qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query) qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query)
# M2M: need to annotate the query in order to get the primary model # M2M: need to annotate the query in order to get the primary model