From d5803bb5fb1a301a014bb681fedc09e2bcc6967b Mon Sep 17 00:00:00 2001 From: Luke Plant Date: Sat, 8 Oct 2011 13:50:29 +0000 Subject: [PATCH] More efficient IN clauses for prefetch_related queries by use of sets to eliminate duplicates This simply reduces the size of the IN clause in the SQL, which can be significant in some cases. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16944 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/contrib/contenttypes/generic.py | 6 +++--- django/db/models/fields/related.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/django/contrib/contenttypes/generic.py b/django/contrib/contenttypes/generic.py index 23abe01f51..c5137877cb 100644 --- a/django/contrib/contenttypes/generic.py +++ b/django/contrib/contenttypes/generic.py @@ -65,7 +65,7 @@ class GenericForeignKey(object): def get_prefetch_query_set(self, instances): # For efficiency, group the instances by content type and then do one # query per model - fk_dict = defaultdict(list) + fk_dict = defaultdict(set) # We need one instance for each group in order to get the right db: instance_dict = {} ct_attname = self.model._meta.get_field(self.ct_field).get_attname() @@ -75,7 +75,7 @@ class GenericForeignKey(object): if ct_id is not None: fk_val = getattr(instance, self.fk_field) if fk_val is not None: - fk_dict[ct_id].append(fk_val) + fk_dict[ct_id].add(fk_val) instance_dict[ct_id] = instance ret_val = [] @@ -325,7 +325,7 @@ def create_generic_related_manager(superclass): query = { '%s__pk' % self.content_type_field_name: self.content_type.id, '%s__in' % self.object_id_field_name: - [obj._get_pk_val() for obj in instances] + set(obj._get_pk_val() for obj in instances) } qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query) return (qs, diff --git a/django/db/models/fields/related.py b/django/db/models/fields/related.py index c82c397288..8c054e7303 100644 --- a/django/db/models/fields/related.py +++ b/django/db/models/fields/related.py @@ -237,7 +237,7 @@ class SingleRelatedObjectDescriptor(object): return self.related.model._base_manager.using(db) def get_prefetch_query_set(self, instances): - vals = [instance._get_pk_val() for instance in instances] + vals = set(instance._get_pk_val() for instance in instances) params = {'%s__pk__in' % self.related.field.name: vals} return (self.get_query_set(), attrgetter(self.related.field.attname), @@ -316,7 +316,7 @@ class ReverseSingleRelatedObjectDescriptor(object): return QuerySet(self.field.rel.to).using(db) def get_prefetch_query_set(self, instances): - vals = [getattr(instance, self.field.attname) for instance in instances] + vals = set(getattr(instance, self.field.attname) for instance in instances) other_field = self.field.rel.get_related_field() if other_field.rel: params = {'%s__pk__in' % self.field.rel.field_name: vals} @@ -463,7 +463,7 @@ class ForeignRelatedObjectsDescriptor(object): def get_prefetch_query_set(self, instances): db = self._db or router.db_for_read(self.model) query = {'%s__%s__in' % (rel_field.name, attname): - [getattr(obj, attname) for obj in instances]} + set(getattr(obj, attname) for obj in instances)} qs = super(RelatedManager, self).get_query_set().using(db).filter(**query) return (qs, attrgetter(rel_field.get_attname()), @@ -546,7 +546,7 @@ def create_many_related_manager(superclass, rel): from django.db import connections db = self._db or router.db_for_read(self.model) query = {'%s__pk__in' % self.query_field_name: - [obj._get_pk_val() for obj in instances]} + set(obj._get_pk_val() for obj in instances)} qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query) # M2M: need to annotate the query in order to get the primary model