Fixed #16937 - added `QuerySet.prefetch_related` to prefetch many related objects.

Many thanks to akaariai for lots of review and feedback, bug finding,
additional unit tests and performance testing.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16930 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Luke Plant 2011-10-05 23:14:52 +00:00
parent d30fbf8b78
commit 662eea116f
10 changed files with 968 additions and 24 deletions

View File

@ -225,11 +225,7 @@ class ReverseGenericRelatedObjectsDescriptor(object):
content_type = content_type, content_type = content_type,
content_type_field_name = self.field.content_type_field_name, content_type_field_name = self.field.content_type_field_name,
object_id_field_name = self.field.object_id_field_name, object_id_field_name = self.field.object_id_field_name,
core_filters = { prefetch_cache_name = self.field.attname,
'%s__pk' % self.field.content_type_field_name: content_type.id,
'%s__exact' % self.field.object_id_field_name: instance._get_pk_val(),
}
) )
return manager return manager
@ -250,12 +246,12 @@ def create_generic_related_manager(superclass):
""" """
class GenericRelatedObjectManager(superclass): class GenericRelatedObjectManager(superclass):
def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None, def __init__(self, model=None, instance=None, symmetrical=None,
source_col_name=None, target_col_name=None, content_type=None, source_col_name=None, target_col_name=None, content_type=None,
content_type_field_name=None, object_id_field_name=None): content_type_field_name=None, object_id_field_name=None,
prefetch_cache_name=None):
super(GenericRelatedObjectManager, self).__init__() super(GenericRelatedObjectManager, self).__init__()
self.core_filters = core_filters
self.model = model self.model = model
self.content_type = content_type self.content_type = content_type
self.symmetrical = symmetrical self.symmetrical = symmetrical
@ -264,12 +260,30 @@ def create_generic_related_manager(superclass):
self.target_col_name = target_col_name self.target_col_name = target_col_name
self.content_type_field_name = content_type_field_name self.content_type_field_name = content_type_field_name
self.object_id_field_name = object_id_field_name self.object_id_field_name = object_id_field_name
self.prefetch_cache_name = prefetch_cache_name
self.pk_val = self.instance._get_pk_val() self.pk_val = self.instance._get_pk_val()
self.core_filters = {
'%s__pk' % content_type_field_name: content_type.id,
'%s__exact' % object_id_field_name: instance._get_pk_val(),
}
def get_query_set(self): def get_query_set(self):
try:
return self.instance._prefetched_objects_cache[self.prefetch_cache_name]
except (AttributeError, KeyError):
db = self._db or router.db_for_read(self.model, instance=self.instance) db = self._db or router.db_for_read(self.model, instance=self.instance)
return super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**self.core_filters) return super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**self.core_filters)
def get_prefetch_query_set(self, instances):
    """
    Build the single bulk query used by prefetch_related for this
    generic relation.

    Returns a 3-tuple: the queryset of related objects for all of
    ``instances``, the name of the attribute on each related object that
    holds the owner's id, and the name of the attribute on each instance
    to match it against.
    """
    db = self._db or router.db_for_read(self.model)
    owner_pks = [obj._get_pk_val() for obj in instances]
    lookup = {
        '%s__pk' % self.content_type_field_name: self.content_type.id,
        '%s__in' % self.object_id_field_name: owner_pks,
    }
    base_qs = super(GenericRelatedObjectManager, self).get_query_set()
    return (base_qs.using(db).filter(**lookup), self.object_id_field_name, 'pk')
def add(self, *objs): def add(self, *objs):
for obj in objs: for obj in objs:
if not isinstance(obj, self.model): if not isinstance(obj, self.model):

View File

@ -432,8 +432,22 @@ class ForeignRelatedObjectsDescriptor(object):
self.model = rel_model self.model = rel_model
def get_query_set(self): def get_query_set(self):
try:
return self.instance._prefetched_objects_cache[rel_field.related_query_name()]
except (AttributeError, KeyError):
db = self._db or router.db_for_read(self.model, instance=self.instance) db = self._db or router.db_for_read(self.model, instance=self.instance)
return super(RelatedManager, self).get_query_set().using(db).filter(**(self.core_filters)) return super(RelatedManager, self).get_query_set().using(db).filter(**self.core_filters)
def get_prefetch_query_set(self, instances):
    """
    Build the one queryset prefetch_related needs in order to load the
    related objects of every object in ``instances`` at once.

    Returns (queryset, name of the attribute to read on each related
    object, name of the attribute to read on each instance) so the caller
    can match the two sets up.
    """
    db = self._db or router.db_for_read(self.model)
    lookup_name = '%s__%s__in' % (rel_field.name, attname)
    parent_values = [getattr(obj, attname) for obj in instances]
    related_qs = super(RelatedManager, self).get_query_set().using(db)
    related_qs = related_qs.filter(**{lookup_name: parent_values})
    return (related_qs, rel_field.get_attname(), attname)
def add(self, *objs): def add(self, *objs):
for obj in objs: for obj in objs:
@ -482,25 +496,60 @@ def create_many_related_manager(superclass, rel):
"""Creates a manager that subclasses 'superclass' (which is a Manager) """Creates a manager that subclasses 'superclass' (which is a Manager)
and adds behavior for many-to-many related objects.""" and adds behavior for many-to-many related objects."""
class ManyRelatedManager(superclass): class ManyRelatedManager(superclass):
def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None, def __init__(self, model=None, query_field_name=None, instance=None, symmetrical=None,
source_field_name=None, target_field_name=None, reverse=False, source_field_name=None, target_field_name=None, reverse=False,
through=None): through=None, prefetch_cache_name=None):
super(ManyRelatedManager, self).__init__() super(ManyRelatedManager, self).__init__()
self.model = model self.model = model
self.core_filters = core_filters self.query_field_name = query_field_name
self.core_filters = {'%s__pk' % query_field_name: instance._get_pk_val()}
self.instance = instance self.instance = instance
self.symmetrical = symmetrical self.symmetrical = symmetrical
self.source_field_name = source_field_name self.source_field_name = source_field_name
self.target_field_name = target_field_name self.target_field_name = target_field_name
self.reverse = reverse self.reverse = reverse
self.through = through self.through = through
self.prefetch_cache_name = prefetch_cache_name
self._pk_val = self.instance.pk self._pk_val = self.instance.pk
if self._pk_val is None: if self._pk_val is None:
raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__) raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__)
def get_query_set(self): def get_query_set(self):
try:
return self.instance._prefetched_objects_cache[self.prefetch_cache_name]
except (AttributeError, KeyError):
db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance) db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance)
return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**(self.core_filters)) return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters)
def get_prefetch_query_set(self, instances):
    """
    Build the bulk queryset used by prefetch_related for this
    many-to-many relation.

    Returns a tuple:
      (queryset of self.model instances related to the passed in instances,
       attr of the returned instances needed for matching,
       attr of the passed in instances needed for matching)
    """
    from django.db import connections
    db = self._db or router.db_for_read(self.model)
    owner_pks = [obj._get_pk_val() for obj in instances]
    lookup = {'%s__pk__in' % self.query_field_name: owner_pks}
    qs = super(ManyRelatedManager, self).get_query_set().using(db)
    qs = qs._next_is_sticky().filter(**lookup)

    # Each returned row must carry the value identifying which of
    # `instances` it belongs to. The filter above already joins the
    # intermediate table, so exposing its source column through an extra
    # select is enough.
    # With a non-autocreated 'through' model we can't assume that column
    # holds PK values, so the attribute to match on is taken from the
    # field the foreign key actually targets.
    source_fk = self.through._meta.get_field(self.source_field_name)
    quote = connections[db].ops.quote_name
    qualified_col = '%s.%s' % (quote(self.through._meta.db_table),
                               quote(source_fk.column))
    qs = qs.extra(select={'_prefetch_related_val': qualified_col})
    return (qs, '_prefetch_related_val',
            source_fk.rel.get_related_field().get_attname())
# If the ManyToMany relation has an intermediary model, # If the ManyToMany relation has an intermediary model,
# the add and remove methods do not exist. # the add and remove methods do not exist.
@ -683,7 +732,8 @@ class ManyRelatedObjectsDescriptor(object):
manager = self.related_manager_cls( manager = self.related_manager_cls(
model=rel_model, model=rel_model,
core_filters={'%s__pk' % self.related.field.name: instance._get_pk_val()}, query_field_name=self.related.field.name,
prefetch_cache_name=self.related.field.related_query_name(),
instance=instance, instance=instance,
symmetrical=False, symmetrical=False,
source_field_name=self.related.field.m2m_reverse_field_name(), source_field_name=self.related.field.m2m_reverse_field_name(),
@ -739,7 +789,8 @@ class ReverseManyRelatedObjectsDescriptor(object):
manager = self.related_manager_cls( manager = self.related_manager_cls(
model=self.field.rel.to, model=self.field.rel.to,
core_filters={'%s__pk' % self.field.related_query_name(): instance._get_pk_val()}, query_field_name=self.field.related_query_name(),
prefetch_cache_name=self.field.name,
instance=instance, instance=instance,
symmetrical=self.field.rel.symmetrical, symmetrical=self.field.rel.symmetrical,
source_field_name=self.field.m2m_field_name(), source_field_name=self.field.m2m_field_name(),

View File

@ -172,6 +172,9 @@ class Manager(object):
def select_related(self, *args, **kwargs): def select_related(self, *args, **kwargs):
return self.get_query_set().select_related(*args, **kwargs) return self.get_query_set().select_related(*args, **kwargs)
def prefetch_related(self, *args, **kwargs):
    # Proxy: delegate to prefetch_related() on this manager's queryset.
    queryset = self.get_query_set()
    return queryset.prefetch_related(*args, **kwargs)
def values(self, *args, **kwargs): def values(self, *args, **kwargs):
return self.get_query_set().values(*args, **kwargs) return self.get_query_set().values(*args, **kwargs)

View File

@ -36,6 +36,8 @@ class QuerySet(object):
self._iter = None self._iter = None
self._sticky_filter = False self._sticky_filter = False
self._for_write = False self._for_write = False
self._prefetch_related_lookups = []
self._prefetch_done = False
######################## ########################
# PYTHON MAGIC METHODS # # PYTHON MAGIC METHODS #
@ -81,9 +83,17 @@ class QuerySet(object):
self._result_cache = list(self.iterator()) self._result_cache = list(self.iterator())
elif self._iter: elif self._iter:
self._result_cache.extend(self._iter) self._result_cache.extend(self._iter)
if self._prefetch_related_lookups and not self._prefetch_done:
self._prefetch_related_objects()
return len(self._result_cache) return len(self._result_cache)
def __iter__(self): def __iter__(self):
if self._prefetch_related_lookups and not self._prefetch_done:
# We need all the results in order to be able to do the prefetch
# in one go. To minimize code duplication, we use the __len__
# code path which also forces this, and also does the prefetch
len(self)
if self._result_cache is None: if self._result_cache is None:
self._iter = self.iterator() self._iter = self.iterator()
self._result_cache = [] self._result_cache = []
@ -106,6 +116,12 @@ class QuerySet(object):
self._fill_cache() self._fill_cache()
def __nonzero__(self): def __nonzero__(self):
if self._prefetch_related_lookups and not self._prefetch_done:
# We need all the results in order to be able to do the prefetch
# in one go. To minimize code duplication, we use the __len__
# code path which also forces this, and also does the prefetch
len(self)
if self._result_cache is not None: if self._result_cache is not None:
return bool(self._result_cache) return bool(self._result_cache)
try: try:
@ -527,6 +543,11 @@ class QuerySet(object):
return self.query.has_results(using=self.db) return self.query.has_results(using=self.db)
return bool(self._result_cache) return bool(self._result_cache)
def _prefetch_related_objects(self):
    """
    Run the pending prefetch_related lookups against the cached results
    and record that the prefetch has been done.

    Must only be called once ``self._result_cache`` has been filled.
    """
    results, lookups = self._result_cache, self._prefetch_related_lookups
    prefetch_related_objects(results, lookups)
    self._prefetch_done = True
################################################## ##################################################
# PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS # # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
################################################## ##################################################
@ -650,6 +671,23 @@ class QuerySet(object):
obj.query.max_depth = depth obj.query.max_depth = depth
return obj return obj
def prefetch_related(self, *lookups):
    """
    Returns a new QuerySet instance that will prefetch the specified
    Many-To-One and Many-To-Many related objects when the QuerySet is
    evaluated.

    Calling prefetch_related() more than once accumulates lookups: each
    call appends to the list of lookups to prefetch. Calling
    prefetch_related(None) clears the list.
    """
    clone = self._clone()
    if lookups != (None,):
        clone._prefetch_related_lookups.extend(lookups)
        return clone
    clone._prefetch_related_lookups = []
    return clone
def dup_select_related(self, other): def dup_select_related(self, other):
""" """
Copies the related selection status from the QuerySet 'other' to the Copies the related selection status from the QuerySet 'other' to the
@ -799,6 +837,7 @@ class QuerySet(object):
query.filter_is_sticky = True query.filter_is_sticky = True
c = klass(model=self.model, query=query, using=self._db) c = klass(model=self.model, query=query, using=self._db)
c._for_write = self._for_write c._for_write = self._for_write
c._prefetch_related_lookups = self._prefetch_related_lookups[:]
c.__dict__.update(kwargs) c.__dict__.update(kwargs)
if setup and hasattr(c, '_setup_query'): if setup and hasattr(c, '_setup_query'):
c._setup_query() c._setup_query()
@ -864,6 +903,7 @@ class QuerySet(object):
# empty" result. # empty" result.
value_annotation = True value_annotation = True
class ValuesQuerySet(QuerySet): class ValuesQuerySet(QuerySet):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(ValuesQuerySet, self).__init__(*args, **kwargs) super(ValuesQuerySet, self).__init__(*args, **kwargs)
@ -993,6 +1033,7 @@ class ValuesQuerySet(QuerySet):
% self.__class__.__name__) % self.__class__.__name__)
return self return self
class ValuesListQuerySet(ValuesQuerySet): class ValuesListQuerySet(ValuesQuerySet):
def iterator(self): def iterator(self):
if self.flat and len(self._fields) == 1: if self.flat and len(self._fields) == 1:
@ -1502,6 +1543,7 @@ class RawQuerySet(object):
self._model_fields[converter(column)] = field self._model_fields[converter(column)] = field
return self._model_fields return self._model_fields
def insert_query(model, objs, fields, return_id=False, raw=False, using=None): def insert_query(model, objs, fields, return_id=False, raw=False, using=None):
""" """
Inserts a new record for the given model. This provides an interface to Inserts a new record for the given model. This provides an interface to
@ -1511,3 +1553,140 @@ def insert_query(model, objs, fields, return_id=False, raw=False, using=None):
query = sql.InsertQuery(model) query = sql.InsertQuery(model)
query.insert_values(fields, objs, raw=raw) query.insert_values(fields, objs, raw=raw)
return query.get_compiler(using=using).execute_sql(return_id) return query.get_compiler(using=using).execute_sql(return_id)
def prefetch_related_objects(result_cache, related_lookups):
    """
    Helper function for prefetch_related functionality.

    Populates the prefetched objects caches for a list of results from a
    QuerySet, following each lookup in related_lookups.

    Raises AttributeError when part of a lookup names an attribute that does
    not exist, and ValueError when the last part of a lookup is not something
    that supports prefetching.
    """
    from django.db.models.sql.constants import LOOKUP_SEP

    if len(result_cache) == 0:
        return  # nothing to do

    model = result_cache[0].__class__

    # The list of lookups may grow while we walk it (see below), so we keep
    # track of what has been handled already to avoid duplicate work.
    seen_lookups = set()   # complete lookups, like foo__bar__baz
    fetched = {}           # lookup prefix, like 'foo__bar' -> [results]

    pending = list(related_lookups)
    # A plain 'for' over the list also visits lookups appended during the loop.
    for lookup in pending:
        if lookup in seen_lookups:
            # We've done exactly this already, skip the whole thing
            continue
        seen_lookups.add(lookup)

        # At the top level, the objects to decorate are the result cache of
        # the primary QuerySet. Deeper levels replace this list.
        current_objs = result_cache

        parts = lookup.split(LOOKUP_SEP)
        last_level = len(parts) - 1
        for level, attr in enumerate(parts):
            # Prepare main instances
            if len(current_objs) == 0:
                break

            usable = True
            for obj in current_objs:
                if hasattr(obj, '_prefetched_objects_cache'):
                    # We already did this list
                    break
                try:
                    obj._prefetched_objects_cache = {}
                except AttributeError:
                    # Must be in a QuerySet subclass that is not returning
                    # Model instances, either in Django or 3rd party.
                    # prefetch_related() doesn't make sense, so quit now.
                    usable = False
                    break
            if not usable:
                break

            # Descend down tree
            first = current_objs[0]
            try:
                rel_obj = getattr(first, attr)
            except AttributeError:
                raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid "
                                     "parameter to prefetch_related()" %
                                     (attr, first.__class__.__name__, lookup))

            can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set')
            if not can_prefetch:
                if level == last_level:
                    # Last one, this *must* resolve to a related manager.
                    raise ValueError("'%s' does not resolve to a supported 'many related"
                                     " manager' for model %s - this is an invalid"
                                     " parameter to prefetch_related()."
                                     % (lookup, model.__name__))
                # Assume we've got some singly related object. The current
                # list of parent objects is replaced by the related objects,
                # dropping None so that nullable relations can be followed.
                current_objs = [child
                                for child in (getattr(o, attr) for o in current_objs)
                                if child is not None]
                continue

            # Check we didn't do this already
            prefix = LOOKUP_SEP.join(parts[:level + 1])
            if prefix in fetched:
                current_objs = fetched[prefix]
                continue

            current_objs, extra_lookups = prefetch_one_level(current_objs, rel_obj, attr)
            # Lookups requested by the related default manager are queued,
            # prefixed with the path that led to them.
            pending.extend(LOOKUP_SEP.join([prefix, extra]) for extra in extra_lookups)
            fetched[prefix] = current_objs
def prefetch_one_level(instances, relmanager, attname):
    """
    Helper function for prefetch_related_objects.

    Runs one bulk prefetch for all instances using the manager relmanager,
    and stores on each instance, under attname in its
    _prefetched_objects_cache, a queryset pre-filled with its own results.

    Returns the prefetched objects together with any additional
    prefetch_related lookups that were found on the queryset supplied by the
    manager (i.e. lookups added by a default manager).
    """
    rel_qs, rel_obj_attr, instance_attr = relmanager.get_prefetch_query_set(instances)

    # The default manager may itself have added prefetch_related lookups to
    # the QuerySet we just got back. Evaluating the query must not trigger
    # them; they are handed back to the caller to be merged in instead.
    additional_prl = getattr(rel_qs, '_prefetch_related_lookups', [])
    if additional_prl:
        # No clone needed: the manager should have given us a fresh
        # instance, so an internal is set directly instead of going through
        # the public interface, for performance reasons.
        rel_qs._prefetch_related_lookups = []

    all_related_objects = list(rel_qs)

    # Group the related objects by the value linking them to their owner.
    grouped = {}
    for related in all_related_objects:
        grouped.setdefault(getattr(related, rel_obj_attr), []).append(related)

    for instance in instances:
        qs = getattr(instance, attname).all()
        qs._result_cache = grouped.get(getattr(instance, instance_attr), [])
        # The individual qs must not run prefetch_related itself now, since
        # that work has been merged into the current pass.
        qs._prefetch_done = True
        instance._prefetched_objects_cache[attname] = qs
    return all_related_objects, additional_prl

View File

@ -571,8 +571,6 @@ can be useful in situations where you might want to pass in either a model
manager or a ``QuerySet`` and do further filtering on the result. After calling manager or a ``QuerySet`` and do further filtering on the result. After calling
``all()`` on either object, you'll definitely have a ``QuerySet`` to work with. ``all()`` on either object, you'll definitely have a ``QuerySet`` to work with.
.. _select-related:
select_related select_related
~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~
@ -690,6 +688,107 @@ is defined. Instead of specifying the field name, use the :attr:`related_name
A :class:`~django.db.models.OneToOneField` is not traversed in the reverse A :class:`~django.db.models.OneToOneField` is not traversed in the reverse
direction if you are performing a depth-based ``select_related()`` call. direction if you are performing a depth-based ``select_related()`` call.
prefetch_related
~~~~~~~~~~~~~~~~
.. method:: prefetch_related(*lookups)
.. versionadded:: 1.4
Returns a ``QuerySet`` that will automatically retrieve, in a single batch,
related many-to-many and many-to-one objects for each of the specified lookups.
This is similar to ``select_related`` for the 'many related objects' case, but
note that ``prefetch_related`` causes a separate query to be issued for each set
of related objects that you request, unlike ``select_related`` which modifies
the original query with joins in order to get the related objects. With
``prefetch_related``, the additional queries are done as soon as the QuerySet
begins to be evaluated.
For example, suppose you have these models::
class Topping(models.Model):
name = models.CharField(max_length=30)
class Pizza(models.Model):
name = models.CharField(max_length=50)
toppings = models.ManyToManyField(Topping)
def __unicode__(self):
return u"%s (%s)" % (self.name, u", ".join([topping.name
for topping in self.toppings.all()]))
and run this code::
>>> Pizza.objects.all()
[u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"...
The problem with this code is that it will run a query on the Toppings table for
**every** item in the Pizza ``QuerySet``. Using ``prefetch_related``, this can
be reduced to two:
>>> Pizza.objects.all().prefetch_related('toppings')
All the relevant toppings will be fetched in a single query, and used to make
``QuerySets`` that have a pre-filled cache of the relevant results. These
``QuerySets`` are then used in the ``self.toppings.all()`` calls.
Please note that use of ``prefetch_related`` will mean that the additional
queries run will **always** be executed - even if you never use the related
objects - and it always fully populates the result cache on the primary
``QuerySet`` (which can sometimes be avoided in other cases).
Also remember that, as always with QuerySets, any subsequent chained methods
will ignore previously cached results, and retrieve data using a fresh database
query. So, if you write the following:
>>> pizzas = Pizza.objects.prefetch_related('toppings')
>>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas]
...then the fact that ``pizza.toppings.all()`` has been prefetched will not help
you - in fact it hurts performance, since you have done a database query that
you haven't used. So use this feature with caution!
The lookups that must be supplied to this method can be any attributes on the
model instances which represent related queries that return multiple
objects. This includes attributes representing the 'many' side of ``ForeignKey``
relationships, forward and reverse ``ManyToManyField`` attributes, and also any
``GenericRelations``.
You can also use the normal join syntax to do related fields of related
fields. Suppose we have an additional model to the example above::
class Restaurant(models.Model):
pizzas = models.ManyToManyField(Pizza, related_name='restaurants')
best_pizza = models.ForeignKey(Pizza, related_name='championed_by')
The following are all legal:
>>> Restaurant.objects.prefetch_related('pizzas__toppings')
This will prefetch all pizzas belonging to restaurants, and all toppings
belonging to those pizzas. This will result in a total of 3 database queries -
one for the restaurants, one for the pizzas, and one for the toppings.
>>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings')
This will fetch the best pizza and all the toppings for the best pizza for each
restaurant. This will be done in 2 database queries - one for the restaurants
and 'best pizzas' combined (achieved through use of ``select_related``), and one
for the toppings.
Chaining ``prefetch_related`` calls will accumulate the fields that should have
this behavior applied. To clear any ``prefetch_related`` behavior, pass ``None``
as a parameter::
>>> non_prefetched = qs.prefetch_related(None)
One difference when using ``prefetch_related`` is that, in some circumstances,
objects created by a query can be shared between the different objects that they
are related to i.e. a single Python model instance can appear at more than one
point in the tree of objects that are returned. Normally this behavior will not
be a problem, and will in fact save both memory and CPU time.
extra extra
~~~~~ ~~~~~

View File

@ -63,6 +63,19 @@ setup for test suites) has seen a performance benefit as a result.
See the :meth:`~django.db.models.query.QuerySet.bulk_create` docs for more See the :meth:`~django.db.models.query.QuerySet.bulk_create` docs for more
information. information.
``QuerySet.prefetch_related``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Analogous to :meth:`~django.db.models.query.QuerySet.select_related` but for
many-to-many relationships,
:meth:`~django.db.models.query.QuerySet.prefetch_related` has been added to
:class:`~django.db.models.query.QuerySet`. This method returns a new ``QuerySet``
that will prefetch in a single batch each of the specified related lookups as
soon as it begins to be evaluated (e.g. by iterating over it). This enables you
to fix many instances of a very common performance problem, in which your code
ends up doing O(n) database queries (or worse) if objects on your primary
``QuerySet`` each have many related objects that you also need.
HTML5 HTML5
~~~~~ ~~~~~

View File

@ -141,10 +141,12 @@ retrieving it all in one query. This is particularly important if you have a
query that is executed in a loop, and could therefore end up doing many database query that is executed in a loop, and could therefore end up doing many database
queries, when only one was needed. So: queries, when only one was needed. So:
Use ``QuerySet.select_related()`` Use ``QuerySet.select_related()`` and ``prefetch_related()``
--------------------------------- ------------------------------------------------------------
Understand :ref:`QuerySet.select_related() <select-related>` thoroughly, and use it: Understand :meth:`~django.db.models.query.QuerySet.select_related` and
:meth:`~django.db.models.query.QuerySet.prefetch_related` thoroughly, and use
them:
* in view code, * in view code,

View File

@ -0,0 +1,165 @@
from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes import generic
from django.db import models
## Basic tests
class Author(models.Model):
    # 'name' is unique; other models in this file reference authors through
    # it with to_field='name'.
    name = models.CharField(unique=True, max_length=50)
    first_book = models.ForeignKey('Book', related_name='first_time_authors')
    favorite_authors = models.ManyToManyField(
        'self',
        through='FavoriteAuthors',
        symmetrical=False,
        related_name='favors_me',
    )

    class Meta:
        ordering = ['id']

    def __unicode__(self):
        return self.name
class AuthorWithAge(Author):
    # Subclass of Author with the parent link declared explicitly.
    author = models.OneToOneField(Author, parent_link=True)
    age = models.IntegerField()
class FavoriteAuthors(models.Model):
    # Explicit 'through' model for Author.favorite_authors. Both foreign
    # keys point at Author.name instead of the primary key.
    author = models.ForeignKey(
        Author, related_name='i_like', to_field='name')
    likes_author = models.ForeignKey(
        Author, related_name='likes_me', to_field='name')

    class Meta:
        ordering = ['id']
class AuthorAddress(models.Model):
    # The foreign key targets Author.name rather than the primary key.
    author = models.ForeignKey(
        Author, related_name='addresses', to_field='name')
    address = models.TextField()

    def __unicode__(self):
        return self.address

    class Meta:
        ordering = ['id']
class Book(models.Model):
    title = models.CharField(max_length=255)
    # Reverse accessor on Author is 'books'.
    authors = models.ManyToManyField(Author, related_name='books')

    class Meta:
        ordering = ['id']

    def __unicode__(self):
        return self.title
class BookWithYear(Book):
    # Subclass of Book with the parent link declared explicitly.
    book = models.OneToOneField(Book, parent_link=True)
    published_year = models.IntegerField()
    aged_authors = models.ManyToManyField(
        AuthorWithAge,
        related_name='books_with_year',
    )
class Reader(models.Model):
    name = models.CharField(max_length=50)
    # Reverse accessor on Book is 'read_by'.
    books_read = models.ManyToManyField(Book, related_name='read_by')

    class Meta:
        ordering = ['id']

    def __unicode__(self):
        return self.name
## Models for default manager tests
class Qualification(models.Model):
    # Target of Teacher.qualifications.
    name = models.CharField(max_length=10)

    class Meta:
        ordering = ['id']
class TeacherManager(models.Manager):
    # Manager whose querysets always ask for 'qualifications' to be
    # prefetched.
    def get_query_set(self):
        base_qs = super(TeacherManager, self).get_query_set()
        return base_qs.prefetch_related('qualifications')
class Teacher(models.Model):
    name = models.CharField(max_length=50)
    qualifications = models.ManyToManyField(Qualification)

    # Manager that adds prefetch_related('qualifications') to its querysets.
    objects = TeacherManager()

    class Meta:
        ordering = ['id']

    def __unicode__(self):
        # Reads self.qualifications.all() for every teacher rendered.
        names = ", ".join(q.name for q in self.qualifications.all())
        return "%s (%s)" % (self.name, names)
class Department(models.Model):
    name = models.CharField(max_length=50)
    # Related model (Teacher) uses TeacherManager, which adds its own
    # prefetch_related lookup.
    teachers = models.ManyToManyField(Teacher)

    class Meta:
        ordering = ['id']
## Generic relation tests
class TaggedItem(models.Model):
    tag = models.SlugField()

    # content_type + object_id together back the generic foreign key below.
    content_type = models.ForeignKey(
        ContentType, related_name="taggeditem_set2")
    object_id = models.PositiveIntegerField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    def __unicode__(self):
        return self.tag
class Bookmark(models.Model):
    url = models.URLField()
    # Generic relation to the TaggedItem rows attached to this bookmark.
    tags = generic.GenericRelation(TaggedItem)
## Models for lookup ordering tests
class House(models.Model):
    # Reverse accessors defined elsewhere in this file: 'rooms' (from Room)
    # and 'occupants' (from Person).
    address = models.CharField(max_length=255)

    class Meta:
        ordering = ['id']
class Room(models.Model):
    name = models.CharField(max_length=50)
    # Reverse accessor on House is 'rooms'.
    house = models.ForeignKey(House, related_name='rooms')

    class Meta:
        ordering = ['id']
class Person(models.Model):
    name = models.CharField(max_length=50)
    # Reverse accessor on House is 'occupants'.
    houses = models.ManyToManyField(House, related_name='occupants')

    class Meta:
        ordering = ['id']

    @property
    def primary_house(self):
        # Assume business logic forces every person to have at least one house.
        # Houses are sorted by descending room count and the first is taken.
        by_room_count = sorted(self.houses.all(),
                               key=lambda house: -house.rooms.count())
        return by_room_count[0]
## Models for nullable FK tests
class Employee(models.Model):
    name = models.CharField(max_length=50)
    # Nullable self-reference; the reverse accessor is 'serfs'.
    boss = models.ForeignKey('self', related_name='serfs', null=True)

    class Meta:
        ordering = ['id']

    def __unicode__(self):
        return self.name

View File

@ -0,0 +1,418 @@
from __future__ import with_statement
from django.contrib.contenttypes.models import ContentType
from django.test import TestCase
from django.utils import unittest
from models import (Author, Book, Reader, Qualification, Teacher, Department,
TaggedItem, Bookmark, AuthorAddress, FavoriteAuthors,
AuthorWithAge, BookWithYear, Person, House, Room,
Employee)
class PrefetchRelatedTests(TestCase):
def setUp(self):
    # Fixture: four books, four authors and two readers, linked below.
    new_book = Book.objects.create
    self.book1 = new_book(title="Poems")
    self.book2 = new_book(title="Jane Eyre")
    self.book3 = new_book(title="Wuthering Heights")
    self.book4 = new_book(title="Sense and Sensibility")

    new_author = Author.objects.create
    self.author1 = new_author(name="Charlotte", first_book=self.book1)
    self.author2 = new_author(name="Anne", first_book=self.book1)
    self.author3 = new_author(name="Emily", first_book=self.book1)
    self.author4 = new_author(name="Jane", first_book=self.book4)

    self.book1.authors.add(self.author1, self.author2, self.author3)
    self.book2.authors.add(self.author1)
    self.book3.authors.add(self.author3)
    self.book4.authors.add(self.author4)

    new_reader = Reader.objects.create
    self.reader1 = new_reader(name="Amy")
    self.reader2 = new_reader(name="Belinda")

    self.reader1.books_read.add(self.book1, self.book4)
    self.reader2.books_read.add(self.book2, self.book4)
def test_m2m_forward(self):
with self.assertNumQueries(2):
lists = [list(b.authors.all()) for b in Book.objects.prefetch_related('authors')]
normal_lists = [list(b.authors.all()) for b in Book.objects.all()]
self.assertEqual(lists, normal_lists)
def test_m2m_reverse(self):
with self.assertNumQueries(2):
lists = [list(a.books.all()) for a in Author.objects.prefetch_related('books')]
normal_lists = [list(a.books.all()) for a in Author.objects.all()]
self.assertEqual(lists, normal_lists)
def test_foreignkey_reverse(self):
with self.assertNumQueries(2):
lists = [list(b.first_time_authors.all())
for b in Book.objects.prefetch_related('first_time_authors')]
self.assertQuerysetEqual(self.book2.authors.all(), [u"<Author: Charlotte>"])
def test_survives_clone(self):
with self.assertNumQueries(2):
lists = [list(b.first_time_authors.all())
for b in Book.objects.prefetch_related('first_time_authors').exclude(id=1000)]
def test_len(self):
with self.assertNumQueries(2):
qs = Book.objects.prefetch_related('first_time_authors')
length = len(qs)
lists = [list(b.first_time_authors.all())
for b in qs]
def test_bool(self):
with self.assertNumQueries(2):
qs = Book.objects.prefetch_related('first_time_authors')
x = bool(qs)
lists = [list(b.first_time_authors.all())
for b in qs]
def test_count(self):
with self.assertNumQueries(2):
qs = Book.objects.prefetch_related('first_time_authors')
[b.first_time_authors.count() for b in qs]
def test_exists(self):
with self.assertNumQueries(2):
qs = Book.objects.prefetch_related('first_time_authors')
[b.first_time_authors.exists() for b in qs]
def test_clear(self):
"""
Test that we can clear the behavior by calling prefetch_related()
"""
with self.assertNumQueries(5):
with_prefetch = Author.objects.prefetch_related('books')
without_prefetch = with_prefetch.prefetch_related(None)
lists = [list(a.books.all()) for a in without_prefetch]
def test_m2m_then_m2m(self):
"""
Test we can follow a m2m and another m2m
"""
with self.assertNumQueries(3):
qs = Author.objects.prefetch_related('books__read_by')
lists = [[[unicode(r) for r in b.read_by.all()]
for b in a.books.all()]
for a in qs]
self.assertEqual(lists,
[
[[u"Amy"], [u"Belinda"]], # Charlotte - Poems, Jane Eyre
[[u"Amy"]], # Anne - Poems
[[u"Amy"], []], # Emily - Poems, Wuthering Heights
[[u"Amy", u"Belinda"]], # Jane - Sense and Sense
])
def test_overriding_prefetch(self):
with self.assertNumQueries(3):
qs = Author.objects.prefetch_related('books', 'books__read_by')
lists = [[[unicode(r) for r in b.read_by.all()]
for b in a.books.all()]
for a in qs]
self.assertEqual(lists,
[
[[u"Amy"], [u"Belinda"]], # Charlotte - Poems, Jane Eyre
[[u"Amy"]], # Anne - Poems
[[u"Amy"], []], # Emily - Poems, Wuthering Heights
[[u"Amy", u"Belinda"]], # Jane - Sense and Sense
])
with self.assertNumQueries(3):
qs = Author.objects.prefetch_related('books__read_by', 'books')
lists = [[[unicode(r) for r in b.read_by.all()]
for b in a.books.all()]
for a in qs]
self.assertEqual(lists,
[
[[u"Amy"], [u"Belinda"]], # Charlotte - Poems, Jane Eyre
[[u"Amy"]], # Anne - Poems
[[u"Amy"], []], # Emily - Poems, Wuthering Heights
[[u"Amy", u"Belinda"]], # Jane - Sense and Sense
])
def test_get(self):
"""
Test that objects retrieved with .get() get the prefetch behaviour
"""
# Need a double
with self.assertNumQueries(3):
author = Author.objects.prefetch_related('books__read_by').get(name="Charlotte")
lists = [[unicode(r) for r in b.read_by.all()]
for b in author.books.all()]
self.assertEqual(lists, [[u"Amy"], [u"Belinda"]]) # Poems, Jane Eyre
def test_foreign_key_then_m2m(self):
"""
Test we can follow an m2m relation after a relation like ForeignKey
that doesn't have many objects
"""
with self.assertNumQueries(2):
qs = Author.objects.select_related('first_book').prefetch_related('first_book__read_by')
lists = [[unicode(r) for r in a.first_book.read_by.all()]
for a in qs]
self.assertEqual(lists, [[u"Amy"],
[u"Amy"],
[u"Amy"],
[u"Amy", "Belinda"]])
def test_attribute_error(self):
qs = Reader.objects.all().prefetch_related('books_read__xyz')
with self.assertRaises(AttributeError) as cm:
list(qs)
self.assertTrue('prefetch_related' in str(cm.exception))
def test_invalid_final_lookup(self):
qs = Book.objects.prefetch_related('authors__first_book')
with self.assertRaises(ValueError) as cm:
list(qs)
self.assertTrue('prefetch_related' in str(cm.exception))
self.assertTrue("first_book" in str(cm.exception))
class DefaultManagerTests(TestCase):
    """
    Tests prefetching of teachers per department, where rendering a teacher
    with unicode() also lists that teacher's qualifications.
    """

    def setUp(self):
        """Create qualifications, teachers and departments, then link them."""
        # Objects are created in the same order as they are named so that
        # their ids (and therefore any id-based ordering) are predictable.
        self.qual1, self.qual2, self.qual3, self.qual4 = [
            Qualification.objects.create(name=qual_name)
            for qual_name in ("BA", "BSci", "MA", "PhD")
        ]
        self.teacher1, self.teacher2, self.teacher3 = [
            Teacher.objects.create(name=teacher_name)
            for teacher_name in ("Mr Cleese", "Mr Idle", "Mr Chapman")
        ]

        self.teacher1.qualifications.add(self.qual1, self.qual2, self.qual3, self.qual4)
        self.teacher2.qualifications.add(self.qual1)
        self.teacher3.qualifications.add(self.qual2)

        self.dept1 = Department.objects.create(name="English")
        self.dept2 = Department.objects.create(name="Physics")

        self.dept1.teachers.add(self.teacher1, self.teacher2)
        self.dept2.teachers.add(self.teacher1, self.teacher3)

    def test_m2m_then_m2m(self):
        """Listing every department's teachers takes exactly 3 queries."""
        with self.assertNumQueries(3):
            # When we prefetch the teachers, and force the query, we don't want
            # the default manager on teachers to immediately get all the related
            # qualifications, since this will do one query per teacher.
            departments = Department.objects.prefetch_related('teachers')
            report_lines = []
            for dept in departments:
                teacher_list = ", ".join(unicode(t) for t in dept.teachers.all())
                report_lines.append("%s department: %s\n" % (dept.name, teacher_list))
            depts = "".join(report_lines)

            self.assertEqual(depts,
                             "English department: Mr Cleese (BA, BSci, MA, PhD), Mr Idle (BA)\n"
                             "Physics department: Mr Cleese (BA, BSci, MA, PhD), Mr Chapman (BSci)\n")
class GenericRelationTests(TestCase):
    """
    Tests for prefetch_related() in combination with generic foreign keys
    and generic relations.
    """

    def test_traverse_GFK(self):
        """
        Test that we can traverse a 'content_object' with prefetch_related()
        """
        # In fact, there is no special support for this in prefetch_related code
        # - we can traverse any object that will lead us to objects that have
        # related managers.

        book1 = Book.objects.create(title="Winnie the Pooh")
        book2 = Book.objects.create(title="Do you like green eggs and spam?")

        reader1 = Reader.objects.create(name="me")
        reader2 = Reader.objects.create(name="you")

        book1.read_by.add(reader1)
        book2.read_by.add(reader2)

        TaggedItem.objects.create(tag="awesome", content_object=book1)
        TaggedItem.objects.create(tag="awesome", content_object=book2)

        ct = ContentType.objects.get_for_model(Book)

        # We get 4 queries - 1 for the main query, 2 for the accesses to
        # 'content_object' (one per tagged item) because these can't be
        # handled by select_related, and 1 for the 'read_by' relation.
        with self.assertNumQueries(4):
            # If we limit to books, we know that they will have 'read_by'
            # attributes, so the following makes sense:
            qs = TaggedItem.objects.select_related('content_type').prefetch_related('content_object__read_by').filter(content_type=ct, tag='awesome')
            readers_of_awesome_books = [r.name for tag in qs
                                        for r in tag.content_object.read_by.all()]
            self.assertEqual(readers_of_awesome_books, ["me", "you"])

    def test_generic_relation(self):
        """Prefetching a reverse generic relation ('tags') takes 2 queries."""
        bookmark = Bookmark.objects.create(url='http://www.djangoproject.com/')
        TaggedItem.objects.create(content_object=bookmark, tag='django')
        TaggedItem.objects.create(content_object=bookmark, tag='python')

        with self.assertNumQueries(2):
            # Use distinct loop variable names so that the bookmark created
            # above is not shadowed by the comprehension.
            tags = [t.tag for bm in Bookmark.objects.prefetch_related('tags')
                    for t in bm.tags.all()]
            self.assertEqual(sorted(tags), ["django", "python"])
class MultiTableInheritanceTest(TestCase):
    """
    Tests for prefetch_related() on the AuthorWithAge and BookWithYear
    models.
    """

    def setUp(self):
        """Create two books, three authors and one address, and link them."""
        self.book1 = BookWithYear.objects.create(
            title="Poems", published_year=2010)
        self.book2 = BookWithYear.objects.create(
            title="More poems", published_year=2011)
        self.author1 = AuthorWithAge.objects.create(
            name='Jane', first_book=self.book1, age=50)
        self.author2 = AuthorWithAge.objects.create(
            name='Tom', first_book=self.book1, age=49)
        self.author3 = AuthorWithAge.objects.create(
            name='Robert', first_book=self.book2, age=48)
        self.authorAddress = AuthorAddress.objects.create(
            author=self.author1, address='SomeStreet 1')
        self.book2.aged_authors.add(self.author2, self.author3)

    def test_foreignkey(self):
        """Prefetching 'addresses' takes 2 queries and only author1 has one."""
        with self.assertNumQueries(2):
            qs = AuthorWithAge.objects.prefetch_related('addresses')
            addresses = [[unicode(address) for address in obj.addresses.all()]
                         for obj in qs]
        self.assertEqual(addresses, [[unicode(self.authorAddress)], [], []])

    def test_m2m_to_inheriting_model(self):
        """
        Prefetching the m2m between AuthorWithAge and BookWithYear, in either
        direction, gives the same results as not prefetching.
        """
        qs = AuthorWithAge.objects.prefetch_related('books_with_year')
        with self.assertNumQueries(2):
            lst = [[unicode(book) for book in author.books_with_year.all()]
                   for author in qs]
        qs = AuthorWithAge.objects.all()
        lst2 = [[unicode(book) for book in author.books_with_year.all()]
                for author in qs]
        self.assertEqual(lst, lst2)

        qs = BookWithYear.objects.prefetch_related('aged_authors')
        with self.assertNumQueries(2):
            lst = [[unicode(author) for author in book.aged_authors.all()]
                   for book in qs]
        qs = BookWithYear.objects.all()
        lst2 = [[unicode(author) for author in book.aged_authors.all()]
                for book in qs]
        self.assertEqual(lst, lst2)

    def test_parent_link_prefetch(self):
        """Prefetching 'author' on AuthorWithAge raises ValueError."""
        with self.assertRaises(ValueError) as cm:
            # The result is irrelevant; list() only forces evaluation.
            list(AuthorWithAge.objects.prefetch_related('author'))
        self.assertTrue('prefetch_related' in str(cm.exception))
class ForeignKeyToFieldTest(TestCase):
    """
    Tests for prefetch_related() over the 'addresses', 'favorite_authors'
    and 'favors_me' relations of Author.
    """

    def setUp(self):
        """
        Create one book, three authors, one address, and a cycle of
        favourites: author1 -> author2 -> author3 -> author1.
        """
        self.book = Book.objects.create(title="Poems")
        self.author1 = Author.objects.create(name='Jane', first_book=self.book)
        self.author2 = Author.objects.create(name='Tom', first_book=self.book)
        self.author3 = Author.objects.create(name='Robert', first_book=self.book)
        self.authorAddress = AuthorAddress.objects.create(
            author=self.author1, address='SomeStreet 1'
        )
        FavoriteAuthors.objects.create(author=self.author1,
                                       likes_author=self.author2)
        FavoriteAuthors.objects.create(author=self.author2,
                                       likes_author=self.author3)
        FavoriteAuthors.objects.create(author=self.author3,
                                       likes_author=self.author1)

    def test_foreignkey(self):
        """Prefetching 'addresses' takes 2 queries and only author1 has one."""
        with self.assertNumQueries(2):
            qs = Author.objects.prefetch_related('addresses')
            addresses = [[unicode(address) for address in obj.addresses.all()]
                         for obj in qs]
        self.assertEqual(addresses, [[unicode(self.authorAddress)], [], []])

    def test_m2m(self):
        """Prefetching both directions of the favourites m2m takes 3 queries."""
        with self.assertNumQueries(3):
            qs = Author.objects.all().prefetch_related('favorite_authors', 'favors_me')
            favorites = [(
                [unicode(i_like) for i_like in author.favorite_authors.all()],
                [unicode(likes_me) for likes_me in author.favors_me.all()]
            ) for author in qs]
            # Each tuple is (authors this author likes, authors who like
            # this author), listed for author1, author2 and author3 in turn.
            self.assertEqual(
                favorites,
                [
                    ([unicode(self.author2)], [unicode(self.author3)]),
                    ([unicode(self.author3)], [unicode(self.author1)]),
                    ([unicode(self.author1)], [unicode(self.author2)])
                ]
            )
class LookupOrderingTest(TestCase):
    """
    Test cases that demonstrate that ordering of lookups is important, and
    ensure it is preserved.
    """

    def setUp(self):
        """Create two people, four houses with rooms, and the occupancies."""
        self.person1 = Person.objects.create(name="Joe")
        self.person2 = Person.objects.create(name="Mary")

        self.house1 = House.objects.create(address="123 Main St")
        self.house2 = House.objects.create(address="45 Side St")
        self.house3 = House.objects.create(address="6 Downing St")
        self.house4 = House.objects.create(address="7 Regents St")

        # House 1: three rooms.
        self.room1_1 = Room.objects.create(name="Dining room", house=self.house1)
        self.room1_2 = Room.objects.create(name="Lounge", house=self.house1)
        self.room1_3 = Room.objects.create(name="Kitchen", house=self.house1)

        # House 2: two rooms.
        self.room2_1 = Room.objects.create(name="Dining room", house=self.house2)
        self.room2_2 = Room.objects.create(name="Lounge", house=self.house2)

        # House 3: three rooms.
        self.room3_1 = Room.objects.create(name="Dining room", house=self.house3)
        self.room3_2 = Room.objects.create(name="Lounge", house=self.house3)
        self.room3_3 = Room.objects.create(name="Kitchen", house=self.house3)

        # House 4: two rooms.
        self.room4_1 = Room.objects.create(name="Dining room", house=self.house4)
        self.room4_2 = Room.objects.create(name="Lounge", house=self.house4)

        self.person1.houses.add(self.house1, self.house2)
        self.person2.houses.add(self.house3, self.house4)

    def test_order(self):
        """Lookups applied in the order given need only 4 queries in total."""
        with self.assertNumQueries(4):
            # The following two lookups must be processed in the same order as
            # written, otherwise 'primary_house' will cause non-prefetched
            # lookups
            people = Person.objects.prefetch_related('houses__rooms',
                                                     'primary_house__occupants')
            for person in people:
                list(person.primary_house.occupants.all())
class NullableTest(TestCase):
    """Tests for prefetching through a nullable ForeignKey."""

    def setUp(self):
        """Create one boss (who has no boss of their own) and two workers."""
        boss = Employee.objects.create(name="Peter")
        Employee.objects.create(name="Joe", boss=boss)
        Employee.objects.create(name="Angela", boss=boss)

    def test_traverse_nullable(self):
        """
        Test that prefetching 'boss__serfs' takes 2 queries and gives the same
        results as not prefetching, even though one employee has no boss.
        """
        with self.assertNumQueries(2):
            qs = Employee.objects.select_related('boss').prefetch_related('boss__serfs')
            co_serfs = [list(e.boss.serfs.all()) if e.boss is not None else []
                        for e in qs]

        qs2 = Employee.objects.select_related('boss')
        co_serfs2 = [list(e.boss.serfs.all()) if e.boss is not None else []
                     for e in qs2]

        self.assertEqual(co_serfs, co_serfs2)