From 662eea116f5a188b6983f5f7c01c8247e3b6b309 Mon Sep 17 00:00:00 2001 From: Luke Plant Date: Wed, 5 Oct 2011 23:14:52 +0000 Subject: [PATCH] Fixed #16937 - added `QuerySet.prefetch_related` to prefetch many related objects. Many thanks to akaariai for lots of review and feedback, bug finding, additional unit tests and performance testing. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16930 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/contrib/contenttypes/generic.py | 34 +- django/db/models/fields/related.py | 69 ++- django/db/models/manager.py | 3 + django/db/models/query.py | 179 ++++++++ docs/ref/models/querysets.txt | 103 ++++- docs/releases/1.4.txt | 13 + docs/topics/db/optimization.txt | 8 +- tests/modeltests/prefetch_related/__init__.py | 0 tests/modeltests/prefetch_related/models.py | 165 +++++++ tests/modeltests/prefetch_related/tests.py | 418 ++++++++++++++++++ 10 files changed, 968 insertions(+), 24 deletions(-) create mode 100644 tests/modeltests/prefetch_related/__init__.py create mode 100644 tests/modeltests/prefetch_related/models.py create mode 100644 tests/modeltests/prefetch_related/tests.py diff --git a/django/contrib/contenttypes/generic.py b/django/contrib/contenttypes/generic.py index 12f7dc8125..f475c08812 100644 --- a/django/contrib/contenttypes/generic.py +++ b/django/contrib/contenttypes/generic.py @@ -225,11 +225,7 @@ class ReverseGenericRelatedObjectsDescriptor(object): content_type = content_type, content_type_field_name = self.field.content_type_field_name, object_id_field_name = self.field.object_id_field_name, - core_filters = { - '%s__pk' % self.field.content_type_field_name: content_type.id, - '%s__exact' % self.field.object_id_field_name: instance._get_pk_val(), - } - + prefetch_cache_name = self.field.attname, ) return manager @@ -250,12 +246,12 @@ def create_generic_related_manager(superclass): """ class GenericRelatedObjectManager(superclass): - def __init__(self, model=None, core_filters=None, instance=None, 
symmetrical=None, + def __init__(self, model=None, instance=None, symmetrical=None, source_col_name=None, target_col_name=None, content_type=None, - content_type_field_name=None, object_id_field_name=None): + content_type_field_name=None, object_id_field_name=None, + prefetch_cache_name=None): super(GenericRelatedObjectManager, self).__init__() - self.core_filters = core_filters self.model = model self.content_type = content_type self.symmetrical = symmetrical @@ -264,11 +260,29 @@ def create_generic_related_manager(superclass): self.target_col_name = target_col_name self.content_type_field_name = content_type_field_name self.object_id_field_name = object_id_field_name + self.prefetch_cache_name = prefetch_cache_name self.pk_val = self.instance._get_pk_val() + self.core_filters = { + '%s__pk' % content_type_field_name: content_type.id, + '%s__exact' % object_id_field_name: instance._get_pk_val(), + } def get_query_set(self): - db = self._db or router.db_for_read(self.model, instance=self.instance) - return super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**self.core_filters) + try: + return self.instance._prefetched_objects_cache[self.prefetch_cache_name] + except (AttributeError, KeyError): + db = self._db or router.db_for_read(self.model, instance=self.instance) + return super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**self.core_filters) + + def get_prefetch_query_set(self, instances): + db = self._db or router.db_for_read(self.model) + query = { + '%s__pk' % self.content_type_field_name: self.content_type.id, + '%s__in' % self.object_id_field_name: + [obj._get_pk_val() for obj in instances] + } + qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query) + return (qs, self.object_id_field_name, 'pk') def add(self, *objs): for obj in objs: diff --git a/django/db/models/fields/related.py b/django/db/models/fields/related.py index 95c4cac253..8bf9d247b1 100644 --- 
a/django/db/models/fields/related.py +++ b/django/db/models/fields/related.py @@ -432,8 +432,22 @@ class ForeignRelatedObjectsDescriptor(object): self.model = rel_model def get_query_set(self): - db = self._db or router.db_for_read(self.model, instance=self.instance) - return super(RelatedManager, self).get_query_set().using(db).filter(**(self.core_filters)) + try: + return self.instance._prefetched_objects_cache[rel_field.related_query_name()] + except (AttributeError, KeyError): + db = self._db or router.db_for_read(self.model, instance=self.instance) + return super(RelatedManager, self).get_query_set().using(db).filter(**self.core_filters) + + def get_prefetch_query_set(self, instances): + """ + Return a queryset that does the bulk lookup needed + by prefetch_related functionality. + """ + db = self._db or router.db_for_read(self.model) + query = {'%s__%s__in' % (rel_field.name, attname): + [getattr(obj, attname) for obj in instances]} + qs = super(RelatedManager, self).get_query_set().using(db).filter(**query) + return (qs, rel_field.get_attname(), attname) def add(self, *objs): for obj in objs: @@ -482,25 +496,60 @@ def create_many_related_manager(superclass, rel): """Creates a manager that subclasses 'superclass' (which is a Manager) and adds behavior for many-to-many related objects.""" class ManyRelatedManager(superclass): - def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None, + def __init__(self, model=None, query_field_name=None, instance=None, symmetrical=None, source_field_name=None, target_field_name=None, reverse=False, - through=None): + through=None, prefetch_cache_name=None): super(ManyRelatedManager, self).__init__() self.model = model - self.core_filters = core_filters + self.query_field_name = query_field_name + self.core_filters = {'%s__pk' % query_field_name: instance._get_pk_val()} self.instance = instance self.symmetrical = symmetrical self.source_field_name = source_field_name self.target_field_name = 
target_field_name self.reverse = reverse self.through = through + self.prefetch_cache_name = prefetch_cache_name self._pk_val = self.instance.pk if self._pk_val is None: raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__) def get_query_set(self): - db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance) - return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**(self.core_filters)) + try: + return self.instance._prefetched_objects_cache[self.prefetch_cache_name] + except (AttributeError, KeyError): + db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance) + return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters) + + def get_prefetch_query_set(self, instances): + """ + Returns a tuple: + (queryset of instances of self.model that are related to passed in instances + attr of returned instances needed for matching + attr of passed in instances needed for matching) + """ + from django.db import connections + db = self._db or router.db_for_read(self.model) + query = {'%s__pk__in' % self.query_field_name: + [obj._get_pk_val() for obj in instances]} + qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query) + + # M2M: need to annotate the query in order to get the primary model + # that the secondary model was actually related to. We know that + # there will already be a join on the join table, so we can just add + # the select. + + # For non-autocreated 'through' models, can't assume we are + # dealing with PK values. 
+ fk = self.through._meta.get_field(self.source_field_name) + source_col = fk.column + join_table = self.through._meta.db_table + connection = connections[db] + qn = connection.ops.quote_name + qs = qs.extra(select={'_prefetch_related_val': + '%s.%s' % (qn(join_table), qn(source_col))}) + select_attname = fk.rel.get_related_field().get_attname() + return (qs, '_prefetch_related_val', select_attname) # If the ManyToMany relation has an intermediary model, # the add and remove methods do not exist. @@ -683,7 +732,8 @@ class ManyRelatedObjectsDescriptor(object): manager = self.related_manager_cls( model=rel_model, - core_filters={'%s__pk' % self.related.field.name: instance._get_pk_val()}, + query_field_name=self.related.field.name, + prefetch_cache_name=self.related.field.related_query_name(), instance=instance, symmetrical=False, source_field_name=self.related.field.m2m_reverse_field_name(), @@ -739,7 +789,8 @@ class ReverseManyRelatedObjectsDescriptor(object): manager = self.related_manager_cls( model=self.field.rel.to, - core_filters={'%s__pk' % self.field.related_query_name(): instance._get_pk_val()}, + query_field_name=self.field.related_query_name(), + prefetch_cache_name=self.field.name, instance=instance, symmetrical=self.field.rel.symmetrical, source_field_name=self.field.m2m_field_name(), diff --git a/django/db/models/manager.py b/django/db/models/manager.py index baf701f6dd..e1bbf6ebc5 100644 --- a/django/db/models/manager.py +++ b/django/db/models/manager.py @@ -172,6 +172,9 @@ class Manager(object): def select_related(self, *args, **kwargs): return self.get_query_set().select_related(*args, **kwargs) + def prefetch_related(self, *args, **kwargs): + return self.get_query_set().prefetch_related(*args, **kwargs) + def values(self, *args, **kwargs): return self.get_query_set().values(*args, **kwargs) diff --git a/django/db/models/query.py b/django/db/models/query.py index 06d48f761b..b21db2e521 100644 --- a/django/db/models/query.py +++ 
b/django/db/models/query.py @@ -36,6 +36,8 @@ class QuerySet(object): self._iter = None self._sticky_filter = False self._for_write = False + self._prefetch_related_lookups = [] + self._prefetch_done = False ######################## # PYTHON MAGIC METHODS # @@ -81,9 +83,17 @@ class QuerySet(object): self._result_cache = list(self.iterator()) elif self._iter: self._result_cache.extend(self._iter) + if self._prefetch_related_lookups and not self._prefetch_done: + self._prefetch_related_objects() return len(self._result_cache) def __iter__(self): + if self._prefetch_related_lookups and not self._prefetch_done: + # We need all the results in order to be able to do the prefetch + # in one go. To minimize code duplication, we use the __len__ + # code path which also forces this, and also does the prefetch + len(self) + if self._result_cache is None: self._iter = self.iterator() self._result_cache = [] @@ -106,6 +116,12 @@ class QuerySet(object): self._fill_cache() def __nonzero__(self): + if self._prefetch_related_lookups and not self._prefetch_done: + # We need all the results in order to be able to do the prefetch + # in one go. To minimize code duplication, we use the __len__ + # code path which also forces this, and also does the prefetch + len(self) + if self._result_cache is not None: return bool(self._result_cache) try: @@ -527,6 +543,11 @@ class QuerySet(object): return self.query.has_results(using=self.db) return bool(self._result_cache) + def _prefetch_related_objects(self): + # This method can only be called once the result cache has been filled. 
+ prefetch_related_objects(self._result_cache, self._prefetch_related_lookups) + self._prefetch_done = True + ################################################## # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS # ################################################## @@ -650,6 +671,23 @@ class QuerySet(object): obj.query.max_depth = depth return obj + def prefetch_related(self, *lookups): + """ + Returns a new QuerySet instance that will prefetch the specified + Many-To-One and Many-To-Many related objects when the QuerySet is + evaluated. + + When prefetch_related() is called more than once, the list of lookups to + prefetch is appended to. If prefetch_related(None) is called, the + list is cleared. + """ + clone = self._clone() + if lookups == (None,): + clone._prefetch_related_lookups = [] + else: + clone._prefetch_related_lookups.extend(lookups) + return clone + def dup_select_related(self, other): """ Copies the related selection status from the QuerySet 'other' to the @@ -799,6 +837,7 @@ class QuerySet(object): query.filter_is_sticky = True c = klass(model=self.model, query=query, using=self._db) c._for_write = self._for_write + c._prefetch_related_lookups = self._prefetch_related_lookups[:] c.__dict__.update(kwargs) if setup and hasattr(c, '_setup_query'): c._setup_query() @@ -864,6 +903,7 @@ class QuerySet(object): # empty" result. value_annotation = True + class ValuesQuerySet(QuerySet): def __init__(self, *args, **kwargs): super(ValuesQuerySet, self).__init__(*args, **kwargs) @@ -993,6 +1033,7 @@ class ValuesQuerySet(QuerySet): % self.__class__.__name__) return self + class ValuesListQuerySet(ValuesQuerySet): def iterator(self): if self.flat and len(self._fields) == 1: @@ -1502,6 +1543,7 @@ class RawQuerySet(object): self._model_fields[converter(column)] = field return self._model_fields + def insert_query(model, objs, fields, return_id=False, raw=False, using=None): """ Inserts a new record for the given model.
This provides an interface to @@ -1511,3 +1553,140 @@ def insert_query(model, objs, fields, return_id=False, raw=False, using=None): query = sql.InsertQuery(model) query.insert_values(fields, objs, raw=raw) return query.get_compiler(using=using).execute_sql(return_id) + + +def prefetch_related_objects(result_cache, related_lookups): + """ + Helper function for prefetch_related functionality + + Populates prefetched objects caches for a list of results + from a QuerySet + """ + from django.db.models.sql.constants import LOOKUP_SEP + + if len(result_cache) == 0: + return # nothing to do + + model = result_cache[0].__class__ + + # We need to be able to dynamically add to the list of prefetch_related + # lookups that we look up (see below). So we need some book keeping to + # ensure we don't do duplicate work. + done_lookups = set() # set of lookups like foo__bar__baz + done_queries = {} # dictionary of things like 'foo__bar': [results] + related_lookups = list(related_lookups) + + # We may expand related_lookups, so need a loop that allows for that + for lookup in related_lookups: + if lookup in done_lookups: + # We've done exactly this already, skip the whole thing + continue + done_lookups.add(lookup) + + # Top level, the list of objects to decorate is the result cache + # from the primary QuerySet. It won't be for deeper levels. + obj_list = result_cache + + attrs = lookup.split(LOOKUP_SEP) + for level, attr in enumerate(attrs): + # Prepare main instances + if len(obj_list) == 0: + break + + good_objects = True + for obj in obj_list: + if not hasattr(obj, '_prefetched_objects_cache'): + try: + obj._prefetched_objects_cache = {} + except AttributeError: + # Must be in a QuerySet subclass that is not returning + # Model instances, either in Django or 3rd + # party. prefetch_related() doesn't make sense, so quit + # now.
+ good_objects = False + break + else: + # We already did this list + break + if not good_objects: + break + + # Descend down tree + try: + rel_obj = getattr(obj_list[0], attr) + except AttributeError: + raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid " + "parameter to prefetch_related()" % + (attr, obj_list[0].__class__.__name__, lookup)) + + can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set') + if level == len(attrs) - 1 and not can_prefetch: + # Last one, this *must* resolve to a related manager. + raise ValueError("'%s' does not resolve to a supported 'many related" + " manager' for model %s - this is an invalid" + " parameter to prefetch_related()." + % (lookup, model.__name__)) + + if can_prefetch: + # Check we didn't do this already + current_lookup = LOOKUP_SEP.join(attrs[0:level+1]) + if current_lookup in done_queries: + obj_list = done_queries[current_lookup] + else: + relmanager = rel_obj + obj_list, additional_prl = prefetch_one_level(obj_list, relmanager, attr) + for f in additional_prl: + new_prl = LOOKUP_SEP.join([current_lookup, f]) + related_lookups.append(new_prl) + done_queries[current_lookup] = obj_list + else: + # Assume we've got some singly related object. We replace + # the current list of parent objects with that list. + obj_list = [getattr(obj, attr) for obj in obj_list] + + # Filter out 'None' so that we can continue with nullable + # relations. + obj_list = [obj for obj in obj_list if obj is not None] + + +def prefetch_one_level(instances, relmanager, attname): + """ + Helper function for prefetch_related_objects + + Runs prefetches on all instances using the manager relmanager, + assigning results to queryset against instance.attname. + + The prefetched objects are returned, along with any additional + prefetches that must be done due to prefetch_related lookups + found from default managers. 
+ """ + rel_qs, rel_obj_attr, instance_attr = relmanager.get_prefetch_query_set(instances) + # We have to handle the possibility that the default manager itself added + # prefetch_related lookups to the QuerySet we just got back. We don't want to + # trigger the prefetch_related functionality by evaluating the query. + # Rather, we need to merge in the prefetch_related lookups. + additional_prl = getattr(rel_qs, '_prefetch_related_lookups', []) + if additional_prl: + # Don't need to clone because the manager should have given us a fresh + # instance, so we access an internal instead of using public interface + # for performance reasons. + rel_qs._prefetch_related_lookups = [] + + all_related_objects = list(rel_qs) + + rel_obj_cache = {} + for rel_obj in all_related_objects: + rel_attr_val = getattr(rel_obj, rel_obj_attr) + if rel_attr_val not in rel_obj_cache: + rel_obj_cache[rel_attr_val] = [] + rel_obj_cache[rel_attr_val].append(rel_obj) + + for obj in instances: + qs = getattr(obj, attname).all() + instance_attr_val = getattr(obj, instance_attr) + qs._result_cache = rel_obj_cache.get(instance_attr_val, []) + # We don't want the individual qs doing prefetch_related now, since we + # have merged this into the current work. + qs._prefetch_done = True + obj._prefetched_objects_cache[attname] = qs + return all_related_objects, additional_prl diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt index a7c767d660..ea8e0ff6e3 100644 --- a/docs/ref/models/querysets.txt +++ b/docs/ref/models/querysets.txt @@ -571,8 +571,6 @@ can be useful in situations where you might want to pass in either a model manager or a ``QuerySet`` and do further filtering on the result. After calling ``all()`` on either object, you'll definitely have a ``QuerySet`` to work with. -.. _select-related: - select_related ~~~~~~~~~~~~~~ @@ -690,6 +688,107 @@ is defined. 
Instead of specifying the field name, use the :attr:`related_name A :class:`~django.db.models.OneToOneField` is not traversed in the reverse direction if you are performing a depth-based ``select_related()`` call. +prefetch_related +~~~~~~~~~~~~~~~~ + +.. method:: prefetch_related(*lookups) + +.. versionadded:: 1.4 + +Returns a ``QuerySet`` that will automatically retrieve, in a single batch, +related many-to-many and many-to-one objects for each of the specified lookups. + +This is similar to ``select_related`` for the 'many related objects' case, but +note that ``prefetch_related`` causes a separate query to be issued for each set +of related objects that you request, unlike ``select_related`` which modifies +the original query with joins in order to get the related objects. With +``prefetch_related``, the additional queries are done as soon as the QuerySet +begins to be evaluated. + +For example, suppose you have these models:: + + class Topping(models.Model): + name = models.CharField(max_length=30) + + class Pizza(models.Model): + name = models.CharField(max_length=50) + toppings = models.ManyToManyField(Topping) + + def __unicode__(self): + return u"%s (%s)" % (self.name, u", ".join([topping.name + for topping in self.toppings.all()])) + +and run this code:: + + >>> Pizza.objects.all() + [u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"... + +The problem with this code is that it will run a query on the Toppings table for +**every** item in the Pizza ``QuerySet``. Using ``prefetch_related``, this can +be reduced to two: + + >>> Pizza.objects.all().prefetch_related('toppings') + +All the relevant toppings will be fetched in a single query, and used to make +``QuerySets`` that have a pre-filled cache of the relevant results. These +``QuerySets`` are then used in the ``self.toppings.all()`` calls. 
+ +Please note that use of ``prefetch_related`` will mean that the additional +queries run will **always** be executed - even if you never use the related +objects - and it always fully populates the result cache on the primary +``QuerySet`` (which can sometimes be avoided in other cases). + +Also remember that, as always with QuerySets, any subsequent chained methods +will ignore previously cached results, and retrieve data using a fresh database +query. So, if you write the following: + + >>> pizzas = Pizza.objects.prefetch_related('toppings') + >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas] + +...then the fact that ``pizza.toppings.all()`` has been prefetched will not help +you - in fact it hurts performance, since you have done a database query that +you haven't used. So use this feature with caution! + +The lookups that must be supplied to this method can be any attributes on the +model instances which represent related queries that return multiple +objects. This includes attributes representing the 'many' side of ``ForeignKey`` +relationships, forward and reverse ``ManyToManyField`` attributes, and also any +``GenericRelations``. + +You can also use the normal join syntax to do related fields of related +fields. Suppose we have an additional model to the example above:: + + class Restaurant(models.Model): + pizzas = models.ManyToManyField(Pizza, related_name='restaurants') + best_pizza = models.ForeignKey(Pizza, related_name='championed_by') + +The following are all legal: + + >>> Restaurant.objects.prefetch_related('pizzas__toppings') + +This will prefetch all pizzas belonging to restaurants, and all toppings +belonging to those pizzas. This will result in a total of 3 database queries - +one for the restaurants, one for the pizzas, and one for the toppings.
+ + >>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings') + +This will fetch the best pizza and all the toppings for the best pizza for each +restaurant. This will be done in 2 database queries - one for the restaurants +and 'best pizzas' combined (achieved through use of ``select_related``), and one +for the toppings. + +Chaining ``prefetch_related`` calls will accumulate the fields that should have +this behavior applied. To clear any ``prefetch_related`` behavior, pass ``None`` +as a parameter:: + + >>> non_prefetched = qs.prefetch_related(None) + +One difference when using ``prefetch_related`` is that, in some circumstances, +objects created by a query can be shared between the different objects that they +are related to, i.e. a single Python model instance can appear at more than one +point in the tree of objects that are returned. Normally this behavior will not +be a problem, and will in fact save both memory and CPU time. + extra ~~~~~ diff --git a/docs/releases/1.4.txt b/docs/releases/1.4.txt index 89e1fc11ac..5580b67dce 100644 --- a/docs/releases/1.4.txt +++ b/docs/releases/1.4.txt @@ -63,6 +63,19 @@ setup for test suites) has seen a performance benefit as a result. See the :meth:`~django.db.models.query.QuerySet.bulk_create` docs for more information. +``QuerySet.prefetch_related`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Analogous to :meth:`~django.db.models.query.QuerySet.select_related` but for +many-to-many relationships, +:meth:`~django.db.models.query.QuerySet.prefetch_related` has been added to +:class:`~django.db.models.query.QuerySet`. This method returns a new ``QuerySet`` +that will prefetch in a single batch each of the specified related lookups as +soon as it begins to be evaluated (e.g. by iterating over it).
This enables you +to fix many instances of a very common performance problem, in which your code +ends up doing O(n) database queries (or worse) if objects on your primary +``QuerySet`` each have many related objects that you also need. + HTML5 ~~~~~ diff --git a/docs/topics/db/optimization.txt b/docs/topics/db/optimization.txt index 63aa11735b..dda7e9504a 100644 --- a/docs/topics/db/optimization.txt +++ b/docs/topics/db/optimization.txt @@ -141,10 +141,12 @@ retrieving it all in one query. This is particularly important if you have a query that is executed in a loop, and could therefore end up doing many database queries, when only one was needed. So: -Use ``QuerySet.select_related()`` ---------------------------------- +Use ``QuerySet.select_related()`` and ``prefetch_related()`` +------------------------------------------------------------ -Understand :ref:`QuerySet.select_related() <select-related>` thoroughly, and use it: +Understand :meth:`~django.db.models.query.QuerySet.select_related` and +:meth:`~django.db.models.query.QuerySet.prefetch_related` thoroughly, and use +them: * in view code, diff --git a/tests/modeltests/prefetch_related/__init__.py b/tests/modeltests/prefetch_related/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/modeltests/prefetch_related/models.py b/tests/modeltests/prefetch_related/models.py new file mode 100644 index 0000000000..ab28496f37 --- /dev/null +++ b/tests/modeltests/prefetch_related/models.py @@ -0,0 +1,165 @@ +from django.contrib.contenttypes.models import ContentType +from django.contrib.contenttypes import generic +from django.db import models + +## Basic tests + +class Author(models.Model): + name = models.CharField(max_length=50, unique=True) + first_book = models.ForeignKey('Book', related_name='first_time_authors') + favorite_authors = models.ManyToManyField( + 'self', through='FavoriteAuthors', symmetrical=False, related_name='favors_me') + + def __unicode__(self): + return self.name + + class Meta: +
ordering = ['id'] + + +class AuthorWithAge(Author): + author = models.OneToOneField(Author, parent_link=True) + age = models.IntegerField() + + +class FavoriteAuthors(models.Model): + author = models.ForeignKey(Author, to_field='name', related_name='i_like') + likes_author = models.ForeignKey(Author, to_field='name', related_name='likes_me') + + class Meta: + ordering = ['id'] + + +class AuthorAddress(models.Model): + author = models.ForeignKey(Author, to_field='name', related_name='addresses') + address = models.TextField() + + class Meta: + ordering = ['id'] + + def __unicode__(self): + return self.address + + +class Book(models.Model): + title = models.CharField(max_length=255) + authors = models.ManyToManyField(Author, related_name='books') + + def __unicode__(self): + return self.title + + class Meta: + ordering = ['id'] + +class BookWithYear(Book): + book = models.OneToOneField(Book, parent_link=True) + published_year = models.IntegerField() + aged_authors = models.ManyToManyField( + AuthorWithAge, related_name='books_with_year') + + +class Reader(models.Model): + name = models.CharField(max_length=50) + books_read = models.ManyToManyField(Book, related_name='read_by') + + def __unicode__(self): + return self.name + + class Meta: + ordering = ['id'] + + +## Models for default manager tests + +class Qualification(models.Model): + name = models.CharField(max_length=10) + + class Meta: + ordering = ['id'] + + +class TeacherManager(models.Manager): + def get_query_set(self): + return super(TeacherManager, self).get_query_set().prefetch_related('qualifications') + + +class Teacher(models.Model): + name = models.CharField(max_length=50) + qualifications = models.ManyToManyField(Qualification) + + objects = TeacherManager() + + def __unicode__(self): + return "%s (%s)" % (self.name, ", ".join(q.name for q in self.qualifications.all())) + + class Meta: + ordering = ['id'] + + +class Department(models.Model): + name = models.CharField(max_length=50) + teachers = 
models.ManyToManyField(Teacher) + + class Meta: + ordering = ['id'] + + +## Generic relation tests + +class TaggedItem(models.Model): + tag = models.SlugField() + content_type = models.ForeignKey(ContentType, related_name="taggeditem_set2") + object_id = models.PositiveIntegerField() + content_object = generic.GenericForeignKey('content_type', 'object_id') + + def __unicode__(self): + return self.tag + + +class Bookmark(models.Model): + url = models.URLField() + tags = generic.GenericRelation(TaggedItem) + + +## Models for lookup ordering tests + + +class House(models.Model): + address = models.CharField(max_length=255) + + class Meta: + ordering = ['id'] + +class Room(models.Model): + name = models.CharField(max_length=50) + house = models.ForeignKey(House, related_name='rooms') + + class Meta: + ordering = ['id'] + + +class Person(models.Model): + name = models.CharField(max_length=50) + houses = models.ManyToManyField(House, related_name='occupants') + + @property + def primary_house(self): + # Assume business logic forces every person to have at least one house. 
+ return sorted(self.houses.all(), key=lambda house: -house.rooms.count())[0] + + class Meta: + ordering = ['id'] + + +## Models for nullable FK tests + +class Employee(models.Model): + name = models.CharField(max_length=50) + boss = models.ForeignKey('self', null=True, + related_name='serfs') + + def __unicode__(self): + return self.name + + class Meta: + ordering = ['id'] diff --git a/tests/modeltests/prefetch_related/tests.py b/tests/modeltests/prefetch_related/tests.py new file mode 100644 index 0000000000..45202f2af8 --- /dev/null +++ b/tests/modeltests/prefetch_related/tests.py @@ -0,0 +1,418 @@ +from __future__ import with_statement + +from django.contrib.contenttypes.models import ContentType +from django.test import TestCase +from django.utils import unittest + +from models import (Author, Book, Reader, Qualification, Teacher, Department, + TaggedItem, Bookmark, AuthorAddress, FavoriteAuthors, + AuthorWithAge, BookWithYear, Person, House, Room, + Employee) + + +class PrefetchRelatedTests(TestCase): + + def setUp(self): + + self.book1 = Book.objects.create(title="Poems") + self.book2 = Book.objects.create(title="Jane Eyre") + self.book3 = Book.objects.create(title="Wuthering Heights") + self.book4 = Book.objects.create(title="Sense and Sensibility") + + self.author1 = Author.objects.create(name="Charlotte", + first_book=self.book1) + self.author2 = Author.objects.create(name="Anne", + first_book=self.book1) + self.author3 = Author.objects.create(name="Emily", + first_book=self.book1) + self.author4 = Author.objects.create(name="Jane", + first_book=self.book4) + + self.book1.authors.add(self.author1, self.author2, self.author3) + self.book2.authors.add(self.author1) + self.book3.authors.add(self.author3) + self.book4.authors.add(self.author4) + + self.reader1 = Reader.objects.create(name="Amy") + self.reader2 = Reader.objects.create(name="Belinda") + + self.reader1.books_read.add(self.book1, self.book4) + self.reader2.books_read.add(self.book2, self.book4) + 
+ def test_m2m_forward(self): + with self.assertNumQueries(2): + lists = [list(b.authors.all()) for b in Book.objects.prefetch_related('authors')] + + normal_lists = [list(b.authors.all()) for b in Book.objects.all()] + self.assertEqual(lists, normal_lists) + + + def test_m2m_reverse(self): + with self.assertNumQueries(2): + lists = [list(a.books.all()) for a in Author.objects.prefetch_related('books')] + + normal_lists = [list(a.books.all()) for a in Author.objects.all()] + self.assertEqual(lists, normal_lists) + + def test_foreignkey_reverse(self): + with self.assertNumQueries(2): + lists = [list(b.first_time_authors.all()) + for b in Book.objects.prefetch_related('first_time_authors')] + + self.assertQuerysetEqual(self.book2.authors.all(), [u"<Author: Charlotte>"]) + + def test_survives_clone(self): + with self.assertNumQueries(2): + lists = [list(b.first_time_authors.all()) + for b in Book.objects.prefetch_related('first_time_authors').exclude(id=1000)] + + def test_len(self): + with self.assertNumQueries(2): + qs = Book.objects.prefetch_related('first_time_authors') + length = len(qs) + lists = [list(b.first_time_authors.all()) + for b in qs] + + def test_bool(self): + with self.assertNumQueries(2): + qs = Book.objects.prefetch_related('first_time_authors') + x = bool(qs) + lists = [list(b.first_time_authors.all()) + for b in qs] + + def test_count(self): + with self.assertNumQueries(2): + qs = Book.objects.prefetch_related('first_time_authors') + [b.first_time_authors.count() for b in qs] + + def test_exists(self): + with self.assertNumQueries(2): + qs = Book.objects.prefetch_related('first_time_authors') + [b.first_time_authors.exists() for b in qs] + + def test_clear(self): + """ + Test that we can clear the behavior by calling prefetch_related(None) + """ + with self.assertNumQueries(5): + with_prefetch = Author.objects.prefetch_related('books') + without_prefetch = with_prefetch.prefetch_related(None) + lists = [list(a.books.all()) for a in without_prefetch] + + def
test_m2m_then_m2m(self): + """ + Test we can follow a m2m and another m2m + """ + with self.assertNumQueries(3): + qs = Author.objects.prefetch_related('books__read_by') + lists = [[[unicode(r) for r in b.read_by.all()] + for b in a.books.all()] + for a in qs] + self.assertEqual(lists, + [ + [[u"Amy"], [u"Belinda"]], # Charlotte - Poems, Jane Eyre + [[u"Amy"]], # Anne - Poems + [[u"Amy"], []], # Emily - Poems, Wuthering Heights + [[u"Amy", u"Belinda"]], # Jane - Sense and Sense + ]) + + def test_overriding_prefetch(self): + with self.assertNumQueries(3): + qs = Author.objects.prefetch_related('books', 'books__read_by') + lists = [[[unicode(r) for r in b.read_by.all()] + for b in a.books.all()] + for a in qs] + self.assertEqual(lists, + [ + [[u"Amy"], [u"Belinda"]], # Charlotte - Poems, Jane Eyre + [[u"Amy"]], # Anne - Poems + [[u"Amy"], []], # Emily - Poems, Wuthering Heights + [[u"Amy", u"Belinda"]], # Jane - Sense and Sense + ]) + with self.assertNumQueries(3): + qs = Author.objects.prefetch_related('books__read_by', 'books') + lists = [[[unicode(r) for r in b.read_by.all()] + for b in a.books.all()] + for a in qs] + self.assertEqual(lists, + [ + [[u"Amy"], [u"Belinda"]], # Charlotte - Poems, Jane Eyre + [[u"Amy"]], # Anne - Poems + [[u"Amy"], []], # Emily - Poems, Wuthering Heights + [[u"Amy", u"Belinda"]], # Jane - Sense and Sense + ]) + + def test_get(self): + """ + Test that objects retrieved with .get() get the prefetch behaviour + """ + # Need a double + with self.assertNumQueries(3): + author = Author.objects.prefetch_related('books__read_by').get(name="Charlotte") + lists = [[unicode(r) for r in b.read_by.all()] + for b in author.books.all()] + self.assertEqual(lists, [[u"Amy"], [u"Belinda"]]) # Poems, Jane Eyre + + def test_foreign_key_then_m2m(self): + """ + Test we can follow an m2m relation after a relation like ForeignKey + that doesn't have many objects + """ + with self.assertNumQueries(2): + qs = 
Author.objects.select_related('first_book').prefetch_related('first_book__read_by') + lists = [[unicode(r) for r in a.first_book.read_by.all()] + for a in qs] + self.assertEqual(lists, [[u"Amy"], + [u"Amy"], + [u"Amy"], + [u"Amy", "Belinda"]]) + + def test_attribute_error(self): + qs = Reader.objects.all().prefetch_related('books_read__xyz') + with self.assertRaises(AttributeError) as cm: + list(qs) + + self.assertTrue('prefetch_related' in str(cm.exception)) + + def test_invalid_final_lookup(self): + qs = Book.objects.prefetch_related('authors__first_book') + with self.assertRaises(ValueError) as cm: + list(qs) + + self.assertTrue('prefetch_related' in str(cm.exception)) + self.assertTrue("first_book" in str(cm.exception)) + + +class DefaultManagerTests(TestCase): + + def setUp(self): + self.qual1 = Qualification.objects.create(name="BA") + self.qual2 = Qualification.objects.create(name="BSci") + self.qual3 = Qualification.objects.create(name="MA") + self.qual4 = Qualification.objects.create(name="PhD") + + self.teacher1 = Teacher.objects.create(name="Mr Cleese") + self.teacher2 = Teacher.objects.create(name="Mr Idle") + self.teacher3 = Teacher.objects.create(name="Mr Chapman") + + self.teacher1.qualifications.add(self.qual1, self.qual2, self.qual3, self.qual4) + self.teacher2.qualifications.add(self.qual1) + self.teacher3.qualifications.add(self.qual2) + + self.dept1 = Department.objects.create(name="English") + self.dept2 = Department.objects.create(name="Physics") + + self.dept1.teachers.add(self.teacher1, self.teacher2) + self.dept2.teachers.add(self.teacher1, self.teacher3) + + def test_m2m_then_m2m(self): + with self.assertNumQueries(3): + # When we prefetch the teachers, and force the query, we don't want + # the default manager on teachers to immediately get all the related + # qualifications, since this will do one query per teacher. 
+ qs = Department.objects.prefetch_related('teachers') + depts = "".join(["%s department: %s\n" % + (dept.name, ", ".join(unicode(t) for t in dept.teachers.all())) + for dept in qs]) + + self.assertEqual(depts, + "English department: Mr Cleese (BA, BSci, MA, PhD), Mr Idle (BA)\n" + "Physics department: Mr Cleese (BA, BSci, MA, PhD), Mr Chapman (BSci)\n") + + +class GenericRelationTests(TestCase): + + def test_traverse_GFK(self): + """ + Test that we can traverse a 'content_object' with prefetch_related() + """ + # In fact, there is no special support for this in prefetch_related code + # - we can traverse any object that will lead us to objects that have + # related managers. + + book1 = Book.objects.create(title="Winnie the Pooh") + book2 = Book.objects.create(title="Do you like green eggs and spam?") + + reader1 = Reader.objects.create(name="me") + reader2 = Reader.objects.create(name="you") + + book1.read_by.add(reader1) + book2.read_by.add(reader2) + + TaggedItem.objects.create(tag="awesome", content_object=book1) + TaggedItem.objects.create(tag="awesome", content_object=book2) + + ct = ContentType.objects.get_for_model(Book) + + # We get 4 queries - 1 for main query, 2 for each access to + # 'content_object' because these can't be handled by select_related, and + # 1 for the 'read_by' relation. 
+ with self.assertNumQueries(4): + # If we limit to books, we know that they will have 'read_by' + # attributes, so the following makes sense: + qs = TaggedItem.objects.select_related('content_type').prefetch_related('content_object__read_by').filter(tag='awesome').filter(content_type=ct, tag='awesome') + readers_of_awesome_books = [r.name for tag in qs + for r in tag.content_object.read_by.all()] + self.assertEqual(readers_of_awesome_books, ["me", "you"]) + + + def test_generic_relation(self): + b = Bookmark.objects.create(url='http://www.djangoproject.com/') + t1 = TaggedItem.objects.create(content_object=b, tag='django') + t2 = TaggedItem.objects.create(content_object=b, tag='python') + + with self.assertNumQueries(2): + tags = [t.tag for b in Bookmark.objects.prefetch_related('tags') + for t in b.tags.all()] + self.assertEqual(sorted(tags), ["django", "python"]) + + +class MultiTableInheritanceTest(TestCase): + + def setUp(self): + self.book1 = BookWithYear.objects.create( + title="Poems", published_year=2010) + self.book2 = BookWithYear.objects.create( + title="More poems", published_year=2011) + self.author1 = AuthorWithAge.objects.create( + name='Jane', first_book=self.book1, age=50) + self.author2 = AuthorWithAge.objects.create( + name='Tom', first_book=self.book1, age=49) + self.author3 = AuthorWithAge.objects.create( + name='Robert', first_book=self.book2, age=48) + self.authorAddress = AuthorAddress.objects.create( + author=self.author1, address='SomeStreet 1') + self.book2.aged_authors.add(self.author2, self.author3) + + def test_foreignkey(self): + with self.assertNumQueries(2): + qs = AuthorWithAge.objects.prefetch_related('addresses') + addresses = [[unicode(address) for address in obj.addresses.all()] + for obj in qs] + self.assertEquals(addresses, [[unicode(self.authorAddress)], [], []]) + + def test_m2m_to_inheriting_model(self): + qs = AuthorWithAge.objects.prefetch_related('books_with_year') + with self.assertNumQueries(2): + lst = 
[[unicode(book) for book in author.books_with_year.all()] + for author in qs] + qs = AuthorWithAge.objects.all() + lst2 = [[unicode(book) for book in author.books_with_year.all()] + for author in qs] + self.assertEquals(lst, lst2) + + qs = BookWithYear.objects.prefetch_related('aged_authors') + with self.assertNumQueries(2): + lst = [[unicode(author) for author in book.aged_authors.all()] + for book in qs] + qs = BookWithYear.objects.all() + lst2 = [[unicode(author) for author in book.aged_authors.all()] + for book in qs] + self.assertEquals(lst, lst2) + + def test_parent_link_prefetch(self): + with self.assertRaises(ValueError) as cm: + qs = list(AuthorWithAge.objects.prefetch_related('author')) + self.assertTrue('prefetch_related' in str(cm.exception)) + + +class ForeignKeyToFieldTest(TestCase): + + def setUp(self): + self.book = Book.objects.create(title="Poems") + self.author1 = Author.objects.create(name='Jane', first_book=self.book) + self.author2 = Author.objects.create(name='Tom', first_book=self.book) + self.author3 = Author.objects.create(name='Robert', first_book=self.book) + self.authorAddress = AuthorAddress.objects.create( + author=self.author1, address='SomeStreet 1' + ) + FavoriteAuthors.objects.create(author=self.author1, + likes_author=self.author2) + FavoriteAuthors.objects.create(author=self.author2, + likes_author=self.author3) + FavoriteAuthors.objects.create(author=self.author3, + likes_author=self.author1) + + def test_foreignkey(self): + with self.assertNumQueries(2): + qs = Author.objects.prefetch_related('addresses') + addresses = [[unicode(address) for address in obj.addresses.all()] + for obj in qs] + self.assertEquals(addresses, [[unicode(self.authorAddress)], [], []]) + + def test_m2m(self): + with self.assertNumQueries(3): + qs = Author.objects.all().prefetch_related('favorite_authors', 'favors_me') + favorites = [( + [unicode(i_like) for i_like in author.favorite_authors.all()], + [unicode(likes_me) for likes_me in 
author.favors_me.all()] + ) for author in qs] + self.assertEquals( + favorites, + [ + ([unicode(self.author2)],[unicode(self.author3)]), + ([unicode(self.author3)],[unicode(self.author1)]), + ([unicode(self.author1)],[unicode(self.author2)]) + ] + ) + + +class LookupOrderingTest(TestCase): + """ + Test cases that demonstrate that ordering of lookups is important, and + ensure it is preserved. + """ + + def setUp(self): + self.person1 = Person.objects.create(name="Joe") + self.person2 = Person.objects.create(name="Mary") + + self.house1 = House.objects.create(address="123 Main St") + self.house2 = House.objects.create(address="45 Side St") + self.house3 = House.objects.create(address="6 Downing St") + self.house4 = House.objects.create(address="7 Regents St") + + self.room1_1 = Room.objects.create(name="Dining room", house=self.house1) + self.room1_2 = Room.objects.create(name="Lounge", house=self.house1) + self.room1_3 = Room.objects.create(name="Kitchen", house=self.house1) + + self.room2_1 = Room.objects.create(name="Dining room", house=self.house2) + self.room2_2 = Room.objects.create(name="Lounge", house=self.house2) + + self.room3_1 = Room.objects.create(name="Dining room", house=self.house3) + self.room3_2 = Room.objects.create(name="Lounge", house=self.house3) + self.room3_3 = Room.objects.create(name="Kitchen", house=self.house3) + + self.room4_1 = Room.objects.create(name="Dining room", house=self.house4) + self.room4_2 = Room.objects.create(name="Lounge", house=self.house4) + + self.person1.houses.add(self.house1, self.house2) + self.person2.houses.add(self.house3, self.house4) + + def test_order(self): + with self.assertNumQueries(4): + # The following two queries must be done in the same order as written, + # otherwise 'primary_house' will cause non-prefetched lookups + qs = Person.objects.prefetch_related('houses__rooms', + 'primary_house__occupants') + [list(p.primary_house.occupants.all()) for p in qs] + + +class NullableTest(TestCase): + + def 
setUp(self): + boss = Employee.objects.create(name="Peter") + worker1 = Employee.objects.create(name="Joe", boss=boss) + worker2 = Employee.objects.create(name="Angela", boss=boss) + + def test_traverse_nullable(self): + with self.assertNumQueries(2): + qs = Employee.objects.select_related('boss').prefetch_related('boss__serfs') + co_serfs = [list(e.boss.serfs.all()) if e.boss is not None else [] + for e in qs] + + qs2 = Employee.objects.select_related('boss') + co_serfs2 = [list(e.boss.serfs.all()) if e.boss is not None else [] + for e in qs2] + + self.assertEqual(co_serfs, co_serfs2)