From 4477f50f0b346f6c5f177a4bad99041a9b903842 Mon Sep 17 00:00:00 2001 From: Jacob Kaplan-Moss Date: Wed, 28 Feb 2007 15:24:05 +0000 Subject: [PATCH] Added a "depth" argument to select_related() to control how many "levels" of relations select_related() is willing to follow (refs #3275). Also added unit tests for select_related(). git-svn-id: http://code.djangoproject.com/svn/django/trunk@4645 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/db/models/query.py | 38 +++-- docs/db-api.txt | 13 ++ tests/modeltests/select_related/__init__.py | 0 tests/modeltests/select_related/models.py | 152 ++++++++++++++++++++ 4 files changed, 193 insertions(+), 10 deletions(-) create mode 100644 tests/modeltests/select_related/__init__.py create mode 100644 tests/modeltests/select_related/models.py diff --git a/django/db/models/query.py b/django/db/models/query.py index 5b2c18b25e..a03f4ecc1f 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -84,6 +84,7 @@ class QuerySet(object): self._filters = Q() self._order_by = None # Ordering, e.g. ('date', '-name'). If None, use model's ordering. self._select_related = False # Whether to fill cache for related objects. + self._max_related_depth = 0 # Maximum "depth" for select_related self._distinct = False # Whether the query should use SELECT DISTINCT. self._select = {} # Dictionary of attname -> SQL. self._where = [] # List of extra WHERE clauses to use. @@ -186,7 +187,8 @@ class QuerySet(object): raise StopIteration for row in rows: if fill_cache: - obj, index_end = get_cached_row(self.model, row, 0) + obj, index_end = get_cached_row(klass=self.model, row=row, + index_start=0, max_depth=self._max_related_depth) else: obj = self.model(*row[:index_end]) for i, k in enumerate(extra_select): @@ -394,9 +396,9 @@ class QuerySet(object): else: return self._filter_or_exclude(None, **filter_obj) - def select_related(self, true_or_false=True): + def select_related(self, true_or_false=True, depth=0): "Returns a new QuerySet instance with '_select_related' modified." - return self._clone(_select_related=true_or_false) + return self._clone(_select_related=true_or_false, _max_related_depth=depth) def order_by(self, *field_names): "Returns a new QuerySet instance with the ordering changed." @@ -430,6 +432,7 @@ class QuerySet(object): c._filters = self._filters c._order_by = self._order_by c._select_related = self._select_related + c._max_related_depth = self._max_related_depth c._distinct = self._distinct c._select = self._select.copy() c._where = self._where[:] @@ -483,7 +486,10 @@ class QuerySet(object): # Add additional tables and WHERE clauses based on select_related. if self._select_related: - fill_table_cache(opts, select, tables, where, opts.db_table, [opts.db_table]) + fill_table_cache(opts, select, tables, where, + old_prefix=opts.db_table, + cache_tables_seen=[opts.db_table], + max_depth=self._max_related_depth) # Add any additional SELECTs. if self._select: @@ -728,21 +734,33 @@ def get_where_clause(lookup_type, table_prefix, field_name, value): return backend.get_fulltext_search_sql(table_prefix + field_name) raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type) -def get_cached_row(klass, row, index_start): - "Helper function that recursively returns an object with cache filled" +def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0): + """Helper function that recursively returns an object with cache filled""" + + # If we've got a max_depth set and we've exceeded that depth, bail now. + if max_depth and cur_depth > max_depth: + return None + index_end = index_start + len(klass._meta.fields) obj = klass(*row[index_start:index_end]) for f in klass._meta.fields: if f.rel and not f.null: - rel_obj, index_end = get_cached_row(f.rel.to, row, index_end) - setattr(obj, f.get_cache_name(), rel_obj) + cached_row = get_cached_row(f.rel.to, row, index_end, max_depth, cur_depth+1) + if cached_row: + rel_obj, index_end = cached_row + setattr(obj, f.get_cache_name(), rel_obj) return obj, index_end -def fill_table_cache(opts, select, tables, where, old_prefix, cache_tables_seen): +def fill_table_cache(opts, select, tables, where, old_prefix, cache_tables_seen, max_depth=0, cur_depth=0): """ Helper function that recursively populates the select, tables and where (in place) for select_related queries. """ + + # If we've got a max_depth set and we've exceeded that depth, bail now. + if max_depth and cur_depth > max_depth: + return None + qn = backend.quote_name for f in opts.fields: if f.rel and not f.null: @@ -757,7 +775,7 @@ def fill_table_cache(opts, select, tables, where, old_prefix, cache_tables_seen) where.append('%s.%s = %s.%s' % \ (qn(old_prefix), qn(f.column), qn(db_table), qn(f.rel.get_related_field().column))) select.extend(['%s.%s' % (qn(db_table), qn(f2.column)) for f2 in f.rel.to._meta.fields]) - fill_table_cache(f.rel.to._meta, select, tables, where, db_table, cache_tables_seen) + fill_table_cache(f.rel.to._meta, select, tables, where, db_table, cache_tables_seen, max_depth, cur_depth+1) def parse_lookup(kwarg_items, opts): # Helper function that handles converting API kwargs diff --git a/docs/db-api.txt b/docs/db-api.txt index 3dc0efbabd..5cb5e4c983 100644 --- a/docs/db-api.txt +++ b/docs/db-api.txt @@ -596,6 +596,19 @@ related ``Person`` *and* the related ``City``:: Note that ``select_related()`` does not follow foreign keys that have ``null=True``. +Usually, using ``select_related()`` can vastly improve performance since your +app can avoid many database calls. However, in situations with deeply nested +sets of relationships ``select_related()`` can sometimes end up following "too +many" relations, and can generate queries so large that they end up being slow. + +In these situations, you can use the ``depth`` argument to ``select_related()`` +to control how many "levels" of relations ``select_related()`` will actually +follow:: + + b = Book.objects.select_related(depth=1).get(id=4) + p = b.author # Doesn't hit the database. + c = p.hometown # Requires a database call. + ``extra(select=None, where=None, params=None, tables=None)`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tests/modeltests/select_related/__init__.py b/tests/modeltests/select_related/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/modeltests/select_related/models.py b/tests/modeltests/select_related/models.py new file mode 100644 index 0000000000..8a19267870 --- /dev/null +++ b/tests/modeltests/select_related/models.py @@ -0,0 +1,152 @@ +""" +XXX. Tests for ``select_related()`` + +``select_related()`` follows all relationships and pre-caches any foreign key +values so that complex trees can be fetched in a single query. However, this +isn't always a good idea, so the ``depth`` argument control how many "levels" +the select-related behavior will traverse. +""" + +from django.db import models + +# Who remembers high school biology? + +class Domain(models.Model): + name = models.CharField(maxlength=50) + def __str__(self): + return self.name + +class Kingdom(models.Model): + name = models.CharField(maxlength=50) + domain = models.ForeignKey(Domain) + def __str__(self): + return self.name + +class Phylum(models.Model): + name = models.CharField(maxlength=50) + kingdom = models.ForeignKey(Kingdom) + def __str__(self): + return self.name + +class Klass(models.Model): + name = models.CharField(maxlength=50) + phylum = models.ForeignKey(Phylum) + def __str__(self): + return self.name + +class Order(models.Model): + name = models.CharField(maxlength=50) + klass = models.ForeignKey(Klass) + def __str__(self): + return self.name + +class Family(models.Model): + name = models.CharField(maxlength=50) + order = models.ForeignKey(Order) + def __str__(self): + return self.name + +class Genus(models.Model): + name = models.CharField(maxlength=50) + family = models.ForeignKey(Family) + def __str__(self): + return self.name + +class Species(models.Model): + name = models.CharField(maxlength=50) + genus = models.ForeignKey(Genus) + def __str__(self): + return self.name + +def create_tree(stringtree): + """Helper to create a complete tree""" + names = stringtree.split() + models = [Domain, Kingdom, Phylum, Klass, Order, Family, Genus, Species] + assert len(names) == len(models), (names, models) + + parent = None + for name, model in zip(names, models): + try: + obj = model.objects.get(name=name) + except model.DoesNotExist: + obj = model(name=name) + if parent: + setattr(obj, parent.__class__.__name__.lower(), parent) + obj.save() + parent = obj + +__test__ = {'API_TESTS':""" + +# Set up. +# The test runner sets settings.DEBUG to False, but we want to gather queries +# so we'll set it to True here and reset it at the end of the test suite. +>>> from django.conf import settings +>>> settings.DEBUG = True + +>>> create_tree("Eukaryota Animalia Anthropoda Insecta Diptera Drosophilidae Drosophila melanogaster") +>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens") +>>> create_tree("Eukaryota Plantae Magnoliophyta Magnoliopsida Fabales Fabaceae Pisum sativum") +>>> create_tree("Eukaryota Fungi Basidiomycota Homobasidiomycatae Agaricales Amanitacae Amanita muscaria") + +>>> from django import db + +# Normally, accessing FKs doesn't fill in related objects: +>>> db.reset_queries() +>>> fly = Species.objects.get(name="melanogaster") +>>> fly.genus.family.order.klass.phylum.kingdom.domain + +>>> len(db.connection.queries) +8 + +# However, a select_related() call will fill in those related objects without any extra queries: +>>> db.reset_queries() +>>> person = Species.objects.select_related().get(name="sapiens") +>>> person.genus.family.order.klass.phylum.kingdom.domain + +>>> len(db.connection.queries) +1 + +# select_related() also of course applies to entire lists, not just items. +# Without select_related() +>>> db.reset_queries() +>>> world = Species.objects.all() +>>> [o.genus.family for o in world] +[, , , ] +>>> len(db.connection.queries) +9 + +# With select_related(): +>>> db.reset_queries() +>>> world = Species.objects.all().select_related() +>>> [o.genus.family for o in world] +[, , , ] +>>> len(db.connection.queries) +1 + +# The "depth" argument to select_related() will stop the descent at a particular level: +>>> db.reset_queries() +>>> pea = Species.objects.select_related(depth=1).get(name="sativum") +>>> pea.genus.family.order.klass.phylum.kingdom.domain + + +# Notice: one few query than above because of depth=1 +>>> len(db.connection.queries) +7 + +>>> db.reset_queries() +>>> pea = Species.objects.select_related(depth=5).get(name="sativum") +>>> pea.genus.family.order.klass.phylum.kingdom.domain + +>>> len(db.connection.queries) +3 + +>>> db.reset_queries() +>>> world = Species.objects.all().select_related(depth=2) +>>> [o.genus.family.order for o in world] +[, , , ] +>>> len(db.connection.queries) +5 + +# Reset DEBUG to where we found it. +>>> settings.DEBUG = False +"""}