Added a "depth" argument to select_related() to control how many "levels" of relations select_related() is willing to follow (refs #3275).

Also added unit tests for select_related().

git-svn-id: http://code.djangoproject.com/svn/django/trunk@4645 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss 2007-02-28 15:24:05 +00:00
parent 9c67c1dd8d
commit 4477f50f0b
4 changed files with 193 additions and 10 deletions

View File

@ -84,6 +84,7 @@ class QuerySet(object):
self._filters = Q() self._filters = Q()
self._order_by = None # Ordering, e.g. ('date', '-name'). If None, use model's ordering. self._order_by = None # Ordering, e.g. ('date', '-name'). If None, use model's ordering.
self._select_related = False # Whether to fill cache for related objects. self._select_related = False # Whether to fill cache for related objects.
self._max_related_depth = 0 # Maximum "depth" for select_related
self._distinct = False # Whether the query should use SELECT DISTINCT. self._distinct = False # Whether the query should use SELECT DISTINCT.
self._select = {} # Dictionary of attname -> SQL. self._select = {} # Dictionary of attname -> SQL.
self._where = [] # List of extra WHERE clauses to use. self._where = [] # List of extra WHERE clauses to use.
@ -186,7 +187,8 @@ class QuerySet(object):
raise StopIteration raise StopIteration
for row in rows: for row in rows:
if fill_cache: if fill_cache:
obj, index_end = get_cached_row(self.model, row, 0) obj, index_end = get_cached_row(klass=self.model, row=row,
index_start=0, max_depth=self._max_related_depth)
else: else:
obj = self.model(*row[:index_end]) obj = self.model(*row[:index_end])
for i, k in enumerate(extra_select): for i, k in enumerate(extra_select):
@ -394,9 +396,9 @@ class QuerySet(object):
else: else:
return self._filter_or_exclude(None, **filter_obj) return self._filter_or_exclude(None, **filter_obj)
def select_related(self, true_or_false=True): def select_related(self, true_or_false=True, depth=0):
"Returns a new QuerySet instance with '_select_related' modified." "Returns a new QuerySet instance with '_select_related' modified."
return self._clone(_select_related=true_or_false) return self._clone(_select_related=true_or_false, _max_related_depth=depth)
def order_by(self, *field_names): def order_by(self, *field_names):
"Returns a new QuerySet instance with the ordering changed." "Returns a new QuerySet instance with the ordering changed."
@ -430,6 +432,7 @@ class QuerySet(object):
c._filters = self._filters c._filters = self._filters
c._order_by = self._order_by c._order_by = self._order_by
c._select_related = self._select_related c._select_related = self._select_related
c._max_related_depth = self._max_related_depth
c._distinct = self._distinct c._distinct = self._distinct
c._select = self._select.copy() c._select = self._select.copy()
c._where = self._where[:] c._where = self._where[:]
@ -483,7 +486,10 @@ class QuerySet(object):
# Add additional tables and WHERE clauses based on select_related. # Add additional tables and WHERE clauses based on select_related.
if self._select_related: if self._select_related:
fill_table_cache(opts, select, tables, where, opts.db_table, [opts.db_table]) fill_table_cache(opts, select, tables, where,
old_prefix=opts.db_table,
cache_tables_seen=[opts.db_table],
max_depth=self._max_related_depth)
# Add any additional SELECTs. # Add any additional SELECTs.
if self._select: if self._select:
@ -728,21 +734,33 @@ def get_where_clause(lookup_type, table_prefix, field_name, value):
return backend.get_fulltext_search_sql(table_prefix + field_name) return backend.get_fulltext_search_sql(table_prefix + field_name)
raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type) raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type)
def get_cached_row(klass, row, index_start): def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0):
"Helper function that recursively returns an object with cache filled" """Helper function that recursively returns an object with cache filled"""
# If we've got a max_depth set and we've exceeded that depth, bail now.
if max_depth and cur_depth > max_depth:
return None
index_end = index_start + len(klass._meta.fields) index_end = index_start + len(klass._meta.fields)
obj = klass(*row[index_start:index_end]) obj = klass(*row[index_start:index_end])
for f in klass._meta.fields: for f in klass._meta.fields:
if f.rel and not f.null: if f.rel and not f.null:
rel_obj, index_end = get_cached_row(f.rel.to, row, index_end) cached_row = get_cached_row(f.rel.to, row, index_end, max_depth, cur_depth+1)
setattr(obj, f.get_cache_name(), rel_obj) if cached_row:
rel_obj, index_end = cached_row
setattr(obj, f.get_cache_name(), rel_obj)
return obj, index_end return obj, index_end
def fill_table_cache(opts, select, tables, where, old_prefix, cache_tables_seen): def fill_table_cache(opts, select, tables, where, old_prefix, cache_tables_seen, max_depth=0, cur_depth=0):
""" """
Helper function that recursively populates the select, tables and where (in Helper function that recursively populates the select, tables and where (in
place) for select_related queries. place) for select_related queries.
""" """
# If we've got a max_depth set and we've exceeded that depth, bail now.
if max_depth and cur_depth > max_depth:
return None
qn = backend.quote_name qn = backend.quote_name
for f in opts.fields: for f in opts.fields:
if f.rel and not f.null: if f.rel and not f.null:
@ -757,7 +775,7 @@ def fill_table_cache(opts, select, tables, where, old_prefix, cache_tables_seen)
where.append('%s.%s = %s.%s' % \ where.append('%s.%s = %s.%s' % \
(qn(old_prefix), qn(f.column), qn(db_table), qn(f.rel.get_related_field().column))) (qn(old_prefix), qn(f.column), qn(db_table), qn(f.rel.get_related_field().column)))
select.extend(['%s.%s' % (qn(db_table), qn(f2.column)) for f2 in f.rel.to._meta.fields]) select.extend(['%s.%s' % (qn(db_table), qn(f2.column)) for f2 in f.rel.to._meta.fields])
fill_table_cache(f.rel.to._meta, select, tables, where, db_table, cache_tables_seen) fill_table_cache(f.rel.to._meta, select, tables, where, db_table, cache_tables_seen, max_depth, cur_depth+1)
def parse_lookup(kwarg_items, opts): def parse_lookup(kwarg_items, opts):
# Helper function that handles converting API kwargs # Helper function that handles converting API kwargs

View File

@ -596,6 +596,19 @@ related ``Person`` *and* the related ``City``::
Note that ``select_related()`` does not follow foreign keys that have Note that ``select_related()`` does not follow foreign keys that have
``null=True``. ``null=True``.
Usually, using ``select_related()`` can vastly improve performance since your
app can avoid many database calls. However, in situations with deeply nested
sets of relationships ``select_related()`` can sometimes end up following "too
many" relations, and can generate queries so large that they end up being slow.
In these situations, you can use the ``depth`` argument to ``select_related()``
to control how many "levels" of relations ``select_related()`` will actually
follow::
b = Book.objects.select_related(depth=1).get(id=4)
p = b.author # Doesn't hit the database.
c = p.hometown # Requires a database call.
``extra(select=None, where=None, params=None, tables=None)`` ``extra(select=None, where=None, params=None, tables=None)``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -0,0 +1,152 @@
"""
XXX. Tests for ``select_related()``
``select_related()`` follows all relationships and pre-caches any foreign key
values so that complex trees can be fetched in a single query. However, this
isn't always a good idea, so the ``depth`` argument controls how many "levels"
the select-related behavior will traverse.
"""
from django.db import models
# Who remembers high school biology?
# Top rank of the test taxonomy: the only model here without a foreign key,
# so every chain of relations followed by the tests ends at a Domain.
class Domain(models.Model):
    name = models.CharField(maxlength=50)

    def __str__(self):
        # Instances are displayed by their name alone.
        return self.name
# Second rank of the test taxonomy; each Kingdom points at one Domain.
class Kingdom(models.Model):
    name = models.CharField(maxlength=50)
    # Declared without null=True.  The attribute name has to stay equal to the
    # parent class name lowercased, because create_tree() derives it that way.
    domain = models.ForeignKey(Domain)

    def __str__(self):
        # Instances are displayed by their name alone.
        return self.name
# Third rank of the test taxonomy; each Phylum points at one Kingdom.
class Phylum(models.Model):
    name = models.CharField(maxlength=50)
    # Declared without null=True.  The attribute name has to stay equal to the
    # parent class name lowercased, because create_tree() derives it that way.
    kingdom = models.ForeignKey(Kingdom)

    def __str__(self):
        # Instances are displayed by their name alone.
        return self.name
# Fourth rank of the test taxonomy (the biological "class"; spelled Klass
# because ``class`` is a Python keyword).  Each Klass points at one Phylum.
class Klass(models.Model):
    name = models.CharField(maxlength=50)
    # Declared without null=True.  The attribute name has to stay equal to the
    # parent class name lowercased, because create_tree() derives it that way.
    phylum = models.ForeignKey(Phylum)

    def __str__(self):
        # Instances are displayed by their name alone.
        return self.name
# Fifth rank of the test taxonomy; each Order points at one Klass.
class Order(models.Model):
    name = models.CharField(maxlength=50)
    # Declared without null=True.  The attribute name has to stay equal to the
    # parent class name lowercased ("klass"), because create_tree() derives it
    # that way.
    klass = models.ForeignKey(Klass)

    def __str__(self):
        # Instances are displayed by their name alone.
        return self.name
# Sixth rank of the test taxonomy; each Family points at one Order.
class Family(models.Model):
    name = models.CharField(maxlength=50)
    # Declared without null=True.  The attribute name has to stay equal to the
    # parent class name lowercased, because create_tree() derives it that way.
    order = models.ForeignKey(Order)

    def __str__(self):
        # Instances are displayed by their name alone.
        return self.name
# Seventh rank of the test taxonomy; each Genus points at one Family.
class Genus(models.Model):
    name = models.CharField(maxlength=50)
    # Declared without null=True.  The attribute name has to stay equal to the
    # parent class name lowercased, because create_tree() derives it that way.
    family = models.ForeignKey(Family)

    def __str__(self):
        # Instances are displayed by their name alone.
        return self.name
# Lowest rank of the test taxonomy and the model the doctests query; walking
# genus -> family -> ... -> domain from a Species crosses seven relations.
class Species(models.Model):
    name = models.CharField(maxlength=50)
    # Declared without null=True.  The attribute name has to stay equal to the
    # parent class name lowercased, because create_tree() derives it that way.
    genus = models.ForeignKey(Genus)

    def __str__(self):
        # Instances are displayed by their name alone.
        return self.name
def create_tree(stringtree):
    """
    Build (or reuse) one root-to-leaf chain of taxonomy rows.

    ``stringtree`` is a whitespace-separated string holding exactly one name
    per rank, ordered from Domain down to Species.  A row whose name already
    exists at its rank is fetched rather than created again; every row is then
    attached to the row of the rank above it and saved.
    """
    names = stringtree.split()
    # Named "ranks" rather than "models" so the imported ``models`` module is
    # not shadowed inside this function.
    ranks = [Domain, Kingdom, Phylum, Klass, Order, Family, Genus, Species]
    assert len(names) == len(ranks), (names, ranks)

    parent = None
    for rank, name in zip(ranks, names):
        try:
            node = rank.objects.get(name=name)
        except rank.DoesNotExist:
            node = rank(name=name)
        if parent:
            # Each foreign key attribute is named after the parent's class,
            # lowercased (Klass -> "klass", Genus -> "genus", ...).
            fk_attr = parent.__class__.__name__.lower()
            setattr(node, fk_attr, parent)
        node.save()
        parent = node
__test__ = {'API_TESTS':"""
# Set up.
# The test runner sets settings.DEBUG to False, but we want to gather queries
# so we'll set it to True here and reset it at the end of the test suite.
>>> from django.conf import settings
>>> settings.DEBUG = True
>>> create_tree("Eukaryota Animalia Anthropoda Insecta Diptera Drosophilidae Drosophila melanogaster")
>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens")
>>> create_tree("Eukaryota Plantae Magnoliophyta Magnoliopsida Fabales Fabaceae Pisum sativum")
>>> create_tree("Eukaryota Fungi Basidiomycota Homobasidiomycatae Agaricales Amanitacae Amanita muscaria")
>>> from django import db
# Normally, accessing FKs doesn't fill in related objects:
>>> db.reset_queries()
>>> fly = Species.objects.get(name="melanogaster")
>>> fly.genus.family.order.klass.phylum.kingdom.domain
<Domain: Eukaryota>
>>> len(db.connection.queries)
8
# However, a select_related() call will fill in those related objects without any extra queries:
>>> db.reset_queries()
>>> person = Species.objects.select_related().get(name="sapiens")
>>> person.genus.family.order.klass.phylum.kingdom.domain
<Domain: Eukaryota>
>>> len(db.connection.queries)
1
# select_related() also of course applies to entire lists, not just items.
# Without select_related()
>>> db.reset_queries()
>>> world = Species.objects.all()
>>> [o.genus.family for o in world]
[<Family: Drosophilidae>, <Family: Hominidae>, <Family: Fabaceae>, <Family: Amanitacae>]
>>> len(db.connection.queries)
9
# With select_related():
>>> db.reset_queries()
>>> world = Species.objects.all().select_related()
>>> [o.genus.family for o in world]
[<Family: Drosophilidae>, <Family: Hominidae>, <Family: Fabaceae>, <Family: Amanitacae>]
>>> len(db.connection.queries)
1
# The "depth" argument to select_related() will stop the descent at a particular level:
>>> db.reset_queries()
>>> pea = Species.objects.select_related(depth=1).get(name="sativum")
>>> pea.genus.family.order.klass.phylum.kingdom.domain
<Domain: Eukaryota>
# Notice: one fewer query than above because of depth=1
>>> len(db.connection.queries)
7
>>> db.reset_queries()
>>> pea = Species.objects.select_related(depth=5).get(name="sativum")
>>> pea.genus.family.order.klass.phylum.kingdom.domain
<Domain: Eukaryota>
>>> len(db.connection.queries)
3
>>> db.reset_queries()
>>> world = Species.objects.all().select_related(depth=2)
>>> [o.genus.family.order for o in world]
[<Order: Diptera>, <Order: Primates>, <Order: Fabales>, <Order: Agaricales>]
>>> len(db.connection.queries)
5
# Reset DEBUG to where we found it.
>>> settings.DEBUG = False
"""}