Fixed #16902 - select_related() results in poor performance

Thanks to ivan_virabyan for the great patch!

(For the record, some very small tweaks were made by me).

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16929 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Luke Plant 2011-10-05 22:56:09 +00:00
parent 03d4a8d1b6
commit d30fbf8b78
1 changed files with 116 additions and 89 deletions

View File

@ -265,12 +265,13 @@ class QuerySet(object):
db = self.db db = self.db
model = self.model model = self.model
compiler = self.query.get_compiler(using=db) compiler = self.query.get_compiler(using=db)
if fill_cache:
klass_info = get_klass_info(model, max_depth=max_depth,
requested=requested, only_load=only_load)
for row in compiler.results_iter(): for row in compiler.results_iter():
if fill_cache: if fill_cache:
obj, _ = get_cached_row(model, row, obj, _ = get_cached_row(row, index_start, db, klass_info,
index_start, using=db, max_depth=max_depth, offset=len(aggregate_select))
requested=requested, offset=len(aggregate_select),
only_load=only_load)
else: else:
if skip: if skip:
row_data = row[index_start:aggregate_start] row_data = row[index_start:aggregate_start]
@ -1174,22 +1175,16 @@ class EmptyQuerySet(QuerySet):
# situations). # situations).
value_annotation = False value_annotation = False
def get_klass_info(klass, max_depth=0, cur_depth=0, requested=None,
def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0, only_load=None, local_only=False):
requested=None, offset=0, only_load=None, local_only=False):
""" """
Helper function that recursively returns an object with the specified Helper function that recursively returns an information for a klass, to be
related attributes already populated. used in get_cached_row. It exists just to compute this information only
once for entire queryset. Otherwise it would be computed for each row, which
This method may be called recursively to populate deep select_related() leads to poor perfomance on large querysets.
clauses.
Arguments: Arguments:
* klass - the class to retrieve (and instantiate) * klass - the class to retrieve (and instantiate)
* row - the row of data returned by the database cursor
* index_start - the index of the row at which data for this
object is known to start
* using - the database alias on which the query is being executed.
* max_depth - the maximum depth to which a select_related() * max_depth - the maximum depth to which a select_related()
relationship should be explored. relationship should be explored.
* cur_depth - the current depth in the select_related() tree. * cur_depth - the current depth in the select_related() tree.
@ -1198,20 +1193,16 @@ def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0,
that is to be retrieved. keys are field names; values are that is to be retrieved. keys are field names; values are
dictionaries describing the keys on that related object that dictionaries describing the keys on that related object that
are themselves to be select_related(). are themselves to be select_related().
* offset - the number of additional fields that are known to
exist in `row` for `klass`. This usually means the number of
annotated results on `klass`.
* only_load - if the query has had only() or defer() applied, * only_load - if the query has had only() or defer() applied,
this is the list of field names that will be returned. If None, this is the list of field names that will be returned. If None,
the full field list for `klass` can be assumed. the full field list for `klass` can be assumed.
* local_only - Only populate local fields. This is used when building * local_only - Only populate local fields. This is used when
following reverse select-related relations following reverse select-related relations
""" """
if max_depth and requested is None and cur_depth > max_depth: if max_depth and requested is None and cur_depth > max_depth:
# We've recursed deeply enough; stop now. # We've recursed deeply enough; stop now.
return None return None
restricted = requested is not None
if only_load: if only_load:
load_fields = only_load.get(klass) load_fields = only_load.get(klass)
# When we create the object, we will also be creating populating # When we create the object, we will also be creating populating
@ -1223,6 +1214,7 @@ def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0,
load_fields.update(fields) load_fields.update(fields)
else: else:
load_fields = None load_fields = None
if load_fields: if load_fields:
# Handle deferred fields. # Handle deferred fields.
skip = set() skip = set()
@ -1237,52 +1229,97 @@ def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0,
init_list.append(field.attname) init_list.append(field.attname)
# Retrieve all the requested fields # Retrieve all the requested fields
field_count = len(init_list) field_count = len(init_list)
fields = row[index_start : index_start + field_count] if skip:
# If all the select_related columns are None, then the related
# object must be non-existent - set the relation to None.
# Otherwise, construct the related object.
if fields == (None,) * field_count:
obj = None
elif skip:
klass = deferred_class_factory(klass, skip) klass = deferred_class_factory(klass, skip)
obj = klass(**dict(zip(init_list, fields))) field_names = init_list
else: else:
obj = klass(*fields) field_names = ()
else: else:
# Load all fields on klass # Load all fields on klass
if local_only:
# We try not to populate the field_names variable, for performance reasons.
# If field_names is set, it is used to instantiate the desired fields
# by passing **dict(zip(field_names, fields)) as kwargs to Model.__init__.
# The kwargs version of Model.__init__ is slower, so we avoid using it
# when it is not really necessary.
if local_only and len(klass._meta.local_fields) != len(klass._meta.fields):
field_count = len(klass._meta.local_fields)
field_names = [f.attname for f in klass._meta.local_fields] field_names = [f.attname for f in klass._meta.local_fields]
else: else:
field_names = [f.attname for f in klass._meta.fields] field_count = len(klass._meta.fields)
field_count = len(field_names) field_names = ()
fields = row[index_start : index_start + field_count]
# If all the select_related columns are None, then the related restricted = requested is not None
# object must be non-existent - set the relation to None.
# Otherwise, construct the related object. related_fields = []
if fields == (None,) * field_count: for f in klass._meta.fields:
obj = None if select_related_descend(f, restricted, requested):
else: if restricted:
next = requested[f.name]
else:
next = None
klass_info = get_klass_info(f.rel.to, max_depth=max_depth, cur_depth=cur_depth+1,
requested=next, only_load=only_load)
related_fields.append((f, klass_info))
reverse_related_fields = []
if restricted:
for o in klass._meta.get_all_related_objects():
if o.field.unique and select_related_descend(o.field, restricted, requested, reverse=True):
next = requested[o.field.related_query_name()]
klass_info = get_klass_info(o.model, max_depth=max_depth, cur_depth=cur_depth+1,
requested=next, only_load=only_load, local_only=True)
reverse_related_fields.append((o.field, klass_info))
return klass, field_names, field_count, related_fields, reverse_related_fields
def get_cached_row(row, index_start, using, klass_info, offset=0):
"""
Helper function that recursively returns an object with the specified
related attributes already populated.
This method may be called recursively to populate deep select_related()
clauses.
Arguments:
* row - the row of data returned by the database cursor
* index_start - the index of the row at which data for this
object is known to start
* offset - the number of additional fields that are known to
exist in row for `klass`. This usually means the number of
annotated results on `klass`.
* using - the database alias on which the query is being executed.
* klass_info - result of the get_klass_info function
"""
if klass_info is None:
return None
klass, field_names, field_count, related_fields, reverse_related_fields = klass_info
fields = row[index_start : index_start + field_count]
# If all the select_related columns are None, then the related
# object must be non-existent - set the relation to None.
# Otherwise, construct the related object.
if fields == (None,) * field_count:
obj = None
else:
if field_names:
obj = klass(**dict(zip(field_names, fields))) obj = klass(**dict(zip(field_names, fields)))
else:
obj = klass(*fields)
# If an object was retrieved, set the database state. # If an object was retrieved, set the database state.
if obj: if obj:
obj._state.db = using obj._state.db = using
obj._state.adding = False obj._state.adding = False
# Instantiate related fields
index_end = index_start + field_count + offset index_end = index_start + field_count + offset
# Iterate over each related object, populating any # Iterate over each related object, populating any
# select_related() fields # select_related() fields
for f in klass._meta.fields: for f, klass_info in related_fields:
if not select_related_descend(f, restricted, requested):
continue
if restricted:
next = requested[f.name]
else:
next = None
# Recursively retrieve the data for the related object # Recursively retrieve the data for the related object
cached_row = get_cached_row(f.rel.to, row, index_end, using, cached_row = get_cached_row(row, index_end, using, klass_info)
max_depth, cur_depth+1, next, only_load=only_load)
# If the recursive descent found an object, populate the # If the recursive descent found an object, populate the
# descriptor caches relevant to the object # descriptor caches relevant to the object
if cached_row: if cached_row:
@ -1299,45 +1336,35 @@ def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0,
# Now do the same, but for reverse related objects. # Now do the same, but for reverse related objects.
# Only handle the restricted case - i.e., don't do a depth # Only handle the restricted case - i.e., don't do a depth
# descent into reverse relations unless explicitly requested # descent into reverse relations unless explicitly requested
if restricted: for f, klass_info in reverse_related_fields:
related_fields = [ # Recursively retrieve the data for the related object
(o.field, o.model) cached_row = get_cached_row(row, index_end, using, klass_info)
for o in klass._meta.get_all_related_objects() # If the recursive descent found an object, populate the
if o.field.unique # descriptor caches relevant to the object
] if cached_row:
for f, model in related_fields: rel_obj, index_end = cached_row
if not select_related_descend(f, restricted, requested, reverse=True): if obj is not None:
continue # If the field is unique, populate the
next = requested[f.related_query_name()] # reverse descriptor cache
# Recursively retrieve the data for the related object setattr(obj, f.related.get_cache_name(), rel_obj)
cached_row = get_cached_row(model, row, index_end, using, if rel_obj is not None:
max_depth, cur_depth+1, next, only_load=only_load, local_only=True) # If the related object exists, populate
# If the recursive descent found an object, populate the # the descriptor cache.
# descriptor caches relevant to the object setattr(rel_obj, f.get_cache_name(), obj)
if cached_row: # Now populate all the non-local field values
rel_obj, index_end = cached_row # on the related object
if obj is not None: for rel_field, rel_model in rel_obj._meta.get_fields_with_model():
# If the field is unique, populate the if rel_model is not None:
# reverse descriptor cache setattr(rel_obj, rel_field.attname, getattr(obj, rel_field.attname))
setattr(obj, f.related.get_cache_name(), rel_obj) # populate the field cache for any related object
if rel_obj is not None: # that has already been retrieved
# If the related object exists, populate if rel_field.rel:
# the descriptor cache. try:
setattr(rel_obj, f.get_cache_name(), obj) cached_obj = getattr(obj, rel_field.get_cache_name())
# Now populate all the non-local field values setattr(rel_obj, rel_field.get_cache_name(), cached_obj)
# on the related object except AttributeError:
for rel_field,rel_model in rel_obj._meta.get_fields_with_model(): # Related object hasn't been cached yet
if rel_model is not None: pass
setattr(rel_obj, rel_field.attname, getattr(obj, rel_field.attname))
# populate the field cache for any related object
# that has already been retrieved
if rel_field.rel:
try:
cached_obj = getattr(obj, rel_field.get_cache_name())
setattr(rel_obj, rel_field.get_cache_name(), cached_obj)
except AttributeError:
# Related object hasn't been cached yet
pass
return obj, index_end return obj, index_end