Fixed #16902 - select_related() results in poor performance
Thanks to ivan_virabyan for the great patch! (For the record, some very small tweaks were made by me). git-svn-id: http://code.djangoproject.com/svn/django/trunk@16929 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
03d4a8d1b6
commit
d30fbf8b78
|
@ -265,12 +265,13 @@ class QuerySet(object):
|
||||||
db = self.db
|
db = self.db
|
||||||
model = self.model
|
model = self.model
|
||||||
compiler = self.query.get_compiler(using=db)
|
compiler = self.query.get_compiler(using=db)
|
||||||
|
if fill_cache:
|
||||||
|
klass_info = get_klass_info(model, max_depth=max_depth,
|
||||||
|
requested=requested, only_load=only_load)
|
||||||
for row in compiler.results_iter():
|
for row in compiler.results_iter():
|
||||||
if fill_cache:
|
if fill_cache:
|
||||||
obj, _ = get_cached_row(model, row,
|
obj, _ = get_cached_row(row, index_start, db, klass_info,
|
||||||
index_start, using=db, max_depth=max_depth,
|
offset=len(aggregate_select))
|
||||||
requested=requested, offset=len(aggregate_select),
|
|
||||||
only_load=only_load)
|
|
||||||
else:
|
else:
|
||||||
if skip:
|
if skip:
|
||||||
row_data = row[index_start:aggregate_start]
|
row_data = row[index_start:aggregate_start]
|
||||||
|
@ -1174,22 +1175,16 @@ class EmptyQuerySet(QuerySet):
|
||||||
# situations).
|
# situations).
|
||||||
value_annotation = False
|
value_annotation = False
|
||||||
|
|
||||||
|
def get_klass_info(klass, max_depth=0, cur_depth=0, requested=None,
|
||||||
def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0,
|
only_load=None, local_only=False):
|
||||||
requested=None, offset=0, only_load=None, local_only=False):
|
|
||||||
"""
|
"""
|
||||||
Helper function that recursively returns an object with the specified
|
Helper function that recursively returns information for a klass, to be
|
||||||
related attributes already populated.
|
used in get_cached_row. It exists just to compute this information only
|
||||||
|
once for the entire queryset. Otherwise it would be computed for each row, which
|
||||||
This method may be called recursively to populate deep select_related()
|
leads to poor performance on large querysets.
|
||||||
clauses.
|
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
* klass - the class to retrieve (and instantiate)
|
* klass - the class to retrieve (and instantiate)
|
||||||
* row - the row of data returned by the database cursor
|
|
||||||
* index_start - the index of the row at which data for this
|
|
||||||
object is known to start
|
|
||||||
* using - the database alias on which the query is being executed.
|
|
||||||
* max_depth - the maximum depth to which a select_related()
|
* max_depth - the maximum depth to which a select_related()
|
||||||
relationship should be explored.
|
relationship should be explored.
|
||||||
* cur_depth - the current depth in the select_related() tree.
|
* cur_depth - the current depth in the select_related() tree.
|
||||||
|
@ -1198,20 +1193,16 @@ def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0,
|
||||||
that is to be retrieved. keys are field names; values are
|
that is to be retrieved. keys are field names; values are
|
||||||
dictionaries describing the keys on that related object that
|
dictionaries describing the keys on that related object that
|
||||||
are themselves to be select_related().
|
are themselves to be select_related().
|
||||||
* offset - the number of additional fields that are known to
|
|
||||||
exist in `row` for `klass`. This usually means the number of
|
|
||||||
annotated results on `klass`.
|
|
||||||
* only_load - if the query has had only() or defer() applied,
|
* only_load - if the query has had only() or defer() applied,
|
||||||
this is the list of field names that will be returned. If None,
|
this is the list of field names that will be returned. If None,
|
||||||
the full field list for `klass` can be assumed.
|
the full field list for `klass` can be assumed.
|
||||||
* local_only - Only populate local fields. This is used when building
|
* local_only - Only populate local fields. This is used when
|
||||||
following reverse select-related relations
|
following reverse select-related relations
|
||||||
"""
|
"""
|
||||||
if max_depth and requested is None and cur_depth > max_depth:
|
if max_depth and requested is None and cur_depth > max_depth:
|
||||||
# We've recursed deeply enough; stop now.
|
# We've recursed deeply enough; stop now.
|
||||||
return None
|
return None
|
||||||
|
|
||||||
restricted = requested is not None
|
|
||||||
if only_load:
|
if only_load:
|
||||||
load_fields = only_load.get(klass)
|
load_fields = only_load.get(klass)
|
||||||
# When we create the object, we will also be populating
|
# When we create the object, we will also be populating
|
||||||
|
@ -1223,6 +1214,7 @@ def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0,
|
||||||
load_fields.update(fields)
|
load_fields.update(fields)
|
||||||
else:
|
else:
|
||||||
load_fields = None
|
load_fields = None
|
||||||
|
|
||||||
if load_fields:
|
if load_fields:
|
||||||
# Handle deferred fields.
|
# Handle deferred fields.
|
||||||
skip = set()
|
skip = set()
|
||||||
|
@ -1237,52 +1229,97 @@ def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0,
|
||||||
init_list.append(field.attname)
|
init_list.append(field.attname)
|
||||||
# Retrieve all the requested fields
|
# Retrieve all the requested fields
|
||||||
field_count = len(init_list)
|
field_count = len(init_list)
|
||||||
fields = row[index_start : index_start + field_count]
|
if skip:
|
||||||
# If all the select_related columns are None, then the related
|
|
||||||
# object must be non-existent - set the relation to None.
|
|
||||||
# Otherwise, construct the related object.
|
|
||||||
if fields == (None,) * field_count:
|
|
||||||
obj = None
|
|
||||||
elif skip:
|
|
||||||
klass = deferred_class_factory(klass, skip)
|
klass = deferred_class_factory(klass, skip)
|
||||||
obj = klass(**dict(zip(init_list, fields)))
|
field_names = init_list
|
||||||
else:
|
else:
|
||||||
obj = klass(*fields)
|
field_names = ()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Load all fields on klass
|
# Load all fields on klass
|
||||||
if local_only:
|
|
||||||
|
# We try not to populate the field_names variable, for performance reasons.
|
||||||
|
# If field_names variable is set, it is used to instantiate desired fields,
|
||||||
|
# by passing **dict(zip(field_names, fields)) as kwargs to Model.__init__ method.
|
||||||
|
# But kwargs version of Model.__init__ is slower, so we should avoid using
|
||||||
|
# it when it is not really necessary.
|
||||||
|
if local_only and len(klass._meta.local_fields) != len(klass._meta.fields):
|
||||||
|
field_count = len(klass._meta.local_fields)
|
||||||
field_names = [f.attname for f in klass._meta.local_fields]
|
field_names = [f.attname for f in klass._meta.local_fields]
|
||||||
else:
|
else:
|
||||||
field_names = [f.attname for f in klass._meta.fields]
|
field_count = len(klass._meta.fields)
|
||||||
field_count = len(field_names)
|
field_names = ()
|
||||||
fields = row[index_start : index_start + field_count]
|
|
||||||
# If all the select_related columns are None, then the related
|
restricted = requested is not None
|
||||||
# object must be non-existent - set the relation to None.
|
|
||||||
# Otherwise, construct the related object.
|
related_fields = []
|
||||||
if fields == (None,) * field_count:
|
for f in klass._meta.fields:
|
||||||
obj = None
|
if select_related_descend(f, restricted, requested):
|
||||||
else:
|
if restricted:
|
||||||
|
next = requested[f.name]
|
||||||
|
else:
|
||||||
|
next = None
|
||||||
|
klass_info = get_klass_info(f.rel.to, max_depth=max_depth, cur_depth=cur_depth+1,
|
||||||
|
requested=next, only_load=only_load)
|
||||||
|
related_fields.append((f, klass_info))
|
||||||
|
|
||||||
|
reverse_related_fields = []
|
||||||
|
if restricted:
|
||||||
|
for o in klass._meta.get_all_related_objects():
|
||||||
|
if o.field.unique and select_related_descend(o.field, restricted, requested, reverse=True):
|
||||||
|
next = requested[o.field.related_query_name()]
|
||||||
|
klass_info = get_klass_info(o.model, max_depth=max_depth, cur_depth=cur_depth+1,
|
||||||
|
requested=next, only_load=only_load, local_only=True)
|
||||||
|
reverse_related_fields.append((o.field, klass_info))
|
||||||
|
|
||||||
|
return klass, field_names, field_count, related_fields, reverse_related_fields
|
||||||
|
|
||||||
|
|
||||||
|
def get_cached_row(row, index_start, using, klass_info, offset=0):
|
||||||
|
"""
|
||||||
|
Helper function that recursively returns an object with the specified
|
||||||
|
related attributes already populated.
|
||||||
|
|
||||||
|
This method may be called recursively to populate deep select_related()
|
||||||
|
clauses.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
* row - the row of data returned by the database cursor
|
||||||
|
* index_start - the index of the row at which data for this
|
||||||
|
object is known to start
|
||||||
|
* offset - the number of additional fields that are known to
|
||||||
|
exist in row for `klass`. This usually means the number of
|
||||||
|
annotated results on `klass`.
|
||||||
|
* using - the database alias on which the query is being executed.
|
||||||
|
* klass_info - result of the get_klass_info function
|
||||||
|
"""
|
||||||
|
if klass_info is None:
|
||||||
|
return None
|
||||||
|
klass, field_names, field_count, related_fields, reverse_related_fields = klass_info
|
||||||
|
|
||||||
|
fields = row[index_start : index_start + field_count]
|
||||||
|
# If all the select_related columns are None, then the related
|
||||||
|
# object must be non-existent - set the relation to None.
|
||||||
|
# Otherwise, construct the related object.
|
||||||
|
if fields == (None,) * field_count:
|
||||||
|
obj = None
|
||||||
|
else:
|
||||||
|
if field_names:
|
||||||
obj = klass(**dict(zip(field_names, fields)))
|
obj = klass(**dict(zip(field_names, fields)))
|
||||||
|
else:
|
||||||
|
obj = klass(*fields)
|
||||||
|
|
||||||
# If an object was retrieved, set the database state.
|
# If an object was retrieved, set the database state.
|
||||||
if obj:
|
if obj:
|
||||||
obj._state.db = using
|
obj._state.db = using
|
||||||
obj._state.adding = False
|
obj._state.adding = False
|
||||||
|
|
||||||
|
# Instantiate related fields
|
||||||
index_end = index_start + field_count + offset
|
index_end = index_start + field_count + offset
|
||||||
# Iterate over each related object, populating any
|
# Iterate over each related object, populating any
|
||||||
# select_related() fields
|
# select_related() fields
|
||||||
for f in klass._meta.fields:
|
for f, klass_info in related_fields:
|
||||||
if not select_related_descend(f, restricted, requested):
|
|
||||||
continue
|
|
||||||
if restricted:
|
|
||||||
next = requested[f.name]
|
|
||||||
else:
|
|
||||||
next = None
|
|
||||||
# Recursively retrieve the data for the related object
|
# Recursively retrieve the data for the related object
|
||||||
cached_row = get_cached_row(f.rel.to, row, index_end, using,
|
cached_row = get_cached_row(row, index_end, using, klass_info)
|
||||||
max_depth, cur_depth+1, next, only_load=only_load)
|
|
||||||
# If the recursive descent found an object, populate the
|
# If the recursive descent found an object, populate the
|
||||||
# descriptor caches relevant to the object
|
# descriptor caches relevant to the object
|
||||||
if cached_row:
|
if cached_row:
|
||||||
|
@ -1299,45 +1336,35 @@ def get_cached_row(klass, row, index_start, using, max_depth=0, cur_depth=0,
|
||||||
# Now do the same, but for reverse related objects.
|
# Now do the same, but for reverse related objects.
|
||||||
# Only handle the restricted case - i.e., don't do a depth
|
# Only handle the restricted case - i.e., don't do a depth
|
||||||
# descent into reverse relations unless explicitly requested
|
# descent into reverse relations unless explicitly requested
|
||||||
if restricted:
|
for f, klass_info in reverse_related_fields:
|
||||||
related_fields = [
|
# Recursively retrieve the data for the related object
|
||||||
(o.field, o.model)
|
cached_row = get_cached_row(row, index_end, using, klass_info)
|
||||||
for o in klass._meta.get_all_related_objects()
|
# If the recursive descent found an object, populate the
|
||||||
if o.field.unique
|
# descriptor caches relevant to the object
|
||||||
]
|
if cached_row:
|
||||||
for f, model in related_fields:
|
rel_obj, index_end = cached_row
|
||||||
if not select_related_descend(f, restricted, requested, reverse=True):
|
if obj is not None:
|
||||||
continue
|
# If the field is unique, populate the
|
||||||
next = requested[f.related_query_name()]
|
# reverse descriptor cache
|
||||||
# Recursively retrieve the data for the related object
|
setattr(obj, f.related.get_cache_name(), rel_obj)
|
||||||
cached_row = get_cached_row(model, row, index_end, using,
|
if rel_obj is not None:
|
||||||
max_depth, cur_depth+1, next, only_load=only_load, local_only=True)
|
# If the related object exists, populate
|
||||||
# If the recursive descent found an object, populate the
|
# the descriptor cache.
|
||||||
# descriptor caches relevant to the object
|
setattr(rel_obj, f.get_cache_name(), obj)
|
||||||
if cached_row:
|
# Now populate all the non-local field values
|
||||||
rel_obj, index_end = cached_row
|
# on the related object
|
||||||
if obj is not None:
|
for rel_field, rel_model in rel_obj._meta.get_fields_with_model():
|
||||||
# If the field is unique, populate the
|
if rel_model is not None:
|
||||||
# reverse descriptor cache
|
setattr(rel_obj, rel_field.attname, getattr(obj, rel_field.attname))
|
||||||
setattr(obj, f.related.get_cache_name(), rel_obj)
|
# populate the field cache for any related object
|
||||||
if rel_obj is not None:
|
# that has already been retrieved
|
||||||
# If the related object exists, populate
|
if rel_field.rel:
|
||||||
# the descriptor cache.
|
try:
|
||||||
setattr(rel_obj, f.get_cache_name(), obj)
|
cached_obj = getattr(obj, rel_field.get_cache_name())
|
||||||
# Now populate all the non-local field values
|
setattr(rel_obj, rel_field.get_cache_name(), cached_obj)
|
||||||
# on the related object
|
except AttributeError:
|
||||||
for rel_field,rel_model in rel_obj._meta.get_fields_with_model():
|
# Related object hasn't been cached yet
|
||||||
if rel_model is not None:
|
pass
|
||||||
setattr(rel_obj, rel_field.attname, getattr(obj, rel_field.attname))
|
|
||||||
# populate the field cache for any related object
|
|
||||||
# that has already been retrieved
|
|
||||||
if rel_field.rel:
|
|
||||||
try:
|
|
||||||
cached_obj = getattr(obj, rel_field.get_cache_name())
|
|
||||||
setattr(rel_obj, rel_field.get_cache_name(), cached_obj)
|
|
||||||
except AttributeError:
|
|
||||||
# Related object hasn't been cached yet
|
|
||||||
pass
|
|
||||||
return obj, index_end
|
return obj, index_end
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue