Fixed #5420 -- Added support for delayed loading of model fields.

In extreme cases, some fields are expensive to load from the database
(e.g. GIS fields requiring conversion, or large text fields). This
commit adds defer() and only() methods to querysets that allow the
caller to specify which fields should not be loaded unless they are
accessed.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@10090 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Malcolm Tredinnick 2009-03-19 09:06:04 +00:00
parent 96d5d434fa
commit 29050ef999
10 changed files with 685 additions and 111 deletions

View File

@ -12,7 +12,8 @@ import django.db.models.manager # Imported to register signal handler.
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
from django.db.models.fields import AutoField, FieldDoesNotExist from django.db.models.fields import AutoField, FieldDoesNotExist
from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField
from django.db.models.query import delete_objects, Q, CollectedObjects from django.db.models.query import delete_objects, Q
from django.db.models.query_utils import CollectedObjects, DeferredAttribute
from django.db.models.options import Options from django.db.models.options import Options
from django.db import connection, transaction, DatabaseError from django.db import connection, transaction, DatabaseError
from django.db.models import signals from django.db.models import signals
@ -235,6 +236,7 @@ class ModelBase(type):
class Model(object): class Model(object):
__metaclass__ = ModelBase __metaclass__ = ModelBase
_deferred = False
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
signals.pre_init.send(sender=self.__class__, args=args, kwargs=kwargs) signals.pre_init.send(sender=self.__class__, args=args, kwargs=kwargs)
@ -271,6 +273,13 @@ class Model(object):
for field in fields_iter: for field in fields_iter:
is_related_object = False is_related_object = False
if kwargs: if kwargs:
# This slightly odd construct is so that we can access any
# data-descriptor object (DeferredAttribute) without triggering
# its __get__ method.
if (field.attname not in kwargs and
isinstance(self.__class__.__dict__.get(field.attname), DeferredAttribute)):
# This field will be populated on request.
continue
if isinstance(field.rel, ManyToOneRel): if isinstance(field.rel, ManyToOneRel):
try: try:
# Assume object instance was passed in. # Assume object instance was passed in.
@ -332,6 +341,31 @@ class Model(object):
def __hash__(self): def __hash__(self):
return hash(self._get_pk_val()) return hash(self._get_pk_val())
def __reduce__(self):
    """
    Pickling hook. Instances of ordinary models simply defer to Python's
    standard handling. Instances of deferred-field models belong to
    dynamically created classes, and only module-level classes can be
    pickled by the default path, so for those we return the pieces needed
    to rebuild the class: the model_unpickle callable, its arguments (the
    model, the pk value and the deferred attribute names) and the instance
    __dict__ as state.
    """
    if not self._deferred:
        return super(Model, self).__reduce__()
    class_attrs = self.__class__.__dict__
    deferred_names = []
    pk_val = None
    for field in self._meta.fields:
        # Look in the class __dict__ directly so that the descriptor's
        # __get__ method is not triggered.
        descriptor = class_attrs.get(field.attname)
        if not isinstance(descriptor, DeferredAttribute):
            continue
        deferred_names.append(field.attname)
        if pk_val is None:
            # Every DeferredAttribute on this class carries the same pk
            # value and model, so reading them once is enough.
            pk_val = descriptor.pk_value
            model = descriptor.model_ref()
    return (model_unpickle, (model, pk_val, deferred_names), self.__dict__)
def _get_pk_val(self, meta=None): def _get_pk_val(self, meta=None):
if not meta: if not meta:
meta = self._meta meta = self._meta
@ -591,6 +625,15 @@ def get_absolute_url(opts, func, self, *args, **kwargs):
class Empty(object): class Empty(object):
pass pass
def model_unpickle(model, pk_val, attrs):
    """
    Used to unpickle Model subclasses with deferred fields.

    Rebuilds the dynamically created deferred class for "model" (with the
    attributes named in "attrs" deferred and tied to "pk_val") and returns a
    new, uninitialised instance of it, created via __new__ so that __init__
    is not run.
    """
    # NOTE(review): imported locally, presumably to avoid a circular import
    # at module load time -- confirm.
    from django.db.models.query_utils import deferred_class_factory
    cls = deferred_class_factory(model, pk_val, attrs)
    return cls.__new__(cls)
# The marker attribute is spelled "__safe_for_unpickling__" (the same name
# that is set on deferred_class_factory).
model_unpickle.__safe_for_unpickling__ = True
if sys.version_info < (2, 5): if sys.version_info < (2, 5):
# Prior to Python 2.5, Exception was an old-style class # Prior to Python 2.5, Exception was an old-style class
def subclass_exception(name, parent, unused): def subclass_exception(name, parent, unused):

View File

@ -167,6 +167,12 @@ class Manager(object):
def reverse(self, *args, **kwargs): def reverse(self, *args, **kwargs):
return self.get_query_set().reverse(*args, **kwargs) return self.get_query_set().reverse(*args, **kwargs)
def defer(self, *args, **kwargs):
    """
    Proxies to defer() on this manager's queryset, passing all arguments
    through unchanged.
    """
    queryset = self.get_query_set()
    return queryset.defer(*args, **kwargs)
def only(self, *args, **kwargs):
    """
    Proxies to only() on this manager's queryset, passing all arguments
    through unchanged.
    """
    queryset = self.get_query_set()
    return queryset.only(*args, **kwargs)
def _insert(self, values, **kwargs): def _insert(self, values, **kwargs):
return insert_query(self.model, values, **kwargs) return insert_query(self.model, values, **kwargs)

View File

@ -477,3 +477,9 @@ class Options(object):
self._ordered_objects = objects self._ordered_objects = objects
return self._ordered_objects return self._ordered_objects
def pk_index(self):
    """
    Gives the position of the primary key field within the self.fields
    list.
    """
    field_list = self.fields
    return field_list.index(self.pk)

View File

@ -1,3 +1,7 @@
"""
The main QuerySet implementation. This provides the public API for the ORM.
"""
try: try:
set set
except NameError: except NameError:
@ -6,9 +10,8 @@ except NameError:
from django.db import connection, transaction, IntegrityError from django.db import connection, transaction, IntegrityError
from django.db.models.aggregates import Aggregate from django.db.models.aggregates import Aggregate
from django.db.models.fields import DateField from django.db.models.fields import DateField
from django.db.models.query_utils import Q, select_related_descend from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory
from django.db.models import signals, sql from django.db.models import signals, sql
from django.utils.datastructures import SortedDict
# Used to control how many objects are worked with at once in some cases (e.g. # Used to control how many objects are worked with at once in some cases (e.g.
@ -22,102 +25,6 @@ REPR_OUTPUT_SIZE = 20
# Pull into this namespace for backwards compatibility. # Pull into this namespace for backwards compatibility.
EmptyResultSet = sql.EmptyResultSet EmptyResultSet = sql.EmptyResultSet
class CyclicDependency(Exception):
"""
An error when dealing with a collection of objects that have a cyclic
dependency, i.e. when deleting multiple objects.
"""
pass
class CollectedObjects(object):
"""
A container that stores keys and lists of values along with remembering the
parent objects for all the keys.
This is used for the database object deletion routines so that we can
calculate the 'leaf' objects which should be deleted first.
"""
def __init__(self):
self.data = {}
self.children = {}
def add(self, model, pk, obj, parent_model, nullable=False):
"""
Adds an item to the container.
Arguments:
* model - the class of the object being added.
* pk - the primary key.
* obj - the object itself.
* parent_model - the model of the parent object that this object was
reached through.
* nullable - should be True if this relation is nullable.
Returns True if the item already existed in the structure and
False otherwise.
"""
d = self.data.setdefault(model, SortedDict())
retval = pk in d
d[pk] = obj
# Nullable relationships can be ignored -- they are nulled out before
# deleting, and therefore do not affect the order in which objects
# have to be deleted.
if parent_model is not None and not nullable:
self.children.setdefault(parent_model, []).append(model)
return retval
def __contains__(self, key):
return self.data.__contains__(key)
def __getitem__(self, key):
return self.data[key]
def __nonzero__(self):
return bool(self.data)
def iteritems(self):
for k in self.ordered_keys():
yield k, self[k]
def items(self):
return list(self.iteritems())
def keys(self):
return self.ordered_keys()
def ordered_keys(self):
"""
Returns the models in the order that they should be dealt with (i.e.
models with no dependencies first).
"""
dealt_with = SortedDict()
# Start with items that have no children
models = self.data.keys()
while len(dealt_with) < len(models):
found = False
for model in models:
if model in dealt_with:
continue
children = self.children.setdefault(model, [])
if len([c for c in children if c not in dealt_with]) == 0:
dealt_with[model] = None
found = True
if not found:
raise CyclicDependency(
"There is a cyclic dependency of items to be processed.")
return dealt_with.keys()
def unordered_keys(self):
"""
Fallback for the case where is a cyclic dependency but we don't care.
"""
return self.data.keys()
class QuerySet(object): class QuerySet(object):
""" """
Represents a lazy database lookup for a set of objects. Represents a lazy database lookup for a set of objects.
@ -275,6 +182,11 @@ class QuerySet(object):
extra_select = self.query.extra_select.keys() extra_select = self.query.extra_select.keys()
aggregate_select = self.query.aggregate_select.keys() aggregate_select = self.query.aggregate_select.keys()
only_load = self.query.get_loaded_field_names()
if not fill_cache:
fields = self.model._meta.fields
pk_idx = self.model._meta.pk_index()
index_start = len(extra_select) index_start = len(extra_select)
aggregate_start = index_start + len(self.model._meta.fields) aggregate_start = index_start + len(self.model._meta.fields)
@ -282,9 +194,30 @@ class QuerySet(object):
if fill_cache: if fill_cache:
obj, _ = get_cached_row(self.model, row, obj, _ = get_cached_row(self.model, row,
index_start, max_depth, index_start, max_depth,
requested=requested, offset=len(aggregate_select)) requested=requested, offset=len(aggregate_select),
only_load=only_load)
else: else:
# omit aggregates in object creation load_fields = only_load.get(self.model)
if load_fields:
# Some fields have been deferred, so we have to initialise
# via keyword arguments.
row_data = row[index_start:aggregate_start]
pk_val = row_data[pk_idx]
skip = set()
init_list = []
for field in fields:
if field.name not in load_fields:
skip.add(field.attname)
else:
init_list.append(field.attname)
if skip:
model_cls = deferred_class_factory(self.model, pk_val,
skip)
obj = model_cls(**dict(zip(init_list, row_data)))
else:
obj = self.model(*row[index_start:aggregate_start])
else:
# Omit aggregates in object creation.
obj = self.model(*row[index_start:aggregate_start]) obj = self.model(*row[index_start:aggregate_start])
for i, k in enumerate(extra_select): for i, k in enumerate(extra_select):
@ -655,6 +588,35 @@ class QuerySet(object):
clone.query.standard_ordering = not clone.query.standard_ordering clone.query.standard_ordering = not clone.query.standard_ordering
return clone return clone
def defer(self, *fields):
    """
    Returns a clone of this queryset in which loading of the named fields
    is postponed until they are accessed. The names are added to whatever
    is already deferred. Passing None as the one and only argument is the
    exception: it acts as a reset and removes all deferrals.
    """
    clone = self._clone()
    query = clone.query
    if fields != (None,):
        query.add_deferred_loading(fields)
    else:
        query.clear_deferred_loading()
    return clone
def only(self, *fields):
    """
    Essentially, the opposite of defer. Only the fields passed into this
    method and that are not already specified as deferred are loaded
    immediately when the queryset is evaluated.

    Returns a clone of the queryset; raises TypeError if None is passed as
    the sole argument.
    """
    # "fields" is always a tuple (it is collected via *fields), so it must
    # be compared against a tuple; comparing with a list is never true.
    if fields == (None,):
        # Can only pass None to defer(), not only(), as the reset option.
        # That won't stop people trying to do this, so let's be explicit.
        raise TypeError("Cannot pass None as an argument to only().")
    clone = self._clone()
    clone.query.add_immediate_loading(fields)
    return clone
################### ###################
# PRIVATE METHODS # # PRIVATE METHODS #
################### ###################
@ -757,6 +719,7 @@ class ValuesQuerySet(QuerySet):
Called by the _clone() method after initializing the rest of the Called by the _clone() method after initializing the rest of the
instance. instance.
""" """
self.query.clear_deferred_loading()
self.query.clear_select_fields() self.query.clear_select_fields()
if self._fields: if self._fields:
@ -847,9 +810,9 @@ class ValuesListQuerySet(ValuesQuerySet):
for row in self.query.results_iter(): for row in self.query.results_iter():
yield tuple(row) yield tuple(row)
else: else:
# When extra(select=...) or an annotation is involved, the extra cols are # When extra(select=...) or an annotation is involved, the extra
# always at the start of the row, and we need to reorder the fields # cols are always at the start of the row, and we need to reorder
# to match the order in self._fields. # the fields to match the order in self._fields.
extra_names = self.query.extra_select.keys() extra_names = self.query.extra_select.keys()
field_names = self.field_names field_names = self.field_names
aggregate_names = self.query.aggregate_select.keys() aggregate_names = self.query.aggregate_select.keys()
@ -884,6 +847,7 @@ class DateQuerySet(QuerySet):
Called by the _clone() method after initializing the rest of the Called by the _clone() method after initializing the rest of the
instance. instance.
""" """
self.query.clear_deferred_loading()
self.query = self.query.clone(klass=sql.DateQuery, setup=True) self.query = self.query.clone(klass=sql.DateQuery, setup=True)
self.query.select = [] self.query.select = []
field = self.model._meta.get_field(self._field_name, many_to_many=False) field = self.model._meta.get_field(self._field_name, many_to_many=False)
@ -935,7 +899,7 @@ class EmptyQuerySet(QuerySet):
def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0, def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
requested=None, offset=0): requested=None, offset=0, only_load=None):
""" """
Helper function that recursively returns an object with the specified Helper function that recursively returns an object with the specified
related attributes already populated. related attributes already populated.
@ -950,6 +914,23 @@ def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
if not [x for x in fields if x is not None]: if not [x for x in fields if x is not None]:
# If we only have a list of Nones, there was not related object. # If we only have a list of Nones, there was not related object.
obj = None obj = None
else:
load_fields = only_load and only_load.get(klass) or None
if load_fields:
# Handle deferred fields.
skip = set()
init_list = []
pk_val = fields[klass._meta.pk_index()]
for field in klass._meta.fields:
if field.name not in load_fields:
skip.add(field.name)
else:
init_list.append(field.attname)
if skip:
klass = deferred_class_factory(klass, pk_val, skip)
obj = klass(**dict(zip(init_list, fields)))
else:
obj = klass(*fields)
else: else:
obj = klass(*fields) obj = klass(*fields)
index_end += offset index_end += offset

View File

@ -1,13 +1,115 @@
""" """
Various data structures used in query construction. Various data structures used in query construction.
Factored out from django.db.models.query so that they can also be used by other Factored out from django.db.models.query to avoid making the main module very
modules without getting into circular import difficulties. large and/or so that they can be used by other modules without getting into
circular import difficulties.
""" """
import weakref
from copy import deepcopy from copy import deepcopy
from django.utils import tree from django.utils import tree
from django.utils.datastructures import SortedDict
try:
sorted
except NameError:
from django.utils.itercompat import sorted # For Python 2.3.
class CyclicDependency(Exception):
    """
    Raised when a collection of objects to be processed contains a cyclic
    dependency, e.g. when deleting multiple objects.
    """
class CollectedObjects(object):
    """
    A container mapping each model to an ordered collection of its objects
    (keyed by primary key), which also remembers, for every parent model,
    the models that were reached through it.

    The database object deletion routines use this to calculate the 'leaf'
    objects, which should be deleted first.
    """

    def __init__(self):
        # model -> SortedDict of {pk: object}
        self.data = {}
        # parent model -> list of models reached through it
        self.children = {}

    def add(self, model, pk, obj, parent_model, nullable=False):
        """
        Adds an item to the container.

        Arguments:
        * model - the class of the object being added.
        * pk - the primary key.
        * obj - the object itself.
        * parent_model - the model of the parent object that this object was
          reached through.
        * nullable - should be True if this relation is nullable.

        Returns True if the item already existed in the structure and
        False otherwise.
        """
        instances = self.data.setdefault(model, SortedDict())
        already_present = pk in instances
        instances[pk] = obj
        # Nullable relationships can be ignored -- they are nulled out before
        # deleting, and therefore do not affect the order in which objects
        # have to be deleted.
        if not (parent_model is None or nullable):
            self.children.setdefault(parent_model, []).append(model)
        return already_present

    def __contains__(self, key):
        return key in self.data

    def __getitem__(self, key):
        return self.data[key]

    def __nonzero__(self):
        return bool(self.data)

    def iteritems(self):
        # Yields (model, objects) pairs in dependency order.
        for model in self.ordered_keys():
            yield model, self[model]

    def items(self):
        return list(self.iteritems())

    def keys(self):
        return self.ordered_keys()

    def ordered_keys(self):
        """
        Returns the models in the order that they should be dealt with (i.e.
        models with no dependencies first). Raises CyclicDependency if a
        full pass over the models makes no progress.
        """
        resolved = SortedDict()
        # Start with items that have no children
        models = self.data.keys()
        total = len(models)
        while len(resolved) < total:
            progressed = False
            for model in models:
                if model in resolved:
                    continue
                pending = [child
                           for child in self.children.setdefault(model, [])
                           if child not in resolved]
                if not pending:
                    resolved[model] = None
                    progressed = True
            if not progressed:
                raise CyclicDependency(
                    "There is a cyclic dependency of items to be processed.")
        return resolved.keys()

    def unordered_keys(self):
        """
        Fallback for the case where there is a cyclic dependency but we
        don't care.
        """
        return self.data.keys()
class QueryWrapper(object): class QueryWrapper(object):
""" """
@ -51,6 +153,39 @@ class Q(tree.Node):
obj.negate() obj.negate()
return obj return obj
class DeferredAttribute(object):
    """
    A wrapper for a deferred-loading field. When the value is read from this
    object the first time, the query is executed.

    This is a data descriptor (it defines both __get__ and __set__). Note
    that the loaded or assigned value is cached on the descriptor object
    itself, not on the model instance.
    """
    def __init__(self, field_name, pk_value, model):
        self.field_name = field_name
        self.pk_value = pk_value
        # Only a weak reference to the model is held.
        self.model_ref = weakref.ref(model)
        self.loaded = False

    def __get__(self, instance, owner):
        """
        Retrieves and caches the value from the datastore on the first lookup.
        Returns the cached value.

        When accessed on the class rather than an instance, the descriptor
        itself is returned and no query is made. If the referenced model has
        been garbage collected, None is returned.
        """
        if instance is None:
            # Class-level access: return the descriptor. (An assert is not
            # suitable here, as asserts are stripped when running with -O.)
            return self
        if not self.loaded:
            model = self.model_ref()
            if model is None:
                return None
            self.value = list(model._base_manager.filter(pk=self.pk_value).values_list(self.field_name, flat=True))[0]
            self.loaded = True
        return self.value

    def __set__(self, instance, value):
        """
        Deferred loading attributes can be set normally (which means there
        will never be a database lookup involved).
        """
        self.value = value
        self.loaded = True
def select_related_descend(field, restricted, requested): def select_related_descend(field, restricted, requested):
""" """
Returns True if this field should be used to descend deeper for Returns True if this field should be used to descend deeper for
@ -67,3 +202,35 @@ def select_related_descend(field, restricted, requested):
if not restricted and field.null: if not restricted and field.null:
return False return False
return True return True
# This function is needed because data descriptors must be defined on a class
# object, not an instance, to have any effect.
def deferred_class_factory(model, pk_value, attrs):
    """
    Builds and returns a subclass of "model" (declared as a proxy in its
    Meta) in which every attribute named in "attrs" is replaced by a
    DeferredAttribute object. The "pk_value" ties those deferred attributes
    to one particular instance of the model.
    """
    class Meta:
        pass
    Meta.proxy = True
    Meta.app_label = model._meta.app_label

    # The app_cache wants a unique name for each model, otherwise the new
    # class won't be created (we get an old one back). Hence the name is
    # derived from the passed in attrs; reusing an old class is fine when
    # the attrs are identical.
    suffix = '_'.join(sorted(list(attrs)))
    name = "%s_Deferred_%s" % (model.__name__, suffix)

    overrides = {}
    for attr in attrs:
        overrides[attr] = DeferredAttribute(attr, pk_value, model)
    overrides["Meta"] = Meta
    overrides["__module__"] = model.__module__
    overrides["_deferred"] = True
    return type(name, (model,), overrides)

# This factory is also what rebuilds the class when model instances with
# deferred fields are unpickled.
deferred_class_factory.__safe_for_unpickling__ = True

View File

@ -94,6 +94,11 @@ class BaseQuery(object):
self.extra_params = () self.extra_params = ()
self.extra_order_by = () self.extra_order_by = ()
# A tuple that is a set of model field names and either True, if these
# are the fields to defer, or False if these are the only fields to
# load.
self.deferred_loading = (set(), True)
def __str__(self): def __str__(self):
""" """
Returns the query as a string of SQL with the parameter values Returns the query as a string of SQL with the parameter values
@ -206,6 +211,7 @@ class BaseQuery(object):
obj.extra_where = self.extra_where obj.extra_where = self.extra_where
obj.extra_params = self.extra_params obj.extra_params = self.extra_params
obj.extra_order_by = self.extra_order_by obj.extra_order_by = self.extra_order_by
obj.deferred_loading = deepcopy(self.deferred_loading)
if self.filter_is_sticky and self.used_aliases: if self.filter_is_sticky and self.used_aliases:
obj.used_aliases = self.used_aliases.copy() obj.used_aliases = self.used_aliases.copy()
else: else:
@ -550,9 +556,101 @@ class BaseQuery(object):
if self.select_related and not self.related_select_cols: if self.select_related and not self.related_select_cols:
self.fill_related_selections() self.fill_related_selections()
def deferred_to_data(self, target, callback):
    """
    Converts the self.deferred_loading data structure to an alternate data
    structure, describing the field that *will* be loaded. This is used to
    compute the columns to select from the database and also by the
    QuerySet class to work out which fields are being initialised on each
    model. Models that have all their fields included aren't mentioned in
    the result, only those that have field restrictions in place.

    The "target" parameter is the instance that is populated (in place).
    The "callback" is a function that is called whenever a (model, field)
    pair need to be added to "target". It accepts three parameters:
    "target", and the model and list of fields being added for that model.

    Returns None; does nothing at all if no field names are recorded in
    self.deferred_loading.
    """
    field_names, defer = self.deferred_loading
    if not field_names:
        return
    orig_opts = self.model._meta
    seen = {}
    must_include = {self.model: set([orig_opts.pk])}
    for field_name in field_names:
        parts = field_name.split(LOOKUP_SEP)
        # Every stored name is relative to the query's own model, so the
        # traversal has to restart from there for each name. Carrying the
        # position over from a previous "foo__bar" name would resolve the
        # next name against the wrong model.
        cur_model = self.model
        opts = orig_opts
        for name in parts[:-1]:
            old_model = cur_model
            source = opts.get_field_by_name(name)[0]
            cur_model = source.rel.to
            opts = cur_model._meta
            # Even if we're "just passing through" this model, we must add
            # both the current model's pk and the related reference field
            # to the things we select.
            must_include[old_model].add(source)
            add_to_dict(must_include, cur_model, opts.pk)
        field, model, _, _ = opts.get_field_by_name(parts[-1])
        if model is None:
            model = cur_model
        add_to_dict(seen, model, field)

    if defer:
        # We need to load all fields for each model, except those that
        # appear in "seen" (for all models that appear in "seen"). The only
        # slight complexity here is handling fields that exist on parent
        # models.
        workset = {}
        for model, values in seen.iteritems():
            for field, f_model in model._meta.get_fields_with_model():
                if field in values:
                    continue
                add_to_dict(workset, f_model or model, field)
        for model, values in must_include.iteritems():
            # If we haven't included a model in workset, we don't add the
            # corresponding must_include fields for that model, since an
            # empty set means "include all fields". That's why there's no
            # "else" branch here.
            if model in workset:
                workset[model].update(values)
        for model, values in workset.iteritems():
            callback(target, model, values)
    else:
        for model, values in must_include.iteritems():
            if model in seen:
                seen[model].update(values)
            else:
                # As we've passed through this model, but not explicitly
                # included any fields, we have to make sure it's mentioned
                # so that only the "must include" fields are pulled in.
                seen[model] = values
        for model, values in seen.iteritems():
            callback(target, model, values)
def deferred_to_columns(self):
    """
    Builds and returns a dictionary, derived from self.deferred_loading,
    that maps table names to the sets of column names which are to be
    loaded.
    """
    table_columns = {}
    self.deferred_to_data(table_columns, self.deferred_to_columns_cb)
    return table_columns
def deferred_to_columns_cb(self, target, model, fields):
    """
    Callback used by deferred_to_columns(). The "target" parameter is the
    dictionary being populated: the column of every field in "fields" is
    added to the set stored under the model's table name (the set is
    created if the table has no entry yet).
    """
    table_name = model._meta.db_table
    add_column = target.setdefault(table_name, set()).add
    for field in fields:
        add_column(field.column)
def get_columns(self, with_aliases=False): def get_columns(self, with_aliases=False):
""" """
Return the list of columns to use in the select statement. If no Returns the list of columns to use in the select statement. If no
columns have been specified, returns all columns relating to fields in columns have been specified, returns all columns relating to fields in
the model. the model.
@ -569,9 +667,14 @@ class BaseQuery(object):
else: else:
col_aliases = set() col_aliases = set()
if self.select: if self.select:
only_load = self.deferred_to_columns()
for col in self.select: for col in self.select:
if isinstance(col, (list, tuple)): if isinstance(col, (list, tuple)):
r = '%s.%s' % (qn(col[0]), qn(col[1])) alias, column = col
table = self.alias_map[alias][TABLE_NAME]
if table in only_load and col not in only_load[table]:
continue
r = '%s.%s' % (qn(alias), qn(column))
if with_aliases: if with_aliases:
if col[1] in col_aliases: if col[1] in col_aliases:
c_alias = 'Col%d' % len(col_aliases) c_alias = 'Col%d' % len(col_aliases)
@ -641,6 +744,7 @@ class BaseQuery(object):
qn = self.quote_name_unless_alias qn = self.quote_name_unless_alias
qn2 = self.connection.ops.quote_name qn2 = self.connection.ops.quote_name
aliases = set() aliases = set()
only_load = self.deferred_to_columns()
proxied_model = opts.proxy and opts.proxy_for_model or 0 proxied_model = opts.proxy and opts.proxy_for_model or 0
if start_alias: if start_alias:
seen = {None: start_alias} seen = {None: start_alias}
@ -661,6 +765,9 @@ class BaseQuery(object):
# aliases will have already been set up in pre_sql_setup(), so # aliases will have already been set up in pre_sql_setup(), so
# we can save time here. # we can save time here.
alias = self.included_inherited_models[model] alias = self.included_inherited_models[model]
table = self.alias_map[alias][TABLE_NAME]
if table in only_load and field.column not in only_load[table]:
continue
if as_pairs: if as_pairs:
result.append((alias, field.column)) result.append((alias, field.column))
continue continue
@ -2014,6 +2121,70 @@ class BaseQuery(object):
if order_by: if order_by:
self.extra_order_by = order_by self.extra_order_by = order_by
def clear_deferred_loading(self):
    """
    Resets self.deferred_loading to its initial state: an empty set of
    field names, flagged as "fields to defer".
    """
    no_fields = set()
    self.deferred_loading = (no_fields, True)
def add_deferred_loading(self, field_names):
    """
    Marks the given model field names as excluded from loading when
    automatic column selection is done. If the query currently tracks
    deferred names, the new names are added to them; if it tracks
    "immediate load" names instead, the new names are removed from that
    set.
    """
    # Fields on related models are stored in the literal double-underscore
    # format, so that we can use a set datastructure. We do the foo__bar
    # splitting and handling when computing the SQL column names (as part of
    # get_columns()).
    existing, defer = self.deferred_loading
    if not defer:
        # Take the names out of the existing "immediate load" set.
        updated = (existing.difference(field_names), False)
    else:
        # Extend the existing deferred set.
        updated = (existing.union(field_names), True)
    self.deferred_loading = updated
def add_immediate_loading(self, field_names):
    """
    Records the given model field names as the ones to retrieve when the
    SQL is executed ("immediate loading" fields). They replace any
    immediate loading names already stored. If names are currently stored
    for deferred loading instead, those are removed from the new names
    before storing them. (That is, immediate loading overrides any existing
    immediate values, but respects existing deferrals.)
    """
    existing, defer = self.deferred_loading
    requested = set(field_names)
    if defer:
        # Existing deferrals win: drop them from the requested names.
        requested = requested.difference(existing)
    self.deferred_loading = (requested, False)
def get_loaded_field_names(self):
    """
    Returns a dictionary mapping models to the set of names of those of
    their fields that will be loaded, for the case where some fields are
    marked to be deferred. A model that is absent from the dictionary has
    none of its fields deferred.

    When nothing is marked for deferral the dictionary is empty.
    """
    loaded_names = {}
    self.deferred_to_data(loaded_names, self.get_loaded_field_names_cb)
    return loaded_names
def get_loaded_field_names_cb(self, target, model, fields):
    """
    Callback used by get_loaded_field_names(). Stores the set of names of
    the given fields in "target" under "model", replacing any existing
    entry for that model.
    """
    names = set()
    for field in fields:
        names.add(field.name)
    target[model] = names
def trim_extra_select(self, names): def trim_extra_select(self, names):
""" """
Removes any aliases in the extra_select dictionary that aren't in Removes any aliases in the extra_select dictionary that aren't in
@ -2180,3 +2351,13 @@ def setup_join_cache(sender, **kwargs):
signals.class_prepared.connect(setup_join_cache) signals.class_prepared.connect(setup_join_cache)
def add_to_dict(data, key, value):
    """
    Helper that puts "value" into the set stored under "key" in "data",
    creating that set first when "key" is not present yet.
    """
    data.setdefault(key, set()).add(value)

View File

@ -768,6 +768,101 @@ of the arguments is required, but you should use at least one of them.
Entry.objects.extra(where=['headline=%s'], params=['Lennon']) Entry.objects.extra(where=['headline=%s'], params=['Lennon'])
``defer(*fields)``
~~~~~~~~~~~~~~~~~~
.. versionadded:: 1.1
In some complex data-modeling situations, your models might contain a lot of
fields, some of which could contain a lot of data (for example, text fields),
or require expensive processing to convert them to Python objects. If you are
using the results of a queryset in some situation where you know you don't
need those particular fields, you can tell Django not to retrieve them from
the database.
This is done by passing the names of the fields to not load to ``defer()``::
Entry.objects.defer("lede", "body")
A queryset that has deferred fields will still return model instances. Each
deferred field will be retrieved from the database if you access that field
(one at a time, not all the deferred fields at once).
You can make multiple calls to ``defer()``. Each call adds new fields to the
deferred set::
# Defers both the body and lede fields.
Entry.objects.defer("body").filter(headline="Lennon").defer("lede")
The order in which fields are added to the deferred set does not matter. Calling ``defer()`` with a field name that has already been deferred is harmless (the field will still be deferred).
You can defer loading of fields in related models (if the related models are
loaded via ``select_related()``) by using the standard double-underscore
notation to separate related fields::
Blog.objects.select_related().defer("entry__lede", "entry__body")
If you want to clear the set of deferred fields, pass ``None`` as a parameter
to ``defer()``::
# Load all fields immediately.
my_queryset.defer(None)
Some fields in a model won't be deferred, even if you ask for them. You can
never defer the loading of the primary key. If you are using
``select_related()`` to retrieve other models at the same time, you shouldn't
defer the loading of the field that connects from the primary model to the
related one (at the moment, that doesn't raise an error, but it will
eventually).
.. note::
The ``defer()`` method (and its cousin, ``only()``, below) is only for
advanced use-cases. They provide an optimization for when you have
analyzed your queries closely and understand *exactly* what information
you need and have measured that the difference between returning the
fields you need and the full set of fields for the model will be
significant. When you are initially developing your applications, don't
bother using ``defer()``; leave it until your query construction has
settled down and you understand where the hot-points are.
``only(*fields)``
~~~~~~~~~~~~~~~~~~
.. versionadded:: 1.1
The ``only()`` method is more or less the opposite of ``defer()``. You
call it with the fields that should *not* be deferred when retrieving a model.
If you have a model where almost all the fields need to be deferred, using
``only()`` to specify the complementary set of fields could result in simpler
code.
If you have a model with fields ``name``, ``age`` and ``biography``, the
following two querysets are the same, in terms of deferred fields::
Person.objects.defer("age", "biography")
Person.objects.only("name")
Whenever you call ``only()`` it *replaces* the set of fields to load
immediately. The method's name is mnemonic: **only** those fields are loaded
immediately; the remainder are deferred. Thus, successive calls to ``only()``
result in only the final fields being considered::
# This will defer all fields except the headline.
Entry.objects.only("body", "lede").only("headline")
Since ``defer()`` acts incrementally (adding fields to the deferred list), you
can combine calls to ``only()`` and ``defer()`` and things will behave
logically::
# Final result is that everything except "headline" is deferred.
Entry.objects.only("headline", "body").defer("body")
# Final result loads only headline immediately (only() replaces the set of
# fields to load, but fields that are already deferred stay deferred).
Entry.objects.defer("body").only("headline", "body")
QuerySet methods that do not return QuerySets QuerySet methods that do not return QuerySets
--------------------------------------------- ---------------------------------------------

View File

View File

@ -0,0 +1,89 @@
"""
Tests for defer() and only().
"""
from django.db import models
from django.db.models.query_utils import DeferredAttribute
class Secondary(models.Model):
    # Test model with two plain character fields. Primary.related points at
    # this model, and the doctests in this module refer to its "first" field
    # as related__first.
    first = models.CharField(max_length=50)
    second = models.CharField(max_length=50)
class Primary(models.Model):
    # Test model queried by the doctests in this module: two character
    # fields plus a foreign key to Secondary.
    name = models.CharField(max_length=50)
    value = models.CharField(max_length=50)
    # The doctests read this relation's column value as related_id.
    related = models.ForeignKey(Secondary)
def count_delayed_fields(obj, debug=False):
    """
    Returns the number of delayed attributes on the given model instance,
    i.e. the fields in obj._meta.fields for which the instance's class holds
    a DeferredAttribute under the field's attname.

    If "debug" is true, the name and attname of each such field are printed.
    """
    class_attrs = obj.__class__.__dict__
    delayed = 0
    for field in obj._meta.fields:
        if not isinstance(class_attrs.get(field.attname), DeferredAttribute):
            continue
        if debug:
            # Single parenthesised argument: prints "name attname" whether
            # print is a statement or a function.
            print("%s %s" % (field.name, field.attname))
        delayed += 1
    return delayed
__test__ = {"API_TEST": """
To all outward appearances, instances with deferred fields look the same as
normal instances when we examine attribute values. Therefore we test for the
number of deferred fields on returned instances (by poking at the internals),
as a way to observe what is going on.
>>> s1 = Secondary.objects.create(first="x1", second="y1")
>>> p1 = Primary.objects.create(name="p1", value="xx", related=s1)
>>> qs = Primary.objects.all()
>>> count_delayed_fields(qs.defer('name')[0])
1
>>> count_delayed_fields(qs.only('name')[0])
2
>>> count_delayed_fields(qs.defer('related__first')[0])
0
>>> obj = qs.select_related().only('related__first')[0]
>>> count_delayed_fields(obj)
2
>>> obj.related_id == s1.pk
True
>>> count_delayed_fields(qs.defer('name').extra(select={'a': 1})[0])
1
>>> count_delayed_fields(qs.extra(select={'a': 1}).defer('name')[0])
1
>>> count_delayed_fields(qs.defer('name').defer('value')[0])
2
>>> count_delayed_fields(qs.only('name').only('value')[0])
2
>>> count_delayed_fields(qs.only('name').defer('value')[0])
2
>>> count_delayed_fields(qs.only('name', 'value').defer('value')[0])
2
>>> count_delayed_fields(qs.defer('name').only('value')[0])
2
>>> obj = qs.only()[0]
>>> count_delayed_fields(qs.defer(None)[0])
0
>>> count_delayed_fields(qs.only('name').defer(None)[0])
0
Using values() won't defer anything (you get the full list of dictionaries
back), but it still works.
>>> qs.defer('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 'xx', 'related_id': s1.id}
True
>>> qs.only('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 'xx', 'related_id': s1.id}
True
Using defer() and only() with get() is also valid.
>>> count_delayed_fields(qs.defer('name').get(pk=p1.pk))
1
>>> count_delayed_fields(qs.only('name').get(pk=p1.pk))
2
# KNOWN NOT TO WORK: >>> count_delayed_fields(qs.only('name').select_related('related')[0])
# KNOWN NOT TO WORK >>> count_delayed_fields(qs.defer('related').select_related('related')[0])
"""}

View File

@ -890,6 +890,12 @@ unpickling.
>>> query2.as_sql()[0] == query >>> query2.as_sql()[0] == query
True True
Check pickling of deferred-loading querysets
>>> qs = Item.objects.defer('name', 'creator')
>>> q2 = pickle.loads(pickle.dumps(qs))
>>> list(qs) == list(q2)
True
Bug #7277 Bug #7277
>>> n1.annotation_set.filter(Q(tag=t5) | Q(tag__children=t5) | Q(tag__children__children=t5)) >>> n1.annotation_set.filter(Q(tag=t5) | Q(tag__children=t5) | Q(tag__children__children=t5))
[<Annotation: a1>] [<Annotation: a1>]