Fixed #5420 -- Added support for delayed loading of model fields.

In extreme cases, some fields are expensive to load from the database (e.g. GIS fields requiring conversion, or large text fields). This commit adds defer() and only() methods to querysets that allow the caller to specify which fields should not be loaded unless they are accessed. git-svn-id: http://code.djangoproject.com/svn/django/trunk@10090 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2009-03-19 09:06:04 +00:00 · 2009-03-19 09:06:04 +00:00 · 29050ef999
parent 96d5d434fa
commit 29050ef999
10 changed files with 685 additions and 111 deletions
--- a/django/db/models/base.py
+++ b/django/db/models/base.py
@ -12,7 +12,8 @@ import django.db.models.manager     # Imported to register signal handler.
 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
 from django.db.models.fields import AutoField, FieldDoesNotExist
 from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField
-from django.db.models.query import delete_objects, Q, CollectedObjects
+from django.db.models.query import delete_objects, Q
 from django.db.models.query_utils import CollectedObjects, DeferredAttribute
 from django.db.models.options import Options
 from django.db import connection, transaction, DatabaseError
 from django.db.models import signals
@ -235,6 +236,7 @@ class ModelBase(type):
 class Model(object):
    __metaclass__ = ModelBase
    _deferred = False
    def __init__(self, *args, **kwargs):
        signals.pre_init.send(sender=self.__class__, args=args, kwargs=kwargs)
@ -271,6 +273,13 @@ class Model(object):
        for field in fields_iter:
            is_related_object = False
            if kwargs:
                # This slightly odd construct is so that we can access any
                # data-descriptor object (DeferredAttribute) without triggering
                # its __get__ method.
                if (field.attname not in kwargs and
                        isinstance(self.__class__.__dict__.get(field.attname), DeferredAttribute)):
                    # This field will be populated on request.
                    continue
                if isinstance(field.rel, ManyToOneRel):
                    try:
                        # Assume object instance was passed in.
@ -332,6 +341,31 @@ class Model(object):
    def __hash__(self):
        """
        Hashes the instance by its primary key value.
        """
        pk_val = self._get_pk_val()
        return hash(pk_val)
    def __reduce__(self):
        """
        Pickling hook. Ordinary instances use Python's default handling.
        Instances of deferred-loading classes cannot, because those classes
        are created dynamically and the default path only copes with
        module-level classes. For them we return a recipe that rebuilds the
        class via model_unpickle() and then restores the instance __dict__.
        """
        if not self._deferred:
            return super(Model, self).__reduce__()
        class_attrs = self.__class__.__dict__
        deferred_names = []
        pk_val = None
        for f in self._meta.fields:
            # Look in the class __dict__ directly so the descriptor's
            # __get__ is not triggered.
            descriptor = class_attrs.get(f.attname)
            if not isinstance(descriptor, DeferredAttribute):
                continue
            deferred_names.append(f.attname)
            if pk_val is None:
                # Every DeferredAttribute on this class carries the same
                # pk_value and model, so reading them once is enough.
                pk_val = descriptor.pk_value
                model = descriptor.model_ref()
        return (model_unpickle, (model, pk_val, deferred_names), self.__dict__)
    def _get_pk_val(self, meta=None):
        if not meta:
            meta = self._meta
@ -591,6 +625,15 @@ def get_absolute_url(opts, func, self, *args, **kwargs):
 class Empty(object):
    """
    A bare class with no attributes of its own.
    """
 def model_unpickle(model, pk_val, attrs):
    """
    Used to unpickle Model subclasses with deferred fields.

    Rebuilds the deferred class for "model" (with "attrs" deferred and tied
    to "pk_val") and returns a new, uninitialised instance of it. The pickle
    machinery fills in the instance state afterwards.
    """
    # Imported here rather than at module level, as in the original code.
    from django.db.models.query_utils import deferred_class_factory
    cls = deferred_class_factory(model, pk_val, attrs)
    # __new__ is used so that Model.__init__ is not run.
    return cls.__new__(cls)
 # The marker attribute pickle knows about is "__safe_for_unpickling__" (the
 # same spelling used for deferred_class_factory).
 model_unpickle.__safe_for_unpickling__ = True
 if sys.version_info < (2, 5):
    # Prior to Python 2.5, Exception was an old-style class
    def subclass_exception(name, parent, unused):
--- a/django/db/models/manager.py
+++ b/django/db/models/manager.py
@ -167,6 +167,12 @@ class Manager(object):
    def reverse(self, *args, **kwargs):
        """
        Proxies to reverse() on the queryset returned by get_query_set().
        """
        queryset = self.get_query_set()
        return queryset.reverse(*args, **kwargs)
    def defer(self, *args, **kwargs):
        """
        Proxies to defer() on the queryset returned by get_query_set().
        """
        queryset = self.get_query_set()
        return queryset.defer(*args, **kwargs)
    def only(self, *args, **kwargs):
        """
        Proxies to only() on the queryset returned by get_query_set().
        """
        queryset = self.get_query_set()
        return queryset.only(*args, **kwargs)
    def _insert(self, values, **kwargs):
        """
        Hands "values" to insert_query() for this manager's model, passing
        any keyword arguments through unchanged.
        """
        target_model = self.model
        return insert_query(target_model, values, **kwargs)
--- a/django/db/models/options.py
+++ b/django/db/models/options.py
@ -477,3 +477,9 @@ class Options(object):
            self._ordered_objects = objects
        return self._ordered_objects
    def pk_index(self):
        """
        Returns the position of the primary key field within self.fields.
        """
        all_fields = self.fields
        return all_fields.index(self.pk)
--- a/django/db/models/query.py
+++ b/django/db/models/query.py
@ -1,3 +1,7 @@
 """
 The main QuerySet implementation. This provides the public API for the ORM.
 """
 try:
    set
 except NameError:
@ -6,9 +10,8 @@ except NameError:
 from django.db import connection, transaction, IntegrityError
 from django.db.models.aggregates import Aggregate
 from django.db.models.fields import DateField
-from django.db.models.query_utils import Q, select_related_descend
+from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory
 from django.db.models import signals, sql
 from django.utils.datastructures import SortedDict
 # Used to control how many objects are worked with at once in some cases (e.g.
@ -22,102 +25,6 @@ REPR_OUTPUT_SIZE = 20
 # Pull into this namespace for backwards compatibility.
 EmptyResultSet = sql.EmptyResultSet
 class CyclicDependency(Exception):
    """
    An error when dealing with a collection of objects that have a cyclic
    dependency, i.e. when deleting multiple objects.
    """
    pass
 class CollectedObjects(object):
    """
    A container that stores keys and lists of values along with remembering the
    parent objects for all the keys.
    This is used for the database object deletion routines so that we can
    calculate the 'leaf' objects which should be deleted first.
    """
    def __init__(self):
        self.data = {}
        self.children = {}
    def add(self, model, pk, obj, parent_model, nullable=False):
        """
        Adds an item to the container.
        Arguments:
        * model - the class of the object being added.
        * pk - the primary key.
        * obj - the object itself.
        * parent_model - the model of the parent object that this object was
          reached through.
        * nullable - should be True if this relation is nullable.
        Returns True if the item already existed in the structure and
        False otherwise.
        """
        d = self.data.setdefault(model, SortedDict())
        retval = pk in d
        d[pk] = obj
        # Nullable relationships can be ignored -- they are nulled out before
        # deleting, and therefore do not affect the order in which objects
        # have to be deleted.
        if parent_model is not None and not nullable:
            self.children.setdefault(parent_model, []).append(model)
        return retval
    def __contains__(self, key):
        return self.data.__contains__(key)
    def __getitem__(self, key):
        return self.data[key]
    def __nonzero__(self):
        return bool(self.data)
    def iteritems(self):
        for k in self.ordered_keys():
            yield k, self[k]
    def items(self):
        return list(self.iteritems())
    def keys(self):
        return self.ordered_keys()
    def ordered_keys(self):
        """
        Returns the models in the order that they should be dealt with (i.e.
        models with no dependencies first).
        """
        dealt_with = SortedDict()
        # Start with items that have no children
        models = self.data.keys()
        while len(dealt_with) < len(models):
            found = False
            for model in models:
                if model in dealt_with:
                    continue
                children = self.children.setdefault(model, [])
                if len([c for c in children if c not in dealt_with]) == 0:
                    dealt_with[model] = None
                    found = True
            if not found:
                raise CyclicDependency(
                    "There is a cyclic dependency of items to be processed.")
        return dealt_with.keys()
    def unordered_keys(self):
        """
        Fallback for the case where is a cyclic dependency but we don't  care.
        """
        return self.data.keys()
 class QuerySet(object):
    """
    Represents a lazy database lookup for a set of objects.
@ -275,6 +182,11 @@ class QuerySet(object):
        extra_select = self.query.extra_select.keys()
        aggregate_select = self.query.aggregate_select.keys()
        only_load = self.query.get_loaded_field_names()
        if not fill_cache:
            fields = self.model._meta.fields
            pk_idx = self.model._meta.pk_index()
        index_start = len(extra_select)
        aggregate_start = index_start + len(self.model._meta.fields)
@ -282,10 +194,31 @@ class QuerySet(object):
            if fill_cache:
                obj, _ = get_cached_row(self.model, row,
                            index_start, max_depth,
-                            requested=requested, offset=len(aggregate_select))
+                            requested=requested, offset=len(aggregate_select),
                            only_load=only_load)
            else:
-                # omit aggregates in object creation
+                load_fields = only_load.get(self.model)
-                obj = self.model(*row[index_start:aggregate_start])
+                if load_fields:
                    # Some fields have been deferred, so we have to initialise
                    # via keyword arguments.
                    row_data = row[index_start:aggregate_start]
                    pk_val = row_data[pk_idx]
                    skip = set()
                    init_list = []
                    for field in fields:
                        if field.name not in load_fields:
                            skip.add(field.attname)
                        else:
                            init_list.append(field.attname)
                    if skip:
                        model_cls = deferred_class_factory(self.model, pk_val,
                                skip)
                        obj = model_cls(**dict(zip(init_list, row_data)))
                    else:
                        obj = self.model(*row[index_start:aggregate_start])
                else:
                    # Omit aggregates in object creation.
                    obj = self.model(*row[index_start:aggregate_start])
            for i, k in enumerate(extra_select):
                setattr(obj, k, row[i])
@ -655,6 +588,35 @@ class QuerySet(object):
        clone.query.standard_ordering = not clone.query.standard_ordering
        return clone
    def defer(self, *fields):
        """
        Returns a clone of this queryset in which the named fields are not
        loaded until they are accessed. The names are added to whatever is
        already deferred. Passing None as the sole argument is the reset
        option: it removes all existing deferrals instead.
        """
        clone = self._clone()
        reset_requested = (fields == (None,))
        if not reset_requested:
            clone.query.add_deferred_loading(fields)
        else:
            clone.query.clear_deferred_loading()
        return clone
    def only(self, *fields):
        """
        Essentially, the opposite of defer. Only the fields passed into this
        method and that are not already specified as deferred are loaded
        immediately when the queryset is evaluated.

        Returns a clone of this queryset; the original is left untouched.
        Raises TypeError if None is passed as the sole argument.
        """
        # "fields" comes from *fields and is therefore always a tuple, so the
        # comparison has to be against a tuple; a list would never match.
        if fields == (None,):
            # Can only pass None to defer(), not only(), as the reset option.
            # That won't stop people trying to do this, so let's be explicit.
            raise TypeError("Cannot pass None as an argument to only().")
        clone = self._clone()
        clone.query.add_immediate_loading(fields)
        return clone
    ###################
    # PRIVATE METHODS #
    ###################
@ -757,6 +719,7 @@ class ValuesQuerySet(QuerySet):
        Called by the _clone() method after initializing the rest of the
        instance.
        """
        self.query.clear_deferred_loading()
        self.query.clear_select_fields()
        if self._fields:
@ -847,9 +810,9 @@ class ValuesListQuerySet(ValuesQuerySet):
            for row in self.query.results_iter():
                yield tuple(row)
        else:
-            # When extra(select=...) or an annotation is involved, the extra cols are
+            # When extra(select=...) or an annotation is involved, the extra
-            # always at the start of the row, and we need to reorder the fields
+            # cols are always at the start of the row, and we need to reorder
-            # to match the order in self._fields.
+            # the fields to match the order in self._fields.
            extra_names = self.query.extra_select.keys()
            field_names = self.field_names
            aggregate_names = self.query.aggregate_select.keys()
@ -884,6 +847,7 @@ class DateQuerySet(QuerySet):
        Called by the _clone() method after initializing the rest of the
        instance.
        """
        self.query.clear_deferred_loading()
        self.query = self.query.clone(klass=sql.DateQuery, setup=True)
        self.query.select = []
        field = self.model._meta.get_field(self._field_name, many_to_many=False)
@ -935,7 +899,7 @@ class EmptyQuerySet(QuerySet):
 def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
-                   requested=None, offset=0):
+                   requested=None, offset=0, only_load=None):
    """
    Helper function that recursively returns an object with the specified
    related attributes already populated.
@ -951,7 +915,24 @@ def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
        # If we only have a list of Nones, there was not related object.
        obj = None
    else:
-        obj = klass(*fields)
+        load_fields = only_load and only_load.get(klass) or None
        if load_fields:
            # Handle deferred fields.
            skip = set()
            init_list = []
            pk_val = fields[klass._meta.pk_index()]
            for field in klass._meta.fields:
                if field.name not in load_fields:
                    skip.add(field.name)
                else:
                    init_list.append(field.attname)
            if skip:
                klass = deferred_class_factory(klass, pk_val, skip)
                obj = klass(**dict(zip(init_list, fields)))
            else:
                obj = klass(*fields)
        else:
            obj = klass(*fields)
    index_end += offset
    for f in klass._meta.fields:
        if not select_related_descend(f, restricted, requested):
--- a/django/db/models/query_utils.py
+++ b/django/db/models/query_utils.py
@ -1,13 +1,115 @@
 """
 Various data structures used in query construction.
-Factored out from django.db.models.query so that they can also be used by other
+Factored out from django.db.models.query to avoid making the main module very
-modules without getting into circular import difficulties.
+large and/or so that they can be used by other modules without getting into
 circular import difficulties.
 """
 import weakref
 from copy import deepcopy
 from django.utils import tree
 from django.utils.datastructures import SortedDict
 try:
    sorted
 except NameError:
    from django.utils.itercompat import sorted  # For Python 2.3.
 class CyclicDependency(Exception):
    """
    Raised when a collection of objects that must be processed in dependency
    order (for example, when deleting multiple objects) contains a cycle.
    """
 class CollectedObjects(object):
    """
    A container mapping each model to its collected objects (keyed by primary
    key), which also remembers which models were reached through which parent
    models.
    The deletion routines use this to work out the 'leaf' objects, which have
    to be deleted first.
    """
    def __init__(self):
        # model -> SortedDict of pk -> object
        self.data = {}
        # parent model -> list of models reached through it
        self.children = {}
    def add(self, model, pk, obj, parent_model, nullable=False):
        """
        Stores "obj" under (model, pk).
        Arguments:
        * model - the class of the object being added.
        * pk - the primary key.
        * obj - the object itself.
        * parent_model - the model of the parent object that this object was
          reached through.
        * nullable - should be True if this relation is nullable.
        Returns True if an entry for (model, pk) was already present and
        False otherwise.
        """
        per_model = self.data.setdefault(model, SortedDict())
        already_present = pk in per_model
        per_model[pk] = obj
        # Nullable relations are nulled out before deletion, so they place no
        # constraint on the deletion order and are not recorded.
        if not (parent_model is None or nullable):
            self.children.setdefault(parent_model, []).append(model)
        return already_present
    def __contains__(self, key):
        return key in self.data
    def __getitem__(self, key):
        return self.data[key]
    def __nonzero__(self):
        return len(self.data) > 0
    def iteritems(self):
        for key in self.ordered_keys():
            yield key, self[key]
    def items(self):
        return [pair for pair in self.iteritems()]
    def keys(self):
        return self.ordered_keys()
    def ordered_keys(self):
        """
        Returns the models in the order in which they should be processed:
        a model is emitted only once every model recorded as its child has
        been emitted. Raises CyclicDependency if no such order exists.
        """
        resolved = SortedDict()
        pending = self.data.keys()
        while len(resolved) < len(pending):
            progressed = False
            for model in pending:
                if model in resolved:
                    continue
                dependents = self.children.setdefault(model, [])
                unresolved = [c for c in dependents if c not in resolved]
                if not unresolved:
                    resolved[model] = None
                    progressed = True
            if not progressed:
                # A full pass resolved nothing, so the rest form a cycle.
                raise CyclicDependency(
                    "There is a cyclic dependency of items to be processed.")
        return resolved.keys()
    def unordered_keys(self):
        """
        Fallback for the case where there is a cyclic dependency but we don't
        care: returns the models in no particular order.
        """
        return self.data.keys()
 class QueryWrapper(object):
    """
@ -51,6 +153,39 @@ class Q(tree.Node):
        obj.negate()
        return obj
 class DeferredAttribute(object):
    """
    A data descriptor that stands in for a deferred-loading field. The first
    time the attribute is read on an instance, the value is fetched from the
    database and cached; later reads return the cached value.

    The loaded (or assigned) value is kept in the instance's __dict__, not on
    the descriptor. The descriptor lives on the class, and a deferred class
    can be reused for several instances (see deferred_class_factory), so
    caching on the descriptor would leak one instance's value into all the
    others.
    """
    def __init__(self, field_name, pk_value, model):
        # field_name: passed to values_list() and used as the key under which
        #   the value is cached in the instance __dict__.
        # pk_value: the primary key this descriptor was created for; used as
        #   a fallback when the instance cannot report its own pk.
        # model: held through a weak reference.
        self.field_name = field_name
        self.pk_value = pk_value
        self.model_ref = weakref.ref(model)
    def __get__(self, instance, owner):
        """
        Retrieves and caches the value from the datastore on the first lookup.
        Returns the cached value. Returns None (without caching) if the model
        class is no longer alive.
        """
        assert instance is not None
        data = instance.__dict__
        if self.field_name not in data:
            model = self.model_ref()
            if model is None:
                return None
            pk_value = self._pk_for(instance)
            data[self.field_name] = list(model._base_manager.filter(pk=pk_value).values_list(self.field_name, flat=True))[0]
        return data[self.field_name]
    def __set__(self, instance, value):
        """
        Deferred loading attributes can be set normally (which means there
        will never be a database lookup involved).
        """
        instance.__dict__[self.field_name] = value
    def _pk_for(self, instance):
        """
        Returns the primary key to query with: the instance's own pk when it
        provides one, otherwise the pk_value given at construction.
        """
        get_pk = getattr(instance, "_get_pk_val", None)
        if get_pk is not None:
            pk_value = get_pk()
            if pk_value is not None:
                return pk_value
        return self.pk_value
 def select_related_descend(field, restricted, requested):
    """
    Returns True if this field should be used to descend deeper for
@ -67,3 +202,35 @@ def select_related_descend(field, restricted, requested):
    if not restricted and field.null:
        return False
    return True
 # This function is needed because data descriptors must be defined on a class
 # object, not an instance, to have any effect.
 def deferred_class_factory(model, pk_value, attrs):
    """
    Builds and returns a subclass of "model" in which each name in "attrs"
    is replaced by a DeferredAttribute. "pk_value" is handed to every
    DeferredAttribute to tie it to a particular row of the model.
    """
    class Meta:
        proxy = True
        app_label = model._meta.app_label
    # The app_cache wants a unique name for each model, otherwise the new
    # class won't be created (we get an old one back). The name is therefore
    # derived from the sorted attrs; getting an existing class back is
    # considered acceptable when the attrs are identical.
    attr_names = list(attrs)
    attr_names.sort()
    name = "%s_Deferred_%s" % (model.__name__, '_'.join(attr_names))
    overrides = {}
    for attr in attrs:
        overrides[attr] = DeferredAttribute(attr, pk_value, model)
    # Set the fixed entries last so they win over any clashing attr name.
    overrides.update({
        "Meta": Meta,
        "__module__": model.__module__,
        "_deferred": True,
    })
    return type(name, (model,), overrides)
 # This factory is also used when unpickling model instances that have
 # deferred fields.
 deferred_class_factory.__safe_for_unpickling__ = True
--- a/django/db/models/sql/query.py
+++ b/django/db/models/sql/query.py
@ -94,6 +94,11 @@ class BaseQuery(object):
        self.extra_params = ()
        self.extra_order_by = ()
        # A tuple that is a set of model field names and either True, if these
        # are the fields to defer, or False if these are the only fields to
        # load.
        self.deferred_loading = (set(), True)
    def __str__(self):
        """
        Returns the query as a string of SQL with the parameter values
@ -206,6 +211,7 @@ class BaseQuery(object):
        obj.extra_where = self.extra_where
        obj.extra_params = self.extra_params
        obj.extra_order_by = self.extra_order_by
        obj.deferred_loading = deepcopy(self.deferred_loading)
        if self.filter_is_sticky and self.used_aliases:
            obj.used_aliases = self.used_aliases.copy()
        else:
@ -550,9 +556,101 @@ class BaseQuery(object):
        if self.select_related and not self.related_select_cols:
            self.fill_related_selections()
    def deferred_to_data(self, target, callback):
        """
        Converts the self.deferred_loading data structure to an alternate data
        structure, describing the fields that *will* be loaded. This is used
        to compute the columns to select from the database and also by the
        QuerySet class to work out which fields are being initialised on each
        model. Models that have all their fields included aren't mentioned in
        the result, only those that have field restrictions in place.

        The "target" parameter is the instance that is populated (in place).
        The "callback" is a function that is called whenever a (model, field)
        pair needs to be added to "target". It accepts three parameters:
        "target", and the model and set of fields being added for that model.

        Returns None. If no field names are recorded in self.deferred_loading,
        nothing is done and "callback" is never called.
        """
        field_names, defer = self.deferred_loading
        if not field_names:
            return
        orig_opts = self.model._meta
        seen = {}
        must_include = {self.model: set([orig_opts.pk])}
        for field_name in field_names:
            parts = field_name.split(LOOKUP_SEP)
            # Every name is a path relative to the query's base model, so the
            # traversal has to restart there for each name. Otherwise a name
            # following a "foo__bar" path would be looked up on the related
            # model that the previous path ended on.
            cur_model = self.model
            opts = orig_opts
            for name in parts[:-1]:
                old_model = cur_model
                source = opts.get_field_by_name(name)[0]
                cur_model = source.rel.to
                opts = cur_model._meta
                # Even if we're "just passing through" this model, we must add
                # both the current model's pk and the related reference field
                # to the things we select.
                must_include[old_model].add(source)
                add_to_dict(must_include, cur_model, opts.pk)
            field, model, _, _ = opts.get_field_by_name(parts[-1])
            if model is None:
                model = cur_model
            add_to_dict(seen, model, field)
        if defer:
            # We need to load all fields for each model, except those that
            # appear in "seen" (for all models that appear in "seen"). The only
            # slight complexity here is handling fields that exist on parent
            # models.
            workset = {}
            for model, values in seen.iteritems():
                for field, f_model in model._meta.get_fields_with_model():
                    if field in values:
                        continue
                    add_to_dict(workset, f_model or model, field)
            for model, values in must_include.iteritems():
                # If we haven't included a model in workset, we don't add the
                # corresponding must_include fields for that model, since an
                # empty set means "include all fields". That's why there's no
                # "else" branch here.
                if model in workset:
                    workset[model].update(values)
            for model, values in workset.iteritems():
                callback(target, model, values)
        else:
            for model, values in must_include.iteritems():
                if model in seen:
                    seen[model].update(values)
                else:
                    # As we've passed through this model, but not explicitly
                    # included any fields, we have to make sure it's mentioned
                    # so that only the "must include" fields are pulled in.
                    seen[model] = values
            for model, values in seen.iteritems():
                callback(target, model, values)
    def deferred_to_columns(self):
        """
        Builds and returns a dictionary mapping table names to the sets of
        column names that are to be loaded, as derived from
        self.deferred_loading by deferred_to_data().
        """
        table_columns = {}
        self.deferred_to_data(table_columns, self.deferred_to_columns_cb)
        return table_columns
    def deferred_to_columns_cb(self, target, model, fields):
        """
        Callback used by deferred_to_columns(). Adds the column of each field
        in "fields" to the set stored in "target" under the model's database
        table name, creating that set if it does not exist yet.
        """
        table = model._meta.db_table
        column_names = target.setdefault(table, set())
        for field in fields:
            column_names.add(field.column)
    def get_columns(self, with_aliases=False):
        """
-        Return the list of columns to use in the select statement. If no
+        Returns the list of columns to use in the select statement. If no
        columns have been specified, returns all columns relating to fields in
        the model.
@ -569,9 +667,14 @@ class BaseQuery(object):
        else:
            col_aliases = set()
        if self.select:
            only_load = self.deferred_to_columns()
            for col in self.select:
                if isinstance(col, (list, tuple)):
-                    r = '%s.%s' % (qn(col[0]), qn(col[1]))
+                    alias, column = col
                    table = self.alias_map[alias][TABLE_NAME]
                    if table in only_load and col not in only_load[table]:
                        continue
                    r = '%s.%s' % (qn(alias), qn(column))
                    if with_aliases:
                        if col[1] in col_aliases:
                            c_alias = 'Col%d' % len(col_aliases)
@ -641,6 +744,7 @@ class BaseQuery(object):
        qn = self.quote_name_unless_alias
        qn2 = self.connection.ops.quote_name
        aliases = set()
        only_load = self.deferred_to_columns()
        proxied_model = opts.proxy and opts.proxy_for_model or 0
        if start_alias:
            seen = {None: start_alias}
@ -661,6 +765,9 @@ class BaseQuery(object):
                # aliases will have already been set up in pre_sql_setup(), so
                # we can save time here.
                alias = self.included_inherited_models[model]
            table = self.alias_map[alias][TABLE_NAME]
            if table in only_load and field.column not in only_load[table]:
                continue
            if as_pairs:
                result.append((alias, field.column))
                continue
@ -2014,6 +2121,70 @@ class BaseQuery(object):
        if order_by:
            self.extra_order_by = order_by
    def clear_deferred_loading(self):
        """
        Resets self.deferred_loading to its empty state: no field names, in
        "defer" mode.
        """
        no_names = set()
        self.deferred_loading = (no_names, True)
    def add_deferred_loading(self, field_names):
        """
        Marks the given model field names as excluded from loading when
        automatic column selection is done. In "defer" mode the names are
        added to the existing deferred names; in "immediate load" mode they
        are removed from the names marked as the only ones to load.
        """
        # Fields on related models are stored in the literal double-underscore
        # format, so that we can use a set datastructure. The foo__bar
        # splitting and handling happens when the SQL column names are
        # computed (as part of get_columns()).
        current_names, is_deferral = self.deferred_loading
        if not is_deferral:
            # Drop the names from the "immediate load" set.
            self.deferred_loading = current_names.difference(field_names), False
            return
        # Extend the set of deferred names.
        self.deferred_loading = current_names.union(field_names), True
    def add_immediate_loading(self, field_names):
        """
        Sets the given model field names as the ones to retrieve when the SQL
        is executed ("immediate loading" fields). Any previous immediate
        loading names are replaced. If names are currently recorded for
        deferred loading, those are removed from field_names before the new
        names are stored. (That is, immediate loading overrides any existing
        immediate values, but respects existing deferrals.)
        """
        current_names, is_deferral = self.deferred_loading
        requested = set(field_names)
        if is_deferral:
            # Respect existing deferrals: they stay excluded.
            requested = requested.difference(current_names)
        self.deferred_loading = requested, False
    def get_loaded_field_names(self):
        """
        Returns a dictionary mapping models to the set of names of their
        fields that will be loaded, as populated by deferred_to_data(). A
        model that is absent from the dictionary has none of its fields
        deferred.
        If no fields are marked for deferral, the dictionary is empty.
        """
        loaded = {}
        self.deferred_to_data(loaded, self.get_loaded_field_names_cb)
        return loaded
    def get_loaded_field_names_cb(self, target, model, fields):
        """
        Callback used by get_loaded_field_names(). Stores the set of names of
        "fields" in "target" under "model", replacing any existing entry.
        """
        names = set()
        for f in fields:
            names.add(f.name)
        target[model] = names
    def trim_extra_select(self, names):
        """
        Removes any aliases in the extra_select dictionary that aren't in
@ -2180,3 +2351,13 @@ def setup_join_cache(sender, **kwargs):
 # Register setup_join_cache as a receiver of the class_prepared signal.
 signals.class_prepared.connect(setup_join_cache)
 def add_to_dict(data, key, value):
    """
    Record "value" in the set stored under "key" in "data".

    A new one-element set is created when "key" is not yet present;
    otherwise "value" is added to the set that is already there.
    """
    if key not in data:
        data[key] = set((value,))
        return
    data[key].add(value)
--- a/docs/ref/models/querysets.txt
+++ b/docs/ref/models/querysets.txt
@ -768,6 +768,101 @@ of the arguments is required, but you should use at least one of them.
        Entry.objects.extra(where=['headline=%s'], params=['Lennon'])
 ``defer(*fields)``
 ~~~~~~~~~~~~~~~~~~
 .. versionadded:: 1.1
 In some complex data-modeling situations, your models might contain a lot of
 fields, some of which could contain a lot of data (for example, text fields),
 or require expensive processing to convert them to Python objects. If you are
 using the results of a queryset in some situation where you know you don't
 need those particular fields, you can tell Django not to retrieve them from
 the database.
 This is done by passing the names of the fields to not load to ``defer()``::
    Entry.objects.defer("lede", "body")
 A queryset that has deferred fields will still return model instances. Each
 deferred field will be retrieved from the database if you access that field
 (one at a time, not all the deferred fields at once).
 You can make multiple calls to ``defer()``. Each call adds new fields to the
 deferred set::
    # Defers both the body and lede fields.
    Entry.objects.defer("body").filter(headline="Lennon").defer("lede")
 The order in which fields are added to the deferred set does not matter. Calling ``defer()`` with a field name that has already been deferred is harmless (the field will still be deferred).
 You can defer loading of fields in related models (if the related models are
 loaded via ``select_related()``) by using the standard double-underscore
 notation to separate related fields::
    Blog.objects.select_related().defer("entry__lede", "entry__body")
 If you want to clear the set of deferred fields, pass ``None`` as a parameter
 to ``defer()``::
    # Load all fields immediately.
    my_queryset.defer(None)
 Some fields in a model won't be deferred, even if you ask for them. You can
 never defer the loading of the primary key. If you are using
 ``select_related()`` to retrieve other models at the same time you shouldn't
 defer the loading of the field that connects from the primary model to the
 related one (at the moment, that doesn't raise an error, but it will
 eventually).
 .. note::
    The ``defer()`` method (and its cousin, ``only()``, below) is only for
    advanced use-cases. They provide an optimization for when you have
    analyzed your queries closely and understand *exactly* what information
    you need and have measured that the difference between returning the
    fields you need and the full set of fields for the model will be
    significant. When you are initially developing your applications, don't
    bother using ``defer()``; leave it until your query construction has
    settled down and you understand where the hot-points are.
 ``only(*fields)``
 ~~~~~~~~~~~~~~~~~~
 .. versionadded:: 1.1
 The ``only()`` method is more or less the opposite of ``defer()``. You
 call it with the fields that should *not* be deferred when retrieving a model.
 If you have a model where almost all the fields need to be deferred, using
 ``only()`` to specify the complementary set of fields could result in simpler
 code.
 If you have a model with fields ``name``, ``age`` and ``biography``, the
 following two querysets are the same, in terms of deferred fields::
    Person.objects.defer("age", "biography")
    Person.objects.only("name")
 Whenever you call ``only()`` it *replaces* the set of fields to load
 immediately. The method's name is mnemonic: **only** those fields are loaded
 immediately; the remainder are deferred. Thus, successive calls to ``only()``
 result in only the final fields being considered::
    # This will defer all fields except the headline.
    Entry.objects.only("body", "lede").only("headline")
 Since ``defer()`` acts incrementally (adding fields to the deferred list), you
 can combine calls to ``only()`` and ``defer()`` and things will behave
 logically::
    # Final result is that everything except "headline" is deferred.
    Entry.objects.only("headline", "body").defer("body")
    # Final result loads only headline immediately; body stays deferred
    # (only() replaces any existing immediate-load fields, but respects
    # fields that have already been deferred).
    Entry.objects.defer("body").only("headline", "body")
 QuerySet methods that do not return QuerySets
 ---------------------------------------------
--- a/tests/modeltests/defer/init.py
+++ b/tests/modeltests/defer/init.py
--- a/tests/modeltests/defer/models.py
+++ b/tests/modeltests/defer/models.py
@ -0,0 +1,89 @@
 """
 Tests for defer() and only().
 """
 from django.db import models
 from django.db.models.query_utils import DeferredAttribute
 # Model with two short character fields; Primary refers to it through a
 # foreign key.
 class Secondary(models.Model):
    first = models.CharField(max_length=50)
    second = models.CharField(max_length=50)
 # Model with two short character fields and a foreign key to Secondary.
 class Primary(models.Model):
    name = models.CharField(max_length=50)
    value = models.CharField(max_length=50)
    # Link to the Secondary row this object belongs to.
    related = models.ForeignKey(Secondary)
 def count_delayed_fields(obj, debug=False):
    """
    Returns the number of delayed attributes on the given model instance.

    A field counts as delayed when the entry stored under its attname in
    the __dict__ of the instance's class is a DeferredAttribute. The class
    __dict__ is read directly so that no descriptor's __get__ is triggered.

    If "debug" is true, the name and attname of each delayed field are
    printed as they are found.
    """
    class_attrs = obj.__class__.__dict__
    count = 0
    for field in obj._meta.fields:
        if not isinstance(class_attrs.get(field.attname), DeferredAttribute):
            continue
        if debug:
            # A single pre-formatted argument prints the same text whether
            # print is a statement (Python 2) or a function (Python 3).
            print("%s %s" % (field.name, field.attname))
        count += 1
    return count
 __test__ = {"API_TEST": """
 To all outward appearances, instances with deferred fields look the same as
 normal instances when we examine attribute values. Therefore we test for the
 number of deferred fields on returned instances (by poking at the internals),
 as a way to observe what is going on.
 >>> s1 = Secondary.objects.create(first="x1", second="y1")
 >>> p1 = Primary.objects.create(name="p1", value="xx", related=s1)
 >>> qs = Primary.objects.all()
 >>> count_delayed_fields(qs.defer('name')[0])
 1
 >>> count_delayed_fields(qs.only('name')[0])
 2
 >>> count_delayed_fields(qs.defer('related__first')[0])
 0
 >>> obj = qs.select_related().only('related__first')[0]
 >>> count_delayed_fields(obj)
 2
 >>> obj.related_id == s1.pk
 True
 >>> count_delayed_fields(qs.defer('name').extra(select={'a': 1})[0])
 1
 >>> count_delayed_fields(qs.extra(select={'a': 1}).defer('name')[0])
 1
 >>> count_delayed_fields(qs.defer('name').defer('value')[0])
 2
 >>> count_delayed_fields(qs.only('name').only('value')[0])
 2
 >>> count_delayed_fields(qs.only('name').defer('value')[0])
 2
 >>> count_delayed_fields(qs.only('name', 'value').defer('value')[0])
 2
 >>> count_delayed_fields(qs.defer('name').only('value')[0])
 2
 >>> obj = qs.only()[0]
 >>> count_delayed_fields(qs.defer(None)[0])
 0
 >>> count_delayed_fields(qs.only('name').defer(None)[0])
 0
 Using values() won't defer anything (you get the full list of dictionaries
 back), but it still works.
 >>> qs.defer('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 'xx', 'related_id': s1.id}
 True
 >>> qs.only('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 'xx', 'related_id': s1.id}
 True
 Using defer() and only() with get() is also valid.
 >>> count_delayed_fields(qs.defer('name').get(pk=p1.pk))
 1
 >>> count_delayed_fields(qs.only('name').get(pk=p1.pk))
 2
 # KNOWN NOT TO WORK: >>> count_delayed_fields(qs.only('name').select_related('related')[0])
 # KNOWN NOT TO WORK >>> count_delayed_fields(qs.defer('related').select_related('related')[0])
 """}
--- a/tests/regressiontests/queries/models.py
+++ b/tests/regressiontests/queries/models.py
@ -890,6 +890,12 @@ unpickling.
 >>> query2.as_sql()[0] == query
 True
 Check pickling of deferred-loading querysets
 >>> qs = Item.objects.defer('name', 'creator')
 >>> q2 = pickle.loads(pickle.dumps(qs))
 >>> list(qs) == list(q2)
 True
 Bug #7277
 >>> n1.annotation_set.filter(Q(tag=t5) | Q(tag__children=t5) | Q(tag__children__children=t5))
 [<Annotation: a1>]