diff --git a/django/db/models/base.py b/django/db/models/base.py index 289294e97a0..2136ed3da46 100644 --- a/django/db/models/base.py +++ b/django/db/models/base.py @@ -12,7 +12,8 @@ import django.db.models.manager # Imported to register signal handler. from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError from django.db.models.fields import AutoField, FieldDoesNotExist from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField -from django.db.models.query import delete_objects, Q, CollectedObjects +from django.db.models.query import delete_objects, Q +from django.db.models.query_utils import CollectedObjects, DeferredAttribute from django.db.models.options import Options from django.db import connection, transaction, DatabaseError from django.db.models import signals @@ -235,6 +236,7 @@ class ModelBase(type): class Model(object): __metaclass__ = ModelBase + _deferred = False def __init__(self, *args, **kwargs): signals.pre_init.send(sender=self.__class__, args=args, kwargs=kwargs) @@ -271,6 +273,13 @@ class Model(object): for field in fields_iter: is_related_object = False if kwargs: + # This slightly odd construct is so that we can access any + # data-descriptor object (DeferredAttribute) without triggering + # its __get__ method. + if (field.attname not in kwargs and + isinstance(self.__class__.__dict__.get(field.attname), DeferredAttribute)): + # This field will be populated on request. + continue if isinstance(field.rel, ManyToOneRel): try: # Assume object instance was passed in. @@ -332,6 +341,31 @@ class Model(object): def __hash__(self): return hash(self._get_pk_val()) + def __reduce__(self): + """ + Provide pickling support. Normally, this just dispatches to Python's + standard handling. However, for models with deferred field loading, we + need to do things manually, as they're dynamically created classes and + only module-level classes can be pickled by the default path. + """ + if not self._deferred: + return super(Model, self).__reduce__() + data = self.__dict__ + defers = [] + pk_val = None + for field in self._meta.fields: + if isinstance(self.__class__.__dict__.get(field.attname), + DeferredAttribute): + defers.append(field.attname) + if pk_val is None: + # The pk_val and model values are the same for all + # DeferredAttribute classes, so we only need to do this + # once. + obj = self.__class__.__dict__[field.attname] + pk_val = obj.pk_value + model = obj.model_ref() + return (model_unpickle, (model, pk_val, defers), data) + def _get_pk_val(self, meta=None): if not meta: meta = self._meta @@ -591,6 +625,15 @@ def get_absolute_url(opts, func, self, *args, **kwargs): class Empty(object): pass +def model_unpickle(model, pk_val, attrs): + """ + Used to unpickle Model subclasses with deferred fields. 
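+
+    A rough sketch of the round trip this enables (``SomeModel`` here is
+    purely illustrative, not a model defined in this patch)::
+
+        import pickle
+        obj = SomeModel.objects.defer("body")[0]
+        data = pickle.dumps(obj)     # __reduce__ returns
+                                     # (model_unpickle, (model, pk_val, defers), __dict__)
+        obj2 = pickle.loads(data)    # model_unpickle() rebuilds the deferred
+                                     # class; the instance dict is then restored.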
+ """ + from django.db.models.query_utils import deferred_class_factory + cls = deferred_class_factory(model, pk_val, attrs) + return cls.__new__(cls) +model_unpickle.__safe_for_unpickle__ = True + if sys.version_info < (2, 5): # Prior to Python 2.5, Exception was an old-style class def subclass_exception(name, parent, unused): diff --git a/django/db/models/manager.py b/django/db/models/manager.py index c130a0c74a2..57844f28fe3 100644 --- a/django/db/models/manager.py +++ b/django/db/models/manager.py @@ -167,6 +167,12 @@ class Manager(object): def reverse(self, *args, **kwargs): return self.get_query_set().reverse(*args, **kwargs) + def defer(self, *args, **kwargs): + return self.get_query_set().defer(*args, **kwargs) + + def only(self, *args, **kwargs): + return self.get_query_set().only(*args, **kwargs) + def _insert(self, values, **kwargs): return insert_query(self.model, values, **kwargs) diff --git a/django/db/models/options.py b/django/db/models/options.py index 85cc511a4c6..c7ae983541a 100644 --- a/django/db/models/options.py +++ b/django/db/models/options.py @@ -477,3 +477,9 @@ class Options(object): self._ordered_objects = objects return self._ordered_objects + def pk_index(self): + """ + Returns the index of the primary key field in the self.fields list. + """ + return self.fields.index(self.pk) + diff --git a/django/db/models/query.py b/django/db/models/query.py index 3417f59ace1..d5ff1b2fe2b 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -1,3 +1,7 @@ +""" +The main QuerySet implementation. This provides the public API for the ORM. +""" + try: set except NameError: @@ -6,9 +10,8 @@ except NameError: from django.db import connection, transaction, IntegrityError from django.db.models.aggregates import Aggregate from django.db.models.fields import DateField -from django.db.models.query_utils import Q, select_related_descend +from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory from django.db.models import signals, sql -from django.utils.datastructures import SortedDict # Used to control how many objects are worked with at once in some cases (e.g. @@ -22,102 +25,6 @@ REPR_OUTPUT_SIZE = 20 # Pull into this namespace for backwards compatibility. EmptyResultSet = sql.EmptyResultSet - -class CyclicDependency(Exception): - """ - An error when dealing with a collection of objects that have a cyclic - dependency, i.e. when deleting multiple objects. - """ - pass - - -class CollectedObjects(object): - """ - A container that stores keys and lists of values along with remembering the - parent objects for all the keys. - - This is used for the database object deletion routines so that we can - calculate the 'leaf' objects which should be deleted first. - """ - - def __init__(self): - self.data = {} - self.children = {} - - def add(self, model, pk, obj, parent_model, nullable=False): - """ - Adds an item to the container. - - Arguments: - * model - the class of the object being added. - * pk - the primary key. - * obj - the object itself. - * parent_model - the model of the parent object that this object was - reached through. - * nullable - should be True if this relation is nullable. - - Returns True if the item already existed in the structure and - False otherwise. 
- """ - d = self.data.setdefault(model, SortedDict()) - retval = pk in d - d[pk] = obj - # Nullable relationships can be ignored -- they are nulled out before - # deleting, and therefore do not affect the order in which objects - # have to be deleted. - if parent_model is not None and not nullable: - self.children.setdefault(parent_model, []).append(model) - return retval - - def __contains__(self, key): - return self.data.__contains__(key) - - def __getitem__(self, key): - return self.data[key] - - def __nonzero__(self): - return bool(self.data) - - def iteritems(self): - for k in self.ordered_keys(): - yield k, self[k] - - def items(self): - return list(self.iteritems()) - - def keys(self): - return self.ordered_keys() - - def ordered_keys(self): - """ - Returns the models in the order that they should be dealt with (i.e. - models with no dependencies first). - """ - dealt_with = SortedDict() - # Start with items that have no children - models = self.data.keys() - while len(dealt_with) < len(models): - found = False - for model in models: - if model in dealt_with: - continue - children = self.children.setdefault(model, []) - if len([c for c in children if c not in dealt_with]) == 0: - dealt_with[model] = None - found = True - if not found: - raise CyclicDependency( - "There is a cyclic dependency of items to be processed.") - - return dealt_with.keys() - - def unordered_keys(self): - """ - Fallback for the case where is a cyclic dependency but we don't care. - """ - return self.data.keys() - - class QuerySet(object): """ Represents a lazy database lookup for a set of objects. @@ -275,6 +182,11 @@ class QuerySet(object): extra_select = self.query.extra_select.keys() aggregate_select = self.query.aggregate_select.keys() + only_load = self.query.get_loaded_field_names() + if not fill_cache: + fields = self.model._meta.fields + pk_idx = self.model._meta.pk_index() + index_start = len(extra_select) aggregate_start = index_start + len(self.model._meta.fields) @@ -282,10 +194,31 @@ class QuerySet(object): if fill_cache: obj, _ = get_cached_row(self.model, row, index_start, max_depth, - requested=requested, offset=len(aggregate_select)) + requested=requested, offset=len(aggregate_select), + only_load=only_load) else: - # omit aggregates in object creation - obj = self.model(*row[index_start:aggregate_start]) + load_fields = only_load.get(self.model) + if load_fields: + # Some fields have been deferred, so we have to initialise + # via keyword arguments. + row_data = row[index_start:aggregate_start] + pk_val = row_data[pk_idx] + skip = set() + init_list = [] + for field in fields: + if field.name not in load_fields: + skip.add(field.attname) + else: + init_list.append(field.attname) + if skip: + model_cls = deferred_class_factory(self.model, pk_val, + skip) + obj = model_cls(**dict(zip(init_list, row_data))) + else: + obj = self.model(*row[index_start:aggregate_start]) + else: + # Omit aggregates in object creation. + obj = self.model(*row[index_start:aggregate_start]) for i, k in enumerate(extra_select): setattr(obj, k, row[i]) @@ -655,6 +588,35 @@ class QuerySet(object): clone.query.standard_ordering = not clone.query.standard_ordering return clone + def defer(self, *fields): + """ + Defers the loading of data for certain fields until they are accessed. + The set of fields to defer is added to any existing set of deferred + fields. The only exception to this is if None is passed in as the only + parameter, in which case all deferrals are removed (None acts as a + reset option). 
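+
+        A short illustration (the ``Entry`` model and its field names are
+        only examples)::
+
+            qs = Entry.objects.defer("body")
+            qs = qs.defer("headline")    # both "body" and "headline" deferred
+            qs = qs.defer(None)          # reset: nothing is deferred any more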
+ """ + clone = self._clone() + if fields == (None,): + clone.query.clear_deferred_loading() + else: + clone.query.add_deferred_loading(fields) + return clone + + def only(self, *fields): + """ + Essentially, the opposite of defer. Only the fields passed into this + method and that are not already specified as deferred are loaded + immediately when the queryset is evaluated. + """ + if fields == [None]: + # Can only pass None to defer(), not only(), as the rest option. + # That won't stop people trying to do this, so let's be explicit. + raise TypeError("Cannot pass None as an argument to only().") + clone = self._clone() + clone.query.add_immediate_loading(fields) + return clone + ################### # PRIVATE METHODS # ################### @@ -757,6 +719,7 @@ class ValuesQuerySet(QuerySet): Called by the _clone() method after initializing the rest of the instance. """ + self.query.clear_deferred_loading() self.query.clear_select_fields() if self._fields: @@ -847,9 +810,9 @@ class ValuesListQuerySet(ValuesQuerySet): for row in self.query.results_iter(): yield tuple(row) else: - # When extra(select=...) or an annotation is involved, the extra cols are - # always at the start of the row, and we need to reorder the fields - # to match the order in self._fields. + # When extra(select=...) or an annotation is involved, the extra + # cols are always at the start of the row, and we need to reorder + # the fields to match the order in self._fields. extra_names = self.query.extra_select.keys() field_names = self.field_names aggregate_names = self.query.aggregate_select.keys() @@ -884,6 +847,7 @@ class DateQuerySet(QuerySet): Called by the _clone() method after initializing the rest of the instance. """ + self.query.clear_deferred_loading() self.query = self.query.clone(klass=sql.DateQuery, setup=True) self.query.select = [] field = self.model._meta.get_field(self._field_name, many_to_many=False) @@ -935,7 +899,7 @@ class EmptyQuerySet(QuerySet): def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0, - requested=None, offset=0): + requested=None, offset=0, only_load=None): """ Helper function that recursively returns an object with the specified related attributes already populated. @@ -951,7 +915,24 @@ def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0, # If we only have a list of Nones, there was not related object. obj = None else: - obj = klass(*fields) + load_fields = only_load and only_load.get(klass) or None + if load_fields: + # Handle deferred fields. + skip = set() + init_list = [] + pk_val = fields[klass._meta.pk_index()] + for field in klass._meta.fields: + if field.name not in load_fields: + skip.add(field.name) + else: + init_list.append(field.attname) + if skip: + klass = deferred_class_factory(klass, pk_val, skip) + obj = klass(**dict(zip(init_list, fields))) + else: + obj = klass(*fields) + else: + obj = klass(*fields) index_end += offset for f in klass._meta.fields: if not select_related_descend(f, restricted, requested): diff --git a/django/db/models/query_utils.py b/django/db/models/query_utils.py index b85798f2aa4..8baa6543444 100644 --- a/django/db/models/query_utils.py +++ b/django/db/models/query_utils.py @@ -1,13 +1,115 @@ """ Various data structures used in query construction. -Factored out from django.db.models.query so that they can also be used by other -modules without getting into circular import difficulties. 
+Factored out from django.db.models.query to avoid making the main module very +large and/or so that they can be used by other modules without getting into +circular import difficulties. """ +import weakref from copy import deepcopy from django.utils import tree +from django.utils.datastructures import SortedDict + +try: + sorted +except NameError: + from django.utils.itercompat import sorted # For Python 2.3. + + +class CyclicDependency(Exception): + """ + An error when dealing with a collection of objects that have a cyclic + dependency, i.e. when deleting multiple objects. + """ + pass + +class CollectedObjects(object): + """ + A container that stores keys and lists of values along with remembering the + parent objects for all the keys. + + This is used for the database object deletion routines so that we can + calculate the 'leaf' objects which should be deleted first. + """ + + def __init__(self): + self.data = {} + self.children = {} + + def add(self, model, pk, obj, parent_model, nullable=False): + """ + Adds an item to the container. + + Arguments: + * model - the class of the object being added. + * pk - the primary key. + * obj - the object itself. + * parent_model - the model of the parent object that this object was + reached through. + * nullable - should be True if this relation is nullable. + + Returns True if the item already existed in the structure and + False otherwise. + """ + d = self.data.setdefault(model, SortedDict()) + retval = pk in d + d[pk] = obj + # Nullable relationships can be ignored -- they are nulled out before + # deleting, and therefore do not affect the order in which objects + # have to be deleted. + if parent_model is not None and not nullable: + self.children.setdefault(parent_model, []).append(model) + return retval + + def __contains__(self, key): + return self.data.__contains__(key) + + def __getitem__(self, key): + return self.data[key] + + def __nonzero__(self): + return bool(self.data) + + def iteritems(self): + for k in self.ordered_keys(): + yield k, self[k] + + def items(self): + return list(self.iteritems()) + + def keys(self): + return self.ordered_keys() + + def ordered_keys(self): + """ + Returns the models in the order that they should be dealt with (i.e. + models with no dependencies first). + """ + dealt_with = SortedDict() + # Start with items that have no children + models = self.data.keys() + while len(dealt_with) < len(models): + found = False + for model in models: + if model in dealt_with: + continue + children = self.children.setdefault(model, []) + if len([c for c in children if c not in dealt_with]) == 0: + dealt_with[model] = None + found = True + if not found: + raise CyclicDependency( + "There is a cyclic dependency of items to be processed.") + + return dealt_with.keys() + + def unordered_keys(self): + """ + Fallback for the case where is a cyclic dependency but we don't care. + """ + return self.data.keys() class QueryWrapper(object): """ @@ -51,6 +153,39 @@ class Q(tree.Node): obj.negate() return obj +class DeferredAttribute(object): + """ + A wrapper for a deferred-loading field. When the value is read from this + object the first time, the query is executed. + """ + def __init__(self, field_name, pk_value, model): + self.field_name = field_name + self.pk_value = pk_value + self.model_ref = weakref.ref(model) + self.loaded = False + + def __get__(self, instance, owner): + """ + Retrieves and caches the value from the datastore on the first lookup. + Returns the cached value. 
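+
+        From the caller's point of view (model and field names are
+        illustrative only)::
+
+            e = Entry.objects.defer("body")[0]   # "body" column not selected
+            e.body    # first access runs one query via _base_manager and the
+                      # result is cached in self.value for later accesses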
+ """ + assert instance is not None + if not self.loaded: + obj = self.model_ref() + if obj is None: + return + self.value = list(obj._base_manager.filter(pk=self.pk_value).values_list(self.field_name, flat=True))[0] + self.loaded = True + return self.value + + def __set__(self, name, value): + """ + Deferred loading attributes can be set normally (which means there will + never be a database lookup involved. + """ + self.value = value + self.loaded = True + def select_related_descend(field, restricted, requested): """ Returns True if this field should be used to descend deeper for @@ -67,3 +202,35 @@ def select_related_descend(field, restricted, requested): if not restricted and field.null: return False return True + +# This function is needed because data descriptors must be defined on a class +# object, not an instance, to have any effect. + +def deferred_class_factory(model, pk_value, attrs): + """ + Returns a class object that is a copy of "model" with the specified "attrs" + being replaced with DeferredAttribute objects. The "pk_value" ties the + deferred attributes to a particular instance of the model. + """ + class Meta: + pass + setattr(Meta, "proxy", True) + setattr(Meta, "app_label", model._meta.app_label) + + # The app_cache wants a unique name for each model, otherwise the new class + # won't be created (we get an old one back). Therefore, we generate the + # name using the passed in attrs. It's OK to reuse an old case if the attrs + # are identical. + name = "%s_Deferred_%s" % (model.__name__, '_'.join(sorted(list(attrs)))) + + overrides = dict([(attr, DeferredAttribute(attr, pk_value, model)) + for attr in attrs]) + overrides["Meta"] = Meta + overrides["__module__"] = model.__module__ + overrides["_deferred"] = True + return type(name, (model,), overrides) + +# The above function is also used to unpickle model instances with deferred +# fields. +deferred_class_factory.__safe_for_unpickling__ = True + diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index cfb6501be5b..b7e39847b02 100644 --- a/django/db/models/sql/query.py +++ b/django/db/models/sql/query.py @@ -94,6 +94,11 @@ class BaseQuery(object): self.extra_params = () self.extra_order_by = () + # A tuple that is a set of model field names and either True, if these + # are the fields to defer, or False if these are the only fields to + # load. + self.deferred_loading = (set(), True) + def __str__(self): """ Returns the query as a string of SQL with the parameter values @@ -206,6 +211,7 @@ class BaseQuery(object): obj.extra_where = self.extra_where obj.extra_params = self.extra_params obj.extra_order_by = self.extra_order_by + obj.deferred_loading = deepcopy(self.deferred_loading) if self.filter_is_sticky and self.used_aliases: obj.used_aliases = self.used_aliases.copy() else: @@ -550,9 +556,101 @@ class BaseQuery(object): if self.select_related and not self.related_select_cols: self.fill_related_selections() + def deferred_to_data(self, target, callback): + """ + Converts the self.deferred_loading data structure to an alternate data + structure, describing the field that *will* be loaded. This is used to + compute the columns to select from the database and also by the + QuerySet class to work out which fields are being initialised on each + model. Models that have all their fields included aren't mentioned in + the result, only those that have field restrictions in place. + + The "target" parameter is the instance that is populated (in place). 
+ The "callback" is a function that is called whenever a (model, field) + pair need to be added to "target". It accepts three parameters: + "target", and the model and list of fields being added for that model. + """ + field_names, defer = self.deferred_loading + if not field_names: + return + columns = set() + cur_model = self.model + opts = cur_model._meta + seen = {} + must_include = {cur_model: set([opts.pk])} + for field_name in field_names: + parts = field_name.split(LOOKUP_SEP) + for name in parts[:-1]: + old_model = cur_model + source = opts.get_field_by_name(name)[0] + cur_model = opts.get_field_by_name(name)[0].rel.to + opts = cur_model._meta + # Even if we're "just passing through" this model, we must add + # both the current model's pk and the related reference field + # to the things we select. + must_include[old_model].add(source) + add_to_dict(must_include, cur_model, opts.pk) + field, model, _, _ = opts.get_field_by_name(parts[-1]) + if model is None: + model = cur_model + add_to_dict(seen, model, field) + + if defer: + # We need to load all fields for each model, except those that + # appear in "seen" (for all models that appear in "seen"). The only + # slight complexity here is handling fields that exist on parent + # models. + workset = {} + for model, values in seen.iteritems(): + for field, f_model in model._meta.get_fields_with_model(): + if field in values: + continue + add_to_dict(workset, f_model or model, field) + for model, values in must_include.iteritems(): + # If we haven't included a model in workset, we don't add the + # corresponding must_include fields for that model, since an + # empty set means "include all fields". That's why there's no + # "else" branch here. + if model in workset: + workset[model].update(values) + for model, values in workset.iteritems(): + callback(target, model, values) + else: + for model, values in must_include.iteritems(): + if model in seen: + seen[model].update(values) + else: + # As we've passed through this model, but not explicitly + # included any fields, we have to make sure it's mentioned + # so that only the "must include" fields are pulled in. + seen[model] = values + for model, values in seen.iteritems(): + callback(target, model, values) + + def deferred_to_columns(self): + """ + Converts the self.deferred_loading data structure to mapping of table + names to sets of column names which are to be loaded. Returns the + dictionary. + """ + columns = {} + self.deferred_to_data(columns, self.deferred_to_columns_cb) + return columns + + def deferred_to_columns_cb(self, target, model, fields): + """ + Callback used by deferred_to_columns(). The "target" parameter should + be a set instance. + """ + table = model._meta.db_table + if table not in target: + target[table] = set() + for field in fields: + target[table].add(field.column) + def get_columns(self, with_aliases=False): """ - Return the list of columns to use in the select statement. If no + Returns the list of columns to use in the select statement. If no columns have been specified, returns all columns relating to fields in the model. 
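As a sketch of the data these helpers produce (the ``Entry`` model and the
``app_entry`` table name below are hypothetical, not part of the patch), a
simple ``defer()`` call leaves the query in a state like this::

    qs = Entry.objects.defer("body")
    qs.query.deferred_loading        # (set(['body']), True) -- names, "defer" flag
    qs.query.deferred_to_columns()   # {'app_entry': set(['id', 'headline', ...])}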
@@ -569,9 +667,14 @@ class BaseQuery(object): else: col_aliases = set() if self.select: + only_load = self.deferred_to_columns() for col in self.select: if isinstance(col, (list, tuple)): - r = '%s.%s' % (qn(col[0]), qn(col[1])) + alias, column = col + table = self.alias_map[alias][TABLE_NAME] + if table in only_load and col not in only_load[table]: + continue + r = '%s.%s' % (qn(alias), qn(column)) if with_aliases: if col[1] in col_aliases: c_alias = 'Col%d' % len(col_aliases) @@ -641,6 +744,7 @@ class BaseQuery(object): qn = self.quote_name_unless_alias qn2 = self.connection.ops.quote_name aliases = set() + only_load = self.deferred_to_columns() proxied_model = opts.proxy and opts.proxy_for_model or 0 if start_alias: seen = {None: start_alias} @@ -661,6 +765,9 @@ class BaseQuery(object): # aliases will have already been set up in pre_sql_setup(), so # we can save time here. alias = self.included_inherited_models[model] + table = self.alias_map[alias][TABLE_NAME] + if table in only_load and field.column not in only_load[table]: + continue if as_pairs: result.append((alias, field.column)) continue @@ -2014,6 +2121,70 @@ class BaseQuery(object): if order_by: self.extra_order_by = order_by + def clear_deferred_loading(self): + """ + Remove any fields from the deferred loading set. + """ + self.deferred_loading = (set(), True) + + def add_deferred_loading(self, field_names): + """ + Add the given list of model field names to the set of fields to + exclude from loading from the database when automatic column selection + is done. The new field names are added to any existing field names that + are deferred (or removed from any existing field names that are marked + as the only ones for immediate loading). + """ + # Fields on related models are stored in the literal double-underscore + # format, so that we can use a set datastructure. We do the foo__bar + # splitting and handling when computing the SQL colum names (as part of + # get_columns()). + existing, defer = self.deferred_loading + if defer: + # Add to existing deferred names. + self.deferred_loading = existing.union(field_names), True + else: + # Remove names from the set of any existing "immediate load" names. + self.deferred_loading = existing.difference(field_names), False + + def add_immediate_loading(self, field_names): + """ + Add the given list of model field names to the set of fields to + retrieve when the SQL is executed ("immediate loading" fields). The + field names replace any existing immediate loading field names. If + there are field names already specified for deferred loading, those + names are removed from the new field_names before storing the new names + for immediate loading. (That is, immediate loading overrides any + existing immediate values, but respects existing deferrals.) + """ + existing, defer = self.deferred_loading + if defer: + # Remove any existing deferred names from the current set before + # setting the new names. + self.deferred_loading = set(field_names).difference(existing), False + else: + # Replace any existing "immediate load" field names. + self.deferred_loading = set(field_names), False + + def get_loaded_field_names(self): + """ + If any fields are marked to be deferred, returns a dictionary mapping + models to a set of names in those fields that will be loaded. If a + model is not in the returned dictionary, none of it's fields are + deferred. + + If no fields are marked for deferral, returns an empty dictionary. 
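+
+        For example, for a hypothetical ``Entry`` model with ``headline`` and
+        ``body`` fields, ``Entry.objects.defer("body")`` results in roughly::
+
+            {Entry: set(['id', 'headline'])}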
+ """ + collection = {} + self.deferred_to_data(collection, self.get_loaded_field_names_cb) + return collection + + def get_loaded_field_names_cb(self, target, model, fields): + """ + Callback used by get_deferred_field_names(). + """ + target[model] = set([f.name for f in fields]) + def trim_extra_select(self, names): """ Removes any aliases in the extra_select dictionary that aren't in @@ -2180,3 +2351,13 @@ def setup_join_cache(sender, **kwargs): signals.class_prepared.connect(setup_join_cache) +def add_to_dict(data, key, value): + """ + A helper function to add "value" to the set of values for "key", whether or + not "key" already exists. + """ + if key in data: + data[key].add(value) + else: + data[key] = set([value]) + diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt index a6ea916e870..deb1c0104db 100644 --- a/docs/ref/models/querysets.txt +++ b/docs/ref/models/querysets.txt @@ -768,6 +768,101 @@ of the arguments is required, but you should use at least one of them. Entry.objects.extra(where=['headline=%s'], params=['Lennon']) +``defer(*fields)`` +~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1 + +In some complex data-modeling situations, your models might contain a lot of +fields, some of which could contain a lot of data (for example, text fields), +or require expensive processing to convert them to Python objects. If you are +using the results of a queryset in some situation where you know you don't +need those particular fields, you can tell Django not to retrieve them from +the database. + +This is done by passing the names of the fields to not load to ``defer()``:: + + Entry.objects.defer("lede", "body") + +A queryset that has deferred fields will still return model instances. Each +deferred field will be retrieved from the database if you access that field +(one at a time, not all the deferred fields at once). + +You can make multiple calls to ``defer()``. Each call adds new fields to the +deferred set:: + + # Defers both the body and lede fields. + Entry.objects.defer("body").filter(headline="Lennon").defer("lede") + +The order in which fields are added to the deferred set does not matter. Calling ``defer()`` with a field name that has already been deferred is harmless (the field will still be deferred). + +You can defer loading of fields in related models (if the related models are +loading via ``select_related()``) by using the standard double-underscore +notation to separate related fields:: + + Blog.objects.select_related().defer("entry__lede", "entry__body") + +If you want to clear the set of deferred fields, pass ``None`` as a parameter +to ``defer()``:: + + # Load all fields immediately. + my_queryset.defer(None) + +Some fields in a model won't be deferred, even if you ask for them. You can +never defer the loading of the primary key. If you are using +``select_related()`` to retrieve other models at the same time you shouldn't +defer the loading of the field that connects from the primary model to the +related one (at the moment, that doesn't raise an error, but it will +eventually). + +.. note:: + + The ``defer()`` method (and its cousin, ``only()``, below) are only for + advanced use-cases. They provide an optimization for when you have + analyzed your queries closely and understand *exactly* what information + you need and have measured that the difference between returning the + fields you need and the full set of fields for the model will be + significant. 
When you are initially developing your applications, don't + bother using ``defer()``; leave it until your query construction has + settled down and you understand where the hot-points are. + +``only(*fields)`` +~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1 + +The ``only()`` method is more or less the opposite of ``defer()``. You +call it with the fields that should *not* be deferred when retrieving a model. +If you have a model where almost all the fields need to be deferred, using +``only()`` to specify the complementary set of fields could result in simpler +code. + +If you have a model with fields ``name``, ``age`` and ``biography``, the +following two querysets are the same, in terms of deferred fields:: + + Person.objects.defer("age", "biography") + Person.objects.only("name") + +Whenever you call ``only()`` it *replaces* the set of fields to load +immediately. The method's name is mnemonic: **only** those fields are loaded +immediately; the remainder are deferred. Thus, successive calls to ``only()`` +result in only the final fields being considered:: + + # This will defer all fields except the headline. + Entry.objects.only("body", "lede").only("headline") + +Since ``defer()`` acts incrementally (adding fields to the deferred list), you +can combine calls to ``only()`` and ``defer()`` and things will behave +logically:: + + # Final result is that everything except "headline" is deferred. + Entry.objects.only("headline", "body").defer("body") + + # Final result loads headline and body immediately (only() replaces any + # existing set of fields). + Entry.objects.defer("body").only("headline", "body") + + QuerySet methods that do not return QuerySets --------------------------------------------- diff --git a/tests/modeltests/defer/__init__.py b/tests/modeltests/defer/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/modeltests/defer/models.py b/tests/modeltests/defer/models.py new file mode 100644 index 00000000000..1f515bc95ed --- /dev/null +++ b/tests/modeltests/defer/models.py @@ -0,0 +1,89 @@ +""" +Tests for defer() and only(). +""" + +from django.db import models +from django.db.models.query_utils import DeferredAttribute + +class Secondary(models.Model): + first = models.CharField(max_length=50) + second = models.CharField(max_length=50) + +class Primary(models.Model): + name = models.CharField(max_length=50) + value = models.CharField(max_length=50) + related = models.ForeignKey(Secondary) + +def count_delayed_fields(obj, debug=False): + """ + Returns the number of delayed attributes on the given model instance. + """ + count = 0 + for field in obj._meta.fields: + if isinstance(obj.__class__.__dict__.get(field.attname), + DeferredAttribute): + if debug: + print field.name, field.attname + count += 1 + return count + + +__test__ = {"API_TEST": """ +To all outward appearances, instances with deferred fields look the same as +normal instances when we examine attribut values. Therefore we test for the +number of deferred fields on returned instances (by poking at the internals), +as a way to observe what is going on. 
+ +>>> s1 = Secondary.objects.create(first="x1", second="y1") +>>> p1 = Primary.objects.create(name="p1", value="xx", related=s1) + +>>> qs = Primary.objects.all() + +>>> count_delayed_fields(qs.defer('name')[0]) +1 +>>> count_delayed_fields(qs.only('name')[0]) +2 +>>> count_delayed_fields(qs.defer('related__first')[0]) +0 +>>> obj = qs.select_related().only('related__first')[0] +>>> count_delayed_fields(obj) +2 +>>> obj.related_id == s1.pk +True +>>> count_delayed_fields(qs.defer('name').extra(select={'a': 1})[0]) +1 +>>> count_delayed_fields(qs.extra(select={'a': 1}).defer('name')[0]) +1 +>>> count_delayed_fields(qs.defer('name').defer('value')[0]) +2 +>>> count_delayed_fields(qs.only('name').only('value')[0]) +2 +>>> count_delayed_fields(qs.only('name').defer('value')[0]) +2 +>>> count_delayed_fields(qs.only('name', 'value').defer('value')[0]) +2 +>>> count_delayed_fields(qs.defer('name').only('value')[0]) +2 +>>> obj = qs.only()[0] +>>> count_delayed_fields(qs.defer(None)[0]) +0 +>>> count_delayed_fields(qs.only('name').defer(None)[0]) +0 + +User values() won't defer anything (you get the full list of dictionaries +back), but it still works. +>>> qs.defer('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 'xx', 'related_id': s1.id} +True +>>> qs.only('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 'xx', 'related_id': s1.id} +True + +Using defer() and only() with get() is also valid. +>>> count_delayed_fields(qs.defer('name').get(pk=p1.pk)) +1 +>>> count_delayed_fields(qs.only('name').get(pk=p1.pk)) +2 + +# KNOWN NOT TO WORK: >>> count_delayed_fields(qs.only('name').select_related('related')[0]) +# KNOWN NOT TO WORK >>> count_delayed_fields(qs.defer('related').select_related('related')[0]) + +"""} diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py index ccf06be5f38..939fc0e86f5 100644 --- a/tests/regressiontests/queries/models.py +++ b/tests/regressiontests/queries/models.py @@ -890,6 +890,12 @@ unpickling. >>> query2.as_sql()[0] == query True +Check pickling of deferred-loading querysets +>>> qs = Item.objects.defer('name', 'creator') +>>> q2 = pickle.loads(pickle.dumps(qs)) +>>> list(qs) == list(q2) +True + Bug #7277 >>> n1.annotation_set.filter(Q(tag=t5) | Q(tag__children=t5) | Q(tag__children__children=t5)) []
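One consequence of the ``deferred_class_factory()`` approach that is worth
keeping in mind (a sketch only; ``Entry`` is an illustrative model name):
instances coming out of a deferred queryset are instances of a dynamically
created subclass of the model, so ``isinstance()`` checks and ordinary
attribute access continue to work::

    obj = Entry.objects.defer("body")[0]
    isinstance(obj, Entry)    # True -- the deferred class subclasses Entry
    obj._deferred             # True (plain instances have _deferred = False)
    type(obj).__name__        # something like 'Entry_Deferred_body'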