Fixed #3566 -- Added support for aggregation to the ORM. See the documentation for details on usage.

Many thanks to:
 * Nicolas Lara, who worked on this feature during the 2008 Google Summer of Code.
 * Alex Gaynor for his help debugging and fixing a number of issues.
 * Justin Bronn for his help integrating with contrib.gis.
 * Karen Tracey for her help with cross-platform testing.
 * Ian Kelly for his help testing and fixing Oracle support.
 * Malcolm Tredinnick for his invaluable review notes.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@9742 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Russell Keith-Magee 2009-01-15 11:06:34 +00:00
parent 50a293a0c3
commit cc4e4d9aee
30 changed files with 2357 additions and 325 deletions

View File

@ -31,6 +31,7 @@ answer newbie questions, and generally made Django that much better:
AgarFu <heaven@croasanaso.sytes.net> AgarFu <heaven@croasanaso.sytes.net>
Dagur Páll Ammendrup <dagurp@gmail.com> Dagur Páll Ammendrup <dagurp@gmail.com>
Collin Anderson <cmawebsite@gmail.com> Collin Anderson <cmawebsite@gmail.com>
Nicolas Lara <nicolaslara@gmail.com>
Jeff Anderson <jefferya@programmerq.net> Jeff Anderson <jefferya@programmerq.net>
Marian Andre <django@andre.sk> Marian Andre <django@andre.sk>
Andreas Andreas

View File

@ -0,0 +1,10 @@
from django.db.models import Aggregate
class Extent(Aggregate):
    """Generic definition of the spatial aggregate identified as 'Extent'."""
    # Identifier of this aggregate function.
    name = 'Extent'
class MakeLine(Aggregate):
    """Generic definition of the spatial aggregate identified as 'MakeLine'."""
    # Identifier of this aggregate function.
    name = 'MakeLine'
class Union(Aggregate):
    """Generic definition of the spatial aggregate identified as 'Union'."""
    # Identifier of this aggregate function.
    name = 'Union'

View File

@ -3,6 +3,7 @@ from django.db import connection
from django.db.models.query import sql, QuerySet, Q from django.db.models.query import sql, QuerySet, Q
from django.contrib.gis.db.backend import SpatialBackend from django.contrib.gis.db.backend import SpatialBackend
from django.contrib.gis.db.models import aggregates
from django.contrib.gis.db.models.fields import GeometryField, PointField from django.contrib.gis.db.models.fields import GeometryField, PointField
from django.contrib.gis.db.models.sql import AreaField, DistanceField, GeomField, GeoQuery, GeoWhereNode from django.contrib.gis.db.models.sql import AreaField, DistanceField, GeomField, GeoQuery, GeoWhereNode
from django.contrib.gis.measure import Area, Distance from django.contrib.gis.measure import Area, Distance
@ -98,20 +99,7 @@ class GeoQuerySet(QuerySet):
Returns the extent (aggregate) of the features in the GeoQuerySet. The Returns the extent (aggregate) of the features in the GeoQuerySet. The
extent will be returned as a 4-tuple, consisting of (xmin, ymin, xmax, ymax). extent will be returned as a 4-tuple, consisting of (xmin, ymin, xmax, ymax).
""" """
convert_extent = None return self._spatial_aggregate(aggregates.Extent, **kwargs)
if SpatialBackend.postgis:
def convert_extent(box, geo_field):
# TODO: Parsing of BOX3D, Oracle support (patches welcome!)
# Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)";
# parsing out and returning as a 4-tuple.
ll, ur = box[4:-1].split(',')
xmin, ymin = map(float, ll.split())
xmax, ymax = map(float, ur.split())
return (xmin, ymin, xmax, ymax)
elif SpatialBackend.oracle:
def convert_extent(wkt, geo_field):
raise NotImplementedError
return self._spatial_aggregate('extent', convert_func=convert_extent, **kwargs)
def gml(self, precision=8, version=2, **kwargs): def gml(self, precision=8, version=2, **kwargs):
""" """
@ -163,9 +151,7 @@ class GeoQuerySet(QuerySet):
this GeoQuerySet and returns it. This is a spatial aggregate this GeoQuerySet and returns it. This is a spatial aggregate
method, and thus returns a geometry rather than a GeoQuerySet. method, and thus returns a geometry rather than a GeoQuerySet.
""" """
kwargs['geo_field_type'] = PointField return self._spatial_aggregate(aggregates.MakeLine, geo_field_type=PointField, **kwargs)
kwargs['agg_field'] = GeometryField
return self._spatial_aggregate('make_line', **kwargs)
def mem_size(self, **kwargs): def mem_size(self, **kwargs):
""" """
@ -288,11 +274,10 @@ class GeoQuerySet(QuerySet):
None if the GeoQuerySet is empty. The `tolerance` keyword is for None if the GeoQuerySet is empty. The `tolerance` keyword is for
Oracle backends only. Oracle backends only.
""" """
kwargs['agg_field'] = GeometryField return self._spatial_aggregate(aggregates.Union, **kwargs)
return self._spatial_aggregate('unionagg', **kwargs)
### Private API -- Abstracted DRY routines. ### ### Private API -- Abstracted DRY routines. ###
def _spatial_setup(self, att, aggregate=False, desc=None, field_name=None, geo_field_type=None): def _spatial_setup(self, att, desc=None, field_name=None, geo_field_type=None):
""" """
Performs set up for executing the spatial function. Performs set up for executing the spatial function.
""" """
@ -316,71 +301,37 @@ class GeoQuerySet(QuerySet):
raise TypeError('"%s" stored procedures may only be called on %ss.' % (func, geo_field_type.__name__)) raise TypeError('"%s" stored procedures may only be called on %ss.' % (func, geo_field_type.__name__))
# Setting the procedure args. # Setting the procedure args.
procedure_args['geo_col'] = self._geocol_select(geo_field, field_name, aggregate) procedure_args['geo_col'] = self._geocol_select(geo_field, field_name)
return procedure_args, geo_field return procedure_args, geo_field
def _spatial_aggregate(self, att, field_name=None, def _spatial_aggregate(self, aggregate, field_name=None,
agg_field=None, convert_func=None, geo_field_type=None, tolerance=0.05):
geo_field_type=None, tolerance=0.0005):
""" """
DRY routine for calling aggregate spatial stored procedures and DRY routine for calling aggregate spatial stored procedures and
returning their result to the caller of the function. returning their result to the caller of the function.
""" """
# Constructing the setup keyword arguments. # Getting the field the geographic aggregate will be called on.
setup_kwargs = {'aggregate' : True, geo_field = self.query._geo_field(field_name)
'field_name' : field_name, if not geo_field:
'geo_field_type' : geo_field_type, raise TypeError('%s aggregate only available on GeometryFields.' % aggregate.name)
}
procedure_args, geo_field = self._spatial_setup(att, **setup_kwargs)
if SpatialBackend.oracle: # Checking if there are any geo field type limitations on this
procedure_args['tolerance'] = tolerance # aggregate (e.g. ST_Makeline only operates on PointFields).
# Adding in selection SQL for Oracle geometry columns. if not geo_field_type is None and not isinstance(geo_field, geo_field_type):
if agg_field is GeometryField: raise TypeError('%s aggregate may only be called on %ss.' % (aggregate.name, geo_field_type.__name__))
agg_sql = '%s' % SpatialBackend.select
else:
agg_sql = '%s'
agg_sql = agg_sql % ('%(function)s(SDOAGGRTYPE(%(geo_col)s,%(tolerance)s))' % procedure_args)
else:
agg_sql = '%(function)s(%(geo_col)s)' % procedure_args
# Wrapping our selection SQL in `GeomSQL` to bypass quoting, and # Getting the string expression of the field name, as this is the
# specifying the type of the aggregate field. # argument taken by `Aggregate` objects.
self.query.select = [GeomSQL(agg_sql)] agg_col = field_name or geo_field.name
self.query.select_fields = [agg_field]
try: # Adding any keyword parameters for the Aggregate object. Oracle backends
# `asql` => not overriding `sql` module. # in particular need an additional `tolerance` parameter.
asql, params = self.query.as_sql() agg_kwargs = {}
except sql.datastructures.EmptyResultSet: if SpatialBackend.oracle: agg_kwargs['tolerance'] = tolerance
return None
# Getting a cursor, executing the query, and extracting the returned # Calling the QuerySet.aggregate, and returning only the value of the aggregate.
# value from the aggregate function. return self.aggregate(_geoagg=aggregate(agg_col, **agg_kwargs))['_geoagg']
cursor = connection.cursor()
cursor.execute(asql, params)
result = cursor.fetchone()[0]
# If the `agg_field` is specified as a GeometryField, then automatically
# set up the conversion function.
if agg_field is GeometryField and not callable(convert_func):
if SpatialBackend.postgis:
def convert_geom(hex, geo_field):
if hex: return SpatialBackend.Geometry(hex)
else: return None
elif SpatialBackend.oracle:
def convert_geom(clob, geo_field):
if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid)
else: return None
convert_func = convert_geom
# Returning the callback function evaluated on the result culled
# from the executed cursor.
if callable(convert_func):
return convert_func(result, geo_field)
else:
return result
def _spatial_attribute(self, att, settings, field_name=None, model_att=None): def _spatial_attribute(self, att, settings, field_name=None, model_att=None):
""" """
@ -595,16 +546,12 @@ class GeoQuerySet(QuerySet):
s['procedure_args']['tolerance'] = tolerance s['procedure_args']['tolerance'] = tolerance
return self._spatial_attribute(func, s, **kwargs) return self._spatial_attribute(func, s, **kwargs)
def _geocol_select(self, geo_field, field_name, aggregate=False): def _geocol_select(self, geo_field, field_name):
""" """
Helper routine for constructing the SQL to select the geographic Helper routine for constructing the SQL to select the geographic
column. Takes into account if the geographic field is in a column. Takes into account if the geographic field is in a
ForeignKey relation to the current model. ForeignKey relation to the current model.
""" """
# If this is an aggregate spatial query, the flag needs to be
# set on the `GeoQuery` object of this queryset.
if aggregate: self.query.aggregate = True
opts = self.model._meta opts = self.model._meta
if not geo_field in opts.fields: if not geo_field in opts.fields:
# Is this operation going to be on a related geographic field? # Is this operation going to be on a related geographic field?

View File

@ -0,0 +1,36 @@
from django.db.models.sql.aggregates import *
from django.contrib.gis.db.models.fields import GeometryField
from django.contrib.gis.db.backend import SpatialBackend
# Pick the SQL template for geographic aggregates once, at import time.
# The Oracle variant wraps the column in SDOAGGRTYPE(...) and therefore
# needs an extra `tolerance` substitution; other backends only need the
# function name and the field.
if SpatialBackend.oracle:
    geo_template = '%(function)s(SDOAGGRTYPE(%(field)s,%(tolerance)s))'
else:
    geo_template = '%(function)s(%(field)s)'
class GeoAggregate(Aggregate):
    """
    Base class for the SQL-level geographic aggregates.

    Replaces the SQL template with the backend-dependent `geo_template` and
    checks, at construction time, that the aggregate is applied to a
    geometry field and that an SQL function is set for it.
    """
    # Overriding the SQL template with the geographic one.
    sql_template = geo_template

    # False for every geographic aggregate except those that override it.
    is_extent = False

    def __init__(self, col, source=None, is_summary=False, **extra):
        """
        Initializes the parent aggregate and then validates it.

        Raises ValueError if `self.source` is not a GeometryField, and
        NotImplementedError if `self.sql_function` is empty.
        """
        super(GeoAggregate, self).__init__(col, source, is_summary, **extra)

        # Can't use geographic aggregates on non-geometry fields.
        if not isinstance(self.source, GeometryField):
            raise ValueError('Geospatial aggregates only allowed on geometry fields.')

        # Making sure the SQL function is available for this spatial backend.
        if not self.sql_function:
            raise NotImplementedError('This aggregate functionality not implemented for your spatial backend.')
class Extent(GeoAggregate):
    """SQL-level extent aggregate, using the spatial backend's `extent` function."""
    # Distinguishes this aggregate from the other GeoAggregate subclasses,
    # which leave the flag at its False default.
    is_extent = True
    sql_function = SpatialBackend.extent
class MakeLine(GeoAggregate):
    """SQL-level aggregate using the spatial backend's `make_line` function."""
    sql_function = SpatialBackend.make_line
class Union(GeoAggregate):
    """SQL-level aggregate using the spatial backend's `unionagg` function."""
    sql_function = SpatialBackend.unionagg

View File

@ -5,6 +5,7 @@ from django.db.models.fields.related import ForeignKey
from django.contrib.gis.db.backend import SpatialBackend from django.contrib.gis.db.backend import SpatialBackend
from django.contrib.gis.db.models.fields import GeometryField from django.contrib.gis.db.models.fields import GeometryField
from django.contrib.gis.db.models.sql import aggregates as gis_aggregates_module
from django.contrib.gis.db.models.sql.where import GeoWhereNode from django.contrib.gis.db.models.sql.where import GeoWhereNode
from django.contrib.gis.measure import Area, Distance from django.contrib.gis.measure import Area, Distance
@ -12,12 +13,35 @@ from django.contrib.gis.measure import Area, Distance
ALL_TERMS = sql.constants.QUERY_TERMS.copy() ALL_TERMS = sql.constants.QUERY_TERMS.copy()
ALL_TERMS.update(SpatialBackend.gis_terms) ALL_TERMS.update(SpatialBackend.gis_terms)
# Conversion functions used in normalizing geographic aggregates.
if SpatialBackend.postgis:
    def convert_extent(box):
        """
        Converts the box text returned by the extent aggregate into a
        4-tuple (xmin, ymin, xmax, ymax).

        Returns None when `box` is empty or None (e.g. a NULL aggregate
        result), mirroring how `convert_geom` treats an empty value.
        """
        # TODO: Parsing of BOX3D, Oracle support (patches welcome!)
        if not box:
            # Nothing to parse; slicing None would raise a TypeError.
            return None
        # Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)";
        # parsing out and returning as a 4-tuple.
        ll, ur = box[4:-1].split(',')
        xmin, ymin = map(float, ll.split())
        xmax, ymax = map(float, ur.split())
        return (xmin, ymin, xmax, ymax)

    def convert_geom(hex, geo_field):
        """
        Builds a backend Geometry from the value returned by the aggregate,
        or returns None when the value is empty. `geo_field` is unused here.
        """
        if hex: return SpatialBackend.Geometry(hex)
        else: return None
else:
    def convert_extent(box):
        """Extent conversion is not available outside of the PostGIS branch."""
        raise NotImplementedError('Aggregate extent not implemented for this spatial backend.')

    def convert_geom(clob, geo_field):
        """
        Reads the returned `clob` and builds a backend Geometry from its
        contents and the SRID of `geo_field`; returns None for an empty value.
        """
        if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid)
        else: return None
class GeoQuery(sql.Query): class GeoQuery(sql.Query):
""" """
A single spatial SQL query. A single spatial SQL query.
""" """
# Overriding the valid query terms. # Overriding the valid query terms.
query_terms = ALL_TERMS query_terms = ALL_TERMS
aggregates_module = gis_aggregates_module
#### Methods overridden from the base Query class #### #### Methods overridden from the base Query class ####
def __init__(self, model, conn): def __init__(self, model, conn):
@ -25,7 +49,6 @@ class GeoQuery(sql.Query):
# The following attributes are customized for the GeoQuerySet. # The following attributes are customized for the GeoQuerySet.
# The GeoWhereNode and SpatialBackend classes contain backend-specific # The GeoWhereNode and SpatialBackend classes contain backend-specific
# routines and functions. # routines and functions.
self.aggregate = False
self.custom_select = {} self.custom_select = {}
self.transformed_srid = None self.transformed_srid = None
self.extra_select_fields = {} self.extra_select_fields = {}
@ -34,7 +57,6 @@ class GeoQuery(sql.Query):
obj = super(GeoQuery, self).clone(*args, **kwargs) obj = super(GeoQuery, self).clone(*args, **kwargs)
# Customized selection dictionary and transformed srid flag have # Customized selection dictionary and transformed srid flag have
# to also be added to obj. # to also be added to obj.
obj.aggregate = self.aggregate
obj.custom_select = self.custom_select.copy() obj.custom_select = self.custom_select.copy()
obj.transformed_srid = self.transformed_srid obj.transformed_srid = self.transformed_srid
obj.extra_select_fields = self.extra_select_fields.copy() obj.extra_select_fields = self.extra_select_fields.copy()
@ -67,38 +89,53 @@ class GeoQuery(sql.Query):
for col, field in izip(self.select, self.select_fields): for col, field in izip(self.select, self.select_fields):
if isinstance(col, (list, tuple)): if isinstance(col, (list, tuple)):
r = self.get_field_select(field, col[0]) r = self.get_field_select(field, col[0])
if with_aliases and col[1] in col_aliases: if with_aliases:
c_alias = 'Col%d' % len(col_aliases) if col[1] in col_aliases:
result.append('%s AS %s' % (r, c_alias)) c_alias = 'Col%d' % len(col_aliases)
aliases.add(c_alias) result.append('%s AS %s' % (r, c_alias))
col_aliases.add(c_alias) aliases.add(c_alias)
col_aliases.add(c_alias)
else:
result.append('%s AS %s' % (r, col[1]))
aliases.add(r)
col_aliases.add(col[1])
else: else:
result.append(r) result.append(r)
aliases.add(r) aliases.add(r)
col_aliases.add(col[1]) col_aliases.add(col[1])
else: else:
result.append(col.as_sql(quote_func=qn)) result.append(col.as_sql(quote_func=qn))
if hasattr(col, 'alias'): if hasattr(col, 'alias'):
aliases.add(col.alias) aliases.add(col.alias)
col_aliases.add(col.alias) col_aliases.add(col.alias)
elif self.default_cols: elif self.default_cols:
cols, new_aliases = self.get_default_columns(with_aliases, cols, new_aliases = self.get_default_columns(with_aliases,
col_aliases) col_aliases)
result.extend(cols) result.extend(cols)
aliases.update(new_aliases) aliases.update(new_aliases)
result.extend([
'%s%s' % (
aggregate.as_sql(quote_func=qn),
alias is not None and ' AS %s' % alias or ''
)
for alias, aggregate in self.aggregate_select.items()
])
# This loop customized for GeoQuery. # This loop customized for GeoQuery.
if not self.aggregate: for (table, col), field in izip(self.related_select_cols, self.related_select_fields):
for (table, col), field in izip(self.related_select_cols, self.related_select_fields): r = self.get_field_select(field, table)
r = self.get_field_select(field, table) if with_aliases and col in col_aliases:
if with_aliases and col in col_aliases: c_alias = 'Col%d' % len(col_aliases)
c_alias = 'Col%d' % len(col_aliases) result.append('%s AS %s' % (r, c_alias))
result.append('%s AS %s' % (r, c_alias)) aliases.add(c_alias)
aliases.add(c_alias) col_aliases.add(c_alias)
col_aliases.add(c_alias) else:
else: result.append(r)
result.append(r) aliases.add(r)
aliases.add(r) col_aliases.add(col)
col_aliases.add(col)
self._select_aliases = aliases self._select_aliases = aliases
return result return result
@ -154,16 +191,6 @@ class GeoQuery(sql.Query):
return result, None return result, None
return result, aliases return result, aliases
def get_ordering(self):
"""
This routine is overridden to disable ordering for aggregate
spatial queries.
"""
if not self.aggregate:
return super(GeoQuery, self).get_ordering()
else:
return ()
def resolve_columns(self, row, fields=()): def resolve_columns(self, row, fields=()):
""" """
This routine is necessary so that distances and geometries returned This routine is necessary so that distances and geometries returned
@ -212,6 +239,19 @@ class GeoQuery(sql.Query):
value = SpatialBackend.Geometry(value) value = SpatialBackend.Geometry(value)
return value return value
def resolve_aggregate(self, value, aggregate):
    """
    Overridden from the parent query class to handle the conversion of
    GeoAggregate objects.

    Values of extent aggregates (`is_extent` set) go through
    `convert_extent`; values of any other GeoAggregate go through
    `convert_geom` together with the aggregate's source field. Every other
    aggregate is delegated to the parent implementation.
    """
    if isinstance(aggregate, self.aggregates_module.GeoAggregate):
        if aggregate.is_extent:
            return convert_extent(value)
        else:
            return convert_geom(value, aggregate.source)
    else:
        return super(GeoQuery, self).resolve_aggregate(value, aggregate)
#### Routines unique to GeoQuery #### #### Routines unique to GeoQuery ####
def get_extra_select_format(self, alias): def get_extra_select_format(self, alias):
sel_fmt = '%s' sel_fmt = '%s'

View File

@ -10,6 +10,12 @@ except NameError:
# Python 2.3 compat # Python 2.3 compat
from sets import Set as set from sets import Set as set
try:
import decimal
except ImportError:
# Python 2.3 fallback
from django.utils import _decimal as decimal
from django.db.backends import util from django.db.backends import util
from django.utils import datetime_safe from django.utils import datetime_safe
@ -62,6 +68,7 @@ class BaseDatabaseWrapper(local):
return util.CursorDebugWrapper(cursor, self) return util.CursorDebugWrapper(cursor, self)
class BaseDatabaseFeatures(object): class BaseDatabaseFeatures(object):
allows_group_by_pk = False
# True if django.db.backend.utils.typecast_timestamp is used on values # True if django.db.backend.utils.typecast_timestamp is used on values
# returned from dates() calls. # returned from dates() calls.
needs_datetime_string_cast = True needs_datetime_string_cast = True
@ -376,6 +383,22 @@ class BaseDatabaseOperations(object):
""" """
return self.year_lookup_bounds(value) return self.year_lookup_bounds(value)
def convert_values(self, value, field):
    """Coerce the value returned by the database backend into a consistent type that
    is compatible with the field type.

     * value is the raw value handed back by the backend. None is returned
       unchanged, since it cannot be coerced to a number.
     * field is the field describing the value, or None when no field is
       known, in which case the value is coerced to a float.
    """
    # A NULL result has no numeric form; int(None)/float(None) would raise.
    if value is None:
        return None

    # Only ask the field for its type when there actually is a field.
    if field is None:
        internal_type = None
    else:
        internal_type = field.get_internal_type()

    if internal_type == 'DecimalField':
        return value
    elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField':
        return int(value)
    elif internal_type in ('DateField', 'DateTimeField', 'TimeField'):
        return value
    # No field, or the field isn't known to be a decimal or integer
    # Default to a float
    return float(value)
class BaseDatabaseIntrospection(object): class BaseDatabaseIntrospection(object):
""" """
This class encapsulates all backend-specific introspection utilities This class encapsulates all backend-specific introspection utilities

View File

@ -110,6 +110,7 @@ class CursorWrapper(object):
class DatabaseFeatures(BaseDatabaseFeatures): class DatabaseFeatures(BaseDatabaseFeatures):
empty_fetchmany_value = () empty_fetchmany_value = ()
update_can_self_select = False update_can_self_select = False
allows_group_by_pk = True
related_fields_match_type = True related_fields_match_type = True
class DatabaseOperations(BaseDatabaseOperations): class DatabaseOperations(BaseDatabaseOperations):

View File

@ -53,21 +53,23 @@ def query_class(QueryClass, Database):
return values return values
def convert_values(self, value, field): def convert_values(self, value, field):
from django.db.models.fields import DateField, DateTimeField, \ from django.db.models.fields import Field
TimeField, BooleanField, NullBooleanField, DecimalField, Field
if isinstance(value, Database.LOB): if isinstance(value, Database.LOB):
value = value.read() value = value.read()
# Oracle stores empty strings as null. We need to undo this in # Oracle stores empty strings as null. We need to undo this in
# order to adhere to the Django convention of using the empty # order to adhere to the Django convention of using the empty
# string instead of null, but only if the field accepts the # string instead of null, but only if the field accepts the
# empty string. # empty string.
if value is None and isinstance(field, Field) and field.empty_strings_allowed: if value is None and field and field.empty_strings_allowed:
value = u'' value = u''
# Convert 1 or 0 to True or False # Convert 1 or 0 to True or False
elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)): elif value in (1, 0) and field and field.get_internal_type() in ('BooleanField', 'NullBooleanField'):
value = bool(value) value = bool(value)
# Force floats to the correct type
elif value is not None and field and field.get_internal_type() == 'FloatField':
value = float(value)
# Convert floats to decimals # Convert floats to decimals
elif value is not None and isinstance(field, DecimalField): elif value is not None and field and field.get_internal_type() == 'DecimalField':
value = util.typecast_decimal(field.format_number(value)) value = util.typecast_decimal(field.format_number(value))
# cx_Oracle always returns datetime.datetime objects for # cx_Oracle always returns datetime.datetime objects for
# DATE and TIMESTAMP columns, but Django wants to see a # DATE and TIMESTAMP columns, but Django wants to see a
@ -86,13 +88,9 @@ def query_class(QueryClass, Database):
value = datetime.datetime(value.year, value.month, value = datetime.datetime(value.year, value.month,
value.day, value.hour, value.minute, value.second, value.day, value.hour, value.minute, value.second,
value.fsecond) value.fsecond)
if isinstance(field, DateTimeField): if field and field.get_internal_type() == 'DateField':
# DateTimeField subclasses DateField so must be checked
# first.
pass
elif isinstance(field, DateField):
value = value.date() value = value.date()
elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1): elif field and field.get_internal_type() == 'TimeField' or (value.year == 1900 and value.month == value.day == 1):
value = value.time() value = value.time()
elif value.hour == value.minute == value.second == value.microsecond == 0: elif value.hour == value.minute == value.second == value.microsecond == 0:
value = value.date() value = value.date()

View File

@ -102,6 +102,26 @@ class DatabaseOperations(BaseDatabaseOperations):
second = '%s-12-31 23:59:59.999999' second = '%s-12-31 23:59:59.999999'
return [first % value, second % value] return [first % value, second % value]
def convert_values(self, value, field):
    """SQLite returns floats when it should be returning decimals,
    and gets dates and datetimes wrong.
    For consistency with other backends, coerce when required.

     * value is the raw value handed back by SQLite. None is returned
       unchanged.
     * field is the field describing the value, or None when no field is
       known, in which case the value is returned as-is.
    """
    # Nothing to coerce for a NULL result, and nothing to coerce *to*
    # without a field; avoids int(None) and calling methods on None.
    if value is None or field is None:
        return value

    internal_type = field.get_internal_type()
    if internal_type == 'DecimalField':
        return util.typecast_decimal(field.format_number(value))
    elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField':
        return int(value)
    elif internal_type == 'DateField':
        return util.typecast_date(value)
    elif internal_type == 'DateTimeField':
        return util.typecast_timestamp(value)
    elif internal_type == 'TimeField':
        return util.typecast_time(value)
    # The field isn't known to need any coercion
    return value
class DatabaseWrapper(BaseDatabaseWrapper): class DatabaseWrapper(BaseDatabaseWrapper):
# SQLite requires LIKE statements to include an ESCAPE clause if the value # SQLite requires LIKE statements to include an ESCAPE clause if the value

View File

@ -5,6 +5,7 @@ from django.db.models.loading import get_apps, get_app, get_models, get_model, r
from django.db.models.query import Q from django.db.models.query import Q
from django.db.models.manager import Manager from django.db.models.manager import Manager
from django.db.models.base import Model from django.db.models.base import Model
from django.db.models.aggregates import *
from django.db.models.fields import * from django.db.models.fields import *
from django.db.models.fields.subclassing import SubfieldBase from django.db.models.fields.subclassing import SubfieldBase
from django.db.models.fields.files import FileField, ImageField from django.db.models.fields.files import FileField, ImageField

View File

@ -0,0 +1,66 @@
"""
Classes to represent the definitions of aggregate functions.
"""
class Aggregate(object):
    """
    Backend-independent description of an aggregate function.
    """
    def __init__(self, lookup, **extra):
        """Create an aggregate definition.

         * lookup names the field the aggregate operates on.
         * extra holds any additional keyword data for the aggregate
           definition.

        Relies on the class variable:
         * name, the identifier of this aggregate function.
        """
        self.extra = extra
        self.lookup = lookup

    def _default_alias(self):
        # Combines the lookup with the lower-cased aggregate name.
        alias_parts = (self.lookup, self.name.lower())
        return '%s__%s' % alias_parts
    default_alias = property(_default_alias)

    def add_to_query(self, query, alias, col, source, is_summary):
        """Register this aggregate on the given query.

        Translates the generic definition into the class of the same name
        found in the query's aggregates module.

         * query is the query instance receiving the aggregate.
         * alias is the key under which the aggregate is stored in the
           query's `aggregate_select`.
         * col is a column reference describing the subject field
           of the aggregate. It can be an alias, or a tuple describing
           a table and column name.
         * source is the underlying field or aggregate definition for
           the column reference. If the aggregate is not an ordinal or
           computed type, this reference is used to determine the coerced
           output type of the aggregate.
         * is_summary is a boolean that is set True if the aggregate is a
           summary value rather than an annotation.
        """
        backend_class = getattr(query.aggregates_module, self.name)
        backend_aggregate = backend_class(col, source=source, is_summary=is_summary, **self.extra)
        query.aggregate_select[alias] = backend_aggregate
class Avg(Aggregate):
    """Aggregate definition identified by the name 'Avg'."""
    name = 'Avg'
class Count(Aggregate):
    """Aggregate definition identified by the name 'Count'."""
    name = 'Count'
class Max(Aggregate):
    """Aggregate definition identified by the name 'Max'."""
    name = 'Max'
class Min(Aggregate):
    """Aggregate definition identified by the name 'Min'."""
    name = 'Min'
class StdDev(Aggregate):
    """Aggregate definition identified by the name 'StdDev'."""
    name = 'StdDev'
class Sum(Aggregate):
    """Aggregate definition identified by the name 'Sum'."""
    name = 'Sum'
class Variance(Aggregate):
    """Aggregate definition identified by the name 'Variance'."""
    name = 'Variance'

View File

@ -101,6 +101,12 @@ class Manager(object):
def filter(self, *args, **kwargs): def filter(self, *args, **kwargs):
return self.get_query_set().filter(*args, **kwargs) return self.get_query_set().filter(*args, **kwargs)
def aggregate(self, *args, **kwargs):
    """Forward the call to `aggregate` on this manager's query set."""
    queryset = self.get_query_set()
    return queryset.aggregate(*args, **kwargs)
def annotate(self, *args, **kwargs):
    """Forward the call to `annotate` on this manager's query set."""
    queryset = self.get_query_set()
    return queryset.annotate(*args, **kwargs)
def complex_filter(self, *args, **kwargs): def complex_filter(self, *args, **kwargs):
return self.get_query_set().complex_filter(*args, **kwargs) return self.get_query_set().complex_filter(*args, **kwargs)

View File

@ -4,6 +4,7 @@ except NameError:
from sets import Set as set # Python 2.3 fallback from sets import Set as set # Python 2.3 fallback
from django.db import connection, transaction, IntegrityError from django.db import connection, transaction, IntegrityError
from django.db.models.aggregates import Aggregate
from django.db.models.fields import DateField from django.db.models.fields import DateField
from django.db.models.query_utils import Q, select_related_descend from django.db.models.query_utils import Q, select_related_descend
from django.db.models import signals, sql from django.db.models import signals, sql
@ -270,18 +271,47 @@ class QuerySet(object):
else: else:
requested = None requested = None
max_depth = self.query.max_depth max_depth = self.query.max_depth
extra_select = self.query.extra_select.keys() extra_select = self.query.extra_select.keys()
aggregate_select = self.query.aggregate_select.keys()
index_start = len(extra_select) index_start = len(extra_select)
aggregate_start = index_start + len(self.model._meta.fields)
for row in self.query.results_iter(): for row in self.query.results_iter():
if fill_cache: if fill_cache:
obj, _ = get_cached_row(self.model, row, index_start, obj, aggregate_start = get_cached_row(self.model, row,
max_depth, requested=requested) index_start, max_depth, requested=requested)
else: else:
obj = self.model(*row[index_start:]) # omit aggregates in object creation
obj = self.model(*row[index_start:aggregate_start])
for i, k in enumerate(extra_select): for i, k in enumerate(extra_select):
setattr(obj, k, row[i]) setattr(obj, k, row[i])
# Add the aggregates to the model
for i, aggregate in enumerate(aggregate_select):
setattr(obj, aggregate, row[i+aggregate_start])
yield obj yield obj
def aggregate(self, *args, **kwargs):
    """
    Returns a dictionary containing the calculations (aggregation)
    over the current queryset.

    If args is present, each expression is passed as a kwarg using
    the Aggregate object's default alias.

    The aggregates are added to a clone of the query, so the queryset
    this method is called on is left unmodified and can be reused.
    """
    for arg in args:
        kwargs[arg.default_alias] = arg

    # Summary aggregates must not stay on self.query; otherwise any later
    # use of this queryset would still carry them.
    query = self.query.clone()

    for (alias, aggregate_expr) in kwargs.items():
        query.add_aggregate(aggregate_expr, self.model, alias,
            is_summary=True)

    return query.get_aggregation()
def count(self): def count(self):
""" """
Performs a SELECT COUNT() and returns the number of records as an Performs a SELECT COUNT() and returns the number of records as an
@ -553,6 +583,25 @@ class QuerySet(object):
""" """
self.query.select_related = other.query.select_related self.query.select_related = other.query.select_related
def annotate(self, *args, **kwargs):
    """
    Returns a clone of this query set whose query has been prepared for
    aggregate annotations and extended with the given aggregates.

    Positional aggregates are registered under their default alias;
    keyword aggregates under the keyword used.
    """
    for positional in args:
        kwargs[positional.default_alias] = positional

    clone = self._clone()
    clone._setup_aggregate_query()

    # Each aggregate is added as an annotation, not as a summary value.
    for alias, expression in kwargs.items():
        clone.query.add_aggregate(expression, self.model, alias,
            is_summary=False)

    return clone
def order_by(self, *field_names): def order_by(self, *field_names):
""" """
Returns a new QuerySet instance with the ordering changed. Returns a new QuerySet instance with the ordering changed.
@ -641,6 +690,16 @@ class QuerySet(object):
""" """
pass pass
def _setup_aggregate_query(self):
    """
    Prepare the query for computing a result that contains aggregate
    annotations. Does nothing when the query already has a grouping.
    """
    meta = self.model._meta
    query = self.query
    if query.group_by:
        return

    # Select every model field by attribute name, then group by them.
    attnames = []
    for field in meta.fields:
        attnames.append(field.attname)
    query.add_fields(attnames, False)
    query.set_group_by()
def as_sql(self): def as_sql(self):
""" """
Returns the internal query's SQL and parameters (as a tuple). Returns the internal query's SQL and parameters (as a tuple).
@ -669,6 +728,8 @@ class ValuesQuerySet(QuerySet):
len(self.field_names) != len(self.model._meta.fields)): len(self.field_names) != len(self.model._meta.fields)):
self.query.trim_extra_select(self.extra_names) self.query.trim_extra_select(self.extra_names)
names = self.query.extra_select.keys() + self.field_names names = self.query.extra_select.keys() + self.field_names
names.extend(self.query.aggregate_select.keys())
for row in self.query.results_iter(): for row in self.query.results_iter():
yield dict(zip(names, row)) yield dict(zip(names, row))
@ -682,20 +743,25 @@ class ValuesQuerySet(QuerySet):
""" """
self.query.clear_select_fields() self.query.clear_select_fields()
self.extra_names = [] self.extra_names = []
self.aggregate_names = []
if self._fields: if self._fields:
if not self.query.extra_select: if not self.query.extra_select and not self.query.aggregate_select:
field_names = list(self._fields) field_names = list(self._fields)
else: else:
field_names = [] field_names = []
for f in self._fields: for f in self._fields:
if self.query.extra_select.has_key(f): if self.query.extra_select.has_key(f):
self.extra_names.append(f) self.extra_names.append(f)
elif self.query.aggregate_select.has_key(f):
self.aggregate_names.append(f)
else: else:
field_names.append(f) field_names.append(f)
else: else:
# Default to all fields. # Default to all fields.
field_names = [f.attname for f in self.model._meta.fields] field_names = [f.attname for f in self.model._meta.fields]
self.query.select = []
self.query.add_fields(field_names, False) self.query.add_fields(field_names, False)
self.query.default_cols = False self.query.default_cols = False
self.field_names = field_names self.field_names = field_names
@ -711,6 +777,7 @@ class ValuesQuerySet(QuerySet):
c._fields = self._fields[:] c._fields = self._fields[:]
c.field_names = self.field_names c.field_names = self.field_names
c.extra_names = self.extra_names c.extra_names = self.extra_names
c.aggregate_names = self.aggregate_names
if setup and hasattr(c, '_setup_query'): if setup and hasattr(c, '_setup_query'):
c._setup_query() c._setup_query()
return c return c
@ -718,10 +785,18 @@ class ValuesQuerySet(QuerySet):
def _merge_sanity_check(self, other): def _merge_sanity_check(self, other):
super(ValuesQuerySet, self)._merge_sanity_check(other) super(ValuesQuerySet, self)._merge_sanity_check(other)
if (set(self.extra_names) != set(other.extra_names) or if (set(self.extra_names) != set(other.extra_names) or
set(self.field_names) != set(other.field_names)): set(self.field_names) != set(other.field_names) or
self.aggregate_names != other.aggregate_names):
raise TypeError("Merging '%s' classes must involve the same values in each case." raise TypeError("Merging '%s' classes must involve the same values in each case."
% self.__class__.__name__) % self.__class__.__name__)
def _setup_aggregate_query(self):
    """
    Get the underlying query ready to carry aggregate annotations.

    The GROUP BY clause is built from the columns currently selected
    before control passes to the parent implementation.
    """
    query = self.query
    # Group first: the parent implementation only adds fields and groups
    # when the query has no GROUP BY entries yet.
    query.set_group_by()

    super(ValuesQuerySet, self)._setup_aggregate_query()
class ValuesListQuerySet(ValuesQuerySet): class ValuesListQuerySet(ValuesQuerySet):
def iterator(self): def iterator(self):
@ -729,14 +804,14 @@ class ValuesListQuerySet(ValuesQuerySet):
if self.flat and len(self._fields) == 1: if self.flat and len(self._fields) == 1:
for row in self.query.results_iter(): for row in self.query.results_iter():
yield row[0] yield row[0]
elif not self.query.extra_select: elif not self.query.extra_select and not self.query.aggregate_select:
for row in self.query.results_iter(): for row in self.query.results_iter():
yield tuple(row) yield tuple(row)
else: else:
# When extra(select=...) is involved, the extra cols come are # When extra(select=...) is involved, the extra cols come are
# always at the start of the row, so we need to reorder the fields # always at the start of the row, so we need to reorder the fields
# to match the order in self._fields. # to match the order in self._fields.
names = self.query.extra_select.keys() + self.field_names names = self.query.extra_select.keys() + self.field_names + self.query.aggregate_select.keys()
for row in self.query.results_iter(): for row in self.query.results_iter():
data = dict(zip(names, row)) data = dict(zip(names, row))
yield tuple([data[f] for f in self._fields]) yield tuple([data[f] for f in self._fields])

View File

@ -64,4 +64,3 @@ def select_related_descend(field, restricted, requested):
if not restricted and field.null: if not restricted and field.null:
return False return False
return True return True

View File

@ -0,0 +1,130 @@
"""
Classes to represent the default SQL aggregate functions
"""
class AggregateField(object):
    """
    Minimal stand-in for a model field.

    It only knows how to report an internal type name, which lets the
    data-conversion parts of the database backend recognise the output
    type of an aggregate.
    """
    def __init__(self, internal_type):
        # Name of the field type this mock-up reports.
        self.internal_type = internal_type

    def get_internal_type(self):
        "Return the internal type name given at construction."
        return self.internal_type


# Shared mock fields: one for aggregates flagged as ordinal, one for
# aggregates flagged as computed.
ordinal_aggregate_field = AggregateField('IntegerField')
computed_aggregate_field = AggregateField('FloatField')
class Aggregate(object):
    """
    Base class for the default SQL aggregates.

    Class-level configuration:
     * sql_function: name of the SQL function to render. It is not
       defined here; subclasses or instances are expected to set it.
     * sql_template: format string used to render the aggregate.
     * is_ordinal: True when the output is always an integer
       (e.g., a count).
     * is_computed: True when the output is always a float
       (e.g., an average), whatever the input type.
    """
    is_ordinal = False
    is_computed = False
    sql_template = '%(function)s(%(field)s)'

    def __init__(self, col, source=None, is_summary=False, **extra):
        """
        Set up an SQL aggregate.

         * col: reference to the aggregated column. It may be an alias,
           a (table, column) pair, or an object with its own as_sql().
         * source: the field or aggregate that col refers to. It decides
           the output type when the aggregate is neither ordinal nor
           computed.
         * is_summary: stored as given on the instance.
         * extra: additional values made available to sql_template.
        """
        self.col = col
        self.source = source
        self.is_summary = is_summary
        self.extra = extra

        # Walk back through the chain of sources until reaching something
        # that is not an Aggregate (a real field or None), or an aggregate
        # that forces an integer/float result. Whatever the walk ends on
        # is stored as self.field for coercing database values.
        node = self
        while node and isinstance(node, Aggregate):
            if getattr(node, 'is_ordinal', False):
                node = ordinal_aggregate_field
            elif getattr(node, 'is_computed', False):
                node = computed_aggregate_field
            else:
                node = node.source
        self.field = node

    def relabel_aliases(self, change_map):
        "Rename the table part of a (table, column) reference via change_map."
        if not isinstance(self.col, (list, tuple)):
            # Plain aliases and expression objects have no table to rename.
            return
        table = self.col[0]
        self.col = (change_map.get(table, table), self.col[1])

    def as_sql(self, quote_func=None):
        "Render the aggregate as an SQL fragment."
        quote = quote_func or (lambda name: name)

        column = self.col
        if hasattr(column, 'as_sql'):
            # Column is an expression that renders itself.
            field_name = column.as_sql(quote)
        elif isinstance(column, (list, tuple)):
            field_name = '.'.join([quote(part) for part in column])
        else:
            # Plain alias: used verbatim, unquoted.
            field_name = column

        params = {'function': self.sql_function, 'field': field_name}
        # Extra values are applied last, so they may override the defaults.
        params.update(self.extra)
        return self.sql_template % params
class Avg(Aggregate):
    "AVG() aggregate; flagged as computed, so its output is a float."
    sql_function = 'AVG'
    is_computed = True
class Count(Aggregate):
    """
    COUNT() aggregate; flagged as ordinal, so its output is an integer.

    The template has an extra %(distinct)s slot in front of the column.
    """
    sql_function = 'COUNT'
    sql_template = '%(function)s(%(distinct)s%(field)s)'
    is_ordinal = True

    def __init__(self, col, distinct=False, **extra):
        # Turn the flag into the literal SQL that fills %(distinct)s.
        if distinct:
            distinct_sql = 'DISTINCT '
        else:
            distinct_sql = ''
        super(Count, self).__init__(col, distinct=distinct_sql, **extra)
class Max(Aggregate):
    "MAX() aggregate; output type follows the source of the column."
    sql_function = 'MAX'
class Min(Aggregate):
    "MIN() aggregate; output type follows the source of the column."
    sql_function = 'MIN'
class StdDev(Aggregate):
    """
    Standard deviation aggregate; flagged as computed (float output).

    The SQL function is picked per instance: STDDEV_SAMP when sample is
    true, STDDEV_POP otherwise.
    """
    is_computed = True

    def __init__(self, col, sample=False, **extra):
        super(StdDev, self).__init__(col, **extra)
        if sample:
            self.sql_function = 'STDDEV_SAMP'
        else:
            self.sql_function = 'STDDEV_POP'
class Sum(Aggregate):
    "SUM() aggregate; output type follows the source of the column."
    sql_function = 'SUM'
class Variance(Aggregate):
    """
    Variance aggregate; flagged as computed (float output).

    The SQL function is picked per instance: VAR_SAMP when sample is
    true, VAR_POP otherwise.
    """
    is_computed = True

    def __init__(self, col, sample=False, **extra):
        super(Variance, self).__init__(col, **extra)
        if sample:
            self.sql_function = 'VAR_SAMP'
        else:
            self.sql_function = 'VAR_POP'

View File

@ -25,59 +25,6 @@ class RawValue(object):
def __init__(self, value): def __init__(self, value):
self.value = value self.value = value
class Aggregate(object):
    """
    Abstract parent of the aggregate-related classes (min, max, avg,
    count, sum). Both methods must be supplied by subclasses.
    """
    def relabel_aliases(self, change_map):
        """
        Rename the column alias where needed. Subclasses must implement
        this.
        """
        raise NotImplementedError

    def as_sql(self, quote_func=None):
        """
        Return this object's SQL string fragment.

        quote_func quotes the individual column components; None means
        no quoting is applied. Subclasses must implement this.
        """
        raise NotImplementedError
class Count(Aggregate):
    """
    COUNT over a single column reference.
    """
    def __init__(self, col='*', distinct=False):
        """
        col is the column to count ('*' unless given); distinct selects
        COUNT(DISTINCT ...) instead of a plain COUNT(...).
        """
        self.col = col
        self.distinct = distinct

    def relabel_aliases(self, change_map):
        "Rename the table part of a (table, column) reference via change_map."
        column = self.col
        if not isinstance(column, (list, tuple)):
            return
        self.col = (change_map.get(column[0], column[0]), column[1])

    def as_sql(self, quote_func=None):
        "Render the count as an SQL fragment."
        quote = quote_func or (lambda name: name)

        if isinstance(self.col, (list, tuple)):
            # Exactly two components are expected: table and column.
            rendered = '%s.%s' % tuple([quote(part) for part in self.col])
        elif hasattr(self.col, 'as_sql'):
            rendered = self.col.as_sql(quote)
        else:
            rendered = self.col

        if self.distinct:
            template = 'COUNT(DISTINCT %s)'
        else:
            template = 'COUNT(%s)'
        return template % rendered
class Date(object): class Date(object):
""" """
Add a date selection column. Add a date selection column.

View File

@ -12,12 +12,13 @@ from copy import deepcopy
from django.utils.tree import Node from django.utils.tree import Node
from django.utils.datastructures import SortedDict from django.utils.datastructures import SortedDict
from django.utils.encoding import force_unicode from django.utils.encoding import force_unicode
from django.db.backends.util import truncate_name
from django.db import connection from django.db import connection
from django.db.models import signals from django.db.models import signals
from django.db.models.fields import FieldDoesNotExist from django.db.models.fields import FieldDoesNotExist
from django.db.models.query_utils import select_related_descend from django.db.models.query_utils import select_related_descend
from django.db.models.sql import aggregates as base_aggregates_module
from django.db.models.sql.where import WhereNode, Constraint, EverythingNode, AND, OR from django.db.models.sql.where import WhereNode, Constraint, EverythingNode, AND, OR
from django.db.models.sql.datastructures import Count
from django.core.exceptions import FieldError from django.core.exceptions import FieldError
from datastructures import EmptyResultSet, Empty, MultiJoin from datastructures import EmptyResultSet, Empty, MultiJoin
from constants import * from constants import *
@ -40,6 +41,7 @@ class BaseQuery(object):
alias_prefix = 'T' alias_prefix = 'T'
query_terms = QUERY_TERMS query_terms = QUERY_TERMS
aggregates_module = base_aggregates_module
def __init__(self, model, connection, where=WhereNode): def __init__(self, model, connection, where=WhereNode):
self.model = model self.model = model
@ -73,6 +75,9 @@ class BaseQuery(object):
self.select_related = False self.select_related = False
self.related_select_cols = [] self.related_select_cols = []
# SQL aggregate-related attributes
self.aggregate_select = SortedDict() # Maps alias -> SQL aggregate function
# Arbitrary maximum limit for select_related. Prevents infinite # Arbitrary maximum limit for select_related. Prevents infinite
# recursion. Can be changed by the depth parameter to select_related(). # recursion. Can be changed by the depth parameter to select_related().
self.max_depth = 5 self.max_depth = 5
@ -178,6 +183,7 @@ class BaseQuery(object):
obj.distinct = self.distinct obj.distinct = self.distinct
obj.select_related = self.select_related obj.select_related = self.select_related
obj.related_select_cols = [] obj.related_select_cols = []
obj.aggregate_select = self.aggregate_select.copy()
obj.max_depth = self.max_depth obj.max_depth = self.max_depth
obj.extra_select = self.extra_select.copy() obj.extra_select = self.extra_select.copy()
obj.extra_tables = self.extra_tables obj.extra_tables = self.extra_tables
@ -194,6 +200,35 @@ class BaseQuery(object):
obj._setup_query() obj._setup_query()
return obj return obj
def convert_values(self, value, field):
    """
    Turn a value returned by the database into a type that is the same
    on every backend.

    This implementation hands the work to the connection's backend
    operations; Query classes for specific backends may override it.
    """
    backend_ops = self.connection.ops
    return backend_ops.convert_values(value, field)
def resolve_aggregate(self, value, aggregate):
    """
    Coerce an aggregate value returned by the database to a consistent
    (and reasonable) Python type.

    Some backends hand back Decimal or long values where they are not
    needed. The rules, applied in order:
     * None is passed through unchanged;
     * ordinal aggregates (e.g., count) yield an int;
     * computed aggregates (e.g., avg) yield a float;
     * anything else is converted according to the aggregate's field.
    """
    if value is None:
        return value
    if aggregate.is_ordinal:
        return int(value)
    if aggregate.is_computed:
        return float(value)
    # The result type depends on the field being aggregated.
    return self.convert_values(value, aggregate.field)
def results_iter(self): def results_iter(self):
""" """
Returns an iterator over the results from executing this query. Returns an iterator over the results from executing this query.
@ -212,29 +247,78 @@ class BaseQuery(object):
else: else:
fields = self.model._meta.fields fields = self.model._meta.fields
row = self.resolve_columns(row, fields) row = self.resolve_columns(row, fields)
if self.aggregate_select:
aggregate_start = len(self.extra_select.keys()) + len(self.select)
row = tuple(row[:aggregate_start]) + tuple([
self.resolve_aggregate(value, aggregate)
for (alias, aggregate), value
in zip(self.aggregate_select.items(), row[aggregate_start:])
])
yield row yield row
def get_aggregation(self):
    """
    Execute the query and return a dictionary mapping each aggregate
    alias to its resolved value.

    Returns an empty dictionary when no aggregates are registered. When
    executing the query yields no row at all, every alias maps to None.

    Note that when the query has no GROUP BY clause it is executed in
    place: this object's select list, default columns and extra select
    are cleared, as are its ordering, limits and select_related state.
    """
    if not self.aggregate_select:
        return {}

    # If there is a group by clause, aggregating does not add useful
    # information but retrieves only the first row. Aggregate
    # over the subquery instead.
    if self.group_by:
        from subqueries import AggregateQuery
        query = AggregateQuery(self.model, self.connection)

        obj = self.clone()

        # Remove any aggregates marked for reduction from the subquery
        # and move them to the outer AggregateQuery.
        for alias, aggregate in self.aggregate_select.items():
            if aggregate.is_summary:
                query.aggregate_select[alias] = aggregate
                del obj.aggregate_select[alias]

        query.add_subquery(obj)
    else:
        query = self
        self.select = []
        self.default_cols = False
        self.extra_select = {}

    query.clear_ordering(True)
    query.clear_limits()
    query.select_related = False
    query.related_select_cols = []
    query.related_select_fields = []

    result = query.execute_sql(SINGLE)
    if result is None:
        # No row came back. Report every aggregate as None rather than
        # failing inside zip() with a TypeError.
        result = [None] * len(query.aggregate_select)

    return dict([
        (alias, self.resolve_aggregate(val, aggregate))
        for (alias, aggregate), val
        in zip(query.aggregate_select.items(), result)
    ])
def get_count(self): def get_count(self):
""" """
Performs a COUNT() query using the current filter constraints. Performs a COUNT() query using the current filter constraints.
""" """
from subqueries import CountQuery
obj = self.clone() obj = self.clone()
obj.clear_ordering(True) if len(self.select) > 1:
obj.clear_limits() # If a select clause exists, then the query has already started to
obj.select_related = False # specify the columns that are to be returned.
obj.related_select_cols = [] # In this case, we need to use a subquery to evaluate the count.
obj.related_select_fields = [] from subqueries import AggregateQuery
if len(obj.select) > 1: subquery = obj
obj = self.clone(CountQuery, _query=obj, where=self.where_class(), subquery.clear_ordering(True)
distinct=False) subquery.clear_limits()
obj.select = []
obj.extra_select = SortedDict() obj = AggregateQuery(obj.model, obj.connection)
obj.add_subquery(subquery)
obj.add_count_column() obj.add_count_column()
data = obj.execute_sql(SINGLE) number = obj.get_aggregation()[None]
if not data:
return 0
number = data[0]
# Apply offset and limit constraints manually, since using LIMIT/OFFSET # Apply offset and limit constraints manually, since using LIMIT/OFFSET
# in SQL (in variants that provide them) doesn't change the COUNT # in SQL (in variants that provide them) doesn't change the COUNT
@ -450,25 +534,41 @@ class BaseQuery(object):
for col in self.select: for col in self.select:
if isinstance(col, (list, tuple)): if isinstance(col, (list, tuple)):
r = '%s.%s' % (qn(col[0]), qn(col[1])) r = '%s.%s' % (qn(col[0]), qn(col[1]))
if with_aliases and col[1] in col_aliases: if with_aliases:
c_alias = 'Col%d' % len(col_aliases) if col[1] in col_aliases:
result.append('%s AS %s' % (r, c_alias)) c_alias = 'Col%d' % len(col_aliases)
aliases.add(c_alias) result.append('%s AS %s' % (r, c_alias))
col_aliases.add(c_alias) aliases.add(c_alias)
col_aliases.add(c_alias)
else:
result.append('%s AS %s' % (r, col[1]))
aliases.add(r)
col_aliases.add(col[1])
else: else:
result.append(r) result.append(r)
aliases.add(r) aliases.add(r)
col_aliases.add(col[1]) col_aliases.add(col[1])
else: else:
result.append(col.as_sql(quote_func=qn)) result.append(col.as_sql(quote_func=qn))
if hasattr(col, 'alias'): if hasattr(col, 'alias'):
aliases.add(col.alias) aliases.add(col.alias)
col_aliases.add(col.alias) col_aliases.add(col.alias)
elif self.default_cols: elif self.default_cols:
cols, new_aliases = self.get_default_columns(with_aliases, cols, new_aliases = self.get_default_columns(with_aliases,
col_aliases) col_aliases)
result.extend(cols) result.extend(cols)
aliases.update(new_aliases) aliases.update(new_aliases)
result.extend([
'%s%s' % (
aggregate.as_sql(quote_func=qn),
alias is not None and ' AS %s' % qn(alias) or ''
)
for alias, aggregate in self.aggregate_select.items()
])
for table, col in self.related_select_cols: for table, col in self.related_select_cols:
r = '%s.%s' % (qn(table), qn(col)) r = '%s.%s' % (qn(table), qn(col))
if with_aliases and col in col_aliases: if with_aliases and col in col_aliases:
@ -538,7 +638,7 @@ class BaseQuery(object):
Returns a list of strings that are joined together to go after the Returns a list of strings that are joined together to go after the
"FROM" part of the query, as well as a list any extra parameters that "FROM" part of the query, as well as a list any extra parameters that
need to be included. Sub-classes, can override this to create a need to be included. Sub-classes, can override this to create a
from-clause via a "select", for example (e.g. CountQuery). from-clause via a "select".
This should only be called after any SQL construction methods that This should only be called after any SQL construction methods that
might change the tables we need. This means the select columns and might change the tables we need. This means the select columns and
@ -635,10 +735,13 @@ class BaseQuery(object):
order = asc order = asc
result.append('%s %s' % (field, order)) result.append('%s %s' % (field, order))
continue continue
col, order = get_order_dir(field, asc)
if col in self.aggregate_select:
result.append('%s %s' % (col, order))
continue
if '.' in field: if '.' in field:
# This came in through an extra(order_by=...) addition. Pass it # This came in through an extra(order_by=...) addition. Pass it
# on verbatim. # on verbatim.
col, order = get_order_dir(field, asc)
table, col = col.split('.', 1) table, col = col.split('.', 1)
if (table, col) not in processed_pairs: if (table, col) not in processed_pairs:
elt = '%s.%s' % (qn(table), col) elt = '%s.%s' % (qn(table), col)
@ -657,7 +760,6 @@ class BaseQuery(object):
ordering_aliases.append(elt) ordering_aliases.append(elt)
result.append('%s %s' % (elt, order)) result.append('%s %s' % (elt, order))
else: else:
col, order = get_order_dir(field, asc)
elt = qn2(col) elt = qn2(col)
if distinct and col not in select_aliases: if distinct and col not in select_aliases:
ordering_aliases.append(elt) ordering_aliases.append(elt)
@ -1068,6 +1170,48 @@ class BaseQuery(object):
self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1, self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1,
used, next, restricted, new_nullable, dupe_set, avoid) used, next, restricted, new_nullable, dupe_set, avoid)
def add_aggregate(self, aggregate, model, alias, is_summary):
    """
    Register one aggregate expression on this query.

    The aggregate's lookup is resolved to a column reference and a
    source in one of three ways:
     * it names an existing entry in aggregate_select: the column is
       that alias and the source is the existing aggregate;
     * it spans relations, or does not name a field of the model: joins
       are set up, trimmed, and promoted;
     * it names a field of the model directly.
    The alias is truncated to the backend's maximum name length and the
    aggregate is then asked to add itself to the query.
    """
    opts = model._meta
    lookup_parts = aggregate.lookup.split(LOOKUP_SEP)
    is_single_name = len(lookup_parts) == 1

    if is_single_name and aggregate.lookup in self.aggregate_select:
        # Aggregate is over an annotation
        annotation_name = lookup_parts[0]
        col = annotation_name
        source = self.aggregate_select[annotation_name]
    elif (not is_single_name or
            lookup_parts[0] not in [f.name for f in opts.fields]):
        field, source, opts, join_list, last, _ = self.setup_joins(
            lookup_parts, opts, self.get_initial_alias(), False)

        # Process the join chain to see if it can be trimmed
        _, _, col, _, join_list = self.trim_joins(source, join_list, last, False)

        # Joins needed by the aggregate must be LEFT OUTER: rows with
        # an empty join have to survive so that the aggregate can
        # report zero for them.
        for joined_alias in join_list:
            self.promote_alias(joined_alias, unconditional=True)

        col = (join_list[-1], col)
    else:
        # Aggregate references a normal field
        local_name = lookup_parts[0]
        source = opts.get_field(local_name)
        if self.group_by and is_summary:
            # Summary over an already grouped query: refer to the field
            # by its bare name.
            col = local_name
        else:
            # Standalone aggregate or annotation: use the
            # (table, column) reference.
            col = (opts.db_table, source.column)

    # Add the aggregate to the query
    max_length = self.connection.ops.max_name_length()
    alias = truncate_name(alias, max_length)
    aggregate.add_to_query(self, alias, col=col, source=source, is_summary=is_summary)
def add_filter(self, filter_expr, connector=AND, negate=False, trim=False, def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
can_reuse=None, process_extras=True): can_reuse=None, process_extras=True):
""" """
@ -1119,6 +1263,11 @@ class BaseQuery(object):
elif callable(value): elif callable(value):
value = value() value = value()
for alias, aggregate in self.aggregate_select.items():
if alias == parts[0]:
self.having.add((aggregate, lookup_type, value), AND)
return
opts = self.get_meta() opts = self.get_meta()
alias = self.get_initial_alias() alias = self.get_initial_alias()
allow_many = trim or not negate allow_many = trim or not negate
@ -1131,38 +1280,9 @@ class BaseQuery(object):
self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]), self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]),
can_reuse) can_reuse)
return return
final = len(join_list)
penultimate = last.pop()
if penultimate == final:
penultimate = last.pop()
if trim and len(join_list) > 1:
extra = join_list[penultimate:]
join_list = join_list[:penultimate]
final = penultimate
penultimate = last.pop()
col = self.alias_map[extra[0]][LHS_JOIN_COL]
for alias in extra:
self.unref_alias(alias)
else:
col = target.column
alias = join_list[-1]
while final > 1: # Process the join chain to see if it can be trimmed
# An optimization: if the final join is against the same column as final, penultimate, col, alias, join_list = self.trim_joins(target, join_list, last, trim)
# we are comparing against, we can go back one step in the join
# chain and compare against the lhs of the join instead (and then
# repeat the optimization). The result, potentially, involves less
# table joins.
join = self.alias_map[alias]
if col != join[RHS_JOIN_COL]:
break
self.unref_alias(alias)
alias = join[LHS_ALIAS]
col = join[LHS_JOIN_COL]
join_list = join_list[:-1]
final -= 1
if final == penultimate:
penultimate = last.pop()
if (lookup_type == 'isnull' and value is True and not negate and if (lookup_type == 'isnull' and value is True and not negate and
final > 1): final > 1):
@ -1313,7 +1433,7 @@ class BaseQuery(object):
field, model, direct, m2m = opts.get_field_by_name(f.name) field, model, direct, m2m = opts.get_field_by_name(f.name)
break break
else: else:
names = opts.get_all_field_names() names = opts.get_all_field_names() + self.aggregate_select.keys()
raise FieldError("Cannot resolve keyword %r into field. " raise FieldError("Cannot resolve keyword %r into field. "
"Choices are: %s" % (name, ", ".join(names))) "Choices are: %s" % (name, ", ".join(names)))
@ -1462,6 +1582,43 @@ class BaseQuery(object):
return field, target, opts, joins, last, extra_filters return field, target, opts, joins, last, extra_filters
def trim_joins(self, target, join_list, last, trim):
    """
    Shorten a join chain where the final joins are not needed.

    If the last join is against the same column that is being compared,
    the comparison can be made against the left-hand side of that join
    instead, and the step repeated. The result potentially involves
    fewer table joins.

    When trim is true and there is more than one join, the joins from
    the position popped off ``last`` onwards are dropped first.

    Side effects: values are popped off ``last``, and every alias that
    is dropped is passed to unref_alias().

    Returns the tuple (final, penultimate, col, alias, join_list): the
    number of joins remaining, the most recent value popped from
    ``last``, the column and alias to compare against, and the trimmed
    join list.
    """
    remaining = len(join_list)
    boundary = last.pop()
    if boundary == remaining:
        boundary = last.pop()

    if trim and len(join_list) > 1:
        dropped = join_list[boundary:]
        join_list = join_list[:boundary]
        remaining = boundary
        boundary = last.pop()
        column = self.alias_map[dropped[0]][LHS_JOIN_COL]
        for dropped_alias in dropped:
            self.unref_alias(dropped_alias)
    else:
        column = target.column

    current_alias = join_list[-1]
    while remaining > 1:
        join = self.alias_map[current_alias]
        if column != join[RHS_JOIN_COL]:
            break
        # The join targets the compared column: step back to its
        # left-hand side and release the alias.
        self.unref_alias(current_alias)
        current_alias = join[LHS_ALIAS]
        column = join[LHS_JOIN_COL]
        join_list = join_list[:-1]
        remaining -= 1
        if remaining == boundary:
            boundary = last.pop()

    return remaining, boundary, column, current_alias, join_list
def update_dupe_avoidance(self, opts, col, alias): def update_dupe_avoidance(self, opts, col, alias):
""" """
For a column that is one of multiple pointing to the same table, update For a column that is one of multiple pointing to the same table, update
@ -1554,6 +1711,7 @@ class BaseQuery(object):
""" """
alias = self.get_initial_alias() alias = self.get_initial_alias()
opts = self.get_meta() opts = self.get_meta()
try: try:
for name in field_names: for name in field_names:
field, target, u2, joins, u3, u4 = self.setup_joins( field, target, u2, joins, u3, u4 = self.setup_joins(
@ -1574,7 +1732,7 @@ class BaseQuery(object):
except MultiJoin: except MultiJoin:
raise FieldError("Invalid field name: '%s'" % name) raise FieldError("Invalid field name: '%s'" % name)
except FieldError: except FieldError:
names = opts.get_all_field_names() + self.extra_select.keys() names = opts.get_all_field_names() + self.extra_select.keys() + self.aggregate_select.keys()
names.sort() names.sort()
raise FieldError("Cannot resolve keyword %r into field. " raise FieldError("Cannot resolve keyword %r into field. "
"Choices are: %s" % (name, ", ".join(names))) "Choices are: %s" % (name, ", ".join(names)))
@ -1609,38 +1767,52 @@ class BaseQuery(object):
if force_empty: if force_empty:
self.default_ordering = False self.default_ordering = False
def set_group_by(self):
    """
    Extend the GROUP BY clause with what the query needs.

    Normally every currently selected column is appended. When the
    backend reports that it allows grouping by primary key, and the
    select list has as many entries as the model has fields, a single
    "table.pk_column" entry is appended instead.
    """
    features = self.connection.features
    if (features.allows_group_by_pk and
            len(self.select) == len(self.model._meta.fields)):
        meta = self.model._meta
        pk_reference = '.'.join([meta.db_table, meta.pk.column])
        self.group_by.append(pk_reference)
        return

    for selected in self.select:
        self.group_by.append(selected)
def add_count_column(self): def add_count_column(self):
""" """
Converts the query to do count(...) or count(distinct(pk)) in order to Converts the query to do count(...) or count(distinct(pk)) in order to
get its size. get its size.
""" """
# TODO: When group_by support is added, this needs to be adjusted so
# that it doesn't totally overwrite the select list.
if not self.distinct: if not self.distinct:
if not self.select: if not self.select:
select = Count() count = self.aggregates_module.Count('*', is_summary=True)
else: else:
assert len(self.select) == 1, \ assert len(self.select) == 1, \
"Cannot add count col with multiple cols in 'select': %r" % self.select "Cannot add count col with multiple cols in 'select': %r" % self.select
select = Count(self.select[0]) count = self.aggregates_module.Count(self.select[0])
else: else:
opts = self.model._meta opts = self.model._meta
if not self.select: if not self.select:
select = Count((self.join((None, opts.db_table, None, None)), count = self.aggregates_module.Count((self.join((None, opts.db_table, None, None)), opts.pk.column),
opts.pk.column), True) is_summary=True, distinct=True)
else: else:
# Because of SQL portability issues, multi-column, distinct # Because of SQL portability issues, multi-column, distinct
# counts need a sub-query -- see get_count() for details. # counts need a sub-query -- see get_count() for details.
assert len(self.select) == 1, \ assert len(self.select) == 1, \
"Cannot add count col with multiple cols in 'select'." "Cannot add count col with multiple cols in 'select'."
select = Count(self.select[0], True)
count = self.aggregates_module.Count(self.select[0], distinct=True)
# Distinct handling is done in Count(), so don't do it at this # Distinct handling is done in Count(), so don't do it at this
# level. # level.
self.distinct = False self.distinct = False
self.select = [select] self.aggregate_select = {None: count}
self.select_fields = [None]
self.extra_select = {}
def add_select_related(self, fields): def add_select_related(self, fields):
""" """
@ -1758,7 +1930,6 @@ class BaseQuery(object):
return empty_iter() return empty_iter()
else: else:
return return
cursor = self.connection.cursor() cursor = self.connection.cursor()
cursor.execute(sql, params) cursor.execute(sql, params)

View File

@ -9,7 +9,7 @@ from django.db.models.sql.query import Query
from django.db.models.sql.where import AND, Constraint from django.db.models.sql.where import AND, Constraint
__all__ = ['DeleteQuery', 'UpdateQuery', 'InsertQuery', 'DateQuery', __all__ = ['DeleteQuery', 'UpdateQuery', 'InsertQuery', 'DateQuery',
'CountQuery'] 'AggregateQuery']
class DeleteQuery(Query): class DeleteQuery(Query):
""" """
@ -400,15 +400,25 @@ class DateQuery(Query):
self.distinct = True self.distinct = True
self.order_by = order == 'ASC' and [1] or [-1] self.order_by = order == 'ASC' and [1] or [-1]
class CountQuery(Query): class AggregateQuery(Query):
""" """
A CountQuery knows how to take a normal query which would select over An AggregateQuery takes another query as a parameter to the FROM
multiple distinct columns and turn it into SQL that can be used on a clause and only selects the elements in the provided list.
variety of backends (it requires a select in the FROM clause).
""" """
def get_from_clause(self): def add_subquery(self, query):
result, params = self._query.as_sql() self.subquery, self.sub_params = query.as_sql(with_col_aliases=True)
return ['(%s) A1' % result], params
def get_ordering(self): def as_sql(self, quote_func=None):
return () """
Creates the SQL for this query. Returns the SQL string and list of
parameters.
"""
sql = ('SELECT %s FROM (%s) subquery' % (
', '.join([
aggregate.as_sql()
for aggregate in self.aggregate_select.values()
]),
self.subquery)
)
params = self.sub_params
return (sql, params)

View File

@ -14,6 +14,7 @@ from django.test.client import Client
from django.utils import simplejson from django.utils import simplejson
normalize_long_ints = lambda s: re.sub(r'(?<![\w])(\d+)L(?![\w])', '\\1', s) normalize_long_ints = lambda s: re.sub(r'(?<![\w])(\d+)L(?![\w])', '\\1', s)
normalize_decimals = lambda s: re.sub(r"Decimal\('(\d+(\.\d*)?)'\)", lambda m: "Decimal(\"%s\")" % m.groups()[0], s)
def to_list(value): def to_list(value):
""" """
@ -31,7 +32,7 @@ class OutputChecker(doctest.OutputChecker):
def check_output(self, want, got, optionflags): def check_output(self, want, got, optionflags):
"The entry method for doctest output checking. Defers to a sequence of child checkers" "The entry method for doctest output checking. Defers to a sequence of child checkers"
checks = (self.check_output_default, checks = (self.check_output_default,
self.check_output_long, self.check_output_numeric,
self.check_output_xml, self.check_output_xml,
self.check_output_json) self.check_output_json)
for check in checks: for check in checks:
@ -43,13 +44,17 @@ class OutputChecker(doctest.OutputChecker):
"The default comparator provided by doctest - not perfect, but good for most purposes" "The default comparator provided by doctest - not perfect, but good for most purposes"
return doctest.OutputChecker.check_output(self, want, got, optionflags) return doctest.OutputChecker.check_output(self, want, got, optionflags)
def check_output_long(self, want, got, optionflags): def check_output_numeric(self, want, got, optionflags):
"""Doctest does an exact string comparison of output, which means long """Doctest does an exact string comparison of output, which means that
integers aren't equal to normal integers ("22L" vs. "22"). The some numerically equivalent values aren't equal. This check normalizes
following code normalizes long integers so that they equal normal * long integers (22L) so that they equal normal integers. (22)
integers. * Decimals so that they are comparable, regardless of the change
made to __repr__ in Python 2.6.
""" """
return normalize_long_ints(want) == normalize_long_ints(got) return doctest.OutputChecker.check_output(self,
normalize_decimals(normalize_long_ints(want)),
normalize_decimals(normalize_long_ints(got)),
optionflags)
def check_output_xml(self, want, got, optionsflags): def check_output_xml(self, want, got, optionsflags):
"""Tries to do a 'xml-comparision' of want and got. Plain string """Tries to do a 'xml-comparision' of want and got. Plain string

View File

@ -42,7 +42,7 @@ The model layer
* **Models:** :ref:`Model syntax <topics-db-models>` | :ref:`Field types <ref-models-fields>` | :ref:`Meta options <ref-models-options>` * **Models:** :ref:`Model syntax <topics-db-models>` | :ref:`Field types <ref-models-fields>` | :ref:`Meta options <ref-models-options>`
* **QuerySets:** :ref:`Executing queries <topics-db-queries>` | :ref:`QuerySet method reference <ref-models-querysets>` * **QuerySets:** :ref:`Executing queries <topics-db-queries>` | :ref:`QuerySet method reference <ref-models-querysets>`
* **Model instances:** :ref:`Instance methods <ref-models-instances>` | :ref:`Accessing related objects <ref-models-relations>` * **Model instances:** :ref:`Instance methods <ref-models-instances>` | :ref:`Accessing related objects <ref-models-relations>`
* **Advanced:** :ref:`Managers <topics-db-managers>` | :ref:`Raw SQL <topics-db-sql>` | :ref:`Transactions <topics-db-transactions>` | :ref:`Custom fields <howto-custom-model-fields>` * **Advanced:** :ref:`Managers <topics-db-managers>` | :ref:`Raw SQL <topics-db-sql>` | :ref:`Transactions <topics-db-transactions>` | :ref:`Aggregation <topics-db-aggregation>` | :ref:`Custom fields <howto-custom-model-fields>`
* **Other:** :ref:`Supported databases <ref-databases>` | :ref:`Legacy databases <howto-legacy-databases>` | :ref:`Providing initial data <howto-initial-data>` * **Other:** :ref:`Supported databases <ref-databases>` | :ref:`Legacy databases <howto-legacy-databases>` | :ref:`Providing initial data <howto-initial-data>`
The template layer The template layer

View File

@ -158,6 +158,48 @@ In SQL terms, that evaluates to::
Note the second example is more restrictive. Note the second example is more restrictive.
``annotate(*args, **kwargs)``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. versionadded:: 1.1
Annotates each object in the ``QuerySet`` with the provided list of
aggregate values (averages, sums, etc) that have been computed over
the objects that are related to the objects in the ``QuerySet``.
Each argument to ``annotate()`` is an annotation that will be added
to each object in the ``QuerySet`` that is returned.
The aggregation functions that are provided by Django are described
in `Aggregation Functions`_ below.
Annotations specified using keyword arguments will use the keyword as
the alias for the annotation. Anonymous arguments will have an alias
generated for them based upon the name of the aggregate function and
the model field that is being aggregated.
For example, if you were manipulating a list of blogs, you may want
to determine how many entries have been made in each blog::
>>> q = Blog.objects.annotate(Count('entry'))
# The name of the first blog
>>> q[0].name
'Blogasaurus'
# The number of entries on the first blog
>>> q[0].entry__count
42
The ``Blog`` model doesn't define an ``entry__count`` attribute by itself,
but by using a keyword argument to specify the aggregate function, you can
control the name of the annotation::
>>> q = Blog.objects.annotate(number_of_entries=Count('entry'))
# The number of entries on the first blog, using the name provided
>>> q[0].number_of_entries
42
For an in-depth discussion of aggregation, see :ref:`the topic guide on
Aggregation <topics-db-aggregation>`.
``order_by(*fields)`` ``order_by(*fields)``
~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~
@ -931,6 +973,38 @@ exist with the given parameters.
Note ``latest()`` exists purely for convenience and readability. Note ``latest()`` exists purely for convenience and readability.
``aggregate(*args, **kwargs)``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. versionadded:: 1.1
Returns a dictionary of aggregate values (averages, sums, etc) calculated
over the ``QuerySet``. Each argument to ``aggregate()`` specifies
a value that will be included in the dictionary that is returned.
The aggregation functions that are provided by Django are described
in `Aggregation Functions`_ below.
Aggregates specified using keyword arguments will use the keyword as
the name for the annotation. Anonymous arguments will have a name
generated for them based upon the name of the aggregate function and
the model field that is being aggregated.
For example, if you were manipulating blogs, you may want to know the
total number of entries that have been made across all blogs::
>>> q = Blog.objects.aggregate(Count('entry'))
{'entry__count': 16}
By using a keyword argument to specify the aggregate function, you can
control the name of the aggregation value that is returned::
>>> q = Blog.objects.aggregate(number_of_entries=Count('entry'))
{'number_of_entries': 16}
For an in-depth discussion of aggregation, see :ref:`the topic guide on
Aggregation <topics-db-aggregation>`.
.. _field-lookups: .. _field-lookups:
Field lookups Field lookups
@ -1326,3 +1400,115 @@ SQL equivalents::
SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- SQLite SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- SQLite
.. _aggregation-functions:
Aggregation Functions
---------------------
.. versionadded:: 1.1
Django provides the following aggregation functions in the
``django.db.models`` module.
``Avg``
~~~~~~~
.. class:: Avg(field)
Returns the mean value of the given field.
* Default alias: ``<field>__avg``
* Return type: float
``Count``
~~~~~~~~~
.. class:: Count(field, distinct=False)
Returns the number of objects that are related through the provided field.
* Default alias: ``<field>__count``
* Return type: integer
Has one optional argument:
.. attribute:: distinct
If distinct=True, the count will only include unique instances. This has
the SQL equivalent of ``COUNT(DISTINCT field)``. Default value is ``False``.
``Max``
~~~~~~~
.. class:: Max(field)
Returns the maximum value of the given field.
* Default alias: ``<field>__max``
* Return type: same as input field
``Min``
~~~~~~~
.. class:: Min(field)
Returns the minimum value of the given field.
* Default alias: ``<field>__min``
* Return type: same as input field
``StdDev``
~~~~~~~~~~
.. class:: StdDev(field, sample=False)
Returns the standard deviation of the data in the provided field.
* Default alias: ``<field>__stddev``
* Return type: float
Has one optional argument:
.. attribute:: sample
By default, ``StdDev`` returns the population standard deviation. However,
if ``sample=True``, the return value will be the sample standard deviation.
.. admonition:: SQLite
SQLite doesn't provide ``StdDev`` out of the box. An implementation is
available as an extension module for SQLite. Consult the SQLite
documentation for instructions on obtaining and installing this extension.
``Sum``
~~~~~~~
.. class:: Sum(field)
Computes the sum of all values of the given field.
* Default alias: ``<field>__sum``
* Return type: same as input field
``Variance``
~~~~~~~~~~~~
.. class:: Variance(field, sample=False)
Returns the variance of the data in the provided field.
* Default alias: ``<field>__variance``
* Return type: float
Has one optional argument:
.. attribute:: sample
By default, ``Variance`` returns the population variance. However,
if ``sample=True``, the return value will be the sample variance.
.. admonition:: SQLite
SQLite doesn't provide ``Variance`` out of the box. An implementation is
available as an extension module for SQLite. Consult the SQLite
documentation for instructions on obtaining and installing this extension.

View File

@ -0,0 +1,323 @@
.. _topics-db-aggregation:
=============
Aggregation
=============
.. versionadded:: 1.1
.. currentmodule:: django.db.models
The topic guide on :ref:`Django's database-abstraction API <topics-db-queries>`
described the way that you can use Django queries that create,
retrieve, update and delete individual objects. However, sometimes you will
need to retrieve values that are derived by summarizing or *aggregating* a
collection of objects. This topic guide describes the ways that aggregate values
can be generated and returned using Django queries.
Throughout this guide, we'll refer to the following models. These models are
used to track the inventory for a series of online bookstores:
.. _queryset-model-example:
.. code-block:: python
class Author(models.Model):
name = models.CharField(max_length=100)
age = models.IntegerField()
friends = models.ManyToManyField('self', blank=True)
class Publisher(models.Model):
name = models.CharField(max_length=300)
num_awards = models.IntegerField()
class Book(models.Model):
isbn = models.CharField(max_length=9)
name = models.CharField(max_length=300)
pages = models.IntegerField()
price = models.DecimalField(max_digits=10, decimal_places=2)
rating = models.FloatField()
authors = models.ManyToManyField(Author)
publisher = models.ForeignKey(Publisher)
pubdate = models.DateField()
class Store(models.Model):
name = models.CharField(max_length=300)
books = models.ManyToManyField(Book)
Generating aggregates over a QuerySet
=====================================
Django provides two ways to generate aggregates. The first way is to generate
summary values over an entire ``QuerySet``. For example, say you wanted to
calculate the average price of all books available for sale. Django's query
syntax provides a means for describing the set of all books::
>>> Book.objects.all()
What we need is a way to calculate summary values over the objects that
belong to this ``QuerySet``. This is done by appending an ``aggregate()``
clause onto the ``QuerySet``::
>>> from django.db.models import Avg
>>> Book.objects.all().aggregate(Avg('price'))
{'price__avg': 34.35}
The ``all()`` is redundant in this example, so this could be simplified to::
>>> Book.objects.aggregate(Avg('price'))
{'price__avg': 34.35}
The argument to the ``aggregate()`` clause describes the aggregate value that
we want to compute - in this case, the average of the ``price`` field on the
``Book`` model. A list of the aggregate functions that are available can be
found in the :ref:`QuerySet reference <aggregation-functions>`.
``aggregate()`` is a terminal clause for a ``QuerySet`` that, when invoked,
returns a dictionary of name-value pairs. The name is an identifier for the
aggregate value; the value is the computed aggregate. The name is
automatically generated from the name of the field and the aggregate function.
If you want to manually specify a name for the aggregate value, you can do so
by providing that name when you specify the aggregate clause::
>>> Book.objects.aggregate(average_price=Avg('price'))
{'average_price': 34.35}
If you want to generate more than one aggregate, you just add another
argument to the ``aggregate()`` clause. So, if we also wanted to know
the maximum and minimum price of all books, we would issue the query::
>>> Book.objects.aggregate(Avg('price'), Max('price'), Min('price'))
{'price__avg': 34.35, 'price__max': Decimal('81.20'), 'price__min': Decimal('12.99')}
Generating aggregates for each item in a QuerySet
=================================================
The second way to generate summary values is to generate an independent
summary for each object in a ``QuerySet``. For example, if you are retrieving
a list of books, you may want to know how many authors contributed to
each book. Each Book has a many-to-many relationship with the Author; we
want to summarize this relationship for each book in the ``QuerySet``.
Per-object summaries can be generated using the ``annotate()`` clause.
When an ``annotate()`` clause is specified, each object in the ``QuerySet``
will be annotated with the specified values.
The syntax for these annotations is identical to that used for the
``aggregate()`` clause. Each argument to ``annotate()`` describes an
aggregate that is to be calculated. For example, to annotate Books with
the number of authors::
# Build an annotated queryset
>>> q = Book.objects.annotate(Count('authors'))
# Interrogate the first object in the queryset
>>> q[0]
<Book: The Definitive Guide to Django>
>>> q[0].authors__count
2
# Interrogate the second object in the queryset
>>> q[1]
<Book: Practical Django Projects>
>>> q[1].authors__count
1
As with ``aggregate()``, the name for the annotation is automatically derived
from the name of the aggregate function and the name of the field being
aggregated. You can override this default name by providing an alias when you
specify the annotation::
>>> q = Book.objects.annotate(num_authors=Count('authors'))
>>> q[0].num_authors
2
>>> q[1].num_authors
1
Unlike ``aggregate()``, ``annotate()`` is *not* a terminal clause. The output
of the ``annotate()`` clause is a ``QuerySet``; this ``QuerySet`` can be
modified using any other ``QuerySet`` operation, including ``filter()``,
``order_by``, or even additional calls to ``annotate()``.
Joins and aggregates
====================
So far, we have dealt with aggregates over fields that belong to the
model being queried. However, sometimes the value you want to aggregate
will belong to a model that is related to the model you are querying.
When specifying the field to be aggregated in an aggregate function,
Django will allow you to use the same
:ref:`double underscore notation <field-lookups-intro>` that is used
when referring to related fields in filters. Django will then handle
any table joins that are required to retrieve and aggregate the
related value.
For example, to find the price range of books offered in each store,
you could use the annotation::
>>> Store.objects.annotate(min_price=Min('books__price'), max_price=Max('books__price'))
This tells Django to retrieve the Store model, join (through the
many-to-many relationship) with the Book model, and aggregate on the
price field of the book model to produce a minimum and maximum value.
The same rules apply to the ``aggregate()`` clause. If you wanted to
know the lowest and highest price of any book that is available for sale
in a store, you could use the aggregate::
>>> Store.objects.aggregate(min_price=Min('books__price'), max_price=Max('books__price'))
Join chains can be as deep as you require. For example, to extract the
age of the youngest author of any book available for sale, you could
issue the query::
>>> Store.objects.aggregate(youngest_age=Min('books__authors__age'))
Aggregations and other QuerySet clauses
=======================================
``filter()`` and ``exclude()``
------------------------------
Aggregates can also participate in filters. Any ``filter()`` (or
``exclude()``) applied to normal model fields will have the effect of
constraining the objects that are considered for aggregation.
When used with an ``annotate()`` clause, a filter has the effect of
constraining the objects for which an annotation is calculated. For example,
you can generate an annotated list of all books that have a title starting
with "Django" using the query::
>>> Book.objects.filter(name__startswith="Django").annotate(num_authors=Count('authors'))
When used with an ``aggregate()`` clause, a filter has the effect of
constraining the objects over which the aggregate is calculated.
For example, you can generate the average price of all books with a
title that starts with "Django" using the query::
>>> Book.objects.filter(name__startswith="Django").aggregate(Avg('price'))
Filtering on annotations
~~~~~~~~~~~~~~~~~~~~~~~~
Annotated values can also be filtered. The alias for the annotation can be
used in ``filter()`` and ``exclude()`` clauses in the same way as any other
model field.
For example, to generate a list of books that have more than one author,
you can issue the query::
>>> Book.objects.annotate(num_authors=Count('authors')).filter(num_authors__gt=1)
This query generates an annotated result set, and then generates a filter
based upon that annotation.
Order of ``annotate()`` and ``filter()`` clauses
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When developing a complex query that involves both ``annotate()`` and
``filter()`` clauses, particular attention should be paid to the order
in which the clauses are applied to the ``QuerySet``.
When an ``annotate()`` clause is applied to a query, the annotation is
computed over the state of the query up to the point where the annotation
is requested. The practical implication of this is that ``filter()`` and
``annotate()`` are not commutative operations -- that is, there is a
difference between the query::
>>> Publisher.objects.annotate(num_books=Count('book')).filter(book__rating__gt=3.0)
and the query::
>>> Publisher.objects.filter(book__rating__gt=3.0).annotate(num_books=Count('book'))
Both queries will return a list of Publishers that have at least one good
book (i.e., a book with a rating exceeding 3.0). However, the annotation in
the first query will provide the total number of all books published by the
publisher; the second query will only include good books in the annotated
count. In the first query, the annotation precedes the filter, so the
filter has no effect on the annotation. In the second query, the filter
precedes the annotation, and as a result, the filter constrains the objects
considered when calculating the annotation.
``order_by()``
--------------
Annotations can be used as a basis for ordering. When you
define an ``order_by()`` clause, the aggregates you provide can reference
any alias defined as part of an ``annotate()`` clause in the query.
For example, to order a ``QuerySet`` of books by the number of authors
that have contributed to the book, you could use the following query::
>>> Book.objects.annotate(num_authors=Count('authors')).order_by('num_authors')
``values()``
------------
Ordinarily, annotations are generated on a per-object basis - an annotated
``QuerySet`` will return one result for each object in the original
``QuerySet``. However, when a ``values()`` clause is used to constrain the
columns that are returned in the result set, the method for evaluating
annotations is slightly different. Instead of returning an annotated result
for each result in the original ``QuerySet``, the original results are
grouped according to the unique combinations of the fields specified in the
``values()`` clause. An annotation is then provided for each unique group;
the annotation is computed over all members of the group.
For example, consider an author query that attempts to find out the average
rating of books written by each author::
>>> Author.objects.annotate(average_rating=Avg('book__rating'))
This will return one result for each author in the database, annotated with
their average book rating.
However, the result will be slightly different if you use a ``values()`` clause::
>>> Author.objects.values('name').annotate(average_rating=Avg('book__rating'))
In this example, the authors will be grouped by name, so you will only get
an annotated result for each *unique* author name. This means if you have
two authors with the same name, their results will be merged into a single
result in the output of the query; the average will be computed as the
average over the books written by both authors.
The annotation name will be added to the fields returned
as part of the ``ValuesQuerySet``.
Order of ``annotate()`` and ``values()`` clauses
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
As with the ``filter()`` clause, the order in which ``annotate()`` and
``values()`` clauses are applied to a query is significant. If the
``values()`` clause precedes the ``annotate()``, the annotation will be
computed using the grouping described by the ``values()`` clause.
However, if the ``annotate()`` clause precedes the ``values()`` clause,
the annotations will be generated over the entire query set. In this case,
the ``values()`` clause only constrains the fields that are generated on
output.
For example, if we reverse the order of the ``values()`` and ``annotate()``
clause from our previous example::
>>> Author.objects.annotate(average_rating=Avg('book__rating')).values('name')
This will now yield one unique result for each author; however, only
the author's name and the ``average_rating`` annotation will be returned
in the output data.
Aggregating annotations
-----------------------
You can also generate an aggregate on the result of an annotation. When you
define an ``aggregate()`` clause, the aggregates you provide can reference
any alias defined as part of an ``annotate()`` clause in the query.
For example, if you wanted to calculate the average number of authors per
book you first annotate the set of books with the author count, then
aggregate that author count, referencing the annotation field::
>>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Avg('num_authors'))
{'num_authors__avg': 1.66}

View File

@ -12,6 +12,7 @@ model maps to a single database table.
models models
queries queries
aggregation
managers managers
sql sql
transactions transactions

View File

View File

@ -0,0 +1,229 @@
[
{
"pk": 1,
"model": "aggregation.publisher",
"fields": {
"name": "Apress",
"num_awards": 3
}
},
{
"pk": 2,
"model": "aggregation.publisher",
"fields": {
"name": "Sams",
"num_awards": 1
}
},
{
"pk": 3,
"model": "aggregation.publisher",
"fields": {
"name": "Prentice Hall",
"num_awards": 7
}
},
{
"pk": 4,
"model": "aggregation.publisher",
"fields": {
"name": "Morgan Kaufmann",
"num_awards": 9
}
},
{
"pk": 1,
"model": "aggregation.book",
"fields": {
"publisher": 1,
"isbn": "159059725",
"name": "The Definitive Guide to Django: Web Development Done Right",
"price": "30.00",
"rating": 4.5,
"authors": [1, 2],
"pages": 447,
"pubdate": "2007-12-6"
}
},
{
"pk": 2,
"model": "aggregation.book",
"fields": {
"publisher": 2,
"isbn": "067232959",
"name": "Sams Teach Yourself Django in 24 Hours",
"price": "23.09",
"rating": 3.0,
"authors": [3],
"pages": 528,
"pubdate": "2008-3-3"
}
},
{
"pk": 3,
"model": "aggregation.book",
"fields": {
"publisher": 1,
"isbn": "159059996",
"name": "Practical Django Projects",
"price": "29.69",
"rating": 4.0,
"authors": [4],
"pages": 300,
"pubdate": "2008-6-23"
}
},
{
"pk": 4,
"model": "aggregation.book",
"fields": {
"publisher": 3,
"isbn": "013235613",
"name": "Python Web Development with Django",
"price": "29.69",
"rating": 4.0,
"authors": [5, 6, 7],
"pages": 350,
"pubdate": "2008-11-3"
}
},
{
"pk": 5,
"model": "aggregation.book",
"fields": {
"publisher": 3,
"isbn": "013790395",
"name": "Artificial Intelligence: A Modern Approach",
"price": "82.80",
"rating": 4.0,
"authors": [8, 9],
"pages": 1132,
"pubdate": "1995-1-15"
}
},
{
"pk": 6,
"model": "aggregation.book",
"fields": {
"publisher": 4,
"isbn": "155860191",
"name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
"price": "75.00",
"rating": 5.0,
"authors": [8],
"pages": 946,
"pubdate": "1991-10-15"
}
},
{
"pk": 1,
"model": "aggregation.store",
"fields": {
"books": [1, 2, 3, 4, 5, 6],
"name": "Amazon.com",
"original_opening": "1994-4-23 9:17:42",
"friday_night_closing": "23:59:59"
}
},
{
"pk": 2,
"model": "aggregation.store",
"fields": {
"books": [1, 3, 5, 6],
"name": "Books.com",
"original_opening": "2001-3-15 11:23:37",
"friday_night_closing": "23:59:59"
}
},
{
"pk": 3,
"model": "aggregation.store",
"fields": {
"books": [3, 4, 6],
"name": "Mamma and Pappa's Books",
"original_opening": "1945-4-25 16:24:14",
"friday_night_closing": "21:30:00"
}
},
{
"pk": 1,
"model": "aggregation.author",
"fields": {
"age": 34,
"friends": [2, 4],
"name": "Adrian Holovaty"
}
},
{
"pk": 2,
"model": "aggregation.author",
"fields": {
"age": 35,
"friends": [1, 7],
"name": "Jacob Kaplan-Moss"
}
},
{
"pk": 3,
"model": "aggregation.author",
"fields": {
"age": 45,
"friends": [],
"name": "Brad Dayley"
}
},
{
"pk": 4,
"model": "aggregation.author",
"fields": {
"age": 29,
"friends": [1],
"name": "James Bennett"
}
},
{
"pk": 5,
"model": "aggregation.author",
"fields": {
"age": 37,
"friends": [6, 7],
"name": "Jeffrey Forcier "
}
},
{
"pk": 6,
"model": "aggregation.author",
"fields": {
"age": 29,
"friends": [5, 7],
"name": "Paul Bissex"
}
},
{
"pk": 7,
"model": "aggregation.author",
"fields": {
"age": 25,
"friends": [2, 5, 6],
"name": "Wesley J. Chun"
}
},
{
"pk": 8,
"model": "aggregation.author",
"fields": {
"age": 57,
"friends": [9],
"name": "Peter Norvig"
}
},
{
"pk": 9,
"model": "aggregation.author",
"fields": {
"age": 46,
"friends": [8],
"name": "Stuart Russell"
}
}
]

View File

@ -0,0 +1,379 @@
# coding: utf-8
from django.db import models
try:
sorted
except NameError:
from django.utils.itercompat import sorted # For Python 2.3
class Author(models.Model):
    # Test model for the aggregation doctests: a named author with an age.
    name = models.CharField(max_length=100)
    age = models.IntegerField()
    # Many-to-many relation from Author to itself; may be left empty.
    friends = models.ManyToManyField('self', blank=True)

    def __unicode__(self):
        # Represent the author by name.
        return self.name
class Publisher(models.Model):
    # Test model for the aggregation doctests: a publisher with an
    # integer award count. Book refers to it through a foreign key.
    name = models.CharField(max_length=300)
    num_awards = models.IntegerField()

    def __unicode__(self):
        # Represent the publisher by name.
        return self.name
class Book(models.Model):
    # Test model for the aggregation doctests: a book with integer
    # (pages), float (rating), decimal (price) and date (pubdate) fields,
    # plus relations to Author and Publisher.
    isbn = models.CharField(max_length=9)
    name = models.CharField(max_length=300)
    pages = models.IntegerField()
    rating = models.FloatField()
    price = models.DecimalField(decimal_places=2, max_digits=6)
    # A book can have several authors, and an author several books.
    authors = models.ManyToManyField(Author)
    # Each book has exactly one publisher.
    publisher = models.ForeignKey(Publisher)
    pubdate = models.DateField()

    def __unicode__(self):
        # Represent the book by its title.
        return self.name
class Store(models.Model):
    # Test model for the aggregation doctests: a store that stocks books
    # and carries a datetime field and a time field.
    name = models.CharField(max_length=300)
    # The same book can be stocked by several stores.
    books = models.ManyToManyField(Book)
    original_opening = models.DateTimeField()
    friday_night_closing = models.TimeField()

    def __unicode__(self):
        # Represent the store by name.
        return self.name
class Entries(models.Model):
    # Model with mixed-case field names whose primary key is stored in a
    # database column named 'Entry ID' (the name contains a space).
    # NOTE(review): presumably exercises column-name quoting in aggregate
    # queries -- confirm against the tests that use it.
    EntryID = models.AutoField(primary_key=True, db_column='Entry ID')
    Entry = models.CharField(unique=True, max_length=50)
    Exclude = models.BooleanField()
class Clues(models.Model):
    """Model that points at ``Entries`` through a foreign key with a custom column name."""
    ID = models.AutoField(primary_key=True)
    # Foreign key to Entries, stored in a column named 'Entry ID'.
    EntryID = models.ForeignKey(Entries, verbose_name='Entry', db_column = 'Entry ID')
    Clue = models.CharField(max_length=150)
# Tests on 'aggregate'
# Different backends and numbers.
# Doctest suite for QuerySet.aggregate() and QuerySet.annotate().
#
# Layout rule: every block of expected output is followed by a blank line.
# doctest treats all non-blank lines after an example as expected output, so
# a comment placed directly under a result would be compared against it.
__test__ = {'API_TESTS': """
>>> from django.core import management
>>> try:
...     from decimal import Decimal
... except ImportError:
...     from django.utils._decimal import Decimal
>>> from datetime import date

# Reset the database representation of this app.
# This will return the database to a clean initial state.
>>> management.call_command('flush', verbosity=0, interactive=False)

# Empty Call - request nothing, get nothing.
>>> Author.objects.all().aggregate()
{}

>>> from django.db.models import Avg, Sum, Count, Max, Min

# Single model aggregation
#
# Single aggregate
# Average age of Authors
>>> Author.objects.all().aggregate(Avg('age'))
{'age__avg': 37.4...}

# Multiple aggregates
# Average and Sum of Author ages
>>> Author.objects.all().aggregate(Sum('age'), Avg('age'))
{'age__sum': 337, 'age__avg': 37.4...}

# Aggregates interact with filters, and only
# generate aggregate values for the filtered values
# Sum of the age of those older than 29 years old
>>> Author.objects.all().filter(age__gt=29).aggregate(Sum('age'))
{'age__sum': 254}

# Depth-1 Joins
#
# On Relationships with self
# Average age of the friends of each author
>>> Author.objects.all().aggregate(Avg('friends__age'))
{'friends__age__avg': 34.07...}

# On ManyToMany Relationships
#
# Forward
# Average age of the Authors of Books with a rating of less than 4.5
>>> Book.objects.all().filter(rating__lt=4.5).aggregate(Avg('authors__age'))
{'authors__age__avg': 38.2...}

# Backward
# Average rating of the Books whose Author's name contains the letter 'a'
>>> Author.objects.all().filter(name__contains='a').aggregate(Avg('book__rating'))
{'book__rating__avg': 4.0}

# On OneToMany Relationships
#
# Forward
# Sum of the number of awards of each Book's Publisher
>>> Book.objects.all().aggregate(Sum('publisher__num_awards'))
{'publisher__num_awards__sum': 30}

# Backward
# Sum of the price of every Book that has a Publisher
>>> Publisher.objects.all().aggregate(Sum('book__price'))
{'book__price__sum': Decimal("270.27")}

# Multiple Joins
#
# Forward
>>> Store.objects.all().aggregate(Max('books__authors__age'))
{'books__authors__age__max': 57}

# Backward
# Note that the very long default alias may be truncated
>>> Author.objects.all().aggregate(Min('book__publisher__num_awards'))
{'book__publisher__num_award...': 1}

# Aggregate outputs can also be aliased.
# Average amazon.com Book rating
>>> Store.objects.filter(name='Amazon.com').aggregate(amazon_mean=Avg('books__rating'))
{'amazon_mean': 4.08...}

# Tests on annotate()
# An empty annotate call does nothing but return the same QuerySet
>>> Book.objects.all().annotate().order_by('pk')
[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Sams Teach Yourself Django in 24 Hours>, <Book: Practical Django Projects>, <Book: Python Web Development with Django>, <Book: Artificial Intelligence: A Modern Approach>, <Book: Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp>]

# Annotate inserts the alias into the model object with the aggregated result
>>> books = Book.objects.all().annotate(mean_age=Avg('authors__age'))
>>> books.get(pk=1).name
u'The Definitive Guide to Django: Web Development Done Right'

>>> books.get(pk=1).mean_age
34.5

# On ManyToMany Relationships
# Forward
# Average age of the Authors of each book with a rating less than 4.5
>>> books = Book.objects.all().filter(rating__lt=4.5).annotate(Avg('authors__age'))
>>> sorted([(b.name, b.authors__age__avg) for b in books])
[(u'Artificial Intelligence: A Modern Approach', 51.5), (u'Practical Django Projects', 29.0), (u'Python Web Development with Django', 30.3...), (u'Sams Teach Yourself Django in 24 Hours', 45.0)]

# Count the number of authors of each book
>>> books = Book.objects.annotate(num_authors=Count('authors'))
>>> sorted([(b.name, b.num_authors) for b in books])
[(u'Artificial Intelligence: A Modern Approach', 2), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 1), (u'Practical Django Projects', 1), (u'Python Web Development with Django', 3), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 2)]

# Backward
# Average rating of the Books whose Author's names contains the letter 'a'
>>> authors = Author.objects.all().filter(name__contains='a').annotate(Avg('book__rating'))
>>> sorted([(a.name, a.book__rating__avg) for a in authors])
[(u'Adrian Holovaty', 4.5), (u'Brad Dayley', 3.0), (u'Jacob Kaplan-Moss', 4.5), (u'James Bennett', 4.0), (u'Paul Bissex', 4.0), (u'Stuart Russell', 4.0)]

# Count the number of books written by each author
>>> authors = Author.objects.annotate(num_books=Count('book'))
>>> sorted([(a.name, a.num_books) for a in authors])
[(u'Adrian Holovaty', 1), (u'Brad Dayley', 1), (u'Jacob Kaplan-Moss', 1), (u'James Bennett', 1), (u'Jeffrey Forcier ', 1), (u'Paul Bissex', 1), (u'Peter Norvig', 2), (u'Stuart Russell', 1), (u'Wesley J. Chun', 1)]

# On OneToMany Relationships
# Forward
# Annotate each book with the number of awards of each Book's Publisher
>>> books = Book.objects.all().annotate(Sum('publisher__num_awards'))
>>> sorted([(b.name, b.publisher__num_awards__sum) for b in books])
[(u'Artificial Intelligence: A Modern Approach', 7), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 9), (u'Practical Django Projects', 3), (u'Python Web Development with Django', 7), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 3)]

# Backward
# Annotate each publisher with the sum of the price of all books sold
>>> publishers = Publisher.objects.all().annotate(Sum('book__price'))
>>> sorted([(p.name, p.book__price__sum) for p in publishers])
[(u'Apress', Decimal("59.69")), (u'Morgan Kaufmann', Decimal("75.00")), (u'Prentice Hall', Decimal("112.49")), (u'Sams', Decimal("23.09"))]

# Calls to values() are not commutative over annotate().
# Calling values on a queryset that has annotations returns the output
# as a dictionary
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values()
[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]

>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('pk', 'isbn', 'mean_age')
[{'pk': 1, 'isbn': u'159059725', 'mean_age': 34.5}]

# Calling it with parameters reduces the output but does not remove the
# annotation.
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('name')
[{'name': u'The Definitive Guide to Django: Web Development Done Right', 'mean_age': 34.5}]

# An empty values() call before annotating has the same effect as an
# empty values() call after annotating
>>> Book.objects.filter(pk=1).values().annotate(mean_age=Avg('authors__age'))
[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]

# Calling annotate() on a ValuesQuerySet annotates over the groups of
# fields to be selected by the ValuesQuerySet.
# Each resulting dictionary holds the grouped field(s) plus one entry per
# annotation.
>>> Book.objects.all().values('rating').annotate(n_authors=Count('authors__id'), mean_age=Avg('authors__age')).order_by('rating')
[{'rating': 3.0, 'n_authors': 1, 'mean_age': 45.0}, {'rating': 4.0, 'n_authors': 6, 'mean_age': 37.1...}, {'rating': 4.5, 'n_authors': 2, 'mean_age': 34.5}, {'rating': 5.0, 'n_authors': 1, 'mean_age': 57.0}]

# If a join doesn't match any objects, an aggregate returns None
>>> authors = Author.objects.all().annotate(Avg('friends__age')).order_by('id')
>>> len(authors)
9

>>> sorted([(a.name, a.friends__age__avg) for a in authors])
[(u'Adrian Holovaty', 32.0), (u'Brad Dayley', None), (u'Jacob Kaplan-Moss', 29.5), (u'James Bennett', 34.0), (u'Jeffrey Forcier ', 27.0), (u'Paul Bissex', 31.0), (u'Peter Norvig', 46.0), (u'Stuart Russell', 57.0), (u'Wesley J. Chun', 33.6...)]

# The Count aggregation function allows an extra parameter: distinct.
# This restricts the count results to unique items
>>> Book.objects.all().aggregate(Count('rating'))
{'rating__count': 6}

>>> Book.objects.all().aggregate(Count('rating', distinct=True))
{'rating__count': 4}

# Counting related objects
# When using Count you can also omit the primary key and refer only to
# the related field name if you want to count all the related objects
# and not a specific column
>>> explicit = list(Author.objects.annotate(Count('book__id')))
>>> implicit = list(Author.objects.annotate(Count('book')))
>>> explicit == implicit
True

# Ordering is allowed on aggregates
>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('oldest', 'rating')
[{'rating': 4.5, 'oldest': 35}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.0, 'oldest': 57}, {'rating': 5.0, 'oldest': 57}]

>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('-oldest', '-rating')
[{'rating': 5.0, 'oldest': 57}, {'rating': 4.0, 'oldest': 57}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.5, 'oldest': 35}]

# It is possible to aggregate over annotated values
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors'))
{'num_authors__avg': 1.66...}

# You can filter the results based on the aggregation alias.
# Let's add a publisher to test the different possibilities for filtering
>>> p = Publisher(name='Expensive Publisher', num_awards=0)
>>> p.save()
>>> Book(name='ExpensiveBook1', pages=1, isbn='111', rating=3.5, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,1)).save()
>>> Book(name='ExpensiveBook2', pages=1, isbn='222', rating=4.0, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,2)).save()
>>> Book(name='ExpensiveBook3', pages=1, isbn='333', rating=4.5, price=Decimal("35"), publisher=p, pubdate=date(2008,12,3)).save()

# Publishers that have:
# (i) more than one book
>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]

# (ii) a book that cost less than 40
>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).order_by('pk')
[<Publisher: Apress>, <Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]

# (iii) more than one book and (at least) a book that cost less than 40
>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=Decimal("40.0")).order_by('pk')
[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]

# (iv) more than one book that costs less than $40
>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
[<Publisher: Apress>]

# Now a bit of testing on the different lookup types
#
>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 3]).order_by('pk')
[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]

>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 2]).order_by('pk')
[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>]

>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__in=[1, 3]).order_by('pk')
[<Publisher: Sams>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]

>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__isnull=True)
[]

>>> p.delete()

# Does Author X have any friends? (or better, how many friends does author X have)
>>> Author.objects.filter(pk=1).aggregate(Count('friends__id'))
{'friends__id__count': 2}

# Give me a list of all Books with exactly 2 authors
>>> Book.objects.all().annotate(num_authors=Count('authors__name')).filter(num_authors__exact=2).order_by('pk')
[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Artificial Intelligence: A Modern Approach>]

# Give me a list of all Authors that have no friends
>>> Author.objects.all().annotate(num_friends=Count('friends__id', distinct=True)).filter(num_friends=0).order_by('pk')
[<Author: Brad Dayley>]

# Give me a list of all publishers that have published more than 1 books
>>> Publisher.objects.all().annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
[<Publisher: Apress>, <Publisher: Prentice Hall>]

# Give me a list of all publishers that have published more than 1 books that cost less than 40
>>> Publisher.objects.all().filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1)
[<Publisher: Apress>]

# Give me a list of all Books that were written by X and one other author.
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1)
[<Book: Artificial Intelligence: A Modern Approach>]

# Give me the average rating of all Books that were written by X and one other author.
#(Aggregate over objects discovered using membership of the m2m set)
# Adding an existing author to another book to test it the right way
>>> a = Author.objects.get(name__contains='Norvig')
>>> b = Book.objects.get(name__contains='Done Right')
>>> b.authors.add(a)
>>> b.save()

# This should do it
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1).aggregate(Avg('rating'))
{'rating__avg': 4.25}

>>> b.authors.remove(a)

# Give me a list of all Authors that have published a book with at least one other person
# (Filters over a count generated on a related object)
#
# Cheating: [a for a in Author.objects.all().annotate(num_colleagues=Count('book__authors__id'), num_books=Count('book__id', distinct=True)) if a.num_colleagues - a.num_books > 0]
# F-Syntax is required. Will be fixed after F objects are available
# Tests on fields with non-default table and column names.
>>> Clues.objects.values('EntryID__Entry').annotate(Appearances=Count('EntryID'), Distinct_Clues=Count('Clue', distinct=True))
[]

# Aggregates also work on dates, times and datetimes
>>> Publisher.objects.annotate(earliest_book=Min('book__pubdate')).order_by('earliest_book').values()
[{'earliest_book': datetime.date(1991, 10, 15), 'num_awards': 9, 'id': 4, 'name': u'Morgan Kaufmann'}, {'earliest_book': datetime.date(1995, 1, 15), 'num_awards': 7, 'id': 3, 'name': u'Prentice Hall'}, {'earliest_book': datetime.date(2007, 12, 6), 'num_awards': 3, 'id': 1, 'name': u'Apress'}, {'earliest_book': datetime.date(2008, 3, 3), 'num_awards': 1, 'id': 2, 'name': u'Sams'}]

>>> Store.objects.aggregate(Max('friday_night_closing'), Min("original_opening"))
{'friday_night_closing__max': datetime.time(23, 59, 59), 'original_opening__min': datetime.datetime(1945, 4, 25, 16, 24, 14)}

# values_list() can also be used
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('pk', 'isbn', 'mean_age')
[(1, u'159059725', 34.5)]

>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('isbn')
[(u'159059725',)]

>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age')
[(34.5,)]

>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age', flat=True)
[34.5]
"""}

View File

@ -0,0 +1,229 @@
[
{
"pk": 1,
"model": "aggregation_regress.publisher",
"fields": {
"name": "Apress",
"num_awards": 3
}
},
{
"pk": 2,
"model": "aggregation_regress.publisher",
"fields": {
"name": "Sams",
"num_awards": 1
}
},
{
"pk": 3,
"model": "aggregation_regress.publisher",
"fields": {
"name": "Prentice Hall",
"num_awards": 7
}
},
{
"pk": 4,
"model": "aggregation_regress.publisher",
"fields": {
"name": "Morgan Kaufmann",
"num_awards": 9
}
},
{
"pk": 1,
"model": "aggregation_regress.book",
"fields": {
"publisher": 1,
"isbn": "159059725",
"name": "The Definitive Guide to Django: Web Development Done Right",
"price": "30.00",
"rating": 4.5,
"authors": [1, 2],
"pages": 447,
"pubdate": "2007-12-6"
}
},
{
"pk": 2,
"model": "aggregation_regress.book",
"fields": {
"publisher": 2,
"isbn": "067232959",
"name": "Sams Teach Yourself Django in 24 Hours",
"price": "23.09",
"rating": 3.0,
"authors": [3],
"pages": 528,
"pubdate": "2008-3-3"
}
},
{
"pk": 3,
"model": "aggregation_regress.book",
"fields": {
"publisher": 1,
"isbn": "159059996",
"name": "Practical Django Projects",
"price": "29.69",
"rating": 4.0,
"authors": [4],
"pages": 300,
"pubdate": "2008-6-23"
}
},
{
"pk": 4,
"model": "aggregation_regress.book",
"fields": {
"publisher": 3,
"isbn": "013235613",
"name": "Python Web Development with Django",
"price": "29.69",
"rating": 4.0,
"authors": [5, 6, 7],
"pages": 350,
"pubdate": "2008-11-3"
}
},
{
"pk": 5,
"model": "aggregation_regress.book",
"fields": {
"publisher": 3,
"isbn": "013790395",
"name": "Artificial Intelligence: A Modern Approach",
"price": "82.80",
"rating": 4.0,
"authors": [8, 9],
"pages": 1132,
"pubdate": "1995-1-15"
}
},
{
"pk": 6,
"model": "aggregation_regress.book",
"fields": {
"publisher": 4,
"isbn": "155860191",
"name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
"price": "75.00",
"rating": 5.0,
"authors": [8],
"pages": 946,
"pubdate": "1991-10-15"
}
},
{
"pk": 1,
"model": "aggregation_regress.store",
"fields": {
"books": [1, 2, 3, 4, 5, 6],
"name": "Amazon.com",
"original_opening": "1994-4-23 9:17:42",
"friday_night_closing": "23:59:59"
}
},
{
"pk": 2,
"model": "aggregation_regress.store",
"fields": {
"books": [1, 3, 5, 6],
"name": "Books.com",
"original_opening": "2001-3-15 11:23:37",
"friday_night_closing": "23:59:59"
}
},
{
"pk": 3,
"model": "aggregation_regress.store",
"fields": {
"books": [3, 4, 6],
"name": "Mamma and Pappa's Books",
"original_opening": "1945-4-25 16:24:14",
"friday_night_closing": "21:30:00"
}
},
{
"pk": 1,
"model": "aggregation_regress.author",
"fields": {
"age": 34,
"friends": [2, 4],
"name": "Adrian Holovaty"
}
},
{
"pk": 2,
"model": "aggregation_regress.author",
"fields": {
"age": 35,
"friends": [1, 7],
"name": "Jacob Kaplan-Moss"
}
},
{
"pk": 3,
"model": "aggregation_regress.author",
"fields": {
"age": 45,
"friends": [],
"name": "Brad Dayley"
}
},
{
"pk": 4,
"model": "aggregation_regress.author",
"fields": {
"age": 29,
"friends": [1],
"name": "James Bennett"
}
},
{
"pk": 5,
"model": "aggregation_regress.author",
"fields": {
"age": 37,
"friends": [6, 7],
"name": "Jeffrey Forcier "
}
},
{
"pk": 6,
"model": "aggregation_regress.author",
"fields": {
"age": 29,
"friends": [5, 7],
"name": "Paul Bissex"
}
},
{
"pk": 7,
"model": "aggregation_regress.author",
"fields": {
"age": 25,
"friends": [2, 5, 6],
"name": "Wesley J. Chun"
}
},
{
"pk": 8,
"model": "aggregation_regress.author",
"fields": {
"age": 57,
"friends": [9],
"name": "Peter Norvig"
}
},
{
"pk": 9,
"model": "aggregation_regress.author",
"fields": {
"age": 46,
"friends": [8],
"name": "Stuart Russell"
}
}
]

View File

@ -0,0 +1,199 @@
# coding: utf-8
from django.db import models
from django.conf import settings
# Compatibility shim: evaluating the bare name raises NameError when the
# interpreter has no builtin ``sorted``; in that case Django's own
# implementation is imported under the same name.
try:
    sorted
except NameError:
    from django.utils.itercompat import sorted  # For Python 2.3
class Author(models.Model):
    """An author with a name, an age and a set of friends who are also authors."""
    name = models.CharField(max_length=100)
    age = models.IntegerField()
    # Self-referential many-to-many relation; blank=True allows it to be empty.
    friends = models.ManyToManyField('self', blank=True)

    def __unicode__(self):
        """Return the author's name as the display string."""
        return self.name
class Publisher(models.Model):
    """A publisher with a name and an integer award count."""
    name = models.CharField(max_length=300)
    num_awards = models.IntegerField()

    def __unicode__(self):
        """Return the publisher's name as the display string."""
        return self.name
class Book(models.Model):
    """A book, linked to many authors and to exactly one publisher.

    Unlike a model without a ``Meta`` class, this one declares a default
    ordering, so queries on it carry an implicit ``ORDER BY name``.
    """
    isbn = models.CharField(max_length=9)
    name = models.CharField(max_length=300)
    pages = models.IntegerField()
    rating = models.FloatField()
    price = models.DecimalField(decimal_places=2, max_digits=6)
    # Many-to-many: a book may have several authors and vice versa.
    authors = models.ManyToManyField(Author)
    # Many-to-one: every book belongs to a single publisher.
    publisher = models.ForeignKey(Publisher)
    pubdate = models.DateField()

    class Meta:
        # Default (implicit) ordering applied to Book querysets.
        ordering = ('name',)

    def __unicode__(self):
        """Return the book's title as the display string."""
        return self.name
class Store(models.Model):
    """A store that stocks many books and records an opening datetime and a closing time."""
    name = models.CharField(max_length=300)
    books = models.ManyToManyField(Book)
    original_opening = models.DateTimeField()
    friday_night_closing = models.TimeField()

    def __unicode__(self):
        """Return the store's name as the display string."""
        return self.name
#Extra does not play well with values. Modify the tests if/when this is fixed.
# Regression doctests for aggregate()/annotate() interacting with ordering,
# values(), extra() and error reporting.
#
# Layout rule: every block of expected output is followed by a blank line.
# doctest treats all non-blank lines after an example as expected output, so
# a comment placed directly under a result would be compared against it.
__test__ = {'API_TESTS': """
>>> from django.core import management

# Reset the database representation of this app.
# This will return the database to a clean initial state.
>>> management.call_command('flush', verbosity=0, interactive=False)

>>> from django.db.models import Avg, Sum, Count, Max, Min, StdDev, Variance

# Ordering requests are ignored
>>> Author.objects.all().order_by('name').aggregate(Avg('age'))
{'age__avg': 37.4...}

# Implicit ordering is also ignored
>>> Book.objects.all().aggregate(Sum('pages'))
{'pages__sum': 3703}

# Baseline results
>>> Book.objects.all().aggregate(Sum('pages'), Avg('pages'))
{'pages__sum': 3703, 'pages__avg': 617.1...}

# Empty values query doesn't affect grouping or results
>>> Book.objects.all().values().aggregate(Sum('pages'), Avg('pages'))
{'pages__sum': 3703, 'pages__avg': 617.1...}

# Aggregate overrides extra selected column
>>> Book.objects.all().extra(select={'price_per_page' : 'price / pages'}).aggregate(Sum('pages'))
{'pages__sum': 3703}

# Annotations get combined with extra select clauses
>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).__dict__.items())
[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]

# Order of the annotate/extra in the query doesn't matter
>>> sorted(Book.objects.all().extra(select={'manufacture_cost' : 'price * .5'}).annotate(mean_auth_age=Avg('authors__age')).get(pk=2).__dict__.items())
[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]

# Values queries can be combined with annotate and extra
>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).values().get(pk=2).items())
[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]

# The order of the values, annotate and extra clauses doesn't matter
>>> sorted(Book.objects.all().values().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).items())
[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]

# A values query that selects specific columns reduces the output
>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).values('name').get(pk=1).items())
[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')]

# The annotations are added to values output if values() precedes annotate()
>>> sorted(Book.objects.all().values('name').annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1).items())
[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')]

# Check that all of the objects are getting counted (allow_nulls) and that values respects the amount of objects
>>> len(Author.objects.all().annotate(Avg('friends__age')).values())
9

# Check that consecutive calls to annotate accumulate in the query
>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('oldest', 'price').annotate(Max('publisher__num_awards'))
[{'price': Decimal("30..."), 'oldest': 35, 'publisher__num_awards__max': 3}, {'price': Decimal("29.69"), 'oldest': 37, 'publisher__num_awards__max': 7}, {'price': Decimal("23.09"), 'oldest': 45, 'publisher__num_awards__max': 1}, {'price': Decimal("75..."), 'oldest': 57, 'publisher__num_awards__max': 9}, {'price': Decimal("82.8..."), 'oldest': 57, 'publisher__num_awards__max': 7}]

# Aggregates can be composed over annotations.
# The return type is derived from the composed aggregate
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('pages'), Max('price'), Sum('num_authors'), Avg('num_authors'))
{'num_authors__sum': 10, 'num_authors__avg': 1.66..., 'pages__max': 1132, 'price__max': Decimal("82.80")}

# Bad field requests in aggregates are caught and reported
>>> Book.objects.all().aggregate(num_authors=Count('foo'))
Traceback (most recent call last):
...
FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store

>>> Book.objects.all().annotate(num_authors=Count('foo'))
Traceback (most recent call last):
...
FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store

>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('foo'))
Traceback (most recent call last):
...
FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store, num_authors

# Old-style count aggregations can be mixed with new-style
>>> Book.objects.annotate(num_authors=Count('authors')).count()
6

# Non-ordinal, non-computed Aggregates over annotations correctly inherit
# the annotation's internal type if the annotation is ordinal or computed
>>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Max('num_authors'))
{'num_authors__max': 3}

>>> Publisher.objects.annotate(avg_price=Avg('book__price')).aggregate(Max('avg_price'))
{'avg_price__max': 75.0...}

# Aliases are quoted to protect aliases that might be reserved names
>>> Book.objects.aggregate(number=Max('pages'), select=Max('pages'))
{'number': 1132, 'select': 1132}
"""
}
# Append the StdDev/Variance examples to the doctest suite on every backend
# except SQLite; as the note inside the string says, those aggregates are not
# guaranteed to be available there.  Each aggregate is checked on an integer,
# a float and a decimal column, with and without ``sample=True``.
if settings.DATABASE_ENGINE != 'sqlite3':
    __test__['API_TESTS'] += """
# Stddev and Variance are not guaranteed to be available for SQLite.
>>> Book.objects.aggregate(StdDev('pages'))
{'pages__stddev': 311.46...}
>>> Book.objects.aggregate(StdDev('rating'))
{'rating__stddev': 0.60...}
>>> Book.objects.aggregate(StdDev('price'))
{'price__stddev': 24.16...}
>>> Book.objects.aggregate(StdDev('pages', sample=True))
{'pages__stddev': 341.19...}
>>> Book.objects.aggregate(StdDev('rating', sample=True))
{'rating__stddev': 0.66...}
>>> Book.objects.aggregate(StdDev('price', sample=True))
{'price__stddev': 26.46...}
>>> Book.objects.aggregate(Variance('pages'))
{'pages__variance': 97010.80...}
>>> Book.objects.aggregate(Variance('rating'))
{'rating__variance': 0.36...}
>>> Book.objects.aggregate(Variance('price'))
{'price__variance': 583.77...}
>>> Book.objects.aggregate(Variance('pages', sample=True))
{'pages__variance': 116412.96...}
>>> Book.objects.aggregate(Variance('rating', sample=True))
{'rating__variance': 0.44...}
>>> Book.objects.aggregate(Variance('price', sample=True))
{'price__variance': 700.53...}
"""