Fixed #3566 -- Added support for aggregation to the ORM. See the documentation for details on usage.

Many thanks to:
 * Nicolas Lara, who worked on this feature during the 2008 Google Summer of Code.
 * Alex Gaynor for his help debugging and fixing a number of issues.
 * Justin Bronn for his help integrating with contrib.gis.
 * Karen Tracey for her help with cross-platform testing.
 * Ian Kelly for his help testing and fixing Oracle support.
 * Malcolm Tredinnick for his invaluable review notes.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@9742 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Russell Keith-Magee 2009-01-15 11:06:34 +00:00
parent 50a293a0c3
commit cc4e4d9aee
30 changed files with 2357 additions and 325 deletions

View File

@ -31,6 +31,7 @@ answer newbie questions, and generally made Django that much better:
AgarFu <heaven@croasanaso.sytes.net>
Dagur Páll Ammendrup <dagurp@gmail.com>
Collin Anderson <cmawebsite@gmail.com>
Nicolas Lara <nicolaslara@gmail.com>
Jeff Anderson <jefferya@programmerq.net>
Marian Andre <django@andre.sk>
Andreas

View File

@ -0,0 +1,10 @@
from django.db.models import Aggregate
class Extent(Aggregate):
name = 'Extent'
class MakeLine(Aggregate):
name = 'MakeLine'
class Union(Aggregate):
name = 'Union'

View File

@ -3,6 +3,7 @@ from django.db import connection
from django.db.models.query import sql, QuerySet, Q
from django.contrib.gis.db.backend import SpatialBackend
from django.contrib.gis.db.models import aggregates
from django.contrib.gis.db.models.fields import GeometryField, PointField
from django.contrib.gis.db.models.sql import AreaField, DistanceField, GeomField, GeoQuery, GeoWhereNode
from django.contrib.gis.measure import Area, Distance
@ -17,7 +18,7 @@ class GeomSQL(object):
"Simple wrapper object for geometric SQL."
def __init__(self, geo_sql):
self.sql = geo_sql
def as_sql(self, *args, **kwargs):
return self.sql
@ -30,7 +31,7 @@ class GeoQuerySet(QuerySet):
def area(self, tolerance=0.05, **kwargs):
"""
Returns the area of the geographic field in an `area` attribute on
Returns the area of the geographic field in an `area` attribute on
each element of this GeoQuerySet.
"""
# Peforming setup here rather than in `_spatial_attribute` so that
@ -75,21 +76,21 @@ class GeoQuerySet(QuerySet):
Keyword Arguments:
`spheroid` => If the geometry field is geodetic and PostGIS is
the spatial database, then the more accurate
the spatial database, then the more accurate
spheroid calculation will be used instead of the
quicker sphere calculation.
`tolerance` => Used only for Oracle. The tolerance is
in meters -- a default of 5 centimeters (0.05)
`tolerance` => Used only for Oracle. The tolerance is
in meters -- a default of 5 centimeters (0.05)
is used.
"""
return self._distance_attribute('distance', geom, **kwargs)
def envelope(self, **kwargs):
"""
Returns a Geometry representing the bounding box of the
Returns a Geometry representing the bounding box of the
Geometry field in an `envelope` attribute on each element of
the GeoQuerySet.
the GeoQuerySet.
"""
return self._geom_attribute('envelope', **kwargs)
@ -98,20 +99,7 @@ class GeoQuerySet(QuerySet):
Returns the extent (aggregate) of the features in the GeoQuerySet. The
extent will be returned as a 4-tuple, consisting of (xmin, ymin, xmax, ymax).
"""
convert_extent = None
if SpatialBackend.postgis:
def convert_extent(box, geo_field):
# TODO: Parsing of BOX3D, Oracle support (patches welcome!)
# Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)";
# parsing out and returning as a 4-tuple.
ll, ur = box[4:-1].split(',')
xmin, ymin = map(float, ll.split())
xmax, ymax = map(float, ur.split())
return (xmin, ymin, xmax, ymax)
elif SpatialBackend.oracle:
def convert_extent(wkt, geo_field):
raise NotImplementedError
return self._spatial_aggregate('extent', convert_func=convert_extent, **kwargs)
return self._spatial_aggregate(aggregates.Extent, **kwargs)
def gml(self, precision=8, version=2, **kwargs):
"""
@ -120,7 +108,7 @@ class GeoQuerySet(QuerySet):
"""
s = {'desc' : 'GML', 'procedure_args' : {'precision' : precision}}
if SpatialBackend.postgis:
# PostGIS AsGML() aggregate function parameter order depends on the
# PostGIS AsGML() aggregate function parameter order depends on the
# version -- uggh.
major, minor1, minor2 = SpatialBackend.version
if major >= 1 and (minor1 > 3 or (minor1 == 3 and minor2 > 1)):
@ -163,9 +151,7 @@ class GeoQuerySet(QuerySet):
this GeoQuerySet and returns it. This is a spatial aggregate
method, and thus returns a geometry rather than a GeoQuerySet.
"""
kwargs['geo_field_type'] = PointField
kwargs['agg_field'] = GeometryField
return self._spatial_aggregate('make_line', **kwargs)
return self._spatial_aggregate(aggregates.MakeLine, geo_field_type=PointField, **kwargs)
def mem_size(self, **kwargs):
"""
@ -185,7 +171,7 @@ class GeoQuerySet(QuerySet):
def num_points(self, **kwargs):
"""
Returns the number of points in the first linestring in the
Returns the number of points in the first linestring in the
Geometry field in a `num_points` attribute on each element of
this GeoQuerySet; otherwise sets with None.
"""
@ -231,7 +217,7 @@ class GeoQuerySet(QuerySet):
def sym_difference(self, geom, **kwargs):
"""
Returns the symmetric difference of the geographic field in a
Returns the symmetric difference of the geographic field in a
`sym_difference` attribute on each element of this GeoQuerySet.
"""
return self._geomset_attribute('sym_difference', geom, **kwargs)
@ -265,7 +251,7 @@ class GeoQuerySet(QuerySet):
# when there's also a transformation we need to cascade the substitutions.
# For example, 'SDO_UTIL.TO_WKTGEOMETRY(SDO_CS.TRANSFORM( ... )'
geo_col = self.query.custom_select.get(geo_field, field_col)
# Setting the key for the field's column with the custom SELECT SQL to
# override the geometry column returned from the database.
custom_sel = '%s(%s, %s)' % (SpatialBackend.transform, geo_col, srid)
@ -288,11 +274,10 @@ class GeoQuerySet(QuerySet):
None if the GeoQuerySet is empty. The `tolerance` keyword is for
Oracle backends only.
"""
kwargs['agg_field'] = GeometryField
return self._spatial_aggregate('unionagg', **kwargs)
return self._spatial_aggregate(aggregates.Union, **kwargs)
### Private API -- Abstracted DRY routines. ###
def _spatial_setup(self, att, aggregate=False, desc=None, field_name=None, geo_field_type=None):
def _spatial_setup(self, att, desc=None, field_name=None, geo_field_type=None):
"""
Performs set up for executing the spatial function.
"""
@ -301,86 +286,52 @@ class GeoQuerySet(QuerySet):
if desc is None: desc = att
if not func: raise ImproperlyConfigured('%s stored procedure not available.' % desc)
# Initializing the procedure arguments.
# Initializing the procedure arguments.
procedure_args = {'function' : func}
# Is there a geographic field in the model to perform this
# Is there a geographic field in the model to perform this
# operation on?
geo_field = self.query._geo_field(field_name)
if not geo_field:
raise TypeError('%s output only available on GeometryFields.' % func)
# If the `geo_field_type` keyword was used, then enforce that
# If the `geo_field_type` keyword was used, then enforce that
# type limitation.
if not geo_field_type is None and not isinstance(geo_field, geo_field_type):
raise TypeError('"%s" stored procedures may only be called on %ss.' % (func, geo_field_type.__name__))
if not geo_field_type is None and not isinstance(geo_field, geo_field_type):
raise TypeError('"%s" stored procedures may only be called on %ss.' % (func, geo_field_type.__name__))
# Setting the procedure args.
procedure_args['geo_col'] = self._geocol_select(geo_field, field_name, aggregate)
procedure_args['geo_col'] = self._geocol_select(geo_field, field_name)
return procedure_args, geo_field
def _spatial_aggregate(self, att, field_name=None,
agg_field=None, convert_func=None,
geo_field_type=None, tolerance=0.0005):
def _spatial_aggregate(self, aggregate, field_name=None,
geo_field_type=None, tolerance=0.05):
"""
DRY routine for calling aggregate spatial stored procedures and
returning their result to the caller of the function.
"""
# Constructing the setup keyword arguments.
setup_kwargs = {'aggregate' : True,
'field_name' : field_name,
'geo_field_type' : geo_field_type,
}
procedure_args, geo_field = self._spatial_setup(att, **setup_kwargs)
if SpatialBackend.oracle:
procedure_args['tolerance'] = tolerance
# Adding in selection SQL for Oracle geometry columns.
if agg_field is GeometryField:
agg_sql = '%s' % SpatialBackend.select
else:
agg_sql = '%s'
agg_sql = agg_sql % ('%(function)s(SDOAGGRTYPE(%(geo_col)s,%(tolerance)s))' % procedure_args)
else:
agg_sql = '%(function)s(%(geo_col)s)' % procedure_args
# Getting the field the geographic aggregate will be called on.
geo_field = self.query._geo_field(field_name)
if not geo_field:
raise TypeError('%s aggregate only available on GeometryFields.' % aggregate.name)
# Wrapping our selection SQL in `GeomSQL` to bypass quoting, and
# specifying the type of the aggregate field.
self.query.select = [GeomSQL(agg_sql)]
self.query.select_fields = [agg_field]
# Checking if there are any geo field type limitations on this
# aggregate (e.g. ST_Makeline only operates on PointFields).
if not geo_field_type is None and not isinstance(geo_field, geo_field_type):
raise TypeError('%s aggregate may only be called on %ss.' % (aggregate.name, geo_field_type.__name__))
try:
# `asql` => not overriding `sql` module.
asql, params = self.query.as_sql()
except sql.datastructures.EmptyResultSet:
return None
# Getting the string expression of the field name, as this is the
# argument taken by `Aggregate` objects.
agg_col = field_name or geo_field.name
# Getting a cursor, executing the query, and extracting the returned
# value from the aggregate function.
cursor = connection.cursor()
cursor.execute(asql, params)
result = cursor.fetchone()[0]
# If the `agg_field` is specified as a GeometryField, then autmatically
# set up the conversion function.
if agg_field is GeometryField and not callable(convert_func):
if SpatialBackend.postgis:
def convert_geom(hex, geo_field):
if hex: return SpatialBackend.Geometry(hex)
else: return None
elif SpatialBackend.oracle:
def convert_geom(clob, geo_field):
if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid)
else: return None
convert_func = convert_geom
# Adding any keyword parameters for the Aggregate object. Oracle backends
# in particular need an additional `tolerance` parameter.
agg_kwargs = {}
if SpatialBackend.oracle: agg_kwargs['tolerance'] = tolerance
# Returning the callback function evaluated on the result culled
# from the executed cursor.
if callable(convert_func):
return convert_func(result, geo_field)
else:
return result
# Calling the QuerySet.aggregate, and returning only the value of the aggregate.
return self.aggregate(_geoagg=aggregate(agg_col, **agg_kwargs))['_geoagg']
def _spatial_attribute(self, att, settings, field_name=None, model_att=None):
"""
@ -393,7 +344,7 @@ class GeoQuerySet(QuerySet):
SQL function to call.
settings:
Dictonary of internal settings to customize for the spatial procedure.
Dictonary of internal settings to customize for the spatial procedure.
Public Keyword Arguments:
@ -420,7 +371,7 @@ class GeoQuerySet(QuerySet):
for k, v in default_args.iteritems(): settings['procedure_args'].setdefault(k, v)
else:
geo_field = settings['geo_field']
# The attribute to attach to the model.
if not isinstance(model_att, basestring): model_att = att
@ -429,7 +380,7 @@ class GeoQuerySet(QuerySet):
# Using the field's get_db_prep_lookup() to get any needed
# transformation SQL -- we pass in a 'dummy' `contains` lookup.
where, params = geo_field.get_db_prep_lookup('contains', settings['procedure_args'][name])
# Replacing the procedure format with that of any needed
# Replacing the procedure format with that of any needed
# transformation SQL.
old_fmt = '%%(%s)s' % name
new_fmt = where[0] % '%%s'
@ -438,7 +389,7 @@ class GeoQuerySet(QuerySet):
# Getting the format for the stored procedure.
fmt = '%%(function)s(%s)' % settings['procedure_fmt']
# If the result of this function needs to be converted.
if settings.get('select_field', False):
sel_fld = settings['select_field']
@ -446,10 +397,10 @@ class GeoQuerySet(QuerySet):
self.query.custom_select[model_att] = SpatialBackend.select
self.query.extra_select_fields[model_att] = sel_fld
# Finally, setting the extra selection attribute with
# Finally, setting the extra selection attribute with
# the format string expanded with the stored procedure
# arguments.
return self.extra(select={model_att : fmt % settings['procedure_args']},
return self.extra(select={model_att : fmt % settings['procedure_args']},
select_params=settings['select_params'])
def _distance_attribute(self, func, geom=None, tolerance=0.05, spheroid=False, **kwargs):
@ -471,10 +422,10 @@ class GeoQuerySet(QuerySet):
distance = func == 'distance'
length = func == 'length'
perimeter = func == 'perimeter'
if not (distance or length or perimeter):
if not (distance or length or perimeter):
raise ValueError('Unknown distance function: %s' % func)
# The field's get_db_prep_lookup() is used to get any
# The field's get_db_prep_lookup() is used to get any
# extra distance parameters. Here we set up the
# parameters that will be passed in to field's function.
lookup_params = [geom or 'POINT (0 0)', 0]
@ -482,12 +433,12 @@ class GeoQuerySet(QuerySet):
# If the spheroid calculation is desired, either by the `spheroid`
# keyword or wehn calculating the length of geodetic field, make
# sure the 'spheroid' distance setting string is passed in so we
# get the correct spatial stored procedure.
if spheroid or (SpatialBackend.postgis and geo_field.geodetic and length):
lookup_params.append('spheroid')
# get the correct spatial stored procedure.
if spheroid or (SpatialBackend.postgis and geo_field.geodetic and length):
lookup_params.append('spheroid')
where, params = geo_field.get_db_prep_lookup('distance_lte', lookup_params)
# The `geom_args` flag is set to true if a geometry parameter was
# The `geom_args` flag is set to true if a geometry parameter was
# passed in.
geom_args = bool(geom)
@ -505,7 +456,7 @@ class GeoQuerySet(QuerySet):
geodetic = unit_name in geo_field.geodetic_units
else:
geodetic = geo_field.geodetic
if distance:
if self.query.transformed_srid:
# Setting the `geom_args` flag to false because we want to handle
@ -515,7 +466,7 @@ class GeoQuerySet(QuerySet):
geom_args = False
procedure_fmt = '%s(%%(geo_col)s, %s)' % (SpatialBackend.transform, self.query.transformed_srid)
if geom.srid is None or geom.srid == self.query.transformed_srid:
# If the geom parameter srid is None, it is assumed the coordinates
# If the geom parameter srid is None, it is assumed the coordinates
# are in the transformed units. A placeholder is used for the
# geometry parameter.
procedure_fmt += ', %%s'
@ -529,10 +480,10 @@ class GeoQuerySet(QuerySet):
if geodetic:
# Spherical distance calculation is needed (because the geographic
# field is geodetic). However, the PostGIS ST_distance_sphere/spheroid()
# field is geodetic). However, the PostGIS ST_distance_sphere/spheroid()
# procedures may only do queries from point columns to point geometries
# some error checking is required.
if not isinstance(geo_field, PointField):
if not isinstance(geo_field, PointField):
raise TypeError('Spherical distance calculation only supported on PointFields.')
if not str(SpatialBackend.Geometry(buffer(params[0].wkb)).geom_type) == 'Point':
raise TypeError('Spherical distance calculation only supported with Point Geometry parameters')
@ -553,12 +504,12 @@ class GeoQuerySet(QuerySet):
# Setting up the settings for `_spatial_attribute`.
s = {'select_field' : DistanceField(dist_att),
'setup' : False,
'setup' : False,
'geo_field' : geo_field,
'procedure_args' : procedure_args,
'procedure_fmt' : procedure_fmt,
}
if geom_args:
if geom_args:
s['geom_args'] = ('geom',)
s['procedure_args']['geom'] = geom
elif geom:
@ -577,12 +528,12 @@ class GeoQuerySet(QuerySet):
s['procedure_fmt'] = '%(geo_col)s,%(tolerance)s'
s['procedure_args'] = {'tolerance' : tolerance}
return self._spatial_attribute(func, s, **kwargs)
def _geomset_attribute(self, func, geom, tolerance=0.05, **kwargs):
"""
DRY routine for setting up a GeoQuerySet method that attaches a
Geometry attribute and takes a Geoemtry parameter. This is used
for geometry set-like operations (e.g., intersection, difference,
for geometry set-like operations (e.g., intersection, difference,
union, sym_difference).
"""
s = {'geom_args' : ('geom',),
@ -595,16 +546,12 @@ class GeoQuerySet(QuerySet):
s['procedure_args']['tolerance'] = tolerance
return self._spatial_attribute(func, s, **kwargs)
def _geocol_select(self, geo_field, field_name, aggregate=False):
def _geocol_select(self, geo_field, field_name):
"""
Helper routine for constructing the SQL to select the geographic
column. Takes into account if the geographic field is in a
ForeignKey relation to the current model.
"""
# If this is an aggregate spatial query, the flag needs to be
# set on the `GeoQuery` object of this queryset.
if aggregate: self.query.aggregate = True
opts = self.model._meta
if not geo_field in opts.fields:
# Is this operation going to be on a related geographic field?

View File

@ -0,0 +1,36 @@
from django.db.models.sql.aggregates import *
from django.contrib.gis.db.models.fields import GeometryField
from django.contrib.gis.db.backend import SpatialBackend
if SpatialBackend.oracle:
geo_template = '%(function)s(SDOAGGRTYPE(%(field)s,%(tolerance)s))'
else:
geo_template = '%(function)s(%(field)s)'
class GeoAggregate(Aggregate):
# Overriding the SQL template with the geographic one.
sql_template = geo_template
is_extent = False
def __init__(self, col, source=None, is_summary=False, **extra):
super(GeoAggregate, self).__init__(col, source, is_summary, **extra)
# Can't use geographic aggregates on non-geometry fields.
if not isinstance(self.source, GeometryField):
raise ValueError('Geospatial aggregates only allowed on geometry fields.')
# Making sure the SQL function is available for this spatial backend.
if not self.sql_function:
raise NotImplementedError('This aggregate functionality not implemented for your spatial backend.')
class Extent(GeoAggregate):
is_extent = True
sql_function = SpatialBackend.extent
class MakeLine(GeoAggregate):
sql_function = SpatialBackend.make_line
class Union(GeoAggregate):
sql_function = SpatialBackend.unionagg

View File

@ -5,6 +5,7 @@ from django.db.models.fields.related import ForeignKey
from django.contrib.gis.db.backend import SpatialBackend
from django.contrib.gis.db.models.fields import GeometryField
from django.contrib.gis.db.models.sql import aggregates as gis_aggregates_module
from django.contrib.gis.db.models.sql.where import GeoWhereNode
from django.contrib.gis.measure import Area, Distance
@ -12,12 +13,35 @@ from django.contrib.gis.measure import Area, Distance
ALL_TERMS = sql.constants.QUERY_TERMS.copy()
ALL_TERMS.update(SpatialBackend.gis_terms)
# Conversion functions used in normalizing geographic aggregates.
if SpatialBackend.postgis:
def convert_extent(box):
# TODO: Parsing of BOX3D, Oracle support (patches welcome!)
# Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)";
# parsing out and returning as a 4-tuple.
ll, ur = box[4:-1].split(',')
xmin, ymin = map(float, ll.split())
xmax, ymax = map(float, ur.split())
return (xmin, ymin, xmax, ymax)
def convert_geom(hex, geo_field):
if hex: return SpatialBackend.Geometry(hex)
else: return None
else:
def convert_extent(box):
raise NotImplementedError('Aggregate extent not implemented for this spatial backend.')
def convert_geom(clob, geo_field):
if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid)
else: return None
class GeoQuery(sql.Query):
"""
A single spatial SQL query.
"""
# Overridding the valid query terms.
query_terms = ALL_TERMS
aggregates_module = gis_aggregates_module
#### Methods overridden from the base Query class ####
def __init__(self, model, conn):
@ -25,7 +49,6 @@ class GeoQuery(sql.Query):
# The following attributes are customized for the GeoQuerySet.
# The GeoWhereNode and SpatialBackend classes contain backend-specific
# routines and functions.
self.aggregate = False
self.custom_select = {}
self.transformed_srid = None
self.extra_select_fields = {}
@ -34,7 +57,6 @@ class GeoQuery(sql.Query):
obj = super(GeoQuery, self).clone(*args, **kwargs)
# Customized selection dictionary and transformed srid flag have
# to also be added to obj.
obj.aggregate = self.aggregate
obj.custom_select = self.custom_select.copy()
obj.transformed_srid = self.transformed_srid
obj.extra_select_fields = self.extra_select_fields.copy()
@ -50,12 +72,12 @@ class GeoQuery(sql.Query):
(without the table names) are given unique aliases. This is needed in
some cases to avoid ambiguitity with nested queries.
This routine is overridden from Query to handle customized selection of
This routine is overridden from Query to handle customized selection of
geometry columns.
"""
qn = self.quote_name_unless_alias
qn2 = self.connection.ops.quote_name
result = ['(%s) AS %s' % (self.get_extra_select_format(alias) % col[0], qn2(alias))
result = ['(%s) AS %s' % (self.get_extra_select_format(alias) % col[0], qn2(alias))
for alias, col in self.extra_select.iteritems()]
aliases = set(self.extra_select.keys())
if with_aliases:
@ -67,38 +89,53 @@ class GeoQuery(sql.Query):
for col, field in izip(self.select, self.select_fields):
if isinstance(col, (list, tuple)):
r = self.get_field_select(field, col[0])
if with_aliases and col[1] in col_aliases:
c_alias = 'Col%d' % len(col_aliases)
result.append('%s AS %s' % (r, c_alias))
aliases.add(c_alias)
col_aliases.add(c_alias)
if with_aliases:
if col[1] in col_aliases:
c_alias = 'Col%d' % len(col_aliases)
result.append('%s AS %s' % (r, c_alias))
aliases.add(c_alias)
col_aliases.add(c_alias)
else:
result.append('%s AS %s' % (r, col[1]))
aliases.add(r)
col_aliases.add(col[1])
else:
result.append(r)
aliases.add(r)
col_aliases.add(col[1])
else:
result.append(col.as_sql(quote_func=qn))
if hasattr(col, 'alias'):
aliases.add(col.alias)
col_aliases.add(col.alias)
elif self.default_cols:
cols, new_aliases = self.get_default_columns(with_aliases,
col_aliases)
result.extend(cols)
aliases.update(new_aliases)
result.extend([
'%s%s' % (
aggregate.as_sql(quote_func=qn),
alias is not None and ' AS %s' % alias or ''
)
for alias, aggregate in self.aggregate_select.items()
])
# This loop customized for GeoQuery.
if not self.aggregate:
for (table, col), field in izip(self.related_select_cols, self.related_select_fields):
r = self.get_field_select(field, table)
if with_aliases and col in col_aliases:
c_alias = 'Col%d' % len(col_aliases)
result.append('%s AS %s' % (r, c_alias))
aliases.add(c_alias)
col_aliases.add(c_alias)
else:
result.append(r)
aliases.add(r)
col_aliases.add(col)
for (table, col), field in izip(self.related_select_cols, self.related_select_fields):
r = self.get_field_select(field, table)
if with_aliases and col in col_aliases:
c_alias = 'Col%d' % len(col_aliases)
result.append('%s AS %s' % (r, c_alias))
aliases.add(c_alias)
col_aliases.add(c_alias)
else:
result.append(r)
aliases.add(r)
col_aliases.add(col)
self._select_aliases = aliases
return result
@ -112,7 +149,7 @@ class GeoQuery(sql.Query):
Returns a list of strings, quoted appropriately for use in SQL
directly, as well as a set of aliases used in the select statement.
This routine is overridden from Query to handle customized selection of
This routine is overridden from Query to handle customized selection of
geometry columns.
"""
result = []
@ -154,20 +191,10 @@ class GeoQuery(sql.Query):
return result, None
return result, aliases
def get_ordering(self):
"""
This routine is overridden to disable ordering for aggregate
spatial queries.
"""
if not self.aggregate:
return super(GeoQuery, self).get_ordering()
else:
return ()
def resolve_columns(self, row, fields=()):
"""
This routine is necessary so that distances and geometries returned
from extra selection SQL get resolved appropriately into Python
from extra selection SQL get resolved appropriately into Python
objects.
"""
values = []
@ -183,7 +210,7 @@ class GeoQuery(sql.Query):
# Converting any extra selection values (e.g., geometries and
# distance objects added by GeoQuerySet methods).
values = [self.convert_values(v, self.extra_select_fields.get(a, None))
values = [self.convert_values(v, self.extra_select_fields.get(a, None))
for v, a in izip(row[rn_offset:index_start], aliases)]
if SpatialBackend.oracle:
# This is what happens normally in OracleQuery's `resolve_columns`.
@ -212,6 +239,19 @@ class GeoQuery(sql.Query):
value = SpatialBackend.Geometry(value)
return value
def resolve_aggregate(self, value, aggregate):
"""
Overridden from GeoQuery's normalize to handle the conversion of
GeoAggregate objects.
"""
if isinstance(aggregate, self.aggregates_module.GeoAggregate):
if aggregate.is_extent:
return convert_extent(value)
else:
return convert_geom(value, aggregate.source)
else:
return super(GeoQuery, self).resolve_aggregate(value, aggregate)
#### Routines unique to GeoQuery ####
def get_extra_select_format(self, alias):
sel_fmt = '%s'
@ -222,9 +262,9 @@ class GeoQuery(sql.Query):
def get_field_select(self, fld, alias=None):
"""
Returns the SELECT SQL string for the given field. Figures out
if any custom selection SQL is needed for the column The `alias`
keyword may be used to manually specify the database table where
the column exists, if not in the model associated with this
if any custom selection SQL is needed for the column The `alias`
keyword may be used to manually specify the database table where
the column exists, if not in the model associated with this
`GeoQuery`.
"""
sel_fmt = self.get_select_format(fld)
@ -263,15 +303,15 @@ class GeoQuery(sql.Query):
"""
Recursive utility routine for checking the given name parameter
on the given model. Initially, the name parameter is a string,
of the field on the given model e.g., 'point', 'the_geom'.
Related model field strings like 'address__point', may also be
of the field on the given model e.g., 'point', 'the_geom'.
Related model field strings like 'address__point', may also be
used.
If a GeometryField exists according to the given name parameter
If a GeometryField exists according to the given name parameter
it will be returned, otherwise returns False.
"""
if isinstance(name_param, basestring):
# This takes into account the situation where the name is a
# This takes into account the situation where the name is a
# lookup to a related geographic field, e.g., 'address__point'.
name_param = name_param.split(sql.constants.LOOKUP_SEP)
name_param.reverse() # Reversing so list operates like a queue of related lookups.
@ -284,7 +324,7 @@ class GeoQuery(sql.Query):
except (FieldDoesNotExist, IndexError):
return False
# TODO: ManyToManyField?
if isinstance(fld, GeometryField):
if isinstance(fld, GeometryField):
return fld # A-OK.
elif isinstance(fld, ForeignKey):
# ForeignKey encountered, return the output of this utility called
@ -297,12 +337,12 @@ class GeoQuery(sql.Query):
"""
Helper function that returns the database column for the given field.
The table and column are returned (quoted) in the proper format, e.g.,
`"geoapp_city"."point"`. If `table_alias` is not specified, the
`"geoapp_city"."point"`. If `table_alias` is not specified, the
database table associated with the model of this `GeoQuery` will be
used.
"""
if table_alias is None: table_alias = self.model._meta.db_table
return "%s.%s" % (self.quote_name_unless_alias(table_alias),
return "%s.%s" % (self.quote_name_unless_alias(table_alias),
self.connection.ops.quote_name(field.column))
def _geo_field(self, field_name=None):
@ -333,5 +373,5 @@ class DistanceField(object):
# Rather than use GeometryField (which requires a SQL query
# upon instantiation), use this lighter weight class.
class GeomField(object):
class GeomField(object):
pass

View File

@ -10,6 +10,12 @@ except NameError:
# Python 2.3 compat
from sets import Set as set
try:
import decimal
except ImportError:
# Python 2.3 fallback
from django.utils import _decimal as decimal
from django.db.backends import util
from django.utils import datetime_safe
@ -62,6 +68,7 @@ class BaseDatabaseWrapper(local):
return util.CursorDebugWrapper(cursor, self)
class BaseDatabaseFeatures(object):
allows_group_by_pk = False
# True if django.db.backend.utils.typecast_timestamp is used on values
# returned from dates() calls.
needs_datetime_string_cast = True
@ -376,6 +383,22 @@ class BaseDatabaseOperations(object):
"""
return self.year_lookup_bounds(value)
def convert_values(self, value, field):
"""Coerce the value returned by the database backend into a consistent type that
is compatible with the field type.
"""
internal_type = field.get_internal_type()
if internal_type == 'DecimalField':
return value
elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField':
return int(value)
elif internal_type in ('DateField', 'DateTimeField', 'TimeField'):
return value
# No field, or the field isn't known to be a decimal or integer
# Default to a float
return float(value)
class BaseDatabaseIntrospection(object):
"""
This class encapsulates all backend-specific introspection utilities

View File

@ -110,6 +110,7 @@ class CursorWrapper(object):
class DatabaseFeatures(BaseDatabaseFeatures):
empty_fetchmany_value = ()
update_can_self_select = False
allows_group_by_pk = True
related_fields_match_type = True
class DatabaseOperations(BaseDatabaseOperations):

View File

@ -53,21 +53,23 @@ def query_class(QueryClass, Database):
return values
def convert_values(self, value, field):
from django.db.models.fields import DateField, DateTimeField, \
TimeField, BooleanField, NullBooleanField, DecimalField, Field
from django.db.models.fields import Field
if isinstance(value, Database.LOB):
value = value.read()
# Oracle stores empty strings as null. We need to undo this in
# order to adhere to the Django convention of using the empty
# string instead of null, but only if the field accepts the
# empty string.
if value is None and isinstance(field, Field) and field.empty_strings_allowed:
if value is None and field and field.empty_strings_allowed:
value = u''
# Convert 1 or 0 to True or False
elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)):
elif value in (1, 0) and field and field.get_internal_type() in ('BooleanField', 'NullBooleanField'):
value = bool(value)
# Force floats to the correct type
elif value is not None and field and field.get_internal_type() == 'FloatField':
value = float(value)
# Convert floats to decimals
elif value is not None and isinstance(field, DecimalField):
elif value is not None and field and field.get_internal_type() == 'DecimalField':
value = util.typecast_decimal(field.format_number(value))
# cx_Oracle always returns datetime.datetime objects for
# DATE and TIMESTAMP columns, but Django wants to see a
@ -86,13 +88,9 @@ def query_class(QueryClass, Database):
value = datetime.datetime(value.year, value.month,
value.day, value.hour, value.minute, value.second,
value.fsecond)
if isinstance(field, DateTimeField):
# DateTimeField subclasses DateField so must be checked
# first.
pass
elif isinstance(field, DateField):
if field and field.get_internal_type() == 'DateField':
value = value.date()
elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1):
elif field and field.get_internal_type() == 'TimeField' or (value.year == 1900 and value.month == value.day == 1):
value = value.time()
elif value.hour == value.minute == value.second == value.microsecond == 0:
value = value.date()

View File

@ -10,7 +10,7 @@ from django.db.backends import *
from django.db.backends.sqlite3.client import DatabaseClient
from django.db.backends.sqlite3.creation import DatabaseCreation
from django.db.backends.sqlite3.introspection import DatabaseIntrospection
from django.utils.safestring import SafeString
from django.utils.safestring import SafeString
try:
try:
@ -102,6 +102,26 @@ class DatabaseOperations(BaseDatabaseOperations):
second = '%s-12-31 23:59:59.999999'
return [first % value, second % value]
def convert_values(self, value, field):
"""SQLite returns floats when it should be returning decimals,
and gets dates and datetimes wrong.
For consistency with other backends, coerce when required.
"""
internal_type = field.get_internal_type()
if internal_type == 'DecimalField':
return util.typecast_decimal(field.format_number(value))
elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField':
return int(value)
elif internal_type == 'DateField':
return util.typecast_date(value)
elif internal_type == 'DateTimeField':
return util.typecast_timestamp(value)
elif internal_type == 'TimeField':
return util.typecast_time(value)
# No field, or the field isn't known to be a decimal or integer
return value
class DatabaseWrapper(BaseDatabaseWrapper):
# SQLite requires LIKE statements to include an ESCAPE clause if the value

View File

@ -5,6 +5,7 @@ from django.db.models.loading import get_apps, get_app, get_models, get_model, r
from django.db.models.query import Q
from django.db.models.manager import Manager
from django.db.models.base import Model
from django.db.models.aggregates import *
from django.db.models.fields import *
from django.db.models.fields.subclassing import SubfieldBase
from django.db.models.fields.files import FileField, ImageField

View File

@ -0,0 +1,66 @@
"""
Classes to represent the definitions of aggregate functions.
"""
class Aggregate(object):
"""
Default Aggregate definition.
"""
def __init__(self, lookup, **extra):
"""Instantiate a new aggregate.
* lookup is the field on which the aggregate operates.
* extra is a dictionary of additional data to provide for the
aggregate definition
Also utilizes the class variables:
* name, the identifier for this aggregate function.
"""
self.lookup = lookup
self.extra = extra
def _default_alias(self):
return '%s__%s' % (self.lookup, self.name.lower())
default_alias = property(_default_alias)
def add_to_query(self, query, alias, col, source, is_summary):
"""Add the aggregate to the nominated query.
This method is used to convert the generic Aggregate definition into a
backend-specific definition.
* query is the backend-specific query instance to which the aggregate
is to be added.
* col is a column reference describing the subject field
of the aggregate. It can be an alias, or a tuple describing
a table and column name.
* source is the underlying field or aggregate definition for
the column reference. If the aggregate is not an ordinal or
computed type, this reference is used to determine the coerced
output type of the aggregate.
* is_summary is a boolean that is set True if the aggregate is a
summary value rather than an annotation.
"""
aggregate = getattr(query.aggregates_module, self.name)
query.aggregate_select[alias] = aggregate(col, source=source, is_summary=is_summary, **self.extra)
class Avg(Aggregate):
name = 'Avg'
class Count(Aggregate):
name = 'Count'
class Max(Aggregate):
name = 'Max'
class Min(Aggregate):
name = 'Min'
class StdDev(Aggregate):
name = 'StdDev'
class Sum(Aggregate):
name = 'Sum'
class Variance(Aggregate):
name = 'Variance'

View File

@ -101,6 +101,12 @@ class Manager(object):
def filter(self, *args, **kwargs):
return self.get_query_set().filter(*args, **kwargs)
def aggregate(self, *args, **kwargs):
return self.get_query_set().aggregate(*args, **kwargs)
def annotate(self, *args, **kwargs):
return self.get_query_set().annotate(*args, **kwargs)
def complex_filter(self, *args, **kwargs):
return self.get_query_set().complex_filter(*args, **kwargs)

View File

@ -4,6 +4,7 @@ except NameError:
from sets import Set as set # Python 2.3 fallback
from django.db import connection, transaction, IntegrityError
from django.db.models.aggregates import Aggregate
from django.db.models.fields import DateField
from django.db.models.query_utils import Q, select_related_descend
from django.db.models import signals, sql
@ -270,18 +271,47 @@ class QuerySet(object):
else:
requested = None
max_depth = self.query.max_depth
extra_select = self.query.extra_select.keys()
aggregate_select = self.query.aggregate_select.keys()
index_start = len(extra_select)
aggregate_start = index_start + len(self.model._meta.fields)
for row in self.query.results_iter():
if fill_cache:
obj, _ = get_cached_row(self.model, row, index_start,
max_depth, requested=requested)
obj, aggregate_start = get_cached_row(self.model, row,
index_start, max_depth, requested=requested)
else:
obj = self.model(*row[index_start:])
# omit aggregates in object creation
obj = self.model(*row[index_start:aggregate_start])
for i, k in enumerate(extra_select):
setattr(obj, k, row[i])
# Add the aggregates to the model
for i, aggregate in enumerate(aggregate_select):
setattr(obj, aggregate, row[i+aggregate_start])
yield obj
def aggregate(self, *args, **kwargs):
"""
Returns a dictionary containing the calculations (aggregation)
over the current queryset
If args is present the expression is passed as a kwarg ussing
the Aggregate object's default alias.
"""
for arg in args:
kwargs[arg.default_alias] = arg
for (alias, aggregate_expr) in kwargs.items():
self.query.add_aggregate(aggregate_expr, self.model, alias,
is_summary=True)
return self.query.get_aggregation()
def count(self):
"""
Performs a SELECT COUNT() and returns the number of records as an
@ -553,6 +583,25 @@ class QuerySet(object):
"""
self.query.select_related = other.query.select_related
def annotate(self, *args, **kwargs):
"""
Return a query set in which the returned objects have been annotated
with data aggregated from related fields.
"""
for arg in args:
kwargs[arg.default_alias] = arg
obj = self._clone()
obj._setup_aggregate_query()
# Add the aggregates to the query
for (alias, aggregate_expr) in kwargs.items():
obj.query.add_aggregate(aggregate_expr, self.model, alias,
is_summary=False)
return obj
def order_by(self, *field_names):
"""
Returns a new QuerySet instance with the ordering changed.
@ -641,6 +690,16 @@ class QuerySet(object):
"""
pass
def _setup_aggregate_query(self):
"""
Prepare the query for computing a result that contains aggregate annotations.
"""
opts = self.model._meta
if not self.query.group_by:
field_names = [f.attname for f in opts.fields]
self.query.add_fields(field_names, False)
self.query.set_group_by()
def as_sql(self):
"""
Returns the internal query's SQL and parameters (as a tuple).
@ -669,6 +728,8 @@ class ValuesQuerySet(QuerySet):
len(self.field_names) != len(self.model._meta.fields)):
self.query.trim_extra_select(self.extra_names)
names = self.query.extra_select.keys() + self.field_names
names.extend(self.query.aggregate_select.keys())
for row in self.query.results_iter():
yield dict(zip(names, row))
@ -682,20 +743,25 @@ class ValuesQuerySet(QuerySet):
"""
self.query.clear_select_fields()
self.extra_names = []
self.aggregate_names = []
if self._fields:
if not self.query.extra_select:
if not self.query.extra_select and not self.query.aggregate_select:
field_names = list(self._fields)
else:
field_names = []
for f in self._fields:
if self.query.extra_select.has_key(f):
self.extra_names.append(f)
elif self.query.aggregate_select.has_key(f):
self.aggregate_names.append(f)
else:
field_names.append(f)
else:
# Default to all fields.
field_names = [f.attname for f in self.model._meta.fields]
self.query.select = []
self.query.add_fields(field_names, False)
self.query.default_cols = False
self.field_names = field_names
@ -711,6 +777,7 @@ class ValuesQuerySet(QuerySet):
c._fields = self._fields[:]
c.field_names = self.field_names
c.extra_names = self.extra_names
c.aggregate_names = self.aggregate_names
if setup and hasattr(c, '_setup_query'):
c._setup_query()
return c
@ -718,10 +785,18 @@ class ValuesQuerySet(QuerySet):
def _merge_sanity_check(self, other):
super(ValuesQuerySet, self)._merge_sanity_check(other)
if (set(self.extra_names) != set(other.extra_names) or
set(self.field_names) != set(other.field_names)):
set(self.field_names) != set(other.field_names) or
self.aggregate_names != other.aggregate_names):
raise TypeError("Merging '%s' classes must involve the same values in each case."
% self.__class__.__name__)
def _setup_aggregate_query(self):
"""
Prepare the query for computing a result that contains aggregate annotations.
"""
self.query.set_group_by()
super(ValuesQuerySet, self)._setup_aggregate_query()
class ValuesListQuerySet(ValuesQuerySet):
def iterator(self):
@ -729,14 +804,14 @@ class ValuesListQuerySet(ValuesQuerySet):
if self.flat and len(self._fields) == 1:
for row in self.query.results_iter():
yield row[0]
elif not self.query.extra_select:
elif not self.query.extra_select and not self.query.aggregate_select:
for row in self.query.results_iter():
yield tuple(row)
else:
# When extra(select=...) is involved, the extra cols come are
# always at the start of the row, so we need to reorder the fields
# to match the order in self._fields.
names = self.query.extra_select.keys() + self.field_names
names = self.query.extra_select.keys() + self.field_names + self.query.aggregate_select.keys()
for row in self.query.results_iter():
data = dict(zip(names, row))
yield tuple([data[f] for f in self._fields])

View File

@ -64,4 +64,3 @@ def select_related_descend(field, restricted, requested):
if not restricted and field.null:
return False
return True

View File

@ -0,0 +1,130 @@
"""
Classes to represent the default SQL aggregate functions
"""
class AggregateField(object):
"""An internal field mockup used to identify aggregates in the
data-conversion parts of the database backend.
"""
def __init__(self, internal_type):
self.internal_type = internal_type
def get_internal_type(self):
return self.internal_type
ordinal_aggregate_field = AggregateField('IntegerField')
computed_aggregate_field = AggregateField('FloatField')
class Aggregate(object):
"""
Default SQL Aggregate.
"""
is_ordinal = False
is_computed = False
sql_template = '%(function)s(%(field)s)'
def __init__(self, col, source=None, is_summary=False, **extra):
"""Instantiate an SQL aggregate
* col is a column reference describing the subject field
of the aggregate. It can be an alias, or a tuple describing
a table and column name.
* source is the underlying field or aggregate definition for
the column reference. If the aggregate is not an ordinal or
computed type, this reference is used to determine the coerced
output type of the aggregate.
* extra is a dictionary of additional data to provide for the
aggregate definition
Also utilizes the class variables:
* sql_function, the name of the SQL function that implements the
aggregate.
* sql_template, a template string that is used to render the
aggregate into SQL.
* is_ordinal, a boolean indicating if the output of this aggregate
is an integer (e.g., a count)
* is_computed, a boolean indicating if this output of this aggregate
is a computed float (e.g., an average), regardless of the input
type.
"""
self.col = col
self.source = source
self.is_summary = is_summary
self.extra = extra
# Follow the chain of aggregate sources back until you find an
# actual field, or an aggregate that forces a particular output
# type. This type of this field will be used to coerce values
# retrieved from the database.
tmp = self
while tmp and isinstance(tmp, Aggregate):
if getattr(tmp, 'is_ordinal', False):
tmp = ordinal_aggregate_field
elif getattr(tmp, 'is_computed', False):
tmp = computed_aggregate_field
else:
tmp = tmp.source
self.field = tmp
def relabel_aliases(self, change_map):
if isinstance(self.col, (list, tuple)):
self.col = (change_map.get(self.col[0], self.col[0]), self.col[1])
def as_sql(self, quote_func=None):
"Return the aggregate, rendered as SQL."
if not quote_func:
quote_func = lambda x: x
if hasattr(self.col, 'as_sql'):
field_name = self.col.as_sql(quote_func)
elif isinstance(self.col, (list, tuple)):
field_name = '.'.join([quote_func(c) for c in self.col])
else:
field_name = self.col
params = {
'function': self.sql_function,
'field': field_name
}
params.update(self.extra)
return self.sql_template % params
class Avg(Aggregate):
is_computed = True
sql_function = 'AVG'
class Count(Aggregate):
is_ordinal = True
sql_function = 'COUNT'
sql_template = '%(function)s(%(distinct)s%(field)s)'
def __init__(self, col, distinct=False, **extra):
super(Count, self).__init__(col, distinct=distinct and 'DISTINCT ' or '', **extra)
class Max(Aggregate):
sql_function = 'MAX'
class Min(Aggregate):
sql_function = 'MIN'
class StdDev(Aggregate):
is_computed = True
def __init__(self, col, sample=False, **extra):
super(StdDev, self).__init__(col, **extra)
self.sql_function = sample and 'STDDEV_SAMP' or 'STDDEV_POP'
class Sum(Aggregate):
sql_function = 'SUM'
class Variance(Aggregate):
is_computed = True
def __init__(self, col, sample=False, **extra):
super(Variance, self).__init__(col, **extra)
self.sql_function = sample and 'VAR_SAMP' or 'VAR_POP'

View File

@ -25,59 +25,6 @@ class RawValue(object):
def __init__(self, value):
self.value = value
class Aggregate(object):
"""
Base class for all aggregate-related classes (min, max, avg, count, sum).
"""
def relabel_aliases(self, change_map):
"""
Relabel the column alias, if necessary. Must be implemented by
subclasses.
"""
raise NotImplementedError
def as_sql(self, quote_func=None):
"""
Returns the SQL string fragment for this object.
The quote_func function is used to quote the column components. If
None, it defaults to doing nothing.
Must be implemented by subclasses.
"""
raise NotImplementedError
class Count(Aggregate):
"""
Perform a count on the given column.
"""
def __init__(self, col='*', distinct=False):
"""
Set the column to count on (defaults to '*') and set whether the count
should be distinct or not.
"""
self.col = col
self.distinct = distinct
def relabel_aliases(self, change_map):
c = self.col
if isinstance(c, (list, tuple)):
self.col = (change_map.get(c[0], c[0]), c[1])
def as_sql(self, quote_func=None):
if not quote_func:
quote_func = lambda x: x
if isinstance(self.col, (list, tuple)):
col = ('%s.%s' % tuple([quote_func(c) for c in self.col]))
elif hasattr(self.col, 'as_sql'):
col = self.col.as_sql(quote_func)
else:
col = self.col
if self.distinct:
return 'COUNT(DISTINCT %s)' % col
else:
return 'COUNT(%s)' % col
class Date(object):
"""
Add a date selection column.

View File

@ -12,12 +12,13 @@ from copy import deepcopy
from django.utils.tree import Node
from django.utils.datastructures import SortedDict
from django.utils.encoding import force_unicode
from django.db.backends.util import truncate_name
from django.db import connection
from django.db.models import signals
from django.db.models.fields import FieldDoesNotExist
from django.db.models.query_utils import select_related_descend
from django.db.models.sql import aggregates as base_aggregates_module
from django.db.models.sql.where import WhereNode, Constraint, EverythingNode, AND, OR
from django.db.models.sql.datastructures import Count
from django.core.exceptions import FieldError
from datastructures import EmptyResultSet, Empty, MultiJoin
from constants import *
@ -40,6 +41,7 @@ class BaseQuery(object):
alias_prefix = 'T'
query_terms = QUERY_TERMS
aggregates_module = base_aggregates_module
def __init__(self, model, connection, where=WhereNode):
self.model = model
@ -73,6 +75,9 @@ class BaseQuery(object):
self.select_related = False
self.related_select_cols = []
# SQL aggregate-related attributes
self.aggregate_select = SortedDict() # Maps alias -> SQL aggregate function
# Arbitrary maximum limit for select_related. Prevents infinite
# recursion. Can be changed by the depth parameter to select_related().
self.max_depth = 5
@ -178,6 +183,7 @@ class BaseQuery(object):
obj.distinct = self.distinct
obj.select_related = self.select_related
obj.related_select_cols = []
obj.aggregate_select = self.aggregate_select.copy()
obj.max_depth = self.max_depth
obj.extra_select = self.extra_select.copy()
obj.extra_tables = self.extra_tables
@ -194,6 +200,35 @@ class BaseQuery(object):
obj._setup_query()
return obj
def convert_values(self, value, field):
"""Convert the database-returned value into a type that is consistent
across database backends.
By default, this defers to the underlying backend operations, but
it can be overridden by Query classes for specific backends.
"""
return self.connection.ops.convert_values(value, field)
def resolve_aggregate(self, value, aggregate):
"""Resolve the value of aggregates returned by the database to
consistent (and reasonable) types.
This is required because of the predisposition of certain backends
to return Decimal and long types when they are not needed.
"""
if value is None:
# Return None as-is
return value
elif aggregate.is_ordinal:
# Any ordinal aggregate (e.g., count) returns an int
return int(value)
elif aggregate.is_computed:
# Any computed aggregate (e.g., avg) returns a float
return float(value)
else:
# Return value depends on the type of the field being processed.
return self.convert_values(value, aggregate.field)
def results_iter(self):
"""
Returns an iterator over the results from executing this query.
@ -212,29 +247,78 @@ class BaseQuery(object):
else:
fields = self.model._meta.fields
row = self.resolve_columns(row, fields)
if self.aggregate_select:
aggregate_start = len(self.extra_select.keys()) + len(self.select)
row = tuple(row[:aggregate_start]) + tuple([
self.resolve_aggregate(value, aggregate)
for (alias, aggregate), value
in zip(self.aggregate_select.items(), row[aggregate_start:])
])
yield row
def get_aggregation(self):
"""
Returns the dictionary with the values of the existing aggregations.
"""
if not self.aggregate_select:
return {}
# If there is a group by clause, aggregating does not add useful
# information but retrieves only the first row. Aggregate
# over the subquery instead.
if self.group_by:
from subqueries import AggregateQuery
query = AggregateQuery(self.model, self.connection)
obj = self.clone()
# Remove any aggregates marked for reduction from the subquery
# and move them to the outer AggregateQuery.
for alias, aggregate in self.aggregate_select.items():
if aggregate.is_summary:
query.aggregate_select[alias] = aggregate
del obj.aggregate_select[alias]
query.add_subquery(obj)
else:
query = self
self.select = []
self.default_cols = False
self.extra_select = {}
query.clear_ordering(True)
query.clear_limits()
query.select_related = False
query.related_select_cols = []
query.related_select_fields = []
return dict([
(alias, self.resolve_aggregate(val, aggregate))
for (alias, aggregate), val
in zip(query.aggregate_select.items(), query.execute_sql(SINGLE))
])
def get_count(self):
"""
Performs a COUNT() query using the current filter constraints.
"""
from subqueries import CountQuery
obj = self.clone()
obj.clear_ordering(True)
obj.clear_limits()
obj.select_related = False
obj.related_select_cols = []
obj.related_select_fields = []
if len(obj.select) > 1:
obj = self.clone(CountQuery, _query=obj, where=self.where_class(),
distinct=False)
obj.select = []
obj.extra_select = SortedDict()
if len(self.select) > 1:
# If a select clause exists, then the query has already started to
# specify the columns that are to be returned.
# In this case, we need to use a subquery to evaluate the count.
from subqueries import AggregateQuery
subquery = obj
subquery.clear_ordering(True)
subquery.clear_limits()
obj = AggregateQuery(obj.model, obj.connection)
obj.add_subquery(subquery)
obj.add_count_column()
data = obj.execute_sql(SINGLE)
if not data:
return 0
number = data[0]
number = obj.get_aggregation()[None]
# Apply offset and limit constraints manually, since using LIMIT/OFFSET
# in SQL (in variants that provide them) doesn't change the COUNT
@ -450,25 +534,41 @@ class BaseQuery(object):
for col in self.select:
if isinstance(col, (list, tuple)):
r = '%s.%s' % (qn(col[0]), qn(col[1]))
if with_aliases and col[1] in col_aliases:
c_alias = 'Col%d' % len(col_aliases)
result.append('%s AS %s' % (r, c_alias))
aliases.add(c_alias)
col_aliases.add(c_alias)
if with_aliases:
if col[1] in col_aliases:
c_alias = 'Col%d' % len(col_aliases)
result.append('%s AS %s' % (r, c_alias))
aliases.add(c_alias)
col_aliases.add(c_alias)
else:
result.append('%s AS %s' % (r, col[1]))
aliases.add(r)
col_aliases.add(col[1])
else:
result.append(r)
aliases.add(r)
col_aliases.add(col[1])
else:
result.append(col.as_sql(quote_func=qn))
if hasattr(col, 'alias'):
aliases.add(col.alias)
col_aliases.add(col.alias)
elif self.default_cols:
cols, new_aliases = self.get_default_columns(with_aliases,
col_aliases)
result.extend(cols)
aliases.update(new_aliases)
result.extend([
'%s%s' % (
aggregate.as_sql(quote_func=qn),
alias is not None and ' AS %s' % qn(alias) or ''
)
for alias, aggregate in self.aggregate_select.items()
])
for table, col in self.related_select_cols:
r = '%s.%s' % (qn(table), qn(col))
if with_aliases and col in col_aliases:
@ -538,7 +638,7 @@ class BaseQuery(object):
Returns a list of strings that are joined together to go after the
"FROM" part of the query, as well as a list any extra parameters that
need to be included. Sub-classes, can override this to create a
from-clause via a "select", for example (e.g. CountQuery).
from-clause via a "select".
This should only be called after any SQL construction methods that
might change the tables we need. This means the select columns and
@ -635,10 +735,13 @@ class BaseQuery(object):
order = asc
result.append('%s %s' % (field, order))
continue
col, order = get_order_dir(field, asc)
if col in self.aggregate_select:
result.append('%s %s' % (col, order))
continue
if '.' in field:
# This came in through an extra(order_by=...) addition. Pass it
# on verbatim.
col, order = get_order_dir(field, asc)
table, col = col.split('.', 1)
if (table, col) not in processed_pairs:
elt = '%s.%s' % (qn(table), col)
@ -657,7 +760,6 @@ class BaseQuery(object):
ordering_aliases.append(elt)
result.append('%s %s' % (elt, order))
else:
col, order = get_order_dir(field, asc)
elt = qn2(col)
if distinct and col not in select_aliases:
ordering_aliases.append(elt)
@ -1068,6 +1170,48 @@ class BaseQuery(object):
self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1,
used, next, restricted, new_nullable, dupe_set, avoid)
def add_aggregate(self, aggregate, model, alias, is_summary):
"""
Adds a single aggregate expression to the Query
"""
opts = model._meta
field_list = aggregate.lookup.split(LOOKUP_SEP)
if (len(field_list) == 1 and
aggregate.lookup in self.aggregate_select.keys()):
# Aggregate is over an annotation
field_name = field_list[0]
col = field_name
source = self.aggregate_select[field_name]
elif (len(field_list) > 1 or
field_list[0] not in [i.name for i in opts.fields]):
field, source, opts, join_list, last, _ = self.setup_joins(
field_list, opts, self.get_initial_alias(), False)
# Process the join chain to see if it can be trimmed
_, _, col, _, join_list = self.trim_joins(source, join_list, last, False)
# If the aggregate references a model or field that requires a join,
# those joins must be LEFT OUTER - empty join rows must be returned
# in order for zeros to be returned for those aggregates.
for column_alias in join_list:
self.promote_alias(column_alias, unconditional=True)
col = (join_list[-1], col)
else:
# Aggregate references a normal field
field_name = field_list[0]
source = opts.get_field(field_name)
if not (self.group_by and is_summary):
# Only use a column alias if this is a
# standalone aggregate, or an annotation
col = (opts.db_table, source.column)
else:
col = field_name
# Add the aggregate to the query
alias = truncate_name(alias, self.connection.ops.max_name_length())
aggregate.add_to_query(self, alias, col=col, source=source, is_summary=is_summary)
def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
can_reuse=None, process_extras=True):
"""
@ -1119,6 +1263,11 @@ class BaseQuery(object):
elif callable(value):
value = value()
for alias, aggregate in self.aggregate_select.items():
if alias == parts[0]:
self.having.add((aggregate, lookup_type, value), AND)
return
opts = self.get_meta()
alias = self.get_initial_alias()
allow_many = trim or not negate
@ -1131,38 +1280,9 @@ class BaseQuery(object):
self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]),
can_reuse)
return
final = len(join_list)
penultimate = last.pop()
if penultimate == final:
penultimate = last.pop()
if trim and len(join_list) > 1:
extra = join_list[penultimate:]
join_list = join_list[:penultimate]
final = penultimate
penultimate = last.pop()
col = self.alias_map[extra[0]][LHS_JOIN_COL]
for alias in extra:
self.unref_alias(alias)
else:
col = target.column
alias = join_list[-1]
while final > 1:
# An optimization: if the final join is against the same column as
# we are comparing against, we can go back one step in the join
# chain and compare against the lhs of the join instead (and then
# repeat the optimization). The result, potentially, involves less
# table joins.
join = self.alias_map[alias]
if col != join[RHS_JOIN_COL]:
break
self.unref_alias(alias)
alias = join[LHS_ALIAS]
col = join[LHS_JOIN_COL]
join_list = join_list[:-1]
final -= 1
if final == penultimate:
penultimate = last.pop()
# Process the join chain to see if it can be trimmed
final, penultimate, col, alias, join_list = self.trim_joins(target, join_list, last, trim)
if (lookup_type == 'isnull' and value is True and not negate and
final > 1):
@ -1313,7 +1433,7 @@ class BaseQuery(object):
field, model, direct, m2m = opts.get_field_by_name(f.name)
break
else:
names = opts.get_all_field_names()
names = opts.get_all_field_names() + self.aggregate_select.keys()
raise FieldError("Cannot resolve keyword %r into field. "
"Choices are: %s" % (name, ", ".join(names)))
@ -1462,6 +1582,43 @@ class BaseQuery(object):
return field, target, opts, joins, last, extra_filters
def trim_joins(self, target, join_list, last, trim):
"""An optimization: if the final join is against the same column as
we are comparing against, we can go back one step in a join
chain and compare against the LHS of the join instead (and then
repeat the optimization). The result, potentially, involves less
table joins.
Returns a tuple
"""
final = len(join_list)
penultimate = last.pop()
if penultimate == final:
penultimate = last.pop()
if trim and len(join_list) > 1:
extra = join_list[penultimate:]
join_list = join_list[:penultimate]
final = penultimate
penultimate = last.pop()
col = self.alias_map[extra[0]][LHS_JOIN_COL]
for alias in extra:
self.unref_alias(alias)
else:
col = target.column
alias = join_list[-1]
while final > 1:
join = self.alias_map[alias]
if col != join[RHS_JOIN_COL]:
break
self.unref_alias(alias)
alias = join[LHS_ALIAS]
col = join[LHS_JOIN_COL]
join_list = join_list[:-1]
final -= 1
if final == penultimate:
penultimate = last.pop()
return final, penultimate, col, alias, join_list
def update_dupe_avoidance(self, opts, col, alias):
"""
For a column that is one of multiple pointing to the same table, update
@ -1554,6 +1711,7 @@ class BaseQuery(object):
"""
alias = self.get_initial_alias()
opts = self.get_meta()
try:
for name in field_names:
field, target, u2, joins, u3, u4 = self.setup_joins(
@ -1574,7 +1732,7 @@ class BaseQuery(object):
except MultiJoin:
raise FieldError("Invalid field name: '%s'" % name)
except FieldError:
names = opts.get_all_field_names() + self.extra_select.keys()
names = opts.get_all_field_names() + self.extra_select.keys() + self.aggregate_select.keys()
names.sort()
raise FieldError("Cannot resolve keyword %r into field. "
"Choices are: %s" % (name, ", ".join(names)))
@ -1609,38 +1767,52 @@ class BaseQuery(object):
if force_empty:
self.default_ordering = False
def set_group_by(self):
"""
Expands the GROUP BY clause required by the query.
This will usually be the set of all non-aggregate fields in the
return data. If the database backend supports grouping by the
primary key, and the query would be equivalent, the optimization
will be made automatically.
"""
if self.connection.features.allows_group_by_pk:
if len(self.select) == len(self.model._meta.fields):
self.group_by.append('.'.join([self.model._meta.db_table,
self.model._meta.pk.column]))
return
for sel in self.select:
self.group_by.append(sel)
def add_count_column(self):
"""
Converts the query to do count(...) or count(distinct(pk)) in order to
get its size.
"""
# TODO: When group_by support is added, this needs to be adjusted so
# that it doesn't totally overwrite the select list.
if not self.distinct:
if not self.select:
select = Count()
count = self.aggregates_module.Count('*', is_summary=True)
else:
assert len(self.select) == 1, \
"Cannot add count col with multiple cols in 'select': %r" % self.select
select = Count(self.select[0])
count = self.aggregates_module.Count(self.select[0])
else:
opts = self.model._meta
if not self.select:
select = Count((self.join((None, opts.db_table, None, None)),
opts.pk.column), True)
count = self.aggregates_module.Count((self.join((None, opts.db_table, None, None)), opts.pk.column),
is_summary=True, distinct=True)
else:
# Because of SQL portability issues, multi-column, distinct
# counts need a sub-query -- see get_count() for details.
assert len(self.select) == 1, \
"Cannot add count col with multiple cols in 'select'."
select = Count(self.select[0], True)
count = self.aggregates_module.Count(self.select[0], distinct=True)
# Distinct handling is done in Count(), so don't do it at this
# level.
self.distinct = False
self.select = [select]
self.select_fields = [None]
self.extra_select = {}
self.aggregate_select = {None: count}
def add_select_related(self, fields):
"""
@ -1758,7 +1930,6 @@ class BaseQuery(object):
return empty_iter()
else:
return
cursor = self.connection.cursor()
cursor.execute(sql, params)

View File

@ -9,7 +9,7 @@ from django.db.models.sql.query import Query
from django.db.models.sql.where import AND, Constraint
__all__ = ['DeleteQuery', 'UpdateQuery', 'InsertQuery', 'DateQuery',
'CountQuery']
'AggregateQuery']
class DeleteQuery(Query):
"""
@ -400,15 +400,25 @@ class DateQuery(Query):
self.distinct = True
self.order_by = order == 'ASC' and [1] or [-1]
class CountQuery(Query):
class AggregateQuery(Query):
"""
A CountQuery knows how to take a normal query which would select over
multiple distinct columns and turn it into SQL that can be used on a
variety of backends (it requires a select in the FROM clause).
An AggregateQuery takes another query as a parameter to the FROM
clause and only selects the elements in the provided list.
"""
def get_from_clause(self):
result, params = self._query.as_sql()
return ['(%s) A1' % result], params
def add_subquery(self, query):
self.subquery, self.sub_params = query.as_sql(with_col_aliases=True)
def get_ordering(self):
return ()
def as_sql(self, quote_func=None):
"""
Creates the SQL for this query. Returns the SQL string and list of
parameters.
"""
sql = ('SELECT %s FROM (%s) subquery' % (
', '.join([
aggregate.as_sql()
for aggregate in self.aggregate_select.values()
]),
self.subquery)
)
params = self.sub_params
return (sql, params)

View File

@ -14,6 +14,7 @@ from django.test.client import Client
from django.utils import simplejson
normalize_long_ints = lambda s: re.sub(r'(?<![\w])(\d+)L(?![\w])', '\\1', s)
normalize_decimals = lambda s: re.sub(r"Decimal\('(\d+(\.\d*)?)'\)", lambda m: "Decimal(\"%s\")" % m.groups()[0], s)
def to_list(value):
"""
@ -31,7 +32,7 @@ class OutputChecker(doctest.OutputChecker):
def check_output(self, want, got, optionflags):
"The entry method for doctest output checking. Defers to a sequence of child checkers"
checks = (self.check_output_default,
self.check_output_long,
self.check_output_numeric,
self.check_output_xml,
self.check_output_json)
for check in checks:
@ -43,19 +44,23 @@ class OutputChecker(doctest.OutputChecker):
"The default comparator provided by doctest - not perfect, but good for most purposes"
return doctest.OutputChecker.check_output(self, want, got, optionflags)
def check_output_long(self, want, got, optionflags):
"""Doctest does an exact string comparison of output, which means long
integers aren't equal to normal integers ("22L" vs. "22"). The
following code normalizes long integers so that they equal normal
integers.
def check_output_numeric(self, want, got, optionflags):
"""Doctest does an exact string comparison of output, which means that
some numerically equivalent values aren't equal. This check normalizes
* long integers (22L) so that they equal normal integers. (22)
* Decimals so that they are comparable, regardless of the change
made to __repr__ in Python 2.6.
"""
return normalize_long_ints(want) == normalize_long_ints(got)
return doctest.OutputChecker.check_output(self,
normalize_decimals(normalize_long_ints(want)),
normalize_decimals(normalize_long_ints(got)),
optionflags)
def check_output_xml(self, want, got, optionsflags):
"""Tries to do a 'xml-comparision' of want and got. Plain string
comparision doesn't always work because, for example, attribute
ordering should not be important.
Based on http://codespeak.net/svn/lxml/trunk/src/lxml/doctestcompare.py
"""
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
@ -102,7 +107,7 @@ class OutputChecker(doctest.OutputChecker):
wrapper = '<root>%s</root>'
want = wrapper % want
got = wrapper % got
# Parse the want and got strings, and compare the parsings.
try:
want_root = parseString(want).firstChild
@ -174,7 +179,7 @@ class TestCase(unittest.TestCase):
"""Performs any pre-test setup. This includes:
* Flushing the database.
* If the Test Case class has a 'fixtures' member, installing the
* If the Test Case class has a 'fixtures' member, installing the
named fixtures.
* If the Test Case class has a 'urls' member, replace the
ROOT_URLCONF with it.

View File

@ -42,7 +42,7 @@ The model layer
* **Models:** :ref:`Model syntax <topics-db-models>` | :ref:`Field types <ref-models-fields>` | :ref:`Meta options <ref-models-options>`
* **QuerySets:** :ref:`Executing queries <topics-db-queries>` | :ref:`QuerySet method reference <ref-models-querysets>`
* **Model instances:** :ref:`Instance methods <ref-models-instances>` | :ref:`Accessing related objects <ref-models-relations>`
* **Advanced:** :ref:`Managers <topics-db-managers>` | :ref:`Raw SQL <topics-db-sql>` | :ref:`Transactions <topics-db-transactions>` | :ref:`Custom fields <howto-custom-model-fields>`
* **Advanced:** :ref:`Managers <topics-db-managers>` | :ref:`Raw SQL <topics-db-sql>` | :ref:`Transactions <topics-db-transactions>` | :ref:`Aggregation <topics-db-aggregation>` | :ref:`Custom fields <howto-custom-model-fields>`
* **Other:** :ref:`Supported databases <ref-databases>` | :ref:`Legacy databases <howto-legacy-databases>` | :ref:`Providing initial data <howto-initial-data>`
The template layer

View File

@ -7,7 +7,7 @@ Model API reference. For introductory material, see :ref:`topics-db-models`.
.. toctree::
:maxdepth: 1
fields
relations
options

View File

@ -158,6 +158,48 @@ In SQL terms, that evaluates to::
Note the second example is more restrictive.
``annotate(*args, **kwargs)``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. versionadded:: 1.1
Annotates each object in the ``QuerySet`` with the provided list of
aggregate values (averages, sums, etc) that have been computed over
the objects that are related to the objects in the ``QuerySet``.
Each argument to ``annotate()`` is an annotation that will be added
to each object in the ``QuerySet`` that is returned.
The aggregation functions that are provided by Django are described
in `Aggregation Functions`_ below.
Annotations specified using keyword arguments will use the keyword as
the alias for the annotation. Anonymous arguments will have an alias
generated for them based upon the name of the aggregate function and
the model field that is being aggregated.
For example, if you were manipulating a list of blogs, you may want
to determine how many entries have been made in each blog::
>>> q = Blog.objects.annotate(Count('entry'))
# The name of the first blog
>>> q[0].name
'Blogasaurus'
# The number of entries on the first blog
>>> q[0].entry__count
42
The ``Blog`` model doesn't define an ``entry_count`` attribute by itself,
but by using a keyword argument to specify the aggregate function, you can
control the name of the annotation::
>>> q = Blog.objects.annotate(number_of_entries=Count('entry'))
# The number of entries on the first blog, using the name provided
>>> q[0].number_of_entries
42
For an in-depth discussion of aggregation, see :ref:`the topic guide on
Aggregation <topics-db-aggregation>`.
``order_by(*fields)``
~~~~~~~~~~~~~~~~~~~~~
@ -931,6 +973,38 @@ exist with the given parameters.
Note ``latest()`` exists purely for convenience and readability.
``aggregate(*args, **kwargs)``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. versionadded:: 1.1
Returns a dictionary of aggregate values (averages, sums, etc) calculated
over the ``QuerySet``. Each argument to ``aggregate()`` specifies
a value that will be included in the dictionary that is returned.
The aggregation functions that are provided by Django are described
in `Aggregation Functions`_ below.
Aggregates specified using keyword arguments will use the keyword as
the name for the annotation. Anonymous arguments will have an name
generated for them based upon the name of the aggregate function and
the model field that is being aggregated.
For example, if you were manipulating blog entries, you may want to know
the average number of authors contributing to blog entries::
>>> q = Blog.objects.aggregate(Count('entry'))
{'entry__count': 16}
By using a keyword argument to specify the aggregate function, you can
control the name of the aggregation value that is returned::
>>> q = Blog.objects.aggregate(number_of_entries=Count('entry'))
{'number_of_entries': 2.34}
For an in-depth discussion of aggregation, see :ref:`the topic guide on
Aggregation <topics-db-aggregation>`.
.. _field-lookups:
Field lookups
@ -1326,3 +1400,115 @@ SQL equivalents::
SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- SQLite
.. _aggregation-functions:
Aggregation Functions
---------------------
.. versionadded:: 1.1
Django provides the following aggregation functions in the
``django.db.models`` module.
``Avg``
~~~~~~~
.. class:: Avg(field)
Returns the mean value of the given field.
* Default alias: ``<field>__avg``
* Return type: float
``Count``
~~~~~~~~~
.. class:: Count(field, distinct=False)
Returns the number of objects that are related through the provided field.
* Default alias: ``<field>__count``
* Return type: integer
Has one optional argument:
.. attribute:: distinct
If distinct=True, the count will only include unique instances. This has
the SQL equivalent of ``COUNT(DISTINCT field)``. Default value is ``False``.
``Max``
~~~~~~~
.. class:: Max(field)
Returns the maximum value of the given field.
* Default alias: ``<field>__max``
* Return type: same as input field
``Min``
~~~~~~~
.. class:: Min(field)
Returns the minimum value of the given field.
* Default alias: ``<field>__min``
* Return type: same as input field
``StdDev``
~~~~~~~~~
.. class:: StdDev(field, sample=False)
Returns the standard deviation of the data in the provided field.
* Default alias: ``<field>__stddev``
* Return type: float
Has one optional argument:
.. attribute:: sample
By default, ``StdDev`` returns the population standard deviation. However,
if ``sample=True``, the return value will be the sample standard deviation.
.. admonition:: SQLite
SQLite doesn't provide ``StdDev`` out of the box. An implementation is
available as an extension module for SQLite. Consult the SQlite
documentation for instructions on obtaining and installing this extension.
``Sum``
~~~~~~~
.. class:: Sum(field)
Computes the sum of all values of the given field.
* Default alias: ``<field>__sum``
* Return type: same as input field
``Variance``
~~~~~~~~~
.. class:: Variance(field, sample=False)
Returns the variance of the data in the provided field.
* Default alias: ``<field>__variance``
* Return type: float
Has one optional argument:
.. attribute:: sample
By default, ``Variance`` returns the population variance. However,
if ``sample=True``, the return value will be the sample variance.
.. admonition:: SQLite
SQLite doesn't provide ``Variance`` out of the box. An implementation is
available as an extension module for SQLite. Consult the SQlite
documentation for instructions on obtaining and installing this extension.

View File

@ -0,0 +1,323 @@
.. _topics-db-aggregation:
=============
Aggregation
=============
.. versionadded:: 1.1
.. currentmodule:: django.db.models
The topic guide on :ref:`Django's database-abstraction API <topics-db-queries`
described the way that you can use Django queries that create,
retrieve, update and delete individual objects. However, sometimes you will
need to retrieve values that are derived by summarizing or *aggregating* a
collection of objects. This topic guide describes the ways that aggregate values
can be generated and returned using Django queries.
Throughout this guide, we'll refer to the following models. These models are
used to track the inventory for a series of online bookstores:
.. _queryset-model-example:
.. code-block:: python
class Author(models.Model):
name = models.CharField(max_length=100)
age = models.IntegerField()
friends = models.ManyToManyField('self', blank=True)
class Publisher(models.Model):
name = models.CharField(max_length=300)
num_awards = models.IntegerField()
class Book(models.Model):
isbn = models.CharField(max_length=9)
name = models.CharField(max_length=300)
pages = models.IntegerField()
price = models.DecimalField(max_digits=10, decimal_places=2)
rating = models.FloatField()
authors = models.ManyToManyField(Author)
publisher = models.ForeignKey(Publisher)
pubdate = models.DateField
class Store(models.Model):
name = models.CharField(max_length=300)
books = models.ManyToManyField(Book)
Generating aggregates over a QuerySet
=====================================
Django provides two ways to generate aggregates. The first way is to generate
summary values over an entire ``QuerySet``. For example, say you wanted to
calculate the average price of all books available for sale. Django's query
syntax provides a means for describing the set of all books::
>>> Book.objects.all()
What we need is a way to calculate summary values over the objects that
belong to this ``QuerySet``. This is done by appending an ``aggregate()``
clause onto the ``QuerySet``::
>>> from django.db.models import Avg
>>> Book.objects.all().aggregate(Avg('price'))
{'price__avg': 34.35}
The ``all()`` is redundant in this example, so this could be simplified to::
>>> Book.objects.aggregate(Avg('price'))
{'price__avg': 34.35}
The argument to the ``aggregate()`` clause describes the aggregate value that
we want to compute - in this case, the average of the ``price`` field on the
``Book`` model. A list of the aggregate functions that are available can be
found in the :ref:`QuerySet reference <aggregation-functions>`.
``aggregate()`` is a terminal clause for a ``QuerySet`` that, when invoked,
returns a dictionary of name-value pairs. The name is an identifier for the
aggregate value; the value is the computed aggregate. The name is
automatically generated from the name of the field and the aggregate function.
If you want to manually specify a name for the aggregate value, you can do so
by providing that name when you specify the aggregate clause::
>>> Book.objects.aggregate(average_price=Avg('price'))
{'average_price': 34.35}
If you want to generate more than one aggregate, you just add another
argument to the ``aggregate()`` clause. So, if we also wanted to know
the maximum and minimum price of all books, we would issue the query::
>>> Book.objects.aggregate(Avg('price'), Max('price'), Min('price'))
{'price__avg': 34.35, 'price__max': Decimal('81.20'), 'price__min': Decimal('12.99')}
Generating aggregates for each item in a QuerySet
=================================================
The second way to generate summary values is to generate an independent
summary for each object in a ``Queryset``. For example, if you are retrieving
a list of books, you may want to know how many authors contributed to
each book. Each Book has a many-to-many relationship with the Author; we
want to summarize this relationship for each book in the ``QuerySet``.
Per-object summaries can be generated using the ``annotate()`` clause.
When an ``annotate()`` clause is specified, each object in the ``QuerySet``
will be annotated with the specified values.
The syntax for these annotations is identical to that used for the
``aggregate()`` clause. Each argument to ``annotate()`` describes and
aggregate that is to be calculated. For example, to annotate Books with
the number of authors::
# Build an annotated queryset
>>> q = Book.objects.annotate(Count('authors'))
# Interrogate the first object in the queryset
>>> q[0]
<Book: The Definitive Guide to Django>
>>> q[0].authors__count
2
# Interrogate the second object in the queryset
>>> q[1]
<Book: Practical Django Projects>
>>> q[1].authors__count
1
As with ``aggregate()``, the name for the annotation is automatically derived
from the name of the aggregate function and the name of the field being
aggregated. You can override this default name by providing an alias when you
specify the annotation::
>>> q = Book.objects.annotate(num_authors=Count('authors'))
>>> q[0].num_authors
2
>>> q[1].num_authors
1
Unlike ``aggregate()``, ``annotate()`` is *not* a terminal clause. The output
of the ``annotate()`` clause is a ``QuerySet``; this ``QuerySet`` can be
modified using any other ``QuerySet`` operation, including ``filter()``,
``order_by``, or even additional calls to ``annotate()``.
Joins and aggregates
====================
So far, we have dealt with aggregates over fields that belong to the
model being queries. However, sometimes the value you want to aggregate
will belong to a model that is related to the model you are querying.
When specifying the field to be aggregated in an aggregate functions,
Django will allow you to use the same
:ref:`double underscore notation <field-lookups-intro>` that is used
when referring to related fields in filters. Django will then handle
any table joins that are required to retrieve and aggregate the
related value.
For example, to find the price range of books offered in each store,
you could use the annotation::
>>> Store.objects.annotate(min_price=Min('books__price'), max_price=Min('books__price'))
This tells Django to retrieve the Store model, join (through the
many-to-many relationship) with the Book model, and aggregate on the
price field of the book model to produce a minimum and maximum value.
The same rules apply to the ``aggregate()`` clause. If you wanted to
know the lowest and highest price of any book that is available for sale
in a store, you could use the aggregate::
>>> Store.objects.aggregate(min_price=Min('books__price'), max_price=Min('books__price'))
Join chains can be as deep as you required. For example, to extract the
age of the youngest author of any book available for sale, you could
issue the query::
>>> Store.objects.aggregate(youngest_age=Min('books__authors__age'))
Aggregations and other QuerySet clauses
=======================================
``filter()`` and ``exclude()``
------------------------------
Aggregates can also participate in filters. Any ``filter()`` (or
``exclude()``) applied to normal model fields will have the effect of
constraining the objects that are considered for aggregation.
When used with an ``annotate()`` clause, a filter has the effect of
constraining the objects for which an annotation is calculated. For example,
you can generate an annotated list of all books that have a title starting
with "Django" using the query::
>>> Book.objects.filter(name__startswith="Django").annotate(num_authors=Count('authors'))
When used with an ``aggregate()`` clause, a filter has the effect of
constraining the objects over which the aggregate is calculated.
For example, you can generate the average price of all books with a
title that starts with "Django" using the query::
>>> Book.objects.filter(name__startswith="Django").aggregate(Avg('price'))
Filtering on annotations
~~~~~~~~~~~~~~~~~~~~~~~~
Annotated values can also be filtered. The alias for the annotation can be
used in ``filter()`` and ``exclude()`` clauses in the same way as any other
model field.
For example, to generate a list of books that have more than one author,
you can issue the query::
>>> Book.objects.annotate(num_authors=Count('authors')).filter(num_authors__gt=1)
This query generates an annotated result set, and then generates a filter
based upon that annotation.
Order of ``annotate()`` and ``filter()`` clauses
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When developing a complex query that involves both ``annotate()`` and
``filter()`` clauses, particular attention should be paid to the order
in which the clauses are applied to the ``QuerySet``.
When an ``annotate()`` clause is applied to a query, the annotation is
computed over the state of the query up to the point where the annotation
is requested. The practical implication of this is that ``filter()`` and
``annotate()`` are not transitive operations -- that is, there is a
difference between the query::
>>> Publisher.objects.annotate(num_books=Count('book')).filter(book__rating__gt=3.0)
and the query::
>>> Publisher.objects.filter(book__rating__gt=3.0).annotate(num_books=Count('book'))
Both queries will return a list of Publishers that have at least one good
book (i.e., a book with a rating exceeding 3.0). However, the annotation in
the first query will provide the total number of all books published by the
publisher; the second query will only include good books in the annotated
count. In the first query, the annotation precedes the filter, so the
filter has no effect on the annotation. In the second query, the filter
preceeds the annotation, and as a result, the filter constrains the objects
considered when calculating the annotation.
``order_by()``
--------------
Annotations can be used as a basis for ordering. When you
define an ``order_by()`` clause, the aggregates you provide can reference
any alias defined as part of an ``annotate()`` clause in the query.
For example, to order a ``QuerySet`` of books by the number of authors
that have contributed to the book, you could use the following query::
>>> Book.objects.annotate(num_authors=Count('authors')).order_by('num_authors')
``values()``
------------
Ordinarily, annotations are generated on a per-object basis - an annotated
``QuerySet`` will return one result for each object in the original
``Queryset``. However, when a ``values()`` clause is used to constrain the
columns that are returned in the result set, the method for evaluating
annotations is slightly different. Instead of returning an annotated result
for each result in the original ``QuerySet``, the original results are
grouped according to the unique combinations of the fields specified in the
``values()`` clause. An annotation is then provided for each unique group;
the annotation is computed over all members of the group.
For example, consider an author query that attempts to find out the average
rating of books written by each author:
>>> Author.objects.annotate(average_rating=Avg('book_rating'))
This will return one result for each author in the database, annotate with
their average book rating.
However, the result will be slightly different if you use a ``values()`` clause::
>>> Author.objects.values('name').annotate(average_rating=Avg('book_rating'))
In this example, the authors will be grouped by name, so you will only get
an annotated result for each *unique* author name. This means if you have
two authors with the same name, their results will be merged into a single
result in the output of the query; the average will be computed as the
average over the books written by both authors.
The annotation name will be added to the fields returned
as part of the ``ValuesQuerySet``.
Order of ``annotate()`` and ``filter()`` clauses
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
As with the ``filter()`` clause, the order in which ``annotate()`` and
``values()`` clauses are applied to a query is significant. If the
``values()`` clause precedes the ``annotate()``, the annotation will be
computed using the grouping described by the ``values()`` clause.
However, if the ``annotate()`` clause precedes the ``values()`` clause,
the annotations will be generated over the entire query set. In this case,
the ``values()`` clause only constrains the fields that are generated on
output.
For example, if we reverse the order of the ``values()`` and ``annotate()``
clause from our previous example::
>>> Author.objects.annotate(average_rating=Avg('book_rating')).values('name')
This will now yield one unique result for each author; however, only
the author's name and the ``average_rating`` annotation will be returned
in the output data.
Aggregating annotations
-----------------------
You can also generate an aggregate on the result of an annotation. When you
define an ``aggregate()`` clause, the aggregates you provide can reference
any alias defined as part of an ``annotate()`` clause in the query.
For example, if you wanted to calculate the average number of authors per
book you first annotate the set of books with the author count, then
aggregate that author count, referencing the annotation field::
>>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Avg('num_authors'))
{'num_authors__avg': 1.66}

View File

@ -12,6 +12,7 @@ model maps to a single database table.
models
queries
aggregation
managers
sql
transactions

View File

View File

@ -0,0 +1,229 @@
[
{
"pk": 1,
"model": "aggregation.publisher",
"fields": {
"name": "Apress",
"num_awards": 3
}
},
{
"pk": 2,
"model": "aggregation.publisher",
"fields": {
"name": "Sams",
"num_awards": 1
}
},
{
"pk": 3,
"model": "aggregation.publisher",
"fields": {
"name": "Prentice Hall",
"num_awards": 7
}
},
{
"pk": 4,
"model": "aggregation.publisher",
"fields": {
"name": "Morgan Kaufmann",
"num_awards": 9
}
},
{
"pk": 1,
"model": "aggregation.book",
"fields": {
"publisher": 1,
"isbn": "159059725",
"name": "The Definitive Guide to Django: Web Development Done Right",
"price": "30.00",
"rating": 4.5,
"authors": [1, 2],
"pages": 447,
"pubdate": "2007-12-6"
}
},
{
"pk": 2,
"model": "aggregation.book",
"fields": {
"publisher": 2,
"isbn": "067232959",
"name": "Sams Teach Yourself Django in 24 Hours",
"price": "23.09",
"rating": 3.0,
"authors": [3],
"pages": 528,
"pubdate": "2008-3-3"
}
},
{
"pk": 3,
"model": "aggregation.book",
"fields": {
"publisher": 1,
"isbn": "159059996",
"name": "Practical Django Projects",
"price": "29.69",
"rating": 4.0,
"authors": [4],
"pages": 300,
"pubdate": "2008-6-23"
}
},
{
"pk": 4,
"model": "aggregation.book",
"fields": {
"publisher": 3,
"isbn": "013235613",
"name": "Python Web Development with Django",
"price": "29.69",
"rating": 4.0,
"authors": [5, 6, 7],
"pages": 350,
"pubdate": "2008-11-3"
}
},
{
"pk": 5,
"model": "aggregation.book",
"fields": {
"publisher": 3,
"isbn": "013790395",
"name": "Artificial Intelligence: A Modern Approach",
"price": "82.80",
"rating": 4.0,
"authors": [8, 9],
"pages": 1132,
"pubdate": "1995-1-15"
}
},
{
"pk": 6,
"model": "aggregation.book",
"fields": {
"publisher": 4,
"isbn": "155860191",
"name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
"price": "75.00",
"rating": 5.0,
"authors": [8],
"pages": 946,
"pubdate": "1991-10-15"
}
},
{
"pk": 1,
"model": "aggregation.store",
"fields": {
"books": [1, 2, 3, 4, 5, 6],
"name": "Amazon.com",
"original_opening": "1994-4-23 9:17:42",
"friday_night_closing": "23:59:59"
}
},
{
"pk": 2,
"model": "aggregation.store",
"fields": {
"books": [1, 3, 5, 6],
"name": "Books.com",
"original_opening": "2001-3-15 11:23:37",
"friday_night_closing": "23:59:59"
}
},
{
"pk": 3,
"model": "aggregation.store",
"fields": {
"books": [3, 4, 6],
"name": "Mamma and Pappa's Books",
"original_opening": "1945-4-25 16:24:14",
"friday_night_closing": "21:30:00"
}
},
{
"pk": 1,
"model": "aggregation.author",
"fields": {
"age": 34,
"friends": [2, 4],
"name": "Adrian Holovaty"
}
},
{
"pk": 2,
"model": "aggregation.author",
"fields": {
"age": 35,
"friends": [1, 7],
"name": "Jacob Kaplan-Moss"
}
},
{
"pk": 3,
"model": "aggregation.author",
"fields": {
"age": 45,
"friends": [],
"name": "Brad Dayley"
}
},
{
"pk": 4,
"model": "aggregation.author",
"fields": {
"age": 29,
"friends": [1],
"name": "James Bennett"
}
},
{
"pk": 5,
"model": "aggregation.author",
"fields": {
"age": 37,
"friends": [6, 7],
"name": "Jeffrey Forcier "
}
},
{
"pk": 6,
"model": "aggregation.author",
"fields": {
"age": 29,
"friends": [5, 7],
"name": "Paul Bissex"
}
},
{
"pk": 7,
"model": "aggregation.author",
"fields": {
"age": 25,
"friends": [2, 5, 6],
"name": "Wesley J. Chun"
}
},
{
"pk": 8,
"model": "aggregation.author",
"fields": {
"age": 57,
"friends": [9],
"name": "Peter Norvig"
}
},
{
"pk": 9,
"model": "aggregation.author",
"fields": {
"age": 46,
"friends": [8],
"name": "Stuart Russell"
}
}
]

View File

@ -0,0 +1,379 @@
# coding: utf-8
from django.db import models
try:
sorted
except NameError:
from django.utils.itercompat import sorted # For Python 2.3
class Author(models.Model):
name = models.CharField(max_length=100)
age = models.IntegerField()
friends = models.ManyToManyField('self', blank=True)
def __unicode__(self):
return self.name
class Publisher(models.Model):
name = models.CharField(max_length=300)
num_awards = models.IntegerField()
def __unicode__(self):
return self.name
class Book(models.Model):
isbn = models.CharField(max_length=9)
name = models.CharField(max_length=300)
pages = models.IntegerField()
rating = models.FloatField()
price = models.DecimalField(decimal_places=2, max_digits=6)
authors = models.ManyToManyField(Author)
publisher = models.ForeignKey(Publisher)
pubdate = models.DateField()
def __unicode__(self):
return self.name
class Store(models.Model):
name = models.CharField(max_length=300)
books = models.ManyToManyField(Book)
original_opening = models.DateTimeField()
friday_night_closing = models.TimeField()
def __unicode__(self):
return self.name
class Entries(models.Model):
EntryID = models.AutoField(primary_key=True, db_column='Entry ID')
Entry = models.CharField(unique=True, max_length=50)
Exclude = models.BooleanField()
class Clues(models.Model):
ID = models.AutoField(primary_key=True)
EntryID = models.ForeignKey(Entries, verbose_name='Entry', db_column = 'Entry ID')
Clue = models.CharField(max_length=150)
# Tests on 'aggergate'
# Different backends and numbers.
__test__ = {'API_TESTS': """
>>> from django.core import management
>>> try:
... from decimal import Decimal
... except:
... from django.utils._decimal import Decimal
>>> from datetime import date
# Reset the database representation of this app.
# This will return the database to a clean initial state.
>>> management.call_command('flush', verbosity=0, interactive=False)
# Empty Call - request nothing, get nothing.
>>> Author.objects.all().aggregate()
{}
>>> from django.db.models import Avg, Sum, Count, Max, Min
# Single model aggregation
#
# Single aggregate
# Average age of Authors
>>> Author.objects.all().aggregate(Avg('age'))
{'age__avg': 37.4...}
# Multiple aggregates
# Average and Sum of Author ages
>>> Author.objects.all().aggregate(Sum('age'), Avg('age'))
{'age__sum': 337, 'age__avg': 37.4...}
# Aggreates interact with filters, and only
# generate aggregate values for the filtered values
# Sum of the age of those older than 29 years old
>>> Author.objects.all().filter(age__gt=29).aggregate(Sum('age'))
{'age__sum': 254}
# Depth-1 Joins
#
# On Relationships with self
# Average age of the friends of each author
>>> Author.objects.all().aggregate(Avg('friends__age'))
{'friends__age__avg': 34.07...}
# On ManyToMany Relationships
#
# Forward
# Average age of the Authors of Books with a rating of less than 4.5
>>> Book.objects.all().filter(rating__lt=4.5).aggregate(Avg('authors__age'))
{'authors__age__avg': 38.2...}
# Backward
# Average rating of the Books whose Author's name contains the letter 'a'
>>> Author.objects.all().filter(name__contains='a').aggregate(Avg('book__rating'))
{'book__rating__avg': 4.0}
# On OneToMany Relationships
#
# Forward
# Sum of the number of awards of each Book's Publisher
>>> Book.objects.all().aggregate(Sum('publisher__num_awards'))
{'publisher__num_awards__sum': 30}
# Backward
# Sum of the price of every Book that has a Publisher
>>> Publisher.objects.all().aggregate(Sum('book__price'))
{'book__price__sum': Decimal("270.27")}
# Multiple Joins
#
# Forward
>>> Store.objects.all().aggregate(Max('books__authors__age'))
{'books__authors__age__max': 57}
# Backward
# Note that the very long default alias may be truncated
>>> Author.objects.all().aggregate(Min('book__publisher__num_awards'))
{'book__publisher__num_award...': 1}
# Aggregate outputs can also be aliased.
# Average amazon.com Book rating
>>> Store.objects.filter(name='Amazon.com').aggregate(amazon_mean=Avg('books__rating'))
{'amazon_mean': 4.08...}
# Tests on annotate()
# An empty annotate call does nothing but return the same QuerySet
>>> Book.objects.all().annotate().order_by('pk')
[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Sams Teach Yourself Django in 24 Hours>, <Book: Practical Django Projects>, <Book: Python Web Development with Django>, <Book: Artificial Intelligence: A Modern Approach>, <Book: Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp>]
# Annotate inserts the alias into the model object with the aggregated result
>>> books = Book.objects.all().annotate(mean_age=Avg('authors__age'))
>>> books.get(pk=1).name
u'The Definitive Guide to Django: Web Development Done Right'
>>> books.get(pk=1).mean_age
34.5
# On ManyToMany Relationships
# Forward
# Average age of the Authors of each book with a rating less than 4.5
>>> books = Book.objects.all().filter(rating__lt=4.5).annotate(Avg('authors__age'))
>>> sorted([(b.name, b.authors__age__avg) for b in books])
[(u'Artificial Intelligence: A Modern Approach', 51.5), (u'Practical Django Projects', 29.0), (u'Python Web Development with Django', 30.3...), (u'Sams Teach Yourself Django in 24 Hours', 45.0)]
# Count the number of authors of each book
>>> books = Book.objects.annotate(num_authors=Count('authors'))
>>> sorted([(b.name, b.num_authors) for b in books])
[(u'Artificial Intelligence: A Modern Approach', 2), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 1), (u'Practical Django Projects', 1), (u'Python Web Development with Django', 3), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 2)]
# Backward
# Average rating of the Books whose Author's names contains the letter 'a'
>>> authors = Author.objects.all().filter(name__contains='a').annotate(Avg('book__rating'))
>>> sorted([(a.name, a.book__rating__avg) for a in authors])
[(u'Adrian Holovaty', 4.5), (u'Brad Dayley', 3.0), (u'Jacob Kaplan-Moss', 4.5), (u'James Bennett', 4.0), (u'Paul Bissex', 4.0), (u'Stuart Russell', 4.0)]
# Count the number of books written by each author
>>> authors = Author.objects.annotate(num_books=Count('book'))
>>> sorted([(a.name, a.num_books) for a in authors])
[(u'Adrian Holovaty', 1), (u'Brad Dayley', 1), (u'Jacob Kaplan-Moss', 1), (u'James Bennett', 1), (u'Jeffrey Forcier ', 1), (u'Paul Bissex', 1), (u'Peter Norvig', 2), (u'Stuart Russell', 1), (u'Wesley J. Chun', 1)]
# On OneToMany Relationships
# Forward
# Annotate each book with the number of awards of each Book's Publisher
>>> books = Book.objects.all().annotate(Sum('publisher__num_awards'))
>>> sorted([(b.name, b.publisher__num_awards__sum) for b in books])
[(u'Artificial Intelligence: A Modern Approach', 7), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 9), (u'Practical Django Projects', 3), (u'Python Web Development with Django', 7), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 3)]
# Backward
# Annotate each publisher with the sum of the price of all books sold
>>> publishers = Publisher.objects.all().annotate(Sum('book__price'))
>>> sorted([(p.name, p.book__price__sum) for p in publishers])
[(u'Apress', Decimal("59.69")), (u'Morgan Kaufmann', Decimal("75.00")), (u'Prentice Hall', Decimal("112.49")), (u'Sams', Decimal("23.09"))]
# Calls to values() are not commutative over annotate().
# Calling values on a queryset that has annotations returns the output
# as a dictionary
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values()
[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('pk', 'isbn', 'mean_age')
[{'pk': 1, 'isbn': u'159059725', 'mean_age': 34.5}]
# Calling it with paramters reduces the output but does not remove the
# annotation.
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('name')
[{'name': u'The Definitive Guide to Django: Web Development Done Right', 'mean_age': 34.5}]
# An empty values() call before annotating has the same effect as an
# empty values() call after annotating
>>> Book.objects.filter(pk=1).values().annotate(mean_age=Avg('authors__age'))
[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
# Calling annotate() on a ValuesQuerySet annotates over the groups of
# fields to be selected by the ValuesQuerySet.
# Note that an extra parameter is added to each dictionary. This
# parameter is a queryset representing the objects that have been
# grouped to generate the annotation
>>> Book.objects.all().values('rating').annotate(n_authors=Count('authors__id'), mean_age=Avg('authors__age')).order_by('rating')
[{'rating': 3.0, 'n_authors': 1, 'mean_age': 45.0}, {'rating': 4.0, 'n_authors': 6, 'mean_age': 37.1...}, {'rating': 4.5, 'n_authors': 2, 'mean_age': 34.5}, {'rating': 5.0, 'n_authors': 1, 'mean_age': 57.0}]
# If a join doesn't match any objects, an aggregate returns None
>>> authors = Author.objects.all().annotate(Avg('friends__age')).order_by('id')
>>> len(authors)
9
>>> sorted([(a.name, a.friends__age__avg) for a in authors])
[(u'Adrian Holovaty', 32.0), (u'Brad Dayley', None), (u'Jacob Kaplan-Moss', 29.5), (u'James Bennett', 34.0), (u'Jeffrey Forcier ', 27.0), (u'Paul Bissex', 31.0), (u'Peter Norvig', 46.0), (u'Stuart Russell', 57.0), (u'Wesley J. Chun', 33.6...)]
# The Count aggregation function allows an extra parameter: distinct.
# This restricts the count results to unique items
>>> Book.objects.all().aggregate(Count('rating'))
{'rating__count': 6}
>>> Book.objects.all().aggregate(Count('rating', distinct=True))
{'rating__count': 4}
# Retreiving the grouped objects
# When using Count you can also omit the primary key and refer only to
# the related field name if you want to count all the related objects
# and not a specific column
>>> explicit = list(Author.objects.annotate(Count('book__id')))
>>> implicit = list(Author.objects.annotate(Count('book')))
>>> explicit == implicit
True
# Ordering is allowed on aggregates
>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('oldest', 'rating')
[{'rating': 4.5, 'oldest': 35}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.0, 'oldest': 57}, {'rating': 5.0, 'oldest': 57}]
>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('-oldest', '-rating')
[{'rating': 5.0, 'oldest': 57}, {'rating': 4.0, 'oldest': 57}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.5, 'oldest': 35}]
# It is possible to aggregate over anotated values
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors'))
{'num_authors__avg': 1.66...}
# You can filter the results based on the aggregation alias.
# Lets add a publisher to test the different possibilities for filtering
>>> p = Publisher(name='Expensive Publisher', num_awards=0)
>>> p.save()
>>> Book(name='ExpensiveBook1', pages=1, isbn='111', rating=3.5, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,1)).save()
>>> Book(name='ExpensiveBook2', pages=1, isbn='222', rating=4.0, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,2)).save()
>>> Book(name='ExpensiveBook3', pages=1, isbn='333', rating=4.5, price=Decimal("35"), publisher=p, pubdate=date(2008,12,3)).save()
# Publishers that have:
# (i) more than one book
>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
# (ii) a book that cost less than 40
>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).order_by('pk')
[<Publisher: Apress>, <Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
# (iii) more than one book and (at least) a book that cost less than 40
>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=Decimal("40.0")).order_by('pk')
[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
# (iv) more than one book that costs less than $40
>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
[<Publisher: Apress>]
# Now a bit of testing on the different lookup types
#
>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 3]).order_by('pk')
[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 2]).order_by('pk')
[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>]
>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__in=[1, 3]).order_by('pk')
[<Publisher: Sams>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__isnull=True)
[]
>>> p.delete()
# Does Author X have any friends? (or better, how many friends does author X have)
>> Author.objects.filter(pk=1).aggregate(Count('friends__id'))
{'friends__id__count': 2.0}
# Give me a list of all Books with more than 1 authors
>>> Book.objects.all().annotate(num_authors=Count('authors__name')).filter(num_authors__ge=2).order_by('pk')
[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Artificial Intelligence: A Modern Approach>]
# Give me a list of all Authors that have no friends
>>> Author.objects.all().annotate(num_friends=Count('friends__id', distinct=True)).filter(num_friends=0).order_by('pk')
[<Author: Brad Dayley>]
# Give me a list of all publishers that have published more than 1 books
>>> Publisher.objects.all().annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
[<Publisher: Apress>, <Publisher: Prentice Hall>]
# Give me a list of all publishers that have published more than 1 books that cost less than 40
>>> Publisher.objects.all().filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1)
[<Publisher: Apress>]
# Give me a list of all Books that were written by X and one other author.
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1)
[<Book: Artificial Intelligence: A Modern Approach>]
# Give me the average rating of all Books that were written by X and one other author.
#(Aggregate over objects discovered using membership of the m2m set)
# Adding an existing author to another book to test it the right way
>>> a = Author.objects.get(name__contains='Norvig')
>>> b = Book.objects.get(name__contains='Done Right')
>>> b.authors.add(a)
>>> b.save()
# This should do it
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1).aggregate(Avg('rating'))
{'rating__avg': 4.25}
>>> b.authors.remove(a)
# Give me a list of all Authors that have published a book with at least one other person
# (Filters over a count generated on a related object)
#
# Cheating: [a for a in Author.objects.all().annotate(num_coleagues=Count('book__authors__id'), num_books=Count('book__id', distinct=True)) if a.num_coleagues - a.num_books > 0]
# F-Syntax is required. Will be fixed after F objects are available
# Tests on fields with non-default table and column names.
>>> Clues.objects.values('EntryID__Entry').annotate(Appearances=Count('EntryID'), Distinct_Clues=Count('Clue', distinct=True))
[]
# Aggregates also work on dates, times and datetimes
>>> Publisher.objects.annotate(earliest_book=Min('book__pubdate')).order_by('earliest_book').values()
[{'earliest_book': datetime.date(1991, 10, 15), 'num_awards': 9, 'id': 4, 'name': u'Morgan Kaufmann'}, {'earliest_book': datetime.date(1995, 1, 15), 'num_awards': 7, 'id': 3, 'name': u'Prentice Hall'}, {'earliest_book': datetime.date(2007, 12, 6), 'num_awards': 3, 'id': 1, 'name': u'Apress'}, {'earliest_book': datetime.date(2008, 3, 3), 'num_awards': 1, 'id': 2, 'name': u'Sams'}]
>>> Store.objects.aggregate(Max('friday_night_closing'), Min("original_opening"))
{'friday_night_closing__max': datetime.time(23, 59, 59), 'original_opening__min': datetime.datetime(1945, 4, 25, 16, 24, 14)}
# values_list() can also be used
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('pk', 'isbn', 'mean_age')
[(1, u'159059725', 34.5)]
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('isbn')
[(u'159059725',)]
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age')
[(34.5,)]
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age', flat=True)
[34.5]
"""}

View File

@ -0,0 +1,229 @@
[
{
"pk": 1,
"model": "aggregation_regress.publisher",
"fields": {
"name": "Apress",
"num_awards": 3
}
},
{
"pk": 2,
"model": "aggregation_regress.publisher",
"fields": {
"name": "Sams",
"num_awards": 1
}
},
{
"pk": 3,
"model": "aggregation_regress.publisher",
"fields": {
"name": "Prentice Hall",
"num_awards": 7
}
},
{
"pk": 4,
"model": "aggregation_regress.publisher",
"fields": {
"name": "Morgan Kaufmann",
"num_awards": 9
}
},
{
"pk": 1,
"model": "aggregation_regress.book",
"fields": {
"publisher": 1,
"isbn": "159059725",
"name": "The Definitive Guide to Django: Web Development Done Right",
"price": "30.00",
"rating": 4.5,
"authors": [1, 2],
"pages": 447,
"pubdate": "2007-12-6"
}
},
{
"pk": 2,
"model": "aggregation_regress.book",
"fields": {
"publisher": 2,
"isbn": "067232959",
"name": "Sams Teach Yourself Django in 24 Hours",
"price": "23.09",
"rating": 3.0,
"authors": [3],
"pages": 528,
"pubdate": "2008-3-3"
}
},
{
"pk": 3,
"model": "aggregation_regress.book",
"fields": {
"publisher": 1,
"isbn": "159059996",
"name": "Practical Django Projects",
"price": "29.69",
"rating": 4.0,
"authors": [4],
"pages": 300,
"pubdate": "2008-6-23"
}
},
{
"pk": 4,
"model": "aggregation_regress.book",
"fields": {
"publisher": 3,
"isbn": "013235613",
"name": "Python Web Development with Django",
"price": "29.69",
"rating": 4.0,
"authors": [5, 6, 7],
"pages": 350,
"pubdate": "2008-11-3"
}
},
{
"pk": 5,
"model": "aggregation_regress.book",
"fields": {
"publisher": 3,
"isbn": "013790395",
"name": "Artificial Intelligence: A Modern Approach",
"price": "82.80",
"rating": 4.0,
"authors": [8, 9],
"pages": 1132,
"pubdate": "1995-1-15"
}
},
{
"pk": 6,
"model": "aggregation_regress.book",
"fields": {
"publisher": 4,
"isbn": "155860191",
"name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
"price": "75.00",
"rating": 5.0,
"authors": [8],
"pages": 946,
"pubdate": "1991-10-15"
}
},
{
"pk": 1,
"model": "aggregation_regress.store",
"fields": {
"books": [1, 2, 3, 4, 5, 6],
"name": "Amazon.com",
"original_opening": "1994-4-23 9:17:42",
"friday_night_closing": "23:59:59"
}
},
{
"pk": 2,
"model": "aggregation_regress.store",
"fields": {
"books": [1, 3, 5, 6],
"name": "Books.com",
"original_opening": "2001-3-15 11:23:37",
"friday_night_closing": "23:59:59"
}
},
{
"pk": 3,
"model": "aggregation_regress.store",
"fields": {
"books": [3, 4, 6],
"name": "Mamma and Pappa's Books",
"original_opening": "1945-4-25 16:24:14",
"friday_night_closing": "21:30:00"
}
},
{
"pk": 1,
"model": "aggregation_regress.author",
"fields": {
"age": 34,
"friends": [2, 4],
"name": "Adrian Holovaty"
}
},
{
"pk": 2,
"model": "aggregation_regress.author",
"fields": {
"age": 35,
"friends": [1, 7],
"name": "Jacob Kaplan-Moss"
}
},
{
"pk": 3,
"model": "aggregation_regress.author",
"fields": {
"age": 45,
"friends": [],
"name": "Brad Dayley"
}
},
{
"pk": 4,
"model": "aggregation_regress.author",
"fields": {
"age": 29,
"friends": [1],
"name": "James Bennett"
}
},
{
"pk": 5,
"model": "aggregation_regress.author",
"fields": {
"age": 37,
"friends": [6, 7],
"name": "Jeffrey Forcier "
}
},
{
"pk": 6,
"model": "aggregation_regress.author",
"fields": {
"age": 29,
"friends": [5, 7],
"name": "Paul Bissex"
}
},
{
"pk": 7,
"model": "aggregation_regress.author",
"fields": {
"age": 25,
"friends": [2, 5, 6],
"name": "Wesley J. Chun"
}
},
{
"pk": 8,
"model": "aggregation_regress.author",
"fields": {
"age": 57,
"friends": [9],
"name": "Peter Norvig"
}
},
{
"pk": 9,
"model": "aggregation_regress.author",
"fields": {
"age": 46,
"friends": [8],
"name": "Stuart Russell"
}
}
]

View File

@ -0,0 +1,199 @@
# coding: utf-8
from django.db import models
from django.conf import settings
try:
sorted
except NameError:
from django.utils.itercompat import sorted # For Python 2.3
class Author(models.Model):
name = models.CharField(max_length=100)
age = models.IntegerField()
friends = models.ManyToManyField('self', blank=True)
def __unicode__(self):
return self.name
class Publisher(models.Model):
name = models.CharField(max_length=300)
num_awards = models.IntegerField()
def __unicode__(self):
return self.name
class Book(models.Model):
isbn = models.CharField(max_length=9)
name = models.CharField(max_length=300)
pages = models.IntegerField()
rating = models.FloatField()
price = models.DecimalField(decimal_places=2, max_digits=6)
authors = models.ManyToManyField(Author)
publisher = models.ForeignKey(Publisher)
pubdate = models.DateField()
class Meta:
ordering = ('name',)
def __unicode__(self):
return self.name
class Store(models.Model):
name = models.CharField(max_length=300)
books = models.ManyToManyField(Book)
original_opening = models.DateTimeField()
friday_night_closing = models.TimeField()
def __unicode__(self):
return self.name
#Extra does not play well with values. Modify the tests if/when this is fixed.
__test__ = {'API_TESTS': """
>>> from django.core import management
>>> from django.db.models import get_app
# Reset the database representation of this app.
# This will return the database to a clean initial state.
>>> management.call_command('flush', verbosity=0, interactive=False)
>>> from django.db.models import Avg, Sum, Count, Max, Min, StdDev, Variance
# Ordering requests are ignored
>>> Author.objects.all().order_by('name').aggregate(Avg('age'))
{'age__avg': 37.4...}
# Implicit ordering is also ignored
>>> Book.objects.all().aggregate(Sum('pages'))
{'pages__sum': 3703}
# Baseline results
>>> Book.objects.all().aggregate(Sum('pages'), Avg('pages'))
{'pages__sum': 3703, 'pages__avg': 617.1...}
# Empty values query doesn't affect grouping or results
>>> Book.objects.all().values().aggregate(Sum('pages'), Avg('pages'))
{'pages__sum': 3703, 'pages__avg': 617.1...}
# Aggregate overrides extra selected column
>>> Book.objects.all().extra(select={'price_per_page' : 'price / pages'}).aggregate(Sum('pages'))
{'pages__sum': 3703}
# Annotations get combined with extra select clauses
>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).__dict__.items())
[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
# Order of the annotate/extra in the query doesn't matter
>>> sorted(Book.objects.all().extra(select={'manufacture_cost' : 'price * .5'}).annotate(mean_auth_age=Avg('authors__age')).get(pk=2).__dict__.items())
[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
# Values queries can be combined with annotate and extra
>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).values().get(pk=2).items())
[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
# The order of the values, annotate and extra clauses doesn't matter
>>> sorted(Book.objects.all().values().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).items())
[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
# A values query that selects specific columns reduces the output
>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).values('name').get(pk=1).items())
[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')]
# The annotations are added to values output if values() precedes annotate()
>>> sorted(Book.objects.all().values('name').annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1).items())
[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')]
# Check that all of the objects are getting counted (allow_nulls) and that values respects the amount of objects
>>> len(Author.objects.all().annotate(Avg('friends__age')).values())
9
# Check that consecutive calls to annotate accumulate in the query
>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('oldest', 'price').annotate(Max('publisher__num_awards'))
[{'price': Decimal("30..."), 'oldest': 35, 'publisher__num_awards__max': 3}, {'price': Decimal("29.69"), 'oldest': 37, 'publisher__num_awards__max': 7}, {'price': Decimal("23.09"), 'oldest': 45, 'publisher__num_awards__max': 1}, {'price': Decimal("75..."), 'oldest': 57, 'publisher__num_awards__max': 9}, {'price': Decimal("82.8..."), 'oldest': 57, 'publisher__num_awards__max': 7}]
# Aggregates can be composed over annotations.
# The return type is derived from the composed aggregate
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('pages'), Max('price'), Sum('num_authors'), Avg('num_authors'))
{'num_authors__sum': 10, 'num_authors__avg': 1.66..., 'pages__max': 1132, 'price__max': Decimal("82.80")}
# Bad field requests in aggregates are caught and reported
>>> Book.objects.all().aggregate(num_authors=Count('foo'))
Traceback (most recent call last):
...
FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store
>>> Book.objects.all().annotate(num_authors=Count('foo'))
Traceback (most recent call last):
...
FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('foo'))
Traceback (most recent call last):
...
FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store, num_authors
# Old-style count aggregations can be mixed with new-style
>>> Book.objects.annotate(num_authors=Count('authors')).count()
6
# Non-ordinal, non-computed Aggregates over annotations correctly inherit
# the annotation's internal type if the annotation is ordinal or computed
>>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Max('num_authors'))
{'num_authors__max': 3}
>>> Publisher.objects.annotate(avg_price=Avg('book__price')).aggregate(Max('avg_price'))
{'avg_price__max': 75.0...}
# Aliases are quoted to protected aliases that might be reserved names
>>> Book.objects.aggregate(number=Max('pages'), select=Max('pages'))
{'number': 1132, 'select': 1132}
"""
}
if settings.DATABASE_ENGINE != 'sqlite3':
__test__['API_TESTS'] += """
# Stddev and Variance are not guaranteed to be available for SQLite.
>>> Book.objects.aggregate(StdDev('pages'))
{'pages__stddev': 311.46...}
>>> Book.objects.aggregate(StdDev('rating'))
{'rating__stddev': 0.60...}
>>> Book.objects.aggregate(StdDev('price'))
{'price__stddev': 24.16...}
>>> Book.objects.aggregate(StdDev('pages', sample=True))
{'pages__stddev': 341.19...}
>>> Book.objects.aggregate(StdDev('rating', sample=True))
{'rating__stddev': 0.66...}
>>> Book.objects.aggregate(StdDev('price', sample=True))
{'price__stddev': 26.46...}
>>> Book.objects.aggregate(Variance('pages'))
{'pages__variance': 97010.80...}
>>> Book.objects.aggregate(Variance('rating'))
{'rating__variance': 0.36...}
>>> Book.objects.aggregate(Variance('price'))
{'price__variance': 583.77...}
>>> Book.objects.aggregate(Variance('pages', sample=True))
{'pages__variance': 116412.96...}
>>> Book.objects.aggregate(Variance('rating', sample=True))
{'rating__variance': 0.44...}
>>> Book.objects.aggregate(Variance('price', sample=True))
{'price__variance': 700.53...}
"""