diff --git a/AUTHORS b/AUTHORS index f2107263be9..75df381c8d9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -31,6 +31,7 @@ answer newbie questions, and generally made Django that much better: AgarFu Dagur Páll Ammendrup Collin Anderson + Nicolas Lara Jeff Anderson Marian Andre Andreas diff --git a/django/contrib/gis/db/models/aggregates.py b/django/contrib/gis/db/models/aggregates.py new file mode 100644 index 00000000000..111601171bd --- /dev/null +++ b/django/contrib/gis/db/models/aggregates.py @@ -0,0 +1,10 @@ +from django.db.models import Aggregate + +class Extent(Aggregate): + name = 'Extent' + +class MakeLine(Aggregate): + name = 'MakeLine' + +class Union(Aggregate): + name = 'Union' diff --git a/django/contrib/gis/db/models/query.py b/django/contrib/gis/db/models/query.py index b7b7dcda934..8eb435de936 100644 --- a/django/contrib/gis/db/models/query.py +++ b/django/contrib/gis/db/models/query.py @@ -3,6 +3,7 @@ from django.db import connection from django.db.models.query import sql, QuerySet, Q from django.contrib.gis.db.backend import SpatialBackend +from django.contrib.gis.db.models import aggregates from django.contrib.gis.db.models.fields import GeometryField, PointField from django.contrib.gis.db.models.sql import AreaField, DistanceField, GeomField, GeoQuery, GeoWhereNode from django.contrib.gis.measure import Area, Distance @@ -17,7 +18,7 @@ class GeomSQL(object): "Simple wrapper object for geometric SQL." def __init__(self, geo_sql): self.sql = geo_sql - + def as_sql(self, *args, **kwargs): return self.sql @@ -30,7 +31,7 @@ class GeoQuerySet(QuerySet): def area(self, tolerance=0.05, **kwargs): """ - Returns the area of the geographic field in an `area` attribute on + Returns the area of the geographic field in an `area` attribute on each element of this GeoQuerySet. 
""" # Peforming setup here rather than in `_spatial_attribute` so that @@ -75,21 +76,21 @@ class GeoQuerySet(QuerySet): Keyword Arguments: `spheroid` => If the geometry field is geodetic and PostGIS is - the spatial database, then the more accurate + the spatial database, then the more accurate spheroid calculation will be used instead of the quicker sphere calculation. - - `tolerance` => Used only for Oracle. The tolerance is - in meters -- a default of 5 centimeters (0.05) + + `tolerance` => Used only for Oracle. The tolerance is + in meters -- a default of 5 centimeters (0.05) is used. """ return self._distance_attribute('distance', geom, **kwargs) def envelope(self, **kwargs): """ - Returns a Geometry representing the bounding box of the + Returns a Geometry representing the bounding box of the Geometry field in an `envelope` attribute on each element of - the GeoQuerySet. + the GeoQuerySet. """ return self._geom_attribute('envelope', **kwargs) @@ -98,20 +99,7 @@ class GeoQuerySet(QuerySet): Returns the extent (aggregate) of the features in the GeoQuerySet. The extent will be returned as a 4-tuple, consisting of (xmin, ymin, xmax, ymax). """ - convert_extent = None - if SpatialBackend.postgis: - def convert_extent(box, geo_field): - # TODO: Parsing of BOX3D, Oracle support (patches welcome!) - # Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)"; - # parsing out and returning as a 4-tuple. 
- ll, ur = box[4:-1].split(',') - xmin, ymin = map(float, ll.split()) - xmax, ymax = map(float, ur.split()) - return (xmin, ymin, xmax, ymax) - elif SpatialBackend.oracle: - def convert_extent(wkt, geo_field): - raise NotImplementedError - return self._spatial_aggregate('extent', convert_func=convert_extent, **kwargs) + return self._spatial_aggregate(aggregates.Extent, **kwargs) def gml(self, precision=8, version=2, **kwargs): """ @@ -120,7 +108,7 @@ class GeoQuerySet(QuerySet): """ s = {'desc' : 'GML', 'procedure_args' : {'precision' : precision}} if SpatialBackend.postgis: - # PostGIS AsGML() aggregate function parameter order depends on the + # PostGIS AsGML() aggregate function parameter order depends on the # version -- uggh. major, minor1, minor2 = SpatialBackend.version if major >= 1 and (minor1 > 3 or (minor1 == 3 and minor2 > 1)): @@ -163,9 +151,7 @@ class GeoQuerySet(QuerySet): this GeoQuerySet and returns it. This is a spatial aggregate method, and thus returns a geometry rather than a GeoQuerySet. """ - kwargs['geo_field_type'] = PointField - kwargs['agg_field'] = GeometryField - return self._spatial_aggregate('make_line', **kwargs) + return self._spatial_aggregate(aggregates.MakeLine, geo_field_type=PointField, **kwargs) def mem_size(self, **kwargs): """ @@ -185,7 +171,7 @@ class GeoQuerySet(QuerySet): def num_points(self, **kwargs): """ - Returns the number of points in the first linestring in the + Returns the number of points in the first linestring in the Geometry field in a `num_points` attribute on each element of this GeoQuerySet; otherwise sets with None. """ @@ -231,7 +217,7 @@ class GeoQuerySet(QuerySet): def sym_difference(self, geom, **kwargs): """ - Returns the symmetric difference of the geographic field in a + Returns the symmetric difference of the geographic field in a `sym_difference` attribute on each element of this GeoQuerySet. 
""" return self._geomset_attribute('sym_difference', geom, **kwargs) @@ -265,7 +251,7 @@ class GeoQuerySet(QuerySet): # when there's also a transformation we need to cascade the substitutions. # For example, 'SDO_UTIL.TO_WKTGEOMETRY(SDO_CS.TRANSFORM( ... )' geo_col = self.query.custom_select.get(geo_field, field_col) - + # Setting the key for the field's column with the custom SELECT SQL to # override the geometry column returned from the database. custom_sel = '%s(%s, %s)' % (SpatialBackend.transform, geo_col, srid) @@ -288,11 +274,10 @@ class GeoQuerySet(QuerySet): None if the GeoQuerySet is empty. The `tolerance` keyword is for Oracle backends only. """ - kwargs['agg_field'] = GeometryField - return self._spatial_aggregate('unionagg', **kwargs) + return self._spatial_aggregate(aggregates.Union, **kwargs) ### Private API -- Abstracted DRY routines. ### - def _spatial_setup(self, att, aggregate=False, desc=None, field_name=None, geo_field_type=None): + def _spatial_setup(self, att, desc=None, field_name=None, geo_field_type=None): """ Performs set up for executing the spatial function. """ @@ -301,86 +286,52 @@ class GeoQuerySet(QuerySet): if desc is None: desc = att if not func: raise ImproperlyConfigured('%s stored procedure not available.' % desc) - # Initializing the procedure arguments. + # Initializing the procedure arguments. procedure_args = {'function' : func} - - # Is there a geographic field in the model to perform this + + # Is there a geographic field in the model to perform this # operation on? geo_field = self.query._geo_field(field_name) if not geo_field: raise TypeError('%s output only available on GeometryFields.' % func) - # If the `geo_field_type` keyword was used, then enforce that + # If the `geo_field_type` keyword was used, then enforce that # type limitation. - if not geo_field_type is None and not isinstance(geo_field, geo_field_type): - raise TypeError('"%s" stored procedures may only be called on %ss.' 
% (func, geo_field_type.__name__)) + if not geo_field_type is None and not isinstance(geo_field, geo_field_type): + raise TypeError('"%s" stored procedures may only be called on %ss.' % (func, geo_field_type.__name__)) # Setting the procedure args. - procedure_args['geo_col'] = self._geocol_select(geo_field, field_name, aggregate) + procedure_args['geo_col'] = self._geocol_select(geo_field, field_name) return procedure_args, geo_field - def _spatial_aggregate(self, att, field_name=None, - agg_field=None, convert_func=None, - geo_field_type=None, tolerance=0.0005): + def _spatial_aggregate(self, aggregate, field_name=None, + geo_field_type=None, tolerance=0.05): """ DRY routine for calling aggregate spatial stored procedures and returning their result to the caller of the function. """ - # Constructing the setup keyword arguments. - setup_kwargs = {'aggregate' : True, - 'field_name' : field_name, - 'geo_field_type' : geo_field_type, - } - procedure_args, geo_field = self._spatial_setup(att, **setup_kwargs) - - if SpatialBackend.oracle: - procedure_args['tolerance'] = tolerance - # Adding in selection SQL for Oracle geometry columns. - if agg_field is GeometryField: - agg_sql = '%s' % SpatialBackend.select - else: - agg_sql = '%s' - agg_sql = agg_sql % ('%(function)s(SDOAGGRTYPE(%(geo_col)s,%(tolerance)s))' % procedure_args) - else: - agg_sql = '%(function)s(%(geo_col)s)' % procedure_args + # Getting the field the geographic aggregate will be called on. + geo_field = self.query._geo_field(field_name) + if not geo_field: + raise TypeError('%s aggregate only available on GeometryFields.' % aggregate.name) - # Wrapping our selection SQL in `GeomSQL` to bypass quoting, and - # specifying the type of the aggregate field. - self.query.select = [GeomSQL(agg_sql)] - self.query.select_fields = [agg_field] + # Checking if there are any geo field type limitations on this + # aggregate (e.g. ST_Makeline only operates on PointFields). 
+ if not geo_field_type is None and not isinstance(geo_field, geo_field_type): + raise TypeError('%s aggregate may only be called on %ss.' % (aggregate.name, geo_field_type.__name__)) - try: - # `asql` => not overriding `sql` module. - asql, params = self.query.as_sql() - except sql.datastructures.EmptyResultSet: - return None + # Getting the string expression of the field name, as this is the + # argument taken by `Aggregate` objects. + agg_col = field_name or geo_field.name - # Getting a cursor, executing the query, and extracting the returned - # value from the aggregate function. - cursor = connection.cursor() - cursor.execute(asql, params) - result = cursor.fetchone()[0] - - # If the `agg_field` is specified as a GeometryField, then autmatically - # set up the conversion function. - if agg_field is GeometryField and not callable(convert_func): - if SpatialBackend.postgis: - def convert_geom(hex, geo_field): - if hex: return SpatialBackend.Geometry(hex) - else: return None - elif SpatialBackend.oracle: - def convert_geom(clob, geo_field): - if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid) - else: return None - convert_func = convert_geom + # Adding any keyword parameters for the Aggregate object. Oracle backends + # in particular need an additional `tolerance` parameter. + agg_kwargs = {} + if SpatialBackend.oracle: agg_kwargs['tolerance'] = tolerance - # Returning the callback function evaluated on the result culled - # from the executed cursor. - if callable(convert_func): - return convert_func(result, geo_field) - else: - return result + # Calling the QuerySet.aggregate, and returning only the value of the aggregate. + return self.aggregate(_geoagg=aggregate(agg_col, **agg_kwargs))['_geoagg'] def _spatial_attribute(self, att, settings, field_name=None, model_att=None): """ @@ -393,7 +344,7 @@ class GeoQuerySet(QuerySet): SQL function to call. settings: - Dictonary of internal settings to customize for the spatial procedure. 
+ Dictonary of internal settings to customize for the spatial procedure. Public Keyword Arguments: @@ -420,7 +371,7 @@ class GeoQuerySet(QuerySet): for k, v in default_args.iteritems(): settings['procedure_args'].setdefault(k, v) else: geo_field = settings['geo_field'] - + # The attribute to attach to the model. if not isinstance(model_att, basestring): model_att = att @@ -429,7 +380,7 @@ class GeoQuerySet(QuerySet): # Using the field's get_db_prep_lookup() to get any needed # transformation SQL -- we pass in a 'dummy' `contains` lookup. where, params = geo_field.get_db_prep_lookup('contains', settings['procedure_args'][name]) - # Replacing the procedure format with that of any needed + # Replacing the procedure format with that of any needed # transformation SQL. old_fmt = '%%(%s)s' % name new_fmt = where[0] % '%%s' @@ -438,7 +389,7 @@ class GeoQuerySet(QuerySet): # Getting the format for the stored procedure. fmt = '%%(function)s(%s)' % settings['procedure_fmt'] - + # If the result of this function needs to be converted. if settings.get('select_field', False): sel_fld = settings['select_field'] @@ -446,10 +397,10 @@ class GeoQuerySet(QuerySet): self.query.custom_select[model_att] = SpatialBackend.select self.query.extra_select_fields[model_att] = sel_fld - # Finally, setting the extra selection attribute with + # Finally, setting the extra selection attribute with # the format string expanded with the stored procedure # arguments. 
- return self.extra(select={model_att : fmt % settings['procedure_args']}, + return self.extra(select={model_att : fmt % settings['procedure_args']}, select_params=settings['select_params']) def _distance_attribute(self, func, geom=None, tolerance=0.05, spheroid=False, **kwargs): @@ -471,10 +422,10 @@ class GeoQuerySet(QuerySet): distance = func == 'distance' length = func == 'length' perimeter = func == 'perimeter' - if not (distance or length or perimeter): + if not (distance or length or perimeter): raise ValueError('Unknown distance function: %s' % func) - # The field's get_db_prep_lookup() is used to get any + # The field's get_db_prep_lookup() is used to get any # extra distance parameters. Here we set up the # parameters that will be passed in to field's function. lookup_params = [geom or 'POINT (0 0)', 0] @@ -482,12 +433,12 @@ class GeoQuerySet(QuerySet): # If the spheroid calculation is desired, either by the `spheroid` # keyword or wehn calculating the length of geodetic field, make # sure the 'spheroid' distance setting string is passed in so we - # get the correct spatial stored procedure. - if spheroid or (SpatialBackend.postgis and geo_field.geodetic and length): - lookup_params.append('spheroid') + # get the correct spatial stored procedure. + if spheroid or (SpatialBackend.postgis and geo_field.geodetic and length): + lookup_params.append('spheroid') where, params = geo_field.get_db_prep_lookup('distance_lte', lookup_params) - # The `geom_args` flag is set to true if a geometry parameter was + # The `geom_args` flag is set to true if a geometry parameter was # passed in. 
geom_args = bool(geom) @@ -505,7 +456,7 @@ class GeoQuerySet(QuerySet): geodetic = unit_name in geo_field.geodetic_units else: geodetic = geo_field.geodetic - + if distance: if self.query.transformed_srid: # Setting the `geom_args` flag to false because we want to handle @@ -515,7 +466,7 @@ class GeoQuerySet(QuerySet): geom_args = False procedure_fmt = '%s(%%(geo_col)s, %s)' % (SpatialBackend.transform, self.query.transformed_srid) if geom.srid is None or geom.srid == self.query.transformed_srid: - # If the geom parameter srid is None, it is assumed the coordinates + # If the geom parameter srid is None, it is assumed the coordinates # are in the transformed units. A placeholder is used for the # geometry parameter. procedure_fmt += ', %%s' @@ -529,10 +480,10 @@ class GeoQuerySet(QuerySet): if geodetic: # Spherical distance calculation is needed (because the geographic - # field is geodetic). However, the PostGIS ST_distance_sphere/spheroid() + # field is geodetic). However, the PostGIS ST_distance_sphere/spheroid() # procedures may only do queries from point columns to point geometries # some error checking is required. - if not isinstance(geo_field, PointField): + if not isinstance(geo_field, PointField): raise TypeError('Spherical distance calculation only supported on PointFields.') if not str(SpatialBackend.Geometry(buffer(params[0].wkb)).geom_type) == 'Point': raise TypeError('Spherical distance calculation only supported with Point Geometry parameters') @@ -553,12 +504,12 @@ class GeoQuerySet(QuerySet): # Setting up the settings for `_spatial_attribute`. 
s = {'select_field' : DistanceField(dist_att), - 'setup' : False, + 'setup' : False, 'geo_field' : geo_field, 'procedure_args' : procedure_args, 'procedure_fmt' : procedure_fmt, } - if geom_args: + if geom_args: s['geom_args'] = ('geom',) s['procedure_args']['geom'] = geom elif geom: @@ -577,12 +528,12 @@ class GeoQuerySet(QuerySet): s['procedure_fmt'] = '%(geo_col)s,%(tolerance)s' s['procedure_args'] = {'tolerance' : tolerance} return self._spatial_attribute(func, s, **kwargs) - + def _geomset_attribute(self, func, geom, tolerance=0.05, **kwargs): """ DRY routine for setting up a GeoQuerySet method that attaches a Geometry attribute and takes a Geoemtry parameter. This is used - for geometry set-like operations (e.g., intersection, difference, + for geometry set-like operations (e.g., intersection, difference, union, sym_difference). """ s = {'geom_args' : ('geom',), @@ -595,16 +546,12 @@ class GeoQuerySet(QuerySet): s['procedure_args']['tolerance'] = tolerance return self._spatial_attribute(func, s, **kwargs) - def _geocol_select(self, geo_field, field_name, aggregate=False): + def _geocol_select(self, geo_field, field_name): """ Helper routine for constructing the SQL to select the geographic column. Takes into account if the geographic field is in a ForeignKey relation to the current model. """ - # If this is an aggregate spatial query, the flag needs to be - # set on the `GeoQuery` object of this queryset. - if aggregate: self.query.aggregate = True - opts = self.model._meta if not geo_field in opts.fields: # Is this operation going to be on a related geographic field? 
diff --git a/django/contrib/gis/db/models/sql/aggregates.py b/django/contrib/gis/db/models/sql/aggregates.py new file mode 100644 index 00000000000..ff763342491 --- /dev/null +++ b/django/contrib/gis/db/models/sql/aggregates.py @@ -0,0 +1,36 @@ +from django.db.models.sql.aggregates import * + +from django.contrib.gis.db.models.fields import GeometryField +from django.contrib.gis.db.backend import SpatialBackend + +if SpatialBackend.oracle: + geo_template = '%(function)s(SDOAGGRTYPE(%(field)s,%(tolerance)s))' +else: + geo_template = '%(function)s(%(field)s)' + +class GeoAggregate(Aggregate): + # Overriding the SQL template with the geographic one. + sql_template = geo_template + + is_extent = False + + def __init__(self, col, source=None, is_summary=False, **extra): + super(GeoAggregate, self).__init__(col, source, is_summary, **extra) + + # Can't use geographic aggregates on non-geometry fields. + if not isinstance(self.source, GeometryField): + raise ValueError('Geospatial aggregates only allowed on geometry fields.') + + # Making sure the SQL function is available for this spatial backend. 
+ if not self.sql_function: + raise NotImplementedError('This aggregate functionality not implemented for your spatial backend.') + +class Extent(GeoAggregate): + is_extent = True + sql_function = SpatialBackend.extent + +class MakeLine(GeoAggregate): + sql_function = SpatialBackend.make_line + +class Union(GeoAggregate): + sql_function = SpatialBackend.unionagg diff --git a/django/contrib/gis/db/models/sql/query.py b/django/contrib/gis/db/models/sql/query.py index 52f521d500b..246ea0300f5 100644 --- a/django/contrib/gis/db/models/sql/query.py +++ b/django/contrib/gis/db/models/sql/query.py @@ -5,6 +5,7 @@ from django.db.models.fields.related import ForeignKey from django.contrib.gis.db.backend import SpatialBackend from django.contrib.gis.db.models.fields import GeometryField +from django.contrib.gis.db.models.sql import aggregates as gis_aggregates_module from django.contrib.gis.db.models.sql.where import GeoWhereNode from django.contrib.gis.measure import Area, Distance @@ -12,12 +13,35 @@ from django.contrib.gis.measure import Area, Distance ALL_TERMS = sql.constants.QUERY_TERMS.copy() ALL_TERMS.update(SpatialBackend.gis_terms) +# Conversion functions used in normalizing geographic aggregates. +if SpatialBackend.postgis: + def convert_extent(box): + # TODO: Parsing of BOX3D, Oracle support (patches welcome!) + # Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)"; + # parsing out and returning as a 4-tuple. 
+ ll, ur = box[4:-1].split(',') + xmin, ymin = map(float, ll.split()) + xmax, ymax = map(float, ur.split()) + return (xmin, ymin, xmax, ymax) + + def convert_geom(hex, geo_field): + if hex: return SpatialBackend.Geometry(hex) + else: return None +else: + def convert_extent(box): + raise NotImplementedError('Aggregate extent not implemented for this spatial backend.') + + def convert_geom(clob, geo_field): + if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid) + else: return None + class GeoQuery(sql.Query): """ A single spatial SQL query. """ # Overridding the valid query terms. query_terms = ALL_TERMS + aggregates_module = gis_aggregates_module #### Methods overridden from the base Query class #### def __init__(self, model, conn): @@ -25,7 +49,6 @@ class GeoQuery(sql.Query): # The following attributes are customized for the GeoQuerySet. # The GeoWhereNode and SpatialBackend classes contain backend-specific # routines and functions. - self.aggregate = False self.custom_select = {} self.transformed_srid = None self.extra_select_fields = {} @@ -34,7 +57,6 @@ class GeoQuery(sql.Query): obj = super(GeoQuery, self).clone(*args, **kwargs) # Customized selection dictionary and transformed srid flag have # to also be added to obj. - obj.aggregate = self.aggregate obj.custom_select = self.custom_select.copy() obj.transformed_srid = self.transformed_srid obj.extra_select_fields = self.extra_select_fields.copy() @@ -50,12 +72,12 @@ class GeoQuery(sql.Query): (without the table names) are given unique aliases. This is needed in some cases to avoid ambiguitity with nested queries. - This routine is overridden from Query to handle customized selection of + This routine is overridden from Query to handle customized selection of geometry columns. 
""" qn = self.quote_name_unless_alias qn2 = self.connection.ops.quote_name - result = ['(%s) AS %s' % (self.get_extra_select_format(alias) % col[0], qn2(alias)) + result = ['(%s) AS %s' % (self.get_extra_select_format(alias) % col[0], qn2(alias)) for alias, col in self.extra_select.iteritems()] aliases = set(self.extra_select.keys()) if with_aliases: @@ -67,38 +89,53 @@ class GeoQuery(sql.Query): for col, field in izip(self.select, self.select_fields): if isinstance(col, (list, tuple)): r = self.get_field_select(field, col[0]) - if with_aliases and col[1] in col_aliases: - c_alias = 'Col%d' % len(col_aliases) - result.append('%s AS %s' % (r, c_alias)) - aliases.add(c_alias) - col_aliases.add(c_alias) + if with_aliases: + if col[1] in col_aliases: + c_alias = 'Col%d' % len(col_aliases) + result.append('%s AS %s' % (r, c_alias)) + aliases.add(c_alias) + col_aliases.add(c_alias) + else: + result.append('%s AS %s' % (r, col[1])) + aliases.add(r) + col_aliases.add(col[1]) else: result.append(r) aliases.add(r) col_aliases.add(col[1]) else: result.append(col.as_sql(quote_func=qn)) + if hasattr(col, 'alias'): aliases.add(col.alias) col_aliases.add(col.alias) + elif self.default_cols: cols, new_aliases = self.get_default_columns(with_aliases, col_aliases) result.extend(cols) aliases.update(new_aliases) + + result.extend([ + '%s%s' % ( + aggregate.as_sql(quote_func=qn), + alias is not None and ' AS %s' % alias or '' + ) + for alias, aggregate in self.aggregate_select.items() + ]) + # This loop customized for GeoQuery. 
- if not self.aggregate: - for (table, col), field in izip(self.related_select_cols, self.related_select_fields): - r = self.get_field_select(field, table) - if with_aliases and col in col_aliases: - c_alias = 'Col%d' % len(col_aliases) - result.append('%s AS %s' % (r, c_alias)) - aliases.add(c_alias) - col_aliases.add(c_alias) - else: - result.append(r) - aliases.add(r) - col_aliases.add(col) + for (table, col), field in izip(self.related_select_cols, self.related_select_fields): + r = self.get_field_select(field, table) + if with_aliases and col in col_aliases: + c_alias = 'Col%d' % len(col_aliases) + result.append('%s AS %s' % (r, c_alias)) + aliases.add(c_alias) + col_aliases.add(c_alias) + else: + result.append(r) + aliases.add(r) + col_aliases.add(col) self._select_aliases = aliases return result @@ -112,7 +149,7 @@ class GeoQuery(sql.Query): Returns a list of strings, quoted appropriately for use in SQL directly, as well as a set of aliases used in the select statement. - This routine is overridden from Query to handle customized selection of + This routine is overridden from Query to handle customized selection of geometry columns. """ result = [] @@ -154,20 +191,10 @@ class GeoQuery(sql.Query): return result, None return result, aliases - def get_ordering(self): - """ - This routine is overridden to disable ordering for aggregate - spatial queries. - """ - if not self.aggregate: - return super(GeoQuery, self).get_ordering() - else: - return () - def resolve_columns(self, row, fields=()): """ This routine is necessary so that distances and geometries returned - from extra selection SQL get resolved appropriately into Python + from extra selection SQL get resolved appropriately into Python objects. """ values = [] @@ -183,7 +210,7 @@ class GeoQuery(sql.Query): # Converting any extra selection values (e.g., geometries and # distance objects added by GeoQuerySet methods). 
- values = [self.convert_values(v, self.extra_select_fields.get(a, None)) + values = [self.convert_values(v, self.extra_select_fields.get(a, None)) for v, a in izip(row[rn_offset:index_start], aliases)] if SpatialBackend.oracle: # This is what happens normally in OracleQuery's `resolve_columns`. @@ -212,6 +239,19 @@ class GeoQuery(sql.Query): value = SpatialBackend.Geometry(value) return value + def resolve_aggregate(self, value, aggregate): + """ + Overridden from GeoQuery's normalize to handle the conversion of + GeoAggregate objects. + """ + if isinstance(aggregate, self.aggregates_module.GeoAggregate): + if aggregate.is_extent: + return convert_extent(value) + else: + return convert_geom(value, aggregate.source) + else: + return super(GeoQuery, self).resolve_aggregate(value, aggregate) + #### Routines unique to GeoQuery #### def get_extra_select_format(self, alias): sel_fmt = '%s' @@ -222,9 +262,9 @@ class GeoQuery(sql.Query): def get_field_select(self, fld, alias=None): """ Returns the SELECT SQL string for the given field. Figures out - if any custom selection SQL is needed for the column The `alias` - keyword may be used to manually specify the database table where - the column exists, if not in the model associated with this + if any custom selection SQL is needed for the column The `alias` + keyword may be used to manually specify the database table where + the column exists, if not in the model associated with this `GeoQuery`. """ sel_fmt = self.get_select_format(fld) @@ -263,15 +303,15 @@ class GeoQuery(sql.Query): """ Recursive utility routine for checking the given name parameter on the given model. Initially, the name parameter is a string, - of the field on the given model e.g., 'point', 'the_geom'. - Related model field strings like 'address__point', may also be + of the field on the given model e.g., 'point', 'the_geom'. + Related model field strings like 'address__point', may also be used. 
- If a GeometryField exists according to the given name parameter + If a GeometryField exists according to the given name parameter it will be returned, otherwise returns False. """ if isinstance(name_param, basestring): - # This takes into account the situation where the name is a + # This takes into account the situation where the name is a # lookup to a related geographic field, e.g., 'address__point'. name_param = name_param.split(sql.constants.LOOKUP_SEP) name_param.reverse() # Reversing so list operates like a queue of related lookups. @@ -284,7 +324,7 @@ class GeoQuery(sql.Query): except (FieldDoesNotExist, IndexError): return False # TODO: ManyToManyField? - if isinstance(fld, GeometryField): + if isinstance(fld, GeometryField): return fld # A-OK. elif isinstance(fld, ForeignKey): # ForeignKey encountered, return the output of this utility called @@ -297,12 +337,12 @@ class GeoQuery(sql.Query): """ Helper function that returns the database column for the given field. The table and column are returned (quoted) in the proper format, e.g., - `"geoapp_city"."point"`. If `table_alias` is not specified, the + `"geoapp_city"."point"`. If `table_alias` is not specified, the database table associated with the model of this `GeoQuery` will be used. """ if table_alias is None: table_alias = self.model._meta.db_table - return "%s.%s" % (self.quote_name_unless_alias(table_alias), + return "%s.%s" % (self.quote_name_unless_alias(table_alias), self.connection.ops.quote_name(field.column)) def _geo_field(self, field_name=None): @@ -333,5 +373,5 @@ class DistanceField(object): # Rather than use GeometryField (which requires a SQL query # upon instantiation), use this lighter weight class. 
-class GeomField(object): +class GeomField(object): pass diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py index 736667d2ffe..33ec14e4d7d 100644 --- a/django/db/backends/__init__.py +++ b/django/db/backends/__init__.py @@ -10,6 +10,12 @@ except NameError: # Python 2.3 compat from sets import Set as set +try: + import decimal +except ImportError: + # Python 2.3 fallback + from django.utils import _decimal as decimal + from django.db.backends import util from django.utils import datetime_safe @@ -62,6 +68,7 @@ class BaseDatabaseWrapper(local): return util.CursorDebugWrapper(cursor, self) class BaseDatabaseFeatures(object): + allows_group_by_pk = False # True if django.db.backend.utils.typecast_timestamp is used on values # returned from dates() calls. needs_datetime_string_cast = True @@ -376,6 +383,22 @@ class BaseDatabaseOperations(object): """ return self.year_lookup_bounds(value) + def convert_values(self, value, field): + """Coerce the value returned by the database backend into a consistent type that + is compatible with the field type. 
+ """ + internal_type = field.get_internal_type() + if internal_type == 'DecimalField': + return value + elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField': + return int(value) + elif internal_type in ('DateField', 'DateTimeField', 'TimeField'): + return value + # No field, or the field isn't known to be a decimal or integer + # Default to a float + return float(value) + + class BaseDatabaseIntrospection(object): """ This class encapsulates all backend-specific introspection utilities diff --git a/django/db/backends/mysql/base.py b/django/db/backends/mysql/base.py index f6eccb94b73..bb3e1297790 100644 --- a/django/db/backends/mysql/base.py +++ b/django/db/backends/mysql/base.py @@ -110,6 +110,7 @@ class CursorWrapper(object): class DatabaseFeatures(BaseDatabaseFeatures): empty_fetchmany_value = () update_can_self_select = False + allows_group_by_pk = True related_fields_match_type = True class DatabaseOperations(BaseDatabaseOperations): diff --git a/django/db/backends/oracle/query.py b/django/db/backends/oracle/query.py index de169d6901b..a86e922b954 100644 --- a/django/db/backends/oracle/query.py +++ b/django/db/backends/oracle/query.py @@ -53,21 +53,23 @@ def query_class(QueryClass, Database): return values def convert_values(self, value, field): - from django.db.models.fields import DateField, DateTimeField, \ - TimeField, BooleanField, NullBooleanField, DecimalField, Field + from django.db.models.fields import Field if isinstance(value, Database.LOB): value = value.read() # Oracle stores empty strings as null. We need to undo this in # order to adhere to the Django convention of using the empty # string instead of null, but only if the field accepts the # empty string. 
- if value is None and isinstance(field, Field) and field.empty_strings_allowed: + if value is None and field and field.empty_strings_allowed: value = u'' # Convert 1 or 0 to True or False - elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)): + elif value in (1, 0) and field and field.get_internal_type() in ('BooleanField', 'NullBooleanField'): value = bool(value) + # Force floats to the correct type + elif value is not None and field and field.get_internal_type() == 'FloatField': + value = float(value) # Convert floats to decimals - elif value is not None and isinstance(field, DecimalField): + elif value is not None and field and field.get_internal_type() == 'DecimalField': value = util.typecast_decimal(field.format_number(value)) # cx_Oracle always returns datetime.datetime objects for # DATE and TIMESTAMP columns, but Django wants to see a @@ -86,13 +88,9 @@ def query_class(QueryClass, Database): value = datetime.datetime(value.year, value.month, value.day, value.hour, value.minute, value.second, value.fsecond) - if isinstance(field, DateTimeField): - # DateTimeField subclasses DateField so must be checked - # first. 
- pass - elif isinstance(field, DateField): + if field and field.get_internal_type() == 'DateField': value = value.date() - elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1): + elif field and field.get_internal_type() == 'TimeField' or (value.year == 1900 and value.month == value.day == 1): value = value.time() elif value.hour == value.minute == value.second == value.microsecond == 0: value = value.date() diff --git a/django/db/backends/sqlite3/base.py b/django/db/backends/sqlite3/base.py index 15b0e983ad2..071d421b583 100644 --- a/django/db/backends/sqlite3/base.py +++ b/django/db/backends/sqlite3/base.py @@ -10,7 +10,7 @@ from django.db.backends import * from django.db.backends.sqlite3.client import DatabaseClient from django.db.backends.sqlite3.creation import DatabaseCreation from django.db.backends.sqlite3.introspection import DatabaseIntrospection -from django.utils.safestring import SafeString +from django.utils.safestring import SafeString try: try: @@ -102,6 +102,26 @@ class DatabaseOperations(BaseDatabaseOperations): second = '%s-12-31 23:59:59.999999' return [first % value, second % value] + def convert_values(self, value, field): + """SQLite returns floats when it should be returning decimals, + and gets dates and datetimes wrong. + For consistency with other backends, coerce when required. 
+ """ + internal_type = field.get_internal_type() + if internal_type == 'DecimalField': + return util.typecast_decimal(field.format_number(value)) + elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField': + return int(value) + elif internal_type == 'DateField': + return util.typecast_date(value) + elif internal_type == 'DateTimeField': + return util.typecast_timestamp(value) + elif internal_type == 'TimeField': + return util.typecast_time(value) + + # No field, or the field isn't known to be a decimal or integer + return value + class DatabaseWrapper(BaseDatabaseWrapper): # SQLite requires LIKE statements to include an ESCAPE clause if the value diff --git a/django/db/models/__init__.py b/django/db/models/__init__.py index 5413133306f..0802f8695ee 100644 --- a/django/db/models/__init__.py +++ b/django/db/models/__init__.py @@ -5,6 +5,7 @@ from django.db.models.loading import get_apps, get_app, get_models, get_model, r from django.db.models.query import Q from django.db.models.manager import Manager from django.db.models.base import Model +from django.db.models.aggregates import * from django.db.models.fields import * from django.db.models.fields.subclassing import SubfieldBase from django.db.models.fields.files import FileField, ImageField diff --git a/django/db/models/aggregates.py b/django/db/models/aggregates.py new file mode 100644 index 00000000000..1f676253b5f --- /dev/null +++ b/django/db/models/aggregates.py @@ -0,0 +1,66 @@ +""" +Classes to represent the definitions of aggregate functions. +""" + +class Aggregate(object): + """ + Default Aggregate definition. + """ + def __init__(self, lookup, **extra): + """Instantiate a new aggregate. + + * lookup is the field on which the aggregate operates. + * extra is a dictionary of additional data to provide for the + aggregate definition + + Also utilizes the class variables: + * name, the identifier for this aggregate function. 
+ """ + self.lookup = lookup + self.extra = extra + + def _default_alias(self): + return '%s__%s' % (self.lookup, self.name.lower()) + default_alias = property(_default_alias) + + def add_to_query(self, query, alias, col, source, is_summary): + """Add the aggregate to the nominated query. + + This method is used to convert the generic Aggregate definition into a + backend-specific definition. + + * query is the backend-specific query instance to which the aggregate + is to be added. + * col is a column reference describing the subject field + of the aggregate. It can be an alias, or a tuple describing + a table and column name. + * source is the underlying field or aggregate definition for + the column reference. If the aggregate is not an ordinal or + computed type, this reference is used to determine the coerced + output type of the aggregate. + * is_summary is a boolean that is set True if the aggregate is a + summary value rather than an annotation. + """ + aggregate = getattr(query.aggregates_module, self.name) + query.aggregate_select[alias] = aggregate(col, source=source, is_summary=is_summary, **self.extra) + +class Avg(Aggregate): + name = 'Avg' + +class Count(Aggregate): + name = 'Count' + +class Max(Aggregate): + name = 'Max' + +class Min(Aggregate): + name = 'Min' + +class StdDev(Aggregate): + name = 'StdDev' + +class Sum(Aggregate): + name = 'Sum' + +class Variance(Aggregate): + name = 'Variance' diff --git a/django/db/models/manager.py b/django/db/models/manager.py index 683a8f8d104..4e8c6e94fb1 100644 --- a/django/db/models/manager.py +++ b/django/db/models/manager.py @@ -101,6 +101,12 @@ class Manager(object): def filter(self, *args, **kwargs): return self.get_query_set().filter(*args, **kwargs) + def aggregate(self, *args, **kwargs): + return self.get_query_set().aggregate(*args, **kwargs) + + def annotate(self, *args, **kwargs): + return self.get_query_set().annotate(*args, **kwargs) + def complex_filter(self, *args, **kwargs): return 
self.get_query_set().complex_filter(*args, **kwargs) diff --git a/django/db/models/query.py b/django/db/models/query.py index e2dcb1fa655..b5c9d2f25de 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -4,6 +4,7 @@ except NameError: from sets import Set as set # Python 2.3 fallback from django.db import connection, transaction, IntegrityError +from django.db.models.aggregates import Aggregate from django.db.models.fields import DateField from django.db.models.query_utils import Q, select_related_descend from django.db.models import signals, sql @@ -270,18 +271,47 @@ class QuerySet(object): else: requested = None max_depth = self.query.max_depth + extra_select = self.query.extra_select.keys() + aggregate_select = self.query.aggregate_select.keys() + index_start = len(extra_select) + aggregate_start = index_start + len(self.model._meta.fields) + for row in self.query.results_iter(): if fill_cache: - obj, _ = get_cached_row(self.model, row, index_start, - max_depth, requested=requested) + obj, aggregate_start = get_cached_row(self.model, row, + index_start, max_depth, requested=requested) else: - obj = self.model(*row[index_start:]) + # omit aggregates in object creation + obj = self.model(*row[index_start:aggregate_start]) + for i, k in enumerate(extra_select): setattr(obj, k, row[i]) + + # Add the aggregates to the model + for i, aggregate in enumerate(aggregate_select): + setattr(obj, aggregate, row[i+aggregate_start]) + yield obj + def aggregate(self, *args, **kwargs): + """ + Returns a dictionary containing the calculations (aggregation) + over the current queryset + + If args is present the expression is passed as a kwarg ussing + the Aggregate object's default alias. 
+ """ + for arg in args: + kwargs[arg.default_alias] = arg + + for (alias, aggregate_expr) in kwargs.items(): + self.query.add_aggregate(aggregate_expr, self.model, alias, + is_summary=True) + + return self.query.get_aggregation() + def count(self): """ Performs a SELECT COUNT() and returns the number of records as an @@ -553,6 +583,25 @@ class QuerySet(object): """ self.query.select_related = other.query.select_related + def annotate(self, *args, **kwargs): + """ + Return a query set in which the returned objects have been annotated + with data aggregated from related fields. + """ + for arg in args: + kwargs[arg.default_alias] = arg + + obj = self._clone() + + obj._setup_aggregate_query() + + # Add the aggregates to the query + for (alias, aggregate_expr) in kwargs.items(): + obj.query.add_aggregate(aggregate_expr, self.model, alias, + is_summary=False) + + return obj + def order_by(self, *field_names): """ Returns a new QuerySet instance with the ordering changed. @@ -641,6 +690,16 @@ class QuerySet(object): """ pass + def _setup_aggregate_query(self): + """ + Prepare the query for computing a result that contains aggregate annotations. + """ + opts = self.model._meta + if not self.query.group_by: + field_names = [f.attname for f in opts.fields] + self.query.add_fields(field_names, False) + self.query.set_group_by() + def as_sql(self): """ Returns the internal query's SQL and parameters (as a tuple). 
@@ -669,6 +728,8 @@ class ValuesQuerySet(QuerySet): len(self.field_names) != len(self.model._meta.fields)): self.query.trim_extra_select(self.extra_names) names = self.query.extra_select.keys() + self.field_names + names.extend(self.query.aggregate_select.keys()) + for row in self.query.results_iter(): yield dict(zip(names, row)) @@ -682,20 +743,25 @@ class ValuesQuerySet(QuerySet): """ self.query.clear_select_fields() self.extra_names = [] + self.aggregate_names = [] + if self._fields: - if not self.query.extra_select: + if not self.query.extra_select and not self.query.aggregate_select: field_names = list(self._fields) else: field_names = [] for f in self._fields: if self.query.extra_select.has_key(f): self.extra_names.append(f) + elif self.query.aggregate_select.has_key(f): + self.aggregate_names.append(f) else: field_names.append(f) else: # Default to all fields. field_names = [f.attname for f in self.model._meta.fields] + self.query.select = [] self.query.add_fields(field_names, False) self.query.default_cols = False self.field_names = field_names @@ -711,6 +777,7 @@ class ValuesQuerySet(QuerySet): c._fields = self._fields[:] c.field_names = self.field_names c.extra_names = self.extra_names + c.aggregate_names = self.aggregate_names if setup and hasattr(c, '_setup_query'): c._setup_query() return c @@ -718,10 +785,18 @@ class ValuesQuerySet(QuerySet): def _merge_sanity_check(self, other): super(ValuesQuerySet, self)._merge_sanity_check(other) if (set(self.extra_names) != set(other.extra_names) or - set(self.field_names) != set(other.field_names)): + set(self.field_names) != set(other.field_names) or + self.aggregate_names != other.aggregate_names): raise TypeError("Merging '%s' classes must involve the same values in each case." % self.__class__.__name__) + def _setup_aggregate_query(self): + """ + Prepare the query for computing a result that contains aggregate annotations. 
+ """ + self.query.set_group_by() + + super(ValuesQuerySet, self)._setup_aggregate_query() class ValuesListQuerySet(ValuesQuerySet): def iterator(self): @@ -729,14 +804,14 @@ class ValuesListQuerySet(ValuesQuerySet): if self.flat and len(self._fields) == 1: for row in self.query.results_iter(): yield row[0] - elif not self.query.extra_select: + elif not self.query.extra_select and not self.query.aggregate_select: for row in self.query.results_iter(): yield tuple(row) else: # When extra(select=...) is involved, the extra cols come are # always at the start of the row, so we need to reorder the fields # to match the order in self._fields. - names = self.query.extra_select.keys() + self.field_names + names = self.query.extra_select.keys() + self.field_names + self.query.aggregate_select.keys() for row in self.query.results_iter(): data = dict(zip(names, row)) yield tuple([data[f] for f in self._fields]) diff --git a/django/db/models/query_utils.py b/django/db/models/query_utils.py index 8dbb1ec6670..9463283f254 100644 --- a/django/db/models/query_utils.py +++ b/django/db/models/query_utils.py @@ -64,4 +64,3 @@ def select_related_descend(field, restricted, requested): if not restricted and field.null: return False return True - diff --git a/django/db/models/sql/aggregates.py b/django/db/models/sql/aggregates.py new file mode 100644 index 00000000000..6fdaf188c4d --- /dev/null +++ b/django/db/models/sql/aggregates.py @@ -0,0 +1,130 @@ +""" +Classes to represent the default SQL aggregate functions +""" + +class AggregateField(object): + """An internal field mockup used to identify aggregates in the + data-conversion parts of the database backend. + """ + def __init__(self, internal_type): + self.internal_type = internal_type + def get_internal_type(self): + return self.internal_type + +ordinal_aggregate_field = AggregateField('IntegerField') +computed_aggregate_field = AggregateField('FloatField') + +class Aggregate(object): + """ + Default SQL Aggregate. 
+ """ + is_ordinal = False + is_computed = False + sql_template = '%(function)s(%(field)s)' + + def __init__(self, col, source=None, is_summary=False, **extra): + """Instantiate an SQL aggregate + + * col is a column reference describing the subject field + of the aggregate. It can be an alias, or a tuple describing + a table and column name. + * source is the underlying field or aggregate definition for + the column reference. If the aggregate is not an ordinal or + computed type, this reference is used to determine the coerced + output type of the aggregate. + * extra is a dictionary of additional data to provide for the + aggregate definition + + Also utilizes the class variables: + * sql_function, the name of the SQL function that implements the + aggregate. + * sql_template, a template string that is used to render the + aggregate into SQL. + * is_ordinal, a boolean indicating if the output of this aggregate + is an integer (e.g., a count) + * is_computed, a boolean indicating if this output of this aggregate + is a computed float (e.g., an average), regardless of the input + type. + + """ + self.col = col + self.source = source + self.is_summary = is_summary + self.extra = extra + + # Follow the chain of aggregate sources back until you find an + # actual field, or an aggregate that forces a particular output + # type. This type of this field will be used to coerce values + # retrieved from the database. + tmp = self + + while tmp and isinstance(tmp, Aggregate): + if getattr(tmp, 'is_ordinal', False): + tmp = ordinal_aggregate_field + elif getattr(tmp, 'is_computed', False): + tmp = computed_aggregate_field + else: + tmp = tmp.source + + self.field = tmp + + def relabel_aliases(self, change_map): + if isinstance(self.col, (list, tuple)): + self.col = (change_map.get(self.col[0], self.col[0]), self.col[1]) + + def as_sql(self, quote_func=None): + "Return the aggregate, rendered as SQL." 
+ if not quote_func: + quote_func = lambda x: x + + if hasattr(self.col, 'as_sql'): + field_name = self.col.as_sql(quote_func) + elif isinstance(self.col, (list, tuple)): + field_name = '.'.join([quote_func(c) for c in self.col]) + else: + field_name = self.col + + params = { + 'function': self.sql_function, + 'field': field_name + } + params.update(self.extra) + + return self.sql_template % params + + +class Avg(Aggregate): + is_computed = True + sql_function = 'AVG' + +class Count(Aggregate): + is_ordinal = True + sql_function = 'COUNT' + sql_template = '%(function)s(%(distinct)s%(field)s)' + + def __init__(self, col, distinct=False, **extra): + super(Count, self).__init__(col, distinct=distinct and 'DISTINCT ' or '', **extra) + +class Max(Aggregate): + sql_function = 'MAX' + +class Min(Aggregate): + sql_function = 'MIN' + +class StdDev(Aggregate): + is_computed = True + + def __init__(self, col, sample=False, **extra): + super(StdDev, self).__init__(col, **extra) + self.sql_function = sample and 'STDDEV_SAMP' or 'STDDEV_POP' + +class Sum(Aggregate): + sql_function = 'SUM' + +class Variance(Aggregate): + is_computed = True + + def __init__(self, col, sample=False, **extra): + super(Variance, self).__init__(col, **extra) + self.sql_function = sample and 'VAR_SAMP' or 'VAR_POP' + diff --git a/django/db/models/sql/datastructures.py b/django/db/models/sql/datastructures.py index 913d8fde25c..4d53999c794 100644 --- a/django/db/models/sql/datastructures.py +++ b/django/db/models/sql/datastructures.py @@ -25,59 +25,6 @@ class RawValue(object): def __init__(self, value): self.value = value -class Aggregate(object): - """ - Base class for all aggregate-related classes (min, max, avg, count, sum). - """ - def relabel_aliases(self, change_map): - """ - Relabel the column alias, if necessary. Must be implemented by - subclasses. - """ - raise NotImplementedError - - def as_sql(self, quote_func=None): - """ - Returns the SQL string fragment for this object. 
- - The quote_func function is used to quote the column components. If - None, it defaults to doing nothing. - - Must be implemented by subclasses. - """ - raise NotImplementedError - -class Count(Aggregate): - """ - Perform a count on the given column. - """ - def __init__(self, col='*', distinct=False): - """ - Set the column to count on (defaults to '*') and set whether the count - should be distinct or not. - """ - self.col = col - self.distinct = distinct - - def relabel_aliases(self, change_map): - c = self.col - if isinstance(c, (list, tuple)): - self.col = (change_map.get(c[0], c[0]), c[1]) - - def as_sql(self, quote_func=None): - if not quote_func: - quote_func = lambda x: x - if isinstance(self.col, (list, tuple)): - col = ('%s.%s' % tuple([quote_func(c) for c in self.col])) - elif hasattr(self.col, 'as_sql'): - col = self.col.as_sql(quote_func) - else: - col = self.col - if self.distinct: - return 'COUNT(DISTINCT %s)' % col - else: - return 'COUNT(%s)' % col - class Date(object): """ Add a date selection column. 
diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index b12912461fb..156617f8073 100644 --- a/django/db/models/sql/query.py +++ b/django/db/models/sql/query.py @@ -12,12 +12,13 @@ from copy import deepcopy from django.utils.tree import Node from django.utils.datastructures import SortedDict from django.utils.encoding import force_unicode +from django.db.backends.util import truncate_name from django.db import connection from django.db.models import signals from django.db.models.fields import FieldDoesNotExist from django.db.models.query_utils import select_related_descend +from django.db.models.sql import aggregates as base_aggregates_module from django.db.models.sql.where import WhereNode, Constraint, EverythingNode, AND, OR -from django.db.models.sql.datastructures import Count from django.core.exceptions import FieldError from datastructures import EmptyResultSet, Empty, MultiJoin from constants import * @@ -40,6 +41,7 @@ class BaseQuery(object): alias_prefix = 'T' query_terms = QUERY_TERMS + aggregates_module = base_aggregates_module def __init__(self, model, connection, where=WhereNode): self.model = model @@ -73,6 +75,9 @@ class BaseQuery(object): self.select_related = False self.related_select_cols = [] + # SQL aggregate-related attributes + self.aggregate_select = SortedDict() # Maps alias -> SQL aggregate function + # Arbitrary maximum limit for select_related. Prevents infinite # recursion. Can be changed by the depth parameter to select_related(). 
self.max_depth = 5 @@ -178,6 +183,7 @@ class BaseQuery(object): obj.distinct = self.distinct obj.select_related = self.select_related obj.related_select_cols = [] + obj.aggregate_select = self.aggregate_select.copy() obj.max_depth = self.max_depth obj.extra_select = self.extra_select.copy() obj.extra_tables = self.extra_tables @@ -194,6 +200,35 @@ class BaseQuery(object): obj._setup_query() return obj + def convert_values(self, value, field): + """Convert the database-returned value into a type that is consistent + across database backends. + + By default, this defers to the underlying backend operations, but + it can be overridden by Query classes for specific backends. + """ + return self.connection.ops.convert_values(value, field) + + def resolve_aggregate(self, value, aggregate): + """Resolve the value of aggregates returned by the database to + consistent (and reasonable) types. + + This is required because of the predisposition of certain backends + to return Decimal and long types when they are not needed. + """ + if value is None: + # Return None as-is + return value + elif aggregate.is_ordinal: + # Any ordinal aggregate (e.g., count) returns an int + return int(value) + elif aggregate.is_computed: + # Any computed aggregate (e.g., avg) returns a float + return float(value) + else: + # Return value depends on the type of the field being processed. + return self.convert_values(value, aggregate.field) + def results_iter(self): """ Returns an iterator over the results from executing this query. 
@@ -212,29 +247,78 @@ class BaseQuery(object): else: fields = self.model._meta.fields row = self.resolve_columns(row, fields) + + if self.aggregate_select: + aggregate_start = len(self.extra_select.keys()) + len(self.select) + row = tuple(row[:aggregate_start]) + tuple([ + self.resolve_aggregate(value, aggregate) + for (alias, aggregate), value + in zip(self.aggregate_select.items(), row[aggregate_start:]) + ]) + yield row + def get_aggregation(self): + """ + Returns the dictionary with the values of the existing aggregations. + """ + if not self.aggregate_select: + return {} + + # If there is a group by clause, aggregating does not add useful + # information but retrieves only the first row. Aggregate + # over the subquery instead. + if self.group_by: + from subqueries import AggregateQuery + query = AggregateQuery(self.model, self.connection) + + obj = self.clone() + + # Remove any aggregates marked for reduction from the subquery + # and move them to the outer AggregateQuery. + for alias, aggregate in self.aggregate_select.items(): + if aggregate.is_summary: + query.aggregate_select[alias] = aggregate + del obj.aggregate_select[alias] + + query.add_subquery(obj) + else: + query = self + self.select = [] + self.default_cols = False + self.extra_select = {} + + query.clear_ordering(True) + query.clear_limits() + query.select_related = False + query.related_select_cols = [] + query.related_select_fields = [] + + return dict([ + (alias, self.resolve_aggregate(val, aggregate)) + for (alias, aggregate), val + in zip(query.aggregate_select.items(), query.execute_sql(SINGLE)) + ]) + def get_count(self): """ Performs a COUNT() query using the current filter constraints. 
""" - from subqueries import CountQuery obj = self.clone() - obj.clear_ordering(True) - obj.clear_limits() - obj.select_related = False - obj.related_select_cols = [] - obj.related_select_fields = [] - if len(obj.select) > 1: - obj = self.clone(CountQuery, _query=obj, where=self.where_class(), - distinct=False) - obj.select = [] - obj.extra_select = SortedDict() + if len(self.select) > 1: + # If a select clause exists, then the query has already started to + # specify the columns that are to be returned. + # In this case, we need to use a subquery to evaluate the count. + from subqueries import AggregateQuery + subquery = obj + subquery.clear_ordering(True) + subquery.clear_limits() + + obj = AggregateQuery(obj.model, obj.connection) + obj.add_subquery(subquery) + obj.add_count_column() - data = obj.execute_sql(SINGLE) - if not data: - return 0 - number = data[0] + number = obj.get_aggregation()[None] # Apply offset and limit constraints manually, since using LIMIT/OFFSET # in SQL (in variants that provide them) doesn't change the COUNT @@ -450,25 +534,41 @@ class BaseQuery(object): for col in self.select: if isinstance(col, (list, tuple)): r = '%s.%s' % (qn(col[0]), qn(col[1])) - if with_aliases and col[1] in col_aliases: - c_alias = 'Col%d' % len(col_aliases) - result.append('%s AS %s' % (r, c_alias)) - aliases.add(c_alias) - col_aliases.add(c_alias) + if with_aliases: + if col[1] in col_aliases: + c_alias = 'Col%d' % len(col_aliases) + result.append('%s AS %s' % (r, c_alias)) + aliases.add(c_alias) + col_aliases.add(c_alias) + else: + result.append('%s AS %s' % (r, col[1])) + aliases.add(r) + col_aliases.add(col[1]) else: result.append(r) aliases.add(r) col_aliases.add(col[1]) else: result.append(col.as_sql(quote_func=qn)) + if hasattr(col, 'alias'): aliases.add(col.alias) col_aliases.add(col.alias) + elif self.default_cols: cols, new_aliases = self.get_default_columns(with_aliases, col_aliases) result.extend(cols) aliases.update(new_aliases) + + result.extend([ 
+ '%s%s' % ( + aggregate.as_sql(quote_func=qn), + alias is not None and ' AS %s' % qn(alias) or '' + ) + for alias, aggregate in self.aggregate_select.items() + ]) + for table, col in self.related_select_cols: r = '%s.%s' % (qn(table), qn(col)) if with_aliases and col in col_aliases: @@ -538,7 +638,7 @@ class BaseQuery(object): Returns a list of strings that are joined together to go after the "FROM" part of the query, as well as a list any extra parameters that need to be included. Sub-classes, can override this to create a - from-clause via a "select", for example (e.g. CountQuery). + from-clause via a "select". This should only be called after any SQL construction methods that might change the tables we need. This means the select columns and @@ -635,10 +735,13 @@ class BaseQuery(object): order = asc result.append('%s %s' % (field, order)) continue + col, order = get_order_dir(field, asc) + if col in self.aggregate_select: + result.append('%s %s' % (col, order)) + continue if '.' in field: # This came in through an extra(order_by=...) addition. Pass it # on verbatim. 
- col, order = get_order_dir(field, asc) table, col = col.split('.', 1) if (table, col) not in processed_pairs: elt = '%s.%s' % (qn(table), col) @@ -657,7 +760,6 @@ class BaseQuery(object): ordering_aliases.append(elt) result.append('%s %s' % (elt, order)) else: - col, order = get_order_dir(field, asc) elt = qn2(col) if distinct and col not in select_aliases: ordering_aliases.append(elt) @@ -1068,6 +1170,48 @@ class BaseQuery(object): self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1, used, next, restricted, new_nullable, dupe_set, avoid) + def add_aggregate(self, aggregate, model, alias, is_summary): + """ + Adds a single aggregate expression to the Query + """ + opts = model._meta + field_list = aggregate.lookup.split(LOOKUP_SEP) + if (len(field_list) == 1 and + aggregate.lookup in self.aggregate_select.keys()): + # Aggregate is over an annotation + field_name = field_list[0] + col = field_name + source = self.aggregate_select[field_name] + elif (len(field_list) > 1 or + field_list[0] not in [i.name for i in opts.fields]): + field, source, opts, join_list, last, _ = self.setup_joins( + field_list, opts, self.get_initial_alias(), False) + + # Process the join chain to see if it can be trimmed + _, _, col, _, join_list = self.trim_joins(source, join_list, last, False) + + # If the aggregate references a model or field that requires a join, + # those joins must be LEFT OUTER - empty join rows must be returned + # in order for zeros to be returned for those aggregates. 
+ for column_alias in join_list: + self.promote_alias(column_alias, unconditional=True) + + col = (join_list[-1], col) + else: + # Aggregate references a normal field + field_name = field_list[0] + source = opts.get_field(field_name) + if not (self.group_by and is_summary): + # Only use a column alias if this is a + # standalone aggregate, or an annotation + col = (opts.db_table, source.column) + else: + col = field_name + + # Add the aggregate to the query + alias = truncate_name(alias, self.connection.ops.max_name_length()) + aggregate.add_to_query(self, alias, col=col, source=source, is_summary=is_summary) + def add_filter(self, filter_expr, connector=AND, negate=False, trim=False, can_reuse=None, process_extras=True): """ @@ -1119,6 +1263,11 @@ class BaseQuery(object): elif callable(value): value = value() + for alias, aggregate in self.aggregate_select.items(): + if alias == parts[0]: + self.having.add((aggregate, lookup_type, value), AND) + return + opts = self.get_meta() alias = self.get_initial_alias() allow_many = trim or not negate @@ -1131,38 +1280,9 @@ class BaseQuery(object): self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]), can_reuse) return - final = len(join_list) - penultimate = last.pop() - if penultimate == final: - penultimate = last.pop() - if trim and len(join_list) > 1: - extra = join_list[penultimate:] - join_list = join_list[:penultimate] - final = penultimate - penultimate = last.pop() - col = self.alias_map[extra[0]][LHS_JOIN_COL] - for alias in extra: - self.unref_alias(alias) - else: - col = target.column - alias = join_list[-1] - while final > 1: - # An optimization: if the final join is against the same column as - # we are comparing against, we can go back one step in the join - # chain and compare against the lhs of the join instead (and then - # repeat the optimization). The result, potentially, involves less - # table joins. 
- join = self.alias_map[alias] - if col != join[RHS_JOIN_COL]: - break - self.unref_alias(alias) - alias = join[LHS_ALIAS] - col = join[LHS_JOIN_COL] - join_list = join_list[:-1] - final -= 1 - if final == penultimate: - penultimate = last.pop() + # Process the join chain to see if it can be trimmed + final, penultimate, col, alias, join_list = self.trim_joins(target, join_list, last, trim) if (lookup_type == 'isnull' and value is True and not negate and final > 1): @@ -1313,7 +1433,7 @@ class BaseQuery(object): field, model, direct, m2m = opts.get_field_by_name(f.name) break else: - names = opts.get_all_field_names() + names = opts.get_all_field_names() + self.aggregate_select.keys() raise FieldError("Cannot resolve keyword %r into field. " "Choices are: %s" % (name, ", ".join(names))) @@ -1462,6 +1582,43 @@ class BaseQuery(object): return field, target, opts, joins, last, extra_filters + def trim_joins(self, target, join_list, last, trim): + """An optimization: if the final join is against the same column as + we are comparing against, we can go back one step in a join + chain and compare against the LHS of the join instead (and then + repeat the optimization). The result, potentially, involves less + table joins. 
+ + Returns a tuple + """ + final = len(join_list) + penultimate = last.pop() + if penultimate == final: + penultimate = last.pop() + if trim and len(join_list) > 1: + extra = join_list[penultimate:] + join_list = join_list[:penultimate] + final = penultimate + penultimate = last.pop() + col = self.alias_map[extra[0]][LHS_JOIN_COL] + for alias in extra: + self.unref_alias(alias) + else: + col = target.column + alias = join_list[-1] + while final > 1: + join = self.alias_map[alias] + if col != join[RHS_JOIN_COL]: + break + self.unref_alias(alias) + alias = join[LHS_ALIAS] + col = join[LHS_JOIN_COL] + join_list = join_list[:-1] + final -= 1 + if final == penultimate: + penultimate = last.pop() + return final, penultimate, col, alias, join_list + def update_dupe_avoidance(self, opts, col, alias): """ For a column that is one of multiple pointing to the same table, update @@ -1554,6 +1711,7 @@ class BaseQuery(object): """ alias = self.get_initial_alias() opts = self.get_meta() + try: for name in field_names: field, target, u2, joins, u3, u4 = self.setup_joins( @@ -1574,7 +1732,7 @@ class BaseQuery(object): except MultiJoin: raise FieldError("Invalid field name: '%s'" % name) except FieldError: - names = opts.get_all_field_names() + self.extra_select.keys() + names = opts.get_all_field_names() + self.extra_select.keys() + self.aggregate_select.keys() names.sort() raise FieldError("Cannot resolve keyword %r into field. " "Choices are: %s" % (name, ", ".join(names))) @@ -1609,38 +1767,52 @@ class BaseQuery(object): if force_empty: self.default_ordering = False + def set_group_by(self): + """ + Expands the GROUP BY clause required by the query. + + This will usually be the set of all non-aggregate fields in the + return data. If the database backend supports grouping by the + primary key, and the query would be equivalent, the optimization + will be made automatically. 
+ """ + if self.connection.features.allows_group_by_pk: + if len(self.select) == len(self.model._meta.fields): + self.group_by.append('.'.join([self.model._meta.db_table, + self.model._meta.pk.column])) + return + + for sel in self.select: + self.group_by.append(sel) + def add_count_column(self): """ Converts the query to do count(...) or count(distinct(pk)) in order to get its size. """ - # TODO: When group_by support is added, this needs to be adjusted so - # that it doesn't totally overwrite the select list. if not self.distinct: if not self.select: - select = Count() + count = self.aggregates_module.Count('*', is_summary=True) else: assert len(self.select) == 1, \ "Cannot add count col with multiple cols in 'select': %r" % self.select - select = Count(self.select[0]) + count = self.aggregates_module.Count(self.select[0]) else: opts = self.model._meta if not self.select: - select = Count((self.join((None, opts.db_table, None, None)), - opts.pk.column), True) + count = self.aggregates_module.Count((self.join((None, opts.db_table, None, None)), opts.pk.column), + is_summary=True, distinct=True) else: # Because of SQL portability issues, multi-column, distinct # counts need a sub-query -- see get_count() for details. assert len(self.select) == 1, \ "Cannot add count col with multiple cols in 'select'." - select = Count(self.select[0], True) + count = self.aggregates_module.Count(self.select[0], distinct=True) # Distinct handling is done in Count(), so don't do it at this # level. 
self.distinct = False - self.select = [select] - self.select_fields = [None] - self.extra_select = {} + self.aggregate_select = {None: count} def add_select_related(self, fields): """ @@ -1758,7 +1930,6 @@ class BaseQuery(object): return empty_iter() else: return - cursor = self.connection.cursor() cursor.execute(sql, params) diff --git a/django/db/models/sql/subqueries.py b/django/db/models/sql/subqueries.py index 524b0894c51..0a59b403c8e 100644 --- a/django/db/models/sql/subqueries.py +++ b/django/db/models/sql/subqueries.py @@ -9,7 +9,7 @@ from django.db.models.sql.query import Query from django.db.models.sql.where import AND, Constraint __all__ = ['DeleteQuery', 'UpdateQuery', 'InsertQuery', 'DateQuery', - 'CountQuery'] + 'AggregateQuery'] class DeleteQuery(Query): """ @@ -400,15 +400,25 @@ class DateQuery(Query): self.distinct = True self.order_by = order == 'ASC' and [1] or [-1] -class CountQuery(Query): +class AggregateQuery(Query): """ - A CountQuery knows how to take a normal query which would select over - multiple distinct columns and turn it into SQL that can be used on a - variety of backends (it requires a select in the FROM clause). + An AggregateQuery takes another query as a parameter to the FROM + clause and only selects the elements in the provided list. """ - def get_from_clause(self): - result, params = self._query.as_sql() - return ['(%s) A1' % result], params + def add_subquery(self, query): + self.subquery, self.sub_params = query.as_sql(with_col_aliases=True) - def get_ordering(self): - return () + def as_sql(self, quote_func=None): + """ + Creates the SQL for this query. Returns the SQL string and list of + parameters. 
+ """ + sql = ('SELECT %s FROM (%s) subquery' % ( + ', '.join([ + aggregate.as_sql() + for aggregate in self.aggregate_select.values() + ]), + self.subquery) + ) + params = self.sub_params + return (sql, params) diff --git a/django/test/testcases.py b/django/test/testcases.py index dcab078553a..81e14a0a142 100644 --- a/django/test/testcases.py +++ b/django/test/testcases.py @@ -14,6 +14,7 @@ from django.test.client import Client from django.utils import simplejson normalize_long_ints = lambda s: re.sub(r'(?%s' want = wrapper % want got = wrapper % got - + # Parse the want and got strings, and compare the parsings. try: want_root = parseString(want).firstChild @@ -174,7 +179,7 @@ class TestCase(unittest.TestCase): """Performs any pre-test setup. This includes: * Flushing the database. - * If the Test Case class has a 'fixtures' member, installing the + * If the Test Case class has a 'fixtures' member, installing the named fixtures. * If the Test Case class has a 'urls' member, replace the ROOT_URLCONF with it. diff --git a/docs/index.txt b/docs/index.txt index dc060b7b618..2e13d2dbab1 100644 --- a/docs/index.txt +++ b/docs/index.txt @@ -42,7 +42,7 @@ The model layer * **Models:** :ref:`Model syntax ` | :ref:`Field types ` | :ref:`Meta options ` * **QuerySets:** :ref:`Executing queries ` | :ref:`QuerySet method reference ` * **Model instances:** :ref:`Instance methods ` | :ref:`Accessing related objects ` - * **Advanced:** :ref:`Managers ` | :ref:`Raw SQL ` | :ref:`Transactions ` | :ref:`Custom fields ` + * **Advanced:** :ref:`Managers ` | :ref:`Raw SQL ` | :ref:`Transactions ` | :ref:`Aggregation ` | :ref:`Custom fields ` * **Other:** :ref:`Supported databases ` | :ref:`Legacy databases ` | :ref:`Providing initial data ` The template layer diff --git a/docs/ref/models/index.txt b/docs/ref/models/index.txt index 0e0510d7075..6918f335da3 100644 --- a/docs/ref/models/index.txt +++ b/docs/ref/models/index.txt @@ -7,7 +7,7 @@ Model API reference. 
For introductory material, see :ref:`topics-db-models`. .. toctree:: :maxdepth: 1 - + fields relations options diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt index be78f859696..fd6a7c1c628 100644 --- a/docs/ref/models/querysets.txt +++ b/docs/ref/models/querysets.txt @@ -158,6 +158,48 @@ In SQL terms, that evaluates to:: Note the second example is more restrictive. +``annotate(*args, **kwargs)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1 + +Annotates each object in the ``QuerySet`` with the provided list of +aggregate values (averages, sums, etc) that have been computed over +the objects that are related to the objects in the ``QuerySet``. +Each argument to ``annotate()`` is an annotation that will be added +to each object in the ``QuerySet`` that is returned. + +The aggregation functions that are provided by Django are described +in `Aggregation Functions`_ below. + +Annotations specified using keyword arguments will use the keyword as +the alias for the annotation. Anonymous arguments will have an alias +generated for them based upon the name of the aggregate function and +the model field that is being aggregated. + +For example, if you were manipulating a list of blogs, you may want +to determine how many entries have been made in each blog:: + + >>> q = Blog.objects.annotate(Count('entry')) + # The name of the first blog + >>> q[0].name + 'Blogasaurus' + # The number of entries on the first blog + >>> q[0].entry__count + 42 + +The ``Blog`` model doesn't define an ``entry_count`` attribute by itself, +but by using a keyword argument to specify the aggregate function, you can +control the name of the annotation:: + + >>> q = Blog.objects.annotate(number_of_entries=Count('entry')) + # The number of entries on the first blog, using the name provided + >>> q[0].number_of_entries + 42 + +For an in-depth discussion of aggregation, see :ref:`the topic guide on +Aggregation `. 
+ ``order_by(*fields)`` ~~~~~~~~~~~~~~~~~~~~~ @@ -931,6 +973,38 @@ exist with the given parameters. Note ``latest()`` exists purely for convenience and readability. +``aggregate(*args, **kwargs)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1 + +Returns a dictionary of aggregate values (averages, sums, etc) calculated +over the ``QuerySet``. Each argument to ``aggregate()`` specifies +a value that will be included in the dictionary that is returned. + +The aggregation functions that are provided by Django are described +in `Aggregation Functions`_ below. + +Aggregates specified using keyword arguments will use the keyword as +the name for the annotation. Anonymous arguments will have an name +generated for them based upon the name of the aggregate function and +the model field that is being aggregated. + +For example, if you were manipulating blog entries, you may want to know +the average number of authors contributing to blog entries:: + + >>> q = Blog.objects.aggregate(Count('entry')) + {'entry__count': 16} + +By using a keyword argument to specify the aggregate function, you can +control the name of the aggregation value that is returned:: + + >>> q = Blog.objects.aggregate(number_of_entries=Count('entry')) + {'number_of_entries': 2.34} + +For an in-depth discussion of aggregation, see :ref:`the topic guide on +Aggregation `. + .. _field-lookups: Field lookups @@ -1326,3 +1400,115 @@ SQL equivalents:: SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- SQLite +.. _aggregation-functions: + +Aggregation Functions +--------------------- + +.. versionadded:: 1.1 + +Django provides the following aggregation functions in the +``django.db.models`` module. + +``Avg`` +~~~~~~~ + +.. class:: Avg(field) + +Returns the mean value of the given field. + + * Default alias: ``__avg`` + * Return type: float + +``Count`` +~~~~~~~~~ + +.. class:: Count(field, distinct=False) + +Returns the number of objects that are related through the provided field. 
+ + * Default alias: ``__count`` + * Return type: integer + +Has one optional argument: + +.. attribute:: distinct + + If distinct=True, the count will only include unique instances. This has + the SQL equivalent of ``COUNT(DISTINCT field)``. Default value is ``False``. + +``Max`` +~~~~~~~ + +.. class:: Max(field) + +Returns the maximum value of the given field. + + * Default alias: ``__max`` + * Return type: same as input field + +``Min`` +~~~~~~~ + +.. class:: Min(field) + +Returns the minimum value of the given field. + + * Default alias: ``__min`` + * Return type: same as input field + +``StdDev`` +~~~~~~~~~ + +.. class:: StdDev(field, sample=False) + +Returns the standard deviation of the data in the provided field. + + * Default alias: ``__stddev`` + * Return type: float + +Has one optional argument: + +.. attribute:: sample + + By default, ``StdDev`` returns the population standard deviation. However, + if ``sample=True``, the return value will be the sample standard deviation. + +.. admonition:: SQLite + + SQLite doesn't provide ``StdDev`` out of the box. An implementation is + available as an extension module for SQLite. Consult the SQlite + documentation for instructions on obtaining and installing this extension. + +``Sum`` +~~~~~~~ + +.. class:: Sum(field) + +Computes the sum of all values of the given field. + + * Default alias: ``__sum`` + * Return type: same as input field + +``Variance`` +~~~~~~~~~ + +.. class:: Variance(field, sample=False) + +Returns the variance of the data in the provided field. + + * Default alias: ``__variance`` + * Return type: float + +Has one optional argument: + +.. attribute:: sample + + By default, ``Variance`` returns the population variance. However, + if ``sample=True``, the return value will be the sample variance. + +.. admonition:: SQLite + + SQLite doesn't provide ``Variance`` out of the box. An implementation is + available as an extension module for SQLite. 
Consult the SQlite + documentation for instructions on obtaining and installing this extension. diff --git a/docs/topics/db/aggregation.txt b/docs/topics/db/aggregation.txt new file mode 100644 index 00000000000..7b8b1ee64ea --- /dev/null +++ b/docs/topics/db/aggregation.txt @@ -0,0 +1,323 @@ +.. _topics-db-aggregation: + +============= + Aggregation +============= + +.. versionadded:: 1.1 + +.. currentmodule:: django.db.models + +The topic guide on :ref:`Django's database-abstraction API >> Book.objects.all() + +What we need is a way to calculate summary values over the objects that +belong to this ``QuerySet``. This is done by appending an ``aggregate()`` +clause onto the ``QuerySet``:: + + >>> from django.db.models import Avg + >>> Book.objects.all().aggregate(Avg('price')) + {'price__avg': 34.35} + +The ``all()`` is redundant in this example, so this could be simplified to:: + + >>> Book.objects.aggregate(Avg('price')) + {'price__avg': 34.35} + +The argument to the ``aggregate()`` clause describes the aggregate value that +we want to compute - in this case, the average of the ``price`` field on the +``Book`` model. A list of the aggregate functions that are available can be +found in the :ref:`QuerySet reference `. + +``aggregate()`` is a terminal clause for a ``QuerySet`` that, when invoked, +returns a dictionary of name-value pairs. The name is an identifier for the +aggregate value; the value is the computed aggregate. The name is +automatically generated from the name of the field and the aggregate function. +If you want to manually specify a name for the aggregate value, you can do so +by providing that name when you specify the aggregate clause:: + + >>> Book.objects.aggregate(average_price=Avg('price')) + {'average_price': 34.35} + +If you want to generate more than one aggregate, you just add another +argument to the ``aggregate()`` clause. 
So, if we also wanted to know +the maximum and minimum price of all books, we would issue the query:: + + >>> Book.objects.aggregate(Avg('price'), Max('price'), Min('price')) + {'price__avg': 34.35, 'price__max': Decimal('81.20'), 'price__min': Decimal('12.99')} + +Generating aggregates for each item in a QuerySet +================================================= + +The second way to generate summary values is to generate an independent +summary for each object in a ``Queryset``. For example, if you are retrieving +a list of books, you may want to know how many authors contributed to +each book. Each Book has a many-to-many relationship with the Author; we +want to summarize this relationship for each book in the ``QuerySet``. + +Per-object summaries can be generated using the ``annotate()`` clause. +When an ``annotate()`` clause is specified, each object in the ``QuerySet`` +will be annotated with the specified values. + +The syntax for these annotations is identical to that used for the +``aggregate()`` clause. Each argument to ``annotate()`` describes and +aggregate that is to be calculated. For example, to annotate Books with +the number of authors:: + + # Build an annotated queryset + >>> q = Book.objects.annotate(Count('authors')) + # Interrogate the first object in the queryset + >>> q[0] + + >>> q[0].authors__count + 2 + # Interrogate the second object in the queryset + >>> q[1] + + >>> q[1].authors__count + 1 + +As with ``aggregate()``, the name for the annotation is automatically derived +from the name of the aggregate function and the name of the field being +aggregated. You can override this default name by providing an alias when you +specify the annotation:: + + >>> q = Book.objects.annotate(num_authors=Count('authors')) + >>> q[0].num_authors + 2 + >>> q[1].num_authors + 1 + +Unlike ``aggregate()``, ``annotate()`` is *not* a terminal clause. 
The output +of the ``annotate()`` clause is a ``QuerySet``; this ``QuerySet`` can be +modified using any other ``QuerySet`` operation, including ``filter()``, +``order_by``, or even additional calls to ``annotate()``. + +Joins and aggregates +==================== + +So far, we have dealt with aggregates over fields that belong to the +model being queries. However, sometimes the value you want to aggregate +will belong to a model that is related to the model you are querying. + +When specifying the field to be aggregated in an aggregate functions, +Django will allow you to use the same +:ref:`double underscore notation ` that is used +when referring to related fields in filters. Django will then handle +any table joins that are required to retrieve and aggregate the +related value. + +For example, to find the price range of books offered in each store, +you could use the annotation:: + + >>> Store.objects.annotate(min_price=Min('books__price'), max_price=Min('books__price')) + +This tells Django to retrieve the Store model, join (through the +many-to-many relationship) with the Book model, and aggregate on the +price field of the book model to produce a minimum and maximum value. + +The same rules apply to the ``aggregate()`` clause. If you wanted to +know the lowest and highest price of any book that is available for sale +in a store, you could use the aggregate:: + + >>> Store.objects.aggregate(min_price=Min('books__price'), max_price=Min('books__price')) + +Join chains can be as deep as you required. For example, to extract the +age of the youngest author of any book available for sale, you could +issue the query:: + + >>> Store.objects.aggregate(youngest_age=Min('books__authors__age')) + +Aggregations and other QuerySet clauses +======================================= + +``filter()`` and ``exclude()`` +------------------------------ + +Aggregates can also participate in filters. 
Any ``filter()`` (or +``exclude()``) applied to normal model fields will have the effect of +constraining the objects that are considered for aggregation. + +When used with an ``annotate()`` clause, a filter has the effect of +constraining the objects for which an annotation is calculated. For example, +you can generate an annotated list of all books that have a title starting +with "Django" using the query:: + + >>> Book.objects.filter(name__startswith="Django").annotate(num_authors=Count('authors')) + +When used with an ``aggregate()`` clause, a filter has the effect of +constraining the objects over which the aggregate is calculated. +For example, you can generate the average price of all books with a +title that starts with "Django" using the query:: + + >>> Book.objects.filter(name__startswith="Django").aggregate(Avg('price')) + +Filtering on annotations +~~~~~~~~~~~~~~~~~~~~~~~~ + +Annotated values can also be filtered. The alias for the annotation can be +used in ``filter()`` and ``exclude()`` clauses in the same way as any other +model field. + +For example, to generate a list of books that have more than one author, +you can issue the query:: + + >>> Book.objects.annotate(num_authors=Count('authors')).filter(num_authors__gt=1) + +This query generates an annotated result set, and then generates a filter +based upon that annotation. + +Order of ``annotate()`` and ``filter()`` clauses +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When developing a complex query that involves both ``annotate()`` and +``filter()`` clauses, particular attention should be paid to the order +in which the clauses are applied to the ``QuerySet``. + +When an ``annotate()`` clause is applied to a query, the annotation is +computed over the state of the query up to the point where the annotation +is requested. 
The practical implication of this is that ``filter()`` and +``annotate()`` are not transitive operations -- that is, there is a +difference between the query:: + + >>> Publisher.objects.annotate(num_books=Count('book')).filter(book__rating__gt=3.0) + +and the query:: + + >>> Publisher.objects.filter(book__rating__gt=3.0).annotate(num_books=Count('book')) + +Both queries will return a list of Publishers that have at least one good +book (i.e., a book with a rating exceeding 3.0). However, the annotation in +the first query will provide the total number of all books published by the +publisher; the second query will only include good books in the annotated +count. In the first query, the annotation precedes the filter, so the +filter has no effect on the annotation. In the second query, the filter +preceeds the annotation, and as a result, the filter constrains the objects +considered when calculating the annotation. + +``order_by()`` +-------------- + +Annotations can be used as a basis for ordering. When you +define an ``order_by()`` clause, the aggregates you provide can reference +any alias defined as part of an ``annotate()`` clause in the query. + +For example, to order a ``QuerySet`` of books by the number of authors +that have contributed to the book, you could use the following query:: + + >>> Book.objects.annotate(num_authors=Count('authors')).order_by('num_authors') + +``values()`` +------------ + +Ordinarily, annotations are generated on a per-object basis - an annotated +``QuerySet`` will return one result for each object in the original +``Queryset``. However, when a ``values()`` clause is used to constrain the +columns that are returned in the result set, the method for evaluating +annotations is slightly different. Instead of returning an annotated result +for each result in the original ``QuerySet``, the original results are +grouped according to the unique combinations of the fields specified in the +``values()`` clause. 
An annotation is then provided for each unique group; +the annotation is computed over all members of the group. + +For example, consider an author query that attempts to find out the average +rating of books written by each author: + + >>> Author.objects.annotate(average_rating=Avg('book_rating')) + +This will return one result for each author in the database, annotate with +their average book rating. + +However, the result will be slightly different if you use a ``values()`` clause:: + + >>> Author.objects.values('name').annotate(average_rating=Avg('book_rating')) + +In this example, the authors will be grouped by name, so you will only get +an annotated result for each *unique* author name. This means if you have +two authors with the same name, their results will be merged into a single +result in the output of the query; the average will be computed as the +average over the books written by both authors. + +The annotation name will be added to the fields returned +as part of the ``ValuesQuerySet``. + +Order of ``annotate()`` and ``filter()`` clauses +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As with the ``filter()`` clause, the order in which ``annotate()`` and +``values()`` clauses are applied to a query is significant. If the +``values()`` clause precedes the ``annotate()``, the annotation will be +computed using the grouping described by the ``values()`` clause. + +However, if the ``annotate()`` clause precedes the ``values()`` clause, +the annotations will be generated over the entire query set. In this case, +the ``values()`` clause only constrains the fields that are generated on +output. 
+ +For example, if we reverse the order of the ``values()`` and ``annotate()`` +clause from our previous example:: + + >>> Author.objects.annotate(average_rating=Avg('book_rating')).values('name') + +This will now yield one unique result for each author; however, only +the author's name and the ``average_rating`` annotation will be returned +in the output data. + +Aggregating annotations +----------------------- + +You can also generate an aggregate on the result of an annotation. When you +define an ``aggregate()`` clause, the aggregates you provide can reference +any alias defined as part of an ``annotate()`` clause in the query. + +For example, if you wanted to calculate the average number of authors per +book you first annotate the set of books with the author count, then +aggregate that author count, referencing the annotation field:: + + >>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Avg('num_authors')) + {'num_authors__avg': 1.66} diff --git a/docs/topics/db/index.txt b/docs/topics/db/index.txt index 83e84d71960..949f4e87e9a 100644 --- a/docs/topics/db/index.txt +++ b/docs/topics/db/index.txt @@ -12,6 +12,7 @@ model maps to a single database table. 
models queries + aggregation managers sql transactions diff --git a/tests/modeltests/aggregation/__init__.py b/tests/modeltests/aggregation/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/modeltests/aggregation/fixtures/initial_data.json b/tests/modeltests/aggregation/fixtures/initial_data.json new file mode 100644 index 00000000000..a8f04925f03 --- /dev/null +++ b/tests/modeltests/aggregation/fixtures/initial_data.json @@ -0,0 +1,229 @@ +[ + { + "pk": 1, + "model": "aggregation.publisher", + "fields": { + "name": "Apress", + "num_awards": 3 + } + }, + { + "pk": 2, + "model": "aggregation.publisher", + "fields": { + "name": "Sams", + "num_awards": 1 + } + }, + { + "pk": 3, + "model": "aggregation.publisher", + "fields": { + "name": "Prentice Hall", + "num_awards": 7 + } + }, + { + "pk": 4, + "model": "aggregation.publisher", + "fields": { + "name": "Morgan Kaufmann", + "num_awards": 9 + } + }, + { + "pk": 1, + "model": "aggregation.book", + "fields": { + "publisher": 1, + "isbn": "159059725", + "name": "The Definitive Guide to Django: Web Development Done Right", + "price": "30.00", + "rating": 4.5, + "authors": [1, 2], + "pages": 447, + "pubdate": "2007-12-6" + } + }, + { + "pk": 2, + "model": "aggregation.book", + "fields": { + "publisher": 2, + "isbn": "067232959", + "name": "Sams Teach Yourself Django in 24 Hours", + "price": "23.09", + "rating": 3.0, + "authors": [3], + "pages": 528, + "pubdate": "2008-3-3" + } + }, + { + "pk": 3, + "model": "aggregation.book", + "fields": { + "publisher": 1, + "isbn": "159059996", + "name": "Practical Django Projects", + "price": "29.69", + "rating": 4.0, + "authors": [4], + "pages": 300, + "pubdate": "2008-6-23" + } + }, + { + "pk": 4, + "model": "aggregation.book", + "fields": { + "publisher": 3, + "isbn": "013235613", + "name": "Python Web Development with Django", + "price": "29.69", + "rating": 4.0, + "authors": [5, 6, 7], + "pages": 350, + "pubdate": "2008-11-3" + } + }, + { + "pk": 5, 
+ "model": "aggregation.book", + "fields": { + "publisher": 3, + "isbn": "013790395", + "name": "Artificial Intelligence: A Modern Approach", + "price": "82.80", + "rating": 4.0, + "authors": [8, 9], + "pages": 1132, + "pubdate": "1995-1-15" + } + }, + { + "pk": 6, + "model": "aggregation.book", + "fields": { + "publisher": 4, + "isbn": "155860191", + "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp", + "price": "75.00", + "rating": 5.0, + "authors": [8], + "pages": 946, + "pubdate": "1991-10-15" + } + }, + { + "pk": 1, + "model": "aggregation.store", + "fields": { + "books": [1, 2, 3, 4, 5, 6], + "name": "Amazon.com", + "original_opening": "1994-4-23 9:17:42", + "friday_night_closing": "23:59:59" + } + }, + { + "pk": 2, + "model": "aggregation.store", + "fields": { + "books": [1, 3, 5, 6], + "name": "Books.com", + "original_opening": "2001-3-15 11:23:37", + "friday_night_closing": "23:59:59" + } + }, + { + "pk": 3, + "model": "aggregation.store", + "fields": { + "books": [3, 4, 6], + "name": "Mamma and Pappa's Books", + "original_opening": "1945-4-25 16:24:14", + "friday_night_closing": "21:30:00" + } + }, + { + "pk": 1, + "model": "aggregation.author", + "fields": { + "age": 34, + "friends": [2, 4], + "name": "Adrian Holovaty" + } + }, + { + "pk": 2, + "model": "aggregation.author", + "fields": { + "age": 35, + "friends": [1, 7], + "name": "Jacob Kaplan-Moss" + } + }, + { + "pk": 3, + "model": "aggregation.author", + "fields": { + "age": 45, + "friends": [], + "name": "Brad Dayley" + } + }, + { + "pk": 4, + "model": "aggregation.author", + "fields": { + "age": 29, + "friends": [1], + "name": "James Bennett" + } + }, + { + "pk": 5, + "model": "aggregation.author", + "fields": { + "age": 37, + "friends": [6, 7], + "name": "Jeffrey Forcier " + } + }, + { + "pk": 6, + "model": "aggregation.author", + "fields": { + "age": 29, + "friends": [5, 7], + "name": "Paul Bissex" + } + }, + { + "pk": 7, + "model": "aggregation.author", + 
"fields": { + "age": 25, + "friends": [2, 5, 6], + "name": "Wesley J. Chun" + } + }, + { + "pk": 8, + "model": "aggregation.author", + "fields": { + "age": 57, + "friends": [9], + "name": "Peter Norvig" + } + }, + { + "pk": 9, + "model": "aggregation.author", + "fields": { + "age": 46, + "friends": [8], + "name": "Stuart Russell" + } + } +] diff --git a/tests/modeltests/aggregation/models.py b/tests/modeltests/aggregation/models.py new file mode 100644 index 00000000000..4c89d7db3e9 --- /dev/null +++ b/tests/modeltests/aggregation/models.py @@ -0,0 +1,379 @@ +# coding: utf-8 +from django.db import models + +try: + sorted +except NameError: + from django.utils.itercompat import sorted # For Python 2.3 + +class Author(models.Model): + name = models.CharField(max_length=100) + age = models.IntegerField() + friends = models.ManyToManyField('self', blank=True) + + def __unicode__(self): + return self.name + +class Publisher(models.Model): + name = models.CharField(max_length=300) + num_awards = models.IntegerField() + + def __unicode__(self): + return self.name + +class Book(models.Model): + isbn = models.CharField(max_length=9) + name = models.CharField(max_length=300) + pages = models.IntegerField() + rating = models.FloatField() + price = models.DecimalField(decimal_places=2, max_digits=6) + authors = models.ManyToManyField(Author) + publisher = models.ForeignKey(Publisher) + pubdate = models.DateField() + + def __unicode__(self): + return self.name + +class Store(models.Model): + name = models.CharField(max_length=300) + books = models.ManyToManyField(Book) + original_opening = models.DateTimeField() + friday_night_closing = models.TimeField() + + def __unicode__(self): + return self.name + +class Entries(models.Model): + EntryID = models.AutoField(primary_key=True, db_column='Entry ID') + Entry = models.CharField(unique=True, max_length=50) + Exclude = models.BooleanField() + +class Clues(models.Model): + ID = models.AutoField(primary_key=True) + EntryID = 
models.ForeignKey(Entries, verbose_name='Entry', db_column = 'Entry ID') + Clue = models.CharField(max_length=150) + +# Tests on 'aggergate' +# Different backends and numbers. +__test__ = {'API_TESTS': """ +>>> from django.core import management +>>> try: +... from decimal import Decimal +... except: +... from django.utils._decimal import Decimal +>>> from datetime import date + +# Reset the database representation of this app. +# This will return the database to a clean initial state. +>>> management.call_command('flush', verbosity=0, interactive=False) + +# Empty Call - request nothing, get nothing. +>>> Author.objects.all().aggregate() +{} + +>>> from django.db.models import Avg, Sum, Count, Max, Min + +# Single model aggregation +# + +# Single aggregate +# Average age of Authors +>>> Author.objects.all().aggregate(Avg('age')) +{'age__avg': 37.4...} + +# Multiple aggregates +# Average and Sum of Author ages +>>> Author.objects.all().aggregate(Sum('age'), Avg('age')) +{'age__sum': 337, 'age__avg': 37.4...} + +# Aggreates interact with filters, and only +# generate aggregate values for the filtered values +# Sum of the age of those older than 29 years old +>>> Author.objects.all().filter(age__gt=29).aggregate(Sum('age')) +{'age__sum': 254} + +# Depth-1 Joins +# + +# On Relationships with self +# Average age of the friends of each author +>>> Author.objects.all().aggregate(Avg('friends__age')) +{'friends__age__avg': 34.07...} + +# On ManyToMany Relationships +# + +# Forward +# Average age of the Authors of Books with a rating of less than 4.5 +>>> Book.objects.all().filter(rating__lt=4.5).aggregate(Avg('authors__age')) +{'authors__age__avg': 38.2...} + +# Backward +# Average rating of the Books whose Author's name contains the letter 'a' +>>> Author.objects.all().filter(name__contains='a').aggregate(Avg('book__rating')) +{'book__rating__avg': 4.0} + +# On OneToMany Relationships +# + +# Forward +# Sum of the number of awards of each Book's Publisher +>>> 
Book.objects.all().aggregate(Sum('publisher__num_awards')) +{'publisher__num_awards__sum': 30} + +# Backward +# Sum of the price of every Book that has a Publisher +>>> Publisher.objects.all().aggregate(Sum('book__price')) +{'book__price__sum': Decimal("270.27")} + +# Multiple Joins +# + +# Forward +>>> Store.objects.all().aggregate(Max('books__authors__age')) +{'books__authors__age__max': 57} + +# Backward +# Note that the very long default alias may be truncated +>>> Author.objects.all().aggregate(Min('book__publisher__num_awards')) +{'book__publisher__num_award...': 1} + +# Aggregate outputs can also be aliased. + +# Average amazon.com Book rating +>>> Store.objects.filter(name='Amazon.com').aggregate(amazon_mean=Avg('books__rating')) +{'amazon_mean': 4.08...} + +# Tests on annotate() + +# An empty annotate call does nothing but return the same QuerySet +>>> Book.objects.all().annotate().order_by('pk') +[, , , , , ] + +# Annotate inserts the alias into the model object with the aggregated result +>>> books = Book.objects.all().annotate(mean_age=Avg('authors__age')) +>>> books.get(pk=1).name +u'The Definitive Guide to Django: Web Development Done Right' + +>>> books.get(pk=1).mean_age +34.5 + +# On ManyToMany Relationships + +# Forward +# Average age of the Authors of each book with a rating less than 4.5 +>>> books = Book.objects.all().filter(rating__lt=4.5).annotate(Avg('authors__age')) +>>> sorted([(b.name, b.authors__age__avg) for b in books]) +[(u'Artificial Intelligence: A Modern Approach', 51.5), (u'Practical Django Projects', 29.0), (u'Python Web Development with Django', 30.3...), (u'Sams Teach Yourself Django in 24 Hours', 45.0)] + +# Count the number of authors of each book +>>> books = Book.objects.annotate(num_authors=Count('authors')) +>>> sorted([(b.name, b.num_authors) for b in books]) +[(u'Artificial Intelligence: A Modern Approach', 2), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 1), (u'Practical Django 
Projects', 1), (u'Python Web Development with Django', 3), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 2)] + +# Backward +# Average rating of the Books whose Author's names contains the letter 'a' +>>> authors = Author.objects.all().filter(name__contains='a').annotate(Avg('book__rating')) +>>> sorted([(a.name, a.book__rating__avg) for a in authors]) +[(u'Adrian Holovaty', 4.5), (u'Brad Dayley', 3.0), (u'Jacob Kaplan-Moss', 4.5), (u'James Bennett', 4.0), (u'Paul Bissex', 4.0), (u'Stuart Russell', 4.0)] + +# Count the number of books written by each author +>>> authors = Author.objects.annotate(num_books=Count('book')) +>>> sorted([(a.name, a.num_books) for a in authors]) +[(u'Adrian Holovaty', 1), (u'Brad Dayley', 1), (u'Jacob Kaplan-Moss', 1), (u'James Bennett', 1), (u'Jeffrey Forcier ', 1), (u'Paul Bissex', 1), (u'Peter Norvig', 2), (u'Stuart Russell', 1), (u'Wesley J. Chun', 1)] + +# On OneToMany Relationships + +# Forward +# Annotate each book with the number of awards of each Book's Publisher +>>> books = Book.objects.all().annotate(Sum('publisher__num_awards')) +>>> sorted([(b.name, b.publisher__num_awards__sum) for b in books]) +[(u'Artificial Intelligence: A Modern Approach', 7), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 9), (u'Practical Django Projects', 3), (u'Python Web Development with Django', 7), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 3)] + +# Backward +# Annotate each publisher with the sum of the price of all books sold +>>> publishers = Publisher.objects.all().annotate(Sum('book__price')) +>>> sorted([(p.name, p.book__price__sum) for p in publishers]) +[(u'Apress', Decimal("59.69")), (u'Morgan Kaufmann', Decimal("75.00")), (u'Prentice Hall', Decimal("112.49")), (u'Sams', Decimal("23.09"))] + +# Calls to values() are not commutative over annotate(). 
+ +# Calling values on a queryset that has annotations returns the output +# as a dictionary +>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values() +[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}] + +>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('pk', 'isbn', 'mean_age') +[{'pk': 1, 'isbn': u'159059725', 'mean_age': 34.5}] + +# Calling it with parameters reduces the output but does not remove the +# annotation. +>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('name') +[{'name': u'The Definitive Guide to Django: Web Development Done Right', 'mean_age': 34.5}] + +# An empty values() call before annotating has the same effect as an +# empty values() call after annotating +>>> Book.objects.filter(pk=1).values().annotate(mean_age=Avg('authors__age')) +[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}] + +# Calling annotate() on a ValuesQuerySet annotates over the groups of +# fields to be selected by the ValuesQuerySet. + +# Note that an extra parameter is added to each dictionary. 
This +# parameter is a queryset representing the objects that have been +# grouped to generate the annotation + +>>> Book.objects.all().values('rating').annotate(n_authors=Count('authors__id'), mean_age=Avg('authors__age')).order_by('rating') +[{'rating': 3.0, 'n_authors': 1, 'mean_age': 45.0}, {'rating': 4.0, 'n_authors': 6, 'mean_age': 37.1...}, {'rating': 4.5, 'n_authors': 2, 'mean_age': 34.5}, {'rating': 5.0, 'n_authors': 1, 'mean_age': 57.0}] + +# If a join doesn't match any objects, an aggregate returns None +>>> authors = Author.objects.all().annotate(Avg('friends__age')).order_by('id') +>>> len(authors) +9 +>>> sorted([(a.name, a.friends__age__avg) for a in authors]) +[(u'Adrian Holovaty', 32.0), (u'Brad Dayley', None), (u'Jacob Kaplan-Moss', 29.5), (u'James Bennett', 34.0), (u'Jeffrey Forcier ', 27.0), (u'Paul Bissex', 31.0), (u'Peter Norvig', 46.0), (u'Stuart Russell', 57.0), (u'Wesley J. Chun', 33.6...)] + + +# The Count aggregation function allows an extra parameter: distinct. 
+# This restricts the count results to unique items +>>> Book.objects.all().aggregate(Count('rating')) +{'rating__count': 6} + +>>> Book.objects.all().aggregate(Count('rating', distinct=True)) +{'rating__count': 4} + +# Retrieving the grouped objects + +# When using Count you can also omit the primary key and refer only to +# the related field name if you want to count all the related objects +# and not a specific column +>>> explicit = list(Author.objects.annotate(Count('book__id'))) +>>> implicit = list(Author.objects.annotate(Count('book'))) +>>> explicit == implicit +True + +# Ordering is allowed on aggregates +>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('oldest', 'rating') +[{'rating': 4.5, 'oldest': 35}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.0, 'oldest': 57}, {'rating': 5.0, 'oldest': 57}] + +>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('-oldest', '-rating') +[{'rating': 5.0, 'oldest': 57}, {'rating': 4.0, 'oldest': 57}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.5, 'oldest': 35}] + +# It is possible to aggregate over annotated values +>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors')) +{'num_authors__avg': 1.66...} + +# You can filter the results based on the aggregation alias. 
+ +# Lets add a publisher to test the different possibilities for filtering +>>> p = Publisher(name='Expensive Publisher', num_awards=0) +>>> p.save() +>>> Book(name='ExpensiveBook1', pages=1, isbn='111', rating=3.5, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,1)).save() +>>> Book(name='ExpensiveBook2', pages=1, isbn='222', rating=4.0, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,2)).save() +>>> Book(name='ExpensiveBook3', pages=1, isbn='333', rating=4.5, price=Decimal("35"), publisher=p, pubdate=date(2008,12,3)).save() + +# Publishers that have: + +# (i) more than one book +>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk') +[, , ] + +# (ii) a book that cost less than 40 +>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).order_by('pk') +[, , , , ] + +# (iii) more than one book and (at least) a book that cost less than 40 +>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=Decimal("40.0")).order_by('pk') +[, , ] + +# (iv) more than one book that costs less than $40 +>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk') +[] + +# Now a bit of testing on the different lookup types +# + +>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 3]).order_by('pk') +[, , , , ] + +>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 2]).order_by('pk') +[, , , ] + +>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__in=[1, 3]).order_by('pk') +[, , ] + +>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__isnull=True) +[] + +>>> p.delete() + +# Does Author X have any friends? 
(or better, how many friends does author X have) +>> Author.objects.filter(pk=1).aggregate(Count('friends__id')) +{'friends__id__count': 2.0} + +# Give me a list of all Books with more than one author +>>> Book.objects.all().annotate(num_authors=Count('authors__name')).filter(num_authors__gte=2).order_by('pk') +[, ] + +# Give me a list of all Authors that have no friends +>>> Author.objects.all().annotate(num_friends=Count('friends__id', distinct=True)).filter(num_friends=0).order_by('pk') +[] + +# Give me a list of all publishers that have published more than one book +>>> Publisher.objects.all().annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk') +[, ] + +# Give me a list of all publishers that have published more than one book that costs less than 40 +>>> Publisher.objects.all().filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1) +[] + +# Give me a list of all Books that were written by X and one other author. +>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1) +[] + +# Give me the average rating of all Books that were written by X and one other author. 
+#(Aggregate over objects discovered using membership of the m2m set) + +# Adding an existing author to another book to test it the right way +>>> a = Author.objects.get(name__contains='Norvig') +>>> b = Book.objects.get(name__contains='Done Right') +>>> b.authors.add(a) +>>> b.save() + +# This should do it +>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1).aggregate(Avg('rating')) +{'rating__avg': 4.25} +>>> b.authors.remove(a) + +# Give me a list of all Authors that have published a book with at least one other person +# (Filters over a count generated on a related object) +# +# Cheating: [a for a in Author.objects.all().annotate(num_coleagues=Count('book__authors__id'), num_books=Count('book__id', distinct=True)) if a.num_coleagues - a.num_books > 0] +# F-Syntax is required. Will be fixed after F objects are available + +# Tests on fields with non-default table and column names. +>>> Clues.objects.values('EntryID__Entry').annotate(Appearances=Count('EntryID'), Distinct_Clues=Count('Clue', distinct=True)) +[] + +# Aggregates also work on dates, times and datetimes +>>> Publisher.objects.annotate(earliest_book=Min('book__pubdate')).order_by('earliest_book').values() +[{'earliest_book': datetime.date(1991, 10, 15), 'num_awards': 9, 'id': 4, 'name': u'Morgan Kaufmann'}, {'earliest_book': datetime.date(1995, 1, 15), 'num_awards': 7, 'id': 3, 'name': u'Prentice Hall'}, {'earliest_book': datetime.date(2007, 12, 6), 'num_awards': 3, 'id': 1, 'name': u'Apress'}, {'earliest_book': datetime.date(2008, 3, 3), 'num_awards': 1, 'id': 2, 'name': u'Sams'}] + +>>> Store.objects.aggregate(Max('friday_night_closing'), Min("original_opening")) +{'friday_night_closing__max': datetime.time(23, 59, 59), 'original_opening__min': datetime.datetime(1945, 4, 25, 16, 24, 14)} + +# values_list() can also be used + +>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('pk', 'isbn', 
'mean_age') +[(1, u'159059725', 34.5)] + +>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('isbn') +[(u'159059725',)] + +>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age') +[(34.5,)] + +>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age', flat=True) +[34.5] + +"""} diff --git a/tests/regressiontests/aggregation_regress/__init__.py b/tests/regressiontests/aggregation_regress/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/regressiontests/aggregation_regress/fixtures/initial_data.json b/tests/regressiontests/aggregation_regress/fixtures/initial_data.json new file mode 100644 index 00000000000..a632c4077ae --- /dev/null +++ b/tests/regressiontests/aggregation_regress/fixtures/initial_data.json @@ -0,0 +1,229 @@ +[ + { + "pk": 1, + "model": "aggregation_regress.publisher", + "fields": { + "name": "Apress", + "num_awards": 3 + } + }, + { + "pk": 2, + "model": "aggregation_regress.publisher", + "fields": { + "name": "Sams", + "num_awards": 1 + } + }, + { + "pk": 3, + "model": "aggregation_regress.publisher", + "fields": { + "name": "Prentice Hall", + "num_awards": 7 + } + }, + { + "pk": 4, + "model": "aggregation_regress.publisher", + "fields": { + "name": "Morgan Kaufmann", + "num_awards": 9 + } + }, + { + "pk": 1, + "model": "aggregation_regress.book", + "fields": { + "publisher": 1, + "isbn": "159059725", + "name": "The Definitive Guide to Django: Web Development Done Right", + "price": "30.00", + "rating": 4.5, + "authors": [1, 2], + "pages": 447, + "pubdate": "2007-12-6" + } + }, + { + "pk": 2, + "model": "aggregation_regress.book", + "fields": { + "publisher": 2, + "isbn": "067232959", + "name": "Sams Teach Yourself Django in 24 Hours", + "price": "23.09", + "rating": 3.0, + "authors": [3], + "pages": 528, + "pubdate": "2008-3-3" + } + }, + { + "pk": 3, + "model": "aggregation_regress.book", + "fields": { + 
"publisher": 1, + "isbn": "159059996", + "name": "Practical Django Projects", + "price": "29.69", + "rating": 4.0, + "authors": [4], + "pages": 300, + "pubdate": "2008-6-23" + } + }, + { + "pk": 4, + "model": "aggregation_regress.book", + "fields": { + "publisher": 3, + "isbn": "013235613", + "name": "Python Web Development with Django", + "price": "29.69", + "rating": 4.0, + "authors": [5, 6, 7], + "pages": 350, + "pubdate": "2008-11-3" + } + }, + { + "pk": 5, + "model": "aggregation_regress.book", + "fields": { + "publisher": 3, + "isbn": "013790395", + "name": "Artificial Intelligence: A Modern Approach", + "price": "82.80", + "rating": 4.0, + "authors": [8, 9], + "pages": 1132, + "pubdate": "1995-1-15" + } + }, + { + "pk": 6, + "model": "aggregation_regress.book", + "fields": { + "publisher": 4, + "isbn": "155860191", + "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp", + "price": "75.00", + "rating": 5.0, + "authors": [8], + "pages": 946, + "pubdate": "1991-10-15" + } + }, + { + "pk": 1, + "model": "aggregation_regress.store", + "fields": { + "books": [1, 2, 3, 4, 5, 6], + "name": "Amazon.com", + "original_opening": "1994-4-23 9:17:42", + "friday_night_closing": "23:59:59" + } + }, + { + "pk": 2, + "model": "aggregation_regress.store", + "fields": { + "books": [1, 3, 5, 6], + "name": "Books.com", + "original_opening": "2001-3-15 11:23:37", + "friday_night_closing": "23:59:59" + } + }, + { + "pk": 3, + "model": "aggregation_regress.store", + "fields": { + "books": [3, 4, 6], + "name": "Mamma and Pappa's Books", + "original_opening": "1945-4-25 16:24:14", + "friday_night_closing": "21:30:00" + } + }, + { + "pk": 1, + "model": "aggregation_regress.author", + "fields": { + "age": 34, + "friends": [2, 4], + "name": "Adrian Holovaty" + } + }, + { + "pk": 2, + "model": "aggregation_regress.author", + "fields": { + "age": 35, + "friends": [1, 7], + "name": "Jacob Kaplan-Moss" + } + }, + { + "pk": 3, + "model": 
"aggregation_regress.author", + "fields": { + "age": 45, + "friends": [], + "name": "Brad Dayley" + } + }, + { + "pk": 4, + "model": "aggregation_regress.author", + "fields": { + "age": 29, + "friends": [1], + "name": "James Bennett" + } + }, + { + "pk": 5, + "model": "aggregation_regress.author", + "fields": { + "age": 37, + "friends": [6, 7], + "name": "Jeffrey Forcier " + } + }, + { + "pk": 6, + "model": "aggregation_regress.author", + "fields": { + "age": 29, + "friends": [5, 7], + "name": "Paul Bissex" + } + }, + { + "pk": 7, + "model": "aggregation_regress.author", + "fields": { + "age": 25, + "friends": [2, 5, 6], + "name": "Wesley J. Chun" + } + }, + { + "pk": 8, + "model": "aggregation_regress.author", + "fields": { + "age": 57, + "friends": [9], + "name": "Peter Norvig" + } + }, + { + "pk": 9, + "model": "aggregation_regress.author", + "fields": { + "age": 46, + "friends": [8], + "name": "Stuart Russell" + } + } +] diff --git a/tests/regressiontests/aggregation_regress/models.py b/tests/regressiontests/aggregation_regress/models.py new file mode 100644 index 00000000000..cb442148bdb --- /dev/null +++ b/tests/regressiontests/aggregation_regress/models.py @@ -0,0 +1,199 @@ +# coding: utf-8 +from django.db import models +from django.conf import settings + +try: + sorted +except NameError: + from django.utils.itercompat import sorted # For Python 2.3 + +class Author(models.Model): + name = models.CharField(max_length=100) + age = models.IntegerField() + friends = models.ManyToManyField('self', blank=True) + + def __unicode__(self): + return self.name + +class Publisher(models.Model): + name = models.CharField(max_length=300) + num_awards = models.IntegerField() + + def __unicode__(self): + return self.name + +class Book(models.Model): + isbn = models.CharField(max_length=9) + name = models.CharField(max_length=300) + pages = models.IntegerField() + rating = models.FloatField() + price = models.DecimalField(decimal_places=2, max_digits=6) + authors = 
models.ManyToManyField(Author) + publisher = models.ForeignKey(Publisher) + pubdate = models.DateField() + + class Meta: + ordering = ('name',) + + def __unicode__(self): + return self.name + +class Store(models.Model): + name = models.CharField(max_length=300) + books = models.ManyToManyField(Book) + original_opening = models.DateTimeField() + friday_night_closing = models.TimeField() + + def __unicode__(self): + return self.name + +#Extra does not play well with values. Modify the tests if/when this is fixed. +__test__ = {'API_TESTS': """ +>>> from django.core import management +>>> from django.db.models import get_app + +# Reset the database representation of this app. +# This will return the database to a clean initial state. +>>> management.call_command('flush', verbosity=0, interactive=False) + +>>> from django.db.models import Avg, Sum, Count, Max, Min, StdDev, Variance + +# Ordering requests are ignored +>>> Author.objects.all().order_by('name').aggregate(Avg('age')) +{'age__avg': 37.4...} + +# Implicit ordering is also ignored +>>> Book.objects.all().aggregate(Sum('pages')) +{'pages__sum': 3703} + +# Baseline results +>>> Book.objects.all().aggregate(Sum('pages'), Avg('pages')) +{'pages__sum': 3703, 'pages__avg': 617.1...} + +# Empty values query doesn't affect grouping or results +>>> Book.objects.all().values().aggregate(Sum('pages'), Avg('pages')) +{'pages__sum': 3703, 'pages__avg': 617.1...} + +# Aggregate overrides extra selected column +>>> Book.objects.all().extra(select={'price_per_page' : 'price / pages'}).aggregate(Sum('pages')) +{'pages__sum': 3703} + +# Annotations get combined with extra select clauses +>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).__dict__.items()) +[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', 
Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)] + +# Order of the annotate/extra in the query doesn't matter +>>> sorted(Book.objects.all().extra(select={'manufacture_cost' : 'price * .5'}).annotate(mean_auth_age=Avg('authors__age')).get(pk=2).__dict__.items()) +[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)] + +# Values queries can be combined with annotate and extra +>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).values().get(pk=2).items()) +[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)] + +# The order of the values, annotate and extra clauses doesn't matter +>>> sorted(Book.objects.all().values().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).items()) +[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)] + +# A values query that selects specific columns reduces the output +>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).values('name').get(pk=1).items()) +[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')] + +# The annotations are added to values output if values() precedes annotate() +>>> 
sorted(Book.objects.all().values('name').annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1).items()) +[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')] + +# Check that all of the objects are getting counted (allow_nulls) and that values respects the amount of objects +>>> len(Author.objects.all().annotate(Avg('friends__age')).values()) +9 + +# Check that consecutive calls to annotate accumulate in the query +>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('oldest', 'price').annotate(Max('publisher__num_awards')) +[{'price': Decimal("30..."), 'oldest': 35, 'publisher__num_awards__max': 3}, {'price': Decimal("29.69"), 'oldest': 37, 'publisher__num_awards__max': 7}, {'price': Decimal("23.09"), 'oldest': 45, 'publisher__num_awards__max': 1}, {'price': Decimal("75..."), 'oldest': 57, 'publisher__num_awards__max': 9}, {'price': Decimal("82.8..."), 'oldest': 57, 'publisher__num_awards__max': 7}] + +# Aggregates can be composed over annotations. +# The return type is derived from the composed aggregate +>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('pages'), Max('price'), Sum('num_authors'), Avg('num_authors')) +{'num_authors__sum': 10, 'num_authors__avg': 1.66..., 'pages__max': 1132, 'price__max': Decimal("82.80")} + +# Bad field requests in aggregates are caught and reported +>>> Book.objects.all().aggregate(num_authors=Count('foo')) +Traceback (most recent call last): +... +FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store + +>>> Book.objects.all().annotate(num_authors=Count('foo')) +Traceback (most recent call last): +... +FieldError: Cannot resolve keyword 'foo' into field. 
Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store + +>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('foo')) +Traceback (most recent call last): +... +FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store, num_authors + +# Old-style count aggregations can be mixed with new-style +>>> Book.objects.annotate(num_authors=Count('authors')).count() +6 + +# Non-ordinal, non-computed Aggregates over annotations correctly inherit +# the annotation's internal type if the annotation is ordinal or computed +>>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Max('num_authors')) +{'num_authors__max': 3} + +>>> Publisher.objects.annotate(avg_price=Avg('book__price')).aggregate(Max('avg_price')) +{'avg_price__max': 75.0...} + +# Aliases are quoted to protect aliases that might be reserved names +>>> Book.objects.aggregate(number=Max('pages'), select=Max('pages')) +{'number': 1132, 'select': 1132} + + +""" +} + +if settings.DATABASE_ENGINE != 'sqlite3': + __test__['API_TESTS'] += """ +# Stddev and Variance are not guaranteed to be available for SQLite. 
+ +>>> Book.objects.aggregate(StdDev('pages')) +{'pages__stddev': 311.46...} + +>>> Book.objects.aggregate(StdDev('rating')) +{'rating__stddev': 0.60...} + +>>> Book.objects.aggregate(StdDev('price')) +{'price__stddev': 24.16...} + + +>>> Book.objects.aggregate(StdDev('pages', sample=True)) +{'pages__stddev': 341.19...} + +>>> Book.objects.aggregate(StdDev('rating', sample=True)) +{'rating__stddev': 0.66...} + +>>> Book.objects.aggregate(StdDev('price', sample=True)) +{'price__stddev': 26.46...} + + +>>> Book.objects.aggregate(Variance('pages')) +{'pages__variance': 97010.80...} + +>>> Book.objects.aggregate(Variance('rating')) +{'rating__variance': 0.36...} + +>>> Book.objects.aggregate(Variance('price')) +{'price__variance': 583.77...} + + +>>> Book.objects.aggregate(Variance('pages', sample=True)) +{'pages__variance': 116412.96...} + +>>> Book.objects.aggregate(Variance('rating', sample=True)) +{'rating__variance': 0.44...} + +>>> Book.objects.aggregate(Variance('price', sample=True)) +{'price__variance': 700.53...} + + +""" +