From d382abc8757c4629c314e1ed8854fd06da68e821 Mon Sep 17 00:00:00 2001 From: Adrian Holovaty Date: Thu, 29 Dec 2005 19:11:00 +0000 Subject: [PATCH] magic-removal: Fixed #1136: Removed duplicated DB joins resulting from kwarg evaluation. Thanks, Russ Magee git-svn-id: http://code.djangoproject.com/svn/django/branches/magic-removal@1792 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/db/models/manager.py | 37 +++++--- django/db/models/query.py | 113 ++++++++++++++----------- tests/modeltests/many_to_one/models.py | 14 +++ 3 files changed, 105 insertions(+), 59 deletions(-) diff --git a/django/db/models/manager.py b/django/db/models/manager.py index 78c4f8a30f..cbefdf1aea 100644 --- a/django/db/models/manager.py +++ b/django/db/models/manager.py @@ -33,7 +33,7 @@ class Manager(object): self.creation_counter = Manager.creation_counter Manager.creation_counter += 1 self.klass = None - + def _prepare(self): if self.klass._meta.get_latest_by: self.get_latest = self.__get_latest @@ -49,7 +49,7 @@ class Manager(object): if not hasattr(klass, '_default_manager') or \ self.creation_counter < klass._default_manager.creation_counter: klass._default_manager = self - + def _get_sql_clause(self, **kwargs): def quote_only_if_word(word): if ' ' in word: @@ -61,15 +61,14 @@ class Manager(object): # Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z. select = ["%s.%s" % (backend.quote_name(opts.db_table), backend.quote_name(f.column)) for f in opts.fields] - tables = [opts.db_table] + (kwargs.get('tables') and kwargs['tables'][:] or []) - tables = [quote_only_if_word(t) for t in tables] + tables = (kwargs.get('tables') and [quote_only_if_word(t) for t in kwargs['tables']] or []) where = kwargs.get('where') and kwargs['where'][:] or [] params = kwargs.get('params') and kwargs['params'][:] or [] # Convert the kwargs into SQL. - tables2, join_where2, where2, params2, _ = parse_lookup(kwargs.items(), opts) + tables2, joins, where2, params2 = parse_lookup(kwargs.items(), opts) tables.extend(tables2) - where.extend(join_where2 + where2) + where.extend(where2) params.extend(params2) # Add any additional constraints from the "where_constraints" parameter. @@ -83,6 +82,22 @@ class Manager(object): if kwargs.get('select'): select.extend(['(%s) AS %s' % (quote_only_if_word(s[1]), backend.quote_name(s[0])) for s in kwargs['select']]) + # Start composing the body of the SQL statement. + sql = [" FROM", backend.quote_name(opts.db_table)] + + # Compose the join dictionary into SQL describing the joins. + if joins: + sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition) + for (alias, (table, join_type, condition)) in joins.items()])) + + # Compose the tables clause into SQL. + if tables: + sql.append(", " + ", ".join(tables)) + + # Compose the where clause into SQL. + if where: + sql.append(where and "WHERE " + " AND ".join(where)) + # ORDER BY clause order_by = [] for f in handle_legacy_orderlist(kwargs.get('order_by', opts.ordering)): @@ -106,16 +121,16 @@ class Manager(object): else: table_prefix = '' order_by.append('%s%s %s' % (table_prefix, backend.quote_name(orderfield2column(col_name, opts)), order)) - order_by = ", ".join(order_by) + if order_by: + sql.append("ORDER BY " + ", ".join(order_by)) # LIMIT and OFFSET clauses if kwargs.get('limit') is not None: - limit_sql = " %s " % backend.get_limit_offset_sql(kwargs['limit'], kwargs.get('offset')) + sql.append("%s " % backend.get_limit_offset_sql(kwargs['limit'], kwargs.get('offset'))) else: assert kwargs.get('offset') is None, "'offset' is not allowed without 'limit'" - limit_sql = "" - return select, " FROM " + ",".join(tables) + (where and " WHERE " + " AND ".join(where) or "") + (order_by and " ORDER BY " + order_by or "") + limit_sql, params + return select, " ".join(sql), params def get_iterator(self, **kwargs): # kwargs['select'] is a dictionary, and dictionaries' key order is @@ -220,7 +235,7 @@ class Manager(object): # We have to manually run typecast_timestamp(str()) on the results, because # MySQL doesn't automatically cast the result of date functions as datetime # objects -- MySQL returns the values as strings, instead. - return [typecast_timestamp(str(row[0])) for row in cursor.fetchall()] + return [typecast_timestamp(str(row[0])) for row in cursor.fetchall()] class ManagerDescriptor(object): def __init__(self, manager): diff --git a/django/db/models/query.py b/django/db/models/query.py index 733cb40450..c0951d4091 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -45,7 +45,7 @@ def orderlist2sql(order_list, opts, prefix=''): output.append('%s%s ASC' % (prefix, backend.quote_name(orderfield2column(f, opts)))) return ', '.join(output) -class QBase: +class QOperator: "Base class for QAnd and QOr" def __init__(self, *args): self.args = args @@ -53,17 +53,17 @@ class QBase: def __repr__(self): return '(%s)' % self.operator.join([repr(el) for el in self.args]) - def get_sql(self, opts, table_count): - tables, join_where, where, params = [], [], [], [] + def get_sql(self, opts): + tables, joins, where, params = [], {}, [], [] for val in self.args: - tables2, join_where2, where2, params2, table_count = val.get_sql(opts, table_count) + tables2, joins2, where2, params2 = val.get_sql(opts) tables.extend(tables2) - join_where.extend(join_where2) + joins.update(joins2) where.extend(where2) params.extend(params2) - return tables, join_where, ['(%s)' % self.operator.join(where)], params, table_count + return tables, joins, ['(%s)' % self.operator.join(where)], params -class QAnd(QBase): +class QAnd(QOperator): "Encapsulates a combined query that uses 'AND'." operator = ' AND ' def __or__(self, other): @@ -80,7 +80,7 @@ class QAnd(QBase): else: raise TypeError, other -class QOr(QBase): +class QOr(QOperator): "Encapsulates a combined query that uses 'OR'." operator = ' OR ' def __and__(self, other): @@ -117,8 +117,8 @@ class Q: else: raise TypeError, other - def get_sql(self, opts, table_count): - return parse_lookup(self.kwargs.items(), opts, table_count) + def get_sql(self, opts): + return parse_lookup(self.kwargs.items(), opts) def get_where_clause(lookup_type, table_prefix, field_name, value): @@ -174,33 +174,40 @@ def throw_bad_kwarg_error(kwarg): # Helper function to remove redundancy. raise TypeError, "got unexpected keyword argument '%s'" % kwarg -def parse_lookup(kwarg_items, opts, table_count=0): +def parse_lookup(kwarg_items, opts): # Helper function that handles converting API kwargs (e.g. # "name__exact": "tom") to SQL. - # Note that there is a distinction between where and join_where. The latter - # is specifically a list of where clauses to use for JOINs. This - # distinction is necessary because of support for "_or". - - # table_count is used to ensure table aliases are unique. - tables, join_where, where, params = [], [], [], [] + # 'joins' is a dictionary describing the tables that must be joined to complete the query. + # Each key-value pair follows the form + # alias: (table, join_type, condition) + # where + # alias is the AS alias for the joined table + # table is the actual table name to be joined + # join_type is the type of join (INNER JOIN, LEFT OUTER JOIN, etc) + # condition is the where-like statement over which narrows the join. + # + # alias will be derived from the lookup list name. + # At present, this method only every returns INNER JOINs; the option is there for others + # to implement custom Q()s, etc that return other join types. + tables, joins, where, params = [], {}, [], [] for kwarg, kwarg_value in kwarg_items: if kwarg in ('order_by', 'limit', 'offset', 'select_related', 'distinct', 'select', 'tables', 'where', 'params'): continue if kwarg_value is None: continue if kwarg == 'complex': - tables2, join_where2, where2, params2, table_count = kwarg_value.get_sql(opts, table_count) + tables2, joins2, where2, params2 = kwarg_value.get_sql(opts) tables.extend(tables2) - join_where.extend(join_where2) + joins.update(joins2) where.extend(where2) params.extend(params2) continue if kwarg == '_or': for val in kwarg_value: - tables2, join_where2, where2, params2, table_count = parse_lookup(val, opts, table_count) + tables2, joins2, where2, params2 = parse_lookup(val, opts) tables.extend(tables2) - join_where.extend(join_where2) + joins.update(joins2) where.append('(%s)' % ' OR '.join(where2)) params.extend(params2) continue @@ -212,17 +219,16 @@ def parse_lookup(kwarg_items, opts, table_count=0): else: lookup_list = lookup_list[:-1] + [opts.pk.name, 'exact'] if len(lookup_list) == 1: - _throw_bad_kwarg_error(kwarg) + throw_bad_kwarg_error(kwarg) lookup_type = lookup_list.pop() current_opts = opts # We'll be overwriting this, so keep a reference to the original opts. current_table_alias = current_opts.db_table param_required = False while lookup_list or param_required: - table_count += 1 try: # "current" is a piece of the lookup list. For example, in # choices.get_list(poll__sites__id__exact=5), lookup_list is - # ["polls", "sites", "id"], and the first current is "polls". + # ["poll", "sites", "id"], and the first current is "poll". try: current = lookup_list.pop(0) except IndexError: @@ -233,15 +239,18 @@ def parse_lookup(kwarg_items, opts, table_count=0): # Try many-to-many relationships first... for f in current_opts.many_to_many: if f.name == current: - rel_table_alias = backend.quote_name('t%s' % table_count) - table_count += 1 - tables.append('%s %s' % \ - (backend.quote_name(f.get_m2m_db_table(current_opts)), rel_table_alias)) - join_where.append('%s.%s = %s.%s' % \ - (backend.quote_name(current_table_alias), - backend.quote_name(current_opts.pk.column), - rel_table_alias, - backend.quote_name(current_opts.object_name.lower() + '_id'))) + rel_table_alias = backend.quote_name(current_table_alias + LOOKUP_SEPARATOR + current) + + joins[rel_table_alias] = ( + backend.quote_name(f.get_m2m_db_table(current_opts)), + "INNER JOIN", + '%s.%s = %s.%s' % + (backend.quote_name(current_table_alias), + backend.quote_name(current_opts.pk.column), + rel_table_alias, + backend.quote_name(current_opts.object_name.lower() + '_id')) + ) + # Optimization: In the case of primary-key lookups, we # don't have to do an extra join. if lookup_list and lookup_list[0] == f.rel.to._meta.pk.name and lookup_type == 'exact': @@ -251,14 +260,17 @@ def parse_lookup(kwarg_items, opts, table_count=0): lookup_list.pop() param_required = False else: - new_table_alias = 't%s' % table_count - tables.append('%s %s' % (backend.quote_name(f.rel.to._meta.db_table), - backend.quote_name(new_table_alias))) - join_where.append('%s.%s = %s.%s' % \ - (backend.quote_name(rel_table_alias), - backend.quote_name(f.rel.to._meta.object_name.lower() + '_id'), - backend.quote_name(new_table_alias), - backend.quote_name(f.rel.to._meta.pk.column))) + new_table_alias = current_table_alias + LOOKUP_SEPARATOR + current + + joins[backend.quote_name(new_table_alias)] = ( + backend.quote_name(f.rel.to._meta.db_table), + "INNER JOIN", + '%s.%s = %s.%s' % + (rel_table_alias, + backend.quote_name(f.rel.to._meta.object_name.lower() + '_id'), + backend.quote_name(new_table_alias), + backend.quote_name(f.rel.to._meta.pk.column)) + ) current_table_alias = new_table_alias param_required = True current_opts = f.rel.to._meta @@ -280,12 +292,17 @@ def parse_lookup(kwarg_items, opts, table_count=0): where.append(get_where_clause(lookup_type, current_table_alias+'.', f.column, kwarg_value)) params.extend(f.get_db_prep_lookup(lookup_type, kwarg_value)) else: - new_table_alias = 't%s' % table_count - tables.append('%s %s' % \ - (backend.quote_name(f.rel.to._meta.db_table), backend.quote_name(new_table_alias))) - join_where.append('%s.%s = %s.%s' % \ - (backend.quote_name(current_table_alias), backend.quote_name(f.column), - backend.quote_name(new_table_alias), backend.quote_name(f.rel.to._meta.pk.column))) + new_table_alias = current_table_alias + LOOKUP_SEPARATOR + current + + joins[backend.quote_name(new_table_alias)] = ( + backend.quote_name(f.rel.to._meta.db_table), + "INNER JOIN", + '%s.%s = %s.%s' % + (backend.quote_name(current_table_alias), + backend.quote_name(f.column), + backend.quote_name(new_table_alias), + backend.quote_name(f.rel.to._meta.pk.column)) + ) current_table_alias = new_table_alias param_required = True current_opts = f.rel.to._meta @@ -301,4 +318,4 @@ def parse_lookup(kwarg_items, opts, table_count=0): throw_bad_kwarg_error(kwarg) except StopIteration: continue - return tables, join_where, where, params, table_count + return tables, joins, where, params diff --git a/tests/modeltests/many_to_one/models.py b/tests/modeltests/many_to_one/models.py index 7e889cb71d..5acaae7af0 100644 --- a/tests/modeltests/many_to_one/models.py +++ b/tests/modeltests/many_to_one/models.py @@ -67,6 +67,19 @@ This is a test >>> Article.objects.get_list(reporter__first_name__exact='John', order_by=['pub_date']) [This is a test, John's second story] +# Query twice over the related field. +>>> Article.objects.get_list(reporter__first_name__exact='John', reporter__last_name__exact='Smith') +[This is a test, John's second story] + +# The underlying query only makes one join when a related table is referenced twice. +>>> null, sql, null = Article.objects._get_sql_clause(reporter__first_name__exact='John', reporter__last_name__exact='Smith') +>>> sql.count('INNER JOIN') +1 + +# The automatically joined table has a predictable name. +>>> Article.objects.get_list(reporter__first_name__exact='John', where=["many_to_one_articles__reporter.last_name='Smith'"]) +[This is a test, John's second story] + # Find all Articles for the Reporter whose ID is 1. >>> Article.objects.get_list(reporter__id__exact=1, order_by=['pub_date']) [This is a test, John's second story] @@ -95,4 +108,5 @@ John Smith >>> a4.save() >>> a4.get_reporter() John Smith + """