Fixed #34176 -- Fixed grouping by ambiguous aliases.

Regression in b7b28c7c18.

Refs #31377.

Thanks Shai Berger for the report and reviews.

test_aggregation_subquery_annotation_values_collision() has been
updated as queries that are explicitly grouped by a subquery should
always be grouped by it and not its outer columns even if its alias
collides with referenced table columns. This was not possible to
accomplish at the time 10866a10 landed because we didn't have compiler
level handling of colliding aliases.
This commit is contained in:
Simon Charette 2023-01-06 09:10:16 -05:00 committed by Mariusz Felisiak
parent 016bead6a2
commit dd68af62b2
8 changed files with 186 additions and 62 deletions

View File

@ -9,7 +9,7 @@ class BaseDatabaseFeatures:
# Oracle can't group by LOB (large object) data types. # Oracle can't group by LOB (large object) data types.
allows_group_by_lob = True allows_group_by_lob = True
allows_group_by_selected_pks = False allows_group_by_selected_pks = False
allows_group_by_refs = True allows_group_by_select_index = True
empty_fetchmany_value = [] empty_fetchmany_value = []
update_can_self_select = True update_can_self_select = True

View File

@ -8,7 +8,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
# Oracle crashes with "ORA-00932: inconsistent datatypes: expected - got # Oracle crashes with "ORA-00932: inconsistent datatypes: expected - got
# BLOB" when grouping by LOBs (#24096). # BLOB" when grouping by LOBs (#24096).
allows_group_by_lob = False allows_group_by_lob = False
allows_group_by_refs = False allows_group_by_select_index = False
interprets_empty_strings_as_nulls = True interprets_empty_strings_as_nulls = True
has_select_for_update = True has_select_for_update = True
has_select_for_update_nowait = True has_select_for_update_nowait = True

View File

@ -123,7 +123,7 @@ class SQLCompiler:
if self.query.group_by is None: if self.query.group_by is None:
return [] return []
expressions = [] expressions = []
allows_group_by_refs = self.connection.features.allows_group_by_refs group_by_refs = set()
if self.query.group_by is not True: if self.query.group_by is not True:
# If the group by is set to a list (by .values() call most likely), # If the group by is set to a list (by .values() call most likely),
# then we need to add everything in it to the GROUP BY clause. # then we need to add everything in it to the GROUP BY clause.
@ -133,21 +133,23 @@ class SQLCompiler:
for expr in self.query.group_by: for expr in self.query.group_by:
if not hasattr(expr, "as_sql"): if not hasattr(expr, "as_sql"):
expr = self.query.resolve_ref(expr) expr = self.query.resolve_ref(expr)
if not allows_group_by_refs and isinstance(expr, Ref): if isinstance(expr, Ref):
expr = expr.source if expr.refs not in group_by_refs:
expressions.append(expr) group_by_refs.add(expr.refs)
expressions.append(expr.source)
else:
expressions.append(expr)
# Note that even if the group_by is set, it is only the minimal # Note that even if the group_by is set, it is only the minimal
# set to group by. So, we need to add cols in select, order_by, and # set to group by. So, we need to add cols in select, order_by, and
# having into the select in any case. # having into the select in any case.
ref_sources = {expr.source for expr in expressions if isinstance(expr, Ref)} selected_expr_indices = {}
aliased_exprs = {} for index, (expr, _, alias) in enumerate(select, start=1):
for expr, _, alias in select:
# Skip members of the select clause that are already included
# by reference.
if expr in ref_sources:
continue
if alias: if alias:
aliased_exprs[expr] = alias selected_expr_indices[expr] = index
# Skip members of the select clause that are already explicitly
# grouped against.
if alias in group_by_refs:
continue
expressions.extend(expr.get_group_by_cols()) expressions.extend(expr.get_group_by_cols())
if not self._meta_ordering: if not self._meta_ordering:
for expr, (sql, params, is_ref) in order_by: for expr, (sql, params, is_ref) in order_by:
@ -162,14 +164,21 @@ class SQLCompiler:
seen = set() seen = set()
expressions = self.collapse_group_by(expressions, having_group_by) expressions = self.collapse_group_by(expressions, having_group_by)
allows_group_by_select_index = (
self.connection.features.allows_group_by_select_index
)
for expr in expressions: for expr in expressions:
if allows_group_by_refs and (alias := aliased_exprs.get(expr)):
expr = Ref(alias, expr)
try: try:
sql, params = self.compile(expr) sql, params = self.compile(expr)
except (EmptyResultSet, FullResultSet): except (EmptyResultSet, FullResultSet):
continue continue
sql, params = expr.select_format(self, sql, params) if (
allows_group_by_select_index
and (select_index := selected_expr_indices.get(expr)) is not None
):
sql, params = str(select_index), ()
else:
sql, params = expr.select_format(self, sql, params)
params_hash = make_hashable(params) params_hash = make_hashable(params)
if (sql, params_hash) not in seen: if (sql, params_hash) not in seen:
result.append((sql, params)) result.append((sql, params))

View File

@ -2211,40 +2211,25 @@ class Query(BaseExpression):
primary key, and the query would be equivalent, the optimization primary key, and the query would be equivalent, the optimization
will be made automatically. will be made automatically.
""" """
if allow_aliases: if allow_aliases and self.values_select:
# Column names from JOINs to check collisions with aliases. # If grouping by aliases is allowed assign selected value aliases
column_names = set() # by moving them to annotations.
seen_models = set() group_by_annotations = {}
for join in list(self.alias_map.values())[1:]: # Skip base table. values_select = {}
model = join.join_field.related_model for alias, expr in zip(self.values_select, self.select):
if model not in seen_models: if isinstance(expr, Col):
column_names.update( values_select[alias] = expr
{field.column for field in model._meta.local_concrete_fields} else:
) group_by_annotations[alias] = expr
seen_models.add(model) self.annotations = {**group_by_annotations, **self.annotations}
if self.values_select: self.append_annotation_mask(group_by_annotations)
# If grouping by aliases is allowed assign selected values self.select = tuple(values_select.values())
# aliases by moving them to annotations. self.values_select = tuple(values_select)
group_by_annotations = {}
values_select = {}
for alias, expr in zip(self.values_select, self.select):
if isinstance(expr, Col):
values_select[alias] = expr
else:
group_by_annotations[alias] = expr
self.annotations = {**group_by_annotations, **self.annotations}
self.append_annotation_mask(group_by_annotations)
self.select = tuple(values_select.values())
self.values_select = tuple(values_select)
group_by = list(self.select) group_by = list(self.select)
for alias, annotation in self.annotation_select.items(): for alias, annotation in self.annotation_select.items():
if not (group_by_cols := annotation.get_group_by_cols()): if not (group_by_cols := annotation.get_group_by_cols()):
continue continue
if ( if allow_aliases and not annotation.contains_aggregate:
allow_aliases
and alias not in column_names
and not annotation.contains_aggregate
):
group_by.append(Ref(alias, annotation)) group_by.append(Ref(alias, annotation))
else: else:
group_by.extend(group_by_cols) group_by.extend(group_by_cols)

View File

@ -1500,27 +1500,29 @@ class AggregateTestCase(TestCase):
], ],
) )
@skipUnlessDBFeature("supports_subqueries_in_group_by")
def test_aggregation_subquery_annotation_values_collision(self): def test_aggregation_subquery_annotation_values_collision(self):
books_rating_qs = Book.objects.filter( books_rating_qs = Book.objects.filter(
publisher=OuterRef("pk"), pk=OuterRef("book"),
price=Decimal("29.69"),
).values("rating") ).values("rating")
publisher_qs = ( publisher_qs = (
Publisher.objects.filter( Publisher.objects.filter(
book__contact__age__gt=20, book__contact__age__gt=20,
name=self.p1.name,
) )
.annotate( .annotate(
rating=Subquery(books_rating_qs), rating=Subquery(books_rating_qs),
contacts_count=Count("book__contact"),
) )
.values("rating") .values("rating")
.annotate(total_count=Count("rating")) .annotate(total_count=Count("*"))
.order_by("rating")
) )
self.assertEqual( self.assertEqual(
list(publisher_qs), list(publisher_qs),
[ [
{"rating": 4.0, "total_count": 2}, {"rating": 3.0, "total_count": 1},
{"rating": 4.0, "total_count": 3},
{"rating": 4.5, "total_count": 1},
{"rating": 5.0, "total_count": 1},
], ],
) )
@ -1642,9 +1644,7 @@ class AggregateTestCase(TestCase):
) )
with self.assertNumQueries(1) as ctx: with self.assertNumQueries(1) as ctx:
self.assertSequenceEqual(books_qs, [book]) self.assertSequenceEqual(books_qs, [book])
# Outerquery SELECT, annotation SELECT, and WHERE SELECT but GROUP BY if connection.features.allows_group_by_select_index:
# selected alias, if allowed.
if connection.features.allows_group_by_refs:
self.assertEqual(ctx[0]["sql"].count("SELECT"), 3) self.assertEqual(ctx[0]["sql"].count("SELECT"), 3)
@skipUnlessDBFeature("supports_subqueries_in_group_by") @skipUnlessDBFeature("supports_subqueries_in_group_by")

View File

@ -89,3 +89,49 @@ class SelfRefFK(models.Model):
parent = models.ForeignKey( parent = models.ForeignKey(
"self", models.SET_NULL, null=True, blank=True, related_name="children" "self", models.SET_NULL, null=True, blank=True, related_name="children"
) )
class AuthorProxy(Author):
class Meta:
proxy = True
class Recipe(models.Model):
name = models.CharField(max_length=20)
author = models.ForeignKey(AuthorProxy, models.CASCADE)
tasters = models.ManyToManyField(AuthorProxy, related_name="recipes")
class RecipeProxy(Recipe):
class Meta:
proxy = True
class AuthorUnmanaged(models.Model):
age = models.IntegerField()
class Meta:
db_table = Author._meta.db_table
managed = False
class RecipeTasterUnmanaged(models.Model):
recipe = models.ForeignKey("RecipeUnmanaged", models.CASCADE)
author = models.ForeignKey(
AuthorUnmanaged, models.CASCADE, db_column="authorproxy_id"
)
class Meta:
managed = False
db_table = Recipe.tasters.through._meta.db_table
class RecipeUnmanaged(models.Model):
author = models.ForeignKey(AuthorUnmanaged, models.CASCADE)
tasters = models.ManyToManyField(
AuthorUnmanaged, through=RecipeTasterUnmanaged, related_name="+"
)
class Meta:
managed = False
db_table = Recipe._meta.db_table

View File

@ -11,6 +11,7 @@ from django.db.models import (
Aggregate, Aggregate,
Avg, Avg,
Case, Case,
CharField,
Count, Count,
DecimalField, DecimalField,
F, F,
@ -23,12 +24,15 @@ from django.db.models import (
Variance, Variance,
When, When,
) )
from django.db.models.functions import Cast, Concat
from django.test import TestCase, skipUnlessDBFeature from django.test import TestCase, skipUnlessDBFeature
from django.test.utils import Approximate from django.test.utils import Approximate
from .models import ( from .models import (
Alfa, Alfa,
Author, Author,
AuthorProxy,
AuthorUnmanaged,
Book, Book,
Bravo, Bravo,
Charlie, Charlie,
@ -37,6 +41,8 @@ from .models import (
HardbackBook, HardbackBook,
ItemTag, ItemTag,
Publisher, Publisher,
RecipeProxy,
RecipeUnmanaged,
SelfRefFK, SelfRefFK,
Store, Store,
WithManualPK, WithManualPK,
@ -188,9 +194,8 @@ class AggregationTests(TestCase):
} }
], ],
) )
if connection.features.allows_group_by_refs: if connection.features.allows_group_by_select_index:
alias = connection.ops.quote_name("discount_price") self.assertIn("GROUP BY 1", ctx[0]["sql"])
self.assertIn(f"GROUP BY {alias}", ctx[0]["sql"])
def test_aggregates_in_where_clause(self): def test_aggregates_in_where_clause(self):
""" """
@ -1827,6 +1832,85 @@ class AggregationTests(TestCase):
) )
self.assertEqual(set(books), {self.b1, self.b4}) self.assertEqual(set(books), {self.b1, self.b4})
def test_aggregate_and_annotate_duplicate_columns(self):
books = (
Book.objects.values("isbn")
.annotate(
name=F("publisher__name"),
num_authors=Count("authors"),
)
.order_by("isbn")
)
self.assertSequenceEqual(
books,
[
{"isbn": "013235613", "name": "Prentice Hall", "num_authors": 3},
{"isbn": "013790395", "name": "Prentice Hall", "num_authors": 2},
{"isbn": "067232959", "name": "Sams", "num_authors": 1},
{"isbn": "155860191", "name": "Morgan Kaufmann", "num_authors": 1},
{"isbn": "159059725", "name": "Apress", "num_authors": 2},
{"isbn": "159059996", "name": "Apress", "num_authors": 1},
],
)
def test_aggregate_and_annotate_duplicate_columns_proxy(self):
author = AuthorProxy.objects.latest("pk")
recipe = RecipeProxy.objects.create(name="Dahl", author=author)
recipe.tasters.add(author)
recipes = RecipeProxy.objects.values("pk").annotate(
name=F("author__name"),
num_tasters=Count("tasters"),
)
self.assertSequenceEqual(
recipes,
[{"pk": recipe.pk, "name": "Stuart Russell", "num_tasters": 1}],
)
def test_aggregate_and_annotate_duplicate_columns_unmanaged(self):
author = AuthorProxy.objects.latest("pk")
recipe = RecipeProxy.objects.create(name="Dahl", author=author)
recipe.tasters.add(author)
recipes = RecipeUnmanaged.objects.values("pk").annotate(
name=F("author__age"),
num_tasters=Count("tasters"),
)
self.assertSequenceEqual(
recipes,
[{"pk": recipe.pk, "name": 46, "num_tasters": 1}],
)
def test_aggregate_group_by_unseen_columns_unmanaged(self):
author = AuthorProxy.objects.latest("pk")
shadow_author = AuthorProxy.objects.create(name=author.name, age=author.age - 2)
recipe = RecipeProxy.objects.create(name="Dahl", author=author)
shadow_recipe = RecipeProxy.objects.create(
name="Shadow Dahl",
author=shadow_author,
)
recipe.tasters.add(shadow_author)
shadow_recipe.tasters.add(author)
# This selects how many tasters each author had according to a
# calculated field "name". The table has a column "name" that Django is
# unaware of, and is equal for the two authors. The grouping column
# cannot be referenced by its name ("name"), as it'd return one result
# which is incorrect.
author_recipes = (
AuthorUnmanaged.objects.annotate(
name=Concat(
Value("Writer at "),
Cast(F("age"), output_field=CharField()),
)
)
.values("name") # Field used for grouping.
.annotate(num_recipes=Count("recipeunmanaged"))
.filter(num_recipes__gt=0)
.values("num_recipes") # Drop grouping column.
)
self.assertSequenceEqual(
author_recipes,
[{"num_recipes": 1}, {"num_recipes": 1}],
)
class JoinPromotionTests(TestCase): class JoinPromotionTests(TestCase):
def test_ticket_21150(self): def test_ticket_21150(self):

View File

@ -449,8 +449,8 @@ class TestQuerying(PostgreSQLTestCase):
expected, expected,
) )
@skipUnlessDBFeature("allows_group_by_refs") @skipUnlessDBFeature("allows_group_by_select_index")
def test_group_by_order_by_aliases(self): def test_group_by_order_by_select_index(self):
with self.assertNumQueries(1) as ctx: with self.assertNumQueries(1) as ctx:
self.assertSequenceEqual( self.assertSequenceEqual(
NullableIntegerArrayModel.objects.filter( NullableIntegerArrayModel.objects.filter(
@ -467,7 +467,7 @@ class TestQuerying(PostgreSQLTestCase):
) )
alias = connection.ops.quote_name("field__0") alias = connection.ops.quote_name("field__0")
sql = ctx[0]["sql"] sql = ctx[0]["sql"]
self.assertIn(f"GROUP BY {alias}", sql) self.assertIn("GROUP BY 1", sql)
self.assertIn(f"ORDER BY {alias}", sql) self.assertIn(f"ORDER BY {alias}", sql)
def test_index(self): def test_index(self):