Fixed #34176 -- Fixed grouping by ambiguous aliases.

Regression in b7b28c7c18.

Refs #31377.

Thanks Shai Berger for the report and reviews.

test_aggregation_subquery_annotation_values_collision() has been
updated as queries that are explicitly grouped by a subquery should
always be grouped by it and not its outer columns even if its alias
collides with referenced table columns. This was not possible to
accomplish at the time 10866a10 landed because we didn't have compiler
level handling of colliding aliases.
This commit is contained in:
Simon Charette 2023-01-06 09:10:16 -05:00 committed by Mariusz Felisiak
parent 016bead6a2
commit dd68af62b2
8 changed files with 186 additions and 62 deletions

View File

@ -9,7 +9,7 @@ class BaseDatabaseFeatures:
# Oracle can't group by LOB (large object) data types.
allows_group_by_lob = True
allows_group_by_selected_pks = False
allows_group_by_refs = True
allows_group_by_select_index = True
empty_fetchmany_value = []
update_can_self_select = True

View File

@ -8,7 +8,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
# Oracle crashes with "ORA-00932: inconsistent datatypes: expected - got
# BLOB" when grouping by LOBs (#24096).
allows_group_by_lob = False
allows_group_by_refs = False
allows_group_by_select_index = False
interprets_empty_strings_as_nulls = True
has_select_for_update = True
has_select_for_update_nowait = True

View File

@ -123,7 +123,7 @@ class SQLCompiler:
if self.query.group_by is None:
return []
expressions = []
allows_group_by_refs = self.connection.features.allows_group_by_refs
group_by_refs = set()
if self.query.group_by is not True:
# If the group by is set to a list (by .values() call most likely),
# then we need to add everything in it to the GROUP BY clause.
@ -133,21 +133,23 @@ class SQLCompiler:
for expr in self.query.group_by:
if not hasattr(expr, "as_sql"):
expr = self.query.resolve_ref(expr)
if not allows_group_by_refs and isinstance(expr, Ref):
expr = expr.source
expressions.append(expr)
if isinstance(expr, Ref):
if expr.refs not in group_by_refs:
group_by_refs.add(expr.refs)
expressions.append(expr.source)
else:
expressions.append(expr)
# Note that even if the group_by is set, it is only the minimal
# set to group by. So, we need to add cols in select, order_by, and
# having into the select in any case.
ref_sources = {expr.source for expr in expressions if isinstance(expr, Ref)}
aliased_exprs = {}
for expr, _, alias in select:
# Skip members of the select clause that are already included
# by reference.
if expr in ref_sources:
continue
selected_expr_indices = {}
for index, (expr, _, alias) in enumerate(select, start=1):
if alias:
aliased_exprs[expr] = alias
selected_expr_indices[expr] = index
# Skip members of the select clause that are already explicitly
# grouped against.
if alias in group_by_refs:
continue
expressions.extend(expr.get_group_by_cols())
if not self._meta_ordering:
for expr, (sql, params, is_ref) in order_by:
@ -162,14 +164,21 @@ class SQLCompiler:
seen = set()
expressions = self.collapse_group_by(expressions, having_group_by)
allows_group_by_select_index = (
self.connection.features.allows_group_by_select_index
)
for expr in expressions:
if allows_group_by_refs and (alias := aliased_exprs.get(expr)):
expr = Ref(alias, expr)
try:
sql, params = self.compile(expr)
except (EmptyResultSet, FullResultSet):
continue
sql, params = expr.select_format(self, sql, params)
if (
allows_group_by_select_index
and (select_index := selected_expr_indices.get(expr)) is not None
):
sql, params = str(select_index), ()
else:
sql, params = expr.select_format(self, sql, params)
params_hash = make_hashable(params)
if (sql, params_hash) not in seen:
result.append((sql, params))

View File

@ -2211,40 +2211,25 @@ class Query(BaseExpression):
primary key, and the query would be equivalent, the optimization
will be made automatically.
"""
if allow_aliases:
# Column names from JOINs to check collisions with aliases.
column_names = set()
seen_models = set()
for join in list(self.alias_map.values())[1:]: # Skip base table.
model = join.join_field.related_model
if model not in seen_models:
column_names.update(
{field.column for field in model._meta.local_concrete_fields}
)
seen_models.add(model)
if self.values_select:
# If grouping by aliases is allowed assign selected values
# aliases by moving them to annotations.
group_by_annotations = {}
values_select = {}
for alias, expr in zip(self.values_select, self.select):
if isinstance(expr, Col):
values_select[alias] = expr
else:
group_by_annotations[alias] = expr
self.annotations = {**group_by_annotations, **self.annotations}
self.append_annotation_mask(group_by_annotations)
self.select = tuple(values_select.values())
self.values_select = tuple(values_select)
if allow_aliases and self.values_select:
# If grouping by aliases is allowed assign selected value aliases
# by moving them to annotations.
group_by_annotations = {}
values_select = {}
for alias, expr in zip(self.values_select, self.select):
if isinstance(expr, Col):
values_select[alias] = expr
else:
group_by_annotations[alias] = expr
self.annotations = {**group_by_annotations, **self.annotations}
self.append_annotation_mask(group_by_annotations)
self.select = tuple(values_select.values())
self.values_select = tuple(values_select)
group_by = list(self.select)
for alias, annotation in self.annotation_select.items():
if not (group_by_cols := annotation.get_group_by_cols()):
continue
if (
allow_aliases
and alias not in column_names
and not annotation.contains_aggregate
):
if allow_aliases and not annotation.contains_aggregate:
group_by.append(Ref(alias, annotation))
else:
group_by.extend(group_by_cols)

View File

@ -1500,27 +1500,29 @@ class AggregateTestCase(TestCase):
],
)
@skipUnlessDBFeature("supports_subqueries_in_group_by")
def test_aggregation_subquery_annotation_values_collision(self):
books_rating_qs = Book.objects.filter(
publisher=OuterRef("pk"),
price=Decimal("29.69"),
pk=OuterRef("book"),
).values("rating")
publisher_qs = (
Publisher.objects.filter(
book__contact__age__gt=20,
name=self.p1.name,
)
.annotate(
rating=Subquery(books_rating_qs),
contacts_count=Count("book__contact"),
)
.values("rating")
.annotate(total_count=Count("rating"))
.annotate(total_count=Count("*"))
.order_by("rating")
)
self.assertEqual(
list(publisher_qs),
[
{"rating": 4.0, "total_count": 2},
{"rating": 3.0, "total_count": 1},
{"rating": 4.0, "total_count": 3},
{"rating": 4.5, "total_count": 1},
{"rating": 5.0, "total_count": 1},
],
)
@ -1642,9 +1644,7 @@ class AggregateTestCase(TestCase):
)
with self.assertNumQueries(1) as ctx:
self.assertSequenceEqual(books_qs, [book])
# Outerquery SELECT, annotation SELECT, and WHERE SELECT but GROUP BY
# selected alias, if allowed.
if connection.features.allows_group_by_refs:
if connection.features.allows_group_by_select_index:
self.assertEqual(ctx[0]["sql"].count("SELECT"), 3)
@skipUnlessDBFeature("supports_subqueries_in_group_by")

View File

@ -89,3 +89,49 @@ class SelfRefFK(models.Model):
parent = models.ForeignKey(
"self", models.SET_NULL, null=True, blank=True, related_name="children"
)
class AuthorProxy(Author):
class Meta:
proxy = True
class Recipe(models.Model):
name = models.CharField(max_length=20)
author = models.ForeignKey(AuthorProxy, models.CASCADE)
tasters = models.ManyToManyField(AuthorProxy, related_name="recipes")
class RecipeProxy(Recipe):
class Meta:
proxy = True
class AuthorUnmanaged(models.Model):
age = models.IntegerField()
class Meta:
db_table = Author._meta.db_table
managed = False
class RecipeTasterUnmanaged(models.Model):
recipe = models.ForeignKey("RecipeUnmanaged", models.CASCADE)
author = models.ForeignKey(
AuthorUnmanaged, models.CASCADE, db_column="authorproxy_id"
)
class Meta:
managed = False
db_table = Recipe.tasters.through._meta.db_table
class RecipeUnmanaged(models.Model):
author = models.ForeignKey(AuthorUnmanaged, models.CASCADE)
tasters = models.ManyToManyField(
AuthorUnmanaged, through=RecipeTasterUnmanaged, related_name="+"
)
class Meta:
managed = False
db_table = Recipe._meta.db_table

View File

@ -11,6 +11,7 @@ from django.db.models import (
Aggregate,
Avg,
Case,
CharField,
Count,
DecimalField,
F,
@ -23,12 +24,15 @@ from django.db.models import (
Variance,
When,
)
from django.db.models.functions import Cast, Concat
from django.test import TestCase, skipUnlessDBFeature
from django.test.utils import Approximate
from .models import (
Alfa,
Author,
AuthorProxy,
AuthorUnmanaged,
Book,
Bravo,
Charlie,
@ -37,6 +41,8 @@ from .models import (
HardbackBook,
ItemTag,
Publisher,
RecipeProxy,
RecipeUnmanaged,
SelfRefFK,
Store,
WithManualPK,
@ -188,9 +194,8 @@ class AggregationTests(TestCase):
}
],
)
if connection.features.allows_group_by_refs:
alias = connection.ops.quote_name("discount_price")
self.assertIn(f"GROUP BY {alias}", ctx[0]["sql"])
if connection.features.allows_group_by_select_index:
self.assertIn("GROUP BY 1", ctx[0]["sql"])
def test_aggregates_in_where_clause(self):
"""
@ -1827,6 +1832,85 @@ class AggregationTests(TestCase):
)
self.assertEqual(set(books), {self.b1, self.b4})
def test_aggregate_and_annotate_duplicate_columns(self):
books = (
Book.objects.values("isbn")
.annotate(
name=F("publisher__name"),
num_authors=Count("authors"),
)
.order_by("isbn")
)
self.assertSequenceEqual(
books,
[
{"isbn": "013235613", "name": "Prentice Hall", "num_authors": 3},
{"isbn": "013790395", "name": "Prentice Hall", "num_authors": 2},
{"isbn": "067232959", "name": "Sams", "num_authors": 1},
{"isbn": "155860191", "name": "Morgan Kaufmann", "num_authors": 1},
{"isbn": "159059725", "name": "Apress", "num_authors": 2},
{"isbn": "159059996", "name": "Apress", "num_authors": 1},
],
)
def test_aggregate_and_annotate_duplicate_columns_proxy(self):
author = AuthorProxy.objects.latest("pk")
recipe = RecipeProxy.objects.create(name="Dahl", author=author)
recipe.tasters.add(author)
recipes = RecipeProxy.objects.values("pk").annotate(
name=F("author__name"),
num_tasters=Count("tasters"),
)
self.assertSequenceEqual(
recipes,
[{"pk": recipe.pk, "name": "Stuart Russell", "num_tasters": 1}],
)
def test_aggregate_and_annotate_duplicate_columns_unmanaged(self):
author = AuthorProxy.objects.latest("pk")
recipe = RecipeProxy.objects.create(name="Dahl", author=author)
recipe.tasters.add(author)
recipes = RecipeUnmanaged.objects.values("pk").annotate(
name=F("author__age"),
num_tasters=Count("tasters"),
)
self.assertSequenceEqual(
recipes,
[{"pk": recipe.pk, "name": 46, "num_tasters": 1}],
)
def test_aggregate_group_by_unseen_columns_unmanaged(self):
author = AuthorProxy.objects.latest("pk")
shadow_author = AuthorProxy.objects.create(name=author.name, age=author.age - 2)
recipe = RecipeProxy.objects.create(name="Dahl", author=author)
shadow_recipe = RecipeProxy.objects.create(
name="Shadow Dahl",
author=shadow_author,
)
recipe.tasters.add(shadow_author)
shadow_recipe.tasters.add(author)
# This selects how many tasters each author had according to a
# calculated field "name". The table has a column "name" that Django is
# unaware of, and is equal for the two authors. The grouping column
# cannot be referenced by its name ("name"), as it'd return one result
# which is incorrect.
author_recipes = (
AuthorUnmanaged.objects.annotate(
name=Concat(
Value("Writer at "),
Cast(F("age"), output_field=CharField()),
)
)
.values("name") # Field used for grouping.
.annotate(num_recipes=Count("recipeunmanaged"))
.filter(num_recipes__gt=0)
.values("num_recipes") # Drop grouping column.
)
self.assertSequenceEqual(
author_recipes,
[{"num_recipes": 1}, {"num_recipes": 1}],
)
class JoinPromotionTests(TestCase):
def test_ticket_21150(self):

View File

@ -449,8 +449,8 @@ class TestQuerying(PostgreSQLTestCase):
expected,
)
@skipUnlessDBFeature("allows_group_by_refs")
def test_group_by_order_by_aliases(self):
@skipUnlessDBFeature("allows_group_by_select_index")
def test_group_by_order_by_select_index(self):
with self.assertNumQueries(1) as ctx:
self.assertSequenceEqual(
NullableIntegerArrayModel.objects.filter(
@ -467,7 +467,7 @@ class TestQuerying(PostgreSQLTestCase):
)
alias = connection.ops.quote_name("field__0")
sql = ctx[0]["sql"]
self.assertIn(f"GROUP BY {alias}", sql)
self.assertIn("GROUP BY 1", sql)
self.assertIn(f"ORDER BY {alias}", sql)
def test_index(self):