From dd68af62b2b27ece50d434f6a351877212e15c3f Mon Sep 17 00:00:00 2001 From: Simon Charette Date: Fri, 6 Jan 2023 09:10:16 -0500 Subject: [PATCH] Fixed #34176 -- Fixed grouping by ambiguous aliases. Regression in b7b28c7c189615543218e81319473888bc46d831. Refs #31377. Thanks Shai Berger for the report and reviews. test_aggregation_subquery_annotation_values_collision() has been updated as queries that are explicitly grouped by a subquery should always be grouped by it and not its outer columns even if its alias collides with referenced table columns. This was not possible to accomplish at the time 10866a10 landed because we didn't have compiler level handling of colliding aliases. --- django/db/backends/base/features.py | 2 +- django/db/backends/oracle/features.py | 2 +- django/db/models/sql/compiler.py | 39 +++++++----- django/db/models/sql/query.py | 45 +++++--------- tests/aggregation/tests.py | 18 +++--- tests/aggregation_regress/models.py | 46 ++++++++++++++ tests/aggregation_regress/tests.py | 90 ++++++++++++++++++++++++++- tests/postgres_tests/test_array.py | 6 +- 8 files changed, 186 insertions(+), 62 deletions(-) diff --git a/django/db/backends/base/features.py b/django/db/backends/base/features.py index ef1fe883366..63b3ab77101 100644 --- a/django/db/backends/base/features.py +++ b/django/db/backends/base/features.py @@ -9,7 +9,7 @@ class BaseDatabaseFeatures: # Oracle can't group by LOB (large object) data types. allows_group_by_lob = True allows_group_by_selected_pks = False - allows_group_by_refs = True + allows_group_by_select_index = True empty_fetchmany_value = [] update_can_self_select = True diff --git a/django/db/backends/oracle/features.py b/django/db/backends/oracle/features.py index df36245af99..3d77a615c88 100644 --- a/django/db/backends/oracle/features.py +++ b/django/db/backends/oracle/features.py @@ -8,7 +8,7 @@ class DatabaseFeatures(BaseDatabaseFeatures): # Oracle crashes with "ORA-00932: inconsistent datatypes: expected - got # BLOB" when grouping by LOBs (#24096). allows_group_by_lob = False - allows_group_by_refs = False + allows_group_by_select_index = False interprets_empty_strings_as_nulls = True has_select_for_update = True has_select_for_update_nowait = True diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py index 41c5c9044f8..97ecd58bdd0 100644 --- a/django/db/models/sql/compiler.py +++ b/django/db/models/sql/compiler.py @@ -123,7 +123,7 @@ class SQLCompiler: if self.query.group_by is None: return [] expressions = [] - allows_group_by_refs = self.connection.features.allows_group_by_refs + group_by_refs = set() if self.query.group_by is not True: # If the group by is set to a list (by .values() call most likely), # then we need to add everything in it to the GROUP BY clause. @@ -133,21 +133,23 @@ class SQLCompiler: for expr in self.query.group_by: if not hasattr(expr, "as_sql"): expr = self.query.resolve_ref(expr) - if not allows_group_by_refs and isinstance(expr, Ref): - expr = expr.source - expressions.append(expr) + if isinstance(expr, Ref): + if expr.refs not in group_by_refs: + group_by_refs.add(expr.refs) + expressions.append(expr.source) + else: + expressions.append(expr) # Note that even if the group_by is set, it is only the minimal # set to group by. So, we need to add cols in select, order_by, and # having into the select in any case. - ref_sources = {expr.source for expr in expressions if isinstance(expr, Ref)} - aliased_exprs = {} - for expr, _, alias in select: - # Skip members of the select clause that are already included - # by reference. - if expr in ref_sources: - continue + selected_expr_indices = {} + for index, (expr, _, alias) in enumerate(select, start=1): if alias: - aliased_exprs[expr] = alias + selected_expr_indices[expr] = index + # Skip members of the select clause that are already explicitly + # grouped against. + if alias in group_by_refs: + continue expressions.extend(expr.get_group_by_cols()) if not self._meta_ordering: for expr, (sql, params, is_ref) in order_by: @@ -162,14 +164,21 @@ class SQLCompiler: seen = set() expressions = self.collapse_group_by(expressions, having_group_by) + allows_group_by_select_index = ( + self.connection.features.allows_group_by_select_index + ) for expr in expressions: - if allows_group_by_refs and (alias := aliased_exprs.get(expr)): - expr = Ref(alias, expr) try: sql, params = self.compile(expr) except (EmptyResultSet, FullResultSet): continue - sql, params = expr.select_format(self, sql, params) + if ( + allows_group_by_select_index + and (select_index := selected_expr_indices.get(expr)) is not None + ): + sql, params = str(select_index), () + else: + sql, params = expr.select_format(self, sql, params) params_hash = make_hashable(params) if (sql, params_hash) not in seen: result.append((sql, params)) diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index 9e70aadca11..3fe00002bd5 100644 --- a/django/db/models/sql/query.py +++ b/django/db/models/sql/query.py @@ -2211,40 +2211,25 @@ class Query(BaseExpression): primary key, and the query would be equivalent, the optimization will be made automatically. """ - if allow_aliases: - # Column names from JOINs to check collisions with aliases. - column_names = set() - seen_models = set() - for join in list(self.alias_map.values())[1:]: # Skip base table. - model = join.join_field.related_model - if model not in seen_models: - column_names.update( - {field.column for field in model._meta.local_concrete_fields} - ) - seen_models.add(model) - if self.values_select: - # If grouping by aliases is allowed assign selected values - # aliases by moving them to annotations. - group_by_annotations = {} - values_select = {} - for alias, expr in zip(self.values_select, self.select): - if isinstance(expr, Col): - values_select[alias] = expr - else: - group_by_annotations[alias] = expr - self.annotations = {**group_by_annotations, **self.annotations} - self.append_annotation_mask(group_by_annotations) - self.select = tuple(values_select.values()) - self.values_select = tuple(values_select) + if allow_aliases and self.values_select: + # If grouping by aliases is allowed assign selected value aliases + # by moving them to annotations. + group_by_annotations = {} + values_select = {} + for alias, expr in zip(self.values_select, self.select): + if isinstance(expr, Col): + values_select[alias] = expr + else: + group_by_annotations[alias] = expr + self.annotations = {**group_by_annotations, **self.annotations} + self.append_annotation_mask(group_by_annotations) + self.select = tuple(values_select.values()) + self.values_select = tuple(values_select) group_by = list(self.select) for alias, annotation in self.annotation_select.items(): if not (group_by_cols := annotation.get_group_by_cols()): continue - if ( - allow_aliases - and alias not in column_names - and not annotation.contains_aggregate - ): + if allow_aliases and not annotation.contains_aggregate: group_by.append(Ref(alias, annotation)) else: group_by.extend(group_by_cols) diff --git a/tests/aggregation/tests.py b/tests/aggregation/tests.py index 67a57c162c4..54e1e6f13a8 100644 --- a/tests/aggregation/tests.py +++ b/tests/aggregation/tests.py @@ -1500,27 +1500,29 @@ class AggregateTestCase(TestCase): ], ) + @skipUnlessDBFeature("supports_subqueries_in_group_by") def test_aggregation_subquery_annotation_values_collision(self): books_rating_qs = Book.objects.filter( - publisher=OuterRef("pk"), - price=Decimal("29.69"), + pk=OuterRef("book"), ).values("rating") publisher_qs = ( Publisher.objects.filter( book__contact__age__gt=20, - name=self.p1.name, ) .annotate( rating=Subquery(books_rating_qs), - contacts_count=Count("book__contact"), ) .values("rating") - .annotate(total_count=Count("rating")) + .annotate(total_count=Count("*")) + .order_by("rating") ) self.assertEqual( list(publisher_qs), [ - {"rating": 4.0, "total_count": 2}, + {"rating": 3.0, "total_count": 1}, + {"rating": 4.0, "total_count": 3}, + {"rating": 4.5, "total_count": 1}, + {"rating": 5.0, "total_count": 1}, ], ) @@ -1642,9 +1644,7 @@ class AggregateTestCase(TestCase): ) with self.assertNumQueries(1) as ctx: self.assertSequenceEqual(books_qs, [book]) - # Outerquery SELECT, annotation SELECT, and WHERE SELECT but GROUP BY - # selected alias, if allowed. - if connection.features.allows_group_by_refs: + if connection.features.allows_group_by_select_index: self.assertEqual(ctx[0]["sql"].count("SELECT"), 3) @skipUnlessDBFeature("supports_subqueries_in_group_by") diff --git a/tests/aggregation_regress/models.py b/tests/aggregation_regress/models.py index 2687cb4840d..edf0e89a9da 100644 --- a/tests/aggregation_regress/models.py +++ b/tests/aggregation_regress/models.py @@ -89,3 +89,49 @@ class SelfRefFK(models.Model): parent = models.ForeignKey( "self", models.SET_NULL, null=True, blank=True, related_name="children" ) + + +class AuthorProxy(Author): + class Meta: + proxy = True + + +class Recipe(models.Model): + name = models.CharField(max_length=20) + author = models.ForeignKey(AuthorProxy, models.CASCADE) + tasters = models.ManyToManyField(AuthorProxy, related_name="recipes") + + +class RecipeProxy(Recipe): + class Meta: + proxy = True + + +class AuthorUnmanaged(models.Model): + age = models.IntegerField() + + class Meta: + db_table = Author._meta.db_table + managed = False + + +class RecipeTasterUnmanaged(models.Model): + recipe = models.ForeignKey("RecipeUnmanaged", models.CASCADE) + author = models.ForeignKey( + AuthorUnmanaged, models.CASCADE, db_column="authorproxy_id" + ) + + class Meta: + managed = False + db_table = Recipe.tasters.through._meta.db_table + + +class RecipeUnmanaged(models.Model): + author = models.ForeignKey(AuthorUnmanaged, models.CASCADE) + tasters = models.ManyToManyField( + AuthorUnmanaged, through=RecipeTasterUnmanaged, related_name="+" + ) + + class Meta: + managed = False + db_table = Recipe._meta.db_table diff --git a/tests/aggregation_regress/tests.py b/tests/aggregation_regress/tests.py index 444a55276d3..bfb3919b237 100644 --- a/tests/aggregation_regress/tests.py +++ b/tests/aggregation_regress/tests.py @@ -11,6 +11,7 @@ from django.db.models import ( Aggregate, Avg, Case, + CharField, Count, DecimalField, F, @@ -23,12 +24,15 @@ from django.db.models import ( Variance, When, ) +from django.db.models.functions import Cast, Concat from django.test import TestCase, skipUnlessDBFeature from django.test.utils import Approximate from .models import ( Alfa, Author, + AuthorProxy, + AuthorUnmanaged, Book, Bravo, Charlie, @@ -37,6 +41,8 @@ from .models import ( HardbackBook, ItemTag, Publisher, + RecipeProxy, + RecipeUnmanaged, SelfRefFK, Store, WithManualPK, @@ -188,9 +194,8 @@ class AggregationTests(TestCase): } ], ) - if connection.features.allows_group_by_refs: - alias = connection.ops.quote_name("discount_price") - self.assertIn(f"GROUP BY {alias}", ctx[0]["sql"]) + if connection.features.allows_group_by_select_index: + self.assertIn("GROUP BY 1", ctx[0]["sql"]) def test_aggregates_in_where_clause(self): """ @@ -1827,6 +1832,85 @@ class AggregationTests(TestCase): ) self.assertEqual(set(books), {self.b1, self.b4}) + def test_aggregate_and_annotate_duplicate_columns(self): + books = ( + Book.objects.values("isbn") + .annotate( + name=F("publisher__name"), + num_authors=Count("authors"), + ) + .order_by("isbn") + ) + self.assertSequenceEqual( + books, + [ + {"isbn": "013235613", "name": "Prentice Hall", "num_authors": 3}, + {"isbn": "013790395", "name": "Prentice Hall", "num_authors": 2}, + {"isbn": "067232959", "name": "Sams", "num_authors": 1}, + {"isbn": "155860191", "name": "Morgan Kaufmann", "num_authors": 1}, + {"isbn": "159059725", "name": "Apress", "num_authors": 2}, + {"isbn": "159059996", "name": "Apress", "num_authors": 1}, + ], + ) + + def test_aggregate_and_annotate_duplicate_columns_proxy(self): + author = AuthorProxy.objects.latest("pk") + recipe = RecipeProxy.objects.create(name="Dahl", author=author) + recipe.tasters.add(author) + recipes = RecipeProxy.objects.values("pk").annotate( + name=F("author__name"), + num_tasters=Count("tasters"), + ) + self.assertSequenceEqual( + recipes, + [{"pk": recipe.pk, "name": "Stuart Russell", "num_tasters": 1}], + ) + + def test_aggregate_and_annotate_duplicate_columns_unmanaged(self): + author = AuthorProxy.objects.latest("pk") + recipe = RecipeProxy.objects.create(name="Dahl", author=author) + recipe.tasters.add(author) + recipes = RecipeUnmanaged.objects.values("pk").annotate( + name=F("author__age"), + num_tasters=Count("tasters"), + ) + self.assertSequenceEqual( + recipes, + [{"pk": recipe.pk, "name": 46, "num_tasters": 1}], + ) + + def test_aggregate_group_by_unseen_columns_unmanaged(self): + author = AuthorProxy.objects.latest("pk") + shadow_author = AuthorProxy.objects.create(name=author.name, age=author.age - 2) + recipe = RecipeProxy.objects.create(name="Dahl", author=author) + shadow_recipe = RecipeProxy.objects.create( + name="Shadow Dahl", + author=shadow_author, + ) + recipe.tasters.add(shadow_author) + shadow_recipe.tasters.add(author) + # This selects how many tasters each author had according to a + # calculated field "name". The table has a column "name" that Django is + # unaware of, and is equal for the two authors. The grouping column + # cannot be referenced by its name ("name"), as it'd return one result + # which is incorrect. + author_recipes = ( + AuthorUnmanaged.objects.annotate( + name=Concat( + Value("Writer at "), + Cast(F("age"), output_field=CharField()), + ) + ) + .values("name") # Field used for grouping. + .annotate(num_recipes=Count("recipeunmanaged")) + .filter(num_recipes__gt=0) + .values("num_recipes") # Drop grouping column. + ) + self.assertSequenceEqual( + author_recipes, + [{"num_recipes": 1}, {"num_recipes": 1}], + ) + class JoinPromotionTests(TestCase): def test_ticket_21150(self): diff --git a/tests/postgres_tests/test_array.py b/tests/postgres_tests/test_array.py index 4808f886892..feebbd6f592 100644 --- a/tests/postgres_tests/test_array.py +++ b/tests/postgres_tests/test_array.py @@ -449,8 +449,8 @@ class TestQuerying(PostgreSQLTestCase): expected, ) - @skipUnlessDBFeature("allows_group_by_refs") - def test_group_by_order_by_aliases(self): + @skipUnlessDBFeature("allows_group_by_select_index") + def test_group_by_order_by_select_index(self): with self.assertNumQueries(1) as ctx: self.assertSequenceEqual( NullableIntegerArrayModel.objects.filter( @@ -467,7 +467,7 @@ class TestQuerying(PostgreSQLTestCase): ) alias = connection.ops.quote_name("field__0") sql = ctx[0]["sql"] - self.assertIn(f"GROUP BY {alias}", sql) + self.assertIn("GROUP BY 1", sql) self.assertIn(f"ORDER BY {alias}", sql) def test_index(self):