Fixed #29547 -- Added support for partial indexes.

Thanks to Ian Foote, Mariusz Felisiak, Simon Charettes, and
Markus Holtermann for comments and feedback.
This commit is contained in:
Mads Jensen 2018-09-13 09:34:02 +02:00 committed by Tim Graham
parent 9625d13f7b
commit a906c98982
17 changed files with 320 additions and 9 deletions

View File

@ -38,6 +38,7 @@ class PostGISSchemaEditor(DatabaseSchemaEditor):
"using": "USING %s" % self.geom_index_type,
"columns": field_column,
"extra": '',
"condition": '',
}
def _alter_column_type_sql(self, table, old_field, new_field, new_type):

View File

@ -276,6 +276,9 @@ class BaseDatabaseFeatures:
# in UPDATE statements to ensure the expression has the correct type?
requires_casted_case_in_updates = False
# Does the backend support partial indexes (CREATE INDEX ... WHERE ...)?
supports_partial_indexes = True
def __init__(self, connection):
self.connection = connection

View File

@ -75,7 +75,7 @@ class BaseDatabaseSchemaEditor:
sql_create_inline_fk = None
sql_delete_fk = "ALTER TABLE %(table)s DROP CONSTRAINT %(name)s"
sql_create_index = "CREATE INDEX %(name)s ON %(table)s (%(columns)s)%(extra)s"
sql_create_index = "CREATE INDEX %(name)s ON %(table)s (%(columns)s)%(extra)s%(condition)s"
sql_delete_index = "DROP INDEX %(name)s"
sql_create_pk = "ALTER TABLE %(table)s ADD CONSTRAINT %(name)s PRIMARY KEY (%(columns)s)"
@ -326,7 +326,7 @@ class BaseDatabaseSchemaEditor:
def add_index(self, model, index):
"""Add an index on a model."""
self.execute(index.create_sql(model, self))
self.execute(index.create_sql(model, self), params=None)
def remove_index(self, model, index):
"""Remove an index from a model."""
@ -905,7 +905,8 @@ class BaseDatabaseSchemaEditor:
return ''
def _create_index_sql(self, model, fields, *, name=None, suffix='', using='',
db_tablespace=None, col_suffixes=(), sql=None, opclasses=()):
db_tablespace=None, col_suffixes=(), sql=None, opclasses=(),
condition=''):
"""
Return the SQL statement to create the index for one or several fields.
`sql` can be specified if the syntax differs from the standard (GIS
@ -929,6 +930,7 @@ class BaseDatabaseSchemaEditor:
using=using,
columns=self._index_columns(table, columns, col_suffixes, opclasses),
extra=tablespace_sql,
condition=condition,
)
def _index_columns(self, table, columns, col_suffixes, opclasses):

View File

@ -51,6 +51,8 @@ class DatabaseFeatures(BaseDatabaseFeatures):
db_functions_convert_bytes_to_str = True
# Alias MySQL's TRADITIONAL to TEXT for consistency with other backends.
supported_explain_formats = {'JSON', 'TEXT', 'TRADITIONAL'}
# Neither MySQL nor MariaDB support partial indexes.
supports_partial_indexes = False
@cached_property
def _mysql_storage_engine(self):

View File

@ -24,6 +24,8 @@ class DatabaseSchemaEditor(BaseDatabaseSchemaEditor):
sql_create_pk = "ALTER TABLE %(table)s ADD CONSTRAINT %(name)s PRIMARY KEY (%(columns)s)"
sql_delete_pk = "ALTER TABLE %(table)s DROP PRIMARY KEY"
sql_create_index = 'CREATE INDEX %(name)s ON %(table)s (%(columns)s)%(extra)s'
def quote_value(self, value):
self.connection.ensure_connection()
quoted = self.connection.connection.escape(value, self.connection.connection.encoders)

View File

@ -55,6 +55,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
supports_over_clause = True
supports_ignore_conflicts = False
max_query_params = 2**16 - 1
supports_partial_indexes = False
@cached_property
def has_fetch_offset_support(self):

View File

@ -16,6 +16,7 @@ class DatabaseSchemaEditor(BaseDatabaseSchemaEditor):
sql_alter_column_no_default = "MODIFY %(column)s DEFAULT NULL"
sql_delete_column = "ALTER TABLE %(table)s DROP COLUMN %(column)s"
sql_delete_table = "DROP TABLE %(table)s CASCADE CONSTRAINTS"
sql_create_index = "CREATE INDEX %(name)s ON %(table)s (%(columns)s)%(extra)s"
def quote_value(self, value):
if isinstance(value, (datetime.date, datetime.time, datetime.datetime)):

View File

@ -12,7 +12,7 @@ class DatabaseSchemaEditor(BaseDatabaseSchemaEditor):
sql_delete_sequence = "DROP SEQUENCE IF EXISTS %(sequence)s CASCADE"
sql_set_sequence_max = "SELECT setval('%(sequence)s', MAX(%(column)s)) FROM %(table)s"
sql_create_index = "CREATE INDEX %(name)s ON %(table)s%(using)s (%(columns)s)%(extra)s"
sql_create_index = "CREATE INDEX %(name)s ON %(table)s%(using)s (%(columns)s)%(extra)s%(condition)s"
sql_delete_index = "DROP INDEX IF EXISTS %(name)s"
# Setting the constraint to IMMEDIATE runs any deferred checks to allow

View File

@ -34,6 +34,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
supports_cast_with_precision = False
time_cast_precision = 3
can_release_savepoints = True
supports_partial_indexes = Database.version_info >= (3, 8, 0)
# Is "ALTER TABLE ... RENAME COLUMN" supported?
can_alter_table_rename_column = Database.sqlite_version_info >= (3, 25, 0)

View File

@ -1,4 +1,6 @@
from django.db.backends.utils import names_digest, split_identifier
from django.db.models.query_utils import Q
from django.db.models.sql import Query
__all__ = ['Index']
@ -9,9 +11,13 @@ class Index:
# cross-database compatibility with Oracle)
max_name_length = 30
def __init__(self, *, fields=(), name=None, db_tablespace=None, opclasses=()):
def __init__(self, *, fields=(), name=None, db_tablespace=None, opclasses=(), condition=None):
if opclasses and not name:
raise ValueError('An index must be named to use opclasses.')
if not isinstance(condition, (type(None), Q)):
raise ValueError('Index.condition must be a Q instance.')
if condition and not name:
raise ValueError('An index must be named to use condition.')
if not isinstance(fields, (list, tuple)):
raise ValueError('Index.fields must be a list or tuple.')
if not isinstance(opclasses, (list, tuple)):
@ -35,6 +41,7 @@ class Index:
raise ValueError(errors)
self.db_tablespace = db_tablespace
self.opclasses = opclasses
self.condition = condition
def check_name(self):
errors = []
@ -48,12 +55,25 @@ class Index:
self.name = 'D%s' % self.name[1:]
return errors
def _get_condition_sql(self, model, schema_editor):
if self.condition is None:
return ''
query = Query(model=model)
query.add_q(self.condition)
compiler = query.get_compiler(connection=schema_editor.connection)
# Only the WhereNode is of interest for the partial index.
sql, params = query.where.as_sql(compiler=compiler, connection=schema_editor.connection)
# BaseDatabaseSchemaEditor does the same map on the params, but since
# it's handled outside of that class, the work is done here.
return ' WHERE ' + (sql % tuple(map(schema_editor.quote_value, params)))
def create_sql(self, model, schema_editor, using=''):
fields = [model._meta.get_field(field_name) for field_name, _ in self.fields_orders]
col_suffixes = [order[1] for order in self.fields_orders]
condition = self._get_condition_sql(model, schema_editor)
return schema_editor._create_index_sql(
model, fields, name=self.name, using=using, db_tablespace=self.db_tablespace,
col_suffixes=col_suffixes, opclasses=self.opclasses,
col_suffixes=col_suffixes, opclasses=self.opclasses, condition=condition,
)
def remove_sql(self, model, schema_editor):
@ -71,6 +91,8 @@ class Index:
kwargs['db_tablespace'] = self.db_tablespace
if self.opclasses:
kwargs['opclasses'] = self.opclasses
if self.condition:
kwargs['condition'] = self.condition
return (path, (), kwargs)
def clone(self):
@ -107,7 +129,10 @@ class Index:
self.check_name()
def __repr__(self):
return "<%s: fields='%s'>" % (self.__class__.__name__, ', '.join(self.fields))
return "<%s: fields='%s'%s>" % (
self.__class__.__name__, ', '.join(self.fields),
'' if self.condition is None else ', condition=%s' % self.condition,
)
def __eq__(self, other):
return (self.__class__ == other.__class__) and (self.deconstruct() == other.deconstruct())

View File

@ -21,7 +21,7 @@ options`_.
``Index`` options
=================
.. class:: Index(fields=(), name=None, db_tablespace=None, opclasses=())
.. class:: Index(fields=(), name=None, db_tablespace=None, opclasses=(), condition=None)
Creates an index (B-Tree) in the database.
@ -92,3 +92,44 @@ opclasses=['jsonb_path_ops'])`` creates a gin index on ``jsonfield`` using
``opclasses`` are ignored for databases besides PostgreSQL.
:attr:`Index.name` is required when using ``opclasses``.
``condition``
-------------
.. attribute:: Index.condition
.. versionadded:: 2.2
If the table is very large and your queries mostly target a subset of rows,
it may be useful to restrict an index to that subset. Specify a condition as a
:class:`~django.db.models.Q`. For example, ``condition=Q(pages__gt=400)``
indexes records with more than 400 pages.
:attr:`Index.name` is required when using ``condition``.
.. admonition:: Restrictions on PostgreSQL
PostgreSQL requires functions referenced in the condition to be be marked as
IMMUTABLE. Django doesn't validate this but PostgreSQL will error. This
means that functions such as :ref:`date-functions` and
:class:`~django.db.models.functions.Concat` aren't accepted. If you store
dates in :class:`~django.db.models.DateTimeField`, comparison to
:class:`~datetime.datetime` objects may require the ``tzinfo`` argument
to be provided because otherwise the comparison could result in a mutable
function due to the casting Django does for :ref:`lookups <field-lookups>`.
.. admonition:: Restrictions on SQLite
SQLite `imposes restrictions <https://www.sqlite.org/partialindex.html>`_
on how a partial index can be constructed.
.. admonition:: Oracle
Oracle does not support partial indexes. Instead, partial indexes can be
emulated using functional indexes. Use a :doc:`migration
</topics/migrations>` to add the index using :class:`.RunSQL`.
.. admonition:: MySQL and MariaDB
The ``condition`` argument is ignored with MySQL and MariaDB as neither
supports conditional indexes.

View File

@ -201,6 +201,8 @@ Models
* Added support for PostgreSQL operator classes (:attr:`.Index.opclasses`).
* Added support for partial indexes (:attr:`.Index.condition`).
* Added many :ref:`math database functions <math-functions>`.
* Setting the new ``ignore_conflicts`` parameter of
@ -288,6 +290,9 @@ Database backend API
* ``DatabaseFeatures.uses_savepoints`` now defaults to ``True``.
* Third party database backends must implement support for partial indexes or
set ``DatabaseFeatures.supports_partial_indexes`` to ``False``.
:mod:`django.contrib.gis`
-------------------------

View File

@ -27,6 +27,7 @@ class ArticleTranslation(models.Model):
class Article(models.Model):
headline = models.CharField(max_length=100)
pub_date = models.DateTimeField()
published = models.BooleanField(default=False)
# Add virtual relation to the ArticleTranslation model.
translation = CurrentTranslation(ArticleTranslation, models.CASCADE, ['id'], ['article'])

View File

@ -1,10 +1,15 @@
import datetime
from unittest import skipIf, skipUnless
from django.db import connection
from django.db.models import Index
from django.db.models.deletion import CASCADE
from django.db.models.fields.related import ForeignKey
from django.test import TestCase, TransactionTestCase
from django.db.models.query_utils import Q
from django.test import (
TestCase, TransactionTestCase, skipIfDBFeature, skipUnlessDBFeature,
)
from django.utils import timezone
from .models import (
Article, ArticleTranslation, IndexedArticle2, IndexTogetherSingleList,
@ -85,6 +90,28 @@ class SchemaIndexesNotPostgreSQLTests(TransactionTestCase):
editor.add_index(IndexedArticle2, index)
# The `condition` parameter is ignored by databases that don't support partial
# indexes.
@skipIfDBFeature('supports_partial_indexes')
class PartialIndexConditionIgnoredTests(TransactionTestCase):
available_apps = ['indexes']
def test_condition_ignored(self):
index = Index(
name='test_condition_ignored',
fields=['published'],
condition=Q(published=True),
)
with connection.schema_editor() as editor:
# This would error if condition weren't ignored.
editor.add_index(Article, index)
self.assertNotIn(
'WHERE %s.%s' % (editor.quote_name(Article._meta.db_table), 'published'),
str(index.create_sql(Article, editor))
)
@skipUnless(connection.vendor == 'postgresql', 'PostgreSQL tests')
class SchemaIndexesPostgreSQLTests(TransactionTestCase):
available_apps = ['indexes']
@ -139,6 +166,35 @@ class SchemaIndexesPostgreSQLTests(TransactionTestCase):
)
self.assertCountEqual(cursor.fetchall(), expected_ops_classes)
def test_ops_class_partial(self):
index = Index(
name='test_ops_class_partial',
fields=['body'],
opclasses=['text_pattern_ops'],
condition=Q(headline__contains='China'),
)
with connection.schema_editor() as editor:
editor.add_index(IndexedArticle2, index)
with editor.connection.cursor() as cursor:
cursor.execute(self.get_opclass_query % 'test_ops_class_partial')
self.assertCountEqual(cursor.fetchall(), [('text_pattern_ops', 'test_ops_class_partial')])
def test_ops_class_partial_tablespace(self):
indexname = 'test_ops_class_tblspace'
index = Index(
name=indexname,
fields=['body'],
opclasses=['text_pattern_ops'],
condition=Q(headline__contains='China'),
db_tablespace='pg_default',
)
with connection.schema_editor() as editor:
editor.add_index(IndexedArticle2, index)
self.assertIn('TABLESPACE "pg_default" ', str(index.create_sql(IndexedArticle2, editor)))
with editor.connection.cursor() as cursor:
cursor.execute(self.get_opclass_query % indexname)
self.assertCountEqual(cursor.fetchall(), [('text_pattern_ops', indexname)])
@skipUnless(connection.vendor == 'mysql', 'MySQL tests')
class SchemaIndexesMySQLTests(TransactionTestCase):
@ -178,3 +234,108 @@ class SchemaIndexesMySQLTests(TransactionTestCase):
if field_created:
with connection.schema_editor() as editor:
editor.remove_field(ArticleTranslation, new_field)
@skipUnlessDBFeature('supports_partial_indexes')
class PartialIndexTests(TestCase):
# Schema editor is used to create the index to test that it works.
def test_partial_index(self):
with connection.schema_editor() as editor:
index = Index(
name='recent_article_idx',
fields=['pub_date'],
condition=Q(
pub_date__gt=datetime.datetime(
year=2015, month=1, day=1,
# PostgreSQL would otherwise complain about the lookup
# being converted to a mutable function (by removing
# the timezone in the cast) which is forbidden.
tzinfo=timezone.get_current_timezone(),
),
)
)
self.assertIn(
'WHERE %s.%s' % (editor.quote_name(Article._meta.db_table), editor.quote_name("pub_date")),
str(index.create_sql(Article, schema_editor=editor))
)
editor.add_index(index=index, model=Article)
self.assertIn(index.name, connection.introspection.get_constraints(
cursor=connection.cursor(), table_name=Article._meta.db_table,
))
def test_integer_restriction_partial(self):
with connection.schema_editor() as editor:
index = Index(
name='recent_article_idx',
fields=['id'],
condition=Q(pk__gt=1),
)
self.assertIn(
'WHERE %s.%s' % (editor.quote_name(Article._meta.db_table), editor.quote_name('id')),
str(index.create_sql(Article, schema_editor=editor))
)
editor.add_index(index=index, model=Article)
self.assertIn(index.name, connection.introspection.get_constraints(
cursor=connection.cursor(), table_name=Article._meta.db_table,
))
def test_boolean_restriction_partial(self):
with connection.schema_editor() as editor:
index = Index(
name='published_index',
fields=['published'],
condition=Q(published=True),
)
self.assertIn(
'WHERE %s.%s' % (editor.quote_name(Article._meta.db_table), editor.quote_name('published')),
str(index.create_sql(Article, schema_editor=editor))
)
editor.add_index(index=index, model=Article)
self.assertIn(index.name, connection.introspection.get_constraints(
cursor=connection.cursor(), table_name=Article._meta.db_table,
))
def test_multiple_conditions(self):
with connection.schema_editor() as editor:
index = Index(
name='recent_article_idx',
fields=['pub_date', 'headline'],
condition=(
Q(pub_date__gt=datetime.datetime(
year=2015,
month=1,
day=1,
tzinfo=timezone.get_current_timezone(),
)) & Q(headline__contains='China')
),
)
sql = str(index.create_sql(Article, schema_editor=editor))
where = sql.find('WHERE')
self.assertIn(
'WHERE (%s.%s' % (editor.quote_name(Article._meta.db_table), editor.quote_name("pub_date")),
sql
)
# Because each backend has different syntax for the operators,
# check ONLY the occurrence of headline in the SQL.
self.assertGreater(sql.rfind('headline'), where)
editor.add_index(index=index, model=Article)
self.assertIn(index.name, connection.introspection.get_constraints(
cursor=connection.cursor(), table_name=Article._meta.db_table,
))
def test_is_null_condition(self):
with connection.schema_editor() as editor:
index = Index(
name='recent_article_idx',
fields=['pub_date'],
condition=Q(pub_date__isnull=False),
)
self.assertIn(
'WHERE %s.%s IS NOT NULL' % (editor.quote_name(Article._meta.db_table), editor.quote_name("pub_date")),
str(index.create_sql(Article, schema_editor=editor))
)
editor.add_index(index=index, model=Article)
self.assertIn(index.name, connection.introspection.get_constraints(
cursor=connection.cursor(), table_name=Article._meta.db_table,
))

View File

@ -1,5 +1,6 @@
from django.conf import settings
from django.db import connection, models
from django.db.models.query_utils import Q
from django.test import SimpleTestCase, skipUnlessDBFeature
from django.test.utils import isolate_apps
@ -14,8 +15,10 @@ class IndexesTests(SimpleTestCase):
def test_repr(self):
index = models.Index(fields=['title'])
multi_col_index = models.Index(fields=['title', 'author'])
partial_index = models.Index(fields=['title'], name='long_books_idx', condition=Q(pages__gt=400))
self.assertEqual(repr(index), "<Index: fields='title'>")
self.assertEqual(repr(multi_col_index), "<Index: fields='title, author'>")
self.assertEqual(repr(partial_index), "<Index: fields='title', condition=(AND: ('pages__gt', 400))>")
def test_eq(self):
index = models.Index(fields=['title'])
@ -52,6 +55,14 @@ class IndexesTests(SimpleTestCase):
with self.assertRaisesMessage(ValueError, msg):
models.Index(name='test_opclass', fields=['field', 'other'], opclasses=['jsonb_path_ops'])
def test_condition_requires_index_name(self):
with self.assertRaisesMessage(ValueError, 'An index must be named to use condition.'):
models.Index(condition=Q(pages__gt=400))
def test_condition_must_be_q(self):
with self.assertRaisesMessage(ValueError, 'Index.condition must be a Q instance.'):
models.Index(condition='invalid', name='long_book_idx')
def test_max_name_length(self):
msg = 'Index names cannot be longer than 30 characters.'
with self.assertRaisesMessage(ValueError, msg):
@ -110,6 +121,25 @@ class IndexesTests(SimpleTestCase):
{'fields': ['title'], 'name': 'model_index_title_196f42_idx', 'db_tablespace': 'idx_tbls'}
)
def test_deconstruct_with_condition(self):
index = models.Index(
name='big_book_index',
fields=['title'],
condition=Q(pages__gt=400),
)
index.set_name_with_model(Book)
path, args, kwargs = index.deconstruct()
self.assertEqual(path, 'django.db.models.Index')
self.assertEqual(args, ())
self.assertEqual(
kwargs,
{
'fields': ['title'],
'name': 'model_index_title_196f42_idx',
'condition': Q(pages__gt=400),
}
)
def test_clone(self):
index = models.Index(fields=['title'])
new_index = index.clone()

View File

@ -4,8 +4,12 @@ from django.contrib.postgres.indexes import (
BrinIndex, BTreeIndex, GinIndex, GistIndex, HashIndex, SpGistIndex,
)
from django.db import connection
from django.db.models import CharField
from django.db.models.functions import Length
from django.db.models.query_utils import Q
from django.db.utils import NotSupportedError
from django.test import skipUnlessDBFeature
from django.test.utils import register_lookup
from . import PostgreSQLTestCase
from .models import CharFieldModel, IntegerArrayModel
@ -172,6 +176,36 @@ class SchemaTests(PostgreSQLTestCase):
editor.remove_index(IntegerArrayModel, index)
self.assertNotIn(index_name, self.get_constraints(IntegerArrayModel._meta.db_table))
def test_partial_gin_index(self):
with register_lookup(CharField, Length):
index_name = 'char_field_gin_partial_idx'
index = GinIndex(fields=['field'], name=index_name, condition=Q(field__length=40))
with connection.schema_editor() as editor:
editor.add_index(CharFieldModel, index)
constraints = self.get_constraints(CharFieldModel._meta.db_table)
self.assertEqual(constraints[index_name]['type'], 'gin')
with connection.schema_editor() as editor:
editor.remove_index(CharFieldModel, index)
self.assertNotIn(index_name, self.get_constraints(CharFieldModel._meta.db_table))
def test_partial_gin_index_with_tablespace(self):
with register_lookup(CharField, Length):
index_name = 'char_field_gin_partial_idx'
index = GinIndex(
fields=['field'],
name=index_name,
condition=Q(field__length=40),
db_tablespace='pg_default',
)
with connection.schema_editor() as editor:
editor.add_index(CharFieldModel, index)
self.assertIn('TABLESPACE "pg_default" ', str(index.create_sql(CharFieldModel, editor)))
constraints = self.get_constraints(CharFieldModel._meta.db_table)
self.assertEqual(constraints[index_name]['type'], 'gin')
with connection.schema_editor() as editor:
editor.remove_index(CharFieldModel, index)
self.assertNotIn(index_name, self.get_constraints(CharFieldModel._meta.db_table))
@skipUnlessDBFeature('has_gin_pending_list_limit')
def test_gin_parameters(self):
index_name = 'integer_array_gin_params'

View File

@ -2135,6 +2135,7 @@ class SchemaTests(TransactionTestCase):
"using": "",
"columns": editor.quote_name(column),
"extra": "",
"condition": "",
}
)
if connection.features.uppercases_column_names: