diff --git a/django/contrib/postgres/apps.py b/django/contrib/postgres/apps.py index c172281cce..e7cdfd3866 100644 --- a/django/contrib/postgres/apps.py +++ b/django/contrib/postgres/apps.py @@ -3,7 +3,7 @@ from django.db.backends.signals import connection_created from django.db.models import CharField, TextField from django.utils.translation import ugettext_lazy as _ -from .lookups import Unaccent +from .lookups import SearchLookup, Unaccent from .signals import register_hstore_handler @@ -15,3 +15,5 @@ class PostgresConfig(AppConfig): connection_created.connect(register_hstore_handler) CharField.register_lookup(Unaccent) TextField.register_lookup(Unaccent) + CharField.register_lookup(SearchLookup) + TextField.register_lookup(SearchLookup) diff --git a/django/contrib/postgres/lookups.py b/django/contrib/postgres/lookups.py index cdecd8d6ba..1a71725678 100644 --- a/django/contrib/postgres/lookups.py +++ b/django/contrib/postgres/lookups.py @@ -1,5 +1,7 @@ from django.db.models import Lookup, Transform +from .search import SearchVector, SearchVectorExact, SearchVectorField + class PostgresSimpleLookup(Lookup): def as_sql(self, qn, connection): @@ -43,3 +45,13 @@ class Unaccent(Transform): bilateral = True lookup_name = 'unaccent' function = 'UNACCENT' + + +class SearchLookup(SearchVectorExact): + lookup_name = 'search' + + def process_lhs(self, qn, connection): + if not isinstance(self.lhs.output_field, SearchVectorField): + self.lhs = SearchVector(self.lhs) + lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection) + return lhs, lhs_params diff --git a/django/contrib/postgres/search.py b/django/contrib/postgres/search.py new file mode 100644 index 0000000000..91358c62aa --- /dev/null +++ b/django/contrib/postgres/search.py @@ -0,0 +1,187 @@ +from django.db.models import Field, FloatField +from django.db.models.expressions import CombinedExpression, Func, Value +from django.db.models.functions import Coalesce +from django.db.models.lookups import 
Lookup + + +class SearchVectorExact(Lookup): + lookup_name = 'exact' + + def process_rhs(self, qn, connection): + if not hasattr(self.rhs, 'resolve_expression'): + config = getattr(self.lhs, 'config', None) + self.rhs = SearchQuery(self.rhs, config=config) + rhs, rhs_params = super(SearchVectorExact, self).process_rhs(qn, connection) + return rhs, rhs_params + + def as_sql(self, qn, connection): + lhs, lhs_params = self.process_lhs(qn, connection) + rhs, rhs_params = self.process_rhs(qn, connection) + params = lhs_params + rhs_params + return '%s @@ %s = true' % (lhs, rhs), params + + +class SearchVectorField(Field): + + def db_type(self, connection): + return 'tsvector' + + +class SearchQueryField(Field): + + def db_type(self, connection): + return 'tsquery' + + +class SearchVectorCombinable(object): + ADD = '||' + + def _combine(self, other, connector, reversed, node=None): + if not isinstance(other, SearchVectorCombinable) or not self.config == other.config: + raise TypeError('SearchVector can only be combined with other SearchVectors') + if reversed: + return CombinedSearchVector(other, connector, self, self.config) + return CombinedSearchVector(self, connector, other, self.config) + + +class SearchVector(SearchVectorCombinable, Func): + function = 'to_tsvector' + arg_joiner = " || ' ' || " + _output_field = SearchVectorField() + config = None + + def __init__(self, *expressions, **extra): + super(SearchVector, self).__init__(*expressions, **extra) + self.source_expressions = [ + Coalesce(expression, Value('')) for expression in self.source_expressions + ] + self.config = self.extra.get('config', self.config) + weight = self.extra.get('weight') + if weight is not None and not hasattr(weight, 'resolve_expression'): + weight = Value(weight) + self.weight = weight + + def resolve_expression(self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False): + resolved = super(SearchVector, self).resolve_expression(query, allow_joins, reuse, 
summarize, for_save) + if self.config: + if not hasattr(self.config, 'resolve_expression'): + resolved.config = Value(self.config).resolve_expression(query, allow_joins, reuse, summarize, for_save) + else: + resolved.config = self.config.resolve_expression(query, allow_joins, reuse, summarize, for_save) + return resolved + + def as_sql(self, compiler, connection, function=None, template=None): + config_params = [] + if template is None: + if self.config: + config_sql, config_params = compiler.compile(self.config) + template = "%(function)s({}::regconfig, %(expressions)s)".format(config_sql.replace('%', '%%')) + else: + template = self.template + sql, params = super(SearchVector, self).as_sql(compiler, connection, function=function, template=template) + extra_params = [] + if self.weight: + weight_sql, extra_params = compiler.compile(self.weight) + sql = 'setweight({}, {})'.format(sql, weight_sql) + return sql, config_params + params + extra_params + + +class CombinedSearchVector(SearchVectorCombinable, CombinedExpression): + def __init__(self, lhs, connector, rhs, config, output_field=None): + self.config = config + super(CombinedSearchVector, self).__init__(lhs, connector, rhs, output_field) + + +class SearchQuery(Value): + invert = False + _output_field = SearchQueryField() + config = None + + BITAND = '&&' + BITOR = '||' + + def __init__(self, value, output_field=None, **extra): + self.config = extra.pop('config', self.config) + self.invert = extra.pop('invert', self.invert) + super(SearchQuery, self).__init__(value, output_field=output_field) + + def resolve_expression(self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False): + resolved = super(SearchQuery, self).resolve_expression(query, allow_joins, reuse, summarize, for_save) + if self.config: + if not hasattr(self.config, 'resolve_expression'): + resolved.config = Value(self.config).resolve_expression(query, allow_joins, reuse, summarize, for_save) + else: + resolved.config = 
self.config.resolve_expression(query, allow_joins, reuse, summarize, for_save) + return resolved + + def as_sql(self, compiler, connection): + params = [self.value] + if self.config: + config_sql, config_params = compiler.compile(self.config) + template = 'plainto_tsquery({}::regconfig, %s)'.format(config_sql) + params = config_params + [self.value] + else: + template = 'plainto_tsquery(%s)' + if self.invert: + template = '!!({})'.format(template) + return template, params + + def _combine(self, other, connector, reversed, node=None): + combined = super(SearchQuery, self)._combine(other, connector, reversed, node) + combined.output_field = SearchQueryField() + return combined + + # On Combinable, these are not implemented to reduce confusion with Q. In + # this case we are actually (ab)using them to do logical combination so + # it's consistent with other usage in Django. + def __or__(self, other): + return self._combine(other, self.BITOR, False) + + def __ror__(self, other): + return self._combine(other, self.BITOR, True) + + def __and__(self, other): + return self._combine(other, self.BITAND, False) + + def __rand__(self, other): + return self._combine(other, self.BITAND, True) + + def __invert__(self): + extra = { + 'invert': not self.invert, + 'config': self.config, + } + return type(self)(self.value, **extra) + + +class SearchRank(Func): + function = 'ts_rank' + _output_field = FloatField() + + def __init__(self, vector, query, **extra): + if not hasattr(vector, 'resolve_expression'): + vector = SearchVector(vector) + if not hasattr(query, 'resolve_expression'): + query = SearchQuery(query) + weights = extra.get('weights') + if weights is not None and not hasattr(weights, 'resolve_expression'): + weights = Value(weights) + self.weights = weights + super(SearchRank, self).__init__(vector, query, **extra) + + def as_sql(self, compiler, connection, function=None, template=None): + extra_params = [] + extra_context = {} + if template is None and 
self.extra.get('weights'): + if self.weights: + template = '%(function)s(%(weights)s, %(expressions)s)' + weight_sql, extra_params = compiler.compile(self.weights) + extra_context['weights'] = weight_sql + sql, params = super(SearchRank, self).as_sql( + compiler, connection, + function=function, template=template, **extra_context + ) + return sql, extra_params + params + + +SearchVectorField.register_lookup(SearchVectorExact) diff --git a/django/db/backends/postgresql/operations.py b/django/db/backends/postgresql/operations.py index f0715a24e3..9b64615001 100644 --- a/django/db/backends/postgresql/operations.py +++ b/django/db/backends/postgresql/operations.py @@ -254,3 +254,9 @@ class DatabaseOperations(BaseDatabaseOperations): rhs_sql, rhs_params = rhs return "age(%s, %s)" % (lhs_sql, rhs_sql), lhs_params + rhs_params return super(DatabaseOperations, self).subtract_temporals(internal_type, lhs, rhs) + + def fulltext_search_sql(self, field_name): + raise NotImplementedError( + "Add 'django.contrib.postgres' to settings.INSTALLED_APPS to use " + "the search operator." + ) diff --git a/django/db/models/expressions.py b/django/db/models/expressions.py index 89d64197e6..2b883c53ac 100644 --- a/django/db/models/expressions.py +++ b/django/db/models/expressions.py @@ -125,9 +125,11 @@ class BaseExpression(object): # aggregate specific fields is_summary = False + _output_field = None def __init__(self, output_field=None): - self._output_field = output_field + if output_field is not None: + self._output_field = output_field def get_db_converters(self, connection): return [self.convert_value] + self.output_field.get_db_converters(connection) diff --git a/docs/index.txt b/docs/index.txt index 1a2bc2667e..58044f574e 100644 --- a/docs/index.txt +++ b/docs/index.txt @@ -105,6 +105,7 @@ manipulating the data of your Web application. 
Learn more about it below: :doc:`Raw SQL ` | :doc:`Transactions ` | :doc:`Aggregation ` | + :doc:`Search ` | :doc:`Custom fields ` | :doc:`Multiple databases ` | :doc:`Custom lookups ` | diff --git a/docs/ref/contrib/postgres/index.txt b/docs/ref/contrib/postgres/index.txt index fe5b3be2ab..d04ed14889 100644 --- a/docs/ref/contrib/postgres/index.txt +++ b/docs/ref/contrib/postgres/index.txt @@ -37,4 +37,5 @@ release. Some fields require higher versions. functions lookups operations + search validators diff --git a/docs/ref/contrib/postgres/search.txt b/docs/ref/contrib/postgres/search.txt new file mode 100644 index 0000000000..21f41ff534 --- /dev/null +++ b/docs/ref/contrib/postgres/search.txt @@ -0,0 +1,191 @@ +================ +Full text search +================ + +.. versionadded:: 1.10 + +The database functions in the ``django.contrib.postgres.search`` module ease +the use of PostgreSQL's `full text search engine +`_. + +For the examples in this document, we'll use the models defined in +:doc:`/topics/db/queries`. + +.. seealso:: + + For a high-level overview of searching, see the :doc:`topic documentation + `. + +.. currentmodule:: django.contrib.postgres.search + +The ``search`` lookup +===================== + +.. fieldlookup:: search + +The simplest way to use full text search is to search a single term against a +single column in the database. For example:: + + >>> Entry.objects.filter(body_text__search='Cheese') + [, ] + +This creates a ``to_tsvector`` in the database from the ``body_text`` field +and a ``plainto_tsquery`` from the search term ``'Cheese'``, both using the +default database search configuration. The results are obtained by matching the +query and the vector. + +To use the ``search`` lookup, ``'django.contrib.postgres'`` must be in your +:setting:`INSTALLED_APPS`. + +``SearchVector`` +================ + +.. class:: SearchVector(\*expressions, config=None, weight=None) + +Searching against a single field is great but rather limiting.
The ``Entry`` +instances we're searching belong to a ``Blog``, which has a ``tagline`` field. +To query against both fields, use a ``SearchVector``:: + + >>> from django.contrib.postgres.search import SearchVector + >>> Entry.objects.annotate( + ... search=SearchVector('body_text', 'blog__tagline'), + ... ).filter(search='Cheese') + [, ] + +The arguments to ``SearchVector`` can be any +:class:`~django.db.models.Expression` or the name of a field. Multiple +arguments will be concatenated together using a space so that the search +document includes them all. + +``SearchVector`` objects can be combined together, allowing you to reuse them. +For example:: + + >>> Entry.objects.annotate( + ... search=SearchVector('body_text') + SearchVector('blog__tagline'), + ... ).filter(search='Cheese') + [, ] + +See :ref:`postgresql-fts-search-configuration` and +:ref:`postgresql-fts-weighting-queries` for an explanation of the ``config`` +and ``weight`` parameters. + +``SearchQuery`` +=============== + +.. class:: SearchQuery(value, config=None) + +``SearchQuery`` translates the terms the user provides into a search query +object that the database compares to a search vector. By default, all the words +the user provides are passed through the stemming algorithms, and then it +looks for matches for all of the resulting terms. + +``SearchQuery`` terms can be combined logically to provide more flexibility:: + + >>> from django.contrib.postgres.search import SearchQuery + >>> SearchQuery('potato') & SearchQuery('ireland') # potato AND ireland + >>> SearchQuery('potato') | SearchQuery('penguin') # potato OR penguin + >>> ~SearchQuery('sausage') # NOT sausage + +See :ref:`postgresql-fts-search-configuration` for an explanation of the +``config`` parameter. + +``SearchRank`` +============== + +.. class:: SearchRank(vector, query, weights=None) + +So far, we've just returned the results for which any match between the vector +and the query are possible. 
It's likely you may wish to order the results by +some sort of relevancy. PostgreSQL provides a ranking function which takes into +account how often the query terms appear in the document, how close together +the terms are in the document, and how important the part of the document is +where they occur. The better the match, the higher the value of the rank. To +order by relevancy:: + + >>> from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector + >>> vector = SearchVector('body_text') + >>> query = SearchQuery('cheese') + >>> Entry.objects.annotate(rank=SearchRank(vector, query)).order_by('-rank') + [, ] + +See :ref:`postgresql-fts-weighting-queries` for an explanation of the +``weights`` parameter. + +.. _postgresql-fts-search-configuration: + +Changing the search configuration +================================= + +You can specify the ``config`` attribute to a :class:`SearchVector` and +:class:`SearchQuery` to use a different search configuration. This allows using +different language parsers and dictionaries as defined by the database:: + + >>> from django.contrib.postgres.search import SearchQuery, SearchVector + >>> Entry.objects.annotate( + ... search=SearchVector('body_text', config='french'), + ... ).filter(search=SearchQuery('œuf', config='french')) + [] + +The value of ``config`` could also be stored in another column:: + + >>> from django.db.models import F + >>> Entry.objects.annotate( + ... search=SearchVector('body_text', config=F('blog__language')), + ... ).filter(search=SearchQuery('œuf', config=F('blog__language'))) + [] + +..
_postgresql-fts-weighting-queries: + +Weighting queries +================= + +Every field may not have the same relevance in a query, so you can set weights +of various vectors before you combine them:: + + >>> from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector + >>> vector = SearchVector('body_text', weight='A') + SearchVector('blog__tagline', weight='B') + >>> query = SearchQuery('cheese') + >>> Entry.objects.annotate(rank=SearchRank(vector, query)).filter(rank__gte=0.3).order_by('rank') + +The weight should be one of the following letters: D, C, B, A. By default, +these weights refer to the numbers ``0.1``, ``0.2``, ``0.4``, and ``1.0``, +respectively. If you wish to weight them differently, pass a list of four +floats to :class:`SearchRank` as ``weights`` in the same order above:: + + >>> rank = SearchRank(vector, query, weights=[0.2, 0.4, 0.6, 0.8]) + >>> Entry.objects.annotate(rank=rank).filter(rank__gte=0.3).order_by('-rank') + +Performance +=========== + +Special database configuration isn't necessary to use any of these functions, +however, if you're searching more than a few hundred records, you're likely to +run into performance problems. Full text search is a more intensive process +than comparing the size of an integer, for example. + +In the event that all the fields you're querying on are contained within one +particular model, you can create a functional index which matches the search +vector you wish to use. For example: + +.. code-block:: sql + + CREATE INDEX body_text_search ON blog_entry (to_tsvector(body_text)); + +This index will then be used by subsequent queries. In many cases this will be +sufficient. + +``SearchVectorField`` +--------------------- + +.. class:: SearchVectorField + +If this approach becomes too slow, you can add a ``SearchVectorField`` to your +model. You'll need to keep it populated with triggers, for example, as +described in the `PostgreSQL documentation`_. 
You can then query the field as +if it were an annotated ``SearchVector``:: + + >>> Entry.objects.update(search_vector=SearchVector('body_text')) + >>> Entry.objects.filter(search_vector='potato') + [, ] + +.. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS diff --git a/docs/releases/1.10.txt b/docs/releases/1.10.txt index a3928188b0..e63b3bf42b 100644 --- a/docs/releases/1.10.txt +++ b/docs/releases/1.10.txt @@ -24,7 +24,14 @@ recommend** and only officially support the latest release of each series. What's new in Django 1.10 ========================= -... +Full text search for PostgreSQL +------------------------------- + +``django.contrib.postgres`` now includes a :doc:`collection of database +functions ` to allow the use of the full text +search engine. You can search across multiple fields in your relational +database, combine the searches with other lookups, use different language +configurations and weightings, and rank the results by relevance. Minor features -------------- diff --git a/docs/topics/db/index.txt b/docs/topics/db/index.txt index 79624d7fdf..51f60a65d7 100644 --- a/docs/topics/db/index.txt +++ b/docs/topics/db/index.txt @@ -14,6 +14,7 @@ model maps to a single database table. 
models queries aggregation + search managers sql transactions diff --git a/docs/topics/db/queries.txt b/docs/topics/db/queries.txt index ab58bb4f63..fad4bd73fd 100644 --- a/docs/topics/db/queries.txt +++ b/docs/topics/db/queries.txt @@ -27,7 +27,7 @@ models, which comprise a Weblog application: return self.name class Author(models.Model): - name = models.CharField(max_length=50) + name = models.CharField(max_length=200) email = models.EmailField() def __str__(self): # __unicode__ on Python 2 diff --git a/docs/topics/db/search.txt b/docs/topics/db/search.txt new file mode 100644 index 0000000000..fd62c6909c --- /dev/null +++ b/docs/topics/db/search.txt @@ -0,0 +1,129 @@ +====== +Search +====== + +A common task for web applications is to search some data in the database with +user input. In a simple case, this could be filtering a list of objects by a +category. A more complex use case might require searching with weighting, +categorization, highlighting, multiple languages, and so on. This document +explains some of the possible use cases and the tools you can use. + +We'll refer to the same models used in :doc:`/topics/db/queries`. + +Use Cases +========= + +Standard textual queries +------------------------ + +Text-based fields have a selection of simple matching operations. For example, +you may wish to allow looking up an author like so:: + + >>> Author.objects.filter(name__contains='Terry') + [, ] + +This is a very fragile solution as it requires the user to know an exact +substring of the author's name. A better approach could be a case-insensitive +match (:lookup:`icontains`), but this is only marginally better. + +A database's more advanced comparison functions +----------------------------------------------- + +If you're using PostgreSQL, Django provides :doc:`a selection of database +specific tools ` to allow you to leverage more +complex querying options. Other databases have different selections of tools, +possibly via plugins or user-defined functions.
Django doesn't include any +support for them at this time. We'll use some examples from PostgreSQL to +demonstrate the kind of functionality databases may have. + +.. admonition:: Searching in other databases + + All of the searching tools provided by :mod:`django.contrib.postgres` are + constructed entirely on public APIs such as :doc:`custom lookups + ` and :doc:`database functions + `. Depending on your database, you should + be able to construct queries to allow similar APIs. If there are specific + things which cannot be achieved this way, please open a ticket. + +In the above example, we determined that a case insensitive lookup would be +more useful. When dealing with non-English names, a further improvement is to +use :lookup:`unaccented comparison `:: + + >>> Author.objects.filter(name__unaccent__icontains='Helen') + [, , ] + +This shows another issue, where we are matching against a different spelling of +the name. In this case we have an asymmetry though - a search for ``Helen`` +will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option +would be to use a trigram comparison, which compares sequences of letters. + +For example:: + + >>> Author.objects.filter(name__unaccent__lower__trigram='Hélène') + [, ] + +Now we have a different problem - the longer name of "Helena Bonham Carter" +doesn't show up as it is much longer. Trigram searches consider all +combinations of three letters, and compare how many appear in both search and +source strings. For the longer name, there are more combinations which appear +in the source string so it is no longer considered a close match. + +The correct choice of comparison functions here depends on your particular data +set, for example the language(s) used and the type of text being searched. All +of the examples we've seen are on short strings where the user is likely to +enter something close (by varying definitions) to the source data.
+ +Document-based search +--------------------- + +Simple database operations are too simple an approach when you start +considering large blocks of text. Whereas the examples above can be thought of +as operations on a string of characters, full text search looks at the actual +words. Depending on the system used, it's likely to use some of the following +ideas: + +- Ignoring "stop words" such as "a", "the", "and". +- Stemming words, so that "pony" and "ponies" are considered similar. +- Weighting words based on different criteria such as how frequently they + appear in the text, or the importance of the fields, such as the title or + keywords, that they appear in. + +There are many alternatives for using searching software, some of the most +prominent are Elastic_ and Solr_. These are full document-based search +solutions. To use them with data from Django models, you'll need a layer which +translates your data into a textual document, including back-references to the +database ids. When a search using the engine returns a certain document, you +can then look it up in the database. There are a variety of third-party +libraries which are designed to help with this process. + +.. _Elastic: https://www.elastic.co/ +.. _Solr: http://lucene.apache.org/solr/ + +PostgreSQL support +~~~~~~~~~~~~~~~~~~ + +PostgreSQL has its own full text search implementation built-in. While not as +powerful as some other search engines, it has the advantage of being inside +your database and so can easily be combined with other relational queries such +as categorization. + +The :mod:`django.contrib.postgres` module provides some helpers to make these +queries. For example, a simple query might be to select all the blog entries +which mention "cheese":: + + >>> Entry.objects.filter(body_text__search='cheese') + [, ] + +You can also filter on a combination of fields and on related models:: + + >>> Entry.objects.annotate( + ... search=SearchVector('blog__tagline', 'body_text'), + ... 
).filter(search='cheese') + [ + , + , + , + ] + +See the ``contrib.postgres`` :doc:`/ref/contrib/postgres/search` document for +complete details. diff --git a/tests/postgres_tests/fields.py b/tests/postgres_tests/fields.py index 0926175e1b..d50c6d6a91 100644 --- a/tests/postgres_tests/fields.py +++ b/tests/postgres_tests/fields.py @@ -9,6 +9,7 @@ try: ArrayField, BigIntegerRangeField, DateRangeField, DateTimeRangeField, FloatRangeField, HStoreField, IntegerRangeField, JSONField, ) + from django.contrib.postgres.search import SearchVectorField except ImportError: class DummyArrayField(models.Field): def __init__(self, base_field, size=None, **kwargs): @@ -30,3 +31,4 @@ except ImportError: HStoreField = models.Field IntegerRangeField = models.Field JSONField = models.Field + SearchVectorField = models.Field diff --git a/tests/postgres_tests/migrations/0002_create_test_models.py b/tests/postgres_tests/migrations/0002_create_test_models.py index 872d8aeb58..3e6cbac246 100644 --- a/tests/postgres_tests/migrations/0002_create_test_models.py +++ b/tests/postgres_tests/migrations/0002_create_test_models.py @@ -114,6 +114,40 @@ class Migration(migrations.Migration): options=None, bases=None, ), + migrations.CreateModel( + name='Scene', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('scene', models.CharField(max_length=255)), + ('setting', models.CharField(max_length=255)), + ], + options=None, + bases=None, + ), + migrations.CreateModel( + name='Character', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('name', models.CharField(max_length=255)), + ], + options=None, + bases=None, + ), + migrations.CreateModel( + name='Line', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('scene', models.ForeignKey('postgres_tests.Scene', on_delete=models.SET_NULL)), + ('character', 
models.ForeignKey('postgres_tests.Character', on_delete=models.SET_NULL)), + ('dialogue', models.TextField(blank=True, null=True)), + ('dialogue_search_vector', SearchVectorField(blank=True, null=True)), + ('dialogue_config', models.CharField(max_length=100, blank=True, null=True)), + ], + options={ + 'required_db_vendor': 'postgresql', + }, + bases=None, + ), migrations.CreateModel( name='AggregateTestModel', fields=[ diff --git a/tests/postgres_tests/models.py b/tests/postgres_tests/models.py index 24addea358..d94eb90d4a 100644 --- a/tests/postgres_tests/models.py +++ b/tests/postgres_tests/models.py @@ -3,6 +3,7 @@ from django.db import connection, models from .fields import ( ArrayField, BigIntegerRangeField, DateRangeField, DateTimeRangeField, FloatRangeField, HStoreField, IntegerRangeField, JSONField, + SearchVectorField, ) @@ -78,6 +79,37 @@ class CharFieldModel(models.Model): class TextFieldModel(models.Model): field = models.TextField() + def __str__(self): + return self.field + + +# Scene/Character/Line models are used to test full text search. They're +# populated with content from Monty Python and the Holy Grail. 
+class Scene(models.Model): + scene = models.CharField(max_length=255) + setting = models.CharField(max_length=255) + + def __str__(self): + return self.scene + + +class Character(models.Model): + name = models.CharField(max_length=255) + + def __str__(self): + return self.name + + +class Line(PostgreSQLModel): + scene = models.ForeignKey('Scene', models.CASCADE) + character = models.ForeignKey('Character', models.CASCADE) + dialogue = models.TextField(blank=True, null=True) + dialogue_search_vector = SearchVectorField(blank=True, null=True) + dialogue_config = models.CharField(max_length=100, blank=True, null=True) + + def __str__(self): + return self.dialogue or '' + class RangesModel(PostgreSQLModel): ints = IntegerRangeField(blank=True, null=True) diff --git a/tests/postgres_tests/test_search.py b/tests/postgres_tests/test_search.py new file mode 100644 index 0000000000..8c12628be5 --- /dev/null +++ b/tests/postgres_tests/test_search.py @@ -0,0 +1,269 @@ +""" +Test PostgreSQL full text search. + +These tests use dialogue from the 1975 film Monty Python and the Holy Grail. +All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the +transcript. +""" +from unittest import skipIf + +from django.contrib.postgres.search import ( + SearchQuery, SearchRank, SearchVector, +) +from django.db.models import F +from django.test import ignore_warnings, modify_settings +from django.utils import six +from django.utils.deprecation import RemovedInDjango20Warning + +from . import PostgreSQLTestCase +from .models import Character, Line, Scene + + +class GrailTestData(object): + + @classmethod + def setUpTestData(cls): + cls.robin = Scene.objects.create(scene='Scene 10', setting='The dark forest of Ewing') + cls.minstrel = Character.objects.create(name='Minstrel') + verses = [ + ( + 'Bravely bold Sir Robin, rode forth from Camelot. ' + 'He was not afraid to die, o Brave Sir Robin. ' + 'He was not at all afraid to be killed in nasty ways. 
' + 'Brave, brave, brave, brave Sir Robin!' + ), + ( + 'He was not in the least bit scared to be mashed into a pulp, ' + 'Or to have his eyes gouged out, and his elbows broken. ' + 'To have his kneecaps split, and his body burned away, ' + 'And his limbs all hacked and mangled, brave Sir Robin!' + ), + ( + 'His head smashed in and his heart cut out, ' + 'And his liver removed and his bowels unplugged, ' + 'And his nostrils ripped and his bottom burned off,' + 'And his --' + ), + ] + cls.verses = [Line.objects.create( + scene=cls.robin, + character=cls.minstrel, + dialogue=verse, + ) for verse in verses] + cls.verse0, cls.verse1, cls.verse2 = cls.verses + + cls.witch_scene = Scene.objects.create(scene='Scene 5', setting="Sir Bedemir's Castle") + bedemir = Character.objects.create(name='Bedemir') + crowd = Character.objects.create(name='Crowd') + witch = Character.objects.create(name='Witch') + duck = Character.objects.create(name='Duck') + + cls.bedemir0 = Line.objects.create( + scene=cls.witch_scene, + character=bedemir, + dialogue='We shall use my larger scales!', + dialogue_config='english', + ) + cls.bedemir1 = Line.objects.create( + scene=cls.witch_scene, + character=bedemir, + dialogue='Right, remove the supports!', + dialogue_config='english', + ) + cls.duck = Line.objects.create(scene=cls.witch_scene, character=duck, dialogue=None) + cls.crowd = Line.objects.create(scene=cls.witch_scene, character=crowd, dialogue='A witch! A witch!') + cls.witch = Line.objects.create(scene=cls.witch_scene, character=witch, dialogue="It's a fair cop.") + + trojan_rabbit = Scene.objects.create(scene='Scene 8', setting="The castle of Our Master Ruiz' de lu la Ramper") + guards = Character.objects.create(name='French Guards') + cls.french = Line.objects.create( + scene=trojan_rabbit, + character=guards, + dialogue='Oh. Un cadeau. 
Oui oui.', + dialogue_config='french', + ) + + +class ContribPostgresNotInstalledTests(PostgreSQLTestCase): + @skipIf(six.PY2, "This test fails occasionally and weirdly on python 2") + @ignore_warnings(category=RemovedInDjango20Warning) + def test_search_lookup_missing(self): + msg = "Add 'django.contrib.postgres' to settings.INSTALLED_APPS to use the search operator." + with self.assertRaisesMessage(NotImplementedError, msg): + list(Line.objects.filter(dialogue__search='elbows')) + + +@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'}) +class SimpleSearchTest(GrailTestData, PostgreSQLTestCase): + + def test_simple(self): + searched = Line.objects.filter(dialogue__search='elbows') + self.assertSequenceEqual(searched, [self.verse1]) + + def test_non_exact_match(self): + searched = Line.objects.filter(dialogue__search='hearts') + self.assertSequenceEqual(searched, [self.verse2]) + + def test_search_two_terms(self): + searched = Line.objects.filter(dialogue__search='heart bowel') + self.assertSequenceEqual(searched, [self.verse2]) + + def test_search_two_terms_with_partial_match(self): + searched = Line.objects.filter(dialogue__search='Robin killed') + self.assertSequenceEqual(searched, [self.verse0]) + + +@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'}) +class SearchVectorFieldTest(GrailTestData, PostgreSQLTestCase): + def test_existing_vector(self): + Line.objects.update(dialogue_search_vector=SearchVector('dialogue')) + searched = Line.objects.filter(dialogue_search_vector=SearchQuery('Robin killed')) + self.assertSequenceEqual(searched, [self.verse0]) + + def test_existing_vector_config_explicit(self): + Line.objects.update(dialogue_search_vector=SearchVector('dialogue')) + searched = Line.objects.filter(dialogue_search_vector=SearchQuery('cadeaux', config='french')) + self.assertSequenceEqual(searched, [self.french]) + + +class MultipleFieldsTest(GrailTestData, PostgreSQLTestCase): + + def test_simple_on_dialogue(self): + 
searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue'), + ).filter(search='elbows') + self.assertSequenceEqual(searched, [self.verse1]) + + def test_simple_on_scene(self): + searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue'), + ).filter(search='Forest') + self.assertSequenceEqual(searched, self.verses) + + def test_non_exact_match(self): + searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue'), + ).filter(search='heart') + self.assertSequenceEqual(searched, [self.verse2]) + + def test_search_two_terms(self): + searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue'), + ).filter(search='heart forest') + self.assertSequenceEqual(searched, [self.verse2]) + + def test_terms_adjacent(self): + searched = Line.objects.annotate( + search=SearchVector('character__name', 'dialogue'), + ).filter(search='minstrel') + self.assertSequenceEqual(searched, self.verses) + searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue'), + ).filter(search='minstrelbravely') + self.assertSequenceEqual(searched, []) + + def test_search_with_null(self): + searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue'), + ).filter(search='bedemir') + self.assertEqual(set(searched), {self.bedemir0, self.bedemir1, self.crowd, self.witch, self.duck}) + + def test_config_query_explicit(self): + searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue', config='french'), + ).filter(search=SearchQuery('cadeaux', config='french')) + self.assertSequenceEqual(searched, [self.french]) + + def test_config_query_implicit(self): + searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue', config='french'), + ).filter(search='cadeaux') + self.assertSequenceEqual(searched, [self.french]) + + def test_config_from_field_explicit(self): + searched = Line.objects.annotate( + 
search=SearchVector('scene__setting', 'dialogue', config=F('dialogue_config')), + ).filter(search=SearchQuery('cadeaux', config=F('dialogue_config'))) + self.assertSequenceEqual(searched, [self.french]) + + def test_config_from_field_implicit(self): + searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue', config=F('dialogue_config')), + ).filter(search='cadeaux') + self.assertSequenceEqual(searched, [self.french]) + + +@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'}) +class TestCombinations(GrailTestData, PostgreSQLTestCase): + + def test_vector_add(self): + searched = Line.objects.annotate( + search=SearchVector('scene__setting') + SearchVector('character__name'), + ).filter(search='bedemir') + self.assertEqual(set(searched), {self.bedemir0, self.bedemir1, self.crowd, self.witch, self.duck}) + + def test_vector_add_multi(self): + searched = Line.objects.annotate( + search=( + SearchVector('scene__setting') + + SearchVector('character__name') + + SearchVector('dialogue') + ), + ).filter(search='bedemir') + self.assertEqual(set(searched), {self.bedemir0, self.bedemir1, self.crowd, self.witch, self.duck}) + + def test_query_and(self): + searched = Line.objects.annotate( + search=SearchVector('scene__setting', 'dialogue'), + ).filter(search=SearchQuery('bedemir') & SearchQuery('scales')) + self.assertSequenceEqual(searched, [self.bedemir0]) + + def test_query_or(self): + searched = Line.objects.filter(dialogue__search=SearchQuery('kneecaps') | SearchQuery('nostrils')) + self.assertSequenceEqual(set(searched), {self.verse1, self.verse2}) + + def test_query_invert(self): + searched = Line.objects.filter(character=self.minstrel, dialogue__search=~SearchQuery('kneecaps')) + self.assertEqual(set(searched), {self.verse0, self.verse2}) + + +@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'}) +class TestRankingAndWeights(GrailTestData, PostgreSQLTestCase): + + def test_ranking(self): + searched = 
Line.objects.filter(character=self.minstrel).annotate( + rank=SearchRank(SearchVector('dialogue'), SearchQuery('brave sir robin')), + ).order_by('rank') + self.assertSequenceEqual(searched, [self.verse2, self.verse1, self.verse0]) + + def test_rank_passing_untyped_args(self): + searched = Line.objects.filter(character=self.minstrel).annotate( + rank=SearchRank('dialogue', 'brave sir robin'), + ).order_by('rank') + self.assertSequenceEqual(searched, [self.verse2, self.verse1, self.verse0]) + + def test_weights_in_vector(self): + vector = SearchVector('dialogue', weight='A') + SearchVector('character__name', weight='D') + searched = Line.objects.filter(scene=self.witch_scene).annotate( + rank=SearchRank(vector, SearchQuery('witch')), + ).order_by('-rank')[:2] + self.assertSequenceEqual(searched, [self.crowd, self.witch]) + + vector = SearchVector('dialogue', weight='D') + SearchVector('character__name', weight='A') + searched = Line.objects.filter(scene=self.witch_scene).annotate( + rank=SearchRank(vector, SearchQuery('witch')), + ).order_by('-rank')[:2] + self.assertSequenceEqual(searched, [self.witch, self.crowd]) + + def test_ranked_custom_weights(self): + vector = SearchVector('dialogue', weight='D') + SearchVector('character__name', weight='A') + searched = Line.objects.filter(scene=self.witch_scene).annotate( + rank=SearchRank(vector, SearchQuery('witch'), weights=[1, 0, 0, 0.5]), + ).order_by('-rank')[:2] + self.assertSequenceEqual(searched, [self.crowd, self.witch]) + + def test_ranking_chaining(self): + searched = Line.objects.filter(character=self.minstrel).annotate( + rank=SearchRank(SearchVector('dialogue'), SearchQuery('brave sir robin')), + ).filter(rank__gt=0.3) + self.assertSequenceEqual(searched, [self.verse0])