From 65ab4f9f03e70733df6afd9d8454ec3700155111 Mon Sep 17 00:00:00 2001 From: Hannes Ljungberg Date: Tue, 19 Nov 2019 14:59:06 +0100 Subject: [PATCH] Fixed #31147 -- Added SearchHeadline to django.contrib.postgres. --- django/contrib/postgres/search.py | 53 +++++++++++ docs/ref/contrib/postgres/search.txt | 54 +++++++++++ docs/releases/3.1.txt | 3 + tests/postgres_tests/test_search.py | 129 +++++++++++++++++++++++++-- tests/postgres_tests/test_trigram.py | 6 +- 5 files changed, 239 insertions(+), 6 deletions(-) diff --git a/django/contrib/postgres/search.py b/django/contrib/postgres/search.py index 484d4315b9..761ce3c70a 100644 --- a/django/contrib/postgres/search.py +++ b/django/contrib/postgres/search.py @@ -1,3 +1,5 @@ +import psycopg2 + from django.db.models import ( CharField, Expression, Field, FloatField, Func, Lookup, TextField, Value, ) @@ -230,6 +232,57 @@ class SearchRank(Func): super().__init__(*expressions) +class SearchHeadline(Func): + function = 'ts_headline' + template = '%(function)s(%(expressions)s%(options)s)' + output_field = TextField() + + def __init__( + self, expression, query, *, config=None, start_sel=None, stop_sel=None, + max_words=None, min_words=None, short_word=None, highlight_all=None, + max_fragments=None, fragment_delimiter=None, + ): + if not hasattr(query, 'resolve_expression'): + query = SearchQuery(query) + options = { + 'StartSel': start_sel, + 'StopSel': stop_sel, + 'MaxWords': max_words, + 'MinWords': min_words, + 'ShortWord': short_word, + 'HighlightAll': highlight_all, + 'MaxFragments': max_fragments, + 'FragmentDelimiter': fragment_delimiter, + } + self.options = { + option: value + for option, value in options.items() if value is not None + } + expressions = (expression, query) + if config is not None: + config = SearchConfig.from_parameter(config) + expressions = (config,) + expressions + super().__init__(*expressions) + + def as_sql(self, compiler, connection, function=None, template=None): + options_sql = '' + 
options_params = [] + if self.options: + # getquoted() returns a quoted bytestring of the adapted value. + options_params.append(', '.join( + '%s=%s' % ( + option, + psycopg2.extensions.adapt(value).getquoted().decode(), + ) for option, value in self.options.items() + )) + options_sql = ', %s' + sql, params = super().as_sql( + compiler, connection, function=function, template=template, + options=options_sql, + ) + return sql, params + options_params + + SearchVectorField.register_lookup(SearchVectorExact) diff --git a/docs/ref/contrib/postgres/search.txt b/docs/ref/contrib/postgres/search.txt index 813a3db57a..949d95929e 100644 --- a/docs/ref/contrib/postgres/search.txt +++ b/docs/ref/contrib/postgres/search.txt @@ -132,6 +132,60 @@ order by relevancy:: See :ref:`postgresql-fts-weighting-queries` for an explanation of the ``weights`` parameter. +``SearchHeadline`` +================== + +.. versionadded:: 3.1 + +.. class:: SearchHeadline(expression, query, config=None, start_sel=None, stop_sel=None, max_words=None, min_words=None, short_word=None, highlight_all=None, max_fragments=None, fragment_delimiter=None) + +Accepts a single text field or an expression, a query, a config, and a set of +options. Returns highlighted search results. + +Set the ``start_sel`` and ``stop_sel`` parameters to the string values to be +used to wrap highlighted query terms in the document. PostgreSQL's defaults are +``<b>`` and ``</b>``. + +Provide integer values to the ``max_words`` and ``min_words`` parameters to +determine the longest and shortest headlines. PostgreSQL's defaults are 35 and +15. + +Provide an integer value to the ``short_word`` parameter to discard words of +this length or less in each headline. PostgreSQL's default is 3. + +Set the ``highlight_all`` parameter to ``True`` to use the whole document in +place of a fragment and ignore ``max_words``, ``min_words``, and ``short_word`` +parameters. That's disabled by default in PostgreSQL. 
+ +Provide a non-zero integer value to the ``max_fragments`` parameter to set the maximum +number of fragments to display. That's disabled by default in PostgreSQL. + +Set the ``fragment_delimiter`` string parameter to configure the delimiter +between fragments. PostgreSQL's default is ``" ... "``. + +The PostgreSQL documentation has more details on `highlighting search +results`_. + +Usage example:: + + >>> from django.contrib.postgres.search import SearchHeadline, SearchQuery + >>> query = SearchQuery('red tomato') + >>> entry = Entry.objects.annotate( + ... headline=SearchHeadline( + ... 'body_text', + ... query, + ... start_sel='<span>', + ... stop_sel='</span>', + ... ), + ... ).get() + >>> print(entry.headline) + Sandwich with <span>tomato</span> and <span>red</span> cheese. + +See :ref:`postgresql-fts-search-configuration` for an explanation of the +``config`` parameter. + +.. _highlighting search results: https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-HEADLINE + .. _postgresql-fts-search-configuration: Changing the search configuration diff --git a/docs/releases/3.1.txt b/docs/releases/3.1.txt index 669f2ca01e..b1ae1134cb 100644 --- a/docs/releases/3.1.txt +++ b/docs/releases/3.1.txt @@ -108,6 +108,9 @@ Minor features * :class:`~django.contrib.postgres.search.SearchQuery` now supports ``'websearch'`` search type on PostgreSQL 11+. +* The new :class:`~django.contrib.postgres.search.SearchHeadline` class allows + highlighting search results. + :mod:`django.contrib.redirects` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tests/postgres_tests/test_search.py b/tests/postgres_tests/test_search.py index 765b846fb3..298932ba2e 100644 --- a/tests/postgres_tests/test_search.py +++ b/tests/postgres_tests/test_search.py @@ -5,16 +5,20 @@ These tests use dialogue from the 1975 film Monty Python and the Holy Grail. All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the transcript. 
""" -from django.contrib.postgres.search import ( - SearchConfig, SearchQuery, SearchRank, SearchVector, -) from django.db import connection from django.db.models import F -from django.test import SimpleTestCase, modify_settings, skipUnlessDBFeature +from django.test import modify_settings, skipUnlessDBFeature from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase from .models import Character, Line, Scene +try: + from django.contrib.postgres.search import ( + SearchConfig, SearchHeadline, SearchQuery, SearchRank, SearchVector, + ) +except ImportError: + pass + class GrailTestData: @@ -436,7 +440,7 @@ class SearchVectorIndexTests(PostgreSQLTestCase): ) -class SearchQueryTests(SimpleTestCase): +class SearchQueryTests(PostgreSQLSimpleTestCase): def test_str(self): tests = ( (~SearchQuery('a'), '~SearchQuery(a)'), @@ -460,3 +464,118 @@ class SearchQueryTests(SimpleTestCase): for query, expected_str in tests: with self.subTest(query=query): self.assertEqual(str(query), expected_str) + + +@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'}) +class SearchHeadlineTests(GrailTestData, PostgreSQLTestCase): + def test_headline(self): + searched = Line.objects.annotate( + headline=SearchHeadline( + F('dialogue'), + SearchQuery('brave sir robin'), + config=SearchConfig('english'), + ), + ).get(pk=self.verse0.pk) + self.assertEqual( + searched.headline, + '<b>Robin</b>. He was not at all afraid to be killed in nasty ' + 'ways. <b>Brave</b>, <b>brave</b>, <b>brave</b>, <b>brave</b> ' + '<b>Sir</b> <b>Robin</b>', + ) + + def test_headline_untyped_args(self): + searched = Line.objects.annotate( + headline=SearchHeadline('dialogue', 'killed', config='english'), + ).get(pk=self.verse0.pk) + self.assertEqual( + searched.headline, + 'Robin. He was not at all afraid to be <b>killed</b> in nasty ' + 'ways. 
Brave, brave, brave, brave Sir Robin!', + ) + + def test_headline_with_config(self): + searched = Line.objects.annotate( + headline=SearchHeadline( + 'dialogue', + SearchQuery('cadeaux', config='french'), + config='french', + ), + ).get(pk=self.french.pk) + self.assertEqual( + searched.headline, + 'Oh. Un beau <b>cadeau</b>. Oui oui.', + ) + + def test_headline_with_config_from_field(self): + searched = Line.objects.annotate( + headline=SearchHeadline( + 'dialogue', + SearchQuery('cadeaux', config=F('dialogue_config')), + config=F('dialogue_config'), + ), + ).get(pk=self.french.pk) + self.assertEqual( + searched.headline, + 'Oh. Un beau <b>cadeau</b>. Oui oui.', + ) + + def test_headline_separator_options(self): + searched = Line.objects.annotate( + headline=SearchHeadline( + 'dialogue', + 'brave sir robin', + start_sel='<span>', + stop_sel='</span>', + ), + ).get(pk=self.verse0.pk) + self.assertEqual( + searched.headline, + '<span>Robin</span>. He was not at all afraid to be killed in ' + 'nasty ways. <span>Brave</span>, <span>brave</span>, <span>brave' + '</span>, <span>brave</span> <span>Sir</span> <span>Robin</span>', + ) + + def test_headline_highlight_all_option(self): + searched = Line.objects.annotate( + headline=SearchHeadline( + 'dialogue', + SearchQuery('brave sir robin', config='english'), + highlight_all=True, + ), + ).get(pk=self.verse0.pk) + self.assertIn( + '<b>Bravely</b> bold <b>Sir</b> <b>Robin</b>, rode forth from ' + 'Camelot. He was not afraid to die, o ', + searched.headline, + ) + + def test_headline_short_word_option(self): + searched = Line.objects.annotate( + headline=SearchHeadline( + 'dialogue', + SearchQuery('brave sir robin', config='english'), + short_word=6, + ), + ).get(pk=self.verse0.pk) + self.assertIs(searched.headline.endswith( + '<b>Brave</b>, <b>brave</b>, <b>brave</b>, <b>brave</b> <b>Sir</b>' + ), True) + + def test_headline_fragments_words_options(self): + searched = Line.objects.annotate( + headline=SearchHeadline( + 'dialogue', + SearchQuery('brave sir robin', config='english'), + fragment_delimiter='...<br>', + max_fragments=4, + max_words=3, + min_words=1, + ), + ).get(pk=self.verse0.pk) + self.assertEqual( + searched.headline, + '<b>Sir</b> <b>Robin</b>, rode...<br>' + '<b>Brave</b> <b>Sir</b> <b>Robin</b>...<br>' + '<b>Brave</b>, <b>brave</b>, <b>brave</b>...<br>' + '<b>brave</b> <b>Sir</b> <b>Robin</b>', + ) diff --git a/tests/postgres_tests/test_trigram.py b/tests/postgres_tests/test_trigram.py index 2a123faa5e..19ac4cee31 100644 --- a/tests/postgres_tests/test_trigram.py +++ b/tests/postgres_tests/test_trigram.py @@ -1,9 +1,13 @@ -from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity from django.test import modify_settings from . import PostgreSQLTestCase from .models import CharFieldModel, TextFieldModel +try: + from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity +except ImportError: + pass + @modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'}) class TrigramTest(PostgreSQLTestCase):