Fixed #24938 -- Added PostgreSQL trigram support.
This commit is contained in:
parent
d7334b405f
commit
1962a96a30
|
@ -3,7 +3,7 @@ from django.db.backends.signals import connection_created
|
|||
from django.db.models import CharField, TextField
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from .lookups import SearchLookup, Unaccent
|
||||
from .lookups import SearchLookup, TrigramSimilar, Unaccent
|
||||
from .signals import register_hstore_handler
|
||||
|
||||
|
||||
|
@ -17,3 +17,5 @@ class PostgresConfig(AppConfig):
|
|||
TextField.register_lookup(Unaccent)
|
||||
CharField.register_lookup(SearchLookup)
|
||||
TextField.register_lookup(SearchLookup)
|
||||
CharField.register_lookup(TrigramSimilar)
|
||||
TextField.register_lookup(TrigramSimilar)
|
||||
|
|
|
@ -60,3 +60,8 @@ class SearchLookup(SearchVectorExact):
|
|||
self.lhs = SearchVector(self.lhs)
|
||||
lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection)
|
||||
return lhs, lhs_params
|
||||
|
||||
|
||||
class TrigramSimilar(PostgresSimpleLookup):
    """
    The ``trigram_similar`` field lookup.

    Registered on ``CharField`` and ``TextField``; it keeps rows whose
    trigram similarity to the given value exceeds the current similarity
    threshold, and requires the ``pg_trgm`` PostgreSQL extension.
    """
    # Name used in queries, e.g. ``name__trigram_similar='...'``.
    lookup_name = 'trigram_similar'
    # NOTE(review): presumably the percent sign is doubled so that a single
    # literal ``%`` operator survives parameter interpolation -- confirm
    # against PostgresSimpleLookup.
    operator = '%%'
|
||||
|
|
|
@ -40,3 +40,9 @@ class UnaccentExtension(CreateExtension):
|
|||
|
||||
def __init__(self):
|
||||
self.name = 'unaccent'
|
||||
|
||||
|
||||
class TrigramExtension(CreateExtension):
    """
    Migration operation that installs the ``pg_trgm`` extension.
    """

    def __init__(self):
        # The extension name is fixed, so no constructor argument is taken.
        self.name = 'pg_trgm'
|
||||
|
|
|
@ -185,3 +185,19 @@ class SearchRank(Func):
|
|||
|
||||
|
||||
SearchVectorField.register_lookup(SearchVectorExact)
|
||||
|
||||
|
||||
class TrigramBase(Func):
    """
    Common base for two-argument trigram expressions returning a float.
    """

    def __init__(self, expression, string, **extra):
        """
        Build the expression from ``expression`` and ``string``.

        ``string`` may be a plain value or anything exposing
        ``resolve_expression``; plain values are wrapped in ``Value``.
        Any ``extra`` keyword arguments are forwarded to ``Func``.
        """
        already_expression = hasattr(string, 'resolve_expression')
        rhs = string if already_expression else Value(string)
        super(TrigramBase, self).__init__(
            expression, rhs, output_field=FloatField(), **extra
        )
|
||||
|
||||
|
||||
class TrigramSimilarity(TrigramBase):
    """
    Trigram similarity between the two arguments, computed with the SQL
    ``SIMILARITY`` function.
    """
    function = 'SIMILARITY'
|
||||
|
||||
|
||||
class TrigramDistance(TrigramBase):
    """
    Trigram distance between the two arguments.
    """
    # No function name is emitted; the arguments are joined by the ``<->``
    # operator instead.
    # NOTE(review): presumably this renders as ``(expression <-> string)``
    # through Func's default template -- confirm.
    function = ''
    arg_joiner = ' <-> '
|
||||
|
|
|
@ -2,6 +2,32 @@
|
|||
PostgreSQL specific lookups
|
||||
===========================
|
||||
|
||||
Trigram similarity
|
||||
==================
|
||||
|
||||
.. fieldlookup:: trigram_similar
|
||||
|
||||
.. versionadded:: 1.10
|
||||
|
||||
The ``trigram_similar`` lookup allows you to perform trigram lookups,
|
||||
measuring the number of trigrams (three consecutive characters) shared, using a
|
||||
dedicated PostgreSQL extension. A trigram lookup is given an expression and
|
||||
returns results that have a similarity measurement greater than the current
|
||||
similarity threshold.
|
||||
|
||||
To use it, add ``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS`
|
||||
and activate the `pg_trgm extension
|
||||
<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
|
||||
PostgreSQL. You can install the extension using the
|
||||
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
|
||||
operation.
|
||||
|
||||
The ``trigram_similar`` lookup can be used on
|
||||
:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
|
||||
|
||||
>>> City.objects.filter(name__trigram_similar="Middlesborough")
|
||||
[<City: Middlesbrough>]
|
||||
|
||||
``Unaccent``
|
||||
============
|
||||
|
||||
|
|
|
@ -27,6 +27,16 @@ the ``django.contrib.postgres.operations`` module.
|
|||
which will install the ``hstore`` extension and also immediately set up the
|
||||
connection to interpret hstore data.
|
||||
|
||||
``TrigramExtension``
|
||||
====================
|
||||
|
||||
.. class:: TrigramExtension()
|
||||
|
||||
.. versionadded:: 1.10
|
||||
|
||||
A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
|
||||
that installs the ``pg_trgm`` extension.
|
||||
|
||||
``UnaccentExtension``
|
||||
=====================
|
||||
|
||||
|
|
|
@ -189,3 +189,58 @@ if it were an annotated ``SearchVector``::
|
|||
[<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]
|
||||
|
||||
.. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS
|
||||
|
||||
Trigram similarity
|
||||
==================
|
||||
|
||||
Another approach to searching is trigram similarity. A trigram is a group of
|
||||
three consecutive characters. In addition to the :lookup:`trigram_similar`
|
||||
lookup, you can use a couple of other expressions.
|
||||
|
||||
To use them, you need to activate the `pg_trgm extension
|
||||
<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
|
||||
PostgreSQL. You can install it using the
|
||||
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
|
||||
operation.
|
||||
|
||||
``TrigramSimilarity``
|
||||
---------------------
|
||||
|
||||
.. class:: TrigramSimilarity(expression, string, **extra)
|
||||
|
||||
.. versionadded:: 1.10
|
||||
|
||||
Accepts a field name or expression, and a string or expression. Returns the
|
||||
trigram similarity between the two arguments.
|
||||
|
||||
Usage example::
|
||||
|
||||
>>> from django.contrib.postgres.search import TrigramSimilarity
|
||||
>>> Author.objects.create(name='Katy Stevens')
|
||||
>>> Author.objects.create(name='Stephen Keats')
|
||||
>>> test = 'Katie Stephens'
|
||||
>>> Author.objects.annotate(
|
||||
... similarity=TrigramSimilarity('name', test),
|
||||
... ).filter(similarity__gt=0.3).order_by('-similarity')
|
||||
[<Author: Katy Stevens>, <Author: Stephen Keats>]
|
||||
|
||||
``TrigramDistance``
|
||||
-------------------
|
||||
|
||||
.. class:: TrigramDistance(expression, string, **extra)
|
||||
|
||||
.. versionadded:: 1.10
|
||||
|
||||
Accepts a field name or expression, and a string or expression. Returns the
|
||||
trigram distance between the two arguments.
|
||||
|
||||
Usage example::
|
||||
|
||||
>>> from django.contrib.postgres.search import TrigramDistance
|
||||
>>> Author.objects.create(name='Katy Stevens')
|
||||
>>> Author.objects.create(name='Stephen Keats')
|
||||
>>> test = 'Katie Stephens'
|
||||
>>> Author.objects.annotate(
|
||||
... distance=TrigramDistance('name', test),
|
||||
... ).filter(distance__lte=0.7).order_by('distance')
|
||||
[<Author: Katy Stevens>, <Author: Stephen Keats>]
|
||||
|
|
|
@ -33,6 +33,10 @@ search engine. You can search across multiple fields in your relational
|
|||
database, combine the searches with other lookups, use different language
|
||||
configurations and weightings, and rank the results by relevance.
|
||||
|
||||
It also now includes trigram support, using the :lookup:`trigram_similar`
|
||||
lookup, and the :class:`~django.contrib.postgres.search.TrigramSimilarity` and
|
||||
:class:`~django.contrib.postgres.search.TrigramDistance` expressions.
|
||||
|
||||
Minor features
|
||||
--------------
|
||||
|
||||
|
|
|
@ -55,11 +55,12 @@ use :lookup:`unaccented comparison <unaccent>`::
|
|||
This shows another issue, where we are matching against a different spelling of
|
||||
the name. In this case we have an asymmetry though - a search for ``Helen``
|
||||
will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option
|
||||
would be to use a trigram comparison, which compares sequences of letters.
|
||||
would be to use a :lookup:`trigram_similar` comparison, which compares
|
||||
sequences of letters.
|
||||
|
||||
For example::
|
||||
|
||||
>>> Author.objects.filter(name__unaccent__lower__trigram='Hélène')
|
||||
>>> Author.objects.filter(name__unaccent__lower__trigram_similar='Hélène')
|
||||
[<Author: Helen Mirren>, <Author: Hélène Joy>]
|
||||
|
||||
Now we have a different problem - the longer name of "Helena Bonham Carter"
|
||||
|
|
|
@ -5,12 +5,13 @@ from django.db import migrations
|
|||
|
||||
try:
|
||||
from django.contrib.postgres.operations import (
|
||||
CreateExtension, HStoreExtension, UnaccentExtension,
|
||||
CreateExtension, HStoreExtension, TrigramExtension, UnaccentExtension,
|
||||
)
|
||||
except ImportError:
|
||||
from django.test import mock
|
||||
CreateExtension = mock.Mock()
|
||||
HStoreExtension = mock.Mock()
|
||||
TrigramExtension = mock.Mock()
|
||||
UnaccentExtension = mock.Mock()
|
||||
|
||||
|
||||
|
@ -21,5 +22,6 @@ class Migration(migrations.Migration):
|
|||
# dash in its name.
|
||||
CreateExtension('uuid-ossp'),
|
||||
HStoreExtension(),
|
||||
TrigramExtension(),
|
||||
UnaccentExtension(),
|
||||
]
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
|
||||
from django.test import modify_settings
|
||||
|
||||
from . import PostgreSQLTestCase
|
||||
from .models import CharFieldModel, TextFieldModel
|
||||
|
||||
|
||||
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class TrigramTest(PostgreSQLTestCase):
    """
    Exercise the ``trigram_similar`` lookup and the ``TrigramSimilarity`` /
    ``TrigramDistance`` expressions against ``Model.field``.
    """
    Model = CharFieldModel

    @classmethod
    def setUpTestData(cls):
        # Three fixture rows shared by every test in the class.
        texts = ['Matthew', 'Cat sat on mat.', 'Dog sat on rug.']
        cls.Model.objects.bulk_create([cls.Model(field=text) for text in texts])

    def test_trigram_search(self):
        # A misspelt name should still match through the lookup.
        matches = self.Model.objects.filter(field__trigram_similar='Mathew')
        self.assertQuerysetEqual(
            matches,
            ['Matthew'],
            transform=lambda instance: instance.field,
        )

    def test_trigram_similarity(self):
        search = 'Bat sat on cat.'
        similar = self.Model.objects.filter(field__trigram_similar=search)
        annotated = similar.annotate(similarity=TrigramSimilarity('field', search))
        # Most similar row first.
        ranked = annotated.order_by('-similarity')
        expected = [('Cat sat on mat.', 0.625), ('Dog sat on rug.', 0.333333)]
        self.assertQuerysetEqual(
            ranked,
            expected,
            transform=lambda instance: (instance.field, instance.similarity),
            ordered=True,
        )

    def test_trigram_similarity_alternate(self):
        annotated = self.Model.objects.annotate(
            distance=TrigramDistance('field', 'Bat sat on cat.'),
        )
        # Closest row first; 'Matthew' is excluded by the distance cut-off.
        nearest = annotated.filter(distance__lte=0.7).order_by('distance')
        expected = [('Cat sat on mat.', 0.375), ('Dog sat on rug.', 0.666667)]
        self.assertQuerysetEqual(
            nearest,
            expected,
            transform=lambda instance: (instance.field, instance.distance),
            ordered=True,
        )
|
||||
|
||||
|
||||
class TrigramTextFieldTest(TrigramTest):
    """
    Re-run every trigram test against a TextField-backed model; the
    behavior is expected to match CharField.
    """
    Model = TextFieldModel
|
Loading…
Reference in New Issue