mirror of https://github.com/django/django.git
Fixed #24938 -- Added PostgreSQL trigram support.
This commit is contained in:
parent
d7334b405f
commit
1962a96a30
|
@ -3,7 +3,7 @@ from django.db.backends.signals import connection_created
|
||||||
from django.db.models import CharField, TextField
|
from django.db.models import CharField, TextField
|
||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
from .lookups import SearchLookup, Unaccent
|
from .lookups import SearchLookup, TrigramSimilar, Unaccent
|
||||||
from .signals import register_hstore_handler
|
from .signals import register_hstore_handler
|
||||||
|
|
||||||
|
|
||||||
|
@ -17,3 +17,5 @@ class PostgresConfig(AppConfig):
|
||||||
TextField.register_lookup(Unaccent)
|
TextField.register_lookup(Unaccent)
|
||||||
CharField.register_lookup(SearchLookup)
|
CharField.register_lookup(SearchLookup)
|
||||||
TextField.register_lookup(SearchLookup)
|
TextField.register_lookup(SearchLookup)
|
||||||
|
CharField.register_lookup(TrigramSimilar)
|
||||||
|
TextField.register_lookup(TrigramSimilar)
|
||||||
|
|
|
@ -60,3 +60,8 @@ class SearchLookup(SearchVectorExact):
|
||||||
self.lhs = SearchVector(self.lhs)
|
self.lhs = SearchVector(self.lhs)
|
||||||
lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection)
|
lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection)
|
||||||
return lhs, lhs_params
|
return lhs, lhs_params
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramSimilar(PostgresSimpleLookup):
    """
    Lookup exposed under the name ``trigram_similar``.

    The comparison itself is delegated to ``PostgresSimpleLookup`` using the
    operator declared below.
    """
    # NOTE(review): the percent sign is doubled, presumably so that a single
    # literal ``%`` survives query parameter interpolation -- confirm against
    # PostgresSimpleLookup.
    operator = '%%'
    lookup_name = 'trigram_similar'
|
||||||
|
|
|
@ -40,3 +40,9 @@ class UnaccentExtension(CreateExtension):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.name = 'unaccent'
|
self.name = 'unaccent'
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramExtension(CreateExtension):
    """``CreateExtension`` operation whose name is fixed to ``pg_trgm``."""

    def __init__(self):
        # The extension name is hard-coded, so no constructor arguments are
        # accepted.
        self.name = 'pg_trgm'
|
||||||
|
|
|
@ -185,3 +185,19 @@ class SearchRank(Func):
|
||||||
|
|
||||||
|
|
||||||
SearchVectorField.register_lookup(SearchVectorExact)
|
SearchVectorField.register_lookup(SearchVectorExact)
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramBase(Func):
    """
    Shared constructor for the two-argument trigram expressions.

    ``expression`` is passed through unchanged. ``string`` may be either an
    expression (anything exposing ``resolve_expression``) or a plain value,
    which is wrapped in ``Value``. The output field is always a
    ``FloatField``.
    """

    def __init__(self, expression, string, **extra):
        already_expression = hasattr(string, 'resolve_expression')
        rhs = string if already_expression else Value(string)
        super(TrigramBase, self).__init__(
            expression,
            rhs,
            output_field=FloatField(),
            **extra
        )
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramSimilarity(TrigramBase):
    """Trigram expression rendered through the ``SIMILARITY`` function."""
    function = 'SIMILARITY'
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramDistance(TrigramBase):
    """Trigram expression that joins its two arguments with ``<->``."""
    # NOTE(review): the function name is intentionally empty; presumably the
    # inherited Func template then renders only the parenthesised, joined
    # arguments -- confirm against Func.template.
    arg_joiner = ' <-> '
    function = ''
|
||||||
|
|
|
@ -2,6 +2,32 @@
|
||||||
PostgreSQL specific lookups
|
PostgreSQL specific lookups
|
||||||
===========================
|
===========================
|
||||||
|
|
||||||
|
Trigram similarity
|
||||||
|
==================
|
||||||
|
|
||||||
|
.. fieldlookup:: trigram_similar
|
||||||
|
|
||||||
|
.. versionadded:: 1.10
|
||||||
|
|
||||||
|
The ``trigram_similar`` lookup allows you to perform trigram lookups,
|
||||||
|
measuring the number of trigrams (three consecutive characters) shared, using a
|
||||||
|
dedicated PostgreSQL extension. A trigram lookup is given an expression and
|
||||||
|
returns results that have a similarity measurement greater than the current
|
||||||
|
similarity threshold.
|
||||||
|
|
||||||
|
To use it, add ``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS`
|
||||||
|
and activate the `pg_trgm extension
|
||||||
|
<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
|
||||||
|
PostgreSQL. You can install the extension using the
|
||||||
|
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
|
||||||
|
operation.
|
||||||
|
|
||||||
|
The ``trigram_similar`` lookup can be used on
|
||||||
|
:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
|
||||||
|
|
||||||
|
>>> City.objects.filter(name__trigram_similar="Middlesborough")
|
||||||
|
[<City: Middlesbrough>]
|
||||||
|
|
||||||
``Unaccent``
|
``Unaccent``
|
||||||
============
|
============
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,16 @@ the ``django.contrib.postgres.operations`` module.
|
||||||
which will install the ``hstore`` extension and also immediately set up the
|
which will install the ``hstore`` extension and also immediately set up the
|
||||||
connection to interpret hstore data.
|
connection to interpret hstore data.
|
||||||
|
|
||||||
|
``TrigramExtension``
|
||||||
|
====================
|
||||||
|
|
||||||
|
.. class:: TrigramExtension()
|
||||||
|
|
||||||
|
.. versionadded:: 1.10
|
||||||
|
|
||||||
|
A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
|
||||||
|
that installs the ``pg_trgm`` extension.
|
||||||
|
|
||||||
``UnaccentExtension``
|
``UnaccentExtension``
|
||||||
=====================
|
=====================
|
||||||
|
|
||||||
|
|
|
@ -189,3 +189,58 @@ if it were an annotated ``SearchVector``::
|
||||||
[<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]
|
[<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]
|
||||||
|
|
||||||
.. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS
|
.. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS
|
||||||
|
|
||||||
|
Trigram similarity
|
||||||
|
==================
|
||||||
|
|
||||||
|
Another approach to searching is trigram similarity. A trigram is a group of
|
||||||
|
three consecutive characters. In addition to the :lookup:`trigram_similar`
|
||||||
|
lookup, you can use a couple of other expressions.
|
||||||
|
|
||||||
|
To use them, you need to activate the `pg_trgm extension
|
||||||
|
<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
|
||||||
|
PostgreSQL. You can install it using the
|
||||||
|
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
|
||||||
|
operation.
|
||||||
|
|
||||||
|
``TrigramSimilarity``
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
.. class:: TrigramSimilarity(expression, string, **extra)
|
||||||
|
|
||||||
|
.. versionadded:: 1.10
|
||||||
|
|
||||||
|
Accepts a field name or expression, and a string or expression. Returns the
|
||||||
|
trigram similarity between the two arguments.
|
||||||
|
|
||||||
|
Usage example::
|
||||||
|
|
||||||
|
>>> from django.contrib.postgres.search import TrigramSimilarity
|
||||||
|
>>> Author.objects.create(name='Katy Stevens')
|
||||||
|
>>> Author.objects.create(name='Stephen Keats')
|
||||||
|
>>> test = 'Katie Stephens'
|
||||||
|
>>> Author.objects.annotate(
|
||||||
|
... similarity=TrigramSimilarity('name', test),
|
||||||
|
... ).filter(similarity__gt=0.3).order_by('-similarity')
|
||||||
|
[<Author: Katy Stevens>, <Author: Stephen Keats>]
|
||||||
|
|
||||||
|
``TrigramDistance``
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
.. class:: TrigramDistance(expression, string, **extra)
|
||||||
|
|
||||||
|
.. versionadded:: 1.10
|
||||||
|
|
||||||
|
Accepts a field name or expression, and a string or expression. Returns the
|
||||||
|
trigram distance between the two arguments.
|
||||||
|
|
||||||
|
Usage example::
|
||||||
|
|
||||||
|
>>> from django.contrib.postgres.search import TrigramDistance
|
||||||
|
>>> Author.objects.create(name='Katy Stevens')
|
||||||
|
>>> Author.objects.create(name='Stephen Keats')
|
||||||
|
>>> test = 'Katie Stephens'
|
||||||
|
>>> Author.objects.annotate(
|
||||||
|
... distance=TrigramDistance('name', test),
|
||||||
|
... ).filter(distance__lte=0.7).order_by('distance')
|
||||||
|
[<Author: Katy Stevens>, <Author: Stephen Keats>]
|
||||||
|
|
|
@ -33,6 +33,10 @@ search engine. You can search across multiple fields in your relational
|
||||||
database, combine the searches with other lookups, use different language
|
database, combine the searches with other lookups, use different language
|
||||||
configurations and weightings, and rank the results by relevance.
|
configurations and weightings, and rank the results by relevance.
|
||||||
|
|
||||||
|
It also now includes trigram support, using the :lookup:`trigram_similar`
|
||||||
|
lookup, and the :class:`~django.contrib.postgres.search.TrigramSimilarity` and
|
||||||
|
:class:`~django.contrib.postgres.search.TrigramDistance` expressions.
|
||||||
|
|
||||||
Minor features
|
Minor features
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
|
|
|
@ -55,11 +55,12 @@ use :lookup:`unaccented comparison <unaccent>`::
|
||||||
This shows another issue, where we are matching against a different spelling of
|
This shows another issue, where we are matching against a different spelling of
|
||||||
the name. In this case we have an asymmetry though - a search for ``Helen``
|
the name. In this case we have an asymmetry though - a search for ``Helen``
|
||||||
will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option
|
will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option
|
||||||
would be to use a trigram comparison, which compares sequences of letters.
|
would be to use a :lookup:`trigram_similar` comparison, which compares
|
||||||
|
sequences of letters.
|
||||||
|
|
||||||
For example::
|
For example::
|
||||||
|
|
||||||
>>> Author.objects.filter(name__unaccent__lower__trigram='Hélène')
|
>>> Author.objects.filter(name__unaccent__lower__trigram_similar='Hélène')
|
||||||
[<Author: Helen Mirren>, <Author: Hélène Joy>]
|
[<Author: Helen Mirren>, <Author: Hélène Joy>]
|
||||||
|
|
||||||
Now we have a different problem - the longer name of "Helena Bonham Carter"
|
Now we have a different problem - the longer name of "Helena Bonham Carter"
|
||||||
|
|
|
@ -5,12 +5,13 @@ from django.db import migrations
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from django.contrib.postgres.operations import (
|
from django.contrib.postgres.operations import (
|
||||||
CreateExtension, HStoreExtension, UnaccentExtension,
|
CreateExtension, HStoreExtension, TrigramExtension, UnaccentExtension,
|
||||||
)
|
)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from django.test import mock
|
from django.test import mock
|
||||||
CreateExtension = mock.Mock()
|
CreateExtension = mock.Mock()
|
||||||
HStoreExtension = mock.Mock()
|
HStoreExtension = mock.Mock()
|
||||||
|
TrigramExtension = mock.Mock()
|
||||||
UnaccentExtension = mock.Mock()
|
UnaccentExtension = mock.Mock()
|
||||||
|
|
||||||
|
|
||||||
|
@ -21,5 +22,6 @@ class Migration(migrations.Migration):
|
||||||
# dash in its name.
|
# dash in its name.
|
||||||
CreateExtension('uuid-ossp'),
|
CreateExtension('uuid-ossp'),
|
||||||
HStoreExtension(),
|
HStoreExtension(),
|
||||||
|
TrigramExtension(),
|
||||||
UnaccentExtension(),
|
UnaccentExtension(),
|
||||||
]
|
]
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
|
||||||
|
from django.test import modify_settings
|
||||||
|
|
||||||
|
from . import PostgreSQLTestCase
|
||||||
|
from .models import CharFieldModel, TextFieldModel
|
||||||
|
|
||||||
|
|
||||||
|
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class TrigramTest(PostgreSQLTestCase):
    """
    Tests for the ``trigram_similar`` lookup and the ``TrigramSimilarity`` /
    ``TrigramDistance`` expressions, run against ``Model``.
    """
    Model = CharFieldModel

    @classmethod
    def setUpTestData(cls):
        # Three rows shared by every test in the class.
        rows = [
            cls.Model(field=text)
            for text in ('Matthew', 'Cat sat on mat.', 'Dog sat on rug.')
        ]
        cls.Model.objects.bulk_create(rows)

    def test_trigram_search(self):
        # A misspelled search term is expected to match the stored spelling.
        matches = self.Model.objects.filter(field__trigram_similar='Mathew')
        self.assertQuerysetEqual(
            matches,
            ['Matthew'],
            transform=lambda instance: instance.field,
        )

    def test_trigram_similarity(self):
        search = 'Bat sat on cat.'
        similar = self.Model.objects.filter(field__trigram_similar=search)
        ranked = similar.annotate(
            similarity=TrigramSimilarity('field', search),
        ).order_by('-similarity')
        expected = [('Cat sat on mat.', 0.625), ('Dog sat on rug.', 0.333333)]
        self.assertQuerysetEqual(
            ranked,
            expected,
            transform=lambda instance: (instance.field, instance.similarity),
            ordered=True,
        )

    def test_trigram_similarity_alternate(self):
        # Same data as above, but ranked by distance (ascending) instead of
        # similarity (descending).
        annotated = self.Model.objects.annotate(
            distance=TrigramDistance('field', 'Bat sat on cat.'),
        )
        ranked = annotated.filter(distance__lte=0.7).order_by('distance')
        expected = [('Cat sat on mat.', 0.375), ('Dog sat on rug.', 0.666667)]
        self.assertQuerysetEqual(
            ranked,
            expected,
            transform=lambda instance: (instance.field, instance.distance),
            ordered=True,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramTextFieldTest(TrigramTest):
    """
    Re-run every TrigramTest case with TextFieldModel as the model under
    test, so trigram lookups on TextField are held to the same expectations
    as on CharField.
    """
    Model = TextFieldModel
|
Loading…
Reference in New Issue