Fixed #23423 -- Added unaccent lookup in django.contrib.postgres

This commit is contained in:
Thomas Chaumeny 2014-09-05 22:53:11 +02:00 committed by Tim Graham
parent 47789410db
commit 17fe0bd808
12 changed files with 163 additions and 4 deletions

View File

@ -642,6 +642,7 @@ answer newbie questions, and generally made Django that much better:
Terry Huang <terryh.tp@gmail.com> Terry Huang <terryh.tp@gmail.com>
thebjorn <bp@datakortet.no> thebjorn <bp@datakortet.no>
Thejaswi Puthraya <thejaswi.puthraya@gmail.com> Thejaswi Puthraya <thejaswi.puthraya@gmail.com>
Thomas Chaumeny <t.chaumeny@gmail.com>
Thomas Güttler <hv@tbz-pariv.de> Thomas Güttler <hv@tbz-pariv.de>
Thomas Kerpe <thomas@kerpe.net> Thomas Kerpe <thomas@kerpe.net>
Thomas Sorrel Thomas Sorrel

View File

@ -1,7 +1,9 @@
from django.apps import AppConfig from django.apps import AppConfig
from django.db.backends.signals import connection_created from django.db.backends.signals import connection_created
from django.db.models import CharField, TextField
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from .lookups import Unaccent
from .signals import register_hstore_handler from .signals import register_hstore_handler
@ -11,3 +13,5 @@ class PostgresConfig(AppConfig):
def ready(self):
    """
    Wire up the PostgreSQL-specific hooks for this app.

    Connects the hstore handler to the ``connection_created`` signal and
    registers the ``Unaccent`` transform on both text-like field classes.
    """
    connection_created.connect(register_hstore_handler)
    # Make ``__unaccent`` available on CharField and TextField.
    CharField.register_lookup(Unaccent)
    TextField.register_lookup(Unaccent)

View File

@ -0,0 +1,10 @@
from django.db.models import Transform
class Unaccent(Transform):
    """
    Transform exposed as ``unaccent`` that wraps the compiled left-hand
    side expression in an ``UNACCENT(...)`` SQL call.
    """

    # Name used in lookup paths, e.g. ``field__unaccent``.
    lookup_name = 'unaccent'
    # Declared bilateral so the transform is applied to the value being
    # compared against as well as to the column.
    bilateral = True

    def as_postgresql(self, compiler, connection):
        """
        Return the ``(sql, params)`` pair for this transform.

        The left-hand side is compiled first; its SQL is then wrapped in
        ``UNACCENT(...)`` and its parameters are passed through unchanged.
        """
        inner_sql, inner_params = compiler.compile(self.lhs)
        wrapped_sql = "UNACCENT(%s)" % inner_sql
        return wrapped_sql, inner_params

View File

@ -32,3 +32,9 @@ class HStoreExtension(CreateExtension):
# extension is installed, a subsequent data migration would use the # extension is installed, a subsequent data migration would use the
# same connection # same connection
register_hstore_handler(schema_editor.connection) register_hstore_handler(schema_editor.connection)
class UnaccentExtension(CreateExtension):
    """
    ``CreateExtension`` operation preconfigured for the ``unaccent``
    extension; takes no arguments.
    """

    def __init__(self):
        # The extension name is fixed, so callers do not supply one.
        self.name = 'unaccent'

View File

@ -26,5 +26,6 @@ a number of PostgreSQL specific data types.
fields fields
forms forms
lookups
operations operations
validators validators

View File

@ -0,0 +1,36 @@
===========================
PostgreSQL specific lookups
===========================
Unaccent
========
.. fieldlookup:: unaccent
The ``unaccent`` lookup allows you to perform accent-insensitive lookups using
a dedicated PostgreSQL extension.
This lookup is implemented using :class:`~django.db.models.Transform`, so it
can be chained with other lookup functions. To use it, you need to add
``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS` and activate
the `unaccent extension on PostgreSQL`_. The
:class:`~django.contrib.postgres.operations.UnaccentExtension` migration
operation is available if you want to perform this activation using migrations.
.. _unaccent extension on PostgreSQL: http://www.postgresql.org/docs/current/interactive/unaccent.html
The ``unaccent`` lookup can be used on
:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
>>> City.objects.filter(name__unaccent="México")
['<City: Mexico>']
>>> User.objects.filter(first_name__unaccent__startswith="Jerem")
['<User: Jeremy>', '<User: Jérémy>', '<User: Jérémie>', '<User: Jeremie>']
.. warning::
``unaccent`` lookups should perform fine in most use cases. However, queries
using this filter will generally perform full table scans, which can be slow
on large tables. In those cases, using dedicated full text indexing tools
might be appropriate.

View File

@ -25,3 +25,11 @@ HStoreExtension
A subclass of :class:`~django.contrib.postgres.operations.CreateExtension` A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
which will install the ``hstore`` extension and also immediately set up the which will install the ``hstore`` extension and also immediately set up the
connection to interpret hstore data. connection to interpret hstore data.
UnaccentExtension
-----------------
.. class:: UnaccentExtension()
A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
which will install the ``unaccent`` extension.

View File

@ -45,9 +45,10 @@ New PostgreSQL specific functionality
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Django now has a module with extensions for PostgreSQL specific features, such Django now has a module with extensions for PostgreSQL specific features, such
as :class:`~django.contrib.postgres.fields.ArrayField` and as :class:`~django.contrib.postgres.fields.ArrayField`,
:class:`~django.contrib.postgres.fields.HStoreField`. A full breakdown of the :class:`~django.contrib.postgres.fields.HStoreField`, and the :lookup:`unaccent`
features is available :doc:`in the documentation</ref/contrib/postgres/index>`. lookup. A full breakdown of the features is available :doc:`in the
documentation </ref/contrib/postgres/index>`.
New data types New data types
~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals from __future__ import unicode_literals
from django.contrib.postgres.operations import HStoreExtension from django.contrib.postgres.operations import HStoreExtension, UnaccentExtension
from django.db import migrations from django.db import migrations
@ -12,4 +12,5 @@ class Migration(migrations.Migration):
operations = [ operations = [
HStoreExtension(), HStoreExtension(),
UnaccentExtension(),
] ]

View File

@ -73,4 +73,22 @@ class Migration(migrations.Migration):
}, },
bases=(models.Model,), bases=(models.Model,),
), ),
migrations.CreateModel(
name='CharFieldModel',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('field', models.CharField(max_length=16)),
],
options=None,
bases=None,
),
migrations.CreateModel(
name='TextFieldModel',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('field', models.TextField()),
],
options=None,
bases=None,
),
] ]

View File

@ -24,3 +24,11 @@ class NestedIntegerArrayModel(models.Model):
class HStoreModel(models.Model): class HStoreModel(models.Model):
field = HStoreField(blank=True, null=True) field = HStoreField(blank=True, null=True)
# Minimal model with a single short CharField named ``field``.
class CharFieldModel(models.Model):
    field = models.CharField(max_length=16)
# Minimal model with a single TextField named ``field``.
class TextFieldModel(models.Model):
    field = models.TextField()

View File

@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import unittest
from django.db import connection
from django.test import TestCase, modify_settings
from .models import CharFieldModel, TextFieldModel
@unittest.skipUnless(connection.vendor == 'postgresql', 'PostgreSQL required')
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class UnaccentTest(TestCase):
    """
    Checks for the ``unaccent`` lookup, run against ``self.Model``.

    ``Model`` is a class attribute so that a subclass can point the same
    tests at a different model exposing a ``field`` column.
    """

    Model = CharFieldModel

    def setUp(self):
        # Three rows: one accented, one plain mixed-case, one plain lower-case.
        rows = [self.Model(field=value) for value in ("àéÖ", "aeO", "aeo")]
        self.Model.objects.bulk_create(rows)

    def test_unaccent(self):
        """A plain needle matches both the accented and the plain row."""
        matches = self.Model.objects.filter(field__unaccent="aeO")
        self.assertQuerysetEqual(
            matches,
            ["àéÖ", "aeO"],
            transform=lambda obj: obj.field,
            ordered=False,
        )

    def test_unaccent_chained(self):
        """
        Check that unaccent can be used chained with a lookup (which should be
        the case since unaccent implements the Transform API)
        """
        case_insensitive = self.Model.objects.filter(
            field__unaccent__iexact="aeO"
        )
        self.assertQuerysetEqual(
            case_insensitive,
            ["àéÖ", "aeO", "aeo"],
            transform=lambda obj: obj.field,
            ordered=False,
        )

        suffix_matches = self.Model.objects.filter(
            field__unaccent__endswith="éÖ"
        )
        self.assertQuerysetEqual(
            suffix_matches,
            ["àéÖ", "aeO"],
            transform=lambda obj: obj.field,
            ordered=False,
        )

    def test_unaccent_accentuated_needle(self):
        """An accented needle returns the same rows as the plain one."""
        matches = self.Model.objects.filter(field__unaccent="aéÖ")
        self.assertQuerysetEqual(
            matches,
            ["àéÖ", "aeO"],
            transform=lambda obj: obj.field,
            ordered=False,
        )
class UnaccentTextFieldTest(UnaccentTest):
    """
    Re-run every inherited unaccent check against a TextField-backed
    model; the expected results are the same as for CharField.
    """

    Model = TextFieldModel