Fixed #1465: added support for regex lookups. Thanks, Tom Tobin.
git-svn-id: http://code.djangoproject.com/svn/django/trunk@5555 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
7dc8b1a1a8
commit
24512a74be
|
@ -247,6 +247,8 @@ OPERATOR_MAPPING = {
|
|||
'iexact': 'LIKE %s',
|
||||
'contains': 'LIKE BINARY %s',
|
||||
'icontains': 'LIKE %s',
|
||||
'regex': 'REGEXP BINARY %s',
|
||||
'iregex': 'REGEXP %s',
|
||||
'gt': '> %s',
|
||||
'gte': '>= %s',
|
||||
'lt': '< %s',
|
||||
|
|
|
@ -248,6 +248,8 @@ OPERATOR_MAPPING = {
|
|||
'iexact': 'LIKE %s',
|
||||
'contains': 'LIKE BINARY %s',
|
||||
'icontains': 'LIKE %s',
|
||||
'regex': 'REGEXP BINARY %s',
|
||||
'iregex': 'REGEXP %s',
|
||||
'gt': '> %s',
|
||||
'gte': '>= %s',
|
||||
'lt': '< %s',
|
||||
|
|
|
@ -280,6 +280,8 @@ OPERATOR_MAPPING = {
|
|||
'iexact': 'ILIKE %s',
|
||||
'contains': 'LIKE %s',
|
||||
'icontains': 'ILIKE %s',
|
||||
'regex': '~ %s',
|
||||
'iregex': '~* %s',
|
||||
'gt': '> %s',
|
||||
'gte': '>= %s',
|
||||
'lt': '< %s',
|
||||
|
|
|
@ -225,6 +225,8 @@ OPERATOR_MAPPING = {
|
|||
'iexact': 'ILIKE %s',
|
||||
'contains': 'LIKE %s',
|
||||
'icontains': 'ILIKE %s',
|
||||
'regex': '~ %s',
|
||||
'iregex': '~* %s',
|
||||
'gt': '> %s',
|
||||
'gte': '>= %s',
|
||||
'lt': '< %s',
|
||||
|
|
|
@ -64,9 +64,10 @@ class DatabaseWrapper(local):
|
|||
}
|
||||
kwargs.update(self.options)
|
||||
self.connection = Database.connect(**kwargs)
|
||||
# Register extract and date_trunc functions.
|
||||
# Register extract, date_trunc, and regexp functions.
|
||||
self.connection.create_function("django_extract", 2, _sqlite_extract)
|
||||
self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc)
|
||||
self.connection.create_function("regexp", 2, _sqlite_regexp)
|
||||
cursor = self.connection.cursor(factory=SQLiteCursorWrapper)
|
||||
cursor.row_factory = utf8rowFactory
|
||||
if settings.DEBUG:
|
||||
|
@ -214,6 +215,13 @@ def _sqlite_date_trunc(lookup_type, dt):
|
|||
elif lookup_type == 'day':
|
||||
return "%i-%02i-%02i 00:00:00" % (dt.year, dt.month, dt.day)
|
||||
|
||||
def _sqlite_regexp(re_pattern, re_string):
|
||||
import re
|
||||
try:
|
||||
return bool(re.search(re_pattern, re_string))
|
||||
except:
|
||||
return False
|
||||
|
||||
# SQLite requires LIKE statements to include an ESCAPE clause if the value
|
||||
# being escaped has a percent or underscore in it.
|
||||
# See http://www.sqlite.org/lang_expr.html for an explanation.
|
||||
|
@ -222,6 +230,8 @@ OPERATOR_MAPPING = {
|
|||
'iexact': "LIKE %s ESCAPE '\\'",
|
||||
'contains': "LIKE %s ESCAPE '\\'",
|
||||
'icontains': "LIKE %s ESCAPE '\\'",
|
||||
'regex': 'REGEXP %s',
|
||||
'iregex': "REGEXP '(?i)' || %s",
|
||||
'gt': '> %s',
|
||||
'gte': '>= %s',
|
||||
'lt': '< %s',
|
||||
|
|
|
@ -174,7 +174,7 @@ class Field(object):
|
|||
|
||||
def get_db_prep_lookup(self, lookup_type, value):
|
||||
"Returns field's value prepared for database lookup."
|
||||
if lookup_type in ('exact', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'):
|
||||
if lookup_type in ('exact', 'regex', 'iregex', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'):
|
||||
return [value]
|
||||
elif lookup_type in ('range', 'in'):
|
||||
return value
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
from django.conf import settings
|
||||
from django.db import backend, connection, transaction
|
||||
from django.db.models.fields import DateField, FieldDoesNotExist
|
||||
from django.db.models import signals, loading
|
||||
|
@ -22,6 +23,7 @@ QUERY_TERMS = (
|
|||
'gt', 'gte', 'lt', 'lte', 'in',
|
||||
'startswith', 'istartswith', 'endswith', 'iendswith',
|
||||
'range', 'year', 'month', 'day', 'isnull', 'search',
|
||||
'regex', 'iregex',
|
||||
)
|
||||
|
||||
# Size of each "chunk" for get_iterator calls.
|
||||
|
@ -797,6 +799,15 @@ def get_where_clause(lookup_type, table_prefix, field_name, value):
|
|||
return "%s%s IS %sNULL" % (table_prefix, field_name, (not value and 'NOT ' or ''))
|
||||
elif lookup_type == 'search':
|
||||
return backend.get_fulltext_search_sql(table_prefix + field_name)
|
||||
elif lookup_type in ('regex', 'iregex'):
|
||||
if settings.DATABASE_ENGINE == 'oracle':
|
||||
if lookup_type == 'regex':
|
||||
match_option = 'c'
|
||||
else:
|
||||
match_option = 'i'
|
||||
return "REGEXP_LIKE(%s%s, %s, '%s')" % (table_prefix, field_name, cast_sql, match_option)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type)
|
||||
|
||||
def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0):
|
||||
|
|
|
@ -1173,6 +1173,48 @@ like ``contains`` but is significantly faster due to full-text indexing.
|
|||
Note this is only available in MySQL and requires direct manipulation of the
|
||||
database to add the full-text index.
|
||||
|
||||
regex
|
||||
~~~~~
|
||||
|
||||
Case-sensitive regular expression match.
|
||||
|
||||
The regular expression syntax is that of the database backend in use; for the
|
||||
``sqlite`` backend, the syntax is that of Python's ``re`` module.
|
||||
|
||||
Example::
|
||||
|
||||
Entry.objects.get(title__regex=r'^(An?|The) +')
|
||||
|
||||
SQL equivalents::
|
||||
|
||||
SELECT ... WHERE title REGEXP BINARY '^(An?|The) +'; -- MySQL
|
||||
|
||||
SELECT ... WHERE title ~ '^(An?|The) +'; -- PostgreSQL
|
||||
|
||||
SELECT ... WHERE title REGEXP '^(An?|The) +'; -- sqlite
|
||||
|
||||
Passing the regular expression as a raw string (e.g., ``r'foo'`` instead of ``'foo'``) is recommended.
|
||||
|
||||
Regular expression matching is not supported on the ``ado_mssql`` backend,
|
||||
which will raise a ``NotImplementedError``. The ``oracle`` backend uses ``REGEXP_LIKE``.
|
||||
|
||||
iregex
|
||||
~~~~~~
|
||||
|
||||
Case-insensitive regular expression match.
|
||||
|
||||
Example::
|
||||
|
||||
Entry.objects.get(title__iregex=r'^(an?|the) +')
|
||||
|
||||
SQL equivalents::
|
||||
|
||||
SELECT ... WHERE title REGEXP '^(an?|the) +'; -- MySQL
|
||||
|
||||
SELECT ... WHERE title ~* '^(an?|the) +'; -- PostgreSQL
|
||||
|
||||
SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- sqlite
|
||||
|
||||
Default lookups are exact
|
||||
-------------------------
|
||||
|
||||
|
|
|
@ -251,4 +251,98 @@ Traceback (most recent call last):
|
|||
...
|
||||
TypeError: Cannot resolve keyword 'headline__starts' into field. Choices are: id, headline, pub_date
|
||||
|
||||
# Create some articles with a bit more interesting headlines for testing field lookups:
|
||||
>>> now = datetime.now()
|
||||
>>> for a in Article.objects.all():
|
||||
... a.delete()
|
||||
>>> a1 = Article(pub_date=now, headline='f')
|
||||
>>> a1.save()
|
||||
>>> a2 = Article(pub_date=now, headline='fo')
|
||||
>>> a2.save()
|
||||
>>> a3 = Article(pub_date=now, headline='foo')
|
||||
>>> a3.save()
|
||||
>>> a4 = Article(pub_date=now, headline='fooo')
|
||||
>>> a4.save()
|
||||
>>> a5 = Article(pub_date=now, headline='Foo')
|
||||
>>> a5.save()
|
||||
|
||||
# zero-or-more
|
||||
>>> Article.objects.filter(headline__regex=r'fo*')
|
||||
[<Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
|
||||
>>> Article.objects.filter(headline__iregex=r'fo*')
|
||||
[<Article: Foo>, <Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
|
||||
|
||||
# one-or-more
|
||||
>>> Article.objects.filter(headline__regex=r'fo+')
|
||||
[<Article: fo>, <Article: foo>, <Article: fooo>]
|
||||
|
||||
# wildcard
|
||||
>>> Article.objects.filter(headline__regex=r'fooo?')
|
||||
[<Article: foo>, <Article: fooo>]
|
||||
|
||||
# and some more:
|
||||
>>> a6 = Article(pub_date=now, headline='bar')
|
||||
>>> a6.save()
|
||||
>>> a7 = Article(pub_date=now, headline='Bar')
|
||||
>>> a7.save()
|
||||
>>> a8 = Article(pub_date=now, headline='baz')
|
||||
>>> a8.save()
|
||||
>>> a9 = Article(pub_date=now, headline='baZ')
|
||||
>>> a9.save()
|
||||
|
||||
# leading anchor
|
||||
>>> Article.objects.filter(headline__regex=r'^b')
|
||||
[<Article: baZ>, <Article: bar>, <Article: baz>]
|
||||
>>> Article.objects.filter(headline__iregex=r'^b')
|
||||
[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
|
||||
|
||||
# trailing anchor
|
||||
>>> Article.objects.filter(headline__regex=r'z$')
|
||||
[<Article: baz>]
|
||||
>>> Article.objects.filter(headline__iregex=r'z$')
|
||||
[<Article: baZ>, <Article: baz>]
|
||||
|
||||
# character sets
|
||||
>>> Article.objects.filter(headline__regex=r'ba[rz]')
|
||||
[<Article: bar>, <Article: baz>]
|
||||
>>> Article.objects.filter(headline__regex=r'ba[RZ]')
|
||||
[<Article: baZ>]
|
||||
>>> Article.objects.filter(headline__iregex=r'ba[RZ]')
|
||||
[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
|
||||
|
||||
# and yet more:
|
||||
>>> a10 = Article(pub_date=now, headline='foobar')
|
||||
>>> a10.save()
|
||||
>>> a11 = Article(pub_date=now, headline='foobaz')
|
||||
>>> a11.save()
|
||||
>>> a12 = Article(pub_date=now, headline='FooBarBaz')
|
||||
>>> a12.save()
|
||||
>>> a13 = Article(pub_date=now, headline='foobarbaz')
|
||||
>>> a13.save()
|
||||
>>> a14 = Article(pub_date=now, headline='zoocarfaz')
|
||||
>>> a14.save()
|
||||
>>> a15 = Article(pub_date=now, headline='barfoobaz')
|
||||
>>> a15.save()
|
||||
>>> a16 = Article(pub_date=now, headline='BAZBARFOO')
|
||||
>>> a16.save()
|
||||
|
||||
# alternation
|
||||
>>> Article.objects.filter(headline__regex=r'foo(bar|baz)')
|
||||
[<Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
|
||||
>>> Article.objects.filter(headline__iregex=r'foo(bar|baz)')
|
||||
[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
|
||||
>>> Article.objects.filter(headline__regex=r'^foo(bar|baz)')
|
||||
[<Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
|
||||
|
||||
# greedy matching
|
||||
>>> Article.objects.filter(headline__regex=r'f.*z')
|
||||
[<Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
|
||||
>>> Article.objects.filter(headline__iregex=r'f.*z')
|
||||
[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
|
||||
|
||||
# grouping and backreferences
|
||||
>>> Article.objects.filter(headline__regex=r'b(.).*b\1')
|
||||
[<Article: barfoobaz>, <Article: foobarbaz>]
|
||||
>>> Article.objects.filter(headline__iregex=r'b(.).*b\1')
|
||||
[<Article: BAZBARFOO>, <Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>]
|
||||
"""}
|
||||
|
|
|
@ -219,6 +219,9 @@ class Templates(unittest.TestCase):
|
|||
# value will be converted to a bytestring.
|
||||
'filter-syntax18': (r'{{ var }}', {'var': UnicodeInStrClass()}, '\xc5\xa0\xc4\x90\xc4\x86\xc5\xbd\xc4\x87\xc5\xbe\xc5\xa1\xc4\x91'),
|
||||
|
||||
# Numbers as filter arguments should work
|
||||
'filter-syntax19': ('{{ var|truncatewords:1 }}', {"var": "hello world"}, "hello ..."),
|
||||
|
||||
### COMMENT SYNTAX ########################################################
|
||||
'comment-syntax01': ("{# this is hidden #}hello", {}, "hello"),
|
||||
'comment-syntax02': ("{# this is hidden #}hello{# foo #}", {}, "hello"),
|
||||
|
|
Loading…
Reference in New Issue