Fixed #1465: added support for regex lookups. Thanks, Tom Tobin.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@5555 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss 2007-06-27 18:58:10 +00:00
parent 7dc8b1a1a8
commit 24512a74be
10 changed files with 170 additions and 2 deletions

View File

@ -247,6 +247,8 @@ OPERATOR_MAPPING = {
'iexact': 'LIKE %s', 'iexact': 'LIKE %s',
'contains': 'LIKE BINARY %s', 'contains': 'LIKE BINARY %s',
'icontains': 'LIKE %s', 'icontains': 'LIKE %s',
'regex': 'REGEXP BINARY %s',
'iregex': 'REGEXP %s',
'gt': '> %s', 'gt': '> %s',
'gte': '>= %s', 'gte': '>= %s',
'lt': '< %s', 'lt': '< %s',

View File

@ -248,6 +248,8 @@ OPERATOR_MAPPING = {
'iexact': 'LIKE %s', 'iexact': 'LIKE %s',
'contains': 'LIKE BINARY %s', 'contains': 'LIKE BINARY %s',
'icontains': 'LIKE %s', 'icontains': 'LIKE %s',
'regex': 'REGEXP BINARY %s',
'iregex': 'REGEXP %s',
'gt': '> %s', 'gt': '> %s',
'gte': '>= %s', 'gte': '>= %s',
'lt': '< %s', 'lt': '< %s',

View File

@ -280,6 +280,8 @@ OPERATOR_MAPPING = {
'iexact': 'ILIKE %s', 'iexact': 'ILIKE %s',
'contains': 'LIKE %s', 'contains': 'LIKE %s',
'icontains': 'ILIKE %s', 'icontains': 'ILIKE %s',
'regex': '~ %s',
'iregex': '~* %s',
'gt': '> %s', 'gt': '> %s',
'gte': '>= %s', 'gte': '>= %s',
'lt': '< %s', 'lt': '< %s',

View File

@ -225,6 +225,8 @@ OPERATOR_MAPPING = {
'iexact': 'ILIKE %s', 'iexact': 'ILIKE %s',
'contains': 'LIKE %s', 'contains': 'LIKE %s',
'icontains': 'ILIKE %s', 'icontains': 'ILIKE %s',
'regex': '~ %s',
'iregex': '~* %s',
'gt': '> %s', 'gt': '> %s',
'gte': '>= %s', 'gte': '>= %s',
'lt': '< %s', 'lt': '< %s',

View File

@ -64,9 +64,10 @@ class DatabaseWrapper(local):
} }
kwargs.update(self.options) kwargs.update(self.options)
self.connection = Database.connect(**kwargs) self.connection = Database.connect(**kwargs)
# Register extract and date_trunc functions. # Register extract, date_trunc, and regexp functions.
self.connection.create_function("django_extract", 2, _sqlite_extract) self.connection.create_function("django_extract", 2, _sqlite_extract)
self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc) self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc)
self.connection.create_function("regexp", 2, _sqlite_regexp)
cursor = self.connection.cursor(factory=SQLiteCursorWrapper) cursor = self.connection.cursor(factory=SQLiteCursorWrapper)
cursor.row_factory = utf8rowFactory cursor.row_factory = utf8rowFactory
if settings.DEBUG: if settings.DEBUG:
@ -214,6 +215,13 @@ def _sqlite_date_trunc(lookup_type, dt):
elif lookup_type == 'day': elif lookup_type == 'day':
return "%i-%02i-%02i 00:00:00" % (dt.year, dt.month, dt.day) return "%i-%02i-%02i 00:00:00" % (dt.year, dt.month, dt.day)
def _sqlite_regexp(re_pattern, re_string):
import re
try:
return bool(re.search(re_pattern, re_string))
except:
return False
# SQLite requires LIKE statements to include an ESCAPE clause if the value # SQLite requires LIKE statements to include an ESCAPE clause if the value
# being escaped has a percent or underscore in it. # being escaped has a percent or underscore in it.
# See http://www.sqlite.org/lang_expr.html for an explanation. # See http://www.sqlite.org/lang_expr.html for an explanation.
@ -222,6 +230,8 @@ OPERATOR_MAPPING = {
'iexact': "LIKE %s ESCAPE '\\'", 'iexact': "LIKE %s ESCAPE '\\'",
'contains': "LIKE %s ESCAPE '\\'", 'contains': "LIKE %s ESCAPE '\\'",
'icontains': "LIKE %s ESCAPE '\\'", 'icontains': "LIKE %s ESCAPE '\\'",
'regex': 'REGEXP %s',
'iregex': "REGEXP '(?i)' || %s",
'gt': '> %s', 'gt': '> %s',
'gte': '>= %s', 'gte': '>= %s',
'lt': '< %s', 'lt': '< %s',

View File

@ -174,7 +174,7 @@ class Field(object):
def get_db_prep_lookup(self, lookup_type, value): def get_db_prep_lookup(self, lookup_type, value):
"Returns field's value prepared for database lookup." "Returns field's value prepared for database lookup."
if lookup_type in ('exact', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'): if lookup_type in ('exact', 'regex', 'iregex', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'):
return [value] return [value]
elif lookup_type in ('range', 'in'): elif lookup_type in ('range', 'in'):
return value return value

View File

@ -1,3 +1,4 @@
from django.conf import settings
from django.db import backend, connection, transaction from django.db import backend, connection, transaction
from django.db.models.fields import DateField, FieldDoesNotExist from django.db.models.fields import DateField, FieldDoesNotExist
from django.db.models import signals, loading from django.db.models import signals, loading
@ -22,6 +23,7 @@ QUERY_TERMS = (
'gt', 'gte', 'lt', 'lte', 'in', 'gt', 'gte', 'lt', 'lte', 'in',
'startswith', 'istartswith', 'endswith', 'iendswith', 'startswith', 'istartswith', 'endswith', 'iendswith',
'range', 'year', 'month', 'day', 'isnull', 'search', 'range', 'year', 'month', 'day', 'isnull', 'search',
'regex', 'iregex',
) )
# Size of each "chunk" for get_iterator calls. # Size of each "chunk" for get_iterator calls.
@ -797,6 +799,15 @@ def get_where_clause(lookup_type, table_prefix, field_name, value):
return "%s%s IS %sNULL" % (table_prefix, field_name, (not value and 'NOT ' or '')) return "%s%s IS %sNULL" % (table_prefix, field_name, (not value and 'NOT ' or ''))
elif lookup_type == 'search': elif lookup_type == 'search':
return backend.get_fulltext_search_sql(table_prefix + field_name) return backend.get_fulltext_search_sql(table_prefix + field_name)
elif lookup_type in ('regex', 'iregex'):
if settings.DATABASE_ENGINE == 'oracle':
if lookup_type == 'regex':
match_option = 'c'
else:
match_option = 'i'
return "REGEXP_LIKE(%s%s, %s, '%s')" % (table_prefix, field_name, cast_sql, match_option)
else:
raise NotImplementedError
raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type) raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type)
def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0): def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0):

View File

@ -1173,6 +1173,48 @@ like ``contains`` but is significantly faster due to full-text indexing.
Note this is only available in MySQL and requires direct manipulation of the Note this is only available in MySQL and requires direct manipulation of the
database to add the full-text index. database to add the full-text index.
regex
~~~~~
Case-sensitive regular expression match.
The regular expression syntax is that of the database backend in use; for the
``sqlite`` backend, the syntax is that of Python's ``re`` module.
Example::
Entry.objects.get(title__regex=r'^(An?|The) +')
SQL equivalents::
SELECT ... WHERE title REGEXP BINARY '^(An?|The) +'; -- MySQL
SELECT ... WHERE title ~ '^(An?|The) +'; -- PostgreSQL
SELECT ... WHERE title REGEXP '^(An?|The) +'; -- sqlite
Using raw strings for passing in the regular expression syntax is recommended.
Regular expression matching is not supported on the ``ado_mssql`` backend, which
will raise a ``NotImplementedError``. On the ``oracle`` backend, the lookup is performed with ``REGEXP_LIKE``.
iregex
~~~~~~
Case-insensitive regular expression match.
Example::
Entry.objects.get(title__iregex=r'^(an?|the) +')
SQL equivalents::
SELECT ... WHERE title REGEXP '^(an?|the) +'; -- MySQL
SELECT ... WHERE title ~* '^(an?|the) +'; -- PostgreSQL
SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- sqlite
Default lookups are exact Default lookups are exact
------------------------- -------------------------

View File

@ -251,4 +251,98 @@ Traceback (most recent call last):
... ...
TypeError: Cannot resolve keyword 'headline__starts' into field. Choices are: id, headline, pub_date TypeError: Cannot resolve keyword 'headline__starts' into field. Choices are: id, headline, pub_date
# Create some articles with a bit more interesting headlines for testing field lookups:
>>> now = datetime.now()
>>> for a in Article.objects.all():
... a.delete()
>>> a1 = Article(pub_date=now, headline='f')
>>> a1.save()
>>> a2 = Article(pub_date=now, headline='fo')
>>> a2.save()
>>> a3 = Article(pub_date=now, headline='foo')
>>> a3.save()
>>> a4 = Article(pub_date=now, headline='fooo')
>>> a4.save()
>>> a5 = Article(pub_date=now, headline='Foo')
>>> a5.save()
# zero-or-more
>>> Article.objects.filter(headline__regex=r'fo*')
[<Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
>>> Article.objects.filter(headline__iregex=r'fo*')
[<Article: Foo>, <Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
# one-or-more
>>> Article.objects.filter(headline__regex=r'fo+')
[<Article: fo>, <Article: foo>, <Article: fooo>]
# optional (zero-or-one)
>>> Article.objects.filter(headline__regex=r'fooo?')
[<Article: foo>, <Article: fooo>]
# and some more:
>>> a6 = Article(pub_date=now, headline='bar')
>>> a6.save()
>>> a7 = Article(pub_date=now, headline='Bar')
>>> a7.save()
>>> a8 = Article(pub_date=now, headline='baz')
>>> a8.save()
>>> a9 = Article(pub_date=now, headline='baZ')
>>> a9.save()
# leading anchor
>>> Article.objects.filter(headline__regex=r'^b')
[<Article: baZ>, <Article: bar>, <Article: baz>]
>>> Article.objects.filter(headline__iregex=r'^b')
[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
# trailing anchor
>>> Article.objects.filter(headline__regex=r'z$')
[<Article: baz>]
>>> Article.objects.filter(headline__iregex=r'z$')
[<Article: baZ>, <Article: baz>]
# character sets
>>> Article.objects.filter(headline__regex=r'ba[rz]')
[<Article: bar>, <Article: baz>]
>>> Article.objects.filter(headline__regex=r'ba[RZ]')
[<Article: baZ>]
>>> Article.objects.filter(headline__iregex=r'ba[RZ]')
[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
# and yet more:
>>> a10 = Article(pub_date=now, headline='foobar')
>>> a10.save()
>>> a11 = Article(pub_date=now, headline='foobaz')
>>> a11.save()
>>> a12 = Article(pub_date=now, headline='FooBarBaz')
>>> a12.save()
>>> a13 = Article(pub_date=now, headline='foobarbaz')
>>> a13.save()
>>> a14 = Article(pub_date=now, headline='zoocarfaz')
>>> a14.save()
>>> a15 = Article(pub_date=now, headline='barfoobaz')
>>> a15.save()
>>> a16 = Article(pub_date=now, headline='BAZBARFOO')
>>> a16.save()
# alternation
>>> Article.objects.filter(headline__regex=r'foo(bar|baz)')
[<Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
>>> Article.objects.filter(headline__iregex=r'foo(bar|baz)')
[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
>>> Article.objects.filter(headline__regex=r'^foo(bar|baz)')
[<Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
# greedy matching
>>> Article.objects.filter(headline__regex=r'f.*z')
[<Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
>>> Article.objects.filter(headline__iregex=r'f.*z')
[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
# grouping and backreferences
>>> Article.objects.filter(headline__regex=r'b(.).*b\1')
[<Article: barfoobaz>, <Article: foobarbaz>]
>>> Article.objects.filter(headline__iregex=r'b(.).*b\1')
[<Article: BAZBARFOO>, <Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>]
"""} """}

View File

@ -219,6 +219,9 @@ class Templates(unittest.TestCase):
# value will be converted to a bytestring. # value will be converted to a bytestring.
'filter-syntax18': (r'{{ var }}', {'var': UnicodeInStrClass()}, '\xc5\xa0\xc4\x90\xc4\x86\xc5\xbd\xc4\x87\xc5\xbe\xc5\xa1\xc4\x91'), 'filter-syntax18': (r'{{ var }}', {'var': UnicodeInStrClass()}, '\xc5\xa0\xc4\x90\xc4\x86\xc5\xbd\xc4\x87\xc5\xbe\xc5\xa1\xc4\x91'),
# Numbers as filter arguments should work
'filter-syntax19': ('{{ var|truncatewords:1 }}', {"var": "hello world"}, "hello ..."),
### COMMENT SYNTAX ######################################################## ### COMMENT SYNTAX ########################################################
'comment-syntax01': ("{# this is hidden #}hello", {}, "hello"), 'comment-syntax01': ("{# this is hidden #}hello", {}, "hello"),
'comment-syntax02': ("{# this is hidden #}hello{# foo #}", {}, "hello"), 'comment-syntax02': ("{# this is hidden #}hello{# foo #}", {}, "hello"),