2019-11-19 21:59:06 +08:00
|
|
|
import psycopg2
|
|
|
|
|
2019-08-20 15:54:41 +08:00
|
|
|
from django.db.models import (
|
|
|
|
CharField, Expression, Field, FloatField, Func, Lookup, TextField, Value,
|
2020-01-11 05:17:09 +08:00
|
|
|
)
|
2019-08-20 15:54:41 +08:00
|
|
|
from django.db.models.expressions import CombinedExpression
|
2019-04-19 14:39:25 +08:00
|
|
|
from django.db.models.functions import Cast, Coalesce
|
2015-06-01 05:45:03 +08:00
|
|
|
|
|
|
|
|
|
|
|
class SearchVectorExact(Lookup):
    """The ``exact`` lookup: match the left-hand side against a search query.

    The SQL produced joins the compiled left- and right-hand sides with the
    ``@@`` operator.
    """

    lookup_name = 'exact'

    def process_rhs(self, qn, connection):
        """Compile the right-hand side, wrapping plain values first.

        A right-hand side that is neither a ``SearchQuery`` nor a
        ``CombinedSearchQuery`` is replaced (on ``self.rhs``) by a
        ``SearchQuery`` built with the left-hand side's ``config`` attribute,
        or ``None`` when the left-hand side has no such attribute.

        Returns the ``(sql, params)`` pair produced by the parent class.
        """
        if not isinstance(self.rhs, (SearchQuery, CombinedSearchQuery)):
            self.rhs = SearchQuery(
                self.rhs,
                config=getattr(self.lhs, 'config', None),
            )
        rhs_sql, rhs_params = super().process_rhs(qn, connection)
        return rhs_sql, rhs_params

    def as_sql(self, qn, connection):
        """Return ``(sql, params)`` for ``<lhs> @@ <rhs>``.

        Left-hand parameters come before right-hand parameters.
        """
        lhs_sql, lhs_params = self.process_lhs(qn, connection)
        rhs_sql, rhs_params = self.process_rhs(qn, connection)
        return '%s @@ %s' % (lhs_sql, rhs_sql), lhs_params + rhs_params
|
2015-06-01 05:45:03 +08:00
|
|
|
|
|
|
|
|
|
|
|
class SearchVectorField(Field):
    """Field whose database column type is ``tsvector``."""

    def db_type(self, connection):
        """Return the column type name; ``connection`` is not consulted."""
        return 'tsvector'
|
|
|
|
|
|
|
|
|
|
|
|
class SearchQueryField(Field):
    """Field whose database column type is ``tsquery``."""

    def db_type(self, connection):
        """Return the column type name; ``connection`` is not consulted."""
        return 'tsquery'
|
|
|
|
|
|
|
|
|
2020-01-11 05:17:09 +08:00
|
|
|
class SearchConfig(Expression):
    """Expression wrapping a search configuration, rendered as ``::regconfig``."""

    def __init__(self, config):
        """Store ``config``, wrapping it in ``Value`` unless it is an expression.

        "Is an expression" means the object has a ``resolve_expression``
        attribute.
        """
        super().__init__()
        self.config = (
            config if hasattr(config, 'resolve_expression') else Value(config)
        )

    @classmethod
    def from_parameter(cls, config):
        """Normalise a ``config`` argument.

        ``None`` and existing instances of this class are returned unchanged;
        anything else is wrapped in a new instance.
        """
        needs_wrapping = config is not None and not isinstance(config, cls)
        return cls(config) if needs_wrapping else config

    def get_source_expressions(self):
        """Return the wrapped config as a one-element list."""
        return [self.config]

    def set_source_expressions(self, exprs):
        """Replace the wrapped config; ``exprs`` must hold exactly one item."""
        [self.config] = exprs

    def as_sql(self, compiler, connection):
        """Compile the wrapped config and append a ``::regconfig`` cast."""
        config_sql, config_params = compiler.compile(self.config)
        return '%s::regconfig' % config_sql, config_params
|
|
|
|
|
|
|
|
|
2017-01-19 15:39:46 +08:00
|
|
|
class SearchVectorCombinable:
    """Mixin providing ``_combine`` for search-vector expressions."""

    # Connector string exposed as the ADD operator.
    ADD = '||'

    def _combine(self, other, connector, reversed):
        """Build a ``CombinedSearchVector`` from ``self`` and ``other``.

        ``other`` must be a ``SearchVectorCombinable``; otherwise ``TypeError``
        is raised. When ``reversed`` is truthy, ``other`` becomes the left
        operand. The result always carries ``self.config``.
        """
        if isinstance(other, SearchVectorCombinable):
            left, right = (other, self) if reversed else (self, other)
            return CombinedSearchVector(left, connector, right, self.config)
        raise TypeError(
            'SearchVector can only be combined with other SearchVector '
            'instances, got %s.' % type(other).__name__
        )
|
|
|
|
|
|
|
|
|
|
|
|
class SearchVector(SearchVectorCombinable, Func):
    """``to_tsvector`` expression over one or more source expressions.

    Source expressions are joined with ``|| ' ' ||``. An optional ``config``
    is rendered as the first argument and an optional ``weight`` wraps the
    result in ``setweight``.
    """

    function = 'to_tsvector'
    arg_joiner = " || ' ' || "
    output_field = SearchVectorField()

    def __init__(self, *expressions, config=None, weight=None):
        """Store the source expressions, config and weight.

        ``config`` is normalised through ``SearchConfig.from_parameter``.
        ``weight`` is wrapped in ``Value`` unless it is ``None`` or already
        has a ``resolve_expression`` attribute.
        """
        super().__init__(*expressions)
        self.config = SearchConfig.from_parameter(config)
        wrap_weight = (
            weight is not None and not hasattr(weight, 'resolve_expression')
        )
        self.weight = Value(weight) if wrap_weight else weight

    def resolve_expression(
        self, query=None, allow_joins=True, reuse=None, summarize=False,
        for_save=False,
    ):
        """Resolve this expression and, when one is set, its config."""
        clone = super().resolve_expression(
            query, allow_joins, reuse, summarize, for_save,
        )
        if self.config:
            clone.config = self.config.resolve_expression(
                query, allow_joins, reuse, summarize, for_save,
            )
        return clone

    def as_sql(self, compiler, connection, function=None, template=None):
        """Return ``(sql, params)`` for the vector.

        Works on a copy so the instance itself is left untouched. Each source
        expression is cast to text unless its output field is a ``CharField``
        or ``TextField``, then coalesced with an empty string.

        Parameters are ordered config, then expressions, then weight.
        """
        clone = self.copy()
        text_sources = []
        for expression in clone.get_source_expressions():
            if not isinstance(expression.output_field, (CharField, TextField)):
                expression = Cast(expression, TextField())
            text_sources.append(Coalesce(expression, Value('')))
        clone.set_source_expressions(text_sources)

        config_sql, config_params = None, []
        # A caller-supplied template is used as given; the config is only
        # compiled when this method picks the template itself.
        if template is None:
            if clone.config:
                config_sql, config_params = compiler.compile(clone.config)
                template = '%(function)s(%(config)s, %(expressions)s)'
            else:
                template = clone.template

        # Two-argument super(): dispatch must happen on the clone.
        sql, params = super(SearchVector, clone).as_sql(
            compiler, connection, function=function, template=template,
            config=config_sql,
        )

        weight_params = []
        if clone.weight:
            weight_sql, weight_params = compiler.compile(clone.weight)
            sql = 'setweight({}, {})'.format(sql, weight_sql)
        return sql, config_params + params + weight_params
|
|
|
|
|
|
|
|
|
|
|
|
class CombinedSearchVector(SearchVectorCombinable, CombinedExpression):
    """Two search vectors joined by a connector, carrying a ``config``."""

    def __init__(self, lhs, connector, rhs, config, output_field=None):
        """Record ``config``, then defer to the parent initialiser."""
        self.config = config
        super().__init__(lhs, connector, rhs, output_field)
|
2015-06-01 05:45:03 +08:00
|
|
|
|
|
|
|
|
2017-01-19 15:39:46 +08:00
|
|
|
class SearchQueryCombinable:
    """Mixin providing ``&`` and ``|`` combination for search queries."""

    # Connector strings used for the AND / OR combinations.
    BITAND = '&&'
    BITOR = '||'

    def _combine(self, other, connector, reversed):
        """Build a ``CombinedSearchQuery`` from ``self`` and ``other``.

        ``other`` must be a ``SearchQueryCombinable``; otherwise ``TypeError``
        is raised. When ``reversed`` is truthy, ``other`` becomes the left
        operand. The result always carries ``self.config``.
        """
        if isinstance(other, SearchQueryCombinable):
            left, right = (other, self) if reversed else (self, other)
            return CombinedSearchQuery(left, connector, right, self.config)
        raise TypeError(
            'SearchQuery can only be combined with other SearchQuery '
            'instances, got %s.' % type(other).__name__
        )

    # Combinable leaves these operators unimplemented to avoid confusion with
    # Q. Here they are deliberately (ab)used for logical combination, which
    # keeps usage consistent with the rest of Django.
    def __or__(self, other):
        """Return ``self | other`` combined with ``BITOR``."""
        return self._combine(other, self.BITOR, False)

    def __ror__(self, other):
        """Return ``other | self`` combined with ``BITOR``."""
        return self._combine(other, self.BITOR, True)

    def __and__(self, other):
        """Return ``self & other`` combined with ``BITAND``."""
        return self._combine(other, self.BITAND, False)

    def __rand__(self, other):
        """Return ``other & self`` combined with ``BITAND``."""
        return self._combine(other, self.BITAND, True)
|
|
|
|
|
|
|
|
|
2020-03-13 17:20:34 +08:00
|
|
|
class SearchQuery(SearchQueryCombinable, Func):
    """Search-query expression built by one of the ``*to_tsquery`` functions.

    ``search_type`` selects the SQL function through ``SEARCH_TYPES``. The
    query can be negated with ``invert=True`` or the ``~`` operator.
    """

    output_field = SearchQueryField()
    # Maps the public ``search_type`` names to SQL function names.
    SEARCH_TYPES = {
        'plain': 'plainto_tsquery',
        'phrase': 'phraseto_tsquery',
        'raw': 'to_tsquery',
        'websearch': 'websearch_to_tsquery',
    }

    def __init__(self, value, output_field=None, *, config=None, invert=False, search_type='plain'):
        """Build the query expression.

        ``value`` is wrapped in ``Value`` unless it has a
        ``resolve_expression`` attribute. ``config`` is normalised through
        ``SearchConfig.from_parameter`` and, when not ``None``, is passed as
        the first function argument.

        Raises ``ValueError`` when ``search_type`` is not a key of
        ``SEARCH_TYPES``.
        """
        self.function = self.SEARCH_TYPES.get(search_type)
        if self.function is None:
            raise ValueError("Unknown search_type argument '%s'." % search_type)
        arguments = [
            value if hasattr(value, 'resolve_expression') else Value(value)
        ]
        self.config = SearchConfig.from_parameter(config)
        if self.config is not None:
            arguments.insert(0, self.config)
        self.invert = invert
        super().__init__(*arguments, output_field=output_field)

    def as_sql(self, compiler, connection, function=None, template=None):
        """Return ``(sql, params)``, wrapping the SQL in ``!!(...)`` if inverted."""
        sql, params = super().as_sql(compiler, connection, function, template)
        return ('!!(%s)' % sql if self.invert else sql), params

    def __invert__(self):
        """Return a copy whose ``invert`` flag is flipped."""
        flipped = self.copy()
        flipped.invert = not self.invert
        return flipped

    def __str__(self):
        """Return the parent's string form, prefixed with ``~`` if inverted."""
        text = super().__str__()
        if self.invert:
            return '~%s' % text
        return text
|
|
|
|
|
2015-06-01 05:45:03 +08:00
|
|
|
|
2016-08-25 16:36:49 +08:00
|
|
|
class CombinedSearchQuery(SearchQueryCombinable, CombinedExpression):
    """Two search queries joined by a connector, carrying a ``config``."""

    def __init__(self, lhs, connector, rhs, config, output_field=None):
        """Record ``config``, then defer to the parent initialiser."""
        self.config = config
        super().__init__(lhs, connector, rhs, output_field)

    def __str__(self):
        """Return the parent's string form wrapped in parentheses."""
        inner = super().__str__()
        return '(%s)' % inner
|
|
|
|
|
2016-08-25 16:36:49 +08:00
|
|
|
|
2015-06-01 05:45:03 +08:00
|
|
|
class SearchRank(Func):
    """Ranking expression using ``ts_rank``, or ``ts_rank_cd`` on request."""

    function = 'ts_rank'
    output_field = FloatField()

    def __init__(
        self, vector, query, weights=None, normalization=None,
        cover_density=False,
    ):
        """Assemble the function arguments.

        ``vector`` and ``query`` are wrapped in ``SearchVector`` and
        ``SearchQuery`` unless they have a ``resolve_expression`` attribute.
        ``weights`` and ``normalization`` are wrapped in ``Value`` under the
        same rule. The argument order is
        ``[weights,] vector, query[, normalization]``.

        A truthy ``cover_density`` switches this instance's function to
        ``ts_rank_cd``.
        """
        if not hasattr(vector, 'resolve_expression'):
            vector = SearchVector(vector)
        if not hasattr(query, 'resolve_expression'):
            query = SearchQuery(query)
        arguments = [vector, query]
        if weights is not None:
            if not hasattr(weights, 'resolve_expression'):
                weights = Value(weights)
            arguments.insert(0, weights)
        if normalization is not None:
            if not hasattr(normalization, 'resolve_expression'):
                normalization = Value(normalization)
            arguments.append(normalization)
        if cover_density:
            self.function = 'ts_rank_cd'
        super().__init__(*arguments)
|
2015-06-01 05:45:03 +08:00
|
|
|
|
|
|
|
|
2019-11-19 21:59:06 +08:00
|
|
|
class SearchHeadline(Func):
    """``ts_headline`` expression with an optional options string."""

    function = 'ts_headline'
    template = '%(function)s(%(expressions)s%(options)s)'
    output_field = TextField()

    def __init__(
        self, expression, query, *, config=None, start_sel=None, stop_sel=None,
        max_words=None, min_words=None, short_word=None, highlight_all=None,
        max_fragments=None, fragment_delimiter=None,
    ):
        """Collect the function arguments and the headline options.

        ``query`` is wrapped in ``SearchQuery`` unless it has a
        ``resolve_expression`` attribute. Keyword options left as ``None`` are
        omitted from ``self.options``; the rest are stored under their
        CamelCase option names in declaration order. A non-``None`` ``config``
        is normalised through ``SearchConfig.from_parameter`` and passed as
        the first argument.
        """
        if not hasattr(query, 'resolve_expression'):
            query = SearchQuery(query)
        candidates = (
            ('StartSel', start_sel),
            ('StopSel', stop_sel),
            ('MaxWords', max_words),
            ('MinWords', min_words),
            ('ShortWord', short_word),
            ('HighlightAll', highlight_all),
            ('MaxFragments', max_fragments),
            ('FragmentDelimiter', fragment_delimiter),
        )
        self.options = {
            name: setting for name, setting in candidates
            if setting is not None
        }
        arguments = [expression, query]
        if config is not None:
            arguments.insert(0, SearchConfig.from_parameter(config))
        super().__init__(*arguments)

    def as_sql(self, compiler, connection, function=None, template=None):
        """Return ``(sql, params)`` for the headline call.

        When options are present they are rendered as a single
        ``Name=value, Name=value`` string, passed as one extra trailing
        parameter behind a ``, %s`` placeholder.
        """
        if self.options:
            rendered = []
            for option, value in self.options.items():
                # getquoted() returns a quoted bytestring of the adapted value.
                # NOTE(review): the adapter is not bound to a connection here;
                # confirm non-ASCII option values quote and decode as intended.
                quoted = psycopg2.extensions.adapt(value).getquoted().decode()
                rendered.append('%s=%s' % (option, quoted))
            options_sql = ', %s'
            options_params = [', '.join(rendered)]
        else:
            options_sql = ''
            options_params = []
        sql, params = super().as_sql(
            compiler, connection, function=function, template=template,
            options=options_sql,
        )
        return sql, params + options_params
|
|
|
|
|
|
|
|
|
2015-06-01 05:45:03 +08:00
|
|
|
# Register SearchVectorExact (lookup_name 'exact') as a lookup on SearchVectorField.
SearchVectorField.register_lookup(SearchVectorExact)
|
2015-06-06 00:37:48 +08:00
|
|
|
|
|
|
|
|
|
|
|
class TrigramBase(Func):
    """Base for trigram functions taking ``(expression, string)`` arguments."""

    output_field = FloatField()

    def __init__(self, expression, string, **extra):
        """Wrap ``string`` in ``Value`` unless it is already an expression.

        "Already an expression" means it has a ``resolve_expression``
        attribute. ``extra`` is forwarded to the parent initialiser.
        """
        string_arg = (
            string if hasattr(string, 'resolve_expression') else Value(string)
        )
        super().__init__(expression, string_arg, **extra)
|
2015-06-06 00:37:48 +08:00
|
|
|
|
|
|
|
|
2021-09-15 18:57:49 +08:00
|
|
|
class TrigramWordBase(Func):
    """Base for trigram functions taking ``(string, expression)`` arguments."""

    output_field = FloatField()

    def __init__(self, string, expression, **extra):
        """Wrap ``string`` in ``Value`` unless it is already an expression.

        "Already an expression" means it has a ``resolve_expression``
        attribute. ``extra`` is forwarded to the parent initialiser.
        """
        string_arg = (
            string if hasattr(string, 'resolve_expression') else Value(string)
        )
        super().__init__(string_arg, expression, **extra)
|
|
|
|
|
|
|
|
|
2015-06-06 00:37:48 +08:00
|
|
|
class TrigramSimilarity(TrigramBase):
    """Trigram expression whose SQL function name is ``SIMILARITY``."""

    function = 'SIMILARITY'
|
|
|
|
|
|
|
|
|
|
|
|
class TrigramDistance(TrigramBase):
    """Trigram expression with no function name, joined by ``<->``."""

    # Empty function name: the two arguments are joined by the operator.
    function = ''
    arg_joiner = ' <-> '
|
2021-09-15 18:57:49 +08:00
|
|
|
|
|
|
|
|
|
|
|
class TrigramWordDistance(TrigramWordBase):
    """Trigram word expression with no function name, joined by ``<<->``."""

    # Empty function name: the two arguments are joined by the operator.
    function = ''
    arg_joiner = ' <<-> '
|
|
|
|
|
|
|
|
|
|
|
|
class TrigramWordSimilarity(TrigramWordBase):
    """Trigram word expression whose SQL function name is ``WORD_SIMILARITY``."""

    function = 'WORD_SIMILARITY'
|