我使用 Django 1.5 和 django-haystack 2.0,搭配 Elasticsearch 后端。我想按属性进行精确匹配搜索。但是,即使同时使用了 __exact 运算符和 Exact() 类,我仍然会得到“相似”的结果。我该如何防止这种行为?
例如:
# models.py
class Person(models.Model):
    """Model with a single free-text ``name`` field."""

    # The person's name, stored as an unbounded text column.
    name = models.TextField()
# search_indexes.py
class PersonIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for ``Person`` objects."""

    # Primary document field; its content is rendered from a template
    # (use_template=True).
    text = indexes.CharField(document=True, use_template=True)
    # Separate indexed field populated from the model's ``name`` attribute.
    name = indexes.CharField(model_attr="name")

    def get_model(self):
        """Return the model class covered by this index."""
        return Person

    def index_queryset(self, using=None):
        """Return all objects of the indexed model as the set to index.

        Args:
            using: Accepted for interface compatibility; not used here.
        """
        return self.get_model().objects.all()
# templates/search/indexes/people/person_text.txt
{{ object.name }}
>>> p1 = Person(name="Simon")
>>> p1.save()
>>> p2 = Person(name="Simons")
>>> p2.save()
$ ./manage.py rebuild_index
>>> person_sqs = SearchQuerySet().models(Person)
>>> person_sqs.filter(name__exact="Simons")
[<SearchResult: people.person (name=u'Simon')>
<SearchResult: people.person (name=u'Simons')>]
>>> person_sqs.filter(name=Exact("Simons", clean=True))
[<SearchResult: people.person (name=u'Simon')>
<SearchResult: people.person (name=u'Simons')>]
我只想要“Simons”的搜索结果——“Simon”这条结果不应该出现。
答案 0 :(得分:4)
Python3,Django 1.10,Elasticsearch 2.4.4。
TL;DR:定义自定义分词器(tokenizer),而不是过滤器(filter)。
详细解释
a)使用EdgeNgramField:
# search_indexes.py
class PersonIndex(indexes.SearchIndex, indexes.Indexable):
    # Document field declared as an EdgeNgramField and rendered from a
    # template (use_template=True).
    text = indexes.EdgeNgramField(document=True, use_template=True)
    ...  # rest of the index is left unspecified in this snippet
b)模板:
# templates/search/indexes/people/person_text.txt
{{ object.name }}
c)创建自定义搜索后端:
# backends.py
from django.conf import settings
from haystack.backends.elasticsearch_backend import (
ElasticsearchSearchBackend,
ElasticsearchSearchEngine,
)
class CustomElasticsearchSearchBackend(ElasticsearchSearchBackend):
    """Elasticsearch backend that takes its index settings from Django settings."""

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then override ``DEFAULT_SETTINGS``.

        Args:
            connection_alias: Forwarded unchanged to the parent constructor.
            **connection_options: Forwarded unchanged to the parent constructor.
        """
        super(CustomElasticsearchSearchBackend, self).__init__(
            connection_alias, **connection_options)
        # Set DEFAULT_SETTINGS on this instance from the project-level
        # ELASTICSEARCH_INDEX_SETTINGS value.
        self.DEFAULT_SETTINGS = settings.ELASTICSEARCH_INDEX_SETTINGS
class CustomElasticsearchSearchEngine(ElasticsearchSearchEngine):
    """Search engine that uses ``CustomElasticsearchSearchBackend`` as its backend."""

    backend = CustomElasticsearchSearchBackend
d)定义自定义分词器(tokenizer,不是过滤器!):
# settings.py
# Haystack connection table: a single 'default' connection whose engine is
# the custom Elasticsearch engine, with a local URL and index name.
HAYSTACK_CONNECTIONS = {
    'default': {
        'ENGINE': 'apps.persons.backends.CustomElasticsearchSearchEngine',
        'URL': 'http://127.0.0.1:9200/',
        'INDEX_NAME': 'haystack',
    },
}
# Index settings consumed by the custom search backend.  Both analyzers are
# built on custom *tokenizers* (declared below under "tokenizer"), followed by
# the "asciifolding" and "lowercase" token filters.
ELASTICSEARCH_INDEX_SETTINGS = {
    "settings": {
        "analysis": {
            "analyzer": {
                "ngram_analyzer": {
                    "type": "custom",
                    "tokenizer": "custom_ngram_tokenizer",
                    "filter": ["asciifolding", "lowercase"],
                },
                "edgengram_analyzer": {
                    "type": "custom",
                    "tokenizer": "custom_edgengram_tokenizer",
                    "filter": ["asciifolding", "lowercase"],
                },
            },
            "tokenizer": {
                # n-grams of 3 to 12 characters over letters and digits.
                "custom_ngram_tokenizer": {
                    "type": "nGram",
                    "min_gram": 3,
                    "max_gram": 12,
                    "token_chars": ["letter", "digit"],
                },
                # Edge n-grams of 2 to 12 characters over letters and digits.
                "custom_edgengram_tokenizer": {
                    "type": "edgeNGram",
                    "min_gram": 2,
                    "max_gram": 12,
                    "token_chars": ["letter", "digit"],
                },
            },
        },
    },
}

# Default operator setting for Haystack queries.
HAYSTACK_DEFAULT_OPERATOR = 'AND'
e)使用AutoQuery(更通用):
# views.py
search_value = 'Simons'
...
# Restrict results to Person and filter the document content with an
# AutoQuery built from the search value.
person_sqs = (
    SearchQuerySet()
    .models(Person)
    .filter(content=AutoQuery(search_value))
)
f)更改后重新索引:
$ ./manage.py rebuild_index
答案 1 :(得分:1)
我遇到了类似的问题。如果您像下面这样更改 Haystack 的 Elasticsearch 后端设置:
# Backend index settings with the n-gram lengths set to 6..15.
DEFAULT_SETTINGS = {
    'settings': {
        "analysis": {
            # Both analyzers use the "standard" tokenizer and apply the
            # n-gram step as a token filter (declared under "filter" below).
            "analyzer": {
                "ngram_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": ["haystack_ngram", "lowercase"],
                },
                "edgengram_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": ["haystack_edgengram", "lowercase"],
                },
            },
            # Declared here, but not referenced by either analyzer above.
            "tokenizer": {
                "haystack_ngram_tokenizer": {
                    "type": "nGram",
                    "min_gram": 6,
                    "max_gram": 15,
                },
                "haystack_edgengram_tokenizer": {
                    "type": "edgeNGram",
                    "min_gram": 6,
                    "max_gram": 15,
                    "side": "front",
                },
            },
            "filter": {
                "haystack_ngram": {
                    "type": "nGram",
                    "min_gram": 6,
                    "max_gram": 15,
                },
                "haystack_edgengram": {
                    "type": "edgeNGram",
                    "min_gram": 6,
                    "max_gram": 15,
                },
            },
        },
    },
}
那么只有当查询至少有 6 个字符(即达到 min_gram)时,才会生成 n-gram 词元。
如果您想要匹配到“xyzsimonsxyz”这样的结果,则需要使用 NGram 分析器而不是 EdgeNGram,或者根据您的需求两者都用。EdgeNGram 只从词的开头生成词元。
使用 NGram 时,只要 max_gram >= 6,'simons' 就会是 xyzsimonsxyz 生成的词元之一,您就能得到预期的结果。另外,search_analyzer 也需要设置成不同的分析器,否则会得到奇怪的结果。
如果你有大量文本,那么索引大小可能会因为 ngram 而变得非常大。
答案 2:(得分:-1)
不要使用 CharField,改用 EdgeNgramField。
# search_indexes.py
class PersonIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for ``Person`` with an edge-ngram ``name`` field."""

    # Primary document field; its content is rendered from a template
    # (use_template=True).
    text = indexes.CharField(document=True, use_template=True)
    # ``name`` is declared as an EdgeNgramField, populated from the model's
    # ``name`` attribute.
    name = indexes.EdgeNgramField(model_attr="name")

    def get_model(self):
        """Return the model class covered by this index."""
        return Person

    def index_queryset(self, using=None):
        """Return all objects of the indexed model as the set to index.

        Args:
            using: Accepted for interface compatibility; not used here.
        """
        return self.get_model().objects.all()
不要使用 filter,而是使用 autocomplete:
# Build a SearchQuerySet restricted to Person results.
person_sqs = SearchQuerySet().models(Person)
# Query the ``name`` field through autocomplete() rather than filter().
# NOTE(review): the return value is not bound to a name here; presumably
# autocomplete() returns a new SearchQuerySet that should be captured when
# used outside an interactive shell -- confirm against the Haystack docs.
person_sqs.autocomplete(name="Simons")
来源:http://django-haystack.readthedocs.org/en/v2.0.0/autocomplete.html