我在 Haystack 中通过自定义的 Elasticsearch 后端使用同义词过滤器时遇到了问题。
目前我想做的只是创建一个用于测试的同义词:把单词 'tricklenutz' 和单词 'lipstick'(口红)配成一对同义词。
我正在使用以下自定义haystack后端:
from django.conf import settings
from haystack.backends.elasticsearch_backend import (ElasticsearchSearchBackend,
ElasticsearchSearchEngine)
class SiteElasticBackend(ElasticsearchSearchBackend):
    """Elasticsearch backend whose ``DEFAULT_SETTINGS`` include a synonym filter.

    After the stock backend has been initialised, the instance attribute
    ``DEFAULT_SETTINGS`` is replaced with an analysis configuration that
    declares three custom analyzers, two tokenizers and three token filters.
    """

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then install the custom settings.

        Both arguments are forwarded unchanged to the parent constructor.
        """
        super(SiteElasticBackend, self).__init__(
            connection_alias, **connection_options)

        # Every analyzer uses the built-in "lowercase" tokenizer; they differ
        # only in the token-filter chain applied afterwards.
        analyzers = {
            "synonym_analyzer": {
                "type": "custom",
                "tokenizer": "lowercase",
                "filter": ["synonym"]
            },
            "ngram_analyzer": {
                "type": "custom",
                "tokenizer": "lowercase",
                "filter": ["haystack_ngram", "synonym"]
            },
            "edgengram_analyzer": {
                "type": "custom",
                "tokenizer": "lowercase",
                "filter": ["haystack_edgengram", "synonym"]
            }
        }

        # NOTE(review): none of the analyzers above reference these two
        # tokenizers; they are declared but unused within this block.
        tokenizers = {
            "haystack_ngram_tokenizer": {
                "type": "nGram",
                "min_gram": 3,
                "max_gram": 15,
            },
            "haystack_edgengram_tokenizer": {
                "type": "edgeNGram",
                "min_gram": 2,
                "max_gram": 15,
                "side": "front"
            }
        }

        token_filters = {
            # Single test pairing: "tricklenutz" <-> "lipstick".
            "synonym": {
                "type": "synonym",
                "synonyms": [
                    "tricklenutz, lipstick"
                ]
            },
            "haystack_ngram": {
                "type": "nGram",
                "min_gram": 3,
                "max_gram": 15
            },
            "haystack_edgengram": {
                "type": "edgeNGram",
                "min_gram": 5,
                "max_gram": 15
            }
        }

        # Shadow the DEFAULT_SETTINGS attribute on this instance only.
        self.DEFAULT_SETTINGS = {
            'settings': {
                "analysis": {
                    "analyzer": analyzers,
                    "tokenizer": tokenizers,
                    "filter": token_filters
                }
            }
        }
class ConfigurableElasticSearchEngine(ElasticsearchSearchEngine):
    """Search engine that uses ``SiteElasticBackend`` as its backend class."""

    backend = SiteElasticBackend
正如您所看到的,我只是想为 'lipstick'(口红)创建一个同义词 'tricklenutz'(一个不会出现在任何搜索结果中的单词)。
我的 settings.py 文件中有以下条目:
# Haystack connection registry: one "default" connection that points at the
# custom engine class and a local Elasticsearch node.
HAYSTACK_CONNECTIONS = {
    'default': {
        # Dotted path to the engine class that Haystack should load.
        'ENGINE': 'search.backends.site_elasticsearch_backend.ConfigurableElasticSearchEngine',
        # Base URL of the Elasticsearch HTTP endpoint.
        'URL': 'http://127.0.0.1:9200/',
        # Name of the Elasticsearch index used by this connection.
        'INDEX_NAME': 'sitename',
    },
}
以下是 Brand 的 search_index.py:
class BrandIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index definition for the ``Brand`` model."""

    # Primary document field, rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    # Edge-ngram field; populated in ``prepare`` from the ``text`` field.
    ngram_text = indexes.EdgeNgramField()
    # The model's ``name`` attribute, indexed both as ngrams and as plain text.
    name = indexes.NgramField(model_attr='name')
    brand_name = indexes.CharField(model_attr='name')
    created_date = indexes.DateTimeField(model_attr='created_date')

    def get_model(self):
        """Return the model class this index covers."""
        return Brand

    def prepare(self, obj):
        """Copy the prepared ``text`` value into the ``ngram_text`` entry.

        The parent ``prepare`` runs first; its result is then mutated and
        returned.
        """
        document = super(BrandIndex, self).prepare(obj)
        document['ngram_text'] = document['text']
        return document

    def index_queryset(self, using=None):
        """Return the brands whose ``created_date`` is not after "now".

        "Now" is taken from ``datetime.datetime.now()`` at call time.
        """
        cutoff = datetime.datetime.now()
        return Brand.objects.filter(created_date__lte=cutoff)
以下是搜索的视图部分:
class BrandListSearchResults(ListSearchResultsViewMixin, BrandListBase):
    """Brand search-results view with page size and SEO metadata in context."""

    template_name = 'search/brand/search.html'
    page_template = 'search/brand/page.html'
    paginate_by = 50
    paginate_by_first = 50

    def get_queryset(self):
        """Return the search queryset filtered on ``text`` by the search term."""
        search_queryset = self.get_sqs()
        return search_queryset.filter(text=self.search_term)

    def get_context_data(self, **kwargs):
        """Extend the parent context with meta, pagination and size entries."""
        context = super(BrandListSearchResults, self).get_context_data(**kwargs)

        # Title and description both embed the current search term.
        context['meta'] = Meta(
            title='All brands matching the search term %s' % self.search_term,
            description='Brand search results for %s' % self.search_term
        )

        context['paginate_by'] = self.paginate_by
        context['paginate_by_first'] = self.paginate_by_first
        context['size_list'] = ["90", "110", "185"]
        return context
我重建了索引,但同义词似乎没有生效。
有没有办法直接查询 Elasticsearch,以确认同义词确实存在? Haystack 的管理命令在使用自定义过滤器等情况下输出的信息并不详细。
更新
我已经能够直接从elasticsearch查询我的设置,我看到同义词在那里:
curl -XGET 'http://localhost:9200/sitename/_settings?pretty'
{
"sitename" : {
"settings" : {
"index" : {
"creation_date" : "1427470212556",
"uuid" : "6eznekoORQKqwswTq1G24w",
"analysis" : {
"analyzer" : {
"synonym_analyzer" : {
"type" : "custom",
"filter" : [ "synonym" ],
"tokenizer" : "lowercase"
},
"ngram_analyzer" : {
"type" : "custom",
"filter" : [ "haystack_ngram", "synonym" ],
"tokenizer" : "lowercase"
},
"edgengram_analyzer" : {
"type" : "custom",
"filter" : [ "haystack_edgengram", "synonym" ],
"tokenizer" : "lowercase"
}
},
"filter" : {
"haystack_ngram" : {
"type" : "nGram",
"min_gram" : "3",
"max_gram" : "15"
},
"haystack_edgengram" : {
"type" : "edgeNGram",
"min_gram" : "5",
"max_gram" : "15"
},
"synonym" : {
"type" : "synonym",
"synonyms" : [ "tricklenutz, lipstick" ]
}
},
"tokenizer" : {
"haystack_edgengram_tokenizer" : {
"max_gram" : "15",
"min_gram" : "2",
"type" : "edgeNGram",
"side" : "front"
},
"haystack_ngram_tokenizer" : {
"type" : "nGram",
"min_gram" : "3",
"max_gram" : "15"
}
}
},
"number_of_replicas" : "1",
"number_of_shards" : "5",
"version" : {
"created" : "1040399"
}
}
}
}
}
答案 0 :(得分:2)
我注意到的第一件事是:您配置了 synonym_analyzer 分析器,但并没有使用它!您需要把它设置为默认分析器,或者逐个字段地指定分析器(后者需要对自定义后端做进一步修改,并扩展字段类;这里有一个示例)。
在弄清楚文档从 Django 到 Elasticsearch 的实际处理过程时,我也遇到过类似的挫折。您可以结合使用 Elasticsearch 的 HTTP API 和 Haystack 来做一些额外的内省。我在上面链接的 elasticstack 包中编写了一个名为 show_mapping 的命令,它会显示用于创建映射的 JSON。这样,您至少可以查看您的字段是否配置为使用您设置的分析器。
简短免责声明 - 我没有跟上Haystack的最新变化(在2.0或2.1之后),因此有些建议本身可能需要更新。