是否可以使用 Elasticsearch 的高亮(Highlighting)API 只突出显示“最佳匹配”?这里的“最佳匹配”是指单词中被精确匹配到的那一部分,即使整个单词都被查询匹配到了。例如:
标题:Dubai
查询:duba
期望的高亮结果:<b>Duba</b>i
但问题出在我的查询上:该查询中包含一些带有模糊匹配(fuzziness)的子查询。
这是示例索引配置:
PUT /highlight_best_match
{
"settings": {
"number_of_shards": "1",
"number_of_replicas": "1",
"analysis": {
"filter": {
"language_stemmer": {
"name": "german2",
"type": "stemmer"
},
"language_stopwords": {
"type": "stop",
"stopwords": "_german_"
}
},
"char_filter": {
"ampersand_to_and": {
"type": "mapping",
"mappings": [
"&=> and "
]
}
},
"analyzer": {
"prefix_analyzer": {
"type": "custom",
"tokenizer": "edge_ngram_tokenizer",
"filter": [
"german_normalization",
"lowercase"
]
},
"match_analyzer": {
"char_filter": [
"html_strip",
"ampersand_to_and"
],
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding",
"language_stopwords",
"language_stemmer"
]
},
"search_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"german_normalization",
"lowercase"
]
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": "2",
"max_gram": "20",
"token_chars": [
"letter",
"digit"
]
}
}
}
},
"mappings": {
"default": {
"dynamic": "false",
"properties": {
"id": {
"type": "integer"
},
"title": {
"type": "keyword",
"fields": {
"match": {
"type": "text",
"term_vector": "with_positions_offsets",
"index_options": "offsets",
"analyzer": "match_analyzer"
},
"prefix": {
"type": "text",
"term_vector": "with_positions_offsets",
"index_options": "offsets",
"analyzer": "prefix_analyzer",
"search_analyzer": "search_analyzer"
}
}
}
}
}
}
}
以下是一些用于说明该示例的数据:
POST /_bulk
{"create": {"_id": "1", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Dubai"}
{"create": {"_id": "2", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Dumai"}
{"create": {"_id": "3", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Cuba"}
{"create": {"_id": "4", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Kuba Südküste"}
{"create": {"_id": "5", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Dubai Kreuzfahrt"}
查询如下:
GET /highlight_best_match/_search
{
"query": {
"bool": {
"must": {
"bool": {
"should": [
{
"match": {
"title.prefix": {
"query": "duba",
"fuzziness": 1,
"boost": 1
}
}
},
{
"match": {
"title.match": {
"query": "duba",
"fuzziness": 1,
"boost": 1
}
}
}
]
}
},
"should": [
{
"match_phrase_prefix": {
"title.match": {
"query": "duba",
"boost": 5
}
}
},
{
"match": {
"title.prefix": {
"query": "duba",
"fuzziness": 0,
"boost": 3
}
}
},
{
"match": {
"title.match": {
"query": "duba",
"fuzziness": 0,
"boost": 10
}
}
}
]
}
},
"highlight": {
"encoder": "plain",
"order": "score",
"pre_tags": [
"<b>"
],
"post_tags": [
"</b>"
],
"fields": {
"title.prefix": {
"type": "fvh",
"matched_fields": [
"title.match",
"title.prefix"
]
}
}
}
}
结果如下:
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 11.402948,
"hits": [
{
"_index": "highlight_best_match",
"_type": "default",
"_id": "1",
"_score": 11.402948,
"_source": {
"title": "Dubai"
},
"highlight": {
"title.prefix": [
"<b>Dubai</b>"
]
}
},
{
"_index": "highlight_best_match",
"_type": "default",
"_id": "5",
"_score": 6.812179,
"_source": {
"title": "Dubai Kreuzfahrt"
},
"highlight": {
"title.prefix": [
"<b>Dubai</b> Kreuzfahrt"
]
}
},
{
"_index": "highlight_best_match",
"_type": "default",
"_id": "3",
"_score": 1.5331156,
"_source": {
"title": "Cuba"
},
"highlight": {
"title.prefix": [
"<b>Cuba</b>"
]
}
},
{
"_index": "highlight_best_match",
"_type": "default",
"_id": "4",
"_score": 1.0343978,
"_source": {
"title": "Kuba Südküste"
},
"highlight": {
"title.prefix": [
"<b>Kuba</b> Südküste"
]
}
},
{
"_index": "highlight_best_match",
"_type": "default",
"_id": "2",
"_score": 0.7896109,
"_source": {
"title": "Dumai"
},
"highlight": {
"title.prefix": [
"<b>Duma</b>i"
]
}
}
]
}
}
请查看 ID = 1 和 ID = 5 的结果:整个单词 Dubai 都被高亮了。是否可以只高亮其中与查询 duba 精确匹配的部分(即 <b>Duba</b>i),就像 ID = 2 的结果中只高亮了 <b>Duma</b>i 那样?
我知道可以通过 highlight_query 让高亮只针对精确匹配的查询。
但我希望无论如何所有结果都能被高亮,只是在存在精确匹配时,优先高亮精确匹配的部分。
提前谢谢!