我遇到了一个 Elasticsearch 问题:当搜索一个用引号括起来的短语,并且该短语同时包含单词分隔符“_”和停用词时,搜索不到结果。而只包含停用词、或只包含单词分隔符的短语则可以正常找到。
示例:“john_doe in office”- 未找到;“john doe in office”- 找到;“john_doe office”- 找到。
我们希望找到“john_doe in office”。
//we use index analyzer:
"index_text_html": {
"tokenizer": "whitespace",
"char_filter": [
"html_strip"
],
"filter": [
"index_text_html_word_delimiter_graph_filter",
"lowercase",
"remove_duplicates"
]
}
}
"filter": {
"index_text_html_word_delimiter_graph_filter": {
"type": "word_delimiter_graph",
"catenate_words": true,
"catenate_numbers": true,
"preserve_original": true
}
//and search analyzer:
"search_text": {
"tokenizer": "whitespace",
"filter": [
"search_text_word_delimiter_graph_filter",
"stop_words_filter",
"lowercase",
"remove_duplicates"
]
}
}
"filter": {
"index_text_html_word_delimiter_graph_filter": {
"type": "word_delimiter_graph",
"catenate_words": true,
"catenate_numbers": true,
"preserve_original": true
},
"search_text_word_delimiter_graph_filter": {
"type": "word_delimiter_graph",
"catenate_all": true,
"generate_number_parts": false,
"preserve_original": true
},
"stop_words_filter": {
"type": "stop",
"remove_trailing": false
}
}
有人有什么想法吗?