我目前正在尝试创建一个分析器,用于匹配查询词的一部分。主要用例是搜索词 "3d mammogram":出于某种原因,使用下面的 autocomplete(自动完成)分析器时不会返回任何结果。删除 "operator": "AND" 选项后,Elasticsearch 开始返回结果,但出于某种原因,预期的结果得分仍然偏低。
这是我的索引的设置和映射:
映射:
{
"index": {
"properties": {
"code": {
"type": "text"
},
"type": {
"type": "text"
},
"term": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "index_search"
}
}
}
}
设置:
{
"index" : {
"settings" : {
"index" : {
"number_of_shards" : "5",
"provided_name" : "index",
"creation_date" : ".......",
"analysis" : {
"filter" : {
"case_transition_filter" : {
"split_on_numerics" : "true",
"type" : "word_delimiter",
"preserve_original" : "true",
"stem_english_possessive" : "false"
},
"autocomplete_filter" : {
"type" : "edge_ngram",
"min_gram" : "2",
"max_gram" : "15"
},
"hyphen-filter" : {
"pattern" : "-",
"type" : "pattern_replace",
"replacement" : " "
}
},
"analyzer" : {
"autocomplete" : {
"filter" : [ "case_transition_filter", "lowercase", "hyphen-filter", "autocomplete_filter" ],
"type" : "custom",
"tokenizer" : "keyword"
},
"index_search" : {
"type" : "standard"
}
}
},
"number_of_replicas" : "1",
"uuid" : ".....g",
"version" : {
"created" : "..."
}
}
}
}
}
如您所见,我使用了两种不同的分析器 - 一种用于索引的自动完成分析器和一种用于搜索的标准分析器。
在后端,我使用包含在 bool 查询中的以下两个 match 查询来查询 Elasticsearch 索引:
{
"bool" : {
"should" : [
{
"match" : {
"term" : {
"query" : "3d mammogram",
"operator" : "AND",
"analyzer" : "keyword",
"fuzziness" : "1",
"prefix_length" : 1,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"auto_generate_synonyms_phrase_query" : true,
"boost" : 2.0
}
}
},
{
"match" : {
"term" : {
"query" : "3d mammogram",
"operator" : "AND",
"fuzziness" : "1",
"prefix_length" : 1,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"auto_generate_synonyms_phrase_query" : true,
"boost" : 1.0
}
}
}
],
"adjust_pure_negative" : true,
"minimum_should_match" : "1",
"boost" : 1.0
}
}
按上述写法,这两个查询都不会返回任何结果;但从第二个查询中删除 "operator": "AND" 后,我开始得到结果,只是并不是我期望的结果。以下是第二个查询的结果:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 93,
"max_score" : 20.951433,
"hits" : [
{
"_index" : "index",
"_type" : "index",
"_id" : ".....",
"_score" : 20.951433,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "...",
"term" : "Routine mammogram"
}
},
{
"_index" : "...",
"_type" : "...",
"_id" : "...",
"_score" : 19.059473,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "...",
"term" : "Mammogram"
}
},
{
"_index" : "....",
"_type" : "...",
"_id" : "...",
"_score" : 18.515629,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "...",
"term" : "Screening mammogram"
}
},
{
"_index" : "...",
"_type" : "search-term",
"_id" : "....",
"_score" : 18.515629,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "treatment procedures",
"term" : "Diagnostic mammogram"
}
},
{
"_index" : "....",
"_type" : "...",
"_id" : "...",
"_score" : 18.515629,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "...",
"term" : "Digital mammogram"
}
},
{
"_index" : "...",
"_type" : "...",
"_id" : "...",
"_score" : 18.480751,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "...",
"term" : "Screening 3D mammogram"
}
},
{
"_index" : "...",
"_type" : "...",
"_id" : "...",
"_score" : 18.376223,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "t...",
"term" : "Diagnostic 3D mammogram"
}
},
{
"_index" : "...",
"_type" : "...",
"_id" : "...",
"_score" : 17.930023,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "...",
"term" : "Mammography"
}
},
{
"_index" : "...",
"_type" : "...",
"_id" : "....",
"_score" : 17.287262,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "...",
"term" : "Screening mammography"
}
},
{
"_index" : "....",
"_type" : "...",
"_id" : "...",
"_score" : 17.287262,
"_source" : {
"id" : null,
"careNeedCode" : "...",
"careNeedType" : "...",
"term" : "Abnormal mammography"
}
}
]
}
}
如您所见,包含 "3D mammogram" 的结果的得分远低于仅包含 "mammogram" 的结果。我不确定我在这里遗漏了什么。
答案 0 :(得分:1)
根据您的索引映射和设置,为 "Screening 3D mammogram" 生成的令牌将是:
{
"tokens": [
{
"token": "sc",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "scr",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "scre",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "scree",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screen",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screeni",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screenin",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screening",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screening ",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screening 3",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screening 3d",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screening 3d ",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screening 3d m",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "screening 3d ma",
"start_offset": 0,
"end_offset": 22,
"type": "word",
"position": 0
},
{
"token": "sc",
"start_offset": 0,
"end_offset": 9,
"type": "word",
"position": 0
},
{
"token": "scr",
"start_offset": 0,
"end_offset": 9,
"type": "word",
"position": 0
},
{
"token": "scre",
"start_offset": 0,
"end_offset": 9,
"type": "word",
"position": 0
},
{
"token": "scree",
"start_offset": 0,
"end_offset": 9,
"type": "word",
"position": 0
},
{
"token": "screen",
"start_offset": 0,
"end_offset": 9,
"type": "word",
"position": 0
},
{
"token": "screeni",
"start_offset": 0,
"end_offset": 9,
"type": "word",
"position": 0
},
{
"token": "screenin",
"start_offset": 0,
"end_offset": 9,
"type": "word",
"position": 0
},
{
"token": "screening",
"start_offset": 0,
"end_offset": 9,
"type": "word",
"position": 0
},
{
"token": "ma",
"start_offset": 13,
"end_offset": 22,
"type": "word",
"position": 3
},
{
"token": "mam",
"start_offset": 13,
"end_offset": 22,
"type": "word",
"position": 3
},
{
"token": "mamm",
"start_offset": 13,
"end_offset": 22,
"type": "word",
"position": 3
},
{
"token": "mammo",
"start_offset": 13,
"end_offset": 22,
"type": "word",
"position": 3
},
{
"token": "mammog",
"start_offset": 13,
"end_offset": 22,
"type": "word",
"position": 3
},
{
"token": "mammogr",
"start_offset": 13,
"end_offset": 22,
"type": "word",
"position": 3
},
{
"token": "mammogra",
"start_offset": 13,
"end_offset": 22,
"type": "word",
"position": 3
},
{
"token": "mammogram",
"start_offset": 13,
"end_offset": 22,
"type": "word",
"position": 3
}
]
}
没有为 "3d" 单独生成令牌。这是因为您在 autocomplete 分析器中使用了 "tokenizer" : "keyword"。您需要修改索引映射,将 tokenizer 从 keyword 更改为 standard。修改后的索引映射为:
"analyzer" : {
"autocomplete" : {
"filter" : [ "case_transition_filter", "lowercase", "hyphen-filter", "autocomplete_filter" ],
"type" : "custom",
"tokenizer" : "standard" // note this
},
您需要使用这个新的索引映射重新索引数据。
下面附上一个包含索引数据、索引映射、搜索查询和搜索结果的可运行示例。
索引映射:
{
"settings": {
"analysis": {
"filter": {
"case_transition_filter": {
"split_on_numerics": "true",
"type": "word_delimiter",
"preserve_original": "true",
"stem_english_possessive": "false"
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "2",
"max_gram": "15"
},
"hyphen-filter": {
"pattern": "-",
"type": "pattern_replace",
"replacement": " "
}
},
"analyzer": {
"autocomplete": {
"filter": [
"case_transition_filter",
"lowercase",
"hyphen-filter",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard" // note this
},
"search_term_search": {
"type": "standard"
}
}
},
"max_ngram_diff": 20
},
"mappings": {
"properties": {
"term": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "search_term_search"
}
}
}
}
这样生成的令牌将包括 "3d" 和 "mammogram"。
索引数据:
{
"term": "Screening mammogram"
}
{
"term": "Diagnostic 3D mammogram"
}
{
"term": "Mammography"
}
搜索查询:
{
"query": {
"match": {
"term": {
"query": "3D mammogram",
"operator": "and"
}
}
}
}
搜索结果:
"hits": [
{
"_index": "67607194",
"_type": "_doc",
"_id": "4",
"_score": 1.4572026,
"_source": {
"term": "Diagnostic 3D mammogram"
}
}
]