Elasticsearch:处理过于具体的搜索词

时间:2016-09-15 12:24:16

标签: elasticsearch fuzzy-search

我希望能够处理过于具体的搜索词。例如,如果用户搜索"taxation",我希望结果中也能包含含有"tax"的文档,但前提是"tax"出现在 title 字段中。下面是我的 Elasticsearch 配置,我使用的是 1.5 版本。

设置

{  


"content_pages":{  
      "settings":{  
         "index":{  
            "creation_date":"1473848573964",
            "analysis":{  
               "analyzer":{  
                  "string_analyzer":{  
                     "filter":[  
                        "standard",
                        "lowercase",
                        "stop",
                        "asciifolding"
                     ],
                     "char_filter":[  
                        "html_strip"
                     ],
                     "type":"custom",
                     "tokenizer":"standard"
                  }
               }
            },
            "number_of_shards":"2",
            "number_of_replicas":"0"

         }
      }
   }
}

映射

  "mappings":{  
     "content_page_type":{  
        "_all":{  
           "auto_boost":true
        },
        "properties":{  
           "author":{  
              "type":"integer"
           },
           "body:value":{  
              "type":"string",
              "boost":13.0,
              "analyzer":"string_analyzer"
           },
           "changed":{  
              "type":"date",
              "format":"date_time"
           },
           "component":{  
              "type":"string",
              "index":"not_analyzed",
              "analyzer":"string_analyzer"
           },
           "content_page_tab_data":{  
              "type":"string",
              "boost":13.0,
              "analyzer":"string_analyzer"
           },
           "created":{  
              "type":"date",
              "format":"date_time"
           },
           "field_aat_resource_type_taxonomy":{  
              "type":"integer"
           },
           "field_asset_file:file":{  
              "type":"integer"
           },
           "field_body:value":{  
              "type":"string",
              "boost":13.0,
              "analyzer":"string_analyzer"
           },
           "field_file_private:file":{  
              "type":"integer"
           },
           "field_study_resource_file:file":{  
              "type":"integer"
           },
           "field_tabs_page_body:value":{  
              "type":"string",
              "boost":13.0,
              "analyzer":"string_analyzer"
           },
           "id":{  
              "type":"integer",
              "include_in_all":false
           },
           "level":{  
              "type":"string",
              "index":"not_analyzed",
              "analyzer":"string_analyzer"
           },
           "nid":{  
              "type":"integer"
           },
           "programme":{  
              "type":"string",
              "index":"not_analyzed",
              "analyzer":"string_analyzer"
           },
           "search_api_access_node":{  
              "type":"string",
              "index":"not_analyzed",
              "analyzer":"string_analyzer"
           },
           "search_api_language":{  
              "type":"string",
              "index":"not_analyzed",
              "analyzer":"string_analyzer"
           },
           "status":{  
              "type":"boolean"
           },
           "strand":{  
              "type":"string",
              "index":"not_analyzed",
              "analyzer":"string_analyzer"
           },
           "title":{  
              "type":"string",
              "boost":21.0,
              "analyzer":"string_analyzer"
           },
           "type":{  
              "type":"string",
              "index":"not_analyzed",
              "analyzer":"string_analyzer"
           }
        }
     }
  }

搜索查询

{  
   "from":0,
   "size":"10",       
   "query":{  
      "bool":{  
         "must":[  
            {  
               "multi_match":{  
                  "query":"taxation",
                  "fields":[  
                     "body:value^13.0",
                     "content_page_tab_data^13.0",
                     "field_body:value^13.0",
                     "field_tabs_page_body:value^13.0",
                     "title^21.0"
                  ]
               }
            }
         ],
         "should":[  
            {  
               "query_string":{  
                  "query":"(taxation)",
                  "fields":[  
                     "body:value^13.0",
                     "content_page_tab_data^13.0",
                     "field_body:value^13.0",
                     "field_tabs_page_body:value^13.0",
                     "title^21.0"
                  ]
               }
            },
            {
               "fuzzy_like_this" : {
                  "fields" : ["title"],
                  "like_text" : "taxation",
                  "fuzziness": "AUTO"
                }
            }
         ]
      }
   }       
}

以上查询不会返回标题中包含"tax"的结果,只会返回包含"taxation"的结果。我也不想包含不相关的结果,例如"relation"(关系)或"action"(行动)。

1 个答案:

答案 0 :(得分:0)

我最终使用算法词干提取器(Algorithmic Stemmer)解决了这个问题。我之前一直在使用模糊搜索,但那并不是达到预期效果的最佳方式。