Elasticsearch ngram 搜索返回的结果不符合预期

时间:2015-06-03 06:34:25

标签: solr elasticsearch-plugin elasticsearch elasticsearch-river

我正在尝试使用 Elasticsearch 中的 ngram 实现部分匹配,但没有得到预期的结果。我参照的是下面这个链接中的教程:

https://www.elastic.co/guide/en/elasticsearch/guide/current/_index_time_search_as_you_type.html
我已经按照该链接中的步骤完成了全部配置。我的数据集包含 3 个字段:id、name 和 age。

以下是索引 my_index 的设置(settings)和映射(mapping):

GET /my_index/_settings
{
   "my_index": {
      "settings": {
         "index": {
            "creation_date": "1433249154544",
            "uuid": "hKxHVnqaRVmji31xK92pVA",
            "number_of_replicas": "1",
            "analysis": {
               "filter": {
                  "autocomplete_filter": {
                     "type": "edge_ngram",
                     "min_gram": "1",
                     "max_gram": "20"
                  }
               },
               "analyzer": {
                  "autocomplete": {
                     "type": "custom",
                     "filter": [
                        "lowercase",
                        "autocomplete_filter"
                     ],
                     "tokenizer": "standard"
                  }
               }
            },
            "number_of_shards": "1",
            "version": {
               "created": "1040499"
            }
         }
      }
   }
}


GET /my_index/_mapping/my_type
{
   "my_index": {
      "mappings": {
         "my_type": {
            "properties": {
               "@timestamp": {
                  "type": "date",
                  "format": "dateOptionalTime"
               },
               "@version": {
                  "type": "string"
               },
               "age": {
                  "type": "long"
               },
               "host": {
                  "type": "string"
               },
               "id": {
                  "type": "string"
               },
               "message": {
                  "type": "string"
               },
               "name": {
                  "type": "string",
                  "index_analyzer": "autocomplete",
                  "search_analyzer": "standard"
               },
               "path": {
                  "type": "string"
               },
               "type": {
                  "type": "string"
               }
            }
         }
      }
   }
}


我向 Elasticsearch 发送的查询是:

GET /my_index/my_type/_search
{
    "query": {
        "match": {
            "name": {
                "query":    "raman r"
            }
        }
    }
}


按我的理解,结果应该只返回“raman ram”,但它还返回了其他文档:

{
   "took": 13,
   "timed_out": false,
   "_shards": {
      "total": 1,
      "successful": 1,
      "failed": 0
   },
   "hits": {
      "total": 9,
      "max_score": 2.6631343,
      "hits": [
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "2",
            "_score": 2.6631343,
            "_source": {
               "message": [
                  "2,raman,23"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:07:18.041Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "2",
               "name": "raman",
               "age": 23
            }
         },
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "10",
            "_score": 1.8003473,
            "_source": {
               "message": [
                  "10,raman ram,43"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:11:03.455Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "10",
               "name": "raman ram",
               "age": 43
            }
         },
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "1",
            "_score": 0.26245093,
            "_source": {
               "message": [
                  "1,Ram,342"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:07:18.040Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "1",
               "name": "Ram",
               "age": 342
            }
         },
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "3",
            "_score": 0.26245093,
            "_source": {
               "message": [
                  "3,ramayan,23"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:07:18.041Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "3",
               "name": "ramayan",
               "age": 23
            }
         },
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "4",
            "_score": 0.26245093,
            "_source": {
               "message": [
                  "4,ramaram,231"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:07:18.041Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "4",
               "name": "ramaram",
               "age": 231
            }
         },
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "5",
            "_score": 0.26245093,
            "_source": {
               "message": [
                  "5,rampy,1"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:07:18.041Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "5",
               "name": "rampy",
               "age": 1
            }
         },
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "6",
            "_score": 0.26245093,
            "_source": {
               "message": [
                  "6,ration,11"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:07:18.041Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "6",
               "name": "ration",
               "age": 11
            }
         },
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "7",
            "_score": 0.26245093,
            "_source": {
               "message": [
                  "7,rita,42"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:07:18.042Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "7",
               "name": "rita",
               "age": 42
            }
         },
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "8",
            "_score": 0.26245093,
            "_source": {
               "message": [
                  "8,roni,45"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:07:18.050Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "8",
               "name": "roni",
               "age": 45
            }
         }
      ]
   }
}

1 个答案:

答案 0 :(得分:1)

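我在查询中加上 "minimum_should_match": "100%" 之后,得到了正确的结果。原因是:match 查询默认以 OR 方式组合各个词项,查询串 "raman r" 会被分成 "raman" 和 "r" 两个词项,而由于索引时 edge_ngram 的 min_gram 为 1,词项 "r" 会匹配所有以 r 开头的名字,所以原查询返回了全部 9 条文档;要求 100% 匹配后,只有同时匹配这两个词项的文档才会命中。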

GET /my_index/my_type/_search
{
    "query": {
        "match": {
            "name": {
                "query":    "raman r",
                "minimum_should_match": "100%" 
            }
        }
    }
}


虽然排序仍然不理想,但结果比之前好多了:

{
   "took": 4,
   "timed_out": false,
   "_shards": {
      "total": 1,
      "successful": 1,
      "failed": 0
   },
   "hits": {
      "total": 2,
      "max_score": 2.6631343,
      "hits": [
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "2",
            "_score": 2.6631343,
            "_source": {
               "message": [
                  "2,raman,23"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:07:18.041Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "2",
               "name": "raman",
               "age": 23
            }
         },
         {
            "_index": "my_index",
            "_type": "my_type",
            "_id": "10",
            "_score": 1.8003473,
            "_source": {
               "message": [
                  "10,raman ram,43"
               ],
               "@version": "1",
               "@timestamp": "2015-06-02T13:11:03.455Z",
               "type": "my_type",
               "host": "shubham-VirtualBox",
               "path": "/home/shubham/sample.csv",
               "id": "10",
               "name": "raman ram",
               "age": 43
            }
         }
      ]
   }
}


我不确定这种做法是否正确;如果有其他替代方案,请告诉我。