我需要修复由Elasticsearch 1.7.x支持的搜索实现。我们遇到的主要问题是返回的搜索结果的相关性。
由于各种原因,我正在从命令行尝试一些非常基本的查询(我指的是这种程度的基本查询:https://www.elastic.co/guide/en/elasticsearch/guide/1.x/query-dsl-intro.html)。
我有一个包含两个文档的索引,如下所示:
{
"took":1,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"failed":0
},
"hits":{
"total":2,
"max_score":1.0,
"hits":[
{
"_index":"merchantv2",
"_type":"searchablemerchant",
"_id":"00000000-0000-0000-0000-000000000000",
"_score":1.0,
"_source":{
"merchantGuid":"00000000-0000-0000-0000-000000000000",
"v1MerchantId":0,
"locatorId":"0",
"address":{
"addressGuid":"00000000-0000-0000-0000-000000000000",
"postCodeDetails":{
"postCodeKey":0,
"postalDistrict":{
"postalDistrictKey":0,
"postalDistrict":""
},
"postalLocation":"0",
"latitude":0.0,
"longitude":0.0,
"townName":"None",
"countyKey":0,
"countryKey":0,
"postCode":{
"postCodeKey":0,
"postCode":" 0"
}
},
"county":{
"countyKey":0,
"countyName":"",
"countryKey":0,
"recStatus":3,
"countryKeyValue":0
},
"countryKey":0,
"addressTypeKey":0,
"updateDate":"0001-01-01T00:00:00+00:00",
"createdDate":"2016-01-07T19:46:28.4463+00:00"
},
"searchableAddress":" 0",
"searchablePhone":"",
"searchableFax":"",
"businessName":"",
"contacts":[
],
"opportunities":[
{
"opportunityGuid":"00000000-0000-0000-0000-000000000000",
"merchantGuid":"00000000-0000-0000-0000-000000000000",
"location":{
"locationGuid":"00000000-0000-0000-0000-000000000000",
"tradingAddress":{
"verified":false,
"addressGuid":"00000000-0000-0000-0000-000000000000",
"postCodeDetails":{
"postCodeKey":0,
"postalDistrict":{
"postalDistrictKey":0,
"postalDistrict":""
},
"postalLocation":"0",
"latitude":0.0,
"longitude":0.0,
"townName":"None",
"countyKey":0,
"countryKey":0,
"postCode":{
"postCodeKey":0,
"postCode":" 0"
}
},
"county":{
"countyKey":0,
"countyName":"",
"countryKey":0,
"recStatus":3,
"countryKeyValue":0
},
"countryKey":0,
"addressTypeKey":0,
"updateDate":"0001-01-01T00:00:00+00:00",
"createdDate":"2016-01-07T19:46:28.4463+00:00"
}
},
"opportunityLocatorId":"000000"
}
]
}
},
{
"_index":"merchantv2",
"_type":"searchablemerchant",
"_id":"5f55fe61-ca65-e411-93f3-0cc47a07ef4a",
"_score":1.0,
"_source":{
"merchantGuid":"5f55fe61-ca65-e411-93f3-0cc47a07ef4a",
"locatorId":"PM227Z02",
"address":{
"addressGuid":"5c55fe61-ca65-e411-93f3-0cc47a07ef4a",
"houseNumber":"242",
"streetName":"Acklam Road",
"houseName":"",
"flatAptSuite":"",
"townName":"London",
"postCodeDetails":{
"postCodeKey":1,
"postalDistrict":{
"postalDistrictKey":2782,
"postalDistrict":"W10"
},
"postalLocation":"5JJ",
"latitude":51.52094651,
"longitude":-0.20149990,
"townName":"London",
"countyKey":0,
"countryKey":224,
"postCode":{
"postCodeKey":1,
"postCode":"W10 5JJ"
}
},
"county":{
"countyKey":626,
"countyName":"Kensington And Chelsea",
"countryKey":224,
"recStatus":1,
"countryKeyValue":224
},
"countryKey":224,
"addressTypeKey":0,
"updateDate":"0001-01-01T00:00:00+00:00",
"createdDate":"2016-01-07T19:46:28.4653+00:00"
},
"searchableAddress":"242 Acklam Road, London, Kensington And Chelsea, W10 5JJ",
"searchablePhone":"+44 2031954484",
"searchableFax":"",
"businessName":"Test Merchant",
"contacts":[
],
"opportunities":[
]
}
}
]
}
}
我想查询 businessName 字段。我在命令行中使用以下查询:
curl -XGET http://localhost:9200/merchantv2/_search -d '
{
"query": {
"match": {
"businessName": "test"
}
}
}'
我希望 businessName 为 “Test Merchant” 的文档能够匹配,但实际上没有返回任何匹配项:
{"took":45,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}
我使用以下搜索词也得到了类似的结果:test*、*test*、Test、Test Merchant、test merchant。
鉴于 businessName 是顶级属性,我对此感到有些困惑。有人有什么想法吗?
编辑 - 忘记包含映射
以下是 businessName 字段的映射:
"businessName":{
"type":"string"
},
答案 0 :(得分:0)
深入查看索引配置后,我找到了原因。索引是在运行时使用 NEST 构建的,看起来它在分析过程中去掉了空格字符。因此,正如 Val 所指出的,通配符和 match 查询都永远不会起作用,因为这种情况下需要完全匹配。
match_phrase_prefix 确实有效,但一旦尝试 “test mer” 或 “mer” 之类的搜索词,就都不会返回任何结果。不过,“testmer” 会返回匹配。
以下是配置中有问题的部分:
.Analysis(descriptor => descriptor
.Analyzers(bases => bases
.Add("trimmed_specChars", new CustomAnalyzer()
{
CharFilter = new List<string> { "drop_specChars" },
Tokenizer = "standard",
Filter = new List<string>() { "lowercase", "asciifolding", "lowercase" }
})
.Add("no_whitespace", new CustomAnalyzer() // <-- Bad times
{
CharFilter = new List<string> { "drop_whiteSpace" },
Tokenizer = "standard",
Filter = new List<string>() { "lowercase", "asciifolding", "lowercase" }
})
).CharFilters(cf => cf
.Add("drop_specChars", new PatternReplaceCharFilter
{
Pattern = @"[^0-9a-zA-Z]",
Replacement = ""
})
.Add("drop_whiteSpace", new PatternReplaceCharFilter() // <-- Also bad times
{
Pattern = @" ",
Replacement = ""
})
)
我的判断可能有偏差,但我觉得去掉空白字符是不必要的;如果不这样做,搜索会更可靠地工作,所以我打算试验一下。