我使用的是 Elasticsearch 2.4,正在尝试对一个包含多个词元(token)的 String 类型文本字段进行聚合。出问题的字段是名为 mailingAddress 的地址字段。例如,下面是在该地址字段中查找 NY 的查询。
{
"from": 0,
"size": 100,
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match": {
"customerprofile.mailingAddress": {
"query": "NY",
"fuzziness": 0,
"operator": "or"
}
}
},
{
"match": {
"customerprofile.companyId": {
"query": "999",
"fuzziness": 0,
"operator": "or"
}
}
}
]
}
}
]
}
}
}
返回结果:
"hits":[
{
"_index":"wht_index_prod_v33_es24",
"_type":"customerprofile",
"_id":"2044",
"_score":2.9787974,
"_source":{
"customerId":2044,
"companyId":2007,
"fullName":"John Doe",
"email":"jon@aol.com",
"pictureURL":"john.png",
"profilePictureContentType":"image/png",
"phone":"(703) 999-8888",
"mailingAddress":"100 Lake Braddock Drive\nBurke, NY 22015",
"gender":"Male",
"emergencyContactsIds":[
],
"wantCorrespondence":false
}
},
{
"_index":"wht_index_prod_v33_es24",
"_type":"customerprofile",
"_id":"2045",
"_score":2.9787974,
"_source":{
"customerId":2045,
"companyId":2007,
"fullName":"Jane Anderson",
"email":"janea@touchva.net",
"pictureURL":"JAnderson.png",
"profilePictureContentType":"image/png",
"phone":"(434) 111-2345",
"mailingAddress":"PO Box 333, Boydton, NY 23917",
"gender":"Male",
"emergencyContactsIds":[
],
"wantCorrespondence":false
}
},
..
..
]
问题
当我按 mailingAddress 进行聚合时,我预期文本字段中的每个词都会对应一个桶(bucket)。根据上面的查询结果,我还预期能找到一个键为 'NY' 的桶,但聚合结果中并没有。有人能解释一下原因吗?我猜是因为包含它的文档太少?
聚合:
{
"size": 0,
"aggs": {
"group_by_age": {
"terms": {
"field": "mailingAddress"
},
"aggs": {
"group_by_gender": {
"terms": {
"field": "gender"
}
}
}
}
}
}
聚合结果:
{
"took": 16,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 401,
"max_score": 0,
"hits": [
]
},
"aggregations": {
"group_by_age": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1041,
"buckets": [
{
"key": "st",
"doc_count": 30,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 17
},
{
"key": "male",
"doc_count": 13
}
]
}
},
{
"key": "ca",
"doc_count": 28,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 21
},
{
"key": "male",
"doc_count": 7
}
]
}
},
{
"key": "dr",
"doc_count": 16,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 13
},
{
"key": "male",
"doc_count": 3
}
]
}
},
{
"key": "street",
"doc_count": 15,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 11
},
{
"key": "male",
"doc_count": 4
}
]
}
},
{
"key": "ave",
"doc_count": 14,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 7
},
{
"key": "male",
"doc_count": 7
}
]
}
},
{
"key": "box",
"doc_count": 11,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 9
},
{
"key": "male",
"doc_count": 2
}
]
}
},
{
"key": "fl",
"doc_count": 11,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 9
},
{
"key": "male",
"doc_count": 2
}
]
}
},
{
"key": "va",
"doc_count": 11,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "male",
"doc_count": 6
},
{
"key": "female",
"doc_count": 5
}
]
}
},
{
"key": "n",
"doc_count": 10,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 7
},
{
"key": "male",
"doc_count": 3
}
]
}
},
{
"key": "az",
"doc_count": 9,
"group_by_gender": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 7
},
{
"key": "male",
"doc_count": 2
}
]
}
}
]
}
}
}
答案 0 :(得分:1)
默认情况下,terms 聚合只返回文档数最多的前 10 个词项(桶),但您可以在聚合中指定 size 来返回更多词项,如下所示:
{
"size": 0,
"aggs": {
"group_by_age": {
"terms": {
"field": "mailingAddress",
"size": 50 <---- add this
},
"aggs": {
"group_by_gender": {
"terms": {
"field": "gender"
}
}
}
}
}
}
实际结果取决于您的数据,您可能需要进一步增大 size 的值才能真正看到 NY(由于该字段经过分析,桶键会以小写形式 ny 出现,正如上面聚合结果中的 male、female 一样)。