Elasticsearch: count occurrences of doc_count in aggs

Date: 2017-01-06 11:21:43

Tags: elasticsearch

I have an Elasticsearch aggregation query like this:

{
  "size": 0,
  "aggs": {
    "Domains": {
      "terms": {
        "field": "domains",
        "size": 0
      },
      "aggs": {
        "Identifier": {
          "terms": {
            "field": "alertIdentifier",
            "size": 0
          }
        }
      }
    }
  }
}

It returns bucket aggregations like this:

"aggregations": {
  "Domains": {
     "doc_count_error_upper_bound": 0,
     "sum_other_doc_count": 0,
     "buckets": [
        {
           "key": "IT",
           "doc_count": 147,
           "Identifier": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                 {
                    "key": "-2623493027134706869",
                    "doc_count": 7
                 },
                 {
                    "key": "-6590617724257725266",
                    "doc_count": 7
                 },
                 {
                    "key": "1106147277275983835",
                    "doc_count": 4
                 },
                 {
                    "key": "-3070527890944301111",
                    "doc_count": 4
                 },
                 {
                    "key": "-530975388352676402",
                    "doc_count": 3
                 },
                 {
                    "key": "-6225620509938623294",
                    "doc_count": 2
                 },
                 {
                    "key": "1652134630535374656",
                    "doc_count": 1
                 },
                 {
                    "key": "4191687133126999365",
                    "doc_count": 8
                 },
                 {
                    "key": "6882920925888555081",
                    "doc_count": 2
                 }
              ]
           }
        }

What I need is to count how many times each doc_count value occurs, like this:

1  times:  0
2  times:  2
3  times:  1
equal or more than 4 times:   5
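In other words, I want a histogram over the inner buckets' doc_count values. Just to make the desired result concrete, here is a rough client-side sketch (assuming the response above has already been parsed into a Python dict called response; this is only an illustration of the counting, not the solution I am after):

from collections import Counter

def count_doc_count_occurrences(response):
    # Count how many Identifier buckets fall into each doc_count bin,
    # grouping everything >= 4 into a single "4+" bin as in the list above.
    counts = Counter()
    for domain_bucket in response["aggregations"]["Domains"]["buckets"]:
        for ident_bucket in domain_bucket["Identifier"]["buckets"]:
            doc_count = ident_bucket["doc_count"]
            counts["4+" if doc_count >= 4 else str(doc_count)] += 1
    return counts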

Any idea how to build an ES query that counts these doc_count occurrences?

Thanks in advance.

1 answer:

Answer 0 (score: 1)

The ES query below does the job:

POST /xt-history*/_search
{
  "query": {
    "filtered": {
      "query": { "match_all": {} },
      "filter": {
        "and": [
          { "term": { "type": "10" } }
        ]
      }
    }
  },
  "size": 0,
  "aggs": {
    "repetitions": {
      "scripted_metric": {
        "init_script": "_agg['all'] = []; _agg['all2'] = [];",
        "map_script": "_agg['all'].add(_source['alert']['alertIdentifier'])",
        "combine_script": "for (alertId in _agg['all']) { _agg['all2'].add(alertId); }; return _agg['all2']",
        "reduce_script": "all3 = []; answer = {}; answer['one'] = []; answer['two'] = []; answer['three'] = []; answer['four'] = []; answer['five'] = []; answer['five_plus'] = []; for (alertIds in _aggs) { for (alertId1 in alertIds) { all3.add(alertId1); }; }; for (alertId in all3) { if (answer['five_plus'].contains(alertId)) {  } else if(answer['five'].contains(alertId)) {answer['five'].remove(alertId); answer['five_plus'].add(alertId);} else if(answer['four'].contains(alertId)) {answer['four'].remove(alertId); answer['five'].add(alertId);} else if(answer['three'].contains(alertId)) {answer['three'].remove(alertId); answer['four'].add(alertId);} else if(answer['two'].contains(alertId)) {answer['two'].remove(alertId); answer['three'].add(alertId);} else if(answer['one'].contains(alertId)) {answer['one'].remove(alertId); answer['two'].add(alertId);} else {answer['one'].add(alertId);}; }; fans = []; fans.add(answer['one'].size()); fans.add(answer['two'].size()); fans.add(answer['three'].size()); fans.add(answer['four'].size()); fans.add(answer['five'].size()); fans.add(answer['five_plus'].size()); return fans"
      }
    }
  }
}
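In plain terms: init_script creates two empty lists per shard, map_script appends each document's alert.alertIdentifier, combine_script flattens the per-shard list, and reduce_script counts how often each identifier occurs across all shards and sorts the identifiers into six bins (seen once, twice, three, four, five, or more than five times). A rough Python equivalent of the reduce step, purely to illustrate the logic (this is not something Elasticsearch runs):

from collections import Counter

def reduce_repetitions(shard_lists):
    # shard_lists plays the role of _aggs: one list of alert identifiers per shard.
    occurrences = Counter(alert_id for shard in shard_lists for alert_id in shard)
    bins = {"one": 0, "two": 0, "three": 0, "four": 0, "five": 0, "five_plus": 0}
    for count in occurrences.values():
        if count == 1:
            bins["one"] += 1
        elif count == 2:
            bins["two"] += 1
        elif count == 3:
            bins["three"] += 1
        elif count == 4:
            bins["four"] += 1
        elif count == 5:
            bins["five"] += 1
        else:
            bins["five_plus"] += 1
    # Same order as the array the aggregation returns.
    return [bins[k] for k in ("one", "two", "three", "four", "five", "five_plus")]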

Query output:

{
  "took": 4770,
  "timed_out": false,
  "_shards": {
    "total": 190,
    "successful": 189,
    "failed": 0
  },
  "hits": {
    "total": 334,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "repetitions": {
      "value": [
        63,
        39,
        3,
        10,
        2,
        13
      ]
    }
  }
}

where the first value is the number of identifiers with doc_count = 1, the second the number with doc_count = 2, and so on; the fifth value covers doc_count = 5 and the last value covers doc_count greater than 5 (the five_plus bin).
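To turn that array back into labeled counts on the client, a small mapping such as the following works (the label names are mine, not part of the response; response is the parsed search result):

labels = ["1 time", "2 times", "3 times", "4 times", "5 times", "more than 5 times"]
repetitions = response["aggregations"]["repetitions"]["value"]
print(dict(zip(labels, repetitions)))
# {'1 time': 63, '2 times': 39, '3 times': 3, '4 times': 10, '5 times': 2, 'more than 5 times': 13}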