Elasticsearch 按多个字段分组并汇总小时数(聚合)

时间:2016-08-09 09:41:00

标签: java elasticsearch

我正在编写一个 Elasticsearch(1.5)查询,用于获取某一时间段内(例如 2016 年 1 月 1 日至 2016 年 12 月 31 日)各用户的所有任务及其各自的小时数。

这是我目前写出的查询:

{
  "query": {
    "filtered": {
      "query": {
        "bool": {
          "must": {
            "terms": {
              "userId": [1, 2, 3, 4, 5, 6] // "terms" matches any of the listed ids; "term" takes a single value only
            }
          }
        }
      },
      "filter": {
        "range": {
          "spentOn": {
            "gte": "1451606400000", // 1st Jan
            "lte": "1483142400000" // 31st Dec
          }
        }
      }
    }
  },
  "size": 0,
  "aggs": {
    "group_by_interval": {
      "date_histogram": {
        "field": "spentOn",
        "interval": "month",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "1451606400000",
          "max": "1483142400000"
        }
      },
      "aggs": {
        "group_per_project": {
          "histogram": {
            "field": "taskId",
            "interval": 1
          },
          "aggs": {
            "hours": {
              "sum": {
                "field": "hours"
              }
            }
          }
        }
      }
    }
  }
}

以上查询给出了以下结果:

{
...
[{
        "key_as_string" : "2016-01-01T00:00:00.000Z",
        "doc_count" : 10,
        "group_per_project" : {
            "buckets" : [{
                    "doc_count" : 1,
                    "key" : "Task A",
                    "hours" : {
                        "value" : 5
                    }
                }, {
                    "doc_count" : 15,
                    "key" : "Task B",
                    "hours" : {
                        "value" : 60
                    }
                }, {
                    "doc_count" : 1,
                    "key" : "Task C",
                    "hours" : {
                        "value" : 10
                    }
                }
            ]
        },
        "key" : 1451606400000
    }, {
        "key_as_string" : "2016-02-01T00:00:00.000Z",
        "doc_count" : 23,
        "group_per_project" : {
            "buckets" : [{
                    "doc_count" : 1,
                    "key" : "Task A",
                    "hours" : {
                        "value" : 2
                    }
                }, {
                    "doc_count" : 20,
                    "key" : "Task B",
                    "hours" : {
                        "value" : 180
                    }
                }
            ]
        },
        "key" : 1454284800000
    }
...
]

但是,我需要的是在每个任务下按用户分组、分别对小时数求和,而不是得到所有用户小时数的总和。例如 1 月份:

{
  "doc_count" : 2,
  "key" : "Task A",
  "users" : [{
      "userId" : 1,
      "hours" : {"value" : 2}
    }, {
      "userId" : 2,
      "hours" : {"value" : 5}
    }
  ]
}

有没有办法使用 Elasticsearch 1.5 得到上述结果,而不必逐个遍历用户去分别获取总小时数(那样会降低应用程序的性能)?

提前致谢!

1 个答案:

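答案 0 :(得分:2)

在 group_per_project 之下再嵌套一层按 userId 分桶的 terms 聚合(即下面的 per_user),并把对 hours 的 sum 放到它里面,这样每个任务桶内就会按用户分别给出小时数之和。注意 terms 聚合默认只返回 10 个桶,用户较多时需要显式设置 size: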

  "aggs": {
    "group_by_interval": {
      "date_histogram": {
        "field": "spentOn",
        "interval": "month",
        "min_doc_count": 0,
        "extended_bounds": { "min": "1451606400000", "max": "1483142400000" }
      },
      "aggs": {
        "group_per_project": {
          "histogram": { "field": "taskId", "interval": 1 },
          "aggs": {
            "per_user": {
              "terms": { "field": "userId" },
              "aggs": {
                "hours": { "sum": { "field": "hours" } }
              }
            }
          }
        }
      }
    }
  }