我正在尝试在单个 ES 查询中计算所有已定义聚合的平均值。查询结果用于填充这张表格。
第一列(“提前期”)是存储桶,其余五列是针对这些存储桶计算的指标。问题是,我还需要对各个存储桶上计算出的每个指标再求平均值,如第五行所示。
以下是我到目前为止编写的ES查询的相关部分:
"aggs": {
  "by_lead_time": {
    "range": {
      "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkIn'].value) - new Date(doc['timestamp'].value); return duration.days; }",
      "ranges": [
        { "to": 1, "key": "Same day" },
        { "from": 1, "to": 7, "key": "Same week" },
        { "from": 7, "to": 14, "key": "Next week" },
        { "from": 14, "to": 31, "key": "Same month" },
        { "from": 31, "to": 93, "key": "Within 3 months" },
        { "from": 93, "key": "Longer than 3 months" }
      ]
    },
    "aggs": {
      "averageDailyRate": {
        "avg": {
          "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return doc['totalPreTax'].value / duration.days; }"
        }
      },
      "averageLeadTime": {
        "avg": {
          "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkIn'].value) - new Date(doc['timestamp'].value); return duration.days; }"
        }
      },
      "bookingCount": {
        "value_count": { "field": "uuid" }
      },
      "roomNights": {
        "sum": {
          "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return duration.days; };"
        }
      },
      "averageLengthOfStay": {
        "avg": {
          "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return duration.days; }"
        }
      },
      "totalRevenue": {
        "sum": { "field": "totalPreTax" }
      },
      "lowestDailyRate": {
        "nested": { "path": "nights" },
        "aggs": {
          "min_rate": {
            "min": { "field": "nights.rate.amount" }
          }
        }
      },
      "highestDailyRate": {
        "nested": { "path": "nights" },
        "aggs": {
          "max_rate": {
            "max": { "field": "nights.rate.amount" }
          }
        }
      },
      "averageOccupants": {
        "avg": {
          "script": "return doc['noOfAdults'].value + doc['noOfChildren'].value"
        }
      }
    }
  }
}
除了总体平均值之外,这个查询能够按预期提取所需的值。问题是,除了在客户端应用程序中手动计算之外,我不知道如何在各存储桶的值计算出来之后再对它们执行 "avg"。
从表格图片中应该可以清楚地看出这一点,但请记住,这不是每个存储桶内部的平均值,而是每个指标在所有存储桶上的值的平均值。
我应该怎么做呢?
答案 0 :(得分:0)
您可以在 ES 2.0 中使用管道聚合(pipeline aggregations)来实现这一点,更具体地说,是使用 average bucket aggregation(即 avg_bucket)。
我仅针对 roomNights 和 averageDailyRate 这两个指标的平均值测试了您的方案。2.0 中的查询如下所示,其他数值型聚合也可以用类似的方式完成:
{
  "size": 0,
  "aggs": {
    "by_lead_time": {
      "range": {
        "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkIn'].value) - new Date(doc['timestamp'].value); return duration.days; }",
        "ranges": [
          { "to": 1, "key": "Same day" },
          { "from": 1, "to": 7, "key": "Same week" },
          { "from": 7, "to": 14, "key": "Next week" },
          { "from": 14, "to": 31, "key": "Same month" },
          { "from": 31, "to": 93, "key": "Within 3 months" },
          { "from": 93, "key": "Longer than 3 months" }
        ]
      },
      "aggs": {
        "roomNights": {
          "sum": {
            "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return duration.days; };"
          }
        },
        "averageDailyRate": {
          "avg": {
            "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return doc['totalPreTax'].value / duration.days; }"
          }
        }
      }
    },
    "avg_roomNights": {
      "avg_bucket": { "buckets_path": "by_lead_time>roomNights" }
    },
    "avg_averageDailyRate": {
      "avg_bucket": { "buckets_path": "by_lead_time>averageDailyRate" }
    }
  }
}
另外,您需要注意这个 bug(https://github.com/elastic/elasticsearch/issues/14273),它会导致您的脚本在 2.0 中无法使用。我是在本地使用 2.0.1 快照版本测试上面提供的查询的。如果您有兴趣在 2.x 中进行测试,这里有关于如何直接从 GitHub 构建该版本的说明。