Elasticsearch上的聚合平均值

时间:2015-11-11 15:00:38

标签: elasticsearch

我正在尝试在单个ES查询中计算所有已定义聚合的平均值。查询结果用于填充这张表格(原文此处为表格图片的链接)。

第一列(“提前期”)是存储桶,其余五个是这些存储桶上的指标。问题是我还需要得到每个指标在各个存储桶上的值的平均值,如第五行所示。

以下是我到目前为止编写的ES查询的相关部分:

  "aggs": {
    "by_lead_time": {
      "range": {
        "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkIn'].value) - new Date(doc['timestamp'].value); return duration.days; }",
        "ranges": [
          { "to": 1, "key": "Same day" },
          { "from": 1, "to": 7, "key": "Same week" },
          { "from": 7, "to": 14, "key": "Next week" },
          { "from": 14, "to": 31, "key": "Same month" },
          { "from": 31, "to": 93, "key": "Within 3 months" },
          { "from": 93, "key": "Longer than 3 months" }
        ]
      },
      "aggs": {
        "averageDailyRate": {
          "avg": { "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return doc['totalPreTax'].value / duration.days; }" }
        },
        "averageLeadTime": {
          "avg": { "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkIn'].value) - new Date(doc['timestamp'].value); return duration.days; }" }
        },
        "bookingCount": {
          "value_count": { "field": "uuid" }
        },
        "roomNights": {
          "sum": { "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return duration.days; };" }
        },
        "averageLengthOfStay": {
          "avg": { "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return duration.days; }" }
        },
        "totalRevenue": {
          "sum": { "field": "totalPreTax" }
        },
        "lowestDailyRate": {
          "nested": { "path": "nights" },
          "aggs": {
            "min_rate": {
              "min": { "field": "nights.rate.amount" }
            }
          }
        },
        "highestDailyRate": {
          "nested": { "path": "nights" },
          "aggs": {
            "max_rate": {
              "max": { "field": "nights.rate.amount" }
            }
          }
        },
        "averageOccupants": {
          "avg": { "script": "return doc['noOfAdults'].value + doc['noOfChildren'].value" }
        }
      }
    }
  }

除了总体平均值之外,这个查询能够按预期提取所需的值。问题是,除了在客户端应用程序中手动计算之外,我不知道如何在各存储桶的值计算出来之后再对它们求平均("avg")。从表格图片中可以清楚地看到这一点,但请记住,这不是每个存储桶内部的平均值,而是每个指标在所有存储桶上的值的平均值。

我应该怎么做呢?

1 个答案:

答案 0 :(得分:0)

您可以在ES 2.0中使用管道聚合(pipeline aggregations)来实现这一点,更具体地说,是使用平均桶聚合(average bucket aggregation,即 avg_bucket)。

我仅针对 roomNights 和 averageDailyRate 这两个指标的平均值测试了您的方案。2.0中的查询如下所示,其他数值型聚合也可以用类似的方式完成:

{
  "size": 0,
  "aggs": {
    "by_lead_time": {
      "range": {
        "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkIn'].value) - new Date(doc['timestamp'].value); return duration.days; }",
        "ranges": [
          { "to": 1, "key": "Same day" },
          { "from": 1, "to": 7, "key": "Same week" },
          { "from": 7, "to": 14, "key": "Next week" },
          { "from": 14, "to": 31, "key": "Same month" },
          { "from": 31, "to": 93, "key": "Within 3 months" },
          { "from": 93, "key": "Longer than 3 months" }
        ]
      },
      "aggs": {
        "roomNights": {
          "sum": { "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return duration.days; };" }
        },
        "averageDailyRate": {
          "avg": { "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return doc['totalPreTax'].value / duration.days; }" }
        }
      }
    },
    "avg_roomNights": {
      "avg_bucket": { "buckets_path": "by_lead_time>roomNights" }
    },
    "avg_averageDailyRate": {
      "avg_bucket": { "buckets_path": "by_lead_time>averageDailyRate" }
    }
  }
}

另外,您需要注意这个bug(https://github.com/elastic/elasticsearch/issues/14273),它在2.0中会导致您的脚本无法使用。我是在本地用2.0.1快照版本测试上面提供的查询的。如果您有兴趣在2.x中进行测试,原文此处的链接给出了如何直接从GitHub构建该版本的说明。