Question

我正在尝试在单个 ES 查询中计算所有已定义聚合的平均值。查询结果用于填充这张表格。

第一列（“提前期”）是各个桶（bucket），其余五列是针对这些桶计算出的指标。问题是，我还需要对各个桶上算出的每个指标再求一次平均值，如表格第五行所示。

以下是我到目前为止编写的ES查询的相关部分：

  "aggs": {
    "by_lead_time": {
      "range": {
        "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkIn'].value) - new Date(doc['timestamp'].value); return duration.days; }",
        "ranges": [
          {
            "to": 1,
            "key": "Same day"
          },
          {
            "from": 1,
            "to": 7,
            "key": "Same week"
          },
          {
            "from": 7,
            "to": 14,
            "key": "Next week"
          },
          {
            "from": 14,
            "to": 31,
            "key": "Same month"
          },
          {
            "from": 31,
            "to": 93,
            "key": "Within 3 months"
          },
          {
            "from": 93,
            "key": "Longer than 3 months"
          }
        ]
      },
      "aggs": {
        "averageDailyRate": {
          "avg": {
            "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return doc['totalPreTax'].value / duration.days; }"
          }
        },
        "averageLeadTime": {
          "avg": {
            "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkIn'].value) - new Date(doc['timestamp'].value); return duration.days; }"
          }
        },
        "bookingCount": {
          "value_count": {
            "field": "uuid"
          }
        },
        "roomNights": {
          "sum": {
            "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return duration.days; };"
          }
        },
        "averageLengthOfStay": {
          "avg": {
            "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return duration.days; }"
          }
        },
        "totalRevenue": {
          "sum": {
            "field": "totalPreTax"
          }
        },
        "lowestDailyRate": {
          "nested": {
            "path": "nights"
          },
          "aggs": {
            "min_rate": {
              "min": {
                "field": "nights.rate.amount"
              }
            }
          }
        },
        "highestDailyRate": {
          "nested": {
            "path": "nights"
          },
          "aggs": {
            "max_rate": {
              "max": {
                "field": "nights.rate.amount"
              }
            }
          }
        },
        "averageOccupants": {
          "avg": {
            "script": "return doc['noOfAdults'].value + doc['noOfChildren'].value"
          }
        }
      }
    }
  }

除了总体平均值之外，这个查询可以按预期提取出所需的各项值。问题是，除了在客户端应用程序里手工计算之外，我不知道如何在各桶的值计算出来之后再对它们求 "avg"。从表格图片中应该可以清楚地看出我的意思，但请注意：这不是每个桶内部的平均值，而是每个指标在所有桶上的值的平均值。

我应该怎么做呢？

Answer 1

在 ES 2.0 中，您可以使用 pipeline aggregations（管道聚合）来实现这一点，更具体地说，是其中的 average bucket aggregation（avg_bucket）。

我只针对 roomNights 和 averageDailyRate 这两个指标的平均值测试了您的场景。2.0 中的查询如下所示，其他数值型聚合可以用类似的方式处理：

{
  "size": 0,
  "aggs": {
    "by_lead_time": {
      "range": {
        "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkIn'].value) - new Date(doc['timestamp'].value); return duration.days; }",
        "ranges": [
          { "key": "Same day", "to": 1 },
          { "key": "Same week", "from": 1, "to": 7 },
          { "key": "Next week", "from": 7, "to": 14 },
          { "key": "Same month", "from": 14, "to": 31 },
          { "key": "Within 3 months", "from": 31, "to": 93 },
          { "key": "Longer than 3 months", "from": 93 }
        ]
      },
      "aggs": {
        "roomNights": {
          "sum": {
            "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return duration.days; };"
          }
        },
        "averageDailyRate": {
          "avg": {
            "script": "use(groovy.time.TimeCategory) { def duration = new Date(doc['checkOut'].value) - new Date(doc['checkIn'].value); return doc['totalPreTax'].value / duration.days; }"
          }
        }
      }
    },
    "avg_roomNights": { "avg_bucket": { "buckets_path": "by_lead_time>roomNights" } },
    "avg_averageDailyRate": { "avg_bucket": { "buckets_path": "by_lead_time>averageDailyRate" } }
  }
}

另外，您需要注意这个 bug —— https://github.com/elastic/elasticsearch/issues/14273 —— 它在 2.0 中会导致您的脚本无法工作。上面提供的查询，我是在本地用 2.0.1 快照版本测试的。如果您想在 2.x 中进行测试，可以参考这些关于如何直接从 GitHub 构建版本的说明。

Elasticsearch上的聚合平均值

1 个答案: