Elasticsearch日期直方图聚合与min / max / avg

时间:2017-04-05 09:26:21

标签: elasticsearch

使用elasticsearch 5.2。

我的数据格式如下:

{
    "_id": "635636", 
    "_index": "test", 
    "_source": {
        "ad_id": 9368, 
        "body": 1,
        "drivetrain": 1, 
        "engine_capacity": 1, 
        "fuel_type": 1, 
        "has_exchange": false, 
        "id": 635636, 
        "manufacturer_id": 12, 
        "model_id": 10, 
        "odometer_state": 110000, 
        "price_byn": 22802, 
        "price_usd": 12000, 
        "source": 2, 
        "source_date": "2016-10-06", 
        "source_id": "12194309", 
        "state": 2, 
        "state_date": "2017-03-07", 
        "transmission_type": 1, 
        "year": 2012
    }, 
    "_type": "ads", 
    "_version": 4, 
    "found": true
}

我想按日期获取 min / max / avg 聚合结果,使用的查询如下:

{
    "size":0,
    "aggs":{
        "avg_price_per_day":{
            "date_histogram":{
                "field":"state_date",
                "interval":"day"
            },
            "aggs":{
                "prices":{
                    "avg":{
                        "field":"price_usd"
                    }
                }
            }
        },
        "max_price_per_day":{
            "date_histogram":{
                "field":"state_date",
                "interval":"day"
            },
            "aggs":{
                "prices":{
                    "max":{
                        "field":"price_usd"
                    }
                }
            }
        },
        "min_price_per_day":{
            "date_histogram":{
                "field":"state_date",
                "interval":"day"
            },
            "aggs":{
                "prices":{
                    "min":{
                        "field":"price_usd"
                    }
                }
            }
        },
        "max_daily_price":{
            "max_bucket":{
                "buckets_path":"max_price_per_day>prices"
            }
        },
        "min_daily_price":{
            "min_bucket":{
                "buckets_path":"min_price_per_day>prices"
            }
        },
        "avg_daily_price":{
            "max_bucket":{
                "buckets_path":"avg_price_per_day>prices"
            }
        }
    },
    "query":{
        "bool":{
            "filter":[
                {
                    "range":{
                        "price_usd":{
                            "gt":0
                        }
                    }
                },
                {
                    "term":{
                        "manufacturer_id":{
                            "value":11,
                            "boost":1
                        }
                    }
                },
                {
                    "term":{
                        "model_id":{
                            "value":7,
                            "boost":1
                        }
                    }
                }
            ]
        }
    }
}

但它只返回一个月的聚合:

{
"took":23,
"timed_out":false,
"_shards":{
    "total":1,
    "successful":1,
    "failed":0
},
"hits":{
    "total":6046,
    "max_score":0.0,
    "hits":[

    ]
},
"aggregations":{
    "avg_price_per_day":{
        "buckets":[
            {
                "key_as_string":"2017-01-02",
                "key":1483315200000,
                "doc_count":1494,
                "prices":{
                    "value":4431.045515394913
                }
            },
            {
                "key_as_string":"2017-01-09",
                "key":1483920000000,
                "doc_count":0,
                "prices":{
                    "value":null
                }
            },
            {
                "key_as_string":"2017-01-16",
                "key":1484524800000,
                "doc_count":840,
                "prices":{
                    "value":4299.322619047619
                }
            },
            {
                "key_as_string":"2017-01-23",
                "key":1485129600000,
                "doc_count":3712,
                "prices":{
                    "value":4383.441540948276
                }
            }
        ]
    },
    "max_price_per_day":{
        "buckets":[
            {
                "key_as_string":"2017-01-02",
                "key":1483315200000,
                "doc_count":1494,
                "prices":{
                    "value":45000.0
                }
            },
            {
                "key_as_string":"2017-01-09",
                "key":1483920000000,
                "doc_count":0,
                "prices":{
                    "value":null
                }
            },
            {
                "key_as_string":"2017-01-16",
                "key":1484524800000,
                "doc_count":840,
                "prices":{
                    "value":15500.0
                }
            },
            {
                "key_as_string":"2017-01-23",
                "key":1485129600000,
                "doc_count":3712,
                "prices":{
                    "value":45000.0
                }
            }
        ]
    },
    "min_price_per_day":{
        "buckets":[
            {
                "key_as_string":"2017-01-02",
                "key":1483315200000,
                "doc_count":1494,
                "prices":{
                    "value":110.0
                }
            },
            {
                "key_as_string":"2017-01-09",
                "key":1483920000000,
                "doc_count":0,
                "prices":{
                    "value":null
                }
            },
            {
                "key_as_string":"2017-01-16",
                "key":1484524800000,
                "doc_count":840,
                "prices":{
                    "value":200.0
                }
            },
            {
                "key_as_string":"2017-01-23",
                "key":1485129600000,
                "doc_count":3712,
                "prices":{
                    "value":200.0
                }
            }
        ]
    },
    "max_daily_price":{
        "value":45000.0,
        "keys":[
            "2017-01-02",
            "2017-01-23"
        ]
    },
    "min_daily_price":{
        "value":110.0,
        "keys":[
            "2017-01-02"
        ]
    },
    "avg_daily_price":{
        "value":4431.045515394913,
        "keys":[
            "2017-01-02"
        ]
    }
}
}

我还索引了2月和3月的数据,但它们没有出现在聚合结果中。如何才能把它们全部包括进来?

更新

curl -XPOST localhost:9200/avtostat/ads/_search -d '{"query":{"bool":{"filter":[{"range":{"state_date":{"gt":"2017-02-01"}}},{"range":{"price_usd":{"gt":0}}},{"term":{"manufacturer_id":{"value":11,"boost":1}}},{"term":{"model_id":{"value":7,"boost":1}}}]}}}'

{
"took":166,
"timed_out":false,
"_shards":{
    "total":1,
    "successful":1,
    "failed":0
},
"hits":{
    "total":6046,
    "max_score":0.0,
    "hits":[
        {
            "_index":"avtostat",
            "_type":"ads",
            "_id":"272894",
            "_score":0.0,
            "_source":{
                "id":272894,
                "ad_id":111602,
                "manufacturer_id":11,
                "model_id":7,
                "fuel_type":3,
                "engine_capacity":1.6,
                "transmission_type":2,
                "year":1999,
                "body":6,
                "drivetrain":1,
                "state":2,
                "odometer_state":303000,
                "has_exchange":true,
                "price_byn":4816,
                "price_usd":2500,
                "state_date":"2017-02-05",
                "source":1,
                "source_id":"3215650",
                "source_date":"2017-02-05"
            }
        },
        ...
    ]
}
}

1 个答案:

答案 0 :(得分:1)

您的日期格式不正确。请参阅日期格式文档(Joda-Time 的 DateTimeFormat 模式符号)中大写字母与小写字母各自的含义。您需要的是 yyyy-MM-dd,而不是 YYYY-MM-DD。尤其是 D,它的含义与 d 明显不同:

  

符号 | 含义 | 表示形式 | 示例

D | 一年中的第几天(day of year) | 数字 | 189

d | 一个月中的第几天(day of month) | 数字 | 10

这是我的意思的相关例子:

DELETE test
PUT test
{
  "mappings": {
    "test": {
      "properties": {
        "state_date": {
          "type": "date",
          "format": "YYYY-MM-DD"
        },
        "some_id": {
          "type": "long"
        }
      }
    }
  }
}

POST test/test/_bulk
{"index":{}}
{"some_id":272894,"state_date":"2017-08-05"}
{"index":{}}
{"some_id":272894,"state_date":"2017-08-04"}
{"index":{}}
{"some_id":272894,"state_date":"2017-08-03"}
{"index":{}}
{"some_id":272894,"state_date":"2017-08-09"}
{"index":{}}
{"some_id":272894,"state_date":"2017-10-12"}

GET /test/_search
{
  "size": 0,
  "aggs": {
    "avg_price_per_day": {
      "date_histogram": {
        "field": "state_date",
        "interval": "day"
      }
    }
  }
}

因此,在我的测试数据中,您会看到 2017 年 8 月和 10 月的日期。但是,根据文档,这种格式中的大写 D 表示一年中的第几天,而不是一个月中的第几天。这意味着 08-05 被解析为 2017 年的第 5 天,而不是 8 月 5 日;08-09 被解析为 2017 年的第 9 天,而不是 8 月 9 日,依此类推。

这意味着您所有的日期实际上都被解析成了 1 月份的日期。从聚合结果中可以看出这一点:

  "aggregations": {
    "avg_price_per_day": {
      "buckets": [
        {
          "key_as_string": "2017-01-03",
          "key": 1483401600000,
          "doc_count": 1
        },
        {
          "key_as_string": "2017-01-04",
          "key": 1483488000000,
          "doc_count": 1
        },
        {
          "key_as_string": "2017-01-05",
          "key": 1483574400000,
          "doc_count": 1
        },
        {
          "key_as_string": "2017-01-06",
          "key": 1483660800000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-07",
          "key": 1483747200000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-08",
          "key": 1483833600000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-09",
          "key": 1483920000000,
          "doc_count": 1
        },
        {
          "key_as_string": "2017-01-10",
          "key": 1484006400000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-11",
          "key": 1484092800000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-12",
          "key": 1484179200000,
          "doc_count": 1
        }
      ]
    }
  }