Elasticsearch聚合:按每个存储桶中排在首位的结果(top hit)进行过滤

时间:2014-10-13 06:25:00

标签: elasticsearch immutability aggregation

在Elasticsearch中的单个索引中给出这样的数据集:

entityId | created    | status
---------+------------+-----------
1        | 2000/01/01 | draft
1        | 2001/01/02 | approved
2        | 2000/01/01 | draft
2        | 2000/01/02 | approved
2        | 2001/01/03 | rejected
3        | 2000/01/01 | draft
3        | 2001/01/03 | approved

我只想筛选出最新状态为“已批准”(approved)的实体。

所以我一直在尝试使用聚合和子聚合,我已经设法让所有实体都只有最新的状态,如下所示:

{
  "size": 0,
  "aggs": {
    "newest-event-query": {
      "terms": {
        "field": "entityId"
      },
      "aggs": {
        "newest-event": {
          "top_hits": {
            "size": 1,
            "sort": [
              {
                "created": {
                  "order": "desc"
                }
              }
            ]
          }
        }
      }
    }
  }
}

这应该会得到如下结果:

entityId | created    | status
---------+------------+-----------
1        | 2001/01/02 | approved
2        | 2001/01/03 | rejected
3        | 2001/01/03 | approved

但是我想进一步过滤该结果,只包括批准的记录(1,3),然后最终能够查询该结果。

在top_hits aggs中添加额外的aggs似乎不起作用:

{
  "size": 0,
  "aggs": {
    "newest-event-query": {
      "terms": {
        "field": "entityId"
      },
      "aggs": {
        "newest-event": {
          "top_hits": {
            "size": 1,
            "sort": [
              {
                "created": {
                  "order": "desc"
                }
              }
            ],
            "aggs": {
              "approved-only": {
                "filter": {
                  "term": {
                    "status": "approved"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

结果:

"error": "SearchPhaseExecutionException[Failed to execute phase [query], all shards failed; shardFailures {[gupa9nwpQWmGa3JqFmF2NA][creations][0]: SearchParseException[[creations][0]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][0]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][0]: SearchParseException[[events][0]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[events][0]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][creations][1]: SearchParseException[[creations][1]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][1]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][1]: SearchParseException[[events][1]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: 
SearchParseException[[events][1]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][creations][2]: SearchParseException[[creations][2]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][2]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][2]: SearchParseException[[events][2]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[events][2]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][creations][3]: SearchParseException[[creations][3]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][3]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][3]: SearchParseException[[events][3]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: 
SearchParseException[[events][3]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][creations][4]: SearchParseException[[creations][4]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][4]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][4]: SearchParseException[[events][4]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[events][4]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }]",
"status": 400

任何帮助表示感谢。

编辑:预先按“已批准”状态进行过滤是行不通的,因为事件可以从已批准状态再转为其他状态。我始终需要按最新状态进行过滤。这个练习的要点是创建一个不可变的数据结构——单个实体可以经历很多阶段,但我们应该始终只查询最新的那一条。

编辑2:为了找到解决方案,我还研究了父子(parent-child)结构。它虽然比较接近需求,但仍然有一些限制,比如 has_parent 或 has_child 需要有一个固定的“id”。另一个明显且高效的解决方案是在写入时直接标记最新的文档,例如使用一个布尔值;但我需要原子性,而在旧文档上重置该布尔值并在新文档上设置它,并不是一个原子操作。

1 个答案:

答案 0 :(得分:0)

我使用了 terms aggregation 和 bucket selector aggregation。在每个 terms 存储桶下,我对创建日期字段使用 max 聚合得到最新条目的日期,并另外用 filter 聚合(状态为 approved)加 max 聚合得到最新的已批准创建日期。然后使用 bucket selector,只保留“最新日期”与“最新批准日期”相同的存储桶。

Entity: 1                                        --> using terms aggregation
     "Latest created date":"2001-01-02"          --> using max aggregation
     "Latest approved doc":                      --> using filter aggregation
            "Latest approved date":"2000-01-01"  --> Using max aggregation
     "Bucket where Latest created date==Latest approved doc>Latest approved date" 
                                                 --> using bucket selector aggregation

映射

{
  "index90" : {
    "mappings" : {
      "properties" : {
        "created" : {
          "type" : "date",
          "format" : "[yyyy-MM-dd]"
        },
        "entityId" : {
          "type" : "integer"
        },
        "status" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "text"
            }
          }
        }
      }
    }
  }
}

数据:

"hits" : [
      {
        "_index" : "index90",
        "_type" : "_doc",
        "_id" : "xZsmY3EBdTQt60iNXDQB",
        "_score" : 1.0,
        "_source" : {
          "entityId" : 1,
          "created" : "2000-01-01",
          "status" : "draft"
        }
      },
      {
        "_index" : "index90",
        "_type" : "_doc",
        "_id" : "xpsmY3EBdTQt60iNojQc",
        "_score" : 1.0,
        "_source" : {
          "entityId" : 1,
          "created" : "2001-01-02",
          "status" : "approved"
        }
      },
      {
        "_index" : "index90",
        "_type" : "_doc",
        "_id" : "x5smY3EBdTQt60iN7DQc",
        "_score" : 1.0,
        "_source" : {
          "entityId" : 2,
          "created" : "2000-01-01",
          "status" : "draft"
        }
      },
      {
        "_index" : "index90",
        "_type" : "_doc",
        "_id" : "yJsnY3EBdTQt60iNAzT7",
        "_score" : 1.0,
        "_source" : {
          "entityId" : 2,
          "created" : "2000-01-02",
          "status" : "approved"
        }
      },
      {
        "_index" : "index90",
        "_type" : "_doc",
        "_id" : "yZsnY3EBdTQt60iNIjQY",
        "_score" : 1.0,
        "_source" : {
          "entityId" : 2,
          "created" : "2000-01-03",
          "status" : "rejected"
        }
      }
    ]

查询:

{
 "aggs": {
   "entitites": {
     "terms": {
       "field": "entityId",
       "size": 10
     },
     "aggs": {
       "latest_entry": {
         "max": {
           "field": "created"
         }
       },
       "latest_approved_entry":{
         "filter": {
           "term": {
             "status.keyword": "approved"
           }
         },
         "aggs": {
           "approved_date": {
             "max": {
               "field": "created"
             }
           }
         }
       },
       "select_bucket_with":{
         "bucket_selector": {
           "buckets_path": {
             "latest_entry":"latest_entry",
             "latest_approved_entry":"latest_approved_entry>approved_date"
           },
           "script": "if(params['latest_entry']==params['latest_approved_entry']) return true;"
         }
       }
     }
   }
 }
}

结果:

"aggregations" : {
    "entitites" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 1,
          "doc_count" : 2,
          "latest_entry" : {
            "value" : 9.783936E11,
            "value_as_string" : "2001-01-02"
          },
          "latest_approved_entry" : {
            "doc_count" : 1,
            "approved_date" : {
              "value" : 9.783936E11,
              "value_as_string" : "2001-01-02"
            }
          }
        }
      ]
    }
  }