查询elasticsearch返回计数

时间:2015-11-17 21:03:53

标签: elasticsearch lucene

我正在努力创建可以帮助我编写警报脚本的查询/规则。我想查询elasticsearch API以获取特定索引的计数,以便在计数达到某个阈值时收到警报。以下查询是一次尝试,因为我没有这方面的经验:

{
"query": {
 "filtered": {
  "query": {
    "query_string": {
      "analyze_wildcard": true,
      "query": "*"
    }
  },
  "filter": {
    "bool": {
      "must": [
        {
          "query": {
            "match": {
              "PStream": {
                "query": "*",
                "type": "phrase"
              }
            }
          }
        },
        {
          "range": {
            "@timestamp": {
              "gte": 1447789445320,
              "lte": 1447793045320
            }
          }
        }
      ],
      "must_not": []
     }
    }
   }
  },
   "highlight": {
   "pre_tags": [
   "@kibana-highlighted-field@"
  ],
   "post_tags": [
   "@/kibana-highlighted-field@"
  ],
  "fields": {
    "*": {}
   },
  "fragment_size": 2147483647
 },
  "size": 500,
   "sort": [
  {
  "@timestamp": {
    "order": "desc",
    "unmapped_type": "boolean"
  }
 }
],
"aggs": {
 "2": {
  "date_histogram": {
    "field": "@timestamp",
    "interval": "1m",
    "pre_zone": "-05:00",
    "pre_zone_adjust_large_interval": true,
    "min_doc_count": 0,
    "extended_bounds": {
      "min": 1447789445317,
      "max": 1447793045317
    }
  }
 }
}
}

PStream是我关注的字段

编辑:

转到索引的数据示例:

{
 "_index": "logstash-2015.11.17",
 "_type": "logs",
 "_id": "AVEXMKu2YVnF1NOjr9YT",
 "_score": null,
 "_source": {
 "authorUrl": "",
 "postUrl": "",
 "pubDate": "2015-11-17T15:18:24",
 "scrapeDate": "2015-11-17T15:44:03",
 "clientId": "136902834",
 "query": "Jenny Balatsinou",
 "PType": "post",
 "tLatency": 1539,
 "PLang": "en",
 "PStream": "864321",
 "PName": "xStackOverflow",
 "@version": "1",
 "@timestamp": "2015-11-17T20:44:03.400Z"
},
"fields": {
"@timestamp": [
  1447793043400
],
"pubDate": [
  1447773504000
],
"scrapeDate": [
  1447775043000
  ]
 },
"sort": [
 1447793043400
]
}

每天有大约2000万条消息被索引到Elasticsearch中。我在Kibana创建了一个仪表板,在那里查看这些数据和统计信息。我想编写一个正确的查询,在一个定期运行的java程序中使用,该程序用此查询检查此索引。它应该返回按PStream字段分组的每小时总计数(该字段有多个取值)。这样,只要某个分组的计数为0,程序就会发送警报。

EG。输出:

"result": {
  "total": 74,
  "successful": 63,
  "failed": 11,
  "buckets": [
    {
      "index": "logstash-2015.11.08",
      "PStream": "37647338933",
      "Count": 1234532
    },
    {
      "index": "logstash-2015.11.08",
      "PStream": "45345343566",
      "Count": 156532
    }
  ]
}

1 个答案:

答案 0(得分:2):

作为一个简单的例子(根据上面的评论),我只是设置了一个简单的索引:

DELETE /test_index

PUT /test_index

添加了一些(简化)数据:

PUT /test_index/doc/_bulk
{"index":{"_id":1}}
{"PStream": "864321","@timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":2}}
{"PStream": "864321","@timestamp": "2015-11-17T21:44:03.400Z"}
{"index":{"_id":3}}
{"PStream": "864321","@timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":4}}
{"PStream": "864322","@timestamp": "2015-11-17T21:44:03.400Z"}

现在我可以按小时直方图分桶,获得每个"PStream"词条的计数:

POST /test_index/_search
{
    "size": 0, 
     "aggs" : {
        "timestamp_histogram" : {
            "date_histogram" : {
                "field" : "@timestamp",
                "interval" : "hour"
            },
            "aggs": {
                "pstream_terms": {
                    "terms": {
                        "field": "PStream"
                    }
                }
            }
        }
    }
}
...
{
   "took": 6,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 4,
      "max_score": 0,
      "hits": []
   },
   "aggregations": {
      "timestamp_histogram": {
         "buckets": [
            {
               "key_as_string": "2015-11-17T20:00:00.000Z",
               "key": 1447790400000,
               "doc_count": 2,
               "pstream_terms": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                     {
                        "key": "864321",
                        "doc_count": 2
                     }
                  ]
               }
            },
            {
               "key_as_string": "2015-11-17T21:00:00.000Z",
               "key": 1447794000000,
               "doc_count": 2,
               "pstream_terms": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                     {
                        "key": "864321",
                        "doc_count": 1
                     },
                     {
                        "key": "864322",
                        "doc_count": 1
                     }
                  ]
               }
            }
         ]
      }
   }
}

或者相反:

POST /test_index/_search
{
   "size": 0,
   "aggs": {
      "pstream_terms": {
         "terms": {
            "field": "PStream"
         },
         "aggs": {
            "timestamp_histogram": {
               "date_histogram": {
                  "field": "@timestamp",
                  "interval": "hour"
               }
            }
         }
      }
   }
}
...
{
   "took": 5,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 4,
      "max_score": 0,
      "hits": []
   },
   "aggregations": {
      "pstream_terms": {
         "doc_count_error_upper_bound": 0,
         "sum_other_doc_count": 0,
         "buckets": [
            {
               "key": "864321",
               "doc_count": 3,
               "timestamp_histogram": {
                  "buckets": [
                     {
                        "key_as_string": "2015-11-17T20:00:00.000Z",
                        "key": 1447790400000,
                        "doc_count": 2
                     },
                     {
                        "key_as_string": "2015-11-17T21:00:00.000Z",
                        "key": 1447794000000,
                        "doc_count": 1
                     }
                  ]
               }
            },
            {
               "key": "864322",
               "doc_count": 1,
               "timestamp_histogram": {
                  "buckets": [
                     {
                        "key_as_string": "2015-11-17T21:00:00.000Z",
                        "key": 1447794000000,
                        "doc_count": 1
                     }
                  ]
               }
            }
         ]
      }
   }
}

这是我使用的代码:

http://sense.qbox.io/gist/6c0c30db1cf0fb8529bcfec21c0ce5c02a5ae94c