我目前正在处理基于时间的数据集。
借助 Elasticsearch 的 Python API,我在 while 循环中检索复合聚合(composite aggregation)的结果。
我正在尝试将这些结果索引到一个新索引中,但每次调用 bulk 时,它都无法索引超过 2000 行。
我已经尝试过用 yield 逐行产出结果,也尝试过分批进行中间索引,但都没有奏效。
def index_metric(query: str, es=es, from_index=ES_INDEX):
    """Collect every bucket of the ``my_bucket`` composite aggregation.

    Runs ``query`` against ``from_index`` and keeps re-issuing it, each time
    with the composite ``after`` value set to the ``after_key`` of the
    previous response, until a response no longer carries an ``after_key``.

    Args:
        query: Search body containing a composite aggregation named
            ``my_bucket`` under ``aggs``. Accepted either as a mapping or as
            its JSON text; the caller's object is never modified.
        es: Client whose ``search(index=..., body=..., size=...)`` is called.
        from_index: Name of the index to search.

    Returns:
        A list with the buckets of all pages, in the order they were received.
    """
    import copy
    import json

    # Page through a private copy of the request body: the ``after`` cursor
    # must be written into the query that was actually passed in, without
    # leaking that cursor into the caller's object.
    body = json.loads(query) if isinstance(query, str) else copy.deepcopy(query)

    buckets = []
    while True:
        res = es.search(index=from_index, body=body, size=0)
        page = res["aggregations"]["my_bucket"]
        buckets.extend(page["buckets"])

        # A response without ``after_key`` is the last page; this also covers
        # a first response that has no further pages at all.
        if "after_key" not in page:
            return buckets
        body["aggs"]["my_bucket"]["composite"]["after"] = page["after_key"]
if __name__ == "__main__":
    # For each entry of ``queries``: print the current time, build the items
    # to index, hand them to ``bulk`` and print whatever ``bulk`` returns.
    for query in queries:
        started_at = datetime.now()
        print(started_at)
        # NOTE(review): ``index_query`` (called with ``path=``) is not defined
        # in the visible code; the helper defined in this file is
        # ``index_metric(query=...)`` -- confirm which one is intended.
        actions = index_query(path=query, es=es, from_index=ES_INDEX)
        outcome = bulk(es, actions)
        print(outcome)