1)我有一个名为 "data" 的索引,其中有 479427 个文档。为了获得文档计数,我使用了 GET /data/_search,得到了这样的响应:
"hits": {
"total": 479427,
"max_score": 1,
2)我又使用 count API(GET /data/_count)进行了检查,得到的响应是这样的:
{
"count": 479427,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
}
}
从 1 和 2 得到的文档数相同。
但是当我尝试执行 GET /data/_stats 命令时,我在 get 和 total 字段中得到了不同的计数。
"data": {
"primaries": {
"docs": {
"count": 2407154,
"deleted": 357392
},
"indexing": {
"index_total": 4926760,
"index_time_in_millis": 31181542,
"index_current": 2744,
"index_failed": 260136,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 175,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 4773829,
"time_in_millis": 857412,
"exists_total": 4773829,
"exists_time_in_millis": 857412,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 82217,
"total_time_in_millis": 19552894,
"total_docs": 87321796,
"total_size_in_bytes": 281542406990,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 488983,
"total_auto_throttle_in_bytes": 35543080
},
"refresh": {
"total": 956338,
"total_time_in_millis": 29144660
},
"flush": {
"total": 697,
"total_time_in_millis": 469603
},
"segments": {
"count": 114,
"memory_in_bytes": 7145544,
"terms_memory_in_bytes": 4862940,
"stored_fields_memory_in_bytes": 1037064,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 116928,
"doc_values_memory_in_bytes": 1128612,
"index_writer_memory_in_bytes": 13753207,
"index_writer_max_memory_in_bytes": 548831232,
"version_map_memory_in_bytes": 2182,
"fixed_bit_set_memory_in_bytes": 350648
},
"translog": {
"operations": 19488,
"size_in_bytes": 799972235
},
"suggest": {
"total": 0,
"time_in_millis": 0,
"current": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 13306
}
},
"total": {
"docs": {
"count": 4814308,   <-- 这里我得到的文档总数是 4814308
"deleted": 715540
},
"store": {
"size_in_bytes": 11910376476,
"throttle_time_in_millis": 0
},
"indexing": {
"index_total": 9590499,
"index_time_in_millis": 61324893,
"index_current": 2744,
"index_failed": 310323,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 175,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 4773829,
"time_in_millis": 857412,
"exists_total": 4773829,
"exists_time_in_millis": 857412,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 21901088,
"query_time_in_millis": 11241895,
"query_current": 0,
"fetch_total": 4578094,
"fetch_time_in_millis": 1774794,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 153172,
"total_time_in_millis": 37586865,
"total_docs": 170014671,
"total_size_in_bytes": 542992816504,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 920242,
"total_auto_throttle_in_bytes": 71693630
},
"refresh": {
"total": 1841635,
"total_time_in_millis": 56292736
},
"flush": {
"total": 1343,
"total_time_in_millis": 946306
},
"warmer": {
"current": 0,
"total": 3822250,
"total_time_in_millis": 1098530
},
"query_cache": {
"memory_size_in_bytes": 2706088,
"total_count": 20222398,
"hit_count": 4846746,
"miss_count": 15375652,
"cache_size": 271,
"cache_count": 2267,
"evictions": 1996
},
"fielddata": {
"memory_size_in_bytes": 0,
"evictions": 0
},
"percolate": {
"total": 0,
"time_in_millis": 0,
"current": 0,
"memory_size_in_bytes": -1,
"memory_size": "-1b",
"queries": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 229,
"memory_in_bytes": 14245875,
"terms_memory_in_bytes": 9804839,
"stored_fields_memory_in_bytes": 2068360,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 234752,
"doc_values_memory_in_bytes": 2137924,
"index_writer_memory_in_bytes": 25849179,
"index_writer_max_memory_in_bytes": 1097662464,
"version_map_memory_in_bytes": 4364,
"fixed_bit_set_memory_in_bytes": 701360
},
"translog": {
"operations": 39262,
"size_in_bytes": 1616348491
},
"suggest": {
"total": 0,
"time_in_millis": 0,
"current": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 32764
}
}
}
我甚至用 Curator 执行了 curator_cli show_indices --verbose 来检查,它也给出了同样的数字:
data open 11.1GB 4814308 5 1 2017-05-31T13:00:37Z
为什么会有差异?
更新----------------------
1)您的意思是统计API会将嵌套的每个字段视为文档吗?
注意:顺便说一下,我指的是 stats API 中的 total.docs.count 字段。
2)count API 会根据唯一 ID 给出索引中存在的文档数量。
所以我的问题是:如果我想知道索引中有多少文档,哪一个给出的数量才是正确的——count、get、stats,还是 curator_cli show_indices --verbose(使用 Curator)?
谢谢
答案 0 :(得分:1)
stats API 的文档计数包含所谓的 nested 文档,因为在映射中标记为 nested 的字段会被当作独立的 Lucene 文档来存储;而 count API 只统计顶级文档。