Count和Stats api在Elasticsearch中显示不同的doc计数?

时间:2017-07-04 07:10:25

标签: elasticsearch

1)我有一个名为"data"的索引,其中有479427个文档。为了获得文档计数,我使用了GET /data/_search,得到了这样的响应:

"hits": {
    "total": 479427,
    "max_score": 1,

2)我又使用count API(GET /data/_count)进行了检查,得到的响应是这样的:

{
  "count": 479427,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  }
}

从1和2得到的文档数相同。

但是当我尝试GET /data/_stats命令时,我在get和total字段中获得了不同的计数。

"data": {
  "primaries": {
    "docs": {
      "count": 2407154,
      "deleted": 357392
    },
    "indexing": {
      "index_total": 4926760,
      "index_time_in_millis": 31181542,
      "index_current": 2744,
      "index_failed": 260136,
      "delete_total": 0,
      "delete_time_in_millis": 0,
      "delete_current": 0,
      "noop_update_total": 175,
      "is_throttled": false,
      "throttle_time_in_millis": 0
    },
    "get": {
      "total": 4773829,
      "time_in_millis": 857412,
      "exists_total": 4773829,
      "exists_time_in_millis": 857412,
      "missing_total": 0,
      "missing_time_in_millis": 0,
      "current": 0
    },
    "merges": {
      "current": 0,
      "current_docs": 0,
      "current_size_in_bytes": 0,
      "total": 82217,
      "total_time_in_millis": 19552894,
      "total_docs": 87321796,
      "total_size_in_bytes": 281542406990,
      "total_stopped_time_in_millis": 0,
      "total_throttled_time_in_millis": 488983,
      "total_auto_throttle_in_bytes": 35543080
    },
    "refresh": {
      "total": 956338,
      "total_time_in_millis": 29144660
    },
    "flush": {
      "total": 697,
      "total_time_in_millis": 469603
    },
    "segments": {
      "count": 114,
      "memory_in_bytes": 7145544,
      "terms_memory_in_bytes": 4862940,
      "stored_fields_memory_in_bytes": 1037064,
      "term_vectors_memory_in_bytes": 0,
      "norms_memory_in_bytes": 116928,
      "doc_values_memory_in_bytes": 1128612,
      "index_writer_memory_in_bytes": 13753207,
      "index_writer_max_memory_in_bytes": 548831232,
      "version_map_memory_in_bytes": 2182,
      "fixed_bit_set_memory_in_bytes": 350648
    },
    "translog": {
      "operations": 19488,
      "size_in_bytes": 799972235
    },
    "suggest": {
      "total": 0,
      "time_in_millis": 0,
      "current": 0
    },
    "request_cache": {
      "memory_size_in_bytes": 0,
      "evictions": 0,
      "hit_count": 0,
      "miss_count": 0
    },
    "recovery": {
      "current_as_source": 0,
      "current_as_target": 0,
      "throttle_time_in_millis": 13306
    }
  },
  "total": {
    "docs": {                <-- 这里我得到的文档总数是4814308
      "count": 4814308,
      "deleted": 715540
    },
    "store": {
      "size_in_bytes": 11910376476,
      "throttle_time_in_millis": 0
    },
    "indexing": {
      "index_total": 9590499,
      "index_time_in_millis": 61324893,
      "index_current": 2744,
      "index_failed": 310323,
      "delete_total": 0,
      "delete_time_in_millis": 0,
      "delete_current": 0,
      "noop_update_total": 175,
      "is_throttled": false,
      "throttle_time_in_millis": 0
    },
    "get": {
      "total": 4773829,
      "time_in_millis": 857412,
      "exists_total": 4773829,
      "exists_time_in_millis": 857412,
      "missing_total": 0,
      "missing_time_in_millis": 0,
      "current": 0
    },
    "search": {
      "open_contexts": 0,
      "query_total": 21901088,
      "query_time_in_millis": 11241895,
      "query_current": 0,
      "fetch_total": 4578094,
      "fetch_time_in_millis": 1774794,
      "fetch_current": 0,
      "scroll_total": 0,
      "scroll_time_in_millis": 0,
      "scroll_current": 0
    },
    "merges": {
      "current": 0,
      "current_docs": 0,
      "current_size_in_bytes": 0,
      "total": 153172,
      "total_time_in_millis": 37586865,
      "total_docs": 170014671,
      "total_size_in_bytes": 542992816504,
      "total_stopped_time_in_millis": 0,
      "total_throttled_time_in_millis": 920242,
      "total_auto_throttle_in_bytes": 71693630
    },
    "refresh": {
      "total": 1841635,
      "total_time_in_millis": 56292736
    },
    "flush": {
      "total": 1343,
      "total_time_in_millis": 946306
    },
    "warmer": {
      "current": 0,
      "total": 3822250,
      "total_time_in_millis": 1098530
    },
    "query_cache": {
      "memory_size_in_bytes": 2706088,
      "total_count": 20222398,
      "hit_count": 4846746,
      "miss_count": 15375652,
      "cache_size": 271,
      "cache_count": 2267,
      "evictions": 1996
    },
    "fielddata": {
      "memory_size_in_bytes": 0,
      "evictions": 0
    },
    "percolate": {
      "total": 0,
      "time_in_millis": 0,
      "current": 0,
      "memory_size_in_bytes": -1,
      "memory_size": "-1b",
      "queries": 0
    },
    "completion": {
      "size_in_bytes": 0
    },
    "segments": {
      "count": 229,
      "memory_in_bytes": 14245875,
      "terms_memory_in_bytes": 9804839,
      "stored_fields_memory_in_bytes": 2068360,
      "term_vectors_memory_in_bytes": 0,
      "norms_memory_in_bytes": 234752,
      "doc_values_memory_in_bytes": 2137924,
      "index_writer_memory_in_bytes": 25849179,
      "index_writer_max_memory_in_bytes": 1097662464,
      "version_map_memory_in_bytes": 4364,
      "fixed_bit_set_memory_in_bytes": 701360
    },
    "translog": {
      "operations": 39262,
      "size_in_bytes": 1616348491
    },
    "suggest": {
      "total": 0,
      "time_in_millis": 0,
      "current": 0
    },
    "request_cache": {
      "memory_size_in_bytes": 0,
      "evictions": 0,
      "hit_count": 0,
      "miss_count": 0
    },
    "recovery": {
      "current_as_source": 0,
      "current_as_target": 0,
      "throttle_time_in_millis": 32764
    }
  }
}

我甚至在Curator中运行了curator_cli show_indices --verbose,也得到了同样的计数:

data open 11.1GB 4814308 5 1 2017-05-31T13:00:37Z

为什么会有差异?

更新----------------------

1)您的意思是stats API会把每个嵌套(nested)字段都视为一个文档吗? 注意:我说的是stats API中的total.docs.count字段。 2)count API会根据唯一ID给出索引中存在的文档计数。

所以我的问题是,如果我想知道索引中有多少文档,哪个API给出的数量才是正确的:count、get、stats,还是curator_cli show_indices --verbose(使用Curator)?

谢谢

1 个答案:

答案 0 :(得分:1)

stats API的文档计数包含所谓的nested文档,因为在映射中标记为nested的字段会被存储为各自独立的Lucene文档,而count API只计算顶级文档。此外,total.docs.count(4814308)恰好是primaries.docs.count(2407154)的两倍,因为total还包含了副本分片上的文档。