我对Elasticsearch
较陌生,并且在确定为什么来自python dataframe
的记录数为何不同于索引文档计数Elasticsearch
时遇到问题。
我首先通过运行以下命令创建索引:如您所见,共有62932条记录。
我正在使用以下方法在Elasticsearch中创建索引: Python code
当我检查Kibana Management/Index Management
中的索引时,只有62630个文档。根据“统计”窗口,有302个被删除的计数。我不知道这是什么意思。
下面是STATS窗口的输出
{
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"stats": {
"uuid": "egOx_6EwTFysBr0WkJyR1Q",
"primaries": {
"docs": {
"count": 62630,
"deleted": 302
},
"store": {
"size_in_bytes": 4433722
},
"indexing": {
"index_total": 62932,
"index_time_in_millis": 3235,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 140,
"query_time_in_millis": 1178,
"query_current": 0,
"fetch_total": 140,
"fetch_time_in_millis": 1233,
"fetch_current": 0,
"scroll_total": 1,
"scroll_time_in_millis": 6262,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 2,
"total_time_in_millis": 417,
"total_docs": 62932,
"total_size_in_bytes": 4882755,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 0,
"total_auto_throttle_in_bytes": 20971520
},
"refresh": {
"total": 26,
"total_time_in_millis": 597,
"external_total": 24,
"external_total_time_in_millis": 632,
"listeners": 0
},
"flush": {
"total": 1,
"periodic": 0,
"total_time_in_millis": 10
},
"warmer": {
"current": 0,
"total": 23,
"total_time_in_millis": 0
},
"query_cache": {
"memory_size_in_bytes": 17338,
"total_count": 283,
"hit_count": 267,
"miss_count": 16,
"cache_size": 4,
"cache_count": 4,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 0,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 2,
"memory_in_bytes": 22729,
"terms_memory_in_bytes": 17585,
"stored_fields_memory_in_bytes": 2024,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 512,
"points_memory_in_bytes": 2112,
"doc_values_memory_in_bytes": 496,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {}
},
"translog": {
"operations": 62932,
"size_in_bytes": 17585006,
"uncommitted_operations": 0,
"uncommitted_size_in_bytes": 55,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
},
"total": {
"docs": {
"count": 62630,
"deleted": 302
},
"store": {
"size_in_bytes": 4433722
},
"indexing": {
"index_total": 62932,
"index_time_in_millis": 3235,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 140,
"query_time_in_millis": 1178,
"query_current": 0,
"fetch_total": 140,
"fetch_time_in_millis": 1233,
"fetch_current": 0,
"scroll_total": 1,
"scroll_time_in_millis": 6262,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 2,
"total_time_in_millis": 417,
"total_docs": 62932,
"total_size_in_bytes": 4882755,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 0,
"total_auto_throttle_in_bytes": 20971520
},
"refresh": {
"total": 26,
"total_time_in_millis": 597,
"external_total": 24,
"external_total_time_in_millis": 632,
"listeners": 0
},
"flush": {
"total": 1,
"periodic": 0,
"total_time_in_millis": 10
},
"warmer": {
"current": 0,
"total": 23,
"total_time_in_millis": 0
},
"query_cache": {
"memory_size_in_bytes": 17338,
"total_count": 283,
"hit_count": 267,
"miss_count": 16,
"cache_size": 4,
"cache_count": 4,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 0,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 2,
"memory_in_bytes": 22729,
"terms_memory_in_bytes": 17585,
"stored_fields_memory_in_bytes": 2024,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 512,
"points_memory_in_bytes": 2112,
"doc_values_memory_in_bytes": 496,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {}
},
"translog": {
"operations": 62932,
"size_in_bytes": 17585006,
"uncommitted_operations": 0,
"uncommitted_size_in_bytes": 55,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
}
}
}
为什么文档计数与索引总数不同?我已导出数据,且记录数与文档数匹配。我如何找出为什么删除文档并确保将来不再删除它们?
答案 0 :(得分:0)
可能的原因:
https://www.elastic.co/guide/en/elasticsearch/reference/current//cat-indices.html https://www.elastic.co/blog/lucenes-handling-of-deleted-documents