我有一个Elasticsearch 7.2索引,目前约有35亿个文档。大多数查询运行起来都没有任何问题,但不知为何,按时间戳排序并检索最新的一个文档却要花费很长时间。
这是查询:
{
"query": { "match_all": {} },
"size": 1,
"sort": [{
"@timestamp": {
"order": "desc"
}
}
]
}
这是我的时间戳字段映射:
{
"mapping": {
"properties": {
"@timestamp": {
"type": "date"
}
// ....
}
}
}
这是该查询的profile(性能分析)结果:
{
"took": 227405,
"timed_out": false,
"_shards": {
"total": 4,
"successful": 4,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": null,
"hits": [
{
"_index": "my-index",
"_type": "_doc",
"_id": "7726392481",
"_score": null,
"_source": {
"@timestamp": "2019-07-08T13:11:21.500",
// ...
},
"sort": [
1562591481500
]
}
]
},
"profile": {
"shards": [
{
"id": "[sdKq0LgLRAqxbCjpAPKn_g][my-index][0]",
"searches": [
{
"query": [
{
"type": "MatchAllDocsQuery",
"description": "*:*",
"time_in_nanos": 40925661356,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 40011007556,
"match": 0,
"next_doc_count": 913921033,
"score_count": 0,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 0,
"advance_count": 0,
"score": 0,
"build_scorer_count": 164,
"create_weight": 10615,
"shallow_advance": 0,
"create_weight_count": 1,
"build_scorer": 721987
}
}
],
"rewrite_time": 6503,
"collector": [
{
"name": "CancellableCollector",
"reason": "search_cancelled",
"time_in_nanos": 155966573371,
"children": [
{
"name": "SimpleFieldCollector",
"reason": "search_top_hits",
"time_in_nanos": 78972338307
}
]
}
]
}
],
"aggregations": []
},
{
"id": "[sdKq0LgLRAqxbCjpAPKn_g][my-index][1]",
"searches": [
{
"query": [
{
"type": "MatchAllDocsQuery",
"description": "*:*",
"time_in_nanos": 41782046319,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 40847593118,
"match": 0,
"next_doc_count": 933744955,
"score_count": 0,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 0,
"advance_count": 0,
"score": 0,
"build_scorer_count": 166,
"create_weight": 2479,
"shallow_advance": 0,
"create_weight_count": 1,
"build_scorer": 705600
}
}
],
"rewrite_time": 4551,
"collector": [
{
"name": "CancellableCollector",
"reason": "search_cancelled",
"time_in_nanos": 155800230172,
"children": [
{
"name": "SimpleFieldCollector",
"reason": "search_top_hits",
"time_in_nanos": 78854977582
}
]
}
]
}
],
"aggregations": []
},
{
"id": "[yUQy-JyKQxigULToVE2VSA][my-index][2]",
"searches": [
{
"query": [
{
"type": "MatchAllDocsQuery",
"description": "*:*",
"time_in_nanos": 41297021897,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 40377677681,
"match": 0,
"next_doc_count": 918611705,
"score_count": 0,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 0,
"advance_count": 0,
"score": 0,
"build_scorer_count": 166,
"create_weight": 2962,
"shallow_advance": 0,
"create_weight_count": 1,
"build_scorer": 729382
}
}
],
"rewrite_time": 3312,
"collector": [
{
"name": "CancellableCollector",
"reason": "search_cancelled",
"time_in_nanos": 163874355059,
"children": [
{
"name": "SimpleFieldCollector",
"reason": "search_top_hits",
"time_in_nanos": 87826427892
}
]
}
]
}
],
"aggregations": []
},
{
"id": "[yUQy-JyKQxigULToVE2VSA][my-index][3]",
"searches": [
{
"query": [
{
"type": "MatchAllDocsQuery",
"description": "*:*",
"time_in_nanos": 41461752119,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 40536907380,
"match": 0,
"next_doc_count": 924035258,
"score_count": 0,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 0,
"advance_count": 0,
"score": 0,
"build_scorer_count": 178,
"create_weight": 2959,
"shallow_advance": 0,
"create_weight_count": 1,
"build_scorer": 806343
}
}
],
"rewrite_time": 3316,
"collector": [
{
"name": "CancellableCollector",
"reason": "search_cancelled",
"time_in_nanos": 163785688664,
"children": [
{
"name": "SimpleFieldCollector",
"reason": "search_top_hits",
"time_in_nanos": 87831077038
}
]
}
]
}
],
"aggregations": []
}
]
}
}
它在next_doc上花费了大量时间,其中next_doc_count为933,744,955。但是,既然@timestamp字段已经建立了索引,为什么还需要遍历所有文档才能完成排序并返回最新的文档?
编辑:
这些是我的索引统计信息:
{
"_shards": {
"total": 4,
"successful": 4,
"failed": 0
},
"stats": {
"uuid": "ub-FElCTTsOAihR0aoNlPw",
"primaries": {
"docs": {
"count": 3467397640,
"deleted": 234593502
},
"store": {
"size_in_bytes": 1087736351822
},
"indexing": {
"index_total": 204398539,
"index_time_in_millis": 80325395,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 1524,
"query_time_in_millis": 14514546,
"query_current": 0,
"fetch_total": 1018,
"fetch_time_in_millis": 171319,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 192984,
"total_time_in_millis": 87010422,
"total_docs": 1248881022,
"total_size_in_bytes": 394269971046,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 35698367,
"total_auto_throttle_in_bytes": 20971520
},
"refresh": {
"total": 377992,
"total_time_in_millis": 38968248,
"external_total": 377333,
"external_total_time_in_millis": 40675901,
"listeners": 0
},
"flush": {
"total": 383,
"periodic": 155,
"total_time_in_millis": 695510
},
"warmer": {
"current": 0,
"total": 377329,
"total_time_in_millis": 18259
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 0,
"hit_count": 0,
"miss_count": 0,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 17973802844,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 319,
"memory_in_bytes": 1174162151,
"terms_memory_in_bytes": 942165919,
"stored_fields_memory_in_bytes": 126159816,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 192640,
"points_memory_in_bytes": 102814280,
"doc_values_memory_in_bytes": 2829496,
"index_writer_memory_in_bytes": 12899088,
"version_map_memory_in_bytes": 29403,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {}
},
"translog": {
"operations": 3766740,
"size_in_bytes": 2307404347,
"uncommitted_operations": 2893204,
"uncommitted_size_in_bytes": 1782804035,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 268
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 17024187
}
},
"total": {
"docs": {
"count": 3467397640,
"deleted": 234593502
},
"store": {
"size_in_bytes": 1087736351822
},
"indexing": {
"index_total": 204398539,
"index_time_in_millis": 80325395,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 1524,
"query_time_in_millis": 14514546,
"query_current": 0,
"fetch_total": 1018,
"fetch_time_in_millis": 171319,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 192984,
"total_time_in_millis": 87010422,
"total_docs": 1248881022,
"total_size_in_bytes": 394269971046,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 35698367,
"total_auto_throttle_in_bytes": 20971520
},
"refresh": {
"total": 377992,
"total_time_in_millis": 38968248,
"external_total": 377333,
"external_total_time_in_millis": 40675901,
"listeners": 0
},
"flush": {
"total": 383,
"periodic": 155,
"total_time_in_millis": 695510
},
"warmer": {
"current": 0,
"total": 377329,
"total_time_in_millis": 18259
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 0,
"hit_count": 0,
"miss_count": 0,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 17973802844,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 319,
"memory_in_bytes": 1174162151,
"terms_memory_in_bytes": 942165919,
"stored_fields_memory_in_bytes": 126159816,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 192640,
"points_memory_in_bytes": 102814280,
"doc_values_memory_in_bytes": 2829496,
"index_writer_memory_in_bytes": 12899088,
"version_map_memory_in_bytes": 29403,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {}
},
"translog": {
"operations": 3766740,
"size_in_bytes": 2307404347,
"uncommitted_operations": 2893204,
"uncommitted_size_in_bytes": 1782804035,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 268
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 17024187
}
}
}
}
答案 0 :(得分:1)
建议的单个分片大小为10-40GB,而您的每个分片大约有250GB。另外,您的索引中似乎有大量已删除的文档,这是因为您覆盖了具有相同_id的文档。这些文档只是被标记为已删除,但仍然会占用RAM并参与各种操作-More info
排序之所以很慢,是因为它需要对Lucene中的倒排索引进行"反倒排"(uninvert)处理。一种选择是预先加载(eager loading)字段数据(fielddata)-More info,并且您应该在@timestamp字段上使用doc_values-More info。
另一种选择是:如果您总是按某个字段排序(例如按timestamp desc),则可以使用索引排序(index sorting),告诉ES如何在磁盘上对文档进行排序。Check this。
希望这会有所帮助。