在Elasticsearch中检索最新文档需要很长时间

时间:2019-07-09 10:06:13

标签: elasticsearch

我有一个Elasticsearch 7.2索引,目前约有35亿份文档。大多数查询运行都没有任何问题,但是由于某种原因,按时间戳进行排序并检索最新文档会花费很长时间。

这是查询:

{
  "query": { "match_all": {} },
  "size": 1,
  "sort": [{
      "@timestamp": {
        "order": "desc"
      }
    }
  ]
}

这是我的时间戳字段映射:

{
  "mapping": {
    "properties": {
      "@timestamp": {
        "type": "date"
      }
      // ....
    }
  }
}

这是我的查询性能分析(profile)结果:

{
  "took": 227405,
  "timed_out": false,
  "_shards": {
    "total": 4,
    "successful": 4,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 10000,
      "relation": "gte"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "my-index",
        "_type": "_doc",
        "_id": "7726392481",
        "_score": null,
        "_source": {
          "@timestamp": "2019-07-08T13:11:21.500",
          // ...
        },
        "sort": [
          1562591481500
        ]
      }
    ]
  },
  "profile": {
    "shards": [
      {
        "id": "[sdKq0LgLRAqxbCjpAPKn_g][my-index][0]",
        "searches": [
          {
            "query": [
              {
                "type": "MatchAllDocsQuery",
                "description": "*:*",
                "time_in_nanos": 40925661356,
                "breakdown": {
                  "set_min_competitive_score_count": 0,
                  "match_count": 0,
                  "shallow_advance_count": 0,
                  "set_min_competitive_score": 0,
                  "next_doc": 40011007556,
                  "match": 0,
                  "next_doc_count": 913921033,
                  "score_count": 0,
                  "compute_max_score_count": 0,
                  "compute_max_score": 0,
                  "advance": 0,
                  "advance_count": 0,
                  "score": 0,
                  "build_scorer_count": 164,
                  "create_weight": 10615,
                  "shallow_advance": 0,
                  "create_weight_count": 1,
                  "build_scorer": 721987
                }
              }
            ],
            "rewrite_time": 6503,
            "collector": [
              {
                "name": "CancellableCollector",
                "reason": "search_cancelled",
                "time_in_nanos": 155966573371,
                "children": [
                  {
                    "name": "SimpleFieldCollector",
                    "reason": "search_top_hits",
                    "time_in_nanos": 78972338307
                  }
                ]
              }
            ]
          }
        ],
        "aggregations": []
      },
      {
        "id": "[sdKq0LgLRAqxbCjpAPKn_g][my-index][1]",
        "searches": [
          {
            "query": [
              {
                "type": "MatchAllDocsQuery",
                "description": "*:*",
                "time_in_nanos": 41782046319,
                "breakdown": {
                  "set_min_competitive_score_count": 0,
                  "match_count": 0,
                  "shallow_advance_count": 0,
                  "set_min_competitive_score": 0,
                  "next_doc": 40847593118,
                  "match": 0,
                  "next_doc_count": 933744955,
                  "score_count": 0,
                  "compute_max_score_count": 0,
                  "compute_max_score": 0,
                  "advance": 0,
                  "advance_count": 0,
                  "score": 0,
                  "build_scorer_count": 166,
                  "create_weight": 2479,
                  "shallow_advance": 0,
                  "create_weight_count": 1,
                  "build_scorer": 705600
                }
              }
            ],
            "rewrite_time": 4551,
            "collector": [
              {
                "name": "CancellableCollector",
                "reason": "search_cancelled",
                "time_in_nanos": 155800230172,
                "children": [
                  {
                    "name": "SimpleFieldCollector",
                    "reason": "search_top_hits",
                    "time_in_nanos": 78854977582
                  }
                ]
              }
            ]
          }
        ],
        "aggregations": []
      },
      {
        "id": "[yUQy-JyKQxigULToVE2VSA][my-index][2]",
        "searches": [
          {
            "query": [
              {
                "type": "MatchAllDocsQuery",
                "description": "*:*",
                "time_in_nanos": 41297021897,
                "breakdown": {
                  "set_min_competitive_score_count": 0,
                  "match_count": 0,
                  "shallow_advance_count": 0,
                  "set_min_competitive_score": 0,
                  "next_doc": 40377677681,
                  "match": 0,
                  "next_doc_count": 918611705,
                  "score_count": 0,
                  "compute_max_score_count": 0,
                  "compute_max_score": 0,
                  "advance": 0,
                  "advance_count": 0,
                  "score": 0,
                  "build_scorer_count": 166,
                  "create_weight": 2962,
                  "shallow_advance": 0,
                  "create_weight_count": 1,
                  "build_scorer": 729382
                }
              }
            ],
            "rewrite_time": 3312,
            "collector": [
              {
                "name": "CancellableCollector",
                "reason": "search_cancelled",
                "time_in_nanos": 163874355059,
                "children": [
                  {
                    "name": "SimpleFieldCollector",
                    "reason": "search_top_hits",
                    "time_in_nanos": 87826427892
                  }
                ]
              }
            ]
          }
        ],
        "aggregations": []
      },
      {
        "id": "[yUQy-JyKQxigULToVE2VSA][my-index][3]",
        "searches": [
          {
            "query": [
              {
                "type": "MatchAllDocsQuery",
                "description": "*:*",
                "time_in_nanos": 41461752119,
                "breakdown": {
                  "set_min_competitive_score_count": 0,
                  "match_count": 0,
                  "shallow_advance_count": 0,
                  "set_min_competitive_score": 0,
                  "next_doc": 40536907380,
                  "match": 0,
                  "next_doc_count": 924035258,
                  "score_count": 0,
                  "compute_max_score_count": 0,
                  "compute_max_score": 0,
                  "advance": 0,
                  "advance_count": 0,
                  "score": 0,
                  "build_scorer_count": 178,
                  "create_weight": 2959,
                  "shallow_advance": 0,
                  "create_weight_count": 1,
                  "build_scorer": 806343
                }
              }
            ],
            "rewrite_time": 3316,
            "collector": [
              {
                "name": "CancellableCollector",
                "reason": "search_cancelled",
                "time_in_nanos": 163785688664,
                "children": [
                  {
                    "name": "SimpleFieldCollector",
                    "reason": "search_top_hits",
                    "time_in_nanos": 87831077038
                  }
                ]
              }
            ]
          }
        ],
        "aggregations": []
      }
    ]
  }
}

它在next_doc上花费了大量时间,其中next_doc_count为933,744,955。但是,既然@timestamp字段已经建立了索引,为什么还需要遍历所有文档才能对它们进行排序并返回最新的文档?

编辑:

这些是我的索引统计信息:

{
  "_shards": {
    "total": 4,
    "successful": 4,
    "failed": 0
  },
  "stats": {
    "uuid": "ub-FElCTTsOAihR0aoNlPw",
    "primaries": {
      "docs": {
        "count": 3467397640,
        "deleted": 234593502
      },
      "store": {
        "size_in_bytes": 1087736351822
      },
      "indexing": {
        "index_total": 204398539,
        "index_time_in_millis": 80325395,
        "index_current": 0,
        "index_failed": 0,
        "delete_total": 0,
        "delete_time_in_millis": 0,
        "delete_current": 0,
        "noop_update_total": 0,
        "is_throttled": false,
        "throttle_time_in_millis": 0
      },
      "get": {
        "total": 0,
        "time_in_millis": 0,
        "exists_total": 0,
        "exists_time_in_millis": 0,
        "missing_total": 0,
        "missing_time_in_millis": 0,
        "current": 0
      },
      "search": {
        "open_contexts": 0,
        "query_total": 1524,
        "query_time_in_millis": 14514546,
        "query_current": 0,
        "fetch_total": 1018,
        "fetch_time_in_millis": 171319,
        "fetch_current": 0,
        "scroll_total": 0,
        "scroll_time_in_millis": 0,
        "scroll_current": 0,
        "suggest_total": 0,
        "suggest_time_in_millis": 0,
        "suggest_current": 0
      },
      "merges": {
        "current": 0,
        "current_docs": 0,
        "current_size_in_bytes": 0,
        "total": 192984,
        "total_time_in_millis": 87010422,
        "total_docs": 1248881022,
        "total_size_in_bytes": 394269971046,
        "total_stopped_time_in_millis": 0,
        "total_throttled_time_in_millis": 35698367,
        "total_auto_throttle_in_bytes": 20971520
      },
      "refresh": {
        "total": 377992,
        "total_time_in_millis": 38968248,
        "external_total": 377333,
        "external_total_time_in_millis": 40675901,
        "listeners": 0
      },
      "flush": {
        "total": 383,
        "periodic": 155,
        "total_time_in_millis": 695510
      },
      "warmer": {
        "current": 0,
        "total": 377329,
        "total_time_in_millis": 18259
      },
      "query_cache": {
        "memory_size_in_bytes": 0,
        "total_count": 0,
        "hit_count": 0,
        "miss_count": 0,
        "cache_size": 0,
        "cache_count": 0,
        "evictions": 0
      },
      "fielddata": {
        "memory_size_in_bytes": 17973802844,
        "evictions": 0
      },
      "completion": {
        "size_in_bytes": 0
      },
      "segments": {
        "count": 319,
        "memory_in_bytes": 1174162151,
        "terms_memory_in_bytes": 942165919,
        "stored_fields_memory_in_bytes": 126159816,
        "term_vectors_memory_in_bytes": 0,
        "norms_memory_in_bytes": 192640,
        "points_memory_in_bytes": 102814280,
        "doc_values_memory_in_bytes": 2829496,
        "index_writer_memory_in_bytes": 12899088,
        "version_map_memory_in_bytes": 29403,
        "fixed_bit_set_memory_in_bytes": 0,
        "max_unsafe_auto_id_timestamp": -1,
        "file_sizes": {}
      },
      "translog": {
        "operations": 3766740,
        "size_in_bytes": 2307404347,
        "uncommitted_operations": 2893204,
        "uncommitted_size_in_bytes": 1782804035,
        "earliest_last_modified_age": 0
      },
      "request_cache": {
        "memory_size_in_bytes": 0,
        "evictions": 0,
        "hit_count": 0,
        "miss_count": 268
      },
      "recovery": {
        "current_as_source": 0,
        "current_as_target": 0,
        "throttle_time_in_millis": 17024187
      }
    },
    "total": {
      "docs": {
        "count": 3467397640,
        "deleted": 234593502
      },
      "store": {
        "size_in_bytes": 1087736351822
      },
      "indexing": {
        "index_total": 204398539,
        "index_time_in_millis": 80325395,
        "index_current": 0,
        "index_failed": 0,
        "delete_total": 0,
        "delete_time_in_millis": 0,
        "delete_current": 0,
        "noop_update_total": 0,
        "is_throttled": false,
        "throttle_time_in_millis": 0
      },
      "get": {
        "total": 0,
        "time_in_millis": 0,
        "exists_total": 0,
        "exists_time_in_millis": 0,
        "missing_total": 0,
        "missing_time_in_millis": 0,
        "current": 0
      },
      "search": {
        "open_contexts": 0,
        "query_total": 1524,
        "query_time_in_millis": 14514546,
        "query_current": 0,
        "fetch_total": 1018,
        "fetch_time_in_millis": 171319,
        "fetch_current": 0,
        "scroll_total": 0,
        "scroll_time_in_millis": 0,
        "scroll_current": 0,
        "suggest_total": 0,
        "suggest_time_in_millis": 0,
        "suggest_current": 0
      },
      "merges": {
        "current": 0,
        "current_docs": 0,
        "current_size_in_bytes": 0,
        "total": 192984,
        "total_time_in_millis": 87010422,
        "total_docs": 1248881022,
        "total_size_in_bytes": 394269971046,
        "total_stopped_time_in_millis": 0,
        "total_throttled_time_in_millis": 35698367,
        "total_auto_throttle_in_bytes": 20971520
      },
      "refresh": {
        "total": 377992,
        "total_time_in_millis": 38968248,
        "external_total": 377333,
        "external_total_time_in_millis": 40675901,
        "listeners": 0
      },
      "flush": {
        "total": 383,
        "periodic": 155,
        "total_time_in_millis": 695510
      },
      "warmer": {
        "current": 0,
        "total": 377329,
        "total_time_in_millis": 18259
      },
      "query_cache": {
        "memory_size_in_bytes": 0,
        "total_count": 0,
        "hit_count": 0,
        "miss_count": 0,
        "cache_size": 0,
        "cache_count": 0,
        "evictions": 0
      },
      "fielddata": {
        "memory_size_in_bytes": 17973802844,
        "evictions": 0
      },
      "completion": {
        "size_in_bytes": 0
      },
      "segments": {
        "count": 319,
        "memory_in_bytes": 1174162151,
        "terms_memory_in_bytes": 942165919,
        "stored_fields_memory_in_bytes": 126159816,
        "term_vectors_memory_in_bytes": 0,
        "norms_memory_in_bytes": 192640,
        "points_memory_in_bytes": 102814280,
        "doc_values_memory_in_bytes": 2829496,
        "index_writer_memory_in_bytes": 12899088,
        "version_map_memory_in_bytes": 29403,
        "fixed_bit_set_memory_in_bytes": 0,
        "max_unsafe_auto_id_timestamp": -1,
        "file_sizes": {}
      },
      "translog": {
        "operations": 3766740,
        "size_in_bytes": 2307404347,
        "uncommitted_operations": 2893204,
        "uncommitted_size_in_bytes": 1782804035,
        "earliest_last_modified_age": 0
      },
      "request_cache": {
        "memory_size_in_bytes": 0,
        "evictions": 0,
        "hit_count": 0,
        "miss_count": 268
      },
      "recovery": {
        "current_as_source": 0,
        "current_as_target": 0,
        "throttle_time_in_millis": 17024187
      }
    }
  }
}

1 个答案:

答案 0 :(得分:1)

建议的分片大小为10-40GB,而您的每个分片大约有250GB。另外,您的索引中似乎有许多已删除的文档,这是由于用相同的_id覆盖文档造成的。这些文档虽然被标记为已删除,但仍在消耗RAM并参与各种操作-More info

排序很慢,因为它需要对Lucene中的倒排索引进行反向还原(un-invert)。一种选择是预先加载(eager loading)字段数据(fielddata)-More info,并且您应该在@timestamp字段上使用doc_values-More info

另一种选择是,如果您始终按某个字段排序(例如,按timestamp desc),则可以使用索引排序(index sorting),告诉ES在磁盘上如何对文档进行排序。Check this

希望这会有所帮助。