ES版本:5.4.1 操作系统:Ubuntu 爪哇:1.8
我们目前将系统从es 2.3升级到es5.4.1,一切顺利,直到我们将其部署到生产中。它开始于es节点随机运行一段时间。经过调查,我们在es
中看到了日志[2017-12-15T20:33:28,459][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][1723] overhead, spent [252ms] collecting in the last [1s]
[2017-12-15T20:42:53,599][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][2288] overhead, spent [288ms] collecting in the last [1s]
[2017-12-15T20:53:55,846][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][2950] overhead, spent [313ms] collecting in the last [1s]
[2017-12-15T21:03:18,145][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][3512] overhead, spent [325ms] collecting in the last [1s]
[2017-12-15T21:45:45,815][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][6059] overhead, spent [340ms] collecting in the last [1s]
[2017-12-15T21:46:39,834][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][6113] overhead, spent [445ms] collecting in the last [1s]
[2017-12-15T21:46:59,895][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][6133] overhead, spent [281ms] collecting in the last [1s]
[2017-12-15T21:47:27,901][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][6161] overhead, spent [306ms] collecting in the last [1s]
[2017-12-15T21:51:56,093][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][6429] overhead, spent [297ms] collecting in the last [1.1s]
[2017-12-15T21:51:57,094][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][6430] overhead, spent [353ms] collecting in the last [1s]
[2017-12-15T21:57:56,361][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][6789] overhead, spent [398ms] collecting in the last [1.1s]
[2017-12-15T21:58:23,407][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][6816] overhead, spent [284ms] collecting in the last [1s]
[2017-12-15T21:58:24,444][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][6817] overhead, spent [341ms] collecting in the last [1s]
[2017-12-15T22:01:29,536][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][7002] overhead, spent [341ms] collecting in the last [1s]
[2017-12-15T22:01:31,663][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][7004] overhead, spent [399ms] collecting in the last [1.1s]
[2017-12-15T22:01:35,664][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][7008] overhead, spent [273ms] collecting in the last [1s]
[2017-12-15T22:03:08,779][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][7101] overhead, spent [281ms] collecting in the last [1s]
[2017-12-15T22:05:54,928][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][7267] overhead, spent [310ms] collecting in the last [1.1s]
[2017-12-15T22:05:55,928][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][7268] overhead, spent [305ms] collecting in the last [1s]
[2017-12-15T22:05:58,929][INFO ][o.e.m.j.JvmGcMonitorService] [vzvV7bX] [gc][7271] overhead, spent [287ms] collecting in the last [1s]
``` 早期,ES 2.3曾经在系统上使用8 GB内存,使用32 GB内存。我们将堆空间从8增加到12,然后我们将系统ram增加到64 GB,并为弹性搜索提供32 GB堆,但是当有任何繁重的查询时,es节点会继续下降。请注意,之前的一切工作正常。
对于12 GB,详细信息如下
"jvm" : {
"timestamp" : 1513422077508,
"uptime_in_millis" : 16750330,
"mem" : {
"heap_used_in_bytes" : 9402202880,
"heap_used_percent" : 73,
"heap_committed_in_bytes" : 12815171584,
"heap_max_in_bytes" : 12815171584,
"non_heap_used_in_bytes" : 97693480,
"non_heap_committed_in_bytes" : 103424000,
"pools" : {
"young" : {
"used_in_bytes" : 441150480,
"max_in_bytes" : 558432256,
"peak_used_in_bytes" : 558432256,
"peak_max_in_bytes" : 558432256
},
"survivor" : {
"used_in_bytes" : 42550952,
"max_in_bytes" : 69730304,
"peak_used_in_bytes" : 69730304,
"peak_max_in_bytes" : 69730304
},
"old" : {
"used_in_bytes" : 8918501448,
"max_in_bytes" : 12187009024,
"peak_used_in_bytes" : 8918501448,
"peak_max_in_bytes" : 12187009024
}
}
},
"threads" : {
"count" : 71,
"peak_count" : 95
},
"gc" : {
"collectors" : {
"young" : {
"collection_count" : 104,
"collection_time_in_millis" : 7078
},
"old" : {
"collection_count" : 1,
"collection_time_in_millis" : 170
}
}
},
"buffer_pools" : {
"direct" : {
"count" : 67,
"used_in_bytes" : 847203522,
"total_capacity_in_bytes" : 847203521
},
"mapped" : {
"count" : 1204,
"used_in_bytes" : 16787345787,
"total_capacity_in_bytes" : 16787345787
}
},
"classes" : {
"current_loaded_count" : 11025,
"total_loaded_count" : 11025,
"total_unloaded_count" : 0
}
}
}
```
但到目前为止,我们无法找到问题的具体解决方法
在Github中创建了ticket但已关闭,因此在论坛上创建了topic
现在我们比较了针对es2.3和es5.4
发出的es查询对于es2.3查询就像
{
"from" : 0,
"size" : 200,
"query" : {
"bool" : {
"must" : {
"bool" : {
"must" : [
{
"bool" : {
"must_not" : {
"bool" : {
"should" : {
"terms" : {
"room_id" : [
-- terms upto 30000 -
]
}
}
}
}
}
}
]
}
},
"filter" : {
-- basic filter --
}
}
},
"fields" : [
-- columns --
],
"sort" : [
-- columns --
]
}
和查询5.4是
{
"from" : 0,
"size" : 20,
"query" : {
"bool" : {
"must" : [
{
"bool" : {
"must_not" : [
{
"bool" : {
"should" : [
{
"terms" : {
"room_ids" : [
-- terms can upto be 30000 --
],
"boost" : 1.0
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
],
"filter" : [
-- some basic filters --
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
"_source" : {
"includes" : [
--columns--
],
"excludes" : [ ]
},
"sort" : [
--columns--
]
}
主要区别在于2.3我们从fields
获取数据,在5.4中我们从_source
获取数据,其他一切几乎相同,但对于2.3,一切似乎都正常。
有人建议我堆大小太大here。我将java Xms设置为8GB,但节点仍在下降
{
"_nodes" : {
"total" : 1,
"successful" : 1,
"failed" : 0
},
"cluster_name" : "elasticsearch_cc",
"nodes" : {
"vzvV7bXVQd-9aVzF_CrcHA" : {
"timestamp" : 1513839849278,
"name" : "vzvV7bX",
"transport_address" : "127.0.0.1:9300",
"host" : "127.0.0.1",
"ip" : "127.0.0.1:9300",
"roles" : [
"master",
"data",
"ingest"
],
"indices" : {
"docs" : {
"count" : 10490979,
"deleted" : 759806
},
"store" : {
"size_in_bytes" : 12759778301,
"throttle_time_in_millis" : 0
},
"indexing" : {
"index_total" : 86,
"index_time_in_millis" : 738,
"index_current" : 0,
"index_failed" : 0,
"delete_total" : 0,
"delete_time_in_millis" : 0,
"delete_current" : 0,
"noop_update_total" : 40,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 75,
"time_in_millis" : 27,
"exists_total" : 75,
"exists_time_in_millis" : 27,
"missing_total" : 0,
"missing_time_in_millis" : 0,
"current" : 0
},
"search" : {
"open_contexts" : 0,
"query_total" : 5292,
"query_time_in_millis" : 31440,
"query_current" : 0,
"fetch_total" : 3325,
"fetch_time_in_millis" : 117815,
"fetch_current" : 0,
"scroll_total" : 0,
"scroll_time_in_millis" : 0,
"scroll_current" : 0,
"suggest_total" : 0,
"suggest_time_in_millis" : 0,
"suggest_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 6,
"total_time_in_millis" : 2203,
"total_docs" : 2303,
"total_size_in_bytes" : 8707990,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 0,
"total_auto_throttle_in_bytes" : 671088640
},
"refresh" : {
"total" : 154,
"total_time_in_millis" : 1763,
"listeners" : 0
},
"flush" : {
"total" : 57,
"total_time_in_millis" : 830
},
"warmer" : {
"current" : 0,
"total" : 218,
"total_time_in_millis" : 413
},
"query_cache" : {
"memory_size_in_bytes" : 1807608,
"total_count" : 22863,
"hit_count" : 2962,
"miss_count" : 19901,
"cache_size" : 836,
"cache_count" : 836,
"evictions" : 0
},
"fielddata" : {
"memory_size_in_bytes" : 2112304,
"evictions" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 490,
"memory_in_bytes" : 54231303,
"terms_memory_in_bytes" : 40778050,
"stored_fields_memory_in_bytes" : 3325704,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 637056,
"points_memory_in_bytes" : 580557,
"doc_values_memory_in_bytes" : 8909936,
"index_writer_memory_in_bytes" : 0,
"version_map_memory_in_bytes" : 0,
"fixed_bit_set_memory_in_bytes" : 1199712,
"max_unsafe_auto_id_timestamp" : -1,
"file_sizes" : { }
},
"translog" : {
"operations" : 3,
"size_in_bytes" : 18452
},
"request_cache" : {
"memory_size_in_bytes" : 66679,
"evictions" : 0,
"hit_count" : 84,
"miss_count" : 130
},
"recovery" : {
"current_as_source" : 0,
"current_as_target" : 0,
"throttle_time_in_millis" : 0
}
},
"os" : {
"timestamp" : 1513839849384,
"cpu" : {
"percent" : 3,
"load_average" : {
"1m" : 0.05,
"5m" : 0.38,
"15m" : 1.06
}
},
"mem" : {
"total_in_bytes" : 33738969088,
"free_in_bytes" : 15510724608,
"used_in_bytes" : 18228244480,
"free_percent" : 46,
"used_percent" : 54
},
"swap" : {
"total_in_bytes" : 0,
"free_in_bytes" : 0,
"used_in_bytes" : 0
}
},
"process" : {
"timestamp" : 1513839849384,
"open_file_descriptors" : 344,
"max_file_descriptors" : 65536,
"cpu" : {
"percent" : 0,
"total_in_millis" : 329250
},
"mem" : {
"total_virtual_in_bytes" : 28131942400
}
},
"jvm" : {
"timestamp" : 1513839849384,
"uptime_in_millis" : 12440740,
"mem" : {
"heap_used_in_bytes" : 2964483576,
"heap_used_percent" : 34,
"heap_committed_in_bytes" : 6240751616,
"heap_max_in_bytes" : 8520204288,
"non_heap_used_in_bytes" : 102879720,
"non_heap_committed_in_bytes" : 108969984,
"pools" : {
"young" : {
"used_in_bytes" : 451090984,
"max_in_bytes" : 558432256,
"peak_used_in_bytes" : 558432256,
"peak_max_in_bytes" : 558432256
},
"survivor" : {
"used_in_bytes" : 16900480,
"max_in_bytes" : 69730304,
"peak_used_in_bytes" : 69730304,
"peak_max_in_bytes" : 69730304
},
"old" : {
"used_in_bytes" : 2496492112,
"max_in_bytes" : 7892041728,
"peak_used_in_bytes" : 5608349440,
"peak_max_in_bytes" : 7892041728
}
}
},
"threads" : {
"count" : 88,
"peak_count" : 99
},
"gc" : {
"collectors" : {
"young" : {
"collection_count" : 86,
"collection_time_in_millis" : 14592
},
"old" : {
"collection_count" : 11,
"collection_time_in_millis" : 780
}
}
},
"buffer_pools" : {
"direct" : {
"count" : 57,
"used_in_bytes" : 795161001,
"total_capacity_in_bytes" : 795161000
},
"mapped" : {
"count" : 1080,
"used_in_bytes" : 12730171248,
"total_capacity_in_bytes" : 12730171248
}
},
"classes" : {
"current_loaded_count" : 11264,
"total_loaded_count" : 11388,
"total_unloaded_count" : 124
}
},
"thread_pool" : {
"bulk" : {
"threads" : 5,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 5,
"completed" : 84
},
"fetch_shard_started" : {
"threads" : 1,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 16,
"completed" : 35
},
"fetch_shard_store" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"flush" : {
"threads" : 1,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 4,
"completed" : 114
},
"force_merge" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"generic" : {
"threads" : 4,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 4,
"completed" : 1286
},
"get" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"index" : {
"threads" : 8,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 8,
"completed" : 75
},
"listener" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"management" : {
"threads" : 4,
"queue" : 0,
"active" : 1,
"rejected" : 0,
"largest" : 4,
"completed" : 1653
},
"refresh" : {
"threads" : 4,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 4,
"completed" : 86950
},
"search" : {
"threads" : 13,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 13,
"completed" : 13533
},
"snapshot" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"warmer" : {
"threads" : 4,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 4,
"completed" : 1362
}
},
"fs" : {
"timestamp" : 1513839849386,
"total" : {
"total_in_bytes" : 42141548544,
"free_in_bytes" : 21673201664,
"available_in_bytes" : 19508940800
},
"data" : [
{
"path" : "/data/elasticsearch_cc/nodes/0",
"mount" : "/data (/dev/xvdf)",
"type" : "ext4",
"total_in_bytes" : 42141548544,
"free_in_bytes" : 21673201664,
"available_in_bytes" : 19508940800,
"spins" : "false"
}
],
"io_stats" : {
"devices" : [
{
"device_name" : "xvdf",
"operations" : 32559,
"read_operations" : 29633,
"write_operations" : 2926,
"read_kilobytes" : 1682476,
"write_kilobytes" : 51428
}
],
"total" : {
"operations" : 32559,
"read_operations" : 29633,
"write_operations" : 2926,
"read_kilobytes" : 1682476,
"write_kilobytes" : 51428
}
}
},
"transport" : {
"server_open" : 26,
"rx_count" : 10895,
"rx_size_in_bytes" : 1251104,
"tx_count" : 10893,
"tx_size_in_bytes" : 12114262496
},
"http" : {
"current_open" : 1,
"total_opened" : 1
},
"breakers" : {
"request" : {
"limit_size_in_bytes" : 5112122572,
"limit_size" : "4.7gb",
"estimated_size_in_bytes" : 0,
"estimated_size" : "0b",
"overhead" : 1.0,
"tripped" : 0
},
"fielddata" : {
"limit_size_in_bytes" : 5112122572,
"limit_size" : "4.7gb",
"estimated_size_in_bytes" : 2112304,
"estimated_size" : "2mb",
"overhead" : 1.03,
"tripped" : 0
},
"in_flight_requests" : {
"limit_size_in_bytes" : 8520204288,
"limit_size" : "7.9gb",
"estimated_size_in_bytes" : 3206,
"estimated_size" : "3.1kb",
"overhead" : 1.0,
"tripped" : 0
},
"parent" : {
"limit_size_in_bytes" : 5964143001,
"limit_size" : "5.5gb",
"estimated_size_in_bytes" : 2115510,
"estimated_size" : "2mb",
"overhead" : 1.0,
"tripped" : 0
}
},
"script" : {
"compilations" : 0,
"cache_evictions" : 0
},
"discovery" : {
"cluster_state_queue" : {
"total" : 0,
"pending" : 0,
"committed" : 0
}
},
"ingest" : {
"total" : {
"count" : 0,
"time_in_millis" : 0,
"current" : 0,
"failed" : 0
},
"pipelines" : { }
}
}
}
}