我有一个数据库,其中一个集合约有 3000 万(30M)条记录,该集合的大小约为 100GB(文档与索引合计)。
我有一个复合索引,用于根据 user_id 以及其他一些字段(例如 is_active、is_logged_in 等)过滤数据。
通过 MongoDB Compass,我看到一些慢查询耗时约 10 秒、20 秒甚至 40 秒!但当我手动运行完全相同的查询时,不到 500 毫秒就返回了结果(不过第二次执行时数据可能已经被缓存)。
当我获取正在执行的操作(op)的统计信息时,会看到如下锁状态:
"lockStats": {
"Global": {
"acquireCount": {
"r": 574
}
},
"MMAPV1Journal": {
"acquireCount": {
"r": 295
},
"acquireWaitCount": {
"r": 2
},
"timeAcquiringMicros": {
"r": 15494
}
},
}
acquireCount(操作在指定模式下获取锁的次数)非常高;作为对比,下面是一个快速查询(针对另一个集合)的锁状态:
"lockStats": {
"Global": {
"acquireCount": {
"r": 2
}
},
"MMAPV1Journal": {
"acquireCount": {
"r": 1
}
},
"Database": {
"acquireCount": {
"r": 1
}
},
"Collection": {
"acquireCount": {
"R": 1
}
}
}
当某个操作很慢时(例如某个拥有大量记录的用户的查询耗时很长),几秒钟后就会对所有其他操作产生多米诺骨牌效应。
当我在这个大集合上对查询执行 explain 时,可以从结果中看到它使用了索引:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "cuda.call_history",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$eq" : false
}
},
{
"$nor" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$exists" : true
}
}
]
}
]
},
{
"is_removed" : {
"$eq" : false
}
},
{
"user_id" : {
"$eq" : "00000000000040008000000000002a5d"
}
},
{
"trk.0.direction" : {
"$eq" : "ingress"
}
},
{
"trk.0.type" : {
"$eq" : "fax"
}
},
{
"date" : {
"$lt" : "2018-01-09 10:36:31"
}
},
{
"date" : {
"$gt" : "1970-01-01 00:00:00"
}
},
{
"trk.0.data.status" : {
"$in" : [
"p_received",
"success"
]
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$eq" : false
}
},
{
"$nor" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$exists" : true
}
}
]
}
]
},
{
"trk.0.type" : {
"$eq" : "fax"
}
},
{
"date" : {
"$lt" : "2018-01-09 10:36:31"
}
},
{
"date" : {
"$gt" : "1970-01-01 00:00:00"
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"user_id" : 1,
"trk.0.direction" : 1,
"is_read" : 1,
"trk.0.data.status" : 1,
"is_removed" : 1
},
"indexName" : "user_id_direction_is_read_status_is_removed",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"user_id" : [
"[\"00000000000040008000000000002a5d\", \"00000000000040008000000000002a5d\"]"
],
"trk.0.direction" : [
"[\"ingress\", \"ingress\"]"
],
"is_read" : [
"[MinKey, MaxKey]"
],
"trk.0.data.status" : [
"[\"p_received\", \"p_received\"]",
"[\"success\", \"success\"]"
],
"is_removed" : [
"[false, false]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$eq" : false
}
},
{
"$nor" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$exists" : true
}
}
]
}
]
},
{
"is_removed" : {
"$eq" : false
}
},
{
"trk.0.direction" : {
"$eq" : "ingress"
}
},
{
"trk.0.type" : {
"$eq" : "fax"
}
},
{
"trk.0.data.status" : {
"$in" : [
"p_received",
"success"
]
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"user_id" : 1,
"date" : -1
},
"indexName" : "user_id_date",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"user_id" : [
"[\"00000000000040008000000000002a5d\", \"00000000000040008000000000002a5d\"]"
],
"date" : [
"(\"2018-01-09 10:36:31\", \"1970-01-01 00:00:00\")"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$eq" : false
}
},
{
"$nor" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$exists" : true
}
}
]
}
]
},
{
"is_removed" : {
"$eq" : false
}
},
{
"trk.0.direction" : {
"$eq" : "ingress"
}
},
{
"trk.0.type" : {
"$eq" : "fax"
}
},
{
"date" : {
"$lt" : "2018-01-09 10:36:31"
}
},
{
"date" : {
"$gt" : "1970-01-01 00:00:00"
}
},
{
"trk.0.data.status" : {
"$in" : [
"p_received",
"success"
]
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"user_id" : 1,
"to" : 1,
"from" : 1
},
"indexName" : "user_id_to_from",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"user_id" : [
"[\"00000000000040008000000000002a5d\", \"00000000000040008000000000002a5d\"]"
],
"to" : [
"[MinKey, MaxKey]"
],
"from" : [
"[MinKey, MaxKey]"
]
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 4682,
"executionTimeMillis" : 2072,
"totalKeysExamined" : 4688,
"totalDocsExamined" : 4682,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$eq" : false
}
},
{
"$nor" : [
{
"trk.0.extra_data.spam.is_spam" : {
"$exists" : true
}
}
]
}
]
},
{
"trk.0.type" : {
"$eq" : "fax"
}
},
{
"date" : {
"$lt" : "2018-01-09 10:36:31"
}
},
{
"date" : {
"$gt" : "1970-01-01 00:00:00"
}
}
]
},
"nReturned" : 4682,
"executionTimeMillisEstimate" : 710,
"works" : 4897,
"advanced" : 4682,
"needTime" : 5,
"needYield" : 209,
"saveState" : 234,
"restoreState" : 234,
"isEOF" : 1,
"invalidates" : 1,
"docsExamined" : 4682,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 4682,
"executionTimeMillisEstimate" : 305,
"works" : 4688,
"advanced" : 4682,
"needTime" : 5,
"needYield" : 0,
"saveState" : 234,
"restoreState" : 234,
"isEOF" : 1,
"invalidates" : 1,
"keyPattern" : {
"user_id" : 1,
"trk.0.direction" : 1,
"is_read" : 1,
"trk.0.data.status" : 1,
"is_removed" : 1
},
"indexName" : "user_id_direction_is_read_status_is_removed",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"user_id" : [
"[\"00000000000040008000000000002a5d\", \"00000000000040008000000000002a5d\"]"
],
"trk.0.direction" : [
"[\"ingress\", \"ingress\"]"
],
"is_read" : [
"[MinKey, MaxKey]"
],
"trk.0.data.status" : [
"[\"p_received\", \"p_received\"]",
"[\"success\", \"success\"]"
],
"is_removed" : [
"[false, false]"
]
},
"keysExamined" : 4688,
"seeks" : 6,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "hs1.mydomain.com",
"port" : 27017,
"version" : "3.4.10",
"gitVersion" : "078f28920cb24de0dd479b5ea6c66c644f6326e9"
},
"ok" : 1.0
}
keysExamined 只有 4,688!与拥有 3000 万文档的集合的数据总量相比,这并不多。当 Mongo 因多米诺骨牌效应而变慢时,CPU 使用率和内存使用率都不高,Mongo 只占用了 40% 的内存。另外补充一点,也许会有帮助:磁盘分区的文件系统是 Ext4。
下面是另一个非常慢的查询的完整详情:
{
"desc": "conn199276",
"threadId": "140070259820288",
"connectionId": 199276,
"client": "client_server_ip:45590",
"active": "true",
"opid": 63869351,
"secs_running": 36,
"microsecs_running": 36136211,
"op": "query",
"ns": "cuda.call_history",
"query": {
"find": "call_history",
"filter": {
"is_removed": false,
"trk.0.extra_data.spam.is_spam": true,
"trk.0.direction": "ingress",
"date": {
"$gt": "1970-01-01 00:00:00",
"$lt": "4001-01-01 00:00:00"
},
"trk.0.extra_data.status": {
"$in": [
"success",
"p_received"
]
},
"trk.0.type": "clk",
"owner_id": "00000000000040008000000000003828"
},
"sort": {
"date": -1
},
"limit": 31
},
"numYields": 6600,
"locks": {},
"waitingForLock": "false",
"lockStats": {
"Global": {
"acquireCount": {
"r": 13200
}
},
"MMAPV1Journal": {
"acquireCount": {
"r": 6611
},
"acquireWaitCount": {
"r": 9
},
"timeAcquiringMicros": {
"r": 50854
}
},
"Database": {
"acquireCount": {
"r": 6600
}
},
"Collection": {
"acquireCount": {
"R": 6600
},
"acquireWaitCount": {
"R": 11
},
"timeAcquiringMicros": {
"R": 163707
}
}
}
}
db.stats()
的输出:
rs0:PRIMARY> db.stats()
{
"db" : "cuda",
"collections" : 5,
"views" : 0,
"objects" : 55009248,
"avgObjSize" : 2018.6135346551184,
"dataSize" : 111042412544,
"storageSize" : 113055362336,
"numExtents" : 100,
"indexes" : 7,
"indexSize" : 14223460160,
"fileSize" : 133012914176,
"nsSizeMB" : 16,
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"dataFileVersion" : {
"major" : 4,
"minor" : 22
},
"ok" : 1
}
mongostat 显示如下结果,我认为其中的缺页次数(faults 列)偏高:
insert query update delete getmore command flushes mapped vsize res faults qrw arw net_in net_out conn set repl time
5 93 4 *0 0 64|0 0 282G 9.11G 26 0|0 0|0 64.3k 187k 481 rs0 PRI Jan 10 06:25:14.476
*0 107 *0 1 0 58|0 0 282G 9.14G 4 0|0 0|0 51.5k 247k 481 rs0 PRI Jan 10 06:25:15.475
2 88 5 *0 0 70|0 0 282G 9.04G 26 0|0 0|0 61.5k 245k 481 rs0 PRI Jan 10 06:25:16.476
3 98 2 *0 0 71|0 0 282G 9.12G 6 0|0 0|0 59.6k 274k 481 rs0 PRI Jan 10 06:25:17.474
1 105 *0 1 0 82|0 0 282G 9.10G 14 0|0 0|0 63.4k 254k 481 rs0 PRI Jan 10 06:25:18.476
1 88 2 *0 0 68|0 0 282G 9.12G 75 0|0 0|0 49.9k 194k 481 rs0 PRI Jan 10 06:25:19.476
5 120 4 *0 0 78|0 0 282G 9.11G 5 0|0 0|0 75.7k 548k 484 rs0 PRI Jan 10 06:25:20.476
3 89 *0 4 0 64|0 0 282G 9.14G 19 0|0 0|0 55.3k 1.59m 483 rs0 PRI Jan 10 06:25:21.476
*0 81 *0 *0 0 62|0 0 282G 9.17G 0 0|0 0|0 46.9k 265k 481 rs0 PRI Jan 10 06:25:22.476
6 105 3 *0 0 67|0 0 282G 9.15G 49 0|0 0|0 61.8k 240k 481 rs0 PRI Jan 10 06:25:23.475
在高负载下,我们遇到了大量的缺页错误(page faults):
insert query update delete getmore command flushes mapped vsize res faults qrw arw net_in net_out conn set repl time
1 96 *0 *0 0 70|0 0 282G 10.9G 597 0|0 2|0 59.1k 1.06m 484 rs0 PRI Jan 10 07:48:45.061
1 103 3 *0 0 79|0 0 282G 10.9G 715 0|0 0|0 66.4k 176k 484 rs0 PRI Jan 10 07:48:46.060
2 102 *0 *0 0 72|0 0 282G 10.9G 718 0|0 2|0 59.6k 154k 484 rs0 PRI Jan 10 07:48:47.060
2 104 2 1 0 83|0 0 282G 11.0G 692 0|0 3|0 68.7k 154k 483 rs0 PRI Jan 10 07:48:48.061
2 103 2 *0 0 86|0 0 282G 11.0G 743 0|0 1|0 64.2k 154k 482 rs0 PRI Jan 10 07:48:49.059
*0 116 2 *0 0 95|0 0 282G 11.0G 715 0|0 2|0 68.5k 237k 484 rs0 PRI Jan 10 07:48:50.060
1 112 6 1 0 78|0 0 282G 11.0G 595 0|0 2|0 74.3k 231k 481 rs0 PRI Jan 10 07:48:51.061
2 137 5 1 0 83|0 0 282G 11.0G 733 0|0 3|0 81.6k 301k 480 rs0 PRI Jan 10 07:48:52.060
3 157 5 1 0 92|0 0 282G 11.0G 697 0|0 3|0 89.5k 183k 481 rs0 PRI Jan 10 07:48:53.059
4 102 3 *0 0 73|0 0 282G 10.9G 599 0|0 1|0 67.1k 154k 480 rs0 PRI Jan 10 07:48:54.060
我们有大约 15GB 的可用内存,MongoDB 将其中一部分用于缓存!
在峰值时,我们会达到 400 OPS、每秒 500 个连接。有什么办法可以提高性能吗?