我有一个使用mongodb的数据库,它有大约一百万个文档,并且还在不断增长。我用它来保存ping日志。随着数据量的增长,现在查询需要很长时间。
以下是我的查询(我正在使用pymongo):
db['ping'].aggregate([
{'$sort': {'url': 1, 'created': 1}},
{'$group': {
'_id': '$url',
'lastCreated': {'$last': '$created'},
'data': {'$first': '$$ROOT'}, },
},
{'$project': {
'status': '$data.status',
'date': '$lastCreated', }
},
{'$group': {
'_id': '$status',
'count': {'$sum': 1}, }
}
], allowDiskUse=True)
此查询用于获取每个 URL 最新录入的一条数据,然后按状态(status)对其进行计数。
结果将如下所示:
{ "_id" : "CHECK", "count" : 8 }
{ "_id" : "DEAD", "count" : 7 }
{ "_id" : "OK", "count" : 94 }
以下是executionStats的说明:
{
"stages" : [
{
"$cursor" : {
"query" : {
},
"sort" : {
"url" : 1,
"created" : 1
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "log.ping",
"indexFilterSet" : false,
"parsedQuery" : {
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"url" : 1,
"created" : 1
},
"indexName" : "url_1_created_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"url" : [ ],
"created" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"url" : [
"[MinKey, MaxKey]"
],
"created" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1516493,
"executionTimeMillis" : 15105,
"totalKeysExamined" : 1516493,
"totalDocsExamined" : 1516493,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 1516493,
"executionTimeMillisEstimate" : 715,
"works" : 1516494,
"advanced" : 1516493,
"needTime" : 0,
"needYield" : 0,
"saveState" : 12104,
"restoreState" : 12104,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 1516493,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1516493,
"executionTimeMillisEstimate" : 268,
"works" : 1516494,
"advanced" : 1516493,
"needTime" : 0,
"needYield" : 0,
"saveState" : 12104,
"restoreState" : 12104,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"url" : 1,
"created" : 1
},
"indexName" : "url_1_created_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"url" : [ ],
"created" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"url" : [
"[MinKey, MaxKey]"
],
"created" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 1516493,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
{
"$group" : {
"_id" : "$url",
"lastCreated" : {
"$last" : "$created"
},
"data" : {
"$first" : "$$ROOT"
}
}
},
{
"$project" : {
"_id" : true,
"status" : "$data.status",
"date" : "$lastCreated"
}
},
{
"$group" : {
"_id" : "$status",
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$limit" : NumberLong(5)
}
],
"ok" : 1
}
我怀疑该查询扫描了集合中的所有文档,这导致它耗时很长。我确实尝试过使用 $limit,但那样一来排序后得到的数据就不对了。
有没有办法使其运行更快?