我在一台Amazon r3.xlarge实例(4核,30GB RAM,80GB SSD)上运行MongoDB 3.0.7,存储了大约40GB的半复杂JSON数据,约700万个文档。文档大致如下:
{
"_id": ObjectId("5665f1fef8adb3ee597af375"),
"some_other_id": "xxxxxxxxxxxxxxxx",
"meta": {
...
},
"raw": {
...
"text": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
...
},
"__v": 0,
"created_at": ISODate("2015-12-07T20:54:10Z")
}
我使用以下命令索引了一个文本字段:
db.messages.createIndex({"raw.text":"text"})
这就是索引的样子:
{
"v" : 1,
"key" : {
"_fts" : "text",
"_ftsx" : 1
},
"name" : "raw.text_text",
"ns" : "DBDB.messages",
"weights" : {
"raw.text" : 1
},
"default_language" : "english",
"language_override" : "language",
"textIndexVersion" : 2
}
这为我的40GB数据生成了一个约3GB的索引。此外,这个集合上还有另外4个索引,都建立在数值字段上。这个集合上总共有5个索引,索引总大小为4GB。
我正在对查询执行count()操作,例如:
db.messages.find({$text:{$search:"mouse"}}).count()
我一直发现,这个计数返回所需的时间大致与符合find()条件的文档数量成正比。对于匹配大量结果(例如500万)的条件,至少需要30分钟!再说一次,我并没有返回结果,只是想统计有多少文档包含该关键字。
这是我从explain()得到的输出。根据这个链接(https://docs.mongodb.org/v3.0/reference/explain-results/),“TEXT”并不在所列的阶段之中。我真的不知道这意味着什么,谷歌搜索也没有找到有用的信息。不过输出中确实提到了我的文本索引,所以我希望这个count()操作确实使用了我的全文索引。
> db.messages.explain().find({$text:{$search:"mouse"}}).count()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "DBDB.messages",
"indexFilterSet" : false,
"parsedQuery" : {
"$text" : {
"$search" : "mouse",
"$language" : ""
}
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "raw.text_text",
"parsedTextQuery" : {
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "xxxxxxxxxxxx",
"port" : 27017,
"version" : "3.0.7",
"gitVersion" : "6ce7cbe8c6b899552dadd907604559806aa2e9bd"
},
"ok" : 1
}
我的机器上有30GB的物理RAM,索引总共4GB。我看到当我执行这些查询时,mongod使用了近8GB。所以我假设它已把所有索引完全加载到内存中。因此我期望,只要查询是在索引上运行的,几乎每个find().count()操作都能在几秒内返回。我没有执行任何其他数据库操作,如插入或其他查询。我已经停掉了其他所有东西,以便隔离数据库。
我显然在这里做错了什么,但我觉得我已经按照在网上看到的内容做了所有该做的事。不过,我是MongoDB新手。
最后,这是在运行查询5分钟左右后的currentOp()输出:
> db.currentOp()
{
"inprog" : [
{
"desc" : "conn1",
"threadId" : "0x316f1e0",
"connectionId" : 1,
"opid" : 94,
"active" : true,
"secs_running" : 539,
"microsecs_running" : NumberLong(539798074),
"op" : "query",
"ns" : "DBDB.messages",
"query" : {
"count" : "messages",
"query" : {
"$text" : {
"$search" : "mouse"
}
},
"fields" : {
}
},
"planSummary" : "TEXT {}",
"client" : "127.0.0.1:37784",
"numYields" : 3478,
"locks" : {
"Global" : "r",
"MMAPV1Journal" : "r",
"Database" : "r",
"Collection" : "R"
},
"waitingForLock" : false,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(6958)
}
},
"MMAPV1Journal" : {
"acquireCount" : {
"r" : NumberLong(3479)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(3479)
}
},
"Collection" : {
"acquireCount" : {
"R" : NumberLong(3479)
}
}
}
}
]
}
编辑:添加“executionStats”输出 编辑:升级到MongoDB 3.2.1。这些是新的执行统计数据。之前的执行统计数据使用了不同的搜索词,因为“mouse”占用的时间太长。这一次,我让“mouse”查询运行完成,你可以看到它花了将近40分钟。
> db.messages.explain("executionStats").find({$text:{$search:"mouse"}}).count()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "DBDB.messages",
"indexFilterSet" : false,
"parsedQuery" : {
"$text" : {
"$search" : "mouse",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "raw.text_text",
"parsedTextQuery" : {
"terms" : [
"mous"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "TEXT_OR",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "raw.text_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
}
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 2305641,
"totalKeysExamined" : 5984163,
"totalDocsExamined" : 5984163,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 2277920,
"works" : 11968329,
"advanced" : 0,
"needTime" : 11968328,
"needYield" : 0,
"saveState" : 179094,
"restoreState" : 179094,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 5984163,
"nSkipped" : 0,
"inputStage" : {
"stage" : "TEXT",
"nReturned" : 5984163,
"executionTimeMillisEstimate" : 2276450,
"works" : 11968329,
"advanced" : 5984163,
"needTime" : 5984165,
"needYield" : 0,
"saveState" : 179094,
"restoreState" : 179094,
"isEOF" : 1,
"invalidates" : 0,
"indexPrefix" : {
},
"indexName" : "raw.text_text",
"parsedTextQuery" : {
"terms" : [
"mous"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"inputStage" : {
"stage" : "TEXT_MATCH",
"nReturned" : 5984163,
"executionTimeMillisEstimate" : 2275970,
"works" : 11968329,
"advanced" : 5984163,
"needTime" : 5984165,
"needYield" : 0,
"saveState" : 179094,
"restoreState" : 179094,
"isEOF" : 1,
"invalidates" : 0,
"docsRejected" : 0,
"inputStage" : {
"stage" : "TEXT_OR",
"nReturned" : 5984163,
"executionTimeMillisEstimate" : 2275550,
"works" : 11968329,
"advanced" : 5984163,
"needTime" : 5984165,
"needYield" : 0,
"saveState" : 179094,
"restoreState" : 179094,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 5984163,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 5984163,
"executionTimeMillisEstimate" : 11520,
"works" : 5984164,
"advanced" : 5984163,
"needTime" : 0,
"needYield" : 0,
"saveState" : 179094,
"restoreState" : 179094,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "raw.text_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
},
"keysExamined" : 5984163,
"dupsTested" : 5984163,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
}
},
"serverInfo" : {
"host" : "xxxxxxxxxxxxxxxxxx",
"port" : 27017,
"version" : "3.2.1",
"gitVersion" : "a14d55980c2cdc565d4704a7e3ad37e4e535c1b2"
},
"ok" : 1
}
答案 0 :(得分:1)
我不相信您的查询正在使用索引。
以下是我机器上的外观。
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.grades",
"indexFilterSet" : false,
"parsedQuery" : {
"$text" : {
"$search" : "homework",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "scores.type_text",
"parsedTextQuery" : {
"terms" : [
"homework"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "TEXT_OR",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "scores.type_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
}
}
}
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "xxxxxxxxxxx",
"port" : 27017,
"version" : "3.2.1",
"gitVersion" : "a14d55980c2cdc565d4704a7e3ad37e4e535c1b2"
},
"ok" : 1
}
您可以看到它使用了IXSCAN。如果可能,请删除索引并重新创建。我个人喜欢使用ensureIndex来执行此操作。
您可能还希望使用db.messages.explain("executionStats").find({$text:{$search:"mouse"}}).count()
编辑:我的机器上的索引如下:
{
"v" : 1,
"key" : {
"_fts" : "text",
"_ftsx" : 1
},
"name" : "scores.type_text",
"ns" : "test.grades",
"weights" : {
"scores.type" : 1
},
"default_language" : "english",
"language_override" : "language",
"textIndexVersion" : 3
}