我正在使用 MongoDB 3.6,我的集合中有将近 50–60 万(5–6 lakh)个文档。我想同时支持全文搜索和部分匹配搜索。 `
db.temp.find( {$and : [{"status" : {"$in" : [ 1, 2]} },
{$or:[ { $text: { $search: "school" }}
,{ cname : /school/i}
,{ name : /school/i}
]} ]},
{cname:1,name:1,followers:1,status :1, score: { $meta:
"textScore" } } ).sort( { score: { $meta: "textScore"
},status :-1 ,followers :-1 } )
` temp 集合上的索引:
db.temp.createIndex(
{
name: "text",
cname: "text"
},
{
weights: {
name: 4,
cname: 2
}
}
)
db.getCollection("temp").createIndex({
"cname": 1
}, {background: true})
db.getCollection("temp").createIndex({
"status" : -1.0,
"followers" : -1.0
}, {background: true});
db.getCollection("temp").createIndex({
"name": 1
}, {background: true})`
文档示例如下:
{
"_id" : 5011.0,
"cname" : "samyselvik",
"name" : "Samy Sam",
"imgname" : "nrwi4769731443194380996.jpg",
"followers" : 1.0,
"status" : 1.0,
"createdat" : 1443194421532.0
}
当我用 explain('executionStats') 查看执行情况时,输出如下:
"executionStats" :{
"executionSuccess" : true,
"nReturned" : 363.0,
"executionTimeMillis" : 894.0,
"totalKeysExamined" : 921424.0,
"totalDocsExamined" : 372.0,
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : 363.0,
"executionTimeMillisEstimate" : 808.0,
"works" : 921803.0,
"advanced" : 363.0,
"needTime" : 921439.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"transformBy" : {
"cname" : 1.0,
"name" : 1.0,
"followers" : 1.0,
"score" : {
"$meta" : "textScore"
}
},
"inputStage" : {
"stage" : "SORT",
"nReturned" : 363.0,
"executionTimeMillisEstimate" : 774.0,
"works" : 921803.0,
"advanced" : 363.0,
"needTime" : 921439.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"sortPattern" : {
"score" : {
"$meta" : "textScore"
},
"status" : -1.0,
"followers" : -1.0
},
"memUsage" : 131542.0,
"memLimit" : 33554432.0,
"limitAmount" : 500.0,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 363.0,
"executionTimeMillisEstimate" : 730.0,
"works" : 921439.0,
"advanced" : 363.0,
"needTime" : 921075.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"status" : {
"$in" : [
1.0,
2.0
]
}
},
"nReturned" : 363.0,
"executionTimeMillisEstimate" : 719.0,
"works" : 921438.0,
"advanced" : 363.0,
"needTime" : 921074.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"docsExamined" : 363.0,
"alreadyHasObj" : 9.0,
"inputStage" : {
"stage" : "OR",
"nReturned" : 363.0,
"executionTimeMillisEstimate" : 697.0,
"works" : 921438.0,
"advanced" : 363.0,
"needTime" : 921074.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"dupsTested" : 399.0,
"dupsDropped" : 36.0,
"recordIdsForgotten" : 0.0,
"inputStages" : [
{
"stage" : "TEXT",
"nReturned" : 9.0,
"executionTimeMillisEstimate" : 0.0,
"works" : 21.0,
"advanced" : 9.0,
"needTime" : 11.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"indexPrefix" : {
},
"indexName" : "name_text_cname_text",
"parsedTextQuery" : {
"terms" : [
"sam"
],
"negatedTerms" : [
],
"phrases" : [
],
"negatedPhrases" : [
]
},
"textIndexVersion" : 3.0,
"inputStage" : {
"stage" : "TEXT_MATCH",
"nReturned" : 9.0,
"executionTimeMillisEstimate" : 0.0,
"works" : 21.0,
"advanced" : 9.0,
"needTime" : 11.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"docsRejected" : 0.0,
"inputStage" : {
"stage" : "TEXT_OR",
"nReturned" : 9.0,
"executionTimeMillisEstimate" : 0.0,
"works" : 21.0,
"advanced" : 9.0,
"needTime" : 11.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"docsExamined" : 9.0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 9.0,
"executionTimeMillisEstimate" : 0.0,
"works" : 10.0,
"advanced" : 9.0,
"needTime" : 0.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1.0
},
"indexName" : "name_text_cname_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "backward",
"indexBounds" : {
},
"keysExamined" : 9.0,
"seeks" : 1.0,
"dupsTested" : 9.0,
"dupsDropped" : 0.0,
"seenInvalidated" : 0.0
}
}
}
},
{
"stage" : "IXSCAN",
"filter" : {
"$or" : [
{
"cname" : {
"$regex" : "Sam",
"$options" : "i"
}
}
]
},
"nReturned" : 193.0,
"executionTimeMillisEstimate" : 357.0,
"works" : 460693.0,
"advanced" : 193.0,
"needTime" : 460499.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"keyPattern" : {
"cname" : 1.0
},
"indexName" : "cname_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"cname" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"cname" : [
"[\"\", {})",
"[/Sam/i, /Sam/i]"
]
},
"keysExamined" : 460692.0,
"seeks" : 1.0,
"dupsTested" : 0.0,
"dupsDropped" : 0.0,
"seenInvalidated" : 0.0
},
{
"stage" : "IXSCAN",
"filter" : {
"$or" : [
{
"name" : {
"$regex" : "Sam",
"$options" : "i"
}
}
]
},
"nReturned" : 197.0,
"executionTimeMillisEstimate" : 318.0,
"works" : 460724.0,
"advanced" : 197.0,
"needTime" : 460526.0,
"needYield" : 0.0,
"saveState" : 7234.0,
"restoreState" : 7234.0,
"isEOF" : 1.0,
"invalidates" : 0.0,
"keyPattern" : {
"name" : 1.0
},
"indexName" : "name_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"name" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"name" : [
"[\"\", {})",
"[/Sam/i, /Sam/i]"
]
},
"keysExamined" : 460723.0,
"seeks" : 1.0,
"dupsTested" : 0.0,
"dupsDropped" : 0.0,
"seenInvalidated" : 0.0
}
]
}
}
}
}
},
"allPlansExecution" : [
]
}
temp 集合总共有约 46 万(4.60 lakh)个文档,但检查的索引键数量却多于文档数量。请告诉我如何优化此查询,以便同时使用全文搜索和部分匹配搜索?
答案 0 :(得分:0)
查询各阶段的简要说明:
- 文本搜索($text)几乎不耗时,检查 9 个索引键,并返回 9 个文档;
- name 上的正则表达式需要 0.3 秒,检查 460k 个索引键,并返回 197 个文档;
- cname 上的正则表达式需要 0.4 秒,检查 460k 个索引键,并返回 193 个文档;
- 所有 3 个 $or 条件的结果合并(去重)后,在 0.7 秒内总共返回 363 个文档。它检查了 460k + 460k = 920k 个索引键。
后续各阶段依次读取文档、应用 status 过滤条件、在内存中对结果排序并做投影(projection),每个阶段的耗时都不到 50 毫秒,因此总时间约为 0.9 秒,留给优化的空间很小。
假设你无法不使用正则表达式。首先检查加上 .hint("name_text_cname_text") 之后是否更快。有时正则表达式配合全集合扫描(COLLSCAN)反而更高效,参见 https://docs.mongodb.com/manual/reference/operator/query/regex/#index-use 。
接下来,你可以优化数据结构:将 cname 和 name 合并到一个字段中,以减少正则表达式搜索的次数:
{
"_id" : 5011.0,
"cname" : "samyselvik",
"name" : "Samy Sam",
"search_name" : "samyselvik Samy Sam",
"imgname" : "nrwi4769731443194380996.jpg",
"followers" : 1.0,
"status" : 1.0,
"createdat" : 1443194421532.0
}
你需要更新所有现有文档,并修改你的应用,以确保将来写入的文档都带有有效的 search_name 字段。
查询将是:
db.temp.find(
{ $and : [
{ "status" : { "$in" : [1, 2]} },
{ $or: [
{ $text: { $search: "school" } },
{ search_name : /school/i}
] }
] },
{ cname:1, name:1, followers:1, status :1, score: { $meta: "textScore" } }
).sort( { score: { $meta: "textScore" }, status :-1, followers :-1 } )
这样应该能节省大约 0.2 秒,所以这里并没有什么灵丹妙药。正则表达式的开销很大,但在某些情况下不可避免。