MongoDB 全文搜索与部分搜索结合使用

时间:2018-06-13 15:22:24

标签: mongodb indexing full-text-search query-optimization partial

我正在使用 MongoDB 3.6,我的集合中有近 50-60 万个文档(5-6 lakh)。我希望同时支持全文搜索和部分搜索。 `

 db.temp.find( {$and : [{"status" : {"$in" : [ 1,  2]} },
                          {$or:[ { $text: { $search: "school" }}
                                 ,{ cname : /school/i}
                                 ,{ name : /school/i}
                                  ]}  ]},
         {cname:1,name:1,followers:1,status :1, score: { $meta: 
          "textScore" } } ).sort( { score: { $meta: "textScore" 
          },status :-1 ,followers :-1 } )

` temp 集合上的索引:

  db.temp.createIndex(
   {
     name: "text",
     cname: "text"
  },
  {
    weights: {
     name: 4,
   cname: 2
     }
   }
   )
     db.getCollection("temp").createIndex({ 
            "cname": 1
        }, {background: true})


     db.getCollection("temp").createIndex({ 
          "status" : -1.0, 
         "followers" : -1.0
          }, {background: true});


        db.getCollection("temp").createIndex({ 
           "name": 1
           }, {background: true})`

文档示例如下:

{ 
       "_id" : 5011.0, 
       "cname" : "samyselvik", 
       "name" : "Samy Sam", 
       "imgname" : "nrwi4769731443194380996.jpg", 
       "followers" : 1.0, 
       "status" : 1.0, 
        "createdat" : 1443194421532.0
    }

当我使用 explain('executionStats') 查看执行情况时,它会显示:

"executionStats" :{
                    "executionSuccess" : true, 
                    "nReturned" : 363.0, 
                    "executionTimeMillis" : 894.0, 
                    "totalKeysExamined" : 921424.0, 
                    "totalDocsExamined" : 372.0, 
                    "executionStages" : {
                        "stage" : "PROJECTION", 
                        "nReturned" : 363.0, 
                        "executionTimeMillisEstimate" : 808.0, 
                        "works" : 921803.0, 
                        "advanced" : 363.0, 
                        "needTime" : 921439.0, 
                        "needYield" : 0.0, 
                        "saveState" : 7234.0, 
                        "restoreState" : 7234.0, 
                        "isEOF" : 1.0, 
                        "invalidates" : 0.0, 
                        "transformBy" : {
                            "cname" : 1.0, 
                            "name" : 1.0, 
                            "followers" : 1.0, 
                            "score" : {
                                "$meta" : "textScore"
                            }
                        }, 
                        "inputStage" : {
                            "stage" : "SORT", 
                            "nReturned" : 363.0, 
                            "executionTimeMillisEstimate" : 774.0, 
                            "works" : 921803.0, 
                            "advanced" : 363.0, 
                            "needTime" : 921439.0, 
                            "needYield" : 0.0, 
                            "saveState" : 7234.0, 
                            "restoreState" : 7234.0, 
                            "isEOF" : 1.0, 
                            "invalidates" : 0.0, 
                            "sortPattern" : {
                                "score" : {
                                    "$meta" : "textScore"
                                }, 
                                "status" : -1.0, 
                                "followers" : -1.0
                            }, 
                            "memUsage" : 131542.0, 
                            "memLimit" : 33554432.0, 
                            "limitAmount" : 500.0, 
                            "inputStage" : {
                                "stage" : "SORT_KEY_GENERATOR", 
                                "nReturned" : 363.0, 
                                "executionTimeMillisEstimate" : 730.0, 
                                "works" : 921439.0, 
                                "advanced" : 363.0, 
                                "needTime" : 921075.0, 
                                "needYield" : 0.0, 
                                "saveState" : 7234.0, 
                                "restoreState" : 7234.0, 
                                "isEOF" : 1.0, 
                                "invalidates" : 0.0, 
                                "inputStage" : {
                                    "stage" : "FETCH", 
                                    "filter" : {
                                        "status" : {
                                            "$in" : [
                                                1.0, 
                                                2.0
                                            ]
                                        }
                                    }, 
                                    "nReturned" : 363.0, 
                                    "executionTimeMillisEstimate" : 719.0, 
                                    "works" : 921438.0, 
                                    "advanced" : 363.0, 
                                    "needTime" : 921074.0, 
                                    "needYield" : 0.0, 
                                    "saveState" : 7234.0, 
                                    "restoreState" : 7234.0, 
                                    "isEOF" : 1.0, 
                                    "invalidates" : 0.0, 
                                    "docsExamined" : 363.0, 
                                    "alreadyHasObj" : 9.0, 
                                    "inputStage" : {
                                        "stage" : "OR", 
                                        "nReturned" : 363.0, 
                                        "executionTimeMillisEstimate" : 697.0, 
                                        "works" : 921438.0, 
                                        "advanced" : 363.0, 
                                        "needTime" : 921074.0, 
                                        "needYield" : 0.0, 
                                        "saveState" : 7234.0, 
                                        "restoreState" : 7234.0, 
                                        "isEOF" : 1.0, 
                                        "invalidates" : 0.0, 
                                        "dupsTested" : 399.0, 
                                        "dupsDropped" : 36.0, 
                                        "recordIdsForgotten" : 0.0, 
                                        "inputStages" : [
                                            {
                                                "stage" : "TEXT", 
                                                "nReturned" : 9.0, 
                                                "executionTimeMillisEstimate" : 0.0, 
                                                "works" : 21.0, 
                                                "advanced" : 9.0, 
                                                "needTime" : 11.0, 
                                                "needYield" : 0.0, 
                                                "saveState" : 7234.0, 
                                                "restoreState" : 7234.0, 
                                                "isEOF" : 1.0, 
                                                "invalidates" : 0.0, 
                                                "indexPrefix" : {

                                                }, 
                                                "indexName" : "name_text_cname_text", 
                                                "parsedTextQuery" : {
                                                    "terms" : [
                                                        "sam"
                                                    ], 
                                                    "negatedTerms" : [

                                                    ], 
                                                    "phrases" : [

                                                    ], 
                                                    "negatedPhrases" : [

                                                    ]
                                                }, 
                                                "textIndexVersion" : 3.0, 
                                                "inputStage" : {
                                                    "stage" : "TEXT_MATCH", 
                                                    "nReturned" : 9.0, 
                                                    "executionTimeMillisEstimate" : 0.0, 
                                                    "works" : 21.0, 
                                                    "advanced" : 9.0, 
                                                    "needTime" : 11.0, 
                                                    "needYield" : 0.0, 
                                                    "saveState" : 7234.0, 
                                                    "restoreState" : 7234.0, 
                                                    "isEOF" : 1.0, 
                                                    "invalidates" : 0.0, 
                                                    "docsRejected" : 0.0, 
                                                    "inputStage" : {
                                                        "stage" : "TEXT_OR", 
                                                        "nReturned" : 9.0, 
                                                        "executionTimeMillisEstimate" : 0.0, 
                                                        "works" : 21.0, 
                                                        "advanced" : 9.0, 
                                                        "needTime" : 11.0, 
                                                        "needYield" : 0.0, 
                                                        "saveState" : 7234.0, 
                                                        "restoreState" : 7234.0, 
                                                        "isEOF" : 1.0, 
                                                        "invalidates" : 0.0, 
                                                        "docsExamined" : 9.0, 
                                                        "inputStage" : {
                                                            "stage" : "IXSCAN", 
                                                            "nReturned" : 9.0, 
                                                            "executionTimeMillisEstimate" : 0.0, 
                                                            "works" : 10.0, 
                                                            "advanced" : 9.0, 
                                                            "needTime" : 0.0, 
                                                            "needYield" : 0.0, 
                                                            "saveState" : 7234.0, 
                                                            "restoreState" : 7234.0, 
                                                            "isEOF" : 1.0, 
                                                            "invalidates" : 0.0, 
                                                            "keyPattern" : {
                                                                "_fts" : "text", 
                                                                "_ftsx" : 1.0
                                                            }, 
                                                            "indexName" : "name_text_cname_text", 
                                                            "isMultiKey" : true, 
                                                            "isUnique" : false, 
                                                            "isSparse" : false, 
                                                            "isPartial" : false, 
                                                            "indexVersion" : 2.0, 
                                                            "direction" : "backward", 
                                                            "indexBounds" : {

                                                            }, 
                                                            "keysExamined" : 9.0, 
                                                            "seeks" : 1.0, 
                                                            "dupsTested" : 9.0, 
                                                            "dupsDropped" : 0.0, 
                                                            "seenInvalidated" : 0.0
                                                        }
                                                    }
                                                }
                                            }, 
                                            {
                                                "stage" : "IXSCAN", 
                                                "filter" : {
                                                    "$or" : [
                                                        {
                                                            "cname" : {
                                                                "$regex" : "Sam", 
                                                                "$options" : "i"
                                                            }
                                                        }
                                                    ]
                                                }, 
                                                "nReturned" : 193.0, 
                                                "executionTimeMillisEstimate" : 357.0, 
                                                "works" : 460693.0, 
                                                "advanced" : 193.0, 
                                                "needTime" : 460499.0, 
                                                "needYield" : 0.0, 
                                                "saveState" : 7234.0, 
                                                "restoreState" : 7234.0, 
                                                "isEOF" : 1.0, 
                                                "invalidates" : 0.0, 
                                                "keyPattern" : {
                                                    "cname" : 1.0
                                                }, 
                                                "indexName" : "cname_1", 
                                                "isMultiKey" : false, 
                                                "multiKeyPaths" : {
                                                    "cname" : [

                                                    ]
                                                }, 
                                                "isUnique" : false, 
                                                "isSparse" : false, 
                                                "isPartial" : false, 
                                                "indexVersion" : 2.0, 
                                                "direction" : "forward", 
                                                "indexBounds" : {
                                                    "cname" : [
                                                        "[\"\", {})", 
                                                        "[/Sam/i, /Sam/i]"
                                                    ]
                                                }, 
                                                "keysExamined" : 460692.0, 
                                                "seeks" : 1.0, 
                                                "dupsTested" : 0.0, 
                                                "dupsDropped" : 0.0, 
                                                "seenInvalidated" : 0.0
                                            }, 
                                            {
                                                "stage" : "IXSCAN", 
                                                "filter" : {
                                                    "$or" : [
                                                        {
                                                            "name" : {
                                                                "$regex" : "Sam", 
                                                                "$options" : "i"
                                                            }
                                                        }
                                                    ]
                                                }, 
                                                "nReturned" : 197.0, 
                                                "executionTimeMillisEstimate" : 318.0, 
                                                "works" : 460724.0, 
                                                "advanced" : 197.0, 
                                                "needTime" : 460526.0, 
                                                "needYield" : 0.0, 
                                                "saveState" : 7234.0, 
                                                "restoreState" : 7234.0, 
                                                "isEOF" : 1.0, 
                                                "invalidates" : 0.0, 
                                                "keyPattern" : {
                                                    "name" : 1.0
                                                }, 
                                                "indexName" : "name_1", 
                                                "isMultiKey" : false, 
                                                "multiKeyPaths" : {
                                                    "name" : [

                                                    ]
                                                }, 
                                                "isUnique" : false, 
                                                "isSparse" : false, 
                                                "isPartial" : false, 
                                                "indexVersion" : 2.0, 
                                                "direction" : "forward", 
                                                "indexBounds" : {
                                                    "name" : [
                                                        "[\"\", {})", 
                                                        "[/Sam/i, /Sam/i]"
                                                    ]
                                                }, 
                                                "keysExamined" : 460723.0, 
                                                "seeks" : 1.0, 
                                                "dupsTested" : 0.0, 
                                                "dupsDropped" : 0.0, 
                                                "seenInvalidated" : 0.0
                                            }
                                        ]
                                    }
                                }
                            }
                        }
                    }, 
                    "allPlansExecution" : [

                    ]
                }

temp 集合总共有约 46 万个文档(4.60 lakh),而检查的索引键数量却多于文档数量。请告诉我如何优化此查询,以便同时使用全文搜索和部分搜索?

1 个答案:

答案 0 :(得分:0)

查询阶段的简要说明:

  • name 上的正则表达式耗时 0.3 秒,检查了 460k 个索引键,并返回 197 个文档
  • cname 上的正则表达式耗时 0.4 秒,检查了 460k 个索引键,并返回 193 个文档
  • 全文搜索只检查了 9 个索引键,并立即返回 9 个文档

这 3 个 $or 条件的结果取并集(去重后)在 0.7 秒内总共返回 363 个文档。正则部分共检查了 460k + 460k = 920k 个索引键。

后续阶段依次读取文档、应用 status 过滤条件、在内存中对结果排序并进行投影(projection),每个阶段耗时都不到 50 毫秒,因此总时间为 0.9 秒,留给优化的空间很小。

假设您无法避免使用正则表达式。首先检查加上 .hint("name_text_cname_text") 后是否更快。有时正则表达式配合全集合扫描(collscan)反而更高效,参见 https://docs.mongodb.com/manual/reference/operator/query/regex/#index-use 。

接下来,您可以将 cname 和 name 合并到一个字段中来优化数据,以减少正则表达式搜索的次数:

{ 
   "_id" : 5011.0, 
   "cname" : "samyselvik", 
   "name" : "Samy Sam", 
   "search_name" : "samyselvik Samy Sam",
   "imgname" : "nrwi4769731443194380996.jpg", 
   "followers" : 1.0, 
   "status" : 1.0, 
   "createdat" : 1443194421532.0
}

您需要更新所有文档并更新您的应用,以确保将来的文档具有有效的search_name字段。

查询将是:

db.temp.find( 
    { $and : [
        { "status" : { "$in" : [1, 2]} },
        { $or: [ 
            { $text: { $search: "school" } },
            { search_name : /school/i}
        ] }  
    ] },
    { cname:1, name:1, followers:1, status :1, score: { $meta: "textScore" } } 
).sort( { score: { $meta: "textScore" }, status :-1, followers :-1 } )

这样应该能节省大约 0.2 秒,所以并没有什么神奇的办法。正则表达式的开销很大,但在某些情况下是不可避免的。