MongoDB:为什么我的nscannedObjects值很高,即使查询的所有字段都已建立索引?

时间:2015-01-28 20:36:59

标签: arrays mongodb query-performance

我为集合上的三个字段建立了索引,其中一个字段是数组。我正在对这三个字段运行查询,该查询耗时超过一秒钟,而集合中有300K个文档。当我对查询调用explain时,我看到索引被正确使用,但被扫描对象的数量非常高。我想这就是性能低下的原因。

{
    "_id" : ObjectId("54c8f110389a46153866d82e"),
    "mmt" : [ 
        "54944cfd90671810ccbf2552", 
        "54c64029038d8c3aff41ad6d", 
        "54c64029038d8c3aff41ad73", 
        "54c8f151038d8c3aff453669", 
        "54c8f151038d8c3aff45366d"
    ],
    "p" : 8700,
    "sui" : "3810d5cf-3032-4a77-9715-a42e010e569c"
    /* also some more fields */
}

使用此索引:

{
    "sui" : 1,
    "p" : 1,
    "mmt" : 1
}

我正在尝试运行此查询:

db.my_coll.find(
{
    "mmt" : { "$all" :
        [
            "54944cfd90671810ccbf2552", "54ac1db0e3f494afd4ded4c8", "54ac1db1e3f494afd4ded66a", "54ac1db1e3f494afd4ded66b", "54c8b671038d8c3aff453649", "54c8f154038d8c3aff45368f", "54c8f154038d8c3aff453694"
        ]
},
    "sui" : { "$ne" : "bde0f517-b942-4823-b2c8-a41900f46641" },
    "p": { $gt: 100, $lt: 1000 }
}

).limit(1000).explain()

解释的结果是:

{
    "cursor" : "BtreeCursor sui_1_p_1_mmt_1",
    "isMultiKey" : true,
    "n" : 16,
    "nscannedObjects" : 14356,
    "nscanned" : 129223,
    "nscannedObjectsAllPlans" : 14356,
    "nscannedAllPlans" : 129223,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 1009,
    "nChunkSkips" : 0,
    "millis" : 1276,
    "indexBounds" : {
        "sui" : [ 
            [ 
                {
                    "$minElement" : 1
                }, 
                "bde0f517-b942-4823-b2c8-a41900f46641"
            ], 
            [ 
                "bde0f517-b942-4823-b2c8-a41900f46641", 
                {
                    "$maxElement" : 1
                }
            ]
        ],
        "p" : [ 
            [ 
                -Infinity, 
                1000
            ]
        ],
        "mmt" : [ 
            [ 
                "54944cfd90671810ccbf2552", 
                "54944cfd90671810ccbf2552"
            ]
        ]
    },
    "server" : "shopkrowdMongo:27017",
    "filterSet" : false,
    "stats" : {
        "type" : "LIMIT",
        "works" : 129224,
        "yields" : 1009,
        "unyields" : 1009,
        "invalidates" : 0,
        "advanced" : 16,
        "needTime" : 129207,
        "needFetch" : 0,
        "isEOF" : 1,
        "children" : [ 
            {
                "type" : "KEEP_MUTATIONS",
                "works" : 129224,
                "yields" : 1009,
                "unyields" : 1009,
                "invalidates" : 0,
                "advanced" : 16,
                "needTime" : 129207,
                "needFetch" : 0,
                "isEOF" : 1,
                "children" : [ 
                    {
                        "type" : "FETCH",
                        "works" : 129224,
                        "yields" : 1009,
                        "unyields" : 1009,
                        "invalidates" : 0,
                        "advanced" : 16,
                        "needTime" : 129207,
                        "needFetch" : 0,
                        "isEOF" : 1,
                        "alreadyHasObj" : 0,
                        "forcedFetches" : 0,
                        "matchTested" : 16,
                        "children" : [ 
                            {
                                "type" : "IXSCAN",
                                "works" : 129223,
                                "yields" : 1009,
                                "unyields" : 1009,
                                "invalidates" : 0,
                                "advanced" : 14356,
                                "needTime" : 114867,
                                "needFetch" : 0,
                                "isEOF" : 1,
                                "keyPattern" : "{ sui: 1.0, p: 1.0, mmt: 1.0 }",
                                "isMultiKey" : 1,
                                "boundsVerbose" : "field #0['sui']: [MinKey, \"bde0f517-b942-4823-b2c8-a41900f46641\"), (\"bde0f517-b942-4823-b2c8-a41900f46641\", MaxKey], field #1['p']: [-inf.0, 1000.0), field #2['mmt']: [\"54944cfd90671810ccbf2552\", \"54944cfd90671810ccbf2552\"]",
                                "yieldMovedCursor" : 0,
                                "dupsTested" : 14356,
                                "dupsDropped" : 0,
                                "seenInvalidated" : 0,
                                "matchTested" : 0,
                                "keysExamined" : 129223,
                                "children" : []
                            }
                        ]
                    }
                ]
            }
        ]
    }
}

找到的项目数是 16,但被扫描对象的数量是 14356。我不明白为什么mongodb会扫描这么多文档,即使查询的所有字段都已建立索引。

  1. 为什么mongodb会扫描这么多对象?
  2. 如何更快地获得此查询的结果?
  3. 我使用的mmt数组不会随着时间的推移而增长或缩小,但其中的元素数量在5到15之间变化。我需要使用 $in、$all 和 $nin 的几种组合来查询此字段。此集合中的文档数量可能会超过30M。有没有办法在这种情况下可靠地获得快速的查询结果?

    更新1:

    我尝试删除sui字段和 $ne 查询条件。更新后的explain结果:

    {
        "cursor" : "BtreeCursor p_1_mmt_1",
        "isMultiKey" : true,
        "n" : 17,
        "nscannedObjects" : 16338,
        "nscanned" : 16963,
        "nscannedObjectsAllPlans" : 16338,
        "nscannedAllPlans" : 33930,
        "scanAndOrder" : false,
        "indexOnly" : false,
        "nYields" : 265,
        "nChunkSkips" : 0,
        "millis" : 230,
        "indexBounds" : {
            "p" : [ 
                [ 
                    -Infinity, 
                    1000
                ]
            ],
            "mmt" : [ 
                [ 
                    "54944cfd90671810ccbf2552", 
                    "54944cfd90671810ccbf2552"
                ]
            ]
        },
        "server" : "shopkrowdMongo:27017",
        "filterSet" : false,
        "stats" : {
            "type" : "LIMIT",
            "works" : 16966,
            "yields" : 265,
            "unyields" : 265,
            "invalidates" : 0,
            "advanced" : 17,
            "needTime" : 16947,
            "needFetch" : 0,
            "isEOF" : 1,
            "children" : [ 
                {
                    "type" : "KEEP_MUTATIONS",
                    "works" : 16966,
                    "yields" : 265,
                    "unyields" : 265,
                    "invalidates" : 0,
                    "advanced" : 17,
                    "needTime" : 16947,
                    "needFetch" : 0,
                    "isEOF" : 1,
                    "children" : [ 
                        {
                            "type" : "FETCH",
                            "works" : 16965,
                            "yields" : 265,
                            "unyields" : 265,
                            "invalidates" : 0,
                            "advanced" : 17,
                            "needTime" : 16947,
                            "needFetch" : 0,
                            "isEOF" : 1,
                            "alreadyHasObj" : 0,
                            "forcedFetches" : 0,
                            "matchTested" : 17,
                            "children" : [ 
                                {
                                    "type" : "IXSCAN",
                                    "works" : 16964,
                                    "yields" : 265,
                                    "unyields" : 265,
                                    "invalidates" : 0,
                                    "advanced" : 16338,
                                    "needTime" : 626,
                                    "needFetch" : 0,
                                    "isEOF" : 1,
                                    "keyPattern" : "{ p: 1.0, mmt: 1.0 }",
                                    "isMultiKey" : 1,
                                    "boundsVerbose" : "field #0['p']: [-inf.0, 1000.0), field #1['mmt']: [\"54944cfd90671810ccbf2552\", \"54944cfd90671810ccbf2552\"]",
                                    "yieldMovedCursor" : 0,
                                    "dupsTested" : 16338,
                                    "dupsDropped" : 0,
                                    "seenInvalidated" : 0,
                                    "matchTested" : 0,
                                    "keysExamined" : 16963,
                                    "children" : []
                                }
                            ]
                        }
                    ]
                }
            ]
        }
    }
    

    查询效果更好,但被扫描对象的数量仍然很高。

2 个答案:

答案 0 :(得分:2)

我认为marcinn指出 $ne 是最可能的罪魁祸首是正确的,但更新1告诉我们 $all 也是一个问题。查询使用索引的mmt部分查找数组中包含某个值的文档,然后必须扫描mmt数组的其余部分,以验证 $all 数组中的所有值都位于可能匹配的文档的mmt数组中。这意味着必须加载并扫描可能匹配的文档,因此它会被计入nscannedObjects。要非常清楚地演示此行为,请考虑以下示例:

> db.test.drop()
> for (var i = 0; i < 100; i++) db.test.insert({ "x" : [1, 2] })
> for (var i = 0; i < 100; i++) db.test.insert({ "x" : [1, 3] })
> db.test.ensureIndex({ "x" : 1 })
> db.test.find({ "x" : { "$all" : [1, 2] } }).explain(true)

这显示 n = 100,而 nscanned = nscannedObjects = 200,并使用值 1 作为索引边界,而逻辑等效的查询

> db.test.find({ "x" : { "$all" : [2, 1] } }).explain(true)

显示 n = nscanned = nscannedObjects = 100,两个索引边界的值均为 2。

答案 1 :(得分:1)

基本上是因为 $ne 无法(有效地)使用索引。因此,您的索引仅用于先按mmt字段查询,然后再读取文档。

  

某些查询操作不具有选择性。这些操作无法有效地使用索引,或者根本不能使用索引。

     

不等式运算符 $nin 和 $ne 的选择性不高,因为它们经常匹配索引的很大一部分。因此,在大多数情况下,带索引的 $nin 或 $ne 查询的性能可能并不比必须扫描集合中所有文档的 $nin 或 $ne 查询更好。

http://docs.mongodb.org/manual/core/query-optimization/