加快聚合?

时间:2018-08-01 15:56:50

标签: mongodb mongodb-query aggregation-framework database-performance

我正在研究另一个问题中一个相当复杂的MongoDB聚合查询。我创建了一大批虚拟文档(200个)来运行它,粗略测得其耗时大约在150-350毫秒之间。

然后我通过添加一些预过滤和后排序/限制等方式对该查询进行了稍微扩展。我希望这实际上可以加快查询速度,但是现在大约需要500-600毫秒。

我尝试创建了几个索引(尽管我在MongoDB University的课程进度上有些落后)。一个建在{ ownerId: 1, groupIds: 1 }上,另一个建在{ "panels.visConfig.dataConfig.columns.element": 1 }上,但两者似乎都没有带来什么变化,.explain()的输出也看不出有多大不同。

我想知道是否有人可以给我一些提高性能的建议;另外,既然我尝试的这些索引没有起作用,那么索引究竟应该如何创建才能生效?

这是示例文档。其中有些字段在实际数据中要大得多:layout.sm、layout.md、layout.lg、meta和viewConfig。通常,我估计每个文档中会有20个panels用于测试。

{
    "name" : "A Name",
    "description" : "A Description",
    "ownerId" : "00000000-0000-0000-0000-000000000000",
    "groupIds": [
        "11111111-1111-1111-1111-111111111111",
        "22222222-2222-2222-2222-222222222222"
    ],
    "version" : 1,
    "coverId" : "5a8ae2180963493834d3049e",
    "layout" : {
        "sm" : [],
        "md" : [],
        "lg" : []
    },
    "hitCount" : 1,
    "panels" : [ 
        {
            "_id" : ObjectId("000000000000000000000002"),
            "visConfig" : {
                "_id" : ObjectId("000000000000000000000001"),
                "dataConfig" : {
                    "questionId" : 18,
                    "columns" : [ 
                        {
                            "type" : "COUNT",
                            "element" : "SE"
                        }, 
                        {
                            "levelFunction" : "MonthInYear",
                            "type" : "LEVEL",
                            "property" : "CRE",
                            "element" : "SE"
                        }, 
                        {
                            "type" : "LEVEL",
                            "property" : "OUT",
                            "element" : "SE"
                        }
                    ]
                },
                "viewConfig" : {},
                "meta" : {},
            },
        }, 
    ],
}

还有一个示例查询(旨在轻松在Robo 3T中运行)

    // Inputs for the query: the element mnemonics a board is scored against,
    // the groups of the requesting user, and the id of that user.
    var mnemonics = ["AB", "XY"];
    var userGroups = [];
    var userId = "00000000-0000-0000-0000-000000000000";

    // Ordering and paging applied once the boards have been scored.
    var sortAndPagingOptions = {
        sort: { name: 1 },
        skip: 10,
        take: 10
    };

    /**
     * $match stage: keep only the boards the user can see, i.e. boards the user
     * owns or boards shared with at least one of the user's groups.
     *
     * In a query document $in takes the array of candidate values directly, so
     * userGroups is passed as-is. Wrapping it as ["$this", userGroups] would
     * compare groupIds against the literal string "$this" and against the whole
     * userGroups array as a single value, rather than against each group id.
     */
    var visibleBoards = {
        $match: {
            $or: [
                { ownerId: userId },                    // Boards owned by the user
                { groupIds: { $in: userGroups } },      // Boards shared with any of the user's groups
            ],
        },
    };

    /**
     * $project stage that gathers what is needed to score a board:
     *  - coverVis: the panels whose _id equals the board's coverId (still an array here)
     *  - stats.totalElements: how many columns exist across all panel dataConfigs
     *  - stats.matchingElements: how many of those columns have an element listed in mnemonics
     */
    var calculateOverlapWithMnemonics = {
        $project: {
            _id: 1,
            name: 1,
            description: 1,
            ownerId: 1,
            coverId: 1,
            coverVis: {
                // Keep only the panel(s) acting as the board's cover.
                $filter: {
                    input: "$panels",
                    as: "panel",
                    cond: { $eq: ["$$panel._id", "$coverId"] },
                },
            },
            stats: {
                // Fold over one array of element codes per panel, e.g. ["SE", "SE"], ["DX"], ["AB"].
                $reduce: {
                    input: "$panels.visConfig.dataConfig.columns.element",
                    initialValue: { totalElements: 0, matchingElements: 0 },
                    in: {
                        // Running count of every column seen so far.
                        totalElements: {
                            $add: ["$$value.totalElements", { $size: "$$this" }],
                        },
                        // Running count of the columns whose element is one of the mnemonics.
                        matchingElements: {
                            $add: [
                                "$$value.matchingElements",
                                {
                                    $sum: {
                                        // Score each element 1 on a hit and 0 on a miss; $sum totals the scores.
                                        $map: {
                                            input: "$$this",
                                            as: "element",
                                            in: {
                                                $cond: [{ $in: ["$$element", mnemonics] }, 1, 0],
                                            },
                                        },
                                    },
                                },
                            ],
                        },
                    },
                },
            },
        },
    };

    /**
     * $project stage that turns the raw counts into the final board shape:
     *  1) percent: matchingElements expressed as a percentage of totalElements
     *  2) coverVis: the first cover panel as an object instead of an array
     *
     * $divide raises an error when the divisor is 0, which would abort the whole
     * aggregation for a board without any columns, so such boards get 0 percent.
     */
    var calculatePercentages = {
        $project: {
            _id: 1,
            name: 1,
            description: 1,
            ownerId: 1,
            coverVis: { $arrayElemAt: ["$coverVis", 0] },
            percent: {
                $cond: [
                    { $gt: ["$stats.totalElements", 0] },       // Only divide when there is at least one column
                    {
                        $multiply: [
                            { $divide: ["$stats.matchingElements", "$stats.totalElements"] },
                            100,
                        ],
                    },
                    0,                                          // No columns: nothing can overlap
                ],
            },
        },
    };

    /** $match stage: keep only the boards whose percent is at least 25. */
    var overThreshold = {
        $match: {
            percent: { $gte: 25 },
        },
    };

    // Ordering and paging stages, each built from sortAndPagingOptions.
    var sort = { $sort: sortAndPagingOptions.sort };
    var skip = { $skip: sortAndPagingOptions.skip };
    var limit = { $limit: sortAndPagingOptions.take };

    // Create the aggregation pipeline. Stages are listed in the order they run:
    // filter to the visible boards, compute the overlap stats, convert them to a
    // percentage, drop boards under the threshold, then sort and page what is
    // left (skip comes before limit).
    var pipeline = [
        visibleBoards,
        calculateOverlapWithMnemonics,
        calculatePercentages,
        overThreshold,
        sort,
        skip,
        limit,
    ];

// Run the pipeline against the `test` collection.
db.test.aggregate(pipeline);

0 个答案:

没有答案