使用 $group 时,MongoDB 聚合框架非常慢

时间:2016-06-09 08:53:17

标签: mongodb performance aggregation-framework

我正在尝试使用 $group 执行聚合查询,以获得结果的总数。

"requested_items"(即我的结果)的总数约为 1,900,000 条。

如果带上 $group 阶段执行,查询非常慢(约 300 秒)。

如果不带 $group 阶段执行,查询非常快(约 1 秒)。

我做错了什么?

示例代码如下。

慢速查询

// Slow variant: identical to the fast query in this post except for the final
// $group stage, which collapses the whole stream into a single total.
db.minute.aggregate([
    // Document-level filter: status must equal "Homologado" and at least one
    // requested_items element must have a status matching /aceito/i.
    { $match: {
        $and: [
            { "status": "Homologado" },
            { "requested_items.status": /aceito/i },
        ]
    } },
    { $sort: {'_id': 1}},
    // Emit one document per requested_items element, then one per winner entry.
    { $unwind: "$requested_items" },
    { $unwind: "$requested_items.winner" },
    // Re-apply the filter, this time against each individual unwound element.
    { $match: {
        $and: [
            { "status": "Homologado" },
            { "requested_items.status": /aceito/i },
        ]
    } },
    // Keep only _id; nothing else is needed to count the rows.
    { $project: {
        "_id": 1
    } },
    // Grouping on a constant null key yields one output row with the count.
    { $group: {
        "_id" : null,
        "total" : {$sum: 1},
    } },
], {allowDiskUse: true});


快速查询

// Fast variant: the same pipeline as the slow query in this post, but without
// the trailing $group stage, so it returns the unwound rows instead of a total.
db.minute.aggregate([
    // Document-level filter: status must equal "Homologado" and at least one
    // requested_items element must have a status matching /aceito/i.
    { $match: {
        $and: [
            { "status": "Homologado" },
            { "requested_items.status": /aceito/i },
        ]
    } },
    { $sort: {'_id': 1}},
    // Emit one document per requested_items element, then one per winner entry.
    { $unwind: "$requested_items" },
    { $unwind: "$requested_items.winner" },
    // Re-apply the filter, this time against each individual unwound element.
    { $match: {
        $and: [
            { "status": "Homologado" },
            { "requested_items.status": /aceito/i },
        ]
    } },
    // Keep only _id in the returned rows.
    { $project: {
        "_id": 1
    } },
], {allowDiskUse: true});


数据库结构

{
    "_id" : "12345678ABCD",
    "field_1" : [ 
        {
            "a" : null,
            "b" : "ABC"
        }, 
        {
            "code" : null,
            "b" : "ABCD"
        }
    ],
    "status" : "Homologado",
    "initial_date" : ISODate("2016-05-24T11:31:00.000Z"),
    "field_2" : [ 
        {
            "a" : "ABC",
            "b" : "ABCDE"
        }, 
        {
            "a" : "ABCF",
            "b" : "ABCDEF"
        }
    ],
    "field_3" : "Lorem ipsum dolor sit amet...",
    "field_4" : [ 
        {
            "date" : ISODate("2016-05-24T13:54:48.000Z"),
            "a" : "Text",
            "b" : "More text..."
        }
    ],
    "field_4" : 12312321,
    "field_5" : ISODate("2016-05-24T13:55:00.000Z"),
    "field_6" : "ABCD",
    "requested_items" : [ 
        {
            "status" : " Aceito e Habilitado",
            "field_a" : "Text...",
            "winner" : [ 
                {
                    "a" : "23213.213213.23/232-23",
                    "b" : 130446,
                    "c" : 543223,
                    "d" : NumberLong(2),
                    "e" : "ABC 123 FULANO",
                    "f" : "text",
                    "g" : {
                        "description" : "TEXT TEXT TEXT"
                    }
                },
                {
                    "a" : "23213.213213.23/232-23",
                    "b" : 130446,
                    "c" : 543223,
                    "d" : NumberLong(2),
                    "e" : "ABC 123 FULANO",
                    "f" : "text",
                    "g" : {
                        "description" : "TEXT TEXT TEXT"
                    }
                }
            ],
            "field_c" : {
                "_id" : ObjectId("5744dd3271af88052f0cc343"),
                "a" : "TEXT",
                "b" : "TEXT"
            },
            "field_d" : NumberLong(2),
            "field_e" : 5223,
            "field_f" : "Não",
            "field_g" : "-",
            "field_h" : {
                "field_a1" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a2" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a3" : {},
                "field_a4" : [ 
                    {
                        "date" : ISODate("2016-05-24T11:34:32.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:12:54.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:48:21.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:38.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:47.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:01:36.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:15:02.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }
                ]
            },
            "field_i" : "Não",
            "field_j" : 1
        }, 
        {
            "status" : " Aceito e Habilitado",
            "field_a" : "Text...",
            "winner" : [ 
                {
                    "a" : "23213.213213.23/232-23",
                    "b" : 130446,
                    "c" : 543223,
                    "d" : NumberLong(2),
                    "e" : "ABC 123 FULANO",
                    "f" : "text",
                    "g" : {
                        "description" : "TEXT TEXT TEXT"
                    }
                }
            ],
            "field_c" : {
                "_id" : ObjectId("5744dd3271af88052f0cc343"),
                "a" : "TEXT",
                "b" : "TEXT"
            },
            "field_d" : NumberLong(2),
            "field_e" : 5223,
            "field_f" : "Não",
            "field_g" : "-",
            "field_h" : {
                "field_a1" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a2" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a3" : {},
                "field_a4" : [ 
                    {
                        "date" : ISODate("2016-05-24T11:34:32.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:12:54.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:48:21.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:38.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:47.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:01:36.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:15:02.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }
                ]
            },
            "field_i" : "Não",
            "field_j" : 2
        }, 
        {
            "status" : " Aceito e Habilitado",
            "field_a" : "Text...",
            "winner" : [ 
                {
                    "a" : "23213.213213.23/232-23",
                    "b" : 130446,
                    "c" : 543223,
                    "d" : NumberLong(2),
                    "e" : "ABC 123 FULANO",
                    "f" : "text",
                    "g" : {
                        "description" : "TEXT TEXT TEXT"
                    }
                }
            ],
            "field_c" : {
                "_id" : ObjectId("5744dd3271af88052f0cc343"),
                "a" : "TEXT",
                "b" : "TEXT"
            },
            "field_d" : NumberLong(2),
            "field_e" : 5223,
            "field_f" : "Não",
            "field_g" : "-",
            "field_h" : {
                "field_a1" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a2" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a3" : {},
                "field_a4" : [ 
                    {
                        "date" : ISODate("2016-05-24T11:34:32.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:12:54.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:48:21.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:38.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:47.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:01:36.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:15:02.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }
                ]
            },
            "field_i" : "Não",
            "field_j" : 3
        }, 
    ],
    "field_7" : "TEXT",
    "field_8" : {
        "a" : "TEXT",
        "b" : "TEXT",
        "c" : "324234",
        "d" : "TEXT TEXT TEXT TEXT"
    },
    "field_9" : 43234
}


EXPLAIN

{
    "waitedMS" : NumberLong(0),
    "stages" : [ 
        {
            "$cursor" : {
                "query" : {
                    "$and" : [ 
                        {
                            "status" : "Homologado"
                        }, 
                        {
                            "requested_items.status" : /aceito/i
                        }
                    ]
                },
                "queryPlanner" : {
                    "plannerVersion" : 1,
                    "namespace" : "module_database.minute",
                    "indexFilterSet" : false,
                    "parsedQuery" : {
                        "$and" : [ 
                            {
                                "status" : {
                                    "$eq" : "Homologado"
                                }
                            }, 
                            {
                                "requested_items.status" : /aceito/i
                            }
                        ]
                    },
                    "winningPlan" : {
                        "stage" : "COLLSCAN",
                        "filter" : {
                            "$and" : [ 
                                {
                                    "status" : {
                                        "$eq" : "Homologado"
                                    }
                                }, 
                                {
                                    "requested_items.status" : /aceito/i
                                }
                            ]
                        },
                        "direction" : "forward"
                    },
                    "rejectedPlans" : []
                }
            }
        }, 
        {
            "$unwind" : {
                "path" : "$requested_items"
            }
        }, 
        {
            "$unwind" : {
                "path" : "$requested_items.winner"
            }
        }, 
        {
            "$match" : {
                "$and" : [ 
                    {
                        "status" : "Homologado"
                    }, 
                    {
                        "requested_items.status" : /aceito/i
                    }
                ]
            }
        }, 
        {
            "$group" : {
                "_id" : {
                    "$const" : null
                },
                "numberOfdocs" : {
                    "$sum" : {
                        "$const" : 1
                    }
                }
            }
        }
    ],
    "ok" : 1
}

我的服务器是:
操作系统:UBUNTU14 / 64
CPU:6
RAM:16 GB
总存储量:80 GB
服务器上只运行了与本问题相关的测试。

2 个答案:

答案 0 :(得分:7)

我最终解决了查询的问题。问题出在模式(schema)设计上:我沿用了 SQL 世界的思路,在考虑应用程序如何访问数据之前就设计了这些集合,结果导致查询很慢。

要解决这个问题,我需要重新设计我的集合,并将相关数据放在文档的第一层。在查找资料的过程中,我发现在聚合(Aggregation)中,只有位于管道最前面的阶段才能使用索引。如果在 $unwind 阶段之后才使用带索引的字段,索引将不会被用到。

除此之外,我使用包https://github.com/darkskyapp/string-hash为文本字段创建一个int哈希。因此,我的文本字段可以编入索引。

这样,我的查询耗时从 300 秒降到了 5 秒。

答案 1 :(得分:1)

由于我们不了解环境细节,很难判断查询慢的具体原因。您可以尝试添加以下选项,通过 explain 了解查询是如何被执行的:

{
   explain:true
}

将其添加到聚合查询的选项中,例如 db.coll.aggregate([pipeline], {explain: true, allowDiskUse: true})(所有选项必须放在同一个对象里)。另外还要考虑到,$unwind 会使需要处理的文档数量成倍增加。

既然您只是在统计文档数量,更快的做法可能是(在第一次 $unwind 之后)直接取数组的 $size,最后再求和:

// Count winners without the second $unwind: after unwinding requested_items,
// add up the length of each element's winner array instead of emitting one
// row per winner.
db.coll.aggregate(
   [
      // Same document-level filter as the question's pipeline.
      { $match: { "status": "Homologado", "requested_items.status": /aceito/i } },
      // First (and only) unwind. Without it "$requested_items.winner" is an
      // array of arrays and $size would count requested items, not winners.
      { $unwind: "$requested_items" },
      // Drop unwound elements whose own status does not match.
      { $match: { "requested_items.status": /aceito/i } },
      {
         $group: {
            _id: null,
            // $size requires an array argument, so a missing winner field is
            // treated as an empty array (contributes 0).
            numberOfdocs: { $sum: { $size: { $ifNull: [ "$requested_items.winner", [] ] } } }
         }
      }
   ],
   { allowDiskUse: true }
)

修改

在反复试验这个查询之后,我将执行时间减少了大约 45%。关键是跳过第二个 $match,因为它会扫描完整的结果集;改为先让 $group 对所有数据分组,最后再筛选出需要的内容,因为这时的过滤操作是在一个很小的结果集上进行的。

// Count unwound winner entries per requested_items.status, then keep only the
// "aceito" groups. Filtering after $group means the regex is evaluated once
// per distinct status value instead of once per unwound row.
// The result has one row per matching status; add up numberOfdocs for the total.
db.coll.aggregate([{
            $match : {
                "status" : "Homologado"
            }
        }, {
            $unwind : "$requested_items"
        }, {
            $unwind : "$requested_items.winner"
        }, {
            // Carry only the status forward; it becomes the grouping key.
            $project : {
                x : "$requested_items.status",
            }
        }, {
            $group : {
                _id : "$x",
                numberOfdocs : {
                    $sum : 1
                }
            }
        }, {
            $match : {
                // Fixed typo: was /acesssito/i, which does not match the sample
                // status " Aceito e Habilitado" nor the question's /aceito/i filter.
                "_id" : /aceito/i
            }
        }
    ], {
        allowDiskUse: true
});