减少聚合执行时间

时间:2017-01-25 09:08:40

标签: mongodb performance mongodb-query aggregation-framework

我能够得到我想要的结果,但我想知道是否有一些方法可以减少我所做的聚合的执行时间。

首先,这是我的数据:

............................................... ...................................

activites

{ "_id" : ObjectId("58872a885bd87fa3b7e736cf"), "jour" : "2015-01-01", "sgt_id" : 1, "produit_id" : 1, "affichages" : 1525, "clics" : 16, "consultations" : 20, "ajoutsPanier" : 1, "unites" : 0, "commandes" : 0, "recettes" : 0, "demandeBrute" : 0, "txDispo" : "NULL" }
{ "_id" : ObjectId("58872a885bd87fa3b7e736d0"), "jour" : "2015-01-01", "sgt_id" : 1, "produit_id" : 3, "affichages" : 519, "clics" : 6, "consultations" : 7, "ajoutsPanier" : 0, "unites" : 0, "commandes" : 0, "recettes" : 0, "demandeBrute" : 0, "txDispo" : "NULL" }
{ "_id" : ObjectId("58872a885bd87fa3b7e736d1"), "jour" : "2015-01-01", "sgt_id" : 1, "produit_id" : 5, "affichages" : 421, "clics" : 5, "consultations" : 6, "ajoutsPanier" : 1, "unites" : 0, "commandes" : 0, "recettes" : 0, "demandeBrute" : 0, "txDispo" : "NULL" }

和1400万这样的条目...

............................................... ...................................

categories2

{ "_id" : ObjectId("5888609e5bd87fa3b7c72551"), "categorie_id" : 108, "type" : 1, "niveau" : 2, "hierarchie" : 2, "cat_id_client" : "Accessories", "categorie" : "Accessories", "label" : "NULL", "createur_id" : "NULL", "produit_id" : [ 867, 2943, 6443, 6447, 6525 ] }
{ "_id" : ObjectId("5888609e5bd87fa3b7c7259f"), "categorie_id" : 110, "type" : 1, "niveau" : 2, "hierarchie" : 2, "cat_id_client" : "Jewelry & watches", "categorie" : "Jewelry & watches", "label" : "NULL", "createur_id" : "NULL", "produit_id" : [ 2849, 2853, 2857, 2867, 2873, 2885, 2891, 2893, 2897, 2903, 2907, 2913, 2919, 2927, 2945, 2957, 2963, 3531, 3533, 3535, 3537, 3539, 3541, 3543, 3545, 3547, 3549, 3551, 3553, 3555, 3557, 3559, 3561, 3563, 3565, 3567, 3569, 3571, 3573, 3575, 3577, 3579, 3581, 3583, 3585, 3587, 3589, 3591, 3593, 3595, 3597, 3599, 3601, 3603, 3605, 3607, 3609, 3611, 3613, 3615, 3617, 3619, 3621, 3623, 3625, 3627, 3629, 3631, 6441, 6443, 6445, 6449, 6451, 6453, 6455, 6457, 6459, 6461, 6463, 6465, 6467, 6469, 6471, 6473, 6475, 6477, 6479, 6481, 6483, 6485, 6487, 6489, 6491, 6493, 6495, 6497, 6499, 6501, 6503, 6505, 6507, 6509, 6511, 6513, 6515, 6517, 6519, 6521, 6523, 6527 ] }
{ "_id" : ObjectId("5888609e5bd87fa3b7c725a2"), "categorie_id" : 106, "type" : 1, "niveau" : 2, "hierarchie" : 2, "cat_id_client" : "Clothing", "categorie" : "Clothing", "label" : "NULL", "createur_id" : "NULL", "produit_id" : [ 1485, 1487, 1489, 1491, 1493, 1495, 1497, 1499, 1501, 1503, 1505, 1507, 1509, 1511, 1513, 1515, 1517, 1519, 1521, 1523, 1525, 1527, 1681, 1683, 1685, 1687, 1689, 1691, 1693, 1695, 1697, 1699, 1701, 1703, 1705, 1707, 1709, 1711, 1713, 1715, 1717, 1721, 1723, 1725, 1727, 1729, 1731, 1733, 1735, 1737, 1739, 1741, 1743, 1745, 1747, 1749, 1751, 1753, 1755, 1757, 1759, 1761, 1763, 1765, 1767, 1769, 1771, 1773, 1775, 1777, 1779, 1781, 1783, 1785, 1787, 1789, 1791, 1793, 1795, 1797, 1799, 1801, 1803, 1805, 1807, 1809, 1811, 1813, 1815, 1817, 1819, 1821, 1823, 1825, 1827, 1829, 1831, 1833, 1835, 1837, 1839, 1841, 1843, 1845, 1847, 1849, 1851, 1853, 1855, 1857, 1859, 1861, 1863, 1867, 1869, 1871, 1873, 1875, 1877, 1879, 1881, 2845, 2851, 2855, 2859, 2863, 2869, 2871, 2877, 2879, 2881, 2887, 2895, 2905, 2909, 2911, 2917, 2923, 2925, 2929, 2933, 2935, 2939, 2941, 2947, 2951, 2953, 2959, 3849, 3851, 3853, 3855, 3857, 3859, 3861, 3863, 3865, 3867, 3869, 3871, 3873, 3875, 3877, 3879, 3881, 3883, 3885, 3887, 3889, 3891, 3893, 3895, 3897, 3899, 3901, 3903, 3905, 4969, 4971, 4973, 4975, 4977, 4979, 4981, 4983, 4985, 4987, 4989, 4991, 4993, 4995, 4997, 4999, 5001, 5003, 5005, 5007, 5009, 5011, 5013, 5015, 5017, 5019, 5021, 5023, 5025, 5027, 5029, 5031, 5033, 5035, 5037, 5039, 5041, 5043, 5045, 5047, 5049, 5743, 5745, 5747, 5749, 5751, 5753, 5755, 5757, 5759, 5761, 5763, 5765, 5767, 5769, 5771, 5773, 5775, 5777, 5779, 5781, 5783, 5785, 5787, 5789, 5791, 5793, 5795, 5797, 5799, 5801, 5803, 5805, 5807, 5809, 5811, 5813, 5815, 5817, 5819, 5821, 5823, 5825, 5827, 5829, 5831, 5833, 5835, 5837, 5839, 5841, 5843, 5845, 5847, 5849, 5851, 5853, 5855, 5857, 5859, 5861, 5863, 5865, 5867, 5869, 5871, 5873, 5875, 5877, 5879, 5881, 5883, 5885, 5887, 5889, 5891, 5893, 5895, 
5897, 5899, 5901, 5903, 5905, 5907, 5909, 5911, 5913, 5915, 5917, 5919, 5921, 5923, 5925, 5927, 5929, 5931, 5933, 5935, 5937, 5939, 5941, 5943, 5945, 5947, 5949, 5951, 5953, 5955, 5957 ] }
{ "_id" : ObjectId("5888609e5bd87fa3b7c725c0"), "categorie_id" : 107, "type" : 1, "niveau" : 2, "hierarchie" : 2, "cat_id_client" : "Shoes", "categorie" : "Shoes", "label" : "NULL", "createur_id" : "NULL", "produit_id" : [ 1719, 1865, 2861, 2875, 2883, 2889, 2899, 2901, 2915, 2921, 2931, 2937, 2949, 2955, 2961, 5487, 5489, 5491, 5493, 5495, 5497, 5499, 5501, 5503, 5505, 5507, 5509, 5511, 5513, 5515, 5517, 5519, 5521, 5523, 5525, 5527, 5529, 5531, 5533, 5535, 5537, 5539, 5541, 5543, 5545, 5547, 5549, 5551, 5553, 5555, 5557, 5559, 5561, 5563, 5565, 5567, 5569 ] }
{ "_id" : ObjectId("5888609e5bd87fa3b7c725ea"), "categorie_id" : 109, "type" : 1, "niveau" : 2, "hierarchie" : 2, "cat_id_client" : "Handbags", "categorie" : "Handbags", "label" : "NULL", "createur_id" : "NULL", "produit_id" : [ 845, 847, 849, 851, 853, 855, 857, 859, 861, 863, 865, 2847, 2865 ] }
{ "_id" : ObjectId("5888609e5bd87fa3b7c725f9"), "categorie_id" : 111, "type" : 1, "niveau" : 2, "hierarchie" : 2, "cat_id_client" : "Health & beauty", "categorie" : "Health & beauty", "label" : "NULL", "createur_id" : "NULL", "produit_id" : [ 3249, 3251, 3253, 3255, 3257, 3259, 3261, 3263, 3265 ] }

我希望得到的结果是:

{ "_id" : 106, "categorie" : "Clothing", "consultations" : 185507, "recettes" : 1592183.49 }
{ "_id" : 107, "categorie" : "Shoes", "consultations" : 53636, "recettes" : 277869.81 }
{ "_id" : 110, "categorie" : "Jewelry & watches", "consultations" : 47071, "recettes" : 116746.03 }
{ "_id" : 109, "categorie" : "Handbags", "consultations" : 7149, "recettes" : 90921.05 }
{ "_id" : 111, "categorie" : "Health & beauty", "consultations" : 4542, "recettes" : 7671.51 }
{ "_id" : 108, "categorie" : "Accessories", "consultations" : 1718, "recettes" : 15689.43 }

也就是说:对于每个类别,计算属于该类别的所有产品的 consultations 和 recettes 的总和。

获取此结果的代码:

// For every selected category, sum `consultations` and `recettes` over the
// activities of all products belonging to that category, restricted to one
// segment (sgt_id) and one date range, sorted by consultations descending.
//
// NOTE(review): $lookup speed depends on an index covering
// activites.produit_id — confirm one exists on the `activites` collection.
db.categories2.aggregate([
    // Keep only the categories requested by the user.
    {
        $match: {
            type: 1,
            niveau: 2,
            hierarchie: 2
        }
    },
    // One document per (category, product) pair.
    {
        $unwind: "$produit_id"
    },
    // Join each product with its activity rows.
    {
        $lookup: {
            from: "activites",
            localField: "produit_id",
            foreignField: "produit_id",
            as: "activites"
        }
    },
    // Unwinding the joined field immediately after $lookup lets the optimizer
    // coalesce the two stages, so the full per-product activity array is never
    // materialised (this replaces the former $project/$filter + $unwind pair).
    {
        $unwind: "$activites"
    },
    // Same predicate as the former $filter: `jour` is an ISO "YYYY-MM-DD"
    // string, so string comparison orders the dates correctly.
    {
        $match: {
            "activites.jour": { $gte: "2016-09-01", $lte: "2016-11-03" },
            "activites.sgt_id": 1
        }
    },
    {
        $group: {
            _id: "$categorie_id",
            // Carry the category name through so the output matches the
            // expected result ({ _id, categorie, consultations, recettes }).
            categorie: { $first: "$categorie" },
            consultations: { $sum: "$activites.consultations" },
            recettes: { $sum: "$activites.recettes" }
        }
    },
    {
        $sort: { "consultations" : -1 }
    }
])

说明:

  • 匹配用户请求的类别。每个类别都包含一个 produit_id 字段,该字段是产品ID的数组
  • 展开(unwind)此数组
  • 对于每一行(即每个产品),在 activites 集合中查找,以获取 consultations 和 recettes 字段
  • 过滤活动结果以匹配用户提供的日期
  • 展开(unwind)所有找到的活动,使每天的活动各占一行
  • 按 categorie_id 对结果进行分组,以得到 consultations 和 recettes 的总和

问题是:

  • 对 activites 的 $lookup 需要大约1~2秒(我认为由于此集合有1400万条目,我们无法做得更好)
  • 最后的 $group 需要大约5秒,才能按 categorie_id 对所有 activites 进行分组并执行求和

总的来说,整个请求大约需要7.5秒才能完成。

有没有办法做得更好,或许换一种查询方式?

非常感谢你的帮助!

更新:我想也许有一种方法可以在 $lookup 之后直接对子文档 activites 进行分组,从而避免在此之后使用 $project 和 $unwind。

1 个答案:

答案 0 :(得分:0)

我成功地将执行时间减少到了300毫秒,方法是把查询拆分成两个函数,并在 JavaScript 中手动执行求和:

{"MaxNumberOfItemsToReceiveInSingleBatch":null,"LastDocumentEtag":"FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF","LastAttachmentEtag":"FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF","ServerInstanceId":"26174fc9-74f5-410a-bef0-0f7cd844c481","Source":"http://SOURCE-RAVEN-SERVER:8080/databases/DATABASE-NAME","LastModified":"2017-02-06T09:05:32.8443655Z","LastBatchSize":1}