使用多个分组的MongoDB慢面查询

时间:2018-09-18 07:50:04

标签: mongodb mongodb-query aggregation-framework facet

我正在一个项目中,我们将收集近500万份文档。每个文档的大小约为18571字节,包含120到150个字段。

我必须在不到1秒的时间内返回我的回复。 mongo查询将对最多3,000,000个文档进行多面查询,执行近10到15组的操作。

这是我第一次处理大量数据,必须实时返回响应。

我已经实现了索引,它们将响应时间缩短到5到6秒,但我仍然需要不到1秒的时间。

以下是示例查询:

db.sample.aggregation(
    "$match":{
        "$and":[
            {"is_new": <true/false>},
            {"brand":<some-brand>},
            {"year":{"$gte":<some-year>,"$lte":<some-year>}},
            {"seller_id":{"$in":[<array-of-seller-ids-may-have-40,000-seller-ids>]}}
        ]
    },
    {
        "$facet":{
            "data":[{
                "$project":{
                    "_id":"_id",
                    "brand":"$brand_name",
                    "model":"$model_name",
                    <will have almost 20 keys with lookup>
                }
            }],
            "count":[{"$group":{"_id":"$_id"}},{"$count":"vin_count"}],
            "price":[{"$bucketAuto":{"groupBy":"$price", "buckets":1}}],
            <will have 12-15 group by>
        }
    }
)

下面是示例文档:

{
    "_id" : "KNDMC5C11J6394584",
    "brand_id" : 22,
    "brand_name" : "XYZ",
    "abc_id" : 1234567890,
    "city" : "Gurgaon, IN",
    "fluctuation" : 18,
    "created_at" : ISODate("2018-08-17T06:08:12.940Z"),
    "release_data" : "2018-06-29",
    "seller_name" : "Seller name",
    "seller_price" : 34890,
    "seller_rating" : 4,
    "seller_zip" : "12550",
    "feature1" : "ABC",
    "feature2" : 3300,
    "feature3" : "AB",
    "expected_price" : -1,
    "exterior_color" : "Unknown",
    "registered_dealer" : true,
    "registered_brand" : "ABC",
    "fluctuation_rate" : 20.700000000000003,
    "fluctuation_type" : 2,
    "fluc_type_name" : "Something",
    "has_patents" : false,
    "tested_frequency" : 24,
    "interior_color" : "---",
    "is_certified" : false,
    "is_certified_iso" : false,
    "is_featured" : false,
    "is_new" : true,
    "is_certified_bhel" : false,
    "location" : {
        "type" : "Point",
        "coordinates" : [
            -24.08180236816406,
            31.507198333740234
        ]
    },
    "max_input" : 8,
    "feature4" : 3,
    "feature5" : 206,
    "feature6" : "Something",
    "monthly_payment" : 649,
    "msrp" : 34890,
    "feature7" : false,
    "seller_id" : 123567890,
    "product_family_name" : "abc",
    "product_id" : 15,
    "product_name" : "Something",
    "reflection" : "Something",
    "fluc_id" : 2312,
    "fluc_name" : "something something (abc) ac",
    "updated_at" : ISODate("2018-09-11T17:59:36.889Z"),
    "product_damage_category" : "None",
    "year" : 2018,
    "damage_check" : "-",
    "team_size" : "-",
    "Technology" : {
        "camera_unit" : true
    }
}

以下是解释输出

{
    "stages" : [
        {
            "$cursor" : {
                "query" : {
                    "$and" : [
                        {
                            "is_new" : true
                        },
                        {
                            "year" : {
                                "$gte" : 2018,
                                "$lte" : 2018
                            }
                        },
                        {
                            "sp_id" : {
                                "$in" : [<list of 40,000 seller ids>]
                            }
                        }
                    ]
                },
                "queryPlanner" : {
                    "plannerVersion" : 1,
                    "namespace" : "test_collection.col",
                    "indexFilterSet" : false,
                    "parsedQuery" : {
                        "$and" : [
                            {
                                "is_new" : {
                                    "$eq" : true
                                }
                            },
                            {
                                "year" : {
                                    "$lte" : 2018
                                }
                            },
                            {
                                "year" : {
                                    "$gte" : 2018
                                }
                            },
                            {
                                "sp_id" : {
                                    "$in" : [<list of 40,000 seller ids>]
                                }
                            }
                        ]
                    },
                    "winningPlan" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "keyPattern" : {
                                "is_new" : 1,
                                "year" : 1,
                                "sp_id" : 1
                            },
                            "indexName" : "is_new_1_year_1_sp_id_1",
                            "isMultiKey" : false,
                            "multiKeyPaths" : {
                                "is_new" : [ ],
                                "year" : [ ],
                                "sp_id" : [ ]
                            },
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "forward",
                            "indexBounds" : {
                                "is_new" : [
                                    "[true, true]"
                                ],
                                "year" : [
                                    "[2018.0, 2018.0]"
                                ],
                                "sp_id" : [
                                    "[47590.0, 47590.0]",
                                    "[48333.0, 48333.0]",
                                    "[51333.0, 51333.0]",
                                    <range of 40,000 seller_ids>
                                ]
                            }
                        }
                    },
                    "rejectedPlans" : [ ]
                }
            }
        },
        {
            "$facet" : {
                "data" : [
                    {
                        "$project" : {
                            "_id" : "$_id",
                            "brand_name" : "$brand_name",
                            "feature1" : "$feature1",
                            "feature2" : "$feature2",
                            "feature3" : "$feature3",
                            "feature4" : "$feature4",
                            "feature5" : "$feature5",
                            "feature6" : "$feature6",
                            "feature7" : "$feature7",
                            "feature8" : "$feature8",
                            "feature9" : "$feature9",
                            "feature10" : "$feature10",
                            "feature11" : "$feature11",
                            "feature12" : "$feature12",
                            "feature13" : "$feature13",
                            "feature14" : "$feature14",
                            "feature15" : "$feature15",
                            "feature16" : "$feature16",
                            "feature17" : "$feature17",
                            "feature18" : "$feature18",
                            "feature19" : "$feature19",
                            "feature20" : "$feature20"
                        }
                    }
                ],
                "count" : [
                    {
                        "$group" : {
                            "_id" : "$_id"
                        }
                    },
                    {
                        "$group" : {
                            "_id" : {
                                "$const" : null
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$project" : {
                            "_id" : false,
                            "count" : true
                        }
                    }
                ],
                "feature1" : [
                    {
                        "$match" : {
                            "feature1" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature1",
                            "name" : {
                                "$first" : "$feature1"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature2" : [
                    {
                        "$match" : {
                            "feature2" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature2",
                            "name" : {
                                "$first" : "$feature2"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature3" : [
                    {
                        "$match" : {
                            "feature3" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature3",
                            "name" : {
                                "$first" : "$feature3"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature4" : [
                    {
                        "$match" : {
                            "feature4" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature4",
                            "name" : {
                                "$first" : "$feature4"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature5" : [
                    {
                        "$match" : {
                            "feature5" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature5",
                            "name" : {
                                "$first" : "$fuel"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature6" : [
                    {
                        "$match" : {
                            "feature6" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature6",
                            "name" : {
                                "$first" : "$feature6"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature7" : [
                    {
                        "$match" : {
                            "feature7" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature7",
                            "name" : {
                                "$first" : "$feature7"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature8" : [
                    {
                        "$match" : {
                            "feature8" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature8",
                            "name" : {
                                "$first" : "$feature8"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature9" : [
                    {
                        "$match" : {
                            "feature9" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature9",
                            "name" : {
                                "$first" : "$feature9"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature10" : [
                    {
                        "$match" : {
                            "feature10" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature10",
                            "name" : {
                                "$first" : "$feature10"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "_id" : -1
                            }
                        }
                    }
                ],
                "feature11" : [
                    {
                        "$match" : {
                            "feature11" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$bucketAuto" : {
                            "groupBy" : "$feature11",
                            "buckets" : 1,
                            "output" : {
                                "count" : {
                                    "$sum" : {
                                        "$const" : 1
                                    }
                                }
                            }
                        }
                    }
                ],
                "feature12" : [
                    {
                        "$bucketAuto" : {
                            "groupBy" : "$feature11",
                            "buckets" : 1,
                            "output" : {
                                "count" : {
                                    "$sum" : {
                                        "$const" : 1
                                    }
                                }
                            }
                        }
                    }
                ]
            }
        }
    ],
    "ok" : 1
}

如果此信息对于解决方案不完整。我会提供更多。

最近1个月以来,我一直坚持下去。

任何帮助将不胜感激。

0 个答案:

没有答案