我正在一个项目中,我们将收集近500万份文档。每个文档的大小约为18571字节,包含120到150个字段。
我必须在不到1秒的时间内返回我的回复。 mongo查询将对最多3,000,000个文档进行多面查询,执行近10到15组的操作。
这是我第一次处理大量数据,必须实时返回响应。
我已经实现了索引,它们将响应时间缩短到5到6秒,但我仍然需要不到1秒的时间。
以下是示例查询:
db.sample.aggregation(
"$match":{
"$and":[
{"is_new": <true/false>},
{"brand":<some-brand>},
{"year":{"$gte":<some-year>,"$lte":<some-year>}},
{"seller_id":{"$in":[<array-of-seller-ids-may-have-40,000-seller-ids>]}}
]
},
{
"$facet":{
"data":[{
"$project":{
"_id":"_id",
"brand":"$brand_name",
"model":"$model_name",
<will have almost 20 keys with lookup>
}
}],
"count":[{"$group":{"_id":"$_id"}},{"$count":"vin_count"}],
"price":[{"$bucketAuto":{"groupBy":"$price", "buckets":1}}],
<will have 12-15 group by>
}
}
)
下面是示例文档:
{
"_id" : "KNDMC5C11J6394584",
"brand_id" : 22,
"brand_name" : "XYZ",
"abc_id" : 1234567890,
"city" : "Gurgaon, IN",
"fluctuation" : 18,
"created_at" : ISODate("2018-08-17T06:08:12.940Z"),
"release_data" : "2018-06-29",
"seller_name" : "Seller name",
"seller_price" : 34890,
"seller_rating" : 4,
"seller_zip" : "12550",
"feature1" : "ABC",
"feature2" : 3300,
"feature3" : "AB",
"expected_price" : -1,
"exterior_color" : "Unknown",
"registered_dealer" : true,
"registered_brand" : "ABC",
"fluctuation_rate" : 20.700000000000003,
"fluctuation_type" : 2,
"fluc_type_name" : "Something",
"has_patents" : false,
"tested_frequency" : 24,
"interior_color" : "---",
"is_certified" : false,
"is_certified_iso" : false,
"is_featured" : false,
"is_new" : true,
"is_certified_bhel" : false,
"location" : {
"type" : "Point",
"coordinates" : [
-24.08180236816406,
31.507198333740234
]
},
"max_input" : 8,
"feature4" : 3,
"feature5" : 206,
"feature6" : "Something",
"monthly_payment" : 649,
"msrp" : 34890,
"feature7" : false,
"seller_id" : 123567890,
"product_family_name" : "abc",
"product_id" : 15,
"product_name" : "Something",
"reflection" : "Something",
"fluc_id" : 2312,
"fluc_name" : "something something (abc) ac",
"updated_at" : ISODate("2018-09-11T17:59:36.889Z"),
"product_damage_category" : "None",
"year" : 2018,
"damage_check" : "-",
"team_size" : "-",
"Technology" : {
"camera_unit" : true
}
}
以下是解释输出
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"is_new" : true
},
{
"year" : {
"$gte" : 2018,
"$lte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test_collection.col",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"is_new" : {
"$eq" : true
}
},
{
"year" : {
"$lte" : 2018
}
},
{
"year" : {
"$gte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"is_new" : 1,
"year" : 1,
"sp_id" : 1
},
"indexName" : "is_new_1_year_1_sp_id_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"is_new" : [ ],
"year" : [ ],
"sp_id" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"is_new" : [
"[true, true]"
],
"year" : [
"[2018.0, 2018.0]"
],
"sp_id" : [
"[47590.0, 47590.0]",
"[48333.0, 48333.0]",
"[51333.0, 51333.0]",
<range of 40,000 seller_ids>
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$facet" : {
"data" : [
{
"$project" : {
"_id" : "$_id",
"brand_name" : "$brand_name",
"feature1" : "$feature1",
"feature2" : "$feature2",
"feature3" : "$feature3",
"feature4" : "$feature4",
"feature5" : "$feature5",
"feature6" : "$feature6",
"feature7" : "$feature7",
"feature8" : "$feature8",
"feature9" : "$feature9",
"feature10" : "$feature10",
"feature11" : "$feature11",
"feature12" : "$feature12",
"feature13" : "$feature13",
"feature14" : "$feature14",
"feature15" : "$feature15",
"feature16" : "$feature16",
"feature17" : "$feature17",
"feature18" : "$feature18",
"feature19" : "$feature19",
"feature20" : "$feature20"
}
}
],
"count" : [
{
"$group" : {
"_id" : "$_id"
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$project" : {
"_id" : false,
"count" : true
}
}
],
"feature1" : [
{
"$match" : {
"feature1" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature1",
"name" : {
"$first" : "$feature1"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature2" : [
{
"$match" : {
"feature2" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature2",
"name" : {
"$first" : "$feature2"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature3" : [
{
"$match" : {
"feature3" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature3",
"name" : {
"$first" : "$feature3"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature4" : [
{
"$match" : {
"feature4" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature4",
"name" : {
"$first" : "$feature4"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature5" : [
{
"$match" : {
"feature5" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature5",
"name" : {
"$first" : "$fuel"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature6" : [
{
"$match" : {
"feature6" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature6",
"name" : {
"$first" : "$feature6"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature7" : [
{
"$match" : {
"feature7" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature7",
"name" : {
"$first" : "$feature7"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature8" : [
{
"$match" : {
"feature8" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature8",
"name" : {
"$first" : "$feature8"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature9" : [
{
"$match" : {
"feature9" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature9",
"name" : {
"$first" : "$feature9"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature10" : [
{
"$match" : {
"feature10" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature10",
"name" : {
"$first" : "$feature10"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"_id" : -1
}
}
}
],
"feature11" : [
{
"$match" : {
"feature11" : {
"$exists" : true
}
}
},
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
],
"feature12" : [
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
]
}
}
],
"ok" : 1
}
如果此信息对于解决方案不完整。我会提供更多。
最近1个月以来,我一直坚持下去。
任何帮助将不胜感激。