我在 MongoDB(版本 3.2)的集合中按如下结构存储文档:
{
"_id" : ObjectId("585a42b5b7e79d1c0c533f1f"),
"instanceId" : "i-b385a9bd",
"DiskSpaceAvailable" : {
"Datapoints" : [
{
"Timestamp" : ISODate("2016-12-20T12:14:00.000Z"),
"Average" : 4.32112884521484,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T12:32:00.000Z"),
"Average" : 4.32107543945312,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T12:50:00.000Z"),
"Average" : 4.32101821899414,
"Unit" : "Gigabytes"
}
]
},
"DiskSpaceUsed" : {
"Datapoints" : [
{
"Timestamp" : ISODate("2016-12-20T12:14:00.000Z"),
"Average" : 3.33073806762695,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T12:32:00.000Z"),
"Average" : 3.33079147338867,
"Unit" : "Gigabytes"
}
]
},
"MemoryUsed" : {
"Datapoints" : [
{
"Timestamp" : ISODate("2016-12-20T12:14:00.000Z"),
"Average" : 0.753532409667969,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T12:32:00.000Z"),
"Average" : 0.753063201904297,
"Unit" : "Gigabytes"
}
]
},
"MemoryUtilization" : {
"Datapoints" : [
{
"Timestamp" : ISODate("2016-12-20T12:18:00.000Z"),
"Average" : 19.5049320125989,
"Unit" : "Percent"
},
{
"Timestamp" : ISODate("2016-12-20T12:36:00.000Z"),
"Average" : 19.5078950721357,
"Unit" : "Percent"
},
{
"Timestamp" : ISODate("2016-12-20T12:54:00.000Z"),
"Average" : 19.5068086169722,
"Unit" : "Percent"
}
]
},
"DiskSpaceUtilization" : {
"Datapoints" : [
{
"Timestamp" : ISODate("2016-12-20T12:18:00.000Z"),
"Average" : 42.9914921714092,
"Unit" : "Percent"
},
{
"Timestamp" : ISODate("2016-12-20T12:36:00.000Z"),
"Average" : 42.9921815029693,
"Unit" : "Percent"
},
{
"Timestamp" : ISODate("2016-12-20T12:54:00.000Z"),
"Average" : 42.992920072498,
"Unit" : "Percent"
}
]
},
"SwapUtilization" : {
"Datapoints" : [
{
"Timestamp" : ISODate("2016-12-20T12:18:00.000Z"),
"Average" : 0,
"Unit" : "Percent"
},
{
"Timestamp" : ISODate("2016-12-20T12:36:00.000Z"),
"Average" : 0,
"Unit" : "Percent"
},
{
"Timestamp" : ISODate("2016-12-20T12:54:00.000Z"),
"Average" : 0,
"Unit" : "Percent"
},
{
"Timestamp" : ISODate("2016-12-20T13:12:00.000Z"),
"Average" : 0,
"Unit" : "Percent"
}
]
},
"SwapUsed" : {
"Datapoints" : [
{
"Timestamp" : ISODate("2016-12-20T13:06:00.000Z"),
"Average" : 0,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T13:24:00.000Z"),
"Average" : 0,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T12:36:00.000Z"),
"Average" : 0,
"Unit" : "Gigabytes"
}
]
},
"MemoryAvailable" : {
"Datapoints" : [
{
"Timestamp" : ISODate("2016-12-20T12:14:00.000Z"),
"Average" : 3.10872268676758,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T12:32:00.000Z"),
"Average" : 3.10919189453125,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T12:50:00.000Z"),
"Average" : 3.10895538330078,
"Unit" : "Gigabytes"
}
]
}
}
我正在尝试使用 MongoDB 的聚合功能,以下是我的查询:
// Purpose: for one instance, compute the average of every metric's
// Datapoints[].Average and collect the distinct Unit values per metric.
//
// Why this pipeline is slow / never returns: each $unwind emits one output
// document per array element, and the eight arrays below are independent of
// each other. Chaining the unwinds therefore yields the cross-product of all
// eight array lengths per matched input document
// (len1 * len2 * ... * len8 intermediate documents), all of which then have
// to flow into $group. Every additional $unwind multiplies the row count.
//
// NOTE(review): MongoDB 3.2 also accepts $avg inside $project, applied to an
// array-valued expression such as "$DiskSpaceAvailable.Datapoints.Average";
// that would avoid the unwinds entirely -- confirm against the 3.2 docs.
db.collectionSchema.aggregate([
{
// Restrict the pipeline to a single instance.
$match :{ "instanceId" : "i-b385a9bd" }
},
// Eight independent array expansions follow; together they form the
// cross-product described in the header comment.
{
$unwind : "$DiskSpaceAvailable.Datapoints"
},
{
$unwind : "$DiskSpaceUtilization.Datapoints"
},
{
$unwind : "$DiskSpaceUsed.Datapoints"
},
{
$unwind : "$MemoryUsed.Datapoints"
},
{
$unwind : "$SwapUtilization.Datapoints"
},
{
$unwind : "$MemoryAvailable.Datapoints"
},
{
$unwind : "$MemoryUtilization.Datapoints"
},
{
$unwind : "$SwapUsed.Datapoints"
},
{
// Collapse the expanded rows back to one document per instanceId:
// $avg averages the (heavily duplicated) per-row values, $addToSet keeps
// each distinct unit string once.
$group : { _id : "$instanceId" ,
DiskSpaceAvailable : { "$avg" : "$DiskSpaceAvailable.Datapoints.Average" } ,
DiskSpaceAvailableUnit : { "$addToSet" : "$DiskSpaceAvailable.Datapoints.Unit" },
DiskSpaceUtilization : {"$avg" : "$DiskSpaceUtilization.Datapoints.Average"},
DiskSpaceUtilizationUnit : {"$addToSet" : "$DiskSpaceUtilization.Datapoints.Unit"},
DiskSpaceUsed : {"$avg" : "$DiskSpaceUsed.Datapoints.Average"},
DiskSpaceUsedUnit : {"$addToSet" : "$DiskSpaceUsed.Datapoints.Unit"},
MemoryUsed :{"$avg" : "$MemoryUsed.Datapoints.Average"},
MemoryUsedUnit:{"$addToSet" : "$MemoryUsed.Datapoints.Unit"},
SwapUtilization:{"$avg" : "$SwapUtilization.Datapoints.Average"},
SwapUtilizationUnit:{"$addToSet" : "$SwapUtilization.Datapoints.Unit"},
MemoryAvailable:{"$avg" : "$MemoryAvailable.Datapoints.Average"},
MemoryAvailableUnit:{"$addToSet" : "$MemoryAvailable.Datapoints.Unit"},
MemoryUtilization:{"$avg" : "$MemoryUtilization.Datapoints.Average"},
MemoryUtilizationUnit: {"$addToSet" : "$MemoryUtilization.Datapoints.Unit"},
SwapUsed:{"$avg" : "$SwapUsed.Datapoints.Average"},
SwapUsedUnit: {"$addToSet" : "$SwapUsed.Datapoints.Unit"}
}
},
{
// Includes exactly the fields that $group already produced, so this stage
// does not change the shape of the result.
$project : { _id:1 ,
DiskSpaceAvailable:1 ,
DiskSpaceAvailableUnit : 1,
DiskSpaceUtilization : 1,
DiskSpaceUtilizationUnit : 1,
DiskSpaceUsed : 1,
DiskSpaceUsedUnit : 1,
MemoryUsed :1,
MemoryUsedUnit:1,
SwapUtilization:1,
SwapUtilizationUnit:1,
MemoryAvailable:1,
MemoryAvailableUnit:1,
MemoryUtilization:1,
MemoryUtilizationUnit: 1,
SwapUsed:1,
SwapUsedUnit:1
}
}
]);
这个查询会一直运行,始终不返回结果。我试过只保留前 4 个 $unwind 阶段,大约 3-4 秒就能返回;但加上第 5 个 $unwind 之后,查询就卡住,不再返回。我确信自己哪里做错了,却找不出问题所在,有人能指出我的错误吗?
欢迎任何建议,我也愿意修改文档结构。
谢谢:)
答案 0 :(得分:0)
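单个文档中包含了大量数据。对多个相互独立的数组连续执行 $unwind 时,中间文档的数量是各数组长度的乘积(笛卡尔积),因此每多展开一个数组,数据量就成倍增长;在此基础上再计算平均值,不仅会拉长响应时间,还会消耗大量资源!
要让聚合查询更快,我强烈建议您在插入文档时就计算好平均值,而不是在检索时才计算。
例如:添加第一个子文档(平均值为 5)时,DiskSpaceAvailable 的总体平均值为 5;再添加第二个子文档(平均值为 2)时,总体平均值为 (5 + 2) / 2 = 3.5。一般地,若已有 n 个数据点、当前平均值为 avg,新增数据点 x 后的平均值为 (avg × n + x) / (n + 1)。
数据结构大致如下: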
{
"_id" : ObjectId("585a42b5b7e79d1c0c533f1f"),
"instanceId" : "i-b385a9bd",
"DiskSpaceAvailableUnit": "Gigabytes",
"DiskSpaceAvailableAverage": <The computed average value>,
"DiskSpaceAvailable" : {
"Datapoints" : [
{
"Timestamp" : ISODate("2016-12-20T12:14:00.000Z"),
"Average" : 4.32112884521484,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T12:32:00.000Z"),
"Average" : 4.32107543945312,
"Unit" : "Gigabytes"
},
{
"Timestamp" : ISODate("2016-12-20T12:50:00.000Z"),
"Average" : 4.32101821899414,
"Unit" : "Gigabytes"
}
]
},
....
}
这样,检索时无需做任何计算,只要直接读取数据即可,响应速度也会非常快(比当前的响应时间短得多)。
不过,这种结构会增加插入/更新时的计算时间和复杂度。但如果更快的检索是最重要的,那么您应该考虑这种结构。