我有一个现有的、嵌套很深的 MongoDB 模式(schema),需要将其展平,因为我有一个复杂的查询,在当前结构下无法高效执行。下面是该模式的最小示例(MWE):
// Sample document: a single top-level document whose "details" array holds one
// sub-document with four array fields ("a" through "d"); each of those arrays
// contains one entry with "unit", "size", "pos" and its own "_id".
db.test.insert({
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"details" : [
{
"_id" : ObjectId("58e55f0f68afb6085ec3a2cc"),
"a" : [
{
"unit" : "08",
"size" : "5",
"pos" : "Far",
"_id" : ObjectId("58e55f0f68afb6085ec3a2d0")
}
],
"b" : [
{
"unit" : "08",
"size" : "5",
"pos" : "Far",
"_id" : ObjectId("58e55f0f68afb6085ec3a2cd")
}
],
"c" : [
{
"unit" : "08",
"size" : "3",
"pos" : "Far",
"_id" : ObjectId("58e55f0f68afb6085ec3a2ce")
}
],
"d" : [
{
"unit" : "08",
"size" : "5",
"pos" : "Far",
"_id" : ObjectId("58e55f0f68afb6085ec3a2cf")
}
]
}
]
})
我想展平这个模式。期望的结果是:
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"tests" : [
{
"_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
"aUnit" : "08",
"aSize" : "5",
"aPos" : "Far",
"bPos" : "Far",
"bSize" : "5",
"bUnit" : "08",
"cPos" : "Far",
"cSize" : "3",
"cUnit" : "08",
"dPos" : "Far",
"dSize" : "5",
"dUnit" : "08"
}
]
我可以接受逐个处理每种条目类型,我以为自己找到了一种可行的做法,但它不起作用。这是我尝试过的代码:
// Attempted migration from the question: for every matched document, copy the
// nested "unit" value into a new "tests" object and save the document back.
// NOTE(review): the filter path starts with "tests", while the sample document
// shown earlier only has a top-level "details" field, so this query presumably
// matches nothing and the callback never runs — confirm against the real data.
db.test.find({"tests.$.details.a.unit":{$exists:true}}).forEach(function(doc) {
// NOTE(review): `tests` is not declared anywhere in this snippet (the loaded
// document is `doc`), and "details" / "a" are arrays in the sample data, so
// this dotted access would not reach "unit" even if the callback ran.
doc.tests = {aUnit:tests.details.a.unit};
delete tests.details.a.unit;
// Write the modified document back to the collection.
db.test.save(doc);
});
然而,这没有任何改变。如何改进我的查询以展平我的架构?
编辑:我发现 MWE 与我实际要处理的数据相比有一个小错误:我把每个条目都单独用花括号括了起来。例如,"a" : [{ ... }], 被错误地写成了 {"a" : [{ ... }]},。不过现在已经更正。
答案 0 :(得分:5)
打印数据
# NOTE(review): this Python/boto3 snippet looks unrelated to the surrounding
# MongoDB answer — confirm whether it belongs here.
import boto3
# Obtain the S3 service resource.
s3 = boto3.resource('s3')
# Source object to copy, identified by its bucket name and key.
copy_source = {
'Bucket': 'mybucket',
'Key': 'mykey'
}
# Copy the source object to key 'otherkey' in bucket 'otherbucket' through the
# resource's underlying client.
s3.meta.client.copy(copy_source, 'otherbucket', 'otherkey')
更新数据
// Preview only: flatten every entry of "details" in memory and print the
// resulting document. Nothing is written back to the collection.
db.test.find().forEach((doc) => {
  doc.details = doc.details.map((detail) => {
    // Object.keys() returns a snapshot, so the keys added below are not revisited.
    const groupNames = Object.keys(detail).filter((name) => name !== "_id");
    for (const group of groupNames) {
      detail[group].forEach((entry) => {
        for (const field of Object.keys(entry)) {
          if (field === "_id") {
            continue;
          }
          // e.g. group "a" + field "unit" -> "aUnit"
          const suffix = field.charAt(0).toUpperCase() + field.substr(1);
          detail[group + suffix] = entry[field];
        }
      });
      // The nested array is replaced by the flattened keys.
      delete detail[group];
    }
    return detail;
  });
  printjson(doc);
});
输出表单
// Flatten every entry of "details" and persist the result with batched
// bulkWrite() calls. Intended to be run directly in the mongo shell.

// Pending bulk operations; must be declared before the loop reads it.
let ops = [];

db.test.find().forEach((doc) => {
  doc.details = doc.details.map((detail) => {
    // Object.keys() returns a snapshot, so the keys added below are not revisited.
    const groupNames = Object.keys(detail).filter((name) => name !== "_id");
    for (const group of groupNames) {
      detail[group].forEach((entry) => {
        for (const field of Object.keys(entry)) {
          if (field === "_id") {
            continue;
          }
          // e.g. group "a" + field "unit" -> "aUnit"
          const suffix = field.charAt(0).toUpperCase() + field.substr(1);
          detail[group + suffix] = entry[field];
        }
      });
      // The nested array is replaced by the flattened keys.
      delete detail[group];
    }
    return detail;
  });

  // The $set path is relative to the stored document's root, so it must be
  // "details"; "doc.details" would create a new top-level "doc" field and
  // leave the original nested "details" array untouched.
  ops.push({
    updateOne: {
      filter: { _id: doc._id },
      update: { $set: { details: doc.details } },
    },
  });

  // Send the queued updates in batches of 500 to keep each request bounded.
  if (ops.length >= 500) {
    db.test.bulkWrite(ops);
    ops = [];
  }
});

// Flush whatever is left after the last full batch.
if (ops.length > 0) {
  db.test.bulkWrite(ops);
  ops = [];
}
{
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"details" : [
{
"_id" : ObjectId("58e55f0f68afb6085ec3a2cc"),
"aUnit" : "08",
"aSize" : "5",
"aPos" : "Far",
"bUnit" : "08",
"bSize" : "5",
"bPos" : "Far",
"cUnit" : "08",
"cSize" : "3",
"cPos" : "Far",
"dUnit" : "08",
"dSize" : "5",
"dPos" : "Far"
}
]
}
如果您是想"更新"数据,那么这比您目前的尝试要复杂得多。您的数据里有好几层数组,需要真正地"遍历"数组元素,而不是试图直接访问它们。
下面只是一个把"展平后"的数据"打印出来"的示例:
{
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"tests" : [
{
"_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
"details" : [
{
"a" : [
{
"unit" : "08",
"size" : "5",
"pos" : "Far",
"_id" : ObjectId("58e542fb68afb6085ec3a1d6")
}
]
},
{
"b" : [
{
"pos" : "Drive Side Far",
"size" : "5",
"unit" : "08",
"_id" : ObjectId("58e542fb68afb6085ec3a1d3")
}
]
},
{
"c" : [
{
"pos" : "Far",
"size" : "3",
"unit" : "08",
"_id" : ObjectId("58e542fb68afb6085ec3a1d4")
}
]
},
{
"d" : [
{
"pos" : "Far",
"size" : "5",
"unit" : "08",
"_id" : ObjectId("58e542fb68afb6085ec3a1d5")
}
]
}
]
}
]
}
我相信它会给你想要的结构:
// Preview only: lift every nested "details" value up onto its parent "tests"
// entry in memory and print the resulting document. Nothing is written back.
db.test.find().forEach((doc) => {
  doc.tests = doc.tests.map((test) => {
    test.details.forEach((detail) => {
      for (const group of Object.keys(detail)) {
        detail[group].forEach((entry) => {
          const fields = Object.keys(entry).filter((name) => name !== '_id');
          for (const field of fields) {
            // e.g. group "a" + field "unit" -> "aUnit"
            const suffix = field.charAt(0).toUpperCase() + field.substr(1);
            test[group + suffix] = entry[field];
          }
        });
      }
    });
    // The nested array is no longer needed once its values are on `test`.
    delete test.details;
    return test;
  });
  printjson(doc);
});
现在我没有考虑到{
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"tests" : [
{
"_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
"aUnit" : "08",
"aSize" : "5",
"aPos" : "Far",
"bPos" : "Drive Side Far",
"bSize" : "5",
"bUnit" : "08",
"cPos" : "Far",
"cSize" : "3",
"cUnit" : "08",
"dPos" : "Far",
"dSize" : "5",
"dUnit" : "08"
}
]
}
"details" 数组中带有 "a" 等键的文档可能出现多次的可能性。所以我只是假设其中带有 "a" 或 "b" 等键的文档各只有一个,并且在把新键添加到 "details" 文档的顶层时,始终采用与该键匹配的最后一个值。
如果您的实际情况有所不同,那么您需要修改其中的各个 .forEach() 循环,把"索引"也作为参数,并将该索引值包含在键名中,例如 "a0Unit"、"a0Size"、"a1Unit"、"a1Size"。
不过这属于需要您在必要时自行处理的细节,因为它与问题中给出的数据形式不同。
但是,如果这非常适合您想要更新的内容,那么只需运行定期执行.bulkWrite()
语句的循环:
"a0Unit": "08",
"a0Size": "05",
"a1Unit": "09",
"a1Size": "06"
从每个阵列成员文档中的let ops = [];
// Flatten every "tests" entry and persist the result with batched bulkWrite()
// calls. Relies on `ops` having been declared as an empty array beforehand.
db.test.find().forEach((doc) => {
  doc.tests = doc.tests.map((test) => {
    test.details.forEach((detail) => {
      for (const group of Object.keys(detail)) {
        detail[group].forEach((entry) => {
          const fields = Object.keys(entry).filter((name) => name !== '_id');
          for (const field of fields) {
            // e.g. group "a" + field "unit" -> "aUnit"
            const suffix = field.charAt(0).toUpperCase() + field.substr(1);
            test[group + suffix] = entry[field];
          }
        });
      }
    });
    // The nested array is no longer needed once its values are on `test`.
    delete test.details;
    return test;
  });

  // Queue a replacement of the stored "tests" array for this document.
  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": { "$set": { "tests": doc.tests } }
    }
  });

  // Send the queued updates once 500 have accumulated.
  if (ops.length >= 500) {
    db.test.bulkWrite(ops);
    ops = [];
  }
});

// Flush whatever is left after the last full batch.
if (ops.length > 0) {
  db.test.bulkWrite(ops);
  ops = [];
}
_id 字段中也可以看出您正在使用 mongoose。所以无论如何,都不要尝试通过 mongoose 本身来运行这段代码。这是对数据的"一次性"批量更新,应该直接在 shell 中运行。当然,之后您还需要修改 schema 以适应新结构。
但这也正是您应该先在 shell 中用 printjson() 方法把数据跑一遍的原因。
答案 1 :(得分:0)
以下
// Flatten the "a" entries only: unwind each nested array level, then regroup
// by the original document _id, pushing one { aPos, aSize, aUnit } object per
// unwound entry into "tests".
db.collection.aggregate([
  { $unwind: "$tests" },
  { $unwind: "$tests.details" },
  { $unwind: "$tests.details.a" },
  {
    $group: {
      _id: "$_id",
      tests: {
        $push: {
          aPos: "$tests.details.a.pos",
          aSize: "$tests.details.a.size",
          aUnit: "$tests.details.a.unit"
        }
      }
    }
  }
])
产生
{ "_id" : ObjectId("58e574a768afb6085ec3a388"), "tests" : [ { "aPos" : "Far", "aSize" : "5", "aUnit" : "08" } ] }
以上只产生了一组"字段:值"对;在同一层级上进行多次 $unwind 不起作用:
// Attempt to flatten "a" and "b" in a single pipeline by unwinding both arrays
// before regrouping by the original document _id.
// NOTE(review): the author reports that this does not work. Presumably, once
// "$tests.details" is unwound, an element that holds only "a" has no "b" array
// left to unwind, so no documents reach $group — confirm against the data.
db.collection.aggregate(
[{$unwind:"$tests"},
{$unwind:"$tests.details"},
{$unwind:"$tests.details.a"},
{$unwind:"$tests.details.b"},
{$group:{
_id:"$_id",
"tests": {"$push":{
"aPos":"$tests.details.a.pos",
"aSize":"$tests.details.a.size",
"aUnit":"$tests.details.a.unit",
"bPos":"$tests.details.b.pos",
"bSize":"$tests.details.b.size",
"bUnit":"$tests.details.b.unit"
}}}},
]) // does not work, per the author
因此,需要再使用一个 $facet 聚合阶段,对 details.b、details.c 和 details.d 执行类似的步骤。