块模型(层级关系为:块0 -> 块1 -> 块2 -> 块3 -> [...]):
示例输入文档[modulestore.structures 集合中 700 多个文档之一]:
{
_id: ObjectId('5932d50ff8f46c0a8098ab79'),
blocks: [
{
definition: ObjectId('5923556ef8f46c0a787e9c0f'),
block_type: 'chapter',
block_id: '5b053a7f10ba41df85a3221c3ef3956e',
fields: {
format: 'Foo exam',
children: [
[
'sequential',
'9f1e58553ad448818ec8e7915d3d94d3'
],
[
'sequential',
'f052c7aa44274769a4631e95405834e0'
]
]
}
},
{
definition: ObjectId('59235569f8f46c0a7be1debc'),
block_type: 'sequential',
block_id: '9f1e58553ad448818ec8e7915d3d94d3',
fields: {
display_name: 'FooBar'
}
},
{
definition: ObjectId('59317406f8f46c0a8098aaf5'),
block_type: 'sequential',
block_id: 'f052c7aa44274769a4631e95405834e0',
fields: {
display_name: 'CanHaz'
}
}
]
}
我的目标是:
利用 children 数组进行遍历;
为每个块新增属性 top_ancestor_fields,其值取自最顶层祖先的 fields 属性。
示例输出:
[
{
_id: ObjectId('5a00f611f995363c2b63c9a6'),
block_type: 'chapter',
block_id: '5b053a7f10ba41df85a3221c3ef3956e',
fields: {
format: 'Foo exam',
children: [
[
'sequential',
'9f1e58553ad448818ec8e7915d3d94d3'
],
[
'sequential',
'f052c7aa44274769a4631e95405834e0'
]
]
},
top_ancestor_fields: {
format: 'Foo exam'
}
},
{
_id: ObjectId('5a00f611f995363c2b63c9a7'),
block_id: '9f1e58553ad448818ec8e7915d3d94d3',
block_type: 'sequential',
fields: {
display_name: 'FooBar'
},
top_ancestor_fields: {
format: 'Foo exam'
}
},
{
_id: ObjectId('5a00f611f995363c2b63c9a8'),
block_id: 'f052c7aa44274769a4631e95405834e0',
block_type: 'sequential',
fields: {
display_name: 'CanHaz'
},
top_ancestor_fields: {
format: 'Foo exam'
}
},
]
根据 @neil-lunn 的建议,我已经差不多让它跑通了:
// Step 1: turn every element of each structure's `blocks` array into its own
// document and write the flattened result to `modulestore.mapped0`.
db.modulestore.structures.aggregate([
// Emit one document per element of the `blocks` array.
{ $unwind: '$blocks' },
// Drop the structure's _id and hoist selected block attributes to the top
// level; the complete `fields` sub-document is kept alongside the hoisted copies.
{ $project: { _id: 0,
block_id: '$blocks.block_id',
children: '$blocks.fields.children',
display_name: '$blocks.fields.display_name',
block_type: '$blocks.block_type',
exam: '$blocks.fields.format',
fields: '$blocks.fields'
}},
// Write the pipeline output to the target collection.
{ $out: 'modulestore.mapped0' }
])
// Step 2: for every flattened block, look up the documents that list it as a
// child and write (block, ancestor) rows to `modulestore.mapped1`.
db.modulestore.mapped0.aggregate([
// Self-join on the same collection: start from this document's block_id and
// match it against the `children` field of other documents.
// NOTE(review): in the sample input each `children` element is a
// [block_type, block_id] pair rather than a bare id, so an equality match
// against the block_id string presumably never succeeds - confirm.
{ $graphLookup: {
from: 'modulestore.mapped0',
startWith: '$block_id',
connectToField: 'children',
connectFromField: 'block_id',
as: 'block_ids',
// depth 0 = a single, non-recursive lookup
maxDepth: 0
} },
// One output document per matched entry in `block_ids`.
{ $unwind: '$block_ids' },
// NOTE(review): the pipeline that fills `modulestore.mapped0` does not
// project a `name` field, so `name: 1` looks like a leftover - confirm.
{ $project: {
name: 1,
_id: 0,
ancestor: '$block_ids.block_id'
} },
// Write the pipeline output to the target collection.
{ $out: 'modulestore.mapped1' }
]);
但这条管道只会一直挂起。我已经尝试过调整 $graphLookup 的 maxDepth 选项。
仅供参考:在我这里 db.modulestore.mapped0.count() 的结果是 80772。
每个文档可能包含一个 children 数组,该数组最多有 180 个元素。
我不确定该如何编写这个更大的管道来映射 children 层次结构……
答案 0 :(得分:0)
以下内容可以帮助您入门:
// Flatten the structures into one document per block, reduce each block's
// children to a flat array of child block ids, and store the result in the
// "modulestore.mapped0" collection.
db.modulestore.structures.aggregate([{
$unwind: '$blocks' // flatten "blocks" array: one document per block
}, {
$replaceRoot: { // move "blocks" field to top level so the block becomes the whole document
newRoot: "$blocks"
}
}, {
$unwind: { // flatten "fields.children" array: one document per [block_type, block_id] pair
path: "$fields.children",
preserveNullAndEmptyArrays: true // keep blocks that have no children
}
}, {
// this step is technically not needed but it might speed up things - try running with and without that
$addFields: { // we only keep the second (last, really) entry of each pair since the block id is the only valid join key for the graphLookup
"fields.children": {
$slice: [ "$fields.children", -1 ]
}
}
}, {
$unwind: { // flatten "fields.children" array one more time because it was nested before ($slice still returns an array)
path: "$fields.children",
preserveNullAndEmptyArrays: true
}
}, {
$group: { // reduce the number of lookups required later by eliminating duplicate parent-child paths
// NOTE(review): only "fields.format" and "fields.children" are carried through this stage;
// any other key under "fields" (e.g. display_name in the sample input) is dropped.
"_id": "$block_id",
"block_type": { $first: "$block_type" },
"definition": { $first: "$definition" },
"fieldsFormat": { $first: "$fields.format" },
"fieldsChildren": { $addToSet: "$fields.children" }
}
}, {
$project: { // restore original structure; "_id" now holds the block_id and is also copied to "block_id"
"block_id": "$_id",
"block_type": "$block_type",
"definition": "$definition",
"fields": {
"format": "$fieldsFormat",
"children": "$fieldsChildren"
}
}
}, { // spit out the result into "modulestore.mapped0" collection, overwriting all existing content
$out: 'modulestore.mapped0'
}])
然后
// For every block in "modulestore.mapped0", look up the documents that list it
// as a child, then join the child ids of those documents back against the
// same collection.
db.modulestore.mapped0.aggregate([{
$graphLookup: { // collect documents whose "fields.children" matches this block's id
from: 'modulestore.mapped0',
startWith: '$block_id',
connectToField: 'fields.children',
connectFromField: 'block_id',
as: 'block_ids',
maxDepth: 0 // depth 0 = a single, non-recursive lookup
}
}, {
$lookup: { // resolve the collected child ids against "_id" of the same collection
from: 'modulestore.mapped0',
localField: 'block_ids.fields.children',
foreignField: '_id',
// NOTE(review): this writes to a dotted path underneath the "block_ids"
// array produced by the previous stage - confirm the resulting shape.
as: 'block_ids.fields.children'
}
}])
答案 1 :(得分:0)
部分解决方案[gist]:
def update_descendants(modulestore, blocks, ancestor_fields):
    """
    Recursively stamp ``ancestor_fields`` onto each block and its descendants.

    Every block in ``blocks`` is replaced in ``modulestore`` by a copy that
    carries an ``ancestor_fields`` key. Each ``[block_type, block_id]`` pair
    in the block's ``children`` is then looked up and processed the same way.
    Children that already have ``ancestor_fields`` are excluded by the query,
    so a block is not rewritten once it has been stamped.

    A running total of replaced blocks is kept on
    ``update_descendants.counter`` and printed after every replacement; the
    counter starts at zero if the caller has not initialised it.

    :param modulestore: collection holding the block documents; must provide
        ``replace_one`` and ``find``
    :type modulestore: ``Collection``
    :param blocks: iterable of block documents to update
    :type blocks: ``Cursor`` | `tuple`
    :param ancestor_fields: fields of the top most ancestor
    :type ancestor_fields: ``dict``
    """
    for block in blocks:
        # ``update_d`` is defined outside this snippet; presumably it returns
        # a copy of ``block`` with ``add`` merged in and the ``rm`` keys
        # removed -- TODO confirm against its definition.
        replacement = update_d(block,
                               add={'ancestor_fields': ancestor_fields},
                               rm=('_id',))
        modulestore.replace_one({'block_id': block['block_id'],
                                 'block_type': block['block_type']},
                                replacement)

        # Lazily initialise the counter so the first call does not raise
        # AttributeError when the caller never set it.
        update_descendants.counter = getattr(update_descendants,
                                             'counter', 0) + 1
        # Single pre-formatted argument: prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print('Updated: %s' % (update_descendants.counter,))

        # A missing, None or empty ``children`` means there is nothing to
        # descend into.
        for child_type, child_id in block.get('children') or ():
            unstamped_children = modulestore.find({
                'block_id': child_id,
                'block_type': child_type,
                'ancestor_fields': {'$exists': False},
            })
            update_descendants(modulestore, unstamped_children,
                               ancestor_fields)
我更希望得到一个完全在数据库内完成的解决方案,避免这些低效的逐条查询。