Question

块模型（在块0上进行 -> 块1 -> 块2 -> 块3 -> [...]）：

示例输入文档[modulestore.structures集合中的700多个]：

{
  _id: ObjectId('5932d50ff8f46c0a8098ab79'),
  blocks: [
    {
      definition: ObjectId('5923556ef8f46c0a787e9c0f'),
      block_type: 'chapter',
      block_id: '5b053a7f10ba41df85a3221c3ef3956e',
      fields: {
        format: 'Foo exam',
        children: [ 
          [ 
            'sequential', 
            '9f1e58553ad448818ec8e7915d3d94d3'
          ], 
          [ 
            'sequential', 
            'f052c7aa44274769a4631e95405834e0'
          ]
        ]
      }
    },
    {
      definition: ObjectId('59235569f8f46c0a7be1debc'),
      block_type: 'sequential',
      block_id: '9f1e58553ad448818ec8e7915d3d94d3',
      fields: {
        display_name: 'FooBar'
      }
    },
    {
      definition: ObjectId('59317406f8f46c0a8098aaf5'),
      block_type: 'sequential',
      block_id: 'f052c7aa44274769a4631e95405834e0',
      fields: {
        display_name: 'CanHaz'
      }
    }
  ]
}

我的目标是：

将 blocks 展平，使所有块都位于集合（collection）级别；
使用 children 数组进行遍历；
遍历并修改这棵“树”，使每个子节点/孙节点/曾孙节点/……后代节点都获得一个新属性 top_ancestor_fields，其中包含其最顶层祖先的 fields。

示例输出：

[
  {
    _id: ObjectId('5a00f611f995363c2b63c9a6'),
    block_type: 'chapter',
    block_id: '5b053a7f10ba41df85a3221c3ef3956e',
    fields: {
      format: 'Foo exam',
      children: [ 
        [ 
          'sequential',
          '9f1e58553ad448818ec8e7915d3d94d3'
        ], 
        [
          'sequential',
          'f052c7aa44274769a4631e95405834e0'
        ]
      ]
    },
    top_ancestor_fields: {
      format: 'Foo exam'
    }
  },
  {
     _id: ObjectId('5a00f611f995363c2b63c9a7'),
     block_id: '9f1e58553ad448818ec8e7915d3d94d3',
     block_type: 'sequential',
     fields: {
       display_name: 'FooBar'
     },
     top_ancestor_fields: {
       format: 'Foo exam'
     }
  },
  {
     _id: ObjectId('5a00f611f995363c2b63c9a8'),
     block_id: 'f052c7aa44274769a4631e95405834e0',
     block_type: 'sequential',
     fields: {
       display_name: 'CanHaz'
     },
     top_ancestor_fields: {
       format: 'Foo exam'
     }
  },
]

基于 @neil-lunn 的建议，我几乎让它生效了：

// First step of the attempt: flatten every structure's `blocks` array into one
// document per block and store the result in `modulestore.mapped0`.
db.modulestore.structures.aggregate([
  // Emit one document per element of `blocks`.
  { $unwind: '$blocks' },
  // Lift selected block fields to the top level and drop the structure's _id.
  // `children` is copied as-is, so (per the sample input) it remains an array
  // of [block_type, block_id] pairs.
  { $project: { _id: 0,
                block_id: '$blocks.block_id',
                children: '$blocks.fields.children',
                display_name: '$blocks.fields.display_name',
                block_type: '$blocks.block_type',
                exam: '$blocks.fields.format',
                fields: '$blocks.fields'
               }},
  // Write the pipeline output to the `modulestore.mapped0` collection.
  { $out: 'modulestore.mapped0' }
])

// Second step of the attempt: for each flattened block, look up the blocks that
// reference it through `children` and store the result in `modulestore.mapped1`.
db.modulestore.mapped0.aggregate([
    { $graphLookup: {
        from: 'modulestore.mapped0',
        startWith: '$block_id',
        // NOTE(review): per the sample input, each `children` element is a
        // [block_type, block_id] pair rather than a bare block_id string, so
        // matching a block_id against this field presumably never succeeds —
        // confirm.
        connectToField: 'children',
        connectFromField: 'block_id',
        as: 'block_ids',
        // maxDepth 0 restricts the search to the initial match (no recursion).
        maxDepth: 0
    } },
    // One output document per matched entry; blocks with no match are dropped.
    { $unwind: '$block_ids' },
    { $project: {
        // NOTE(review): the pipeline that builds mapped0 projects `display_name`
        // but no `name` field, so this is presumably always absent — confirm.
        name: 1,
        _id: 0,
        ancestor: '$block_ids.block_id'
    } },
    // Write the pipeline output to the `modulestore.mapped1` collection.
    { $out: 'modulestore.mapped1' }
]);

但它只是一直挂起。我已尝试配置 $graphLookup 的 maxDepth 选项。仅供参考：db.modulestore.mapped0.count() 在我这里的结果是 80772。

每个文档可能包含一个children数组，最多包含180个元素。

不确定如何处理这个更大的管道来映射children层次结构......

Answer 1

以下内容可以帮助您入门：

// Build `modulestore.mapped0`: one document per block_id whose
// `fields.children` is a flat array of child block ids (the block_type half of
// each [block_type, block_id] pair is discarded).
db.modulestore.structures.aggregate([{
    $unwind: '$blocks' // flatten "blocks" array
}, {
    $replaceRoot: { // move "blocks" field to top level
        newRoot: "$blocks"
    }
}, {
    $unwind: { // flatten "fields.children" array
        path: "$fields.children",
        preserveNullAndEmptyArrays: true // keep blocks that have no children
    }
}, {
    // this step is technically not needed but it might speed up things - try running with and without that
    $addFields: { // we only keep the second (last, really) entry of all your arrays since this is the only valid join key for the graphLookup
        "fields.children": {
            $slice: [ "$fields.children", -1 ]
        }
    }
}, {
    $unwind: { // flatten "fields.children" array one more time because it was nested before
        path: "$fields.children",
        preserveNullAndEmptyArrays: true
    }
}, {
    // Only block_type, definition, fields.format and the children ids are
    // carried through this stage; other block fields (e.g. fields.display_name)
    // are not part of the output.
    $group: { // reduce the number of lookups required later by eliminating duplicate parent-child paths
        "_id": "$block_id",
        "block_type": { $first: "$block_type" },
        "definition": { $first: "$definition" },
        "fieldsFormat": { $first: "$fields.format" },
        "fieldsChildren": { $addToSet: "$fields.children" }
    }
}, {
    // `_id` is not excluded here, so it stays equal to block_id in the output.
    $project: { // restore original structure
        "block_id": "$_id",
        "block_type": "$block_type",
        "definition": "$definition",
        "fields": {
            "format": "$fieldsFormat",
            "children": "$fieldsChildren"
        }
    }
}, { // spit out the result into "modulestore.mapped0" collection, overwriting all existing content
    $out: 'modulestore.mapped0'
}])

然后

// For each block in `modulestore.mapped0`, collect the blocks that list it in
// `fields.children` into `block_ids`, then join child ids back against the
// same collection by `_id`.
db.modulestore.mapped0.aggregate([{
    $graphLookup: {
        from: 'modulestore.mapped0',
        startWith: '$block_id',
        connectToField: 'fields.children',
        connectFromField: 'block_id',
        as: 'block_ids',
        // NOTE(review): maxDepth 0 stops after the initial match (no recursion),
        // so only direct parents are returned; reaching the top-most ancestor
        // presumably requires a larger or omitted maxDepth — confirm.
        maxDepth: 0
    }
}, { 
    // NOTE(review): `as` reuses the dotted path that `localField` reads from
    // inside the `block_ids` array — verify the resulting document shape.
    $lookup: { 
        from: 'modulestore.mapped0', 
        localField: 'block_ids.fields.children', 
        foreignField: '_id', 
        as: 'block_ids.fields.children' 
    } 
}])

Answer 2

部分解决方案[gist]：

def update_descendants(modulestore, blocks, ancestor_fields):
    """Stamp ``ancestor_fields`` onto every block and, recursively, its children.

    Each block in ``blocks`` is written back to ``modulestore`` (matched on
    ``block_id`` and ``block_type``) with an added ``ancestor_fields`` entry.
    Each ``(block_type, block_id)`` pair in the block's ``children`` is then
    looked up and processed with the same ``ancestor_fields``, skipping
    documents that already carry ``ancestor_fields``.

    The number of processed blocks is accumulated in
    ``update_descendants.counter`` and printed after every update.

    :keyword modulestore: modulestore containing the blocks
    :type modulestore: ``Collection``

    :keyword blocks: iterator over the blocks (collections within modulestore)
    :type blocks: ``Cursor`` | `tuple`

    :keyword ancestor_fields: fields of the top most ancestor
    :type ancestor_fields: ``dict``
    """
    for block in blocks:
        # NOTE(review): update_d is defined outside this snippet; presumably it
        # returns the block with the `add` entries merged in and the `rm` keys
        # removed — confirm against its definition.
        modulestore.replace_one(
            {'block_id': block['block_id'],
             'block_type': block['block_type']},
            update_d(block, add={'ancestor_fields': ancestor_fields},
                     rm=('_id',)))

        # The counter lives on the function object; start from 0 when no caller
        # initialised it instead of failing with AttributeError.
        update_descendants.counter = getattr(update_descendants, 'counter', 0) + 1
        # Single-argument call form prints the same text on Python 2 and 3.
        print('Updated: %d' % update_descendants.counter)

        # A missing, None or empty `children` means there is nothing to descend into.
        for block_type, block_id in block.get('children') or ():
            update_descendants(modulestore,
                               modulestore.find({'block_id': block_id,
                                                 'block_type': block_type,
                                                 # skip blocks already stamped
                                                 'ancestor_fields': {
                                                     '$exists': False
                                                 }}),
                               ancestor_fields)

我更希望有一个完全在数据库内完成的解决方案，而不需要所有这些低效的查询。

使用数组将MongoDB文档集合分层压缩到文档中

2 个答案: