Question

考虑一下，我们有一堆看起来像这样的子标题：

subtree1 = {
    "id": "root",
    "children": [
        {
            "id": "file",
            "caption": "File",
            "children": []
        },
        {
            "id": "edit",
            "caption": "Edit",
            "children": []
        },
        {
            "id": "tools",
            "caption": "Tools",
            "children": [
                {
                    "id": "packages",
                    "caption": "Packages",
                    "children": []
                }
            ]
        },
        {
            "id": "help",
            "caption": "Help",
            "children": []
        },
    ]
}

subtree2 = {
    "id": "root",
    "children": [
        {
            "id": "file",
            "caption": "File",
            "children": [
                {"caption": "New"},
                {"caption": "Exit"},
            ]        
        }
    ]
}

subtree3 = {
    "id": "root",
    "children": [
        {
            "id": "edit",
            "children": [
                {"caption": "Copy"},
                {"caption": "Cut"},
                {"caption": "Paste"},
            ]
        },
        {
            "id": "help",
            "children": [
                {"caption": "About"},
            ]
        }
    ]
}

subtree4 = {
    "id": "root",
    "children": [
        {
            "id": "edit",
            "children": [
                {
                    "id": "text",
                    "caption": "Text",
                    "children": [
                        { "caption": "Insert line before" },
                        { "caption": "Insert line after" }
                    ]
                }
            ]
        }
    ]
}

我试图弄清楚如何编写merge函数的代码，例如执行以下操作：

tree0 = merge(subtree1, subtree2)
tree0 = merge(tree0, subtree3)
tree0 = merge(tree0, subtree4)

将产生：

tree0 = {
    "id": "root",
    "children": [
        {
            "id": "file",
            "caption": "File",
            "children": [
                {"caption": "New"},
                {"caption": "Exit"},
            ]   
        },
        {
            "id": "edit",
            "caption": "Edit",
            "children": [
                {"caption": "Copy"},
                {"caption": "Cut"},
                {"caption": "Paste"},
                {
                    "id": "text",
                    "caption": "Text",
                    "children": [
                        { "caption": "Insert line before" },
                        { "caption": "Insert line after" }
                    ]
                }
            ]
        },
        {
            "id": "tools",
            "caption": "Tools",
            "children": [
                {
                    "id": "packages",
                    "caption": "Packages",
                    "children": []
                }
            ]
        },
        {
            "id": "help",
            "caption": "Help",
            "children": [
                {"caption": "About"},
            ]
        },
    ]
}

但是要这样做：

tree1 = merge(subtree1, subtree2)
tree1 = merge(tree1, subtree4)
tree1 = merge(tree1, subtree3)

会产生：

tree1 = {
    "id": "root",
    "children": [
        {
            "id": "file",
            "caption": "File",
            "children": [
                {"caption": "New"},
                {"caption": "Exit"},
            ]   
        },
        {
            "id": "edit",
            "caption": "Edit",
            "children": [
                {
                    "id": "text",
                    "caption": "Text",
                    "children": [
                        { "caption": "Insert line before" },
                        { "caption": "Insert line after" }
                    ]
                },
                {"caption": "Copy"},
                {"caption": "Cut"},
                {"caption": "Paste"},
            ]
        },
        {
            "id": "tools",
            "caption": "Tools",
            "children": [
                {
                    "id": "packages",
                    "caption": "Packages",
                    "children": []
                }
            ]
        },
        {
            "id": "help",
            "caption": "Help",
            "children": [
                {"caption": "About"},
            ]
        },
    ]
}

否则，以相同的顺序加载子树将始终生成相同的树，但是如果您以不同的顺序使用相同的子树列表，则不能保证生成相同的树（因为子列表可以以不同的方式扩展订单）。

我已经尝试对此进行编码，但是我不知道merge算法的行为如何，这就是我的问题。谁能提供代码/伪代码/解释以便我实现？

PS：在下面，您会发现一些随机尝试，我认为这可能会导致我获得胜利

if __name__ == '__main__':
    from collections import defaultdict

    subtree1 = {
        "id": "root",
        "children": [
            {
                "id": "file",
                "caption": "File",
                "children": []
            },
            {
                "id": "edit",
                "caption": "Edit",
                "children": []
            },
            {
                "id": "tools",
                "caption": "Tools",
                "children": [
                    {
                        "id": "packages",
                        "caption": "Packages",
                        "children": []
                    }
                ]
            },
            {
                "id": "help",
                "caption": "Help",
                "children": []
            },
        ]
    }

    subtree2 = {
        "id": "root",
        "children": [
            {
                "id": "file",
                "caption": "File",
                "children": [
                    {"caption": "New"},
                    {"caption": "Exit"},
                ]
            }
        ]
    }

    subtree3 = {
        "id": "root",
        "children": [
            {
                "id": "edit",
                "children": [
                    {"caption": "Copy"},
                    {"caption": "Cut"},
                    {"caption": "Paste"},
                ]
            },
            {
                "id": "help",
                "children": [
                    {"caption": "About"},
                ]
            }
        ]
    }

    subtree4 = {
        "id": "root",
        "children": [
            {
                "id": "edit",
                "children": [
                    {
                        "id": "text",
                        "caption": "Text",
                        "children": [
                            {"caption": "Insert line before"},
                            {"caption": "Insert line after"}
                        ]
                    }
                ]
            }
        ]
    }

    lst = [
        subtree1,
        subtree2,
        subtree3,
        subtree4
    ]

    def traverse(node, path=[]):
        yield node, tuple(path)

        for c in node.get("children", []):
            path.append(c.get("id", None))
            yield from traverse(c)
            path.pop()

    # Levels & Hooks
    dct_levels = defaultdict(list)
    dct_hooks = defaultdict(list)
    for subtree in lst:
        for n, p in traverse(subtree):
            if p not in dct_levels[len(p)]:
                dct_levels[len(p)].append(p)
            dct_hooks[p].append(n)

    print(dct_levels)
    print(dct_hooks[("file",)])

    # Merge should happen here
    tree = {
        "id": "root",
        "children": []
    }

    for level in range(1, max(dct_levels.keys()) + 1):
        print("populating level", level, dct_levels[level])

但是不确定我是否在这里创建正确的结构/助手，因为目前尚不清楚整个算法如何工作……这就是这个问题的全部内容

Answer 1

使用您的示例在Python 3.5上进行了测试。

from copy import deepcopy


def merge(x: dict, y: dict) -> dict:
    'Merge subtrees x y, and return the results as a new tree.'
    return merge_inplace(deepcopy(x), y)


def merge_inplace(dest: dict, src: dict) -> dict:
    'Merge subtree src into dest, and return dest.'

    # perform sanity checks to make the code more rock solid
    # feel free to remove those lines if you don't need
    assert dest.get('id'), 'Cannot merge anonymous subtrees!'
    assert dest.get('id') == src.get('id'), 'Identity mismatch!'

    # merge attributes
    dest.update((k, v) for k, v in src.items() if k != 'children')

    # merge children
    if not src.get('children'):  # nothing to do, so just exit
        return dest
    elif not dest.get('children'):  # if the children list didn't exist
        dest['children'] = []  # then create an empty list for it

    named_dest_children = {
        child['id']: child
        for child in dest['children']
        if 'id' in child
    }
    for child in src['children']:
        if 'id' not in child:  # anonymous child, just append it
            dest['children'].append(child)
        elif child['id'] in named_dest_children:  # override a named subtree
            merge_inplace(named_dest_children[child['id']], child)
        else:  # create a new subtree
            dest['children'].append(child)
            named_dest_children[child['id']] = child
    return dest

Answer 2

您可以将itertools.groupby用于递归：

from itertools import groupby
def merge(*args):
   if len(args) < 2 or any('id' not in i for i in args):
      return list(args)
   _d = [(a, list(b)) for a, b in groupby(sorted(args, key=lambda x:x['id']), key=lambda x:x['id'])]
   return [{**{j:k for h in b for j, k in h.items()}, 'id':a, 'children':merge(*[i for c in b for i in c['children']])} for a, b in _d]

通过args，此解决方案将每个传递的字典视为children列表的成员。这是为了解决将两个或更多具有不同merge即id和{'id':'root', 'children':[...]}的字典传递给{'id':'root2', 'children':[...]}的可能性。这样，该解决方案将返回[{'id':'root', 'children':[...]}, {'id':'root2', 'children':[...]}]的列表，因为不同的id不提供匹配的途径。因此，在当前问题的上下文中，您需要使用索引来访问结果列表的单个返回元素：合并的dict与id 'root'：

import json
tree0 = merge(subtree1, subtree2)[0]
tree0 = merge(tree0, subtree3)[0]
tree0 = merge(tree0, subtree4)[0]
print(json.dumps(tree0, indent=4))

输出：

{
  "id": "root",
  "children": [
    {
        "id": "edit",
        "caption": "Edit",
        "children": [
            {
                "caption": "Copy"
            },
            {
                "caption": "Cut"
            },
            {
                "caption": "Paste"
            },
            {
                "id": "text",
                "caption": "Text",
                "children": [
                    {
                        "caption": "Insert line before"
                    },
                    {
                        "caption": "Insert line after"
                    }
                ]
            }
        ]
    },
    {
        "id": "file",
        "caption": "File",
        "children": [
            {
                "caption": "New"
            },
            {
                "caption": "Exit"
            }
        ]
    },
    {
        "id": "help",
        "caption": "Help",
        "children": [
            {
                "caption": "About"
            }
        ]
    },
    {
        "id": "tools",
        "caption": "Tools",
        "children": [
            {
                "id": "packages",
                "caption": "Packages",
                "children": []
            }
        ]
      }
   ]
}

Answer 3

用于合并JSON文档/对象的手工编码可能不是最佳解决方案。干！
我在这里使用了genson，jsonschema和jsonmerge软件包进行合并。

genson从JSON实例文档生成JSON模式。
jsonschema使用JSON模式验证JSON实例文档。
jsonmerge通过扩展JSON模式来合并对象/ JSON文档。

首先让我们从JSON实例生成JSON模式。

trees = (subtree1, subtree2, subtree3, subtree4)
schema_builder = genson.SchemaBuilder()
for tree in trees:
    schema_builder.add_object(tree)

schema = schema_builder.to_schema()

现在指定合并策略。

schema['properties']['children']['mergeStrategy'] = 'arrayMergeById'
schema['properties']['children']['items']['properties']['children']['mergeStrategy'] = 'append'

arrayMergeById策略通过对象的id属性合并对象。 append策略收集数组中的对象。
这是完整的代码；

import genson
import jsonmerge
import jsonschema

subtree1 = {
    "id":
    "root",
    "children": [
        {
            "id": "file",
            "caption": "File",
            "children": []
        },
        {
            "id": "edit",
            "caption": "Edit",
            "children": []
        },
        {
            "id": "tools",
            "caption": "Tools",
            "children": [{
                "id": "packages",
                "caption": "Packages",
                "children": []
            }]
        },
        {
            "id": "help",
            "caption": "Help",
            "children": []
        },
    ]
}

subtree2 = {
    "id":
    "root",
    "children": [{
        "id": "file",
        "caption": "File",
        "children": [
            {
                "caption": "New"
            },
            {
                "caption": "Exit"
            },
        ]
    }]
}

subtree3 = {
    "id":
    "root",
    "children": [{
        "id":
        "edit",
        "children": [
            {
                "caption": "Copy"
            },
            {
                "caption": "Cut"
            },
            {
                "caption": "Paste"
            },
        ]
    }, {
        "id": "help",
        "children": [
            {
                "caption": "About"
            },
        ]
    }]
}

subtree4 = {
    "id":
    "root",
    "children": [{
        "id":
        "edit",
        "children": [{
            "id":
            "text",
            "caption":
            "Text",
            "children": [{
                "caption": "Insert line before"
            }, {
                "caption": "Insert line after"
            }]
        }]
    }]
}

trees = (subtree1, subtree2, subtree3, subtree4)
schema_builder = genson.SchemaBuilder()
for tree in trees:
    schema_builder.add_object(tree)

schema = schema_builder.to_schema()
print("Validating schema...", end='')
for tree in trees:
    jsonschema.validate(tree, schema)
print(' done')
schema['properties']['children']['mergeStrategy'] = 'arrayMergeById'
schema['properties']['children']['items']['properties']['children']['mergeStrategy'] = 'append'

merger = jsonmerge.Merger(schema=schema)
tree = merger.merge(subtree1, subtree2)
tree = merger.merge(tree, subtree3)
tree = merger.merge(tree, subtree4)
print(tree)

如何从子树列表中创建树？

3 个答案: