在python中完成以下任务的惯用方法是什么?该算法不必高效或使用常量内存。
给出元组[("location", data)]
的列表,例如:
from_this = [
("global.module1.submodule1.function1", "some data"),
("global.module1.func1", "other data"),
("global.module1.submodule1.func2", "data again"),
("global.module1.submodule1.func3", "data"),
("global.module2.func3", "data"),
("global.module4.submodule1.func3", "data"),
("global.module4.submodule2.func4", "daata"),
("global.module4.submodule2", "da_ta")
]
目标是构建一个字典的分层树(基于位置)。例如:
{"name": "global", "children": [
{"name": "module1", "children": [
{"name": "submodule1", "children": [
{"name": "function1", "data": "some data"},
{"name": "func2", "data": "data again"},
{"name": "func3", "data": "data"}
]},
{"name": "func1", "data": "other data"}
]
},
{"name": "module2", "children": [{"name": "func3", "data": "data"}]},
{"name": "module4", "children": [
{"name": "submodule1", "children": [{"name": "func3", "data": "data"}]},
{"name": "submodule2", "data": "da_ta", "children": [{"name": "func4", "data": "daata"}]}
]}
] }
答案 0 :(得分:1)
这是一个解决方案。我没有优化它,但我认为它应该合理有效并使用恒定的内存。
def process(entries):
root = {'name': None}
for entry in entries:
node = root
for name in entry[0].split('.'):
try:
node = next(child for child in node['children'] if child['name'] == name)
except KeyError:
node['children'] = [{'name': name}]
node = node['children'][-1]
except StopIteration:
node['children'].append({'name': name})
node = node['children'][-1]
node['data'] = entry[1]
return root
请注意,返回值将是树的根,并且看起来像{'name': None, 'children': [ ... ]}
,因为可能有多个顶级命名空间。在您给出的示例中,只有一个顶级命名空间global
,但一般情况下可能不是这样。
示例:使用示例数据from_this
>>> pprint(process(from_this))
{'children': [{'children': [{'children': [{'children': [{'data': 'some data',
'name': 'function1'},
{'data': 'data again',
'name': 'func2'},
{'data': 'data',
'name': 'func3'}],
'name': 'submodule1'},
{'data': 'other data',
'name': 'func1'}],
'name': 'module1'},
{'children': [{'data': 'data',
'name': 'func3'}],
'name': 'module2'},
{'children': [{'children': [{'data': 'data',
'name': 'func3'}],
'name': 'submodule1'},
{'children': [{'data': 'daata',
'name': 'func4'}],
'data': 'da_ta',
'name': 'submodule2'}],
'name': 'module4'}],
'name': 'global'}],
'name': None}
答案 1 :(得分:1)
所以第一个解决方案类似于Reximus的答案,一个交互式解决方案。请注意,树根植于名为root的节点,允许多个顶级命名空间:
#! /usr/bin/env python
import pprint
from_this = [
("global.module1.submodule1.function1", "some data"),
("global.module1.func1", "other data"),
("global.module1.submodule1.func2", "data again"),
("global.module1.submodule1.func3", "data"),
("global.module2.func3", "data"),
("global.module4.submodule1.func3", "data"),
("global.module4.submodule2.func4", "daata"),
("global.module4.submodule2", "da_ta")
]
def make_node():
d = {
"name": "",
"data": "",
"children": []
}
return d
path_sep = "."
tree = make_node()
tree["name"] = "root"
for path, data in from_this:
current = tree
path_list = path.split(path_sep)
for name in path_list:
next_node = None
for child in current["children"]:
if child["name"] == name:
next_node = child
break
if next_node is None:
n = make_node()
n["name"] = name
current["children"].append(n)
current = n
else:
current = next_node
current["data"] = data
pprint.pprint(tree, indent=1)
但也许使用递归是更多"惯用"做任何树相关问题的方法:
def insert(path, data, node):
if len(path) == 0:
node["data"] = data
return
for child in node["children"]:
if child["name"] == path[0]:
insert(path[1:], data, child)
return
child = make_node()
child["name"] = path[0]
node["children"].append(child)
insert(path[1:], data, child)
tree = make_node()
tree["name"] = "root"
for path, data in from_this:
insert(path.split(path_sep), data, tree)
pprint.pprint(tree)