我试图在多个进程中通过共享内存来更新一个嵌套字典(即值本身也是字典的字典)。我尝试使用 multiprocessing 模块中的 Manager,但很难向它管理的字典中添加嵌套的字典。请参阅下面的代码和注释。本质上,这段代码应该在另一个名为 "output" 的字典中创建 input 的副本。等这部分能够正常工作之后,我会加入只从 input 中复制某些 "blade" 的逻辑,但必须保持 node/cluster/blade 的层次结构。
from multiprocessing import Process, Lock, Manager
# Example topology used throughout this script:
# node -> "clusters" -> cluster -> "blades" -> blade name -> blade address.
input = {
    "Node_1": {
        "IP": "127.0.0.1",
        "clusters": {
            "cluster_1": {
                "blades": {
                    "blade_0_1": "127.0.1.1",
                    "blade_0_2": "127.0.1.2",
                },
            },
            "cluster_2": {
                "blades": {
                    "blade_0_3": "127.0.1.3",
                    "blade_0_4": "127.0.1.4",
                },
            },
        },
    },
    "Node_2": {
        "IP": "127.0.0.2",
        "clusters": {
            "cluster_1": {
                "blades": {
                    "blade_0_1": "127.0.1.1",
                    "blade_0_2": "127.0.1.2",
                },
            },
            "cluster_2": {
                "blades": {
                    "blade_0_3": "127.0.1.3",
                    "blade_0_4": "127.0.1.4",
                },
            },
        },
    },
}
def iterate_over_clusters_in_node(input, node, lock, output):
    """Iterate over the clusters in the node, then over the blades in each cluster.

    For every blade a status line is printed and the blade is handed to
    ``add_blade_to_output`` so that it gets added to the output dictionary.

    Args:
        input: Nested dictionary laid out as
            node -> 'clusters' -> cluster -> 'blades' -> blade -> address.
        node: Key of the node in ``input`` whose blades are processed.
        lock: Lock used as a context manager; it is held while printing and
            while updating ``output``.
        output: Dictionary that receives the blades (a Manager dict in the
            calling script).
    """
    for cluster, cluster_data in input[node]['clusters'].items():
        for blade, blade_ip in cluster_data['blades'].items():
            with lock:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(
                    "node: " + node
                    + ", node_IP: " + input[node]['IP']
                    + ", cluster: " + cluster
                    + ", Blade: " + blade
                    + ", cluster_IP: " + blade_ip
                )
            with lock:
                add_blade_to_output(input, node, cluster, blade, output)
def add_blade_to_output(input, node, cluster, blade, output):
    """Add a blade to the managed output dictionary.

    Creates the node and cluster entries in ``output`` on first use, then
    copies the blade's address from ``input``.

    NOTE: this is the version that demonstrates the problem. The nested
    assignments below work on a plain ``dict``, but when ``output`` is a
    Manager dict proxy, modifications to the mutable values stored in it are
    not propagated through the manager, which leads to the KeyErrors noted
    in the comments.

    Args:
        input: Nested dictionary laid out as
            node -> 'clusters' -> cluster -> 'blades' -> blade -> address.
        node: Node key.
        cluster: Cluster key below ``input[node]['clusters']``.
        blade: Blade key below the cluster's ``'blades'``.
        output: Dictionary to update in place.
    """
    if node not in output:
        output[node] = {}
        output[node]['IP'] = input[node]['IP']
        output[node]['clusters'] = {}
        # At this point, I would expect output[node]['IP'] and output[node]['clusters'] to exist
        # But the following print raises KeyError: 'IP'
        # (single-argument print(...) behaves the same on Python 2 and 3)
        print(output[node]['IP'])
    if cluster not in output[node]['clusters']:
        # Raises KeyError: 'clusters'
        output[node]['clusters'][cluster] = {}
        output[node]['clusters'][cluster]['blades'] = {}
    output[node]['clusters'][cluster]['blades'][blade] = input[node]['clusters'][cluster]['blades'][blade]
if __name__ == "__main__":
    # Create lock to ensure correct handling of output from multiple processes
    lock = Lock()

    # Create dictionary to hold any failed blades so that appropriate measures can be taken
    # Must use a Manager so that the dictionary can be shared among processes
    manager = Manager()
    output = manager.dict()

    # Start one process per node in input and keep track of the processes
    procs = []
    for node in input:
        p = Process(target=iterate_over_clusters_in_node, args=(input, node, lock, output))
        p.start()
        procs.append(p)

    # Join processes and wait for them to stop
    for p in procs:
        p.join()

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("The final output is:")
    print(output)
    # Expectation: should print the same dictionary as the input
    # Actual: prints "{'Node_2': {}, 'Node_1': {}}"
我是否需要把 manager.dict()(而不是内置的 dict 类型)赋值给 output[node]?还是说我的整个思路都错了?
谢谢!
编辑:我并不反对把它改成"线程"(threading)实现,而不是"多进程"(multiprocessing)实现。我是并行编程的新手,所以如果线程更适合这种类型的内存共享,请告诉我。
编辑:这是工作代码:
from multiprocessing import Process, Lock, Manager
# Example topology used throughout this script:
# node -> "clusters" -> cluster -> "blades" -> blade name -> blade address.
input = {
    "Node_1": {
        "IP": "127.0.0.1",
        "clusters": {
            "cluster_1": {
                "blades": {
                    "blade_0_1": "127.0.1.1",
                    "blade_0_2": "127.0.1.2",
                },
            },
            "cluster_2": {
                "blades": {
                    "blade_0_3": "127.0.1.3",
                    "blade_0_4": "127.0.1.4",
                },
            },
        },
    },
    "Node_2": {
        "IP": "127.0.0.2",
        "clusters": {
            "cluster_1": {
                "blades": {
                    "blade_0_1": "127.0.1.1",
                    "blade_0_2": "127.0.1.2",
                },
            },
            "cluster_2": {
                "blades": {
                    "blade_0_3": "127.0.1.3",
                    "blade_0_4": "127.0.1.4",
                },
            },
        },
    },
}
# Create dictionary to hold any failed blades so that appropriate measures can be taken
# Must use a Manager so that the dictionary can be shared among processes
# The functions below read and update this module-level `output` directly.
# NOTE(review): the Manager is started at import time, outside the
# `if __name__ == "__main__":` guard. This presumably relies on the "fork"
# start method (children inheriting the proxy); with "spawn" the module is
# re-imported in each child -- TODO confirm on the target platform.
manager = Manager()
output = manager.dict()
def iterate_over_clusters_in_node(input, node, lock):
    """Walk one node's clusters and blades, adding each blade to the output.

    Every blade found under ``input[node]['clusters'][<cluster>]['blades']``
    is passed to ``add_blade_to_output`` while ``lock`` is held.

    Args:
        input: Nested dictionary laid out as
            node -> 'clusters' -> cluster -> 'blades' -> blade -> address.
        node: Key of the node in ``input`` to process.
        lock: Lock used as a context manager around each update.
    """
    clusters = input[node]['clusters']
    for cluster_name in clusters:
        for blade_name in clusters[cluster_name]['blades']:
            with lock:
                add_blade_to_output(input, node, cluster_name, blade_name)
def add_blade_to_output(input, node, cluster, blade, target=None):
    """Add a blade to the managed output dictionary.

    Only the entry of the affected node is read from and written back to the
    shared dictionary; the rest of the dictionary is left untouched.

    Args:
        input: Nested dictionary laid out as
            node -> 'clusters' -> cluster -> 'blades' -> blade -> address.
        node: Node key.
        cluster: Cluster key below ``input[node]['clusters']``.
        blade: Blade key below the cluster's ``'blades'``.
        target: Optional dictionary (or dict proxy) to update. When omitted,
            the module-level ``output`` is used, as before.

    Raises:
        KeyError: If ``node``, ``cluster`` or ``blade`` is missing from
            ``input``.
    """
    shared = output if target is None else target

    # Work on a local copy of this node's entry: nested modifications made
    # directly on a value stored in a dict proxy are not propagated through
    # the manager.
    node_entry = shared.get(node)
    if node_entry is None:
        node_entry = {'IP': input[node]['IP'], 'clusters': {}}

    blades = node_entry['clusters'].setdefault(cluster, {'blades': {}})['blades']
    blades[blade] = input[node]['clusters'][cluster]['blades'][blade]

    # Re-assigning the modified entry to the container is what publishes the
    # nested change.
    shared[node] = node_entry
if __name__ == "__main__":
    # Create lock to ensure correct handling of output from multiple processes
    lock = Lock()

    # Start one process per node in input and keep track of the processes
    procs = []
    for node in input:
        p = Process(target=iterate_over_clusters_in_node, args=(input, node, lock))
        p.start()
        procs.append(p)

    # Join processes and wait for them to stop
    for p in procs:
        p.join()

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("The final output is:")
    print(output)
答案 0 :(得分:1)
根据python文档,
对 dict 和 list 代理中可变值或元素的修改不会通过管理器传播,因为代理无法知道其值或元素何时被修改。要修改这样的元素,可以将修改后的对象重新赋值给容器代理。
有了这些信息,我们可以相应地更新由 manager 管理的字典:
# Replacement for the body of add_blade_to_output(); it uses that function's
# `input`, `node`, `cluster`, `blade` and `output` names.
#
# These changes are not propagated through the manager:
#   output[node] = {}
#   output[node]['IP'] = input[node]['IP']
#   output[node]['clusters'] = {}
# Build the complete entry locally and assign it to the proxy in one step.
# The guard keeps an already populated node from being reset.
if node not in output:
    output[node] = {'IP': input[node]['IP'], 'clusters': {}}

# The nested updates are not propagated either (the first one raises
# KeyError: 'clusters'):
#   output[node]['clusters'][cluster] = {}
#   output[node]['clusters'][cluster]['blades'] = {}
#   output[node]['clusters'][cluster]['blades'][blade] = input[node]['clusters'][cluster]['blades'][blade]
# Take a local copy of this node's entry, modify it, and re-assign it to the
# container proxy.
node_copy = output[node]
blades = node_copy['clusters'].setdefault(cluster, {'blades': {}})['blades']
blades[blade] = input[node]['clusters'][cluster]['blades'][blade]
output[node] = node_copy