I am using comm.Scatterv and comm.Gatherv to distribute a 4D array across a given number of cores (size) with the following MWE:
import numpy as np
from mpi4py import MPI
import matplotlib.pyplot as plt
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
if rank == 0:
    test = np.random.rand(411,48,52,40) #Create array of random numbers
    outputData = np.zeros(np.shape(test))
    split = np.array_split(test,size,axis = 0) #Split input array by the number of available cores
    split_sizes = []
    for i in range(0,len(split),1):
        split_sizes = np.append(split_sizes, len(split[i]))
    displacements = np.insert(np.cumsum(split_sizes),0,0)[0:-1]
    plt.imshow(test[0,0,:,:])
    plt.show()
else:
    #Create variables on other cores
    split_sizes = None
    displacements = None
    split = None
    test = None
    outputData = None
#Broadcast variables to other cores
test = comm.bcast(test, root = 0)
split = comm.bcast(split, root=0)
split_sizes = comm.bcast(split_sizes, root = 0)
displacements = comm.bcast(displacements, root = 0)
output_chunk = np.zeros(np.shape(split[rank])) #Create array to receive subset of data on each core, where rank specifies the core
print("Rank %d with output_chunk shape %s" %(rank,output_chunk.shape))
comm.Scatterv([test,split_sizes, displacements,MPI.DOUBLE],output_chunk,root=0) #Scatter data from test across cores and receive in output_chunk
output = output_chunk
plt.imshow(output_chunk[0,0,:,:])
plt.show()
print("Output shape %s for rank %d" %(output.shape,rank))
comm.Barrier()
comm.Gatherv(output,[outputData,split_sizes,displacements,MPI.DOUBLE], root=0) #Gather output data together
if rank == 0:
print("Final data shape %s" %(outputData.shape,))
plt.imshow(outputData[0,0,:,:])
plt.show()
This creates a 4D array of random numbers and, in principle, should distribute it across size cores before recombining it. I expected Scatterv to divide it along axis 0 (length 411) according to the starting integers and displacements in the vectors split_sizes and displacements. However, I get an error on recombining with Gatherv (mpi4py.MPI.Exception: MPI_ERR_TRUNCATE: message truncated), and the plots of output_chunk on each core show that most of the input data has been lost, so it appears that the split has not occurred along the first axis.

My questions are: Why doesn't the split occur along the first axis? How do I know which axis the split occurs along, and is it possible to change/specify the axis along which it occurs?
Answer 0 (score: 4)
comm.Scatterv and comm.Gatherv know nothing about the numpy array dimensions. They just see the sendbuf as a block of memory. Therefore it is necessary to take this into account when specifying the sendcounts and displacements (see http://materials.jeremybejarano.com/MPIwithPython/collectiveCom.html for details). It is also assumed that the data is laid out in C-style (row-major) order in memory.
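To make the flat-memory view concrete, here is a minimal sketch (an illustration with assumed values, not taken from the answer; no MPI needed) showing that in C order n rows of a 2D array occupy n * ncols contiguous elements, which is exactly what the counts and displacements must describe:
import numpy as np
a = np.arange(12, dtype='float64').reshape(4, 3) #4 rows of 3 elements each
flat = a.ravel() #The flat buffer that Scatterv actually sees
rows_per_rank = [2, 2] #Assumed split: 2 ranks, 2 rows each
counts = [r * a.shape[1] for r in rows_per_rank] #Counts in elements: [6, 6]
displs = np.insert(np.cumsum(counts), 0, 0)[:-1] #Displacements in elements: [0, 6]
print(flat[displs[0]:displs[0] + counts[0]]) #Rows 0-1 of a, flattened
print(flat[displs[1]:displs[1] + counts[1]]) #Rows 2-3 of a, flattened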
An example for a 2D matrix is given below. The key parts of this code are to set split_sizes_input / split_sizes_output and displacements_input / displacements_output correctly. The code takes the size of the second dimension into account to specify the correct divisions in the memory block:
split_sizes_input = split_sizes*512
For higher dimensions, this line would be changed to:
split_sizes_input = split_sizes*indirect_dimension_sizes
where
indirect_dimension_sizes = npts2*npts3*npts4*....*nptsN
The same applies for split_sizes_output.
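Applied to the 4D array from the question, a sketch of the count and displacement computation would look like this (the shape (411, 48, 52, 40) comes from the question; size = 4 is an assumed example rank count):
import numpy as np
size = 4 #Assumed number of MPI ranks, for illustration
shape = (411, 48, 52, 40) #Shape of the question's test array
indirect_dimension_sizes = shape[1]*shape[2]*shape[3] #48*52*40 = 99840 elements per slice along axis 0
split = np.array_split(np.empty(shape[0]), size) #Mirrors np.array_split(test, size, axis=0)
split_sizes = np.array([len(chunk) for chunk in split]) #Rows per rank: [103 103 103 102]
split_sizes_input = split_sizes*indirect_dimension_sizes #Counts in elements, not rows
displacements_input = np.insert(np.cumsum(split_sizes_input),0,0)[0:-1]
print(split_sizes_input) #[10283520 10283520 10283520 10183680]
print(displacements_input) #[0 10283520 20567040 30850560]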
The code creates a 2D array with the numbers 1 to 512 incrementing across one dimension. It is easy to see from the plots whether the data has been split and recombined correctly.
import numpy as np
from mpi4py import MPI
import matplotlib.pyplot as plt
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
if rank == 0:
    test = np.arange(0,512,dtype='float64')
    test = np.tile(test,[256,1]) #Create 2D input array. Numbers 1 to 512 increment across dimension 2.
    outputData = np.zeros([256,512]) #Create output array of same size
    split = np.array_split(test,size,axis = 0) #Split input array by the number of available cores
    split_sizes = []
    for i in range(0,len(split),1):
        split_sizes = np.append(split_sizes, len(split[i]))
    split_sizes_input = split_sizes*512
    displacements_input = np.insert(np.cumsum(split_sizes_input),0,0)[0:-1]
    split_sizes_output = split_sizes*512
    displacements_output = np.insert(np.cumsum(split_sizes_output),0,0)[0:-1]
    print("Input data split into vectors of sizes %s" %split_sizes_input)
    print("Input data split with displacements of %s" %displacements_input)
    plt.imshow(test)
    plt.colorbar()
    plt.title('Input data')
    plt.show()
else:
    #Create variables on other cores
    split_sizes_input = None
    displacements_input = None
    split_sizes_output = None
    displacements_output = None
    split = None
    test = None
    outputData = None
split = comm.bcast(split, root=0) #Broadcast split array to other cores
split_sizes_input = comm.bcast(split_sizes_input, root = 0) #Broadcast element counts and displacements so every rank holds consistent values
displacements_input = comm.bcast(displacements_input, root = 0)
split_sizes_output = comm.bcast(split_sizes_output, root = 0)
displacements_output = comm.bcast(displacements_output, root = 0)
output_chunk = np.zeros(np.shape(split[rank])) #Create array to receive subset of data on each core, where rank specifies the core
print("Rank %d with output_chunk shape %s" %(rank,output_chunk.shape))
comm.Scatterv([test,split_sizes_input, displacements_input,MPI.DOUBLE],output_chunk,root=0)
output = np.zeros([len(output_chunk),512]) #Create output array on each core
for i in range(0,np.shape(output_chunk)[0],1):
    output[i,0:512] = output_chunk[i]
plt.imshow(output)
plt.title("Output shape %s for rank %d" %(output.shape,rank))
plt.colorbar()
plt.show()
print("Output shape %s for rank %d" %(output.shape,rank))
comm.Barrier()
comm.Gatherv(output,[outputData,split_sizes_output,displacements_output,MPI.DOUBLE], root=0) #Gather output data together
if rank == 0:
    outputData = outputData[0:len(test),:]
    print("Final data shape %s" %(outputData.shape,))
    plt.imshow(outputData)
    plt.colorbar()
    plt.show()
    print(outputData)
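Assuming the script is saved as, say, scatter_gather.py (a placeholder name), it can be run across four cores with the standard MPI launcher, e.g. mpiexec -n 4 python scatter_gather.py. With the counts and displacements specified in elements rather than rows, Gatherv reassembles outputData identically to test.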