我正在尝试迭代PyCUDA中的2D数组,但我最终得到了重复的数组值。我最初抛出一个小的随机整数数组,并且按预期工作,但是当我向它投射图像时,我会一遍又一遍地看到相同的值。
这是我的代码
img = np.random.randint(20, size = (4,5))
print "Input array"
print img
img_size=img.shape
print img_size
#nbtes determines the number of bytes for the numpy array a
img_gpu = cuda.mem_alloc(img.nbytes)
#Copies the memory from CPU to GPU
cuda.memcpy_htod(img_gpu, img)
mod = SourceModule("""
#include <stdio.h>
__global__ void AHE(int *a, int row, int col)
{
int i = threadIdx.x+ blockIdx.x* blockDim.x;
int j = threadIdx.y+ blockIdx.y* blockDim.y;
if(i==0 && j ==0)
printf("Output array ");
if(i <row && j < col)
{
printf(" %d",a[j + i*col]);
}
}
""")
col = np.int32(img.shape[-1])
row = np.int32(img.shape[0])
func = mod.get_function("AHE")
func(img_gpu, row, col, block=(32,32,1))
img_ahe = np.empty_like(img)
cuda.memcpy_dtoh(img_ahe, img_gpu)
现在当我用一个转换为numpy数组的图像替换随机整数数组时,我最终得到了这个
img = cv2.imread('Chest.jpg',0)
img_size=img.shape
print img_size
#nbtes determines the number of bytes for the numpy array a
img_gpu = cuda.mem_alloc(img.nbytes)
#Copies the memory from CPU to GPU
cuda.memcpy_htod(img_gpu, img)
mod = SourceModule("""
#include <stdio.h>
__global__ void AHE(int *a, int row, int col)
{
int i = threadIdx.x+ blockIdx.x* blockDim.x;
int j = threadIdx.y+ blockIdx.y* blockDim.y;
if(i==0 && j ==0)
printf("Output array ");
if(i <row && j < col)
{
printf(" %d",a[j + i*col]);
}
}
""")
#Gives you the number of columns
col = np.int32(img.shape[-1])
row = np.int32(img.shape[0])
func = mod.get_function("AHE")
func(img_gpu, row, col, block=(32,32,1))
img_ahe = np.empty_like(img)
cuda.memcpy_dtoh(img_ahe, img_gpu)