我打算在PyCUDA中编写一个内核来生成2d高斯补丁。但是,我在主机中定义的值在将它们复制到设备后会发生变化。以下是代码。
import numpy as np
import matplotlib.pyplot as plt
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import pycuda.autoinit
# kernel
kernel = SourceModule("""
#include <stdio.h>
__global__ void gaussian2D(float *output, float x, float y, float sigma, int
n_rows, int n_cols)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
int j = threadIdx.y + blockIdx.y * blockDim.y;
printf("%d ", n_cols);
if (i < n_cols && j < n_rows) {
size_t idx = j*n_cols +i;
//printf("%d ", idx);
}
}
""")
# host code
def gpu_gaussian2D(point, sigma, shape):
# Convert parameters into numpy array
x, y = np.array(point, dtype=np.float32)
sigma = np.float32(sigma)
n_rows, n_cols = np.array(shape, dtype=np.int)
print(n_rows)
output = np.empty((1, shape[0]*shape[1]), dtype= np.float32)
# Get kernel function
gaussian2D = kernel.get_function("gaussian2D")
# Define block, grid and compute
blockDim = (32, 32, 1) # 1024 threads in total
dx, mx = divmod(shape[1], blockDim[0])
dy, my = divmod(shape[0], blockDim[1])
gridDim = ((dx + (mx>0)), (dy + (my>0)), 1)
# Kernel function
gaussian2D (
cuda.Out(output), cuda.In(x), cuda.In(y), cuda.In(sigma),
cuda.In(n_rows), cuda.In(n_cols),
block=blockDim, grid=gridDim)
return output
point = (5, 5)
sigma = 3.0
shape = (10, 10)
result = gpu_gaussian2D(point, sigma, shape)
检查n_cols
的打印值后,它不是预期的10。任何人都可以帮助我,我无法弄清楚这里出了什么问题。
答案 0 :(得分:2)
.In()
and .Out()
仅用于将通过内核中的指针参数传递的缓冲区(因此仅适用于output
)。普通的按值传递参数可以直接使用。
$ cat t7.py
import numpy as np
# import matplotlib.pyplot as plt
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import pycuda.autoinit
# kernel
kernel = SourceModule("""
#include <stdio.h>
__global__ void gaussian2D(float *output, float x, float y, float sigma, int
n_rows, int n_cols)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
int j = threadIdx.y + blockIdx.y * blockDim.y;
printf("%d ", n_cols);
if (i < n_cols && j < n_rows) {
size_t idx = j*n_cols +i;
//printf("%d ", idx);
}
}
""")
# host code
def gpu_gaussian2D(point, sigma, shape):
# Convert parameters into numpy array
x, y = np.array(point, dtype=np.float32)
sigma = np.float32(sigma)
n_rows, n_cols = np.array(shape, dtype=np.int)
print(n_rows)
output = np.empty((1, shape[0]*shape[1]), dtype= np.float32)
# Get kernel function
gaussian2D = kernel.get_function("gaussian2D")
# Define block, grid and compute
blockDim = (32, 32, 1) # 1024 threads in total
dx, mx = divmod(shape[1], blockDim[0])
dy, my = divmod(shape[0], blockDim[1])
gridDim = ((dx + (mx>0)), (dy + (my>0)), 1)
# Kernel function
gaussian2D (
cuda.Out(output), x, y, sigma,
n_rows, n_cols,
block=blockDim, grid=gridDim)
return output
point = (5, 5)
sigma = 3.0
shape = (10, 10)
result = gpu_gaussian2D(point, sigma, shape)
$ python t7.py
10
10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10