这是比较2d数组元素的cuda代码。但是在编译时会显示错误。我使用的是pycuda,我发布了我想实现的所有代码。
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy
c = numpy.empty((2,2), dtype = object)
d = numpy.empty((2,2), dtype = object)
for i in range(0,2):
for j in range(0,2):
c[i][j]= input("Value for c")
d[i][j]= input("Value for d")
c_gpu = cuda.mem_alloc(c.nbytes)
d_gpu = cuda.mem_alloc(d.nbytes)
cuda.memcpy_htod(c_gpu, c)
cuda.memcpy_htod(d_gpu, d)
module1 = SourceModule ("""
__device__ int mystrcmp(const char *str_a, const char *str_b, unsigned len = 256){
int match = 1;
unsigned i = 0;
while ((i < len) && (match == 1) ){
if (str_a[i] == str_b[i])
i= i+1;
else
match = 0;
}
return match;
}
__device__ void mystrasgn(const char *str_a, const char *str_b, unsigned len = 256){
unsigned i = 0;
while (str_b[i]!= '\0' ){
str_a[i] = str_b[i])
i= i+1;
}
str_a[i]='\0';
}
__global__ void com(char **c, char **d)
{
int idx = threadIdx.x + threadIdx.y*blockDim.x;
if(mystrcmp(c[idx], d[idx]))
mystrasgn(c[idx], c[idx]);
else
mystrasgn(c[idx], d[idx]);
}
""")
__device__ int mystrcmp(const char *str_a, const char *str_b, unsigned len = 256){
int match = 1;
unsigned i = 0;
while ((i < len) && (match == 1) ){
if (str_a[i] == str_b[i])
i= i+1;
else
match = 0;
}
return match;
}
__device__ void mystrasgn(const char *str_a, const char *str_b, unsigned len = 256){
unsigned i = 0;
while (str_b[i]!= '\0' ){
str_a[i] = str_b[i])
i= i+1;
}
str_a[i]='\0';
}
__global__ void com(char **c, char **d)
{
int idx = threadIdx.x + threadIdx.y*blockDim.x;
if(mystrcmp(c[idx], d[idx]))
mystrasgn(c[idx], c[idx]);
else
mystrasgn(c[idx], d[idx]);
}
module1 = SourceModule ("""
__device__ int mystrcmp(const char *str_a, const char *str_b, unsigned len = 256){
int match = 1;
unsigned i = 0;
while ((i < len) && (match == 1) ){
if (str_a[i] == str_b[i])
i= i+1;
else
match = 0;
}
return match;
}
__device__ void mystrasgn(const char *str_a, const char *str_b, unsigned len = 256){
unsigned i = 0;
while (str_b[i]!= '\0' ){
str_a[i] = str_b[i])
i= i+1;
}
str_a[i]='\0';
}
__global__ void com(char **c, char **d)
{
int idx = threadIdx.x + threadIdx.y*blockDim.x;
if(mystrcmp(c[idx], d[idx]))
mystrasgn(c[idx], c[idx]);
else
mystrasgn(c[idx], d[idx]);
}
""")
fy = module1.get_function("com")
fy(c_gpu, d_gpu, block=(2,2,1))
x = numpy.empty_like(c)
cuda.memcpy_dtoh(x, c_gpu)
print(x)
我写了2个函数,一个比较两个2d数组的元素,另一个比较数组的元素。但是在编译时我收到如下错误: