我有这个结构
// Element type of the host and device arrays used in this snippet:
// two int fields (x, y) followed by one float field (z).
// NOTE(review): as written, the declaration of x has no terminating semicolon.
struct Data {
int x
int y;
float z;
};
我把它传给内核时没有问题:
// Kernel: stores x = 1, y = 1 and z = 1.0 into the element of d_data that is
// selected by myCounter.
// NOTE(review): myCounter is not declared anywhere in this snippet.
__global__ void calculate(Data *d_data) {
d_data[myCounter].x = 1;
d_data[myCounter].y = 1;
d_data[myCounter].z = 1.0;
}
// Number of Data elements allocated on both host and device.
// NOTE(review): the directive is spelled in upper case as written; the
// preprocessor directive is `#define`.
#DEFINE MAX_SIZE 100
// Host driver: allocates MAX_SIZE Data elements on host and device, copies the
// host buffer to the device, launches calculate with one block of `elements`
// threads, copies the buffer back and prints the first `elements` entries.
int main() {
// Host buffer; its contents are not initialized before the upload below.
Data * data = (Data *)malloc(MAX_SIZE * sizeof(Data));
Data *d_data;
const int DATA_BYTES = MAX_SIZE * sizeof(Data);
int elements = 20;
cudaError_t cudaStatus;
// Device buffer of the same size as the host buffer.
cudaStatus = cudaMalloc((void **)&d_data, DATA_BYTES);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
}
cudaStatus = cudaMemcpy(d_data, data, DATA_BYTES, cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
}
// One block of `elements` threads.
calculate << < 1, elements >> > (d_data);
// NOTE(review): the return value of this copy is not stored, so the check
// below still tests cudaStatus from the earlier host-to-device copy.
cudaMemcpy(data, d_data, DATA_BYTES, cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
}
cudaDeviceSynchronize();
// NOTE(review): i is not declared in this snippet. The loop reads through
// d_data (the pointer filled in by cudaMalloc) rather than through data, and
// the format string pairs %2d, %2.1f, %s with int, int, float arguments.
for (i = 0; i < elements; i++) {
printf("%2d %2.1f %s\n", d_data[i].x, d_data[i].y,
d_data[i].z); // this prints nothing
}
cudaStatus = cudaDeviceReset();
}
当我在内核函数 calculate 中检查这个结构体数组时,它能打印出正确的结果;但是当我尝试用 cudaMemcpy 把数据从设备传回主机时,程序没有报错,却也没有打印任何内容。我该怎样把这个结构体数组从设备传回主机?
答案 0 :(得分:1)
您所显示的代码存在一些问题:

1. myCounter 和 i 这样的变量没有在任何地方定义。您需要为它们提供定义(myCounter 在内核中,i 在 main 中)。
2. 您试图通过设备变量 d_data 而不是主机变量 data 进行打印。在主机代码中解引用设备指针在 CUDA 中是非法的。应先把数据复制到主机变量 data,然后从那里打印。
3. printf 的格式说明符不正确。结构中的数据类型依次为 int、int 和 float,而您使用的 %2d %2.1f %s 匹配的是 int、float 和字符串变量(以空字符结尾的字符数组),与该结构不符。

以下代码解决了上述问题,在我这里可以正确运行:
$ cat t430.cu
#include <stdio.h>
#include <stdlib.h>
// Element type shared by the host and device arrays: two int fields (x, y)
// followed by one float field (z).
struct Data {
int x; // the question's version was missing this semicolon
int y;
float z;
};
// Kernel: each thread sets the fields of one element of d_data to 1.
//
// Launch layout: the element index is threadIdx.x only, so this is meant for a
// single 1D block; with several blocks every block would write the same
// elements. There is no bounds check, so the caller must make sure d_data
// holds at least blockDim.x elements.
//
// d_data: device pointer to the array of Data to fill.
__global__ void calculate(Data *d_data) {
  int myCounter = threadIdx.x;  // this line was missing in the question
  d_data[myCounter].x = 1;
  d_data[myCounter].y = 1;
  // float literal: z is a float, so avoid going through a double constant.
  d_data[myCounter].z = 1.0f;
}
// Number of Data elements allocated on both host and device.
#define MAX_SIZE 100

// Reports a failed CUDA runtime call.
// Prints `message` followed by the runtime's error string when `status` is not
// cudaSuccess. Returns true on failure so the caller can stop early.
static bool cudaFailed(cudaError_t status, const char *message) {
  if (status == cudaSuccess) {
    return false;
  }
  fprintf(stderr, "%s (%s)\n", message, cudaGetErrorString(status));
  return true;
}

// Host driver: allocates MAX_SIZE Data elements on host and device, uploads the
// host buffer, launches calculate with one block of `elements` threads, copies
// the buffer back and prints the first `elements` entries from the host copy.
// Returns 0 on success and 1 if any allocation or CUDA call fails.
int main() {
  const size_t DATA_BYTES = MAX_SIZE * sizeof(Data);
  const int elements = 20;  // threads launched == entries written and printed
  int i;                    // this line was missing in the question
  int exitCode = 1;
  Data *d_data = NULL;

  // calloc rather than malloc so the upload below never reads uninitialized
  // host memory.
  Data *data = (Data *)calloc(MAX_SIZE, sizeof(Data));
  if (data == NULL) {
    fprintf(stderr, "malloc failed!\n");
    return exitCode;
  }

  // Single-pass block: `break` jumps to the shared cleanup below.
  do {
    if (cudaFailed(cudaMalloc((void **)&d_data, DATA_BYTES),
                   "cudaMalloc failed!")) {
      break;
    }
    if (cudaFailed(cudaMemcpy(d_data, data, DATA_BYTES, cudaMemcpyHostToDevice),
                   "cudaMemcpy failed!")) {
      break;
    }

    calculate<<<1, elements>>>(d_data);
    // A launch does not return a status; a bad configuration is reported by
    // cudaGetLastError, a fault inside the kernel by the following sync.
    if (cudaFailed(cudaGetLastError(), "kernel launch failed!")) {
      break;
    }
    if (cudaFailed(cudaDeviceSynchronize(), "kernel execution failed!")) {
      break;
    }

    // The status of this copy is what gets checked (the original tested the
    // stale status of the earlier upload here).
    if (cudaFailed(cudaMemcpy(data, d_data, DATA_BYTES, cudaMemcpyDeviceToHost),
                   "cudaMemcpy failed!")) {
      break;
    }

    for (i = 0; i < elements; i++) {
      // Print from the host copy; the question tried to print from d_data.
      printf("%2d %2d %2.1f\n", data[i].x, data[i].y, data[i].z);
    }
    exitCode = 0;
  } while (0);

  // Release both buffers on every path; cudaFree accepts a null pointer.
  if (cudaFailed(cudaFree(d_data), "cudaFree failed!")) {
    exitCode = 1;
  }
  free(data);
  if (cudaFailed(cudaDeviceReset(), "cudaDeviceReset failed!")) {
    exitCode = 1;
  }
  return exitCode;
}
$ nvcc -arch=sm_61 -o t430 t430.cu
$ cuda-memcheck ./t430
========= CUDA-MEMCHECK
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
1 1 1.0
========= ERROR SUMMARY: 0 errors
$
答案 1 :(得分:-1)
您应该将此宏添加到您的代码中
// Evaluates a CUDA runtime call exactly once; if it does not return
// cudaSuccess, prints the file, line and the runtime's error string to stderr
// and terminates the process with EXIT_FAILURE.
// Every line of the definition ends in a backslash so the whole body belongs to
// the macro; do/while(0) makes the expansion behave like a single statement.
// Needs <stdio.h> and <stdlib.h> for fprintf and exit.
#define CUDA_SAFE_CALL(call)                                             \
  do {                                                                   \
    cudaError_t err = (call);                                            \
    if (cudaSuccess != err) {                                            \
      fprintf (stderr, "Cuda error in file '%s' in line %i : %s.",       \
               __FILE__, __LINE__, cudaGetErrorString(err) );            \
      exit(EXIT_FAILURE);                                                \
    }                                                                    \
  } while (0)
然后:
// Copy the results back to the host; stop with a diagnostic if the copy fails.
CUDA_SAFE_CALL(cudaMemcpy(data, d_data, DATA_BYTES, cudaMemcpyDeviceToHost));
// cudaDeviceSynchronize also returns a cudaError_t, so check it the same way.
CUDA_SAFE_CALL(cudaDeviceSynchronize());
顺便说一下,你的 myCounter 似乎不对。你能否说明一下上面代码中 myCounter 的值是如何得到的?