我目前正在使用 CUDA 实现光线跟踪。
但是我的内核在运行时出了问题，我不确定具体原因是什么。
这是我的代码。
// Reports a failed CUDA runtime call.
//
// Writes "<what>: <cuda error string>\n" to stderr when `status` is not
// cudaSuccess and returns true; returns false (and prints nothing) otherwise.
static bool cMainCudaFailed(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return false;
    }
    fprintf(stderr, "%s: %s\n", what, cudaGetErrorString(status));
    return true;
}

/*
 * Host-side driver for the `render` kernel.
 *
 * Uploads `spheres` and `hostColorBuffer` to the device, launches `render`
 * with BLK_WIDTH x BLK_HEIGHT threads per block, waits for it to finish and
 * copies the color buffer back into `hostColorBuffer`.
 *
 * Parameters:
 *   spheres          host array of `numSpheres` Sphere objects (copied to the device)
 *   hostColorBuffer  host array of `_width * _height` Vec3d; copied to the
 *                    device before the launch and overwritten with the
 *                    device contents afterwards
 *   numSpheres       number of elements in `spheres`
 *   _width, _height  image dimensions in pixels
 *
 * Error handling: each CUDA call is checked; on the first failure the error
 * is logged to stderr and the function returns immediately, because every
 * later step depends on the earlier ones having succeeded. In that case
 * `hostColorBuffer` is left untouched.
 *
 * NOTE(review): `deviceSphereBuffer` and `deviceColorBuffer` are declared
 * outside this function and are (re)allocated on every call without being
 * freed here. Whether another part of the program owns/frees them is not
 * visible from this function -- confirm, otherwise repeated calls leak
 * device memory.
 */
void cMain(Sphere *spheres, Vec3d *hostColorBuffer, int numSpheres, int _width, int _height)
{
    // sphere
    // Sizes are computed in size_t so large inputs cannot overflow an int.
    const size_t sphereSize = static_cast<size_t>(numSpheres) * sizeof(Sphere);
    if (cMainCudaFailed(cudaMalloc((void**)&deviceSphereBuffer, sphereSize),
                        "cudaMalloc()"))
    {
        return;
    }
    if (cMainCudaFailed(cudaMemcpy(deviceSphereBuffer, spheres, sphereSize, cudaMemcpyHostToDevice),
                        "cudaMemcpy(cudaMemcpyHostToDevice)"))
    {
        return;
    }

    // color
    const size_t colorSize = static_cast<size_t>(_width) * static_cast<size_t>(_height) * sizeof(Vec3d);
    if (cMainCudaFailed(cudaMalloc((void**)&deviceColorBuffer, colorSize),
                        "cudaMalloc()"))
    {
        return;
    }
    if (cMainCudaFailed(cudaMemcpy(deviceColorBuffer, hostColorBuffer, colorSize, cudaMemcpyHostToDevice),
                        "cudaMemcpy(cudaMemcpyHostToDevice)"))
    {
        return;
    }

    // Values handed to the kernel.
    double invWidth = 1 / double(_width);
    double invHeight = 1 / double(_height);
    double fov = 30, aspectRatio = _width / double(_height);
    // Tangent of half the field of view (fov is in degrees, converted to radians).
    double angle = tan(M_PI * 0.5 * fov / 180);

    // Launch configuration: one thread per pixel, BLK_WIDTH x BLK_HEIGHT threads per block.
    // NOTE(review): the grid size uses truncating integer division, so when
    // _width/_height are not multiples of BLK_WIDTH/BLK_HEIGHT the trailing
    // pixels get no thread (and dimensions smaller than one block give an
    // invalid zero-sized grid). Rounding up instead is only safe if `render`
    // bounds-checks its pixel coordinates -- the kernel is not visible here.
    dim3 blockPerGrid(_width / BLK_WIDTH, _height / BLK_HEIGHT);
    dim3 threadsPerBlock(BLK_WIDTH, BLK_HEIGHT);
    render<<<blockPerGrid, threadsPerBlock>>>(deviceSphereBuffer, numSpheres, _width, _height, invWidth, invHeight, angle, aspectRatio, deviceColorBuffer);

    // cudaGetLastError() reads AND clears the error, so it must be called
    // exactly once and its result kept; this reports launch-configuration errors.
    const cudaError_t launchStatus = cudaGetLastError();
    printf("Sync: %s\n", cudaGetErrorString(launchStatus));
    if (launchStatus != cudaSuccess)
    {
        return;
    }

    // Faults raised while the kernel is executing (e.g. "unspecified launch
    // failure") only surface at the next synchronizing call.
    if (cMainCudaFailed(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
    {
        return;
    }

    if (cMainCudaFailed(cudaMemcpy(hostColorBuffer, deviceColorBuffer, colorSize, cudaMemcpyDeviceToHost),
                        "cudaMemcpy(cudaMemcpyDeviceToHost)"))
    {
        return;
    }
}
BLK_WIDTH 和 BLK_HEIGHT 都是 32。
参数 _width 和 _height 都是 320。
运行时打印出如下错误：
cudaDeviceSynchronize: unspecified launch failure
cudaMemcpy(cudaMemcpyDeviceToHost): unspecified launch failure
我想知道这段代码出了什么问题。