CUDA:cudaDeviceSynchronize返回错误代码4

时间:2019-06-23 01:31:31

标签: cuda

我目前正在使用cuda实现光线跟踪。 但是我在内核方面遇到了一些麻烦。 我不知道到底是什么问题。

这是我的代码。

// Reports a failed CUDA runtime call on stderr as "<what>: <error string>".
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "%s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Host-side driver for one render pass.
 *
 * Uploads `spheres` and `hostColorBuffer` to the device, launches the
 * `render` kernel, waits for it to finish and copies the color buffer back
 * into `hostColorBuffer`.
 *
 * Parameters:
 *   spheres          host array of `numSpheres` Sphere objects
 *   hostColorBuffer  host array of `_width * _height` Vec3d values; used as
 *                    the initial device contents and overwritten with the
 *                    device result on success
 *   numSpheres       number of entries in `spheres` (must be >= 0)
 *   _width, _height  image size in pixels (must be > 0)
 *
 * Launch layout: a 2D grid of (_width / BLK_WIDTH) x (_height / BLK_HEIGHT)
 * blocks, each BLK_WIDTH x BLK_HEIGHT threads.
 *
 * Error handling: the first failing CUDA call is reported on stderr and the
 * function returns immediately, so one failure no longer cascades into the
 * calls that follow it.
 *
 * NOTE(review): deviceSphereBuffer / deviceColorBuffer are declared outside
 * this function and are never released here; if nothing else calls cudaFree
 * on them, every call leaks both allocations -- confirm ownership.
 */
void cMain(Sphere *spheres, Vec3d *hostColorBuffer, int numSpheres, int _width, int _height)
{
    // Reject sizes that would lead to a division by zero below, a zero-sized
    // grid, or a negative count wrapping around to a huge byte size.
    if (numSpheres < 0 || _width <= 0 || _height <= 0)
    {
        fprintf(stderr, "cMain(): invalid arguments (numSpheres=%d, width=%d, height=%d)\n",
                numSpheres, _width, _height);
        return;
    }

    // Byte counts are computed in size_t so large scenes/images do not
    // overflow a 32-bit int.
    const size_t sphereSize = static_cast<size_t>(numSpheres) * sizeof(Sphere);
    const size_t colorSize = static_cast<size_t>(_width) * static_cast<size_t>(_height) * sizeof(Vec3d);

    // sphere
    if (!cudaSucceeded(cudaMalloc((void**)&deviceSphereBuffer, sphereSize), "cudaMalloc()"))
    {
        return;
    }
    if (!cudaSucceeded(cudaMemcpy(deviceSphereBuffer, spheres, sphereSize, cudaMemcpyHostToDevice),
                       "cudaMemcpy(cudaMemcpyHostToDevice)"))
    {
        return;
    }

    // color
    if (!cudaSucceeded(cudaMalloc((void**)&deviceColorBuffer, colorSize), "cudaMalloc()"))
    {
        return;
    }
    if (!cudaSucceeded(cudaMemcpy(deviceColorBuffer, hostColorBuffer, colorSize, cudaMemcpyHostToDevice),
                       "cudaMemcpy(cudaMemcpyHostToDevice)"))
    {
        return;
    }

    double invWidth = 1 / double(_width);
    double invHeight = 1 / double(_height);
    double fov = 30, aspectRatio = _width / double(_height);
    double angle = tan(M_PI * 0.5 * fov / 180);

    // Grid of blocks covering the image.
    // NOTE(review): integer division truncates, so pixels beyond the last
    // full block are not covered when _width/_height are not multiples of
    // BLK_WIDTH/BLK_HEIGHT. Switching to a ceil-div is only safe if `render`
    // bounds-checks its pixel coordinates -- confirm against the kernel.
    dim3 blockPerGrid(_width / BLK_WIDTH, _height / BLK_HEIGHT);
    // one thread per pixel
    dim3 threadsPerBlock(BLK_WIDTH, BLK_HEIGHT);

    render<<<blockPerGrid, threadsPerBlock >>> (deviceSphereBuffer, numSpheres, _width, _height, invWidth, invHeight, angle, aspectRatio, deviceColorBuffer);

    // cudaGetLastError() reads AND clears the pending error, so it must be
    // called exactly once and its result kept; calling it twice makes the
    // second call always report "no error".
    const cudaError_t launchStatus = cudaGetLastError();
    printf("Sync: %s\n", cudaGetErrorString(launchStatus));
    if (launchStatus != cudaSuccess)
    {
        return;
    }

    // Faults raised while the kernel is executing only surface at the next
    // synchronizing call, i.e. here.
    if (!cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
    {
        return;
    }

    cudaSucceeded(cudaMemcpy(hostColorBuffer, deviceColorBuffer, colorSize, cudaMemcpyDeviceToHost),
                  "cudaMemcpy(cudaMemcpyDeviceToHost)");
}

BLK_WIDTH 和 BLK_HEIGHT 均为 32。参数 _width 和 _height 均为 320。

程序打印出如下错误:
cudaDeviceSynchronize: unspecified launch failure
cudaMemcpy(cudaMemcpyDeviceToHost): unspecified launch failure

我想知道这段代码出了什么问题。

0 个答案:

没有答案