我正在使用openCL实现光线跟踪器。我安装了NVidia的CUDA sdk,一切似乎都设置得很好,我的平台都被检测到了(Intel和Nvidia),每个人都看到了它的设备(intel有HD Graphics 4000,Nvidia有我的GPU:GeForce GT 630M)。 / p>
我的问题是我可以使用英特尔平台运行我的应用程序,但不能使用Nvidia的平台。我不相信问题出现在我的代码中,但这是我的设备代码:
#include "constants.h" //only a couple of #define
typedef struct Sphere {
float x, y, z;
float radius;
float r, g, b;
}Sphere;
float hit(Sphere s, float ox, float oy, float *n) {
float radius = s.radius;
float dx = ox - s.x;
float dy = oy - s.y;
if (dx*dx + dy*dy < radius*radius) {
float dz = sqrt(radius*radius - dx*dx - dy*dy);
*n = dz / sqrt(radius * radius);
return dz + s.z;
}
return -INF;
}
__kernel void rayTracer(__global Sphere* spheres, write_only image2d_t res) {
// Get the index of the current element to be processed
int x = get_global_id(0);
int y = get_global_id(1);
int ox = x - WIDTH / 2;
int oy = y - HEIGHT / 2;
float r = 0, g = 0, b = 0;
float maxz = (float) -INF;
for (int i = 0; i<NUM_SPHERES; i++)
{
float n;
float t = hit(spheres[i], ox, oy, &n);
if (t > maxz)
{
float fscale = 1;
r = spheres[i].r * fscale;
g = spheres[i].g * fscale;
b = spheres[i].b * fscale;
}
}
write_imagei(res, (int2)(x, y), (int4)(r, g, b, 0));
}
我的主机应用程序也很简单。我只是初始化openCL结构,设置数据然后再读回来。
同样,当使用英特尔平台时,我的应用程序运行正常,我可以看到光线追踪图像。使用Nvidia时,尽管API错误代码始终为0,但不会显示任何结果。
有人有任何想法可能是什么问题吗?
提前致谢
--- --- EDIT
以下是一些主机代码
设置OpenCL结构:
//Setup OpenCL
cl_platform_id platform = getPlatforms();
cl_device_id device = getDevices(platform, CL_DEVICE_TYPE_GPU);
cl_context_properties ctxProps[] =
{
CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
0, 0
};
cl_context ctx = clCreateContext(ctxProps, 1, &device, NULL, NULL, &err);
cl_command_queue queue1 = clCreateCommandQueue(ctx, device, NULL, &err);
GetPlatforms和GetDevices是要求用户选择平台和设备的功能
创建程序并构建它:
cl_program prog = clCreateProgramWithSource(ctx, 1, srcs, &srcSize, &err);
err = clBuildProgram(prog, 1, &device, NULL, NULL, NULL);
if (err < 0)
{
//PRINT BUILD ERROR
size_t log_size;
clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char* log = (char*)calloc(log_size + 1, sizeof(char));
clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG, log_size + 1, log, NULL);
printf("%s/n", log);
free(log);
std::cin >> err;
return 1;
}
cl_kernel krn = clCreateKernel(prog, "rayTracer", &err);
//....CREATE SOME SPHERES...
//Setup device data
cl_image_format fmt;
fmt.image_channel_order = CL_RGBA;
fmt.image_channel_data_type = CL_UNSIGNED_INT8;
cl_mem spheresBuff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, spheres.size() * sizeof(Sphere), spheres.data(), &err);
cl_mem resBuff = clCreateImage2D(ctx, CL_MEM_WRITE_ONLY, &fmt, WIDTH, HEIGHT, 0, NULL, &err);
//Setup kernel arguments
err = clSetKernelArg(krn, 0, sizeof(cl_mem), (void*)&spheresBuff);
err = clSetKernelArg(krn, 1, sizeof(cl_mem), (void*)&resBuff);
//Run kernel
size_t gSize[] = { WIDTH, HEIGHT };
err = clEnqueueNDRangeKernel(queue1, krn, 2, NULL, gSize, NULL, 0, NULL, NULL);
//Read result
Image img = createRGBAImage(WIDTH, HEIGHT);
size_t origin[] = { 0, 0, 0 };
size_t region[] = { WIDTH , HEIGHT , 1 };
err = clEnqueueReadImage(queue1, resBuff, CL_TRUE, origin, region, 0, 0, img.pixel.data(), 0, NULL, NULL);
答案 0 :(得分:0)
在内核执行之前尝试将clEnqueueMapBuffer
与CL_MAP_READ一起使用,在clEnqueueUnmapMemObject
的内核执行之后尝试使用spheresBuff
。