我想建立一个cuda程序进行射线追踪,我有以下代码:
void build_world(World *w, RGBAColor* buffer){
w->vp = (ViewPlane*) malloc(sizeof(ViewPlane));
w->vp->hres = 512;
w->vp->vres = 512;
w->vp->buffer = buffer;
w->vp->s = 1;
ViewPlane *viewplane;
cudaMalloc(&viewplane,sizeof(ViewPlane)); //return cudaSuccess but pointer still NULL
cudaMemcpy(viewplane,w->vp,sizeof(ViewPlane),cudaMemcpyHostToDevice);
free(w->vp);
w->vp = viewplane;
cudaMalloc(&(w->background_color),sizeof(RGBAColor)); //return cudaSuccess but pointer still NULL
*(w->background_color) = black; //Memory access error
cudaMalloc(&(w->sphere),sizeof(Sphere)); //return cudaSuccess but pointer still NULL
w->sphere->center = Point3D(0.0,0.0,0.0);
w->sphere->radius = 300;
}
World * w是一个静态全局指针,它位于全局内存中。 我的问题是我无法在设备内存中分配内存,所有“cudaMalloc”调用在大多数情况下都不起作用。
我做@RobertCrovella在评论中建议的内容,如下:
void build_world(World *w, RGBAColor* buffer){
checkCudaErrors( cudaMalloc(&(w->vp),sizeof(ViewPlane)));
getLastCudaError("viewplane allocate failed");
w->vp->hres = 512; //memory access errors occurs here
w->vp->vres = 512;
w->vp->buffer = buffer;
w->vp->s = 1;
checkCudaErrors( cudaMalloc(&(w->background_color),sizeof(RGBAColor)));
getLastCudaError("background allocate failed");
*(w->background_color) = black;
checkCudaErrors( cudaMalloc(&(w->sphere),sizeof(Sphere)));
getLastCudaError("sphere allocate failed");
w->sphere->center = Point3D(0.0,0.0,0.0);
w->sphere->radius = 300;
}
并且它可以工作一次......当cudaMalloc API没有时,它仍会返回“cudaSuccess”。
这是结构的定义:
typedef float3 Point3D;
typedef uchar4 RGBAColor;
struct Sphere{
Point3D center;
float radius;
};
struct ViewPlane{
public:
int hres;
int vres;
float s;
//float gamma;
//float inv_gamma;
RGBAColor *buffer;
};
struct World{
public:
ViewPlane *vp;
RGBAColor *background_color;
Sphere *sphere;
};
在考虑了@RobertCrovella在下面的答案中提到的问题之后,这是build_world的第三个版本:
struct World{
public:
ViewPlane *vp;
RGBAColor background_color;
Sphere *sphere;
};
void build_world(World *w, RGBAColor* buffer){
World *h_world;
h_world = (World*)malloc(sizeof(World));
ViewPlane *h_vp = (ViewPlane*)malloc(sizeof(ViewPlane));
h_vp->hres = 512;
h_vp->vres = 512;
h_vp->buffer = buffer;
h_vp->s = 1;
checkCudaErrors( cudaMalloc(&(h_world->vp),sizeof(ViewPlane)));
getLastCudaError("viewplane allocate failed");
checkCudaErrors( cudaMemcpy(h_world->vp,h_vp,sizeof(ViewPlane),cudaMemcpyHostToDevice));
getLastCudaError("viewplane memory copy failed");
h_world->background_color = black;
Sphere *h_sphere = (Sphere*)malloc(sizeof(Sphere));
h_sphere->center = Point3D(0.0,0.0,0.0);
h_sphere->radius = 300;
checkCudaErrors( cudaMalloc(&(h_world->sphere),sizeof(Sphere)));
getLastCudaError("sphere allocate failed");
checkCudaErrors( cudaMemcpy(h_world->sphere,h_sphere,sizeof(Sphere),cudaMemcpyHostToDevice));
getLastCudaError("sphere memory copy failed");
checkCudaErrors( cudaMalloc( &w , sizeof(World)));
getLastCudaError( "world allocate failed" );
checkCudaErrors( cudaMemcpy(w,h_world,sizeof(World),cudaMemcpyHostToDevice));
getLastCudaError("world memory copy failed");
free(h_world);free(h_vp);free(h_sphere);
}
这一次,所有cudaMemcpy
次调用都不起作用:当运行到此函数的末尾时,h_vp
和h_sphere
的值很好; h_world->vp
和h_world->sphere
确实指向设备momery的某个区域但包含错误的值; w
没有正确的值,它包含的所有指针都是0x00000000 ......
答案 0 :(得分:0)
这个问题正式变得“一团糟”,因为你发布了两个截然不同的build_world
版本,除了我要求你添加的错误检查之外,它们在重要方面有所不同。我会尝试解决一些问题,但是我的理解因你的帖子中的混乱而蒙上阴影。
*w
的指针build_world
已经是设备指针(即分配了cudaMalloc
),这似乎就是您所说的,那么这一切都不会工作。在设备上创建数据结构(也包含指向设备上其他数据结构的指针)是一个有点不直观的过程。您无法传递指向已存在于设备上的cudaMalloc
的指针(即已经是使用cudaMalloc
创建的区域的一部分。而是需要创建一组并行指针在主机上,单独使用cudaMalloc
这些指针,然后使用cudaMemcpy将指针值复制到设备数据结构中的相应区域。要查看我所指的另一个示例,请查看here 您无法在主机代码中取消引用设备指针。例如:
w->vp->hres = 512;
如果w
或w->vp
是使用cudaMalloc
设置的指针,则上述操作无效。相反,有必要在主机上创建并行数据结构,在那里设置值,然后从主机到设备cudaMemcpy
:
h_vp->hres = 512;
cudaMemcpy(d_vp, h_vp, sizeof(vp_struct), cudaMemcpyHostToDevice);
请注意,在这个简化的描述中,我正在掩饰我在上面第一点提到的问题。
如果您反复调用build_world
,则需要确保在传递相同的cudaFree
指针时正确使用*w
。
编辑:为响应第3版build_world
的额外发布,我选择创建一个示例代码,其中应修复其余问题:
#include <stdio.h>
#include <vector_functions.h>
#define black make_uchar4(4,3,2,1)
#define white make_uchar4(0,1,2,3)
#define cudaCheckErrors(msg) \
do { \
cudaError_t __err = cudaGetLastError(); \
if (__err != cudaSuccess) { \
fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
msg, cudaGetErrorString(__err), \
__FILE__, __LINE__); \
fprintf(stderr, "*** FAILED - ABORTING\n"); \
exit(1); \
} \
} while (0)
typedef float3 Point3D;
typedef uchar4 RGBAColor;
struct Sphere{
Point3D center;
float radius;
};
struct ViewPlane{
public:
int hres;
int vres;
float s;
//float gamma;
//float inv_gamma;
RGBAColor *buffer;
};
struct World{
public:
ViewPlane *vp;
RGBAColor background_color;
Sphere *sphere;
};
__global__ void my_kernel(World *w){
printf("w->vp->hres = %d\n", w->vp->hres);
printf("w->background_color.y = %d\n", w->background_color.y);
printf("w->sphere->radius = %f\n", w->sphere->radius);
printf("w->vp->buffer->y = %d\n", w->vp->buffer->y);
}
void build_world(World **w, RGBAColor* buffer){
World *h_world;
h_world = (World*)malloc(sizeof(World));
ViewPlane *h_vp = (ViewPlane*)malloc(sizeof(ViewPlane));
h_vp->hres = 512;
h_vp->vres = 512;
h_vp->s = 1;
cudaMalloc((void **)&(h_vp->buffer), sizeof(RGBAColor));
cudaCheckErrors("viewplane RGBAColor allocate failed");
cudaMemcpy(h_vp->buffer, buffer, sizeof(RGBAColor), cudaMemcpyHostToDevice);
cudaCheckErrors("viewplane RGBAColor copy failed");
cudaMalloc((void **)&(h_world->vp),sizeof(ViewPlane));
cudaCheckErrors("viewplane allocate failed");
cudaMemcpy(h_world->vp,h_vp,sizeof(ViewPlane),cudaMemcpyHostToDevice);
cudaCheckErrors("viewplane memory copy failed");
h_world->background_color = black;
Sphere *h_sphere = (Sphere*)malloc(sizeof(Sphere));
h_sphere->center = (Point3D) make_float3(0.0,0.0,0.0);
h_sphere->radius = 300;
cudaMalloc((void **)&(h_world->sphere),sizeof(Sphere));
cudaCheckErrors("sphere allocate failed");
cudaMemcpy(h_world->sphere,h_sphere,sizeof(Sphere),cudaMemcpyHostToDevice);
cudaCheckErrors("sphere memory copy failed");
cudaMalloc((void **)w , sizeof(World));
cudaCheckErrors( "world allocate failed" );
cudaMemcpy(*w,h_world,sizeof(World),cudaMemcpyHostToDevice);
cudaCheckErrors("world memory copy failed");
free(h_world);free(h_vp);free(h_sphere);
}
int main(){
World *d_w;
RGBAColor my_buffer = white;
build_world(&d_w, &my_buffer);
my_kernel<<<1,1>>>(d_w);
cudaDeviceSynchronize();
cudaCheckErrors("kernel fail");
return 0;
}
您可以使用nvcc -arch=sm_20 -o t98 t98.cu
当我编译并运行此代码时,我没有得到错误和以下输出:
$ ./t98
w->vp->hres = 512
w->background_color.y = 3
w->sphere->radius = 300.000000
w->vp->buffer->y = 1
$