当我使用恒定记忆时,我遇到了问题。它会发生错误:
ERROR: an illegal memory access was encountered
似乎内核函数没有执行。 但如果我不选择常量内存,一切都还可以。所以它让我很困惑。我想了很长时间。但我仍然没有理由。你能帮我解决一下这个问题吗?非常感谢你。
如果变量s未使用常量内存,则一切正常。但如果s使用常量内存,程序将会中断。
使用常量内存的变量定义如下:
#ifdef USE_CONST_MEM
__constant__ Sphere s[SPHERES];
#else
Sphere *s;
#endif
内核函数定义如下:
#ifdef USE_CONST_MEM
__global__ void kernel(unsigned char *ptr) {
printf("ok2");
#else
__global__ void kernel(Sphere *s, unsigned char *ptr) {
#endif
// map from threadIdx/BlockIdx to pixel position
printf("ok2");
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
REAL ox = (x - DIM / 2);
REAL oy = (y - DIM / 2);
REAL r = 0, g = 0, b = 0;
REAL maxz = -INF;
__syncthreads();
for (int i = 0; i<SPHERES; i++) {
REAL n;
REAL t = s[i].hit(ox, oy, &n);
if (t > maxz) {
REAL fscale = n;
r = s[i].r * fscale;
g = s[i].g * fscale;
b = s[i].b * fscale;
maxz = t;
printf("r: %.2f g: %.2f, b %.2f\n", r, g, b);
}
}
__syncthreads();
ptr[offset * 4 + 0] = (int)(r * 255);
ptr[offset * 4 + 1] = (int)(g * 255);
ptr[offset * 4 + 2] = (int)(b * 255);
ptr[offset * 4 + 3] = 255;
}
// globals needed by the update routine
struct DataBlock {
unsigned char *dev_bitmap;
CPUAnimBitmap *bitmap;
};
有调用内核函数的函数。
void generate_frame(DataBlock *d, int ticks) {
//START_GPU
//movin the spheres
kernelMoving << <128, 32 >> >(s, SPHERES);
printf("ok0\n");
// generate a bitmap from our sphere data
dim3 grids(DIM / 16, DIM / 16);
dim3 threads(16, 16);
#ifdef USE_CONST_MEM
Sphere *d_s;
cudaGetSymbolAddress((void **)&d_s, s);
printf("ok0-1\n");
kernel << <grids, threads >> >(s, d->dev_bitmap);
cudaDeviceSynchronize();
cudaError_t error = cudaGetLastError();
if(error!=cudaSuccess)
{
fprintf(stderr,"ERROR: %s\n", cudaGetErrorString(error) );
exit(-1);
}
printf("ok0-1-1\n");
#else
printf("ok0-2\n");
kernel << <grids, threads >> >(s, d->dev_bitmap);
#endif
printf("ok1\n");
//END_GPU
HANDLE_ERROR(cudaMemcpy(d->bitmap->get_ptr(),
d->dev_bitmap,
d->bitmap->image_size(),
cudaMemcpyDeviceToHost));
}
初始化代码如下:
#ifdef USE_CONST_MEM
#else
HANDLE_ERROR(cudaMalloc((void**)&s,
sizeof(Sphere) * SPHERES));
#endif
// allocate temp memory, initialize it, copy to constant
// memory on the GPU, then free our temp memory
Sphere *temp_s = (Sphere*)malloc(sizeof(Sphere) * SPHERES);
for (int i = 0; i<SPHERES; i++) {
temp_s[i].r = rnd(1.0f);
temp_s[i].g = rnd(1.0f);
temp_s[i].b = rnd(1.0f);
temp_s[i].x = rnd(1000.0f) - 500;
temp_s[i].y = rnd(1000.0f) - 500;
temp_s[i].z = rnd(1000.0f) - 500;
temp_s[i].radius = rnd(10.0f) + 5;
temp_s[i].dx = STEP_SIZE * ((rand() / (float)RAND_MAX) * 2 - 1);
temp_s[i].dy = STEP_SIZE * ((rand() / (float)RAND_MAX) * 2 - 1);
temp_s[i].dz = STEP_SIZE * ((rand() / (float)RAND_MAX) * 2 - 1);
}
#ifdef USE_CONST_MEM
HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s,
sizeof(Sphere) * SPHERES));
#else
HANDLE_ERROR(cudaMemcpy(s, temp_s, sizeof(Sphere)*SPHERES, cudaMemcpyHostToDevice));
#endif
free(temp_s);
cuda的版本是8.0。系统是ubuntu 16.04。
答案 0 :(得分:1)
rand()