我正在使用PGI C编译器--pgcc v16.10.0 64位 - 了解如何使用OpenACC进行编程
这是我模拟粒子传输过程的代码
typedef struct {
double position;
double direction;
double weight;
int cell;
int group;
int alive;
} Particle;
int size = 100000; // number of particles to be simulated
int tot = (int) (1.3 * size); // this variable limits the maximum of next generation particles
int capacity = 0; // this variable indicates the actual number of next generation particles
/* particles to be simulated */
Particle *par = (Particle *) malloc(size * sizeof(Particle));
/* next generation particles produced */
particle *next = (Particle *) malloc(tot * sizeof(Particle));
/* initialization */
for (int i = 0; i < size; i++){
par[i].position = rand1() * 100.0; // random number between 0.0~1.0
par[i].direction = rand2(); // random number between -1.0~1.0
par[i].weight = 1.0;
par[i].cell = 2;
par[i].group = rand1() > 0.5 ? 1 : 2;
par[i].alive = 1;
}
/* some parameters used in simulation */
double keff = 1.0;
double tracklength, collision, absorption;
/* start simulating */
int generation;
for (generation = 1; generation <= 100; generation++){
int CellID, MatID, GroupID;
int k; // k-th particle to be simulated
#pragma acc parallel copy(capacity) copyin(par[0:size],size, keff) copyout(next[0:tot])
#pragma acc loop reduction(+:tracklength, collision, absorption)
for (k = 0; k < size; k++){
/* do some calculating with par[k] */
/* secondary particle produced under certain circumstances */
if (condition){
next[capacity].position = par[k].position;
next[capacity].direction = rand2();
next[capacity].weight = 1.0;
next[capacity].cell = par[k].cell;
next[capacity].group = rand1() < 0.9 ? 1 : 2;
next[capacity].alive = 1;
capacity++;
}
}
/* after simulation of current generation, update the parameters */
keff = ........ // one formula to update keff
size = capacity;
capacity = 0;
tot = (int) (1.3 * size);
free(par);
par = next;
next = (Particle *) malloc(tot * sizeof(Particle));
}
free(par);
free(next);
我用
编译了代码 pgcc -acc -Minfo=accel -ta=tesla:cc30,time -O0 main.c -o test
并获取以下信息:
Loop carried dependence of par->alive, par->cell, par->direction, par->position, par->weight, par->group prevents parallelization
Loop carried dependence of par->direction, par->group, par->position prevents vectorization
Loop carried reuse of next->position prevents parallelization
然后运行可执行文件./test
发生错误
call to cuMemFreeHost returned error 700: Illegal address during kernel execution
我不知道如何解决这个问题[SAD]。
BTW代码运行良好,并在gcc忽略#pragma
编译时返回正确的结果