请帮帮我。我有以下代码
...
#include <accelmath.h>
#include <openacc.h>
// Size constants used by the host loop and the device kernels.
const long G = 100000L;       // K below is defined from this value
const unsigned GL = 100000u;  // element count of the global arrays
const long K = G;             // cap tested by `LIFE<K` before a particle is created
// NOTE(review): an integer threshold, although it is compared against the
// float field Particle::x -- confirm that int is the intended type.
const int LE = 1;
// Record stored in the global `particles` array.
struct Particle
{
// Set to 5.0 when the particle is created; later compared against LE.
float x;
// Receives the current MAX_ELEMENT value when the particle is created.
float rs;
};
// Particle storage; function_device() fills slot LIFE when it creates a particle.
Particle particles[GL];
// Per-particle flag: 1 when Particle::x >= LE, otherwise 0.
int sort[GL];
// Particle indices grouped by flag: flag-1 indices fill upward from slot 0,
// flag-0 indices fill downward from slot LIFE-1.
int ind01[GL];
// Value copied into Particle::rs for the next particle; incremented after each creation.
long int MAX_ELEMENT=1;
// POSITION1 and POSITION0 are not referenced by the code visible in this post.
int POSITION1;
int POSITION0;
// Number of particles created so far; also the bound of the per-particle loops.
int LIFE=0;
// Lets main's loop run its first iteration even though LIFE starts at 0.
bool start=true;
// Set at the end of function_device() to the smaller of (GL1-count0) and count1.
int mini;
// Counters (first loop) and then write cursors (second loop) for flag-0 / flag-1 particles.
int count0;
int count1;
// Holds LIFE-1, the slot where the downward cursor count0 starts.
int GL1;
// Reset to 0 at the end of function_device(); not otherwise used in the visible code.
int js;
// NOTE(review): count0 and count1 are listed as device_resident, yet the question's
// debug snippet prints them from host code -- confirm the host values are meaningful.
#pragma acc declare device_resident(ind01,POSITION0,POSITION1,mini,GL1,js,MAX_ELEMENT,count0,count1,K)
// LIFE, particles and sort are the variables that the `update host/device`
// directives in this post transfer.
#pragma acc declare create(LIFE,particles,sort)
// One simulation step as posted in the question: creates at most one particle,
// flags every particle against LE in sort[], groups the particle indices by
// flag in ind01[], and finally computes mini.
// NOTE(review): this is the version the asker reports as not working; the
// review notes below mark the spots that the posted answer changes.
void function_device()
{
// Bring LIFE and particles to the host so that LIFE can be printed.
// NOTE(review): the transfer is async and no wait is visible before the
// print -- confirm the copy has completed when LIFE is read.
#pragma acc update host(LIFE,particles) async
std::cout<<"LIFE before="<<LIFE<<std::endl;
// Send the host copies of LIFE and particles to the device before the kernels run.
#pragma acc update device(LIFE,particles) async
// One gang with vector length 1: resets both counters and appends a single
// particle while LIFE<K.
#pragma acc parallel num_gangs(1) vector_length(1) present(particles) async
{
count0=0;
count1=0;
if(LIFE<K)
{
particles[LIFE].x=5.0;
particles[LIFE].rs=MAX_ELEMENT;
++MAX_ELEMENT;
++LIFE;
}
}
// Flag each particle (1 when x>=LE, else 0) and count both kinds.
// NOTE(review): this loop directive comes after the closing brace of the
// parallel region above, so no compute construct encloses it here; the posted
// answer turns it into `parallel loop`.
#pragma acc loop vector reduction(+:count0,count1)
for(int i=0; i<LIFE; ++i)
{
if(particles[i].x>=LE)
{
sort[i]=1;
count1=count1+1;
}
else
{
sort[i]=0;
count0=count0+1;
}
}
// Re-seed the counters as write cursors: count1 starts at slot 0 and
// count0 at the last used slot (LIFE-1).
#pragma acc parallel num_gangs(1) vector_length(1)
{
GL1=LIFE-1;
count0=GL1;
count1=0;
}
// Scatter particle indices into ind01: flag-1 indices upward from the front,
// flag-0 indices downward from the back.
// NOTE(review): like the loop above, this directive is outside any compute
// construct visible here.
#pragma acc loop seq
for(int i=0; i<LIFE; ++i)
{
if(sort[i]==1)
{
ind01[count1++]=i;
}
else
{
ind01[count0--]=i;
}
}
// mini = min(GL1-count0, count1); js is reset to 0.
#pragma acc parallel num_gangs(1) vector_length(1)
{
mini=GL1-count0;
if(count1<mini) mini=count1;
js=0;
}
// (further code elided in the original post)
...
// Copy the device value of LIFE back so that main's loop condition can test it.
#pragma acc update host(LIFE) async
}
// Host driver from the question: initialises the NVIDIA device, then calls
// function_device() repeatedly, printing LIFE and the iteration number.
// NOTE(review): the loop condition has no upper bound on LIFE; the posted
// answer adds `LIFE < G` to make the loop terminate.
int main(int argc, char **argv)
{
    acc_init(acc_device_nvidia);

    // `start` forces the first pass because LIFE is still 0 at that point.
    for (int step = 1; start || LIFE > 0; ++step)
    {
        std::cout << " LIFE=" << LIFE << std::endl;
        start = false;
        function_device();
        std::cout << "MAIN LOOP # " << step << std::endl;
    }
}
我在GPU上为已分配的数组赋值之后:
particles[LIFE].x=5.0;
但是用下面的代码打印输出时:
#pragma acc update host(LIFE,particles) async
std::cout<<"LIFE after injector="<<LIFE<<std::endl;
for(int i=0; i<LIFE; ++i) std::cout<<" particles: "<<particles[i]<<std::endl;
和
#pragma acc update host(LIFE,sort) async
std::cout<<"LIFE after 1="<<LIFE<<" c0="<<count0<<" c1="<<count1<<std::endl;
for(int i=0; i<LIFE; ++i) std::cout<<"sort: "<<sort[i]<<std::endl;
结果是LIFE = 1,而数组中只有0,就好像我没有用5.0f填充particles数组一样。应该如何填充数组"particles"?另外,我在GPU上启动串行代码的方式是否正确:
#pragma acc parallel num_gangs(1) vector_length(1)
{
mini=GL1-count0;
if(count1<mini) mini=count1;
js=0;
}
如果OpenACC指令的使用有错误,请指出。为什么
#pragma acc update host(LIFE) async
这条指令在不加async时就不起作用?
答案 0 :(得分:0)
我修改了您的代码,使其能够正常运行。您有几个孤立的(orphaned)循环,需要为它们加上"parallel";另外,由于有几个变量是全局变量,您需要使用原子操作(atomic)。在原子操作中使用通过"declare create"定义的变量时,我确实遇到了一个编译器问题,因此需要把其中一些变量移到数据区域(data region)中。我会把这个问题报告给我们的工程师。
请注意,我还添加了一个终止条件:当LIFE达到G时结束循环,否则程序似乎会进入无限循环。
#include <iostream>
#include <accelmath.h>
#include <openacc.h>
// Size constants used by the host loop and the device kernels.
const long G = 100000L;       // main's loop stops once LIFE reaches this value
const unsigned GL = 100000u;  // element count of the global arrays
const long K = G;             // cap tested by `LIFE<K` before a particle is created
// NOTE(review): an integer threshold, although it is compared against the
// float field Particle::x -- confirm that int is the intended type.
const int LE = 1;
// Element type of the global `particles` array.
struct Particle
{
// Assigned 5.0 when the particle is created and compared with LE when particles are flagged.
float x;
// Assigned the value of MAX_ELEMENT at the moment the particle is created.
float rs;
};
// Particle storage; function_device() fills slot LIFE when it creates a particle.
Particle particles[GL];
// Per-particle flag: 1 when Particle::x >= LE, otherwise 0.
int sort[GL];
// Particle indices grouped by flag: flag-1 indices are written from slot 0
// upward, flag-0 indices from slot LIFE-1 downward.
int ind01[GL];
// Value copied into Particle::rs for the next particle; main() sets it to 1.
int MAX_ELEMENT;
// POSITION1 and POSITION0 are not referenced by the code visible in this post.
int POSITION1;
int POSITION0;
// Number of particles created so far; main() sets it to 0 before the loop.
int LIFE;
// Lets main's loop run its first iteration while LIFE is still 0.
bool start=true;
// Set at the end of function_device() to the smaller of (GL1-count0) and count1.
int mini;
// Holds LIFE-1, the slot where the downward cursor count0 starts.
int GL1;
// Reset to 0 at the end of function_device(); not otherwise used in the visible code.
int js;
// Counters (first loop) and then write cursors (second loop) for flag-0 / flag-1 particles.
int count0;
int count1;
// LIFE, MAX_ELEMENT, count0 and count1 are not named in either directive
// below; main() places them on the device with `data copyin` instead.
#pragma acc declare device_resident(ind01,POSITION0,POSITION1,mini,GL1,js)
#pragma acc declare create(particles,sort)
// Runs one simulation step on the device.
//
// LIFE, MAX_ELEMENT, count0 and count1 are updated through atomics here;
// main() places them on the device with `data copyin` before calling.
//
// Steps:
//   1. reset both counters and, while LIFE<K, append one particle;
//   2. flag every particle in sort[] (1 when x>=LE, else 0) and count both kinds;
//   3. re-seed the counters as write cursors (count1 at 0, count0 at LIFE-1);
//   4. scatter particle indices into ind01[] -- flag-1 from the front,
//      flag-0 from the back;
//   5. store min(GL1-count0, count1) in mini and reset js.
void function_device()
{
    // Step 1 -- one gang, vector length 1.
    #pragma acc parallel num_gangs(1) vector_length(1) present(particles)
    {
        #pragma acc atomic write
        count0 = 0;
        #pragma acc atomic write
        count1 = 0;

        if (LIFE < K)
        {
            particles[LIFE].x = 5.0f;
            particles[LIFE].rs = static_cast<float>(MAX_ELEMENT);
            #pragma acc atomic update
            ++MAX_ELEMENT;
            #pragma acc atomic update
            ++LIFE;
        }
    }

    // Step 2 -- the counters are global, so each increment is atomic.
    #pragma acc parallel loop
    for (int idx = 0; idx < LIFE; ++idx)
    {
        const bool above = particles[idx].x >= LE;
        sort[idx] = above ? 1 : 0;
        if (above)
        {
            #pragma acc atomic update
            count1 += 1;
        }
        else
        {
            #pragma acc atomic update
            count0 += 1;
        }
    }

    // Step 3 -- turn the counters into cursors for the scatter below.
    #pragma acc parallel num_gangs(1) vector_length(1)
    {
        GL1 = LIFE - 1;
        count0 = GL1;
        count1 = 0;
    }

    // Step 4 -- each iteration atomically claims a unique slot, then writes
    // its own index there.
    #pragma acc parallel loop
    for (int idx = 0; idx < LIFE; ++idx)
    {
        int slot;
        if (sort[idx] == 1)
        {
            #pragma acc atomic capture
            slot = count1++;
        }
        else
        {
            #pragma acc atomic capture
            slot = count0--;
        }
        ind01[slot] = idx;
    }

    // Step 5 -- again one gang, vector length 1.
    #pragma acc parallel num_gangs(1) vector_length(1)
    {
        const int fromBack = GL1 - count0;
        mini = (count1 < fromBack) ? count1 : fromBack;
        js = 0;
    }
}
// Host driver: initialises the NVIDIA device and the counters, copies the
// scalar state to the device, then calls function_device() until LIFE
// reaches G (or drops back to 0).
int main(int argc, char **argv)
{
    acc_init(acc_device_nvidia);

    LIFE = 0;
    MAX_ELEMENT = 1;

    // These scalars are modified with atomics inside function_device(), so
    // they are placed on the device by this data region.
    #pragma acc data copyin(LIFE,MAX_ELEMENT,count0,count1)
    {
        // `start` forces the first pass because LIFE is still 0 at that point.
        for (int step = 1; start || (LIFE > 0 && LIFE < G); ++step)
        {
            std::cout << " LIFE=" << LIFE << std::endl;
            start = false;
            function_device();
            // Refresh the host copy of LIFE so the loop condition sees the device value.
            #pragma acc update self(LIFE)
            std::cout << "MAIN LOOP # " << step << std::endl;
        }
    }
}