大家好,希望有人能帮帮我。我有一段用 OpenMP 并行化 Prim 算法的代码,需要让它在 Xeon Phi 上运行。请帮帮我,我真的不知道该怎么做。下面是我的 OpenMP 代码。
void ParallelPrim(double *pMatrix, TTreeNode** pMinSpanningTree, int Size)
{
int LastAdded;
TGraphNode NearestNode;
TGraphNode **NotInMinSpanningTree = new TGraphNode* [Size-1];
LastAdded = 0;
for(int i = 0; i < Size-1; i++)
{
NotInMinSpanningTree[i] = new TGraphNode;
NotInMinSpanningTree[i]->NodeNum = i+1;
NotInMinSpanningTree[i]->Distance = -1.0f;
NotInMinSpanningTree[i]->ParentNodeNum = -1;
}
for(int Iter = 1; Iter < Size; Iter++)
{
#pragma omp parallel for
for(int i = 0; i < Size-1; i++)
if(NotInMinSpanningTree[i] != NULL)
{
double t1 = NotInMinSpanningTree[i]->Distance;
double t2 = pMatrix[(NotInMinSpanningTree[i]->NodeNum) * Size + LastAdded];
if(((t1 < 0) && (t2 > 0)) || (t1>0) && (t2 > 0) && (t1 > t2))
{
NotInMinSpanningTree[i]->Distance = t2;
NotInMinSpanningTree[i]->ParentNodeNum = LastAdded;
}
}
NearestNode.NodeNum = -1;
NearestNode.Distance = 3000;
#pragma omp parallel
{
TGraphNode ThreadNearestNode;
ThreadNearestNode.NodeNum = -1;
ThreadNearestNode.Distance = 3000;
#pragma omp for
for(int i = 0; i < Size-1; i++)
{
if(NotInMinSpanningTree[i] != NULL)
{
double t1 = NotInMinSpanningTree[i]->Distance;
double t2 = ThreadNearestNode.Distance;
if((t1 > 0) && (t1 < t2) )
{
ThreadNearestNode.Distance = t1;
ThreadNearestNode.NodeNum = NotInMinSpanningTree[i]->NodeNum;
}
}
}
#pragma omp critical
{
if(ThreadNearestNode.Distance < NearestNode.Distance)
{
NearestNode.Distance = ThreadNearestNode.Distance;
NearestNode.NodeNum = ThreadNearestNode.NodeNum;
}
}
}
pMinSpanningTree[NearestNode.NodeNum] = new TTreeNode;
pMinSpanningTree[NearestNode.NodeNum]->NodeNum = NotInMinSpanningTree[NearestNode.NodeNum-1]->ParentNodeNum;
pMinSpanningTree[NearestNode.NodeNum]->Distance = NearestNode.Distance;
int Parent = NotInMinSpanningTree[NearestNode.NodeNum-1]->ParentNodeNum;
if(pMinSpanningTree[Parent] != NULL)
{
TTreeNode *tmp = new TTreeNode;
tmp->Distance = NearestNode.Distance;
tmp->NodeNum = NearestNode.NodeNum;
}
else
{
pMinSpanningTree[Parent] = new TTreeNode;
pMinSpanningTree[Parent]->Distance = NearestNode.Distance;
pMinSpanningTree[Parent]->NodeNum = NearestNode.NodeNum;
}
LastAdded = NearestNode.NodeNum;
delete NotInMinSpanningTree[NearestNode.NodeNum - 1];
NotInMinSpanningTree[NearestNode.NodeNum - 1] = NULL;
}
delete[] NotInMinSpanningTree;
}
答案 0 :(得分:0)
在Intel Xeon Phi协处理器上运行代码有两个基本选项。您可以使用-mmic和-qopenmp标志编译整个程序,然后使用micnativeloadex或使用scp将可执行文件和所需库复制到协处理器来运行它。或者,您可以省略-mmic,而是修改您的代码,以便您希望在协处理器上运行的代码部分属于卸载部分,其中只有部分代码将被发送到协处理器以运行,其余部分代码将在主机上运行。
Avi发送给您的演示文稿概述了协处理器的编程。此外,您还可以在https://software.intel.com/en-us/articles/programming-and-compiling-for-intel-many-integrated-core-architecture找到有关协处理器编译和优化的基本信息。
不过需要特别注意的是,您的代码没有矢量化,并且含有相当多的串行部分。要在协处理器上获得最佳性能,您的代码必须同时进行矢量化和并行化。