我正在 Stampede 上使用 Xeon Phi 处理 Collatz 猜想问题。我的代码已经过测试,对于不超过 100,000 的值可以正常工作;但在测试高达 100 万的值时,我几乎立即收到了分段错误("SIGSEGV")。几天来我一直在碰壁,却始终找不出错误所在。非常感谢任何帮助。
/* Unsigned integer type used for every Collatz value in this program. */
typedef unsigned long long int bigInt;

/* Number to test to (starting from 1). */
#define bigSize 100000

/* Outcome recorded for one starting number. */
struct batcher {
    int    numSteps;   /* number of steps taken to reach a smaller value */
    bigInt stopPoint;  /* that smaller value */
};
typedef struct batcher batcher;

/* A starting number bundled with its recorded outcome. */
struct to_ret {
    bigInt  num;       /* the starting number */
    batcher to_batch;  /* step count and stopping value for num */
};
typedef struct to_ret to_ret;
/*
 * For each index i from 1 to bigSize - 1, stores the number i + 1 together
 * with how many steps of the halving / (3n+1)/2 rule it takes to reach a
 * value no larger than that number, then copies the per-number records
 * into an array ordered by number.
 * Parts of the function are elided ("...") in this listing.
 */
int main () {
//Stores values as [num][#steps to smaller val][smaller val]
// NOTE(review): this is an automatic (stack) array of bigSize elements;
// raising bigSize to 1000000 may exceed the stack limit -- confirm against
// the reported segmentation fault.
to_ret retlist[bigSize];
//Stores values as [#steps to smaller val][smaller val], sorted by num
// NOTE(review): also an automatic array of bigSize elements; the same stack
// size concern applies.
batcher results[bigSize];
...
// Offload pragma: targets mic:0 and names retlist in its inout and shared
// clauses.
#pragma offload target(mic:0) inout(retlist) shared(retlist)
{
// i is not declared in the visible code; presumably it is declared in the
// elided part above -- confirm.
// The loop starts at index 1, so retlist[0] is not written here.
#pragma omp parallel for
for(i = 1; i < bigSize; i++){
// Slot i holds the starting number i + 1.
retlist[i].num = i + 1;
bigInt next = retlist[i].num;
int count = 0;
// Keep stepping until the value is no longer greater than the starting
// number. Each pass counts as one step: an odd value becomes (3n+1)/2,
// an even value is halved.
do {
count++;
if (next%2 == 1)
next=(3*next+1)/2;
else
next/=2;
} while(next > retlist[i].num);
// Record how many steps were taken and the value at which the loop stopped.
retlist[i].to_batch.numSteps = count;
retlist[i].to_batch.stopPoint = next;
}
}
///Organizes data into a sorted array
// NOTE(review): this loop starts at 0 and reads retlist[0].num, which the
// loop above never sets; unless the elided code initializes it, the index
// retlist[0].num - 1 is indeterminate -- confirm.
#pragma omp parallel for
for (i = 0; i < bigSize; i++){
results[retlist[i].num - 1] = retlist[i].to_batch;
}
...
}
我非常确信问题就出在上面的代码段中。
答案 0 :(得分:0)
以下代码可以正确编译:
它不再用 typedef(bigInt)把 unsigned long long int 隐藏起来。
它包含了索引变量 'i' 的声明。
我无法使用这些 offload/OpenMP 编译指示(pragma),所以暂时将它们注释掉了。
/*
 * The bigInt typedef is deliberately left out of this version: every value
 * is declared as unsigned long long directly instead.
 */

/* Number to test to (starting from 1). */
#define bigSize (100000)

/* Outcome recorded for one starting number. */
struct batcher {
    int numSteps;                      /* number of steps to a smaller value */
    unsigned long long int stopPoint;  /* that smaller value */
};

/* A starting number bundled with its recorded outcome. */
struct to_ret {
    unsigned long long int num;        /* the starting number */
    struct batcher to_batch;           /* step count and stopping value */
};

/* Stores values as [num][#steps to smaller val][smaller val]. */
static struct to_ret retlist[bigSize];

/* Stores values as [#steps to smaller val][smaller val], sorted by num. */
static struct batcher results[bigSize];
/*
 * Fills retlist with, for every number n from 1 to bigSize, the count of
 * steps of the halving / (3n+1)/2 rule needed to reach a value no larger
 * than n and that value, then copies the records into results ordered by n.
 *
 * Returns 0.
 */
int main ()
{
    int i;

    // more code here

    /*
     * Seed the record for n = 1 by hand. The search loop below starts at
     * index 1, so without this retlist[0].num would keep its static
     * zero-initialised value and the sort loop would index results[] with
     * 0 - 1, which wraps to a huge unsigned value (out-of-bounds write).
     */
    retlist[0].num = 1;
    retlist[0].to_batch.numSteps = 0;
    retlist[0].to_batch.stopPoint = 1;

    ////#pragma offload target(mic:0) inout(retlist) shared(retlist)
    {
        ////#pragma omp parallel for
        for (i = 1; i < bigSize; i++)
        {
            /* Slot i holds the starting number i + 1. */
            retlist[i].num = i + 1;

            unsigned long long next = retlist[i].num;
            int count = 0;

            /*
             * Step until the value is no longer greater than the starting
             * number. Each pass is one counted step: odd values become
             * (3n+1)/2, even values are halved.
             */
            do
            {
                count++;
                if (next % 2 == 1)
                    next = (3 * next + 1) / 2;
                else
                    next /= 2;
            } while (next > retlist[i].num);

            retlist[i].to_batch.numSteps = count;
            retlist[i].to_batch.stopPoint = next;
        }
    }

    ///Organizes data into a sorted array
    ////#pragma omp parallel for
    for (i = 0; i < bigSize; i++)
    {
        /* num is always in 1..bigSize here, so num - 1 is a valid index. */
        results[retlist[i].num - 1] = retlist[i].to_batch;
    }

    // more code here

    return(0);
} // end function: main
答案 1 :(得分:0)
完整的代码可以在 GitHub 上找到(见此处)。虽然它仍然存在不少效率问题(可以利用矢量化支持加以改进),但我目前的版本如下(采用了 barak-manos 的建议):
/* Unsigned integer type used for every Collatz value in this program. */
typedef unsigned long long int bigInt;

/*
 * Number to test up to (starting from 1).
 * The author's original note next to this value also lists
 * 340282366920938463463374607431768211455.
 */
#define bigSize 1000000000

/* Outcome recorded for one starting number. */
struct batcher {
    int    numSteps;   /* number of steps taken to reach a smaller value */
    bigInt stopPoint;  /* that smaller value */
};
typedef struct batcher batcher;

/* A starting number bundled with its recorded outcome. */
struct to_ret {
    bigInt  num;       /* the starting number */
    batcher to_batch;  /* step count and stopping value for num */
};
typedef struct to_ret to_ret;
/// Stores values as [num][#steps to smaller val][smaller val].
/// Declared at file scope with the target(mic) attribute.
/// NOTE(review): bigSize is 1000000000 in this listing, so this array has one
/// billion elements -- confirm host and coprocessor have enough memory.
__attribute__((target(mic))) to_ret retlist[bigSize];
/// Stores values as [#steps to smaller val][smaller val] and is sorted by num.
/// Declared at file scope with the target(mic) attribute; the same size
/// concern applies (one billion elements at the listed bigSize).
__attribute__((target(mic))) batcher results[bigSize];
/*
 * For every number n from 1 to bigSize, records in retlist/results the
 * count of steps of the halving / (3n+1)/2 rule needed to reach a value no
 * larger than n and that value, then adds to each entry the step count
 * stored for its stopping value.
 * Part of the function is elided ("...") in this listing.
 *
 * Returns 0.
 */
int main () {
    bigInt j;
    double start, end;

    /* The record for n = 1 is set by hand; the search loop starts at index 1. */
    retlist[0].num = 1;
    retlist[0].to_batch.numSteps = 0;
    retlist[0].to_batch.stopPoint = 1;

    start = omp_get_wtime();
    #pragma offload target(mic:0) out(results)
    {
        bigInt i;

        #pragma omp parallel for
        for (i = 1; i < bigSize; i++) {
            /*
             * next and count are declared inside the loop body so that every
             * thread works on its own copies. Declared outside the parallel
             * loop they would be shared between threads, and concurrent
             * iterations would overwrite each other's values.
             */
            bigInt next = retlist[i].num = i + 1;
            int count = 0;

            /*
             * Step until the value is no longer greater than the starting
             * number. Each pass is one counted step: odd values become
             * (3n+1)/2, even values are halved.
             */
            do {
                count++;
                if (next % 2 == 1)
                    next = (3 * next + 1) / 2;
                else
                    next /= 2;
            } while (next > retlist[i].num);

            retlist[i].to_batch.numSteps = count;
            retlist[i].to_batch.stopPoint = next;
        }

        ///Organizes data into a sorted array
        #pragma omp parallel for
        for (i = 0; i < bigSize; i++) {
            results[i] = retlist[i].to_batch;
        }
    }
    ...
    /*
     * Sequential pass in increasing j: adds to entry j the step count held
     * by the entry of its stopping value (index stopPoint - 1).
     */
    for (j = 0; j < bigSize; j++) {
        results[j].numSteps += results[results[j].stopPoint - 1].numSteps;
    }
    return(0);
}
如果有人感兴趣,欢迎随意 fork 我的项目。