我使用TBB实现了Jacobi算法,它运行得很好。然后我使用缩减来并行化收敛计算,但出于某种原因,如果我使用多于1个逻辑核心,我会得到一个分段错误,我无法找出原因。
我可以在只有1个逻辑核心的系统上使用多个线程。
使用OpenMP的相同实现无需麻烦
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>
#include <tbb/blocked_range.h>
#include <tbb/task_scheduler_init.h>
#include <tbb/tick_count.h>
// ----------------------------------------------------------------
#define SIZE 1024
#define RESIDUO 0.0009f*SIZE
#define THREADS 2
using namespace tbb;
// ----------------------------------------------------------------
struct Sum {
float ret;
float (*a)[SIZE];
float (*x);
float (*b);
Sum(float A[SIZE][SIZE], float X[SIZE], float B[SIZE]) : ret(0), a(A), x(X), b(B) {}
Sum( Sum&, split ) {ret = 0;}
void operator()( const blocked_range<int>& r ) {
float temp = ret;
for( int i = r.begin(); i != r.end(); i++ ) {
float sum = 0.0f;
for(int j = 0; j < SIZE; j++){
sum += a[i][j] * x[j];
}
temp += pow(b[i] - sum, 2);
}
ret = temp;
}
void join( Sum& rhs ) {ret += rhs.ret;}
};
/*
// || b - Ax ||
*/
int converge(float a[SIZE][SIZE], float x[SIZE], float b[SIZE]){
Sum total(a, x, b);
parallel_reduce( blocked_range<int>(0, SIZE), total );
float norm = sqrt(total.ret);
printf("Ret: %f | Residuo: %f\n", total.ret, norm);
return (norm <= RESIDUO);
}
// ----------------------------------------------------------------
float randomFloat()
{
float r = (float)rand()/(float)RAND_MAX;
return r;
}
// ----------------------------------------------------------------
int check_ddm(float (*a)[SIZE]){
float sum = 0.0f;
int i = 0, j = 0;
for(i = 0; i < SIZE; i++){
sum = 0.0f;
for(j = 0; j < SIZE; j++){
if(i != j){
sum += a[i][j];
}
}
if(a[i][i] < sum){
printf("line: %d, sum: %f, a[i][i]: %f \n", i, sum, a[i][i]);
for(j = 0; j < SIZE; j++){
if(i != j) printf("%f ", a[i][j]);
else printf("(%f) ", a[i][j]);
}
printf("\n");
return 0;
}
}
return 1;
}
// ----------------------------------------------------------------
int generate_ddm(float (*a)[SIZE], float *b)
{
int i = 0, j = 0;
float line = 0.0f;
for(i = 0; i < SIZE; i++){
line = 0.0f;
for(j = 0; j < SIZE; j++){
if(i != j){
a[i][j] = randomFloat();
}
line += a[i][j];
}
a[i][i] = SIZE;
b[i] = line + SIZE;
}
return check_ddm(a);
}
// ----------------------------------------------------------------
int main( )
{
float (*x)[SIZE] = (float(*)[SIZE])malloc(sizeof *x * 2);
float (*a)[SIZE] = (float(*)[SIZE])malloc(sizeof *a * SIZE);
float (*b) = (float*)malloc(sizeof(float) * SIZE);
int i = 0, j = 0;
float delta = 0.0f;
int read = 0;
int write = 1;
srand(time(NULL));
tbb::task_scheduler_init init(THREADS);
// set up initial solution
for(i = 0; i < SIZE; i++){
x[0][i] = i;
x[1][i] = i;
}
// generate a diagonal dominant matrix
if(!generate_ddm(a, b)){
printf("Array generated is not ddm!\n");
return 1;
}
tick_count startTime = tick_count::now();
while(!converge(a, x[write], b)){
read = !read;
write = !write;
parallel_for(blocked_range<int>(0,SIZE),
[&] (const blocked_range<int>& r) {
for (int i = r.begin(); i < r.end(); i++) {
float delta = 0.0f;
for(int j = 0; j < SIZE; j++){
if(j != i){
delta += a[i][j] * x[read][j];
}
}
x[write][i] = (b[i] - delta) / a[i][i];
}
});
}
tick_count lastTime = tick_count::now();
float walltime = (lastTime - startTime).seconds();
printf("tbb %f\n", walltime);
converge(a, x[write], b);
printf("x0: %f | x%d: %f\n", x[write][0], SIZE-1, x[write][SIZE-1]);
free(a);
free(b);
free(x);
return 0;
}
段错误发生在Sum
类内的以下行:
sum += a[i][j] * x[j];
如果我将该行更改为
float tmpa = a[i][j];
float tmpx = x[j];
sum += tmpa * tmpx;
错误仍在继续
sum += tmpa * tmpx;
答案 0 :(得分:3)
在原始版本中,“拆分构造函数”使a,x和b未定义。他们需要从收到的Sum&amp; amp;论点。例如,将拆分构造函数更改为:
Sum( Sum& s, split ) {a=s.a; b=s.b; x=s.x; ret = 0;}
答案 1 :(得分:0)
将Class更改为lambda表达式解决了这个问题。它可能是TBB的parallel_reduce
int converge(float a[SIZE][SIZE], float x[SIZE], float b[SIZE]){
float val = 0.0f;
val = parallel_reduce(
blocked_range<int>(0, SIZE),
0.0f,
[&]( const blocked_range<int>& r, float init )->float {
float temp = init;
for(int i = r.begin(); i != r.end(); i++ ) {
float sum = 0.0f;
for(int j = 0; j < SIZE; j++){
sum += a[i][j] * x[j];
}
temp += pow(b[i] - sum, 2);
}
return temp;
},
[]( float x, float y)->float{
return x+y;
}
);
float norm = sqrt(val);
printf("Ret: %f | Residuo: %f\n", val, norm);
return (norm <= RESIDUO);
}