请添加fork(),join()的代码,只要它可以添加。 我需要将此矩阵乘法并行化。
代码如下:
public static long[][] dc(long[][] M1, long[][] M2, int n)
{
long[][] M = new long[n][n];
if (n == 1)
M[0][0] = M1[0][0] * M2[0][0];
else
{
int size= n/2;
long[][] A11 = new long[n/2][n/2];
long[][] A12 = new long[n/2][n/2];
long[][] A21 = new long[n/2][n/2];
long[][] A22 = new long[n/2][n/2];
long[][] B11 = new long[n/2][n/2];
long[][] B12 = new long[n/2][n/2];
long[][] B21 = new long[n/2][n/2];
long[][] B22 = new long[n/2][n/2];
copy1(A11,M1,0,0,size);
copy1(A12,M1,0,(n/2),size);
copy1(A21,M1,(n/2),0,size);
copy1(A22,M1,(n/2),(n/2),size);
copy1(B11,M2,0,0,size);
copy1(B12,M2,0,(n/2),size);
copy1(B21,M2,(n/2),0,size);
copy1(B22,M2,(n/2),(n/2),size);
long[][] C11 = add( dc(A11, B11,size), dc(A12, B21,size) ,size);//need to fork
long[][] C12 = add( dc(A11, B12,size), dc(A12, B22,size) ,size);//need to fork
long[][] C21 = add( dc(A21, B11,size), dc(A22, B21,size) ,size);//need to fork
long[][] C22 = add( dc(A21, A12,size), dc(A22, B22,size) ,size);//need to fork
copy2(M,C11,0,0,size);
copy2(M,C12,0,(n/2),size);
copy2(M,C21,(n/2),0,size);
copy2(M,C22,(n/2),(n/2),size);
}
return M;
}
我应该使用java还是fork(),exec(),pthread()?
编辑:
这是我的并行化代码。它不适用于大于N = 32 我得到一个例外 - Java.outofmemory Exception
public static long[][] dc(long[][] M1, long[][] M2, int n)
{
long[][] M=new long[n][n];
if (n == 1)
M[0][0] = M1[0][0] * M2[0][0];
else
{
int size= n/2;
long[][] A11 = new long[n/2][n/2];
long[][] A12 = new long[n/2][n/2];
long[][] A21 = new long[n/2][n/2];
long[][] A22 = new long[n/2][n/2];
long[][] B11 = new long[n/2][n/2];
long[][] B12 = new long[n/2][n/2];
long[][] B21 = new long[n/2][n/2];
long[][] B22 = new long[n/2][n/2];
copy1(A11,M1,0,0,size);
copy1(A12,M1,0,(n/2),size);
copy1(A21,M1,(n/2),0,size);
copy1(A22,M1,(n/2),(n/2),size);
copy1(B11,M2,0,0,size);
copy1(B12,M2,0,(n/2),size);
copy1(B21,M2,(n/2),0,size);
copy1(B22,M2,(n/2),(n/2),size);
class T1 extends Thread
{
private long[][] p1;
public void run()
{
p1=dc(A11,B11,size);
//System.out.println("P1");
//disp(p1);
}
};
class T2 extends Thread
{
private long[][] p2;
public void run()
{
p2=dc(A12,B21,size);
//System.out.println("P2");
//disp(p2);
}
};
class T3 extends Thread
{
private long[][] p3;
public void run()
{
p3=dc(A11,B12,size);
//System.out.println("P3");
//disp(p3);
}
};
class T4 extends Thread
{
private long[][] p4;
public void run()
{
p4=dc(A12,B22,size);
//System.out.println("P4");
//disp(p4);
}
};
class T5 extends Thread
{
private long[][] p5;
public void run()
{
p5=dc(A21,B11,size);
//System.out.println("P5");
//disp(p5);
}
};
class T6 extends Thread
{
private long[][] p6;
public void run()
{
p6=dc(A22,B21,size);
//System.out.println("P6");
//disp(p6);
}
};
class T7 extends Thread
{
private long[][] p7;
public void run()
{
p7=dc(A21,B12,size);
//System.out.println("P7");
//disp(p7);
}
};
class T8 extends Thread
{
private long[][] p8;
public void run()
{
p8=dc(A22,B22,size);
//System.out.println("P8");
//disp(p8);
}
};
T1 t1=new T1(); T2 t2=new T2(); T3 t3=new T3(); T4 t4=new T4();
T5 t5=new T5(); T6 t6=new T6(); T7 t7=new T7(); T8 t8=new T8();
t1.start();t2.start();t3.start();t4.start();t5.start();t6.start();t7.start();t8.start();
try
{
t1.join();
t2.join();
t3.join();
t4.join();
t5.join();
t6.join();
t7.join();
t8.join();
//System.out.println("Threads completed");
//disp(t1.p1);
}
catch (InterruptedException e)
{
// TODO Auto-generated catch block
System.out.println("Exception occured");
}
long[][] C11=add(t1.p1,t2.p2,size);
long[][] C12=add(t3.p3,t4.p4,size);
long[][] C21=add(t5.p5,t6.p6,size);
long[][] C22=add(t7.p7,t8.p8,size);
copy2(M,C11,0,0,size);
copy2(M,C12,0,(n/2),size);
copy2(M,C21,(n/2),0,size);
copy2(M,C22,(n/2),(n/2),size);
}
return M;
}
public static long[][] add(long[][] M1, long[][] M2, int size)
{
long[][] M=new long[size][size];
for(int i=0;i<size;i++)
{
for(int j=0;j<size;j++)
{
M[i][j]=M1[i][j]+M2[i][j];
}
}
//System.out.println("Add method");
return M;
}
您能告诉我如何让线程适用于大量输入吗? 这段代码实际上比没有线程的代码慢。 SO线程实际上根本没用。 冷,请你更正代码,让我知道如何有效地并行化,以便它比没有并行化的代码更快?
谢谢!