我试图为矩阵乘法编写并行分治算法,矩阵为2 ^ N x 2 ^ n。我的解决方案适用于较小的矩阵,但是当我尝试将两个2 ^ 6 x 2 ^ 6或更大的矩阵相乘时,它开始写入随机0,并且计算结果非常糟糕。 我认为问题是太多的线程可能从那个大小开始,但我不知道如何解决它的一天。谢谢你的帮助。 这是我到目前为止的尝试:
bool ParallelMultiply(const std::vector<std::vector<int> > &m1, const std::vector<std::vector<int> > &m2, std::vector<std::vector<int> > &dest)
{
if((m1.size() == 1) && m1.size() == m1[0].size())
dest[0][0] = m1[0][0]*m2[0][0];
else
{
std::vector<std::vector<int> > temp(m1.size());
for(int i=0; i < temp.size(); i++)
temp[i].resize(temp.size());
int subSize = m1.size()/2;
std::vector<std::vector<int> > A11 = SubMatrix(m1, 0, 0);
std::vector<std::vector<int> > A12 = SubMatrix(m1, 0, subSize);
std::vector<std::vector<int> > A21 = SubMatrix(m1, subSize, 0);
std::vector<std::vector<int> > A22 = SubMatrix(m1, subSize, subSize);
std::vector<std::vector<int> > B11 = SubMatrix(m2, 0, 0);
std::vector<std::vector<int> > B12 = SubMatrix(m2, 0, subSize);
std::vector<std::vector<int> > B21 = SubMatrix(m2, subSize, 0);
std::vector<std::vector<int> > B22 = SubMatrix(m2, subSize, subSize);
std::vector<std::vector<int> > dest11 = SubMatrix(dest, 0, 0);
std::vector<std::vector<int> > dest12 = SubMatrix(dest, 0, subSize);
std::vector<std::vector<int> > dest21 = SubMatrix(dest, subSize, 0);
std::vector<std::vector<int> > dest22 = SubMatrix(dest, subSize, subSize);
std::vector<std::vector<int> > temp11 = SubMatrix(temp, 0, 0);
std::vector<std::vector<int> > temp12 = SubMatrix(temp, 0, subSize);
std::vector<std::vector<int> > temp21 = SubMatrix(temp, subSize, 0);
std::vector<std::vector<int> > temp22 = SubMatrix(temp, subSize, subSize);
std::future<bool> f1 = std::async(std::launch::async, ParallelMultiply, std::cref(A11), std::cref(B11), std::ref(dest11));
std::future<bool> f2 = std::async(std::launch::async, ParallelMultiply, std::cref(A12), std::cref(B21), std::ref(temp11));
std::future<bool> f3 = std::async(std::launch::async, ParallelMultiply, std::cref(A11), std::cref(B12), std::ref(dest12));
std::future<bool> f4 = std::async(std::launch::async, ParallelMultiply, std::cref(A12), std::cref(B22), std::ref(temp12));
std::future<bool> f5 = std::async(std::launch::async, ParallelMultiply, std::cref(A21), std::cref(B11), std::ref(dest21));
std::future<bool> f6 = std::async(std::launch::async, ParallelMultiply, std::cref(A22), std::cref(B21), std::ref(temp21));
std::future<bool> f7 = std::async(std::launch::async, ParallelMultiply, std::cref(A21), std::cref(B12), std::ref(dest22));
std::future<bool> f8 = std::async(std::launch::async, ParallelMultiply, std::cref(A22), std::cref(B22), std::ref(temp22));
f1.wait(); f2.wait(); f3.wait(); f4.wait(); f5.wait(); f6.wait(); f7.wait(); f8.wait();
AddMatrices(temp11, dest11);
AddMatrices(temp12, dest12);
AddMatrices(temp21, dest21);
AddMatrices(temp22, dest22);
MergeMatrices(dest11, dest12, dest21, dest22, dest);
}
return true;
}
另外两个功能:
void AddMatrices(const std::vector<std::vector<int> > &m, std::vector<std::vector<int> > &res)
{
for(int i=0; i < m.size(); i++)
for(int j=0; j < m.size(); j++)
res[i][j] += m[i][j];
}
void MergeMatrices(const std::vector<std::vector<int> > &m1, const std::vector<std::vector<int> > &m2, const std::vector<std::vector<int> > &m3, const std::vector<std::vector<int> > &m4, std::vector<std::vector<int> > &res)
{
int x = res.size()/2;
for(int i=0; i<x; i++)
for(int j=0; j<x; j++)
{
res[i][j] = m1[i][j];
res[i][x+j] = m2[i][j];
res[x+i][j] = m3[i][j];
res[x+i][x+j] = m4[i][j];
}
}