我目前正在用 C 实现 Strassen 矩阵乘法算法,并且已经能够正常工作。但是,我在每次递归时传递子矩阵的方式远非最优,因为我每次都会新建一个数组,并从原始矩阵中复制元素。这是我的实现:
/* Release a square matrix stored as an array of separately allocated rows. */
static void mmdc_free_matrix(int **matrix, int rows)
{
    if (matrix == NULL) {
        return;
    }
    for (int r = 0; r < rows; r++) {
        free(matrix[r]);
    }
    free(matrix);
}

/* Allocate a size x size matrix as an array of rows; returns NULL on failure. */
static int **mmdc_alloc_matrix(int size)
{
    int **rows = malloc(sizeof *rows * (size_t)size);
    if (rows == NULL) {
        return NULL;
    }
    for (int r = 0; r < size; r++) {
        rows[r] = malloc(sizeof **rows * (size_t)size);
        if (rows[r] == NULL) {
            /* Only the first r rows were allocated successfully. */
            mmdc_free_matrix(rows, r);
            return NULL;
        }
    }
    return rows;
}

/*
 * Multiply two square matrices of the same size by recursive quadrant
 * decomposition (eight recursive products per level, combined pairwise).
 *
 * A, B      - input matrices, each `length` x `length`, stored as arrays of
 *             row pointers. They are only read.
 * length    - dimension of A and B. The halving below assumes a power of two.
 * threshold - once the dimension is at or below this value the product is
 *             delegated to matrix_multiply() instead of recursing further.
 *
 * Returns a newly allocated `length` x `length` matrix owned by the caller
 * (free every row, then the row array), or NULL if `length` is not positive
 * or an allocation made by this function fails.
 */
int **matrix_multiply_divide_conquer(const int **A, const int **B, int length, int threshold)
{
    if (length <= 0) {
        return NULL;
    }

    int **result = mmdc_alloc_matrix(length);
    if (result == NULL) {
        return NULL;
    }

    /*
     * `<=` rather than `==`: an exact-match test never fires when the halving
     * sequence skips over `threshold`, which recursed without end. A 1x1
     * matrix cannot be split any further either.
     */
    if (length <= threshold || length == 1) {
        matrix_multiply(A, B, result, length, length, length, false);
        return result;
    }

    int half = length / 2;
    int **a[2][2] = {{NULL, NULL}, {NULL, NULL}};
    int **b[2][2] = {{NULL, NULL}, {NULL, NULL}};
    int failed = 0;

    /*
     * get_sub_matrix() allocates and fills each quadrant itself, so no
     * separate pre-allocation is needed (the previous one was overwritten
     * and leaked).
     */
    for (int r = 0; r < 2 && !failed; r++) {
        for (int c = 0; c < 2 && !failed; c++) {
            a[r][c] = get_sub_matrix(A, r * half, c * half, half);
            b[r][c] = get_sub_matrix(B, r * half, c * half, half);
            if (a[r][c] == NULL || b[r][c] == NULL) {
                failed = 1;
            }
        }
    }

    /* C[r][c] = A[r][0] * B[0][c] + A[r][1] * B[1][c] */
    for (int r = 0; r < 2 && !failed; r++) {
        for (int c = 0; c < 2 && !failed; c++) {
            /* Propagate the caller's threshold instead of a hard-coded 4. */
            int **left = matrix_multiply_divide_conquer((const int **)a[r][0], (const int **)b[0][c], half, threshold);
            int **right = matrix_multiply_divide_conquer((const int **)a[r][1], (const int **)b[1][c], half, threshold);
            int **sum = NULL;

            if (left != NULL && right != NULL) {
                sum = matrix_addition((const int **)left, (const int **)right, half, half);
            }

            if (sum != NULL) {
                /* NOTE(review): assumes combine_sub_matrix copies the values into result - confirm. */
                combine_sub_matrix((const int **)sum, result, half, r * half, c * half);
            } else {
                failed = 1;
            }

            /*
             * NOTE(review): matrix_addition is assumed to return a freshly
             * allocated matrix; the pointer comparison avoids a double free
             * should it return one of its operands instead.
             */
            if (sum != left && sum != right) {
                mmdc_free_matrix(sum, half);
            }
            mmdc_free_matrix(left, half);
            mmdc_free_matrix(right, half);
        }
    }

    /* The quadrant copies are temporaries of this call. */
    for (int r = 0; r < 2; r++) {
        for (int c = 0; c < 2; c++) {
            mmdc_free_matrix(a[r][c], half);
            mmdc_free_matrix(b[r][c], half);
        }
    }

    if (failed) {
        mmdc_free_matrix(result, length);
        return NULL;
    }
    return result;
}
这是get_sub_matrix方法:
/*
 * Copy a size x size square region of A into a newly allocated matrix.
 *
 * A                - source matrix stored as an array of row pointers; only read.
 * verticalOffset   - index of the first source row to copy.
 * horizontalOffset - index of the first source column to copy.
 * size             - number of rows and columns to copy.
 *
 * Returns a matrix owned by the caller (free every row, then the row array),
 * or NULL if `size` is not positive or an allocation fails.
 */
int **get_sub_matrix(const int **A, int verticalOffset, int horizontalOffset, int size)
{
    if (size <= 0) {
        return NULL;
    }

    int **sub_matrix = malloc(sizeof *sub_matrix * (size_t)size);
    if (sub_matrix == NULL) {
        return NULL;
    }

    for (int row = 0; row < size; row++) {
        // each row holds `size` columns
        sub_matrix[row] = malloc(sizeof **sub_matrix * (size_t)size);
        if (sub_matrix[row] == NULL) {
            // undo the rows allocated so far so a failure does not leak
            while (row-- > 0) {
                free(sub_matrix[row]);
            }
            free(sub_matrix);
            return NULL;
        }

        const int *source_row = A[row + verticalOffset] + horizontalOffset;
        for (int col = 0; col < size; col++) {
            sub_matrix[row][col] = source_row[col];
        }
    }
    return sub_matrix;
}
我确信这不是传递子矩阵的最佳方式,因为这些子矩阵只会被读取,从不会被修改。所以我想请教一下:是否可以用指针来解决这个问题,以及具体该怎么做(因为我还在努力理解指针)。
我尝试过以下写法,但是行不通:
// NOTE(review): this attempt cannot work with the array-of-row-pointers layout.
// A and B are `const int **`, so &A[r][c] is the address of a single element
// (a `const int *`), not an `int **`; every initializer below therefore has the
// wrong pointer type. A quadrant that does not start at column 0 would need its
// own array of row pointers, each offset into the corresponding source row.
int **a11 = &A[0][0];
int **a12 = &A[0][nLength];
int **a21 = &A[nLength][0] ;
int **a22 = &A[nLength][nLength];
int **b11 = &B[0][0];
int **b12 = &B[0][nLength];
int **b21 = &B[nLength][0] ;
int **b22 = &B[nLength][nLength];
感谢您帮助我。
答案 0 :(得分:0)
您现在使用的是一个由行指针组成的数组:
int **x = malloc(sizeof(int *) * size);
然后为每一行分配行内存。
// Give every row its own separately allocated buffer of `length` ints.
// NOTE(review): the snippet that allocates `x` sizes the row-pointer array with
// `size` while this loop uses `length`; presumably both name the same
// dimension - confirm.
for (i = 0; i < length; i++)
{
x[i] = malloc(sizeof(int) * length);
}
相反,您可以使用单个内存块,将其视为二维数组:
// One allocation large enough for size * size ints; it is a flat buffer, so any
// 2-D interpretation has to be done by computing the index manually.
int *x = malloc(sizeof(int) * size * size);
这样您就可以让指针指向子矩阵的起始位置,并把它传递给其他函数。但请注意矩阵索引的方式必须随之改变:因为编译器并不知道您把这一整块内存当作二维数组使用,所以必须自己显式地计算下标。因此,对于一个大小为 length × length 的矩阵,原来的 x[2][3](第三行的第四个元素,因为 C 的下标从零开始)要写成:
x [ (2*length) + 3 ];
此外,在把指向子矩阵的指针传递给函数时,必须约定一种方式,告诉函数它要操作的是哪一个子矩阵。例如:
// Example signature only; no body is given.
// NOTE(review): presumably X points at the flat matrix storage, `quadrant`
// selects which sub-matrix to operate on and `length` is the matrix
// dimension - the snippet itself does not define these.
void my_matrix_op(int *X, int quadrant, int length)
我没有查看您的所有代码以确定此方法对您的算法是否真的可行,但我希望这会对您有所帮助。