块矩阵转置

时间:2013-04-21 18:21:24

标签: c++ algorithm caching matrix

我想通过将输入矩阵分成块然后转置它们来实现矩阵的转置。我提到了相应的帖子A Cache Efficient Matrix Transpose Program?并编写了我的代码:

#include<iostream>
#include<stdlib.h>
#define m 4
#include<sys/time.h>
#include<time.h>
#include<malloc.h>

using namespace std;

int **a, **b, **c;
int count = 0;
clock_t t1, t2;    
int blocksize = 2;

int main(){
    a = (int **)malloc(m*sizeof(int *));
    for(int i = 0;i<m;i++){
            a[i] = (int *)malloc(m*sizeof(int));
    }
    b = (int **)malloc(m*sizeof(int *));
    for(int i = 0;i<m;i++){
            b[i] = (int *)malloc(m*sizeof(int));
    }
    for(int i=0; i<m; i++){
            for(int j =0; j<m; j++){
                    a[i][j]=(2*i)+(3*j);
            }
    }
    for(int i=0; i<m; i++){
            for(int j =0; j<m; j++){
                    cout << a[i][j] << "\t";
            }
            cout << "\n";
     }
    cout << "\n";
    t1 = clock();
    // MAIN BLOCK TRANSPOSE CODE
    for (int i = 0; i < m; i += blocksize) {
        for (int j = 0; j < m; j += blocksize) {
                    for (int k = i; k < i + blocksize; ++k) {
                            for (int l = j; l < j + blocksize; ++l) {
                                    b[k + l*m] = a[l + k*m];
                            }
                    }
            }
    }
    t2 = clock();
    for(int i=0; i<m; i++){
            for(int j =0; j<m; j++){
                    cout << b[i][j] << "\t";
            }
            cout << "\n";
     }
    free(a);
    free(b);
    cout << "\n";
    cout << (double)(t2-t1)/CLOCKS_PER_SEC << "\n";
return 0;
}  

但是,代码未按预期工作。我实现了据说在相应帖子中工作的代码。请尽可能帮助。

输入数组:

0       3       6       9
2       5       8       11
4       7       10      13
6       9       12      15  

预期输出数组:

0       2       4       6
3       5       7       9  
6       8       10      12  
9       11      13      15  

获得的结果:

0       3       6       9
Segmentation fault

1 个答案:

答案 0 :(得分:2)

我认为您的矩阵应该在单个数组中编码,而不是在数组数组中编码(请参阅链接问题的编辑2)。

你可能想尝试一下:

int *a, *b, *c;

a = (int *)malloc(m*m*sizeof(int));
b = (int *)malloc(m*m*sizeof(int));
for(int i=0; i<m; i++){
        for(int j =0; j<m; j++){
                a[i*m+j]=(2*i)+(3*j);
        }
}
for(int i=0; i<m; i++){
        for(int j =0; j<m; j++){
                cout << a[i*m+j] << "\t";
        }
        cout << "\n";
 }
cout << "\n";