C:矩阵乘法在矩阵大小为4096x4096时卡住

时间:2017-11-16 14:09:11

标签: c matrix-multiplication loop-unrolling

这是C中的代码,矩阵乘法,矩阵的填充是整数。循环嵌套和循环展开的优化。运行良好,直到N为7168.对于N 8196,内存不足。如果你运行它,只需屏幕,等到得到时间结果。这是来自我的电脑(华硕A456UQ)的警报或代码仍然是错误的?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include<time.h>
#include <sys/time.h>
#include<assert.h>

#define mat(m, col, row) \
m.data[row*m.cols + col]

#define N 8192
typedef struct
{
    int cols, rows;
    int *data;
}matrix_t;

void matrix_init(matrix_t *matrix, int cols, int rows)
{
    int i,j;
    matrix->cols = cols;
    matrix->rows = rows;
    matrix->data = malloc(sizeof(int)*cols*rows);

    if (!matrix->data)
    {
        fprintf(stderr, "Insufficient memory for a %dx%d matrix", cols, rows);
        exit(EXIT_FAILURE);
    }

    // 0 fill
    memset(matrix->data, 0, cols*rows);

}

void matrix_print(matrix_t *matrix)
{
    int i, n;

    n = matrix->cols * matrix->rows;

    printf("--M%dx%d--\n", matrix->rows, matrix->cols);
    for (i=0; i<n; i++)
    {
        printf("%d ", matrix->data[i]);

        if ((i+1)%matrix->cols == 0) printf("\n");
    }
}

void free_array(matrix_t *matrix) {
    free(matrix->data);
    matrix->data = NULL;
    matrix->cols = matrix->rows = 0;
}

void mul_matrix(matrix_t A, matrix_t B, matrix_t C){
    int ii, kk,ib,kb,j,i,k;
    int acc00, acc01, acc10,acc11;

    int size = A.cols;
    ib=16;
    kb=16;

    for (ii = 0; ii < size; ii += ib)
    {
        for (kk = 0; kk < size; kk += kb)
        {
            for (j=0; j < size; j += 2)
            {
                for(i = ii; i < ii + ib; i += 2 )
                {
                    if (kk == 0)
                        acc00 = acc01 = acc10 = acc11 = 0;
                    else
                    {
                        acc00 = mat(C,i,j);
                        acc01 = mat(C,i,(j+1));
                        acc10 = mat(C,(i+1),j);
                        acc11 = mat(C,(i+1),(j+1));
                    }
                }
                for (k = kk; k < kk + kb; k++)
                {
                    acc00 += mat(A,i,k) * mat(B,k,j);
                    acc01 += mat(A,i,k) * mat(B,k,j+1);
                    acc10 += mat(A,(i+1),k) * mat(B,k,j);
                    acc11 += mat(A,(i+1),k) * mat(B,k,(j+1));
                }

                mat(C,i,j) = acc00;
                mat(C,i,(j+1)) = acc01;
                mat(C,(i+1),j) = acc10;
                mat(C,(i+1),(j+1)) = acc11;

            }
        }
    }
}
/*
void mul_matrix(matrix_t A, matrix_t B, matrix_t C){
    int i,j,k;
    int t;
    int size = A.cols;


    for (int j=0; j<size; j++) {
       // iterates the j-th row of c
       for (int i=0; i<size; i++) {
           mat(C,i,j)=0;
       }

       // iterates the j-th row of b
       for (int k=0; k<size; k++) {
          t = mat(B,k,j);
          // iterates the j-th row of c
          // iterates the k-th row of a
          for (int i=0; i<size; i++) {
            mat(C,i,j) += mat(A,i,k) * t;
          }
       }
    }

 */

/*

    for (i = 0; i < size; i++){
        for (j = 0; j < size; j++) {
            mat(C,i,j) = 0;
            for (k = 0; k < size; k++)
                mat(C,i,j) += mat(A,i,k) * mat(B, k,j);
        }
    }
}
*/
void sum_matrix (matrix_t A, matrix_t B, matrix_t C){
    int i,j;
    int size = A.cols;

    for(i=0; i< size; i++) {
        for (j=0; j<size; j++)
            mat(C,i,j) = mat(A,i,j) + mat(B, i,j);
    }

}

void sub_matrix (matrix_t A, matrix_t B, matrix_t C){
    int i,j;
    int size = A.cols;

    for(i=0; i< size; i++) {
        for (j=0; j<size; j++)
            mat(C,i,j) = mat(A,i,j) - mat(B, i,j);
    }

}

int main(int argc, char **argv)
{
    matrix_t x;
    matrix_t y;
    matrix_t z;
    matrix_t m1,m2,m3,m4,m5,m6,m7;
    matrix_t a11,a12,a21,a22;
    matrix_t b11,b12,b21,b22;
    matrix_t aresult, bresult;
    matrix_t c11,c12,c21,c22;



    int i,j,k =0;

    int half = N/2;

    matrix_init(&x, N, N);
    matrix_init(&y, N, N);
    matrix_init(&z, N, N);
    //for(i = 0; i<N; i++)
      //  for(j = 0; j<N; j++)
         //   mat(z,i,j) =0;
    //matrix_print(&z);

    matrix_init(&a11, half, half);
    matrix_init(&a12, half, half);
    matrix_init(&a21, half, half);
    matrix_init(&a22, half, half);
    matrix_init(&b11, half, half);
    matrix_init(&b12, half, half);
    matrix_init(&b21, half, half);
    matrix_init(&b22, half, half);

    matrix_init(&aresult,half,half);
    matrix_init(&bresult,half,half);

    matrix_init(&m1, half, half);
    matrix_init(&m2, half, half);
    matrix_init(&m3, half, half);
    matrix_init(&m4, half, half);
    matrix_init(&m5, half, half);
    matrix_init(&m6, half, half);
    matrix_init(&m7, half, half);

    matrix_init(&c11, half, half);
    matrix_init(&c12, half, half);
    matrix_init(&c21, half, half);
    matrix_init(&c22, half, half);


    struct timeval  tv1, tv2;

    //srand((unsigned)time(NULL));


    for(i = 0; i<N; i++)
        for(j = 0; j<N; j++)
            mat(x,i,j) =1;// rand() % 10;

    for(i = 0; i<N; i++)
        for(j = 0; j<N; j++)
            mat(y,i,j) =1;// rand() % 10;




    gettimeofday(&tv1, NULL);


    //dividing the matrices in 4 sub-matrices:
    for(i=0; i<half; i++){
        for (j=0; j<half;j++){
            mat(a11,i,j)= mat(x,i,j);
            mat(a12,i,j)= mat(x,i,j+half);
            mat(a21,i,j)= mat(x,i+half,j);
            mat(a22,i,j)= mat(x,i+half,j+half);

            mat(b11,i,j)= mat(y,i,j);
            mat(b12,i,j)= mat(y,i,j+half);
            mat(b21,i,j)= mat(y,i+half, j);
            mat(b22,i,j)= mat(y, i+half, j+half);
        }
    }
    //matrix_print(&a11);
    //matrix_print(&a12);
    //matrix_print(&a21);
    //matrix_print(&a22);

    // Calculating m1 to m7:
    sum_matrix(a11,a22,aresult);
    sum_matrix(b11,b22,bresult);
    mul_matrix(aresult,bresult,m1);

    sum_matrix(a21,a22,aresult);
    mul_matrix(aresult,b11,m2);

    sub_matrix(b12,b22,aresult);
    mul_matrix(a11,aresult,m3);

    sub_matrix(b21,b11, aresult);
    mul_matrix(a22,aresult,m4);

    sum_matrix(a11,a12,aresult);
    mul_matrix(aresult,b22,m5);

    sub_matrix(a21,a11,aresult);
    sum_matrix(b11,b12,bresult);
    mul_matrix(aresult,bresult,m6);

    sub_matrix(a12,a22,aresult);
    sum_matrix(b21,b22,bresult);
    mul_matrix(aresult,bresult,m7);

    sum_matrix(m1,m4,aresult);
    sub_matrix(aresult,m5,bresult);
    sum_matrix(bresult,m7,c11);

    sum_matrix(m3,m5,c12);
    sum_matrix(m2,m4,c21);
    sub_matrix(m1,m2, aresult);
    sum_matrix(aresult,m3,bresult);
    sum_matrix(bresult,m6,c22);

    //matrix_print(&c11);
    //matrix_print(&c12);
    //matrix_print(&c21);
    //matrix_print(&c22);

    //combine matrix

    for(i=0; i < half; i++){
        for (j=0; j<half;j++){
            mat(z,i,j)= mat(c11,i,j);
            mat(z,(i+half),j)= mat(c12,i,j);
            mat(z,i,(j+half))= mat(c21,i,j);
            mat(z,(i+half),(j+half))= mat(c22,i,j);
       }
    }









    gettimeofday(&tv2, NULL);

    //matrix_print(&x);
    //matrix_print(&y);
    //matrix_print(&z);
    printf ("\n \nTotal time = %f seconds\n",(double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + (double) (tv2.tv_sec - tv1.tv_sec));

    free_array(&x);
    free_array(&y);
    free_array(&z);

    free_array(&m1);
    free_array(&m2);
    free_array(&m3);
    free_array(&m4);
    free_array(&m5);
    free_array(&m6);
    free_array(&m7);

    free_array(&a11);
    free_array(&a12);
    free_array(&a21);
    free_array(&a22);
    free_array(&b11);
    free_array(&b12);
    free_array(&b21);
    free_array(&b22);

    free_array(&aresult);
    free_array(&bresult);

    free_array(&c11);
    free_array(&c12);
    free_array(&c21);
    free_array(&c22);
    return 0;
}

0 个答案:

没有答案