按列求和的计时:不同数组大小与缓存行为

时间:2015-11-06 13:30:31

标签: c caching memory timing

我使用一个二维数组 a 和一个一维数组 s 来计算列和:目标是把二维数组 a 每一列的总和存入数组 s,其中 N 是数组的大小。我的问题是:当 N = 512 和 N = 1024 时,计时结果出现了显著变化,如下面的 Excel 图表所示:http://i.stack.imgur.com/p6ukv.png

/* sumcol_bycol: Reference column-sum routine.
 * Walks the array one column at a time and stores each column total in s:
 * s[c] = a[0][c] + a[1][c] + ... + a[N-1][c]
 */
static void FN_ALIGN sumcol_bycol(int a[N][N], int s[N]) {
    int col = 0;
    while (col < N) {
        // Accumulate every row's entry for this column
        int total = 0;
        int row = 0;
        while (row < N) {
            total += a[row][col];
            row++;
        }
        // Publish the finished column total
        s[col] = total;
        col++;
    }
}

/* sumcol_bycol_u4: Column-by-column sums with the row loop unrolled by 4.
 * Each s[c] receives the sum of a[r][c] over all rows r.
 */
static void FN_ALIGN sumcol_bycol_u4(int a[N][N], int s[N]) {
    for (int col = 0; col < N; col++) {
        int total = 0;
        int row = 0;
        // Unrolled body: four consecutive rows per iteration
        while (row < N-3) {
            total += a[row][col];
            total += a[row+1][col];
            total += a[row+2][col];
            total += a[row+3][col];
            row += 4;
        }
        /* Leftover rows when 4 does not divide N, selected at compile time.
         * row is now the first row the unrolled loop did not cover. */
#if N%4 >= 1
        total += a[row][col];
#endif
#if N%4 >= 2
        total += a[row+1][col];
#endif
#if N%4 >= 3
        total += a[row+2][col];
#endif
        s[col] = total;
    }
}

/* sumcol_bycol_u8: Column-by-column sums with the row loop unrolled by 8.
 * Each s[c] receives the sum of a[r][c] over all rows r.
 */
static void FN_ALIGN sumcol_bycol_u8(int a[N][N], int s[N]) {
    for (int col = 0; col < N; col++) {
        int total = 0;
        int row = 0;
        // Unrolled body: eight consecutive rows per iteration
        while (row < N-7) {
            total += a[row][col];
            total += a[row+1][col];
            total += a[row+2][col];
            total += a[row+3][col];
            total += a[row+4][col];
            total += a[row+5][col];
            total += a[row+6][col];
            total += a[row+7][col];
            row += 8;
        }
        /* Leftover rows when 8 does not divide N, selected at compile time.
         * row is now the first row the unrolled loop did not cover. */
#if N%8 >= 1
        total += a[row][col];
#endif
#if N%8 >= 2
        total += a[row+1][col];
#endif
#if N%8 >= 3
        total += a[row+2][col];
#endif
#if N%8 >= 4
        total += a[row+3][col];
#endif
#if N%8 >= 5
        total += a[row+4][col];
#endif
#if N%8 >= 6
        total += a[row+5][col];
#endif
#if N%8 >= 7
        total += a[row+6][col];
#endif
        s[col] = total;
    }
}

/* sumcol_bycol_g2: Column sums where two adjacent columns are accumulated
 * together during a single pass down the rows.
 */
static void FN_ALIGN sumcol_bycol_g2(int a[N][N], int s[N]) {
    int col;
    for (col = 0; col < N-1; col += 2) {
        int left = 0;
        int right = 0;
        for (int row = 0; row < N; row++) {
            left += a[row][col];
            right += a[row][col+1];
        }
        s[col] = left;
        s[col+1] = right;
    }
#if N%2 == 1
    /* Odd N: one column remains after the paired loop; col indexes it */
    {
        int last = 0;
        for (int row = 0; row < N; row++) {
            last += a[row][col];
        }
        s[col] = last;
    }
#endif
}

/* sumcol_bycol_g3: Column sums where three adjacent columns are accumulated
 * together during a single pass down the rows.
 */
static void FN_ALIGN sumcol_bycol_g3(int a[N][N], int s[N]) {
    int col;
    for (col = 0; col < N-2; col += 3) {
        int acc0 = 0;
        int acc1 = 0;
        int acc2 = 0;
        for (int row = 0; row < N; row++) {
            acc0 += a[row][col];
            acc1 += a[row][col+1];
            acc2 += a[row][col+2];
        }
        s[col] = acc0;
        s[col+1] = acc1;
        s[col+2] = acc2;
    }
#if N%3 != 0
    /* Columns left over when 3 does not divide N are summed one at a time */
    while (col < N) {
        int acc = 0;
        for (int row = 0; row < N; row++) {
            acc += a[row][col];
        }
        s[col] = acc;
        col++;
    }
#endif
}

/* sumcol_bycol_g4: Column sums where four adjacent columns are accumulated
 * together during a single pass down the rows.
 */
static void FN_ALIGN sumcol_bycol_g4(int a[N][N], int s[N]) {
    int col;
    for (col = 0; col < N-3; col += 4) {
        int acc0 = 0;
        int acc1 = 0;
        int acc2 = 0;
        int acc3 = 0;
        for (int row = 0; row < N; row++) {
            acc0 += a[row][col];
            acc1 += a[row][col+1];
            acc2 += a[row][col+2];
            acc3 += a[row][col+3];
        }
        s[col] = acc0;
        s[col+1] = acc1;
        s[col+2] = acc2;
        s[col+3] = acc3;
    }
#if N%4 != 0
    /* Columns left over when 4 does not divide N are summed one at a time */
    while (col < N) {
        int acc = 0;
        for (int row = 0; row < N; row++) {
            acc += a[row][col];
        }
        s[col] = acc;
        col++;
    }
#endif
}

/* sumcol_bycol_g5: Column sums where five adjacent columns are accumulated
 * together during a single pass down the rows.
 */
static void FN_ALIGN sumcol_bycol_g5(int a[N][N], int s[N]) {
    int col;
    for (col = 0; col < N-4; col += 5) {
        int acc0 = 0;
        int acc1 = 0;
        int acc2 = 0;
        int acc3 = 0;
        int acc4 = 0;
        for (int row = 0; row < N; row++) {
            acc0 += a[row][col];
            acc1 += a[row][col+1];
            acc2 += a[row][col+2];
            acc3 += a[row][col+3];
            acc4 += a[row][col+4];
        }
        s[col] = acc0;
        s[col+1] = acc1;
        s[col+2] = acc2;
        s[col+3] = acc3;
        s[col+4] = acc4;
    }
#if N%5 != 0
    /* Columns left over when 5 does not divide N are summed one at a time */
    while (col < N) {
        int acc = 0;
        for (int row = 0; row < N; row++) {
            acc += a[row][col];
        }
        s[col] = acc;
        col++;
    }
#endif
}

/* sumcol_byrow: Column sums computed using row-wise array access.
 * Clears s, then visits a one row at a time and adds each element
 * a[r][c] onto the running total s[c]. On return s[c] holds the sum
 * of column c of a.
 */
static void FN_ALIGN sumcol_byrow(int a[N][N], int s[N]) {
    int r, c;
    // Initialise all sums to zero.
    for (c = 0; c < N; c++) {
        s[c] = 0;
    }
    // Iterate over all array elements, adding
    // each one onto the appropriate sum.
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            s[c] += a[r][c];
        }
    }
}

/* sumcol_byrow_u4: Row-wise array access with the column loop unrolled by 4.
 * Clears s, then for every row adds a[r][c] onto s[c], handling four
 * adjacent columns per inner-loop iteration. On return s[c] holds the
 * sum of column c of a.
 */
static void FN_ALIGN sumcol_byrow_u4(int a[N][N], int s[N]) {
    int r, c;
    // Initialise all sums to zero.
    for (c = 0; c < N; c++) {
        s[c] = 0;
    }
    for (r = 0; r < N; r++) {
        for (c = 0; c < N-3; c+=4) {
            s[c] += a[r][c];
            s[c+1] += a[r][c+1];
            s[c+2] += a[r][c+2];
            s[c+3] += a[r][c+3];
        }
        /* Leftover columns when 4 does not divide N, selected at compile
         * time; c is the first column the unrolled loop did not cover */
# if N%4 >= 1
        s[c] += a[r][c];
# endif
# if N%4 >= 2
        s[c+1] += a[r][c+1];
# endif
# if N%4 >= 3
        s[c+2] += a[r][c+2];
# endif
    }
}

/* sumcol_byrow_b2x2: Row-wise computation using 2x2 blocks.
 * Clears s, then walks the array in 2x2 blocks (two rows by two columns);
 * each block updates two column sums. When N is odd, three remainders are
 * handled separately: the last column of each row pair, the last row, and
 * the bottom-right corner element. On return s[c] holds the sum of
 * column c of a.
 */
static void FN_ALIGN sumcol_byrow_b2x2(int a[N][N], int s[N]) {
    int r, c;
    // Initialise all sums to zero.
    for (c = 0; c < N; c++) {
        s[c] = 0;
    }
    for (r = 0; r < N-1; r+=2) {
        for (c = 0; c < N-1; c+=2) {
            s[c] += a[r][c] + a[r+1][c];
            s[c+1] += a[r][c+1] + a[r+1][c+1];
        }
# if N%2 != 0
        /* Odd N: c is now the last column; add its two rows of this pair */
        s[c] += a[r][c] + a[r+1][c];
# endif
    }
# if N%2 != 0
    /* Process the remaining row (r is the last row after the loop above) */
    for (c = 0; c < N-1; c+=2) {
        s[c] += a[r][c];
        s[c+1] += a[r][c+1];
    }
    /* Process the corner element */
    s[c] += a[r][c];
# endif
}

0 个答案:

没有答案