我编写了一段代码,使用 OpenMP 的 parallel for(作用于最外层循环)将两个 n×n 矩阵相乘。在第一个版本中,数组大小在编译时给定,我希望将 n 设为 2000。在第二个版本中,我为三个矩阵动态分配内存,并把 n(2000)作为命令行参数传入,其余代码相同。当 n = 2000 时,两个版本的执行时间相差很大:第一个版本(静态分配)约为 13 秒,第二个版本(动态分配)约为 32 秒;但当 n <= 1000 时,两者耗时相同。我只测量了并行乘法运算部分的时间。那么,这两种方法为什么会得到不同的结果呢?(唯一的变化是内存分配方式)
这是版本1
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
// Matrix dimension, fixed at compile time in this version.
// NOTE(review): the accompanying question reports timings for n = 2000, but this listing uses 1000.
#define N 1000
// N x N operand matrices (A, B) and result matrix (C). They are file-scope objects,
// so they have static storage duration and C starts out zero-initialized.
double A[N][N], B[N][N], C[N][N]; // statically allocated N x N matrices
/*
 * Version 1: multiply the two statically allocated N x N matrices A and B
 * into C and print the wall-clock time of the multiplication only.
 *
 * Returns 0 on completion.
 */
int main(void)
{
    /* Fill A and B with pseudo-random integers in [0, 9]. */
    srand((unsigned) time(NULL));
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = (rand() % 10);
            B[i][j] = (rand() % 10);
        }
    }

    /* Timed section: classic i-j-m triple loop. C is a file-scope array and
     * therefore starts zeroed, so accumulating with += is valid here. */
    double st = omp_get_wtime();
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            for (int m = 0; m < N; m++) {
                C[i][j] += A[i][m] * B[m][j];
            }
        }
    }
    double en = omp_get_wtime();

    /* Report the elapsed time in seconds. */
    printf("Serial: %lf\n", en - st);
    return 0;
}
这是第二个版本
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
// Number of rows and columns of each matrix; stays 0 until getArguments() stores the command-line value.
int N = 0;
// Each matrix is reached through an array of row pointers; the storage is allocated at run time by main().
double **A, **B, **C;
// Reads N from argv[1]; terminates the program when the argument count is wrong or the value is rejected.
void getArguments(int argc, char *argv[]);
/*
 * Release a matrix created by alloc_matrix(). Accepts NULL, and tolerates
 * rows that were never allocated (free(NULL) is a no-op).
 */
static void free_matrix(double **rows, int n)
{
    if (rows == NULL) {
        return;
    }
    for (int r = 0; r < n; r++) {
        free(rows[r]);
    }
    free(rows);
}

/*
 * Allocate an n x n matrix of doubles as an array of n row pointers, each
 * row being its own heap block (rows are therefore not contiguous with one
 * another). Returns NULL if any allocation fails; nothing is leaked.
 */
static double **alloc_matrix(int n)
{
    /* calloc zeroes the row pointers so a partial matrix can be freed safely. */
    double **rows = calloc((size_t) n, sizeof *rows);
    if (rows == NULL) {
        return NULL;
    }
    for (int r = 0; r < n; r++) {
        /* Size each row by its element type (double), not by the pointer type. */
        rows[r] = malloc((size_t) n * sizeof *rows[r]);
        if (rows[r] == NULL) {
            free_matrix(rows, n);
            return NULL;
        }
    }
    return rows;
}

/*
 * Version 2: read N from the command line, allocate A, B and C dynamically,
 * multiply A by B into C with an OpenMP-parallel outer loop, and print the
 * wall-clock time of the multiplication only.
 *
 * Returns 0 on success, EXIT_FAILURE if the matrices cannot be allocated.
 */
int main(int argc, char *argv[])
{
    int i, j, m; // indices for matrix multiplication (j and m are listed in the pragma's private clause)

    getArguments(argc, argv);

    /* ALLOCATE MEMORY FOR MATRICES A, B AND C */
    A = alloc_matrix(N);
    B = alloc_matrix(N);
    C = alloc_matrix(N);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Failed to allocate three %d x %d matrices\n", N, N);
        free_matrix(A, N);
        free_matrix(B, N);
        free_matrix(C, N);
        return EXIT_FAILURE;
    }

    /* FILLING MATRICES WITH RANDOM NUMBERS in [0, 9] */
    srand((unsigned) time(NULL));
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            A[i][j] = (rand() % 10);
            B[i][j] = (rand() % 10);
        }
    }

    /* Timed section: rows of C are distributed across threads; each thread
     * writes only its own rows, so no synchronization on C is needed. */
    double st = omp_get_wtime();
    #pragma omp parallel for private(m,j)
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            C[i][j] = 0.; // heap memory is uninitialized, so clear the cell before accumulating
            for (m = 0; m < N; m++) {
                C[i][j] = A[i][m] * B[m][j] + C[i][j];
            }
        }
    }

    /* TIME MEASURE */
    double en = omp_get_wtime();
    printf("Parallel: %lf\n", en - st);

    /* FREE MEMORY */
    free_matrix(A, N);
    free_matrix(B, N);
    free_matrix(C, N);
    return 0;
}
/*
 * Parse the matrix dimension from the command line and store it in the
 * global N.
 *
 * argc, argv: the arguments received by main(); exactly one argument
 *             (the value of N) is expected after the program name.
 *
 * Does not return on bad input: prints a message to stderr and exits with
 * EXIT_FAILURE when the argument count is wrong, the text is not a whole
 * decimal number, the value does not fit in an int, or it is not positive.
 */
void getArguments(int argc, char *argv[])
{
    // Check the N
    if (argc != 2) {
        fprintf(stderr, "Please give a valid number for N\n");
        exit(EXIT_FAILURE);
    }

    char *end = NULL;
    errno = 0; // strtol reports overflow only through errno
    long parsed = strtol(argv[1], &end, 10);

    /* Reject empty input, trailing garbage ("12abc") and out-of-range values. */
    if (end == argv[1] || *end != '\0' || errno == ERANGE || parsed > INT_MAX) {
        fprintf(stderr, "Please give a valid number for N\n");
        exit(EXIT_FAILURE);
    }
    /* Zero and negative sizes would make the allocations meaningless. */
    if (parsed <= 0) {
        fprintf(stderr, "Please give a number for N more than 0\n");
        exit(EXIT_FAILURE);
    }

    N = (int) parsed;
}
答案 0 :(得分:0)
动态分配有三个主要差异:
`N` 在编译时不再已知,因此编译器必须为以 `N` 为上界的循环生成更通用的代码。