我想弄清楚为什么下面的程序在 Intel(R) Core(TM) i5-3210M CPU @ 2.50GHz 上使用两个线程比使用四个线程运行得更快,而在 Intel(R) Core(TM) i7-4510U CPU @ 2.00GHz 上,同一程序的四线程版本却比两线程版本运行得更快。
# include <stdlib.h>
# include <stdio.h>
# include <math.h>
# include <time.h>
# include <omp.h>
/* Forward declarations for the two functions defined later in this file. */
/* Program entry point: runs the matrix-product computation and prints a report. */
int main ( void );
/* Prints the current local date and time as one line on standard output. */
void timestamp ( void );
/*
  Purpose:
    MAIN is the driver for MXM_OPENMP.  It fills an N x N matrix A, copies
    it into B, forms the product C = A * B inside a single OpenMP parallel
    region, and prints the wall-clock time spent in that region together
    with one sample entry of C.

  Returns:
    0 on normal completion.
*/
int main ( void )
{
  /* Matrix order.  A compile-time constant so that the matrices below can
     be given static storage duration. */
  enum { N = 500 };

  /* Three N x N arrays of double occupy roughly 6 MB.  As automatic
     variables they can overflow the default stack on some platforms, so
     they are placed in static storage instead. */
  static double a[N][N];
  static double b[N][N];
  static double c[N][N];

  /* Written out as a literal because M_PI is not part of ISO C and is not
     visible in strictly conforming builds (e.g. -std=c11). */
  double pi = 3.14159265358979323846;
  double s;
  int thread_num;
  double wtime;

  timestamp ( );

  printf ( "\n" );
  printf ( "MXM_OPENMP:\n" );
  printf ( " C/OpenMP version\n" );
  printf ( " Compute matrix product C = A * B.\n" );

  /* Turn off dynamic team sizing so the thread count requested on the next
     line is the one the runtime is asked to use. */
  omp_set_dynamic ( 0 );
  omp_set_num_threads ( 2 ); /* Change param to modify number of threads */
  thread_num = omp_get_max_threads ( );

  printf ( "\n" );
  printf ( " The number of processors available = %d\n", omp_get_num_procs ( ) );
  printf ( " The number of threads available = %d\n", thread_num );
  printf ( " The matrix order N = %d\n", N );

  /* Scale factor applied to every entry of A. */
  s = 1.0 / sqrt ( ( double ) ( N ) );

  wtime = omp_get_wtime ( );

  /* All loop indices and temporaries are declared inside the region, which
     makes them private to each thread without a private clause. */
# pragma omp parallel shared ( a, b, c, pi, s )
  {
    /* Loop 1: Evaluate A. */
#   pragma omp for
    for ( int i = 0; i < N; i++ )
    {
      for ( int j = 0; j < N; j++ )
      {
        double angle = 2.0 * pi * i * j / ( double ) N;
        a[i][j] = s * ( sin ( angle ) + cos ( angle ) );
      }
    }

    /* Loop 2: Copy A into B. */
#   pragma omp for
    for ( int i = 0; i < N; i++ )
    {
      for ( int j = 0; j < N; j++ )
      {
        b[i][j] = a[i][j];
      }
    }

    /* Loop 3: Compute C = A * B.  The dot product is accumulated in a local
       variable, in the same order of k, and stored once per entry. */
#   pragma omp for
    for ( int i = 0; i < N; i++ )
    {
      for ( int j = 0; j < N; j++ )
      {
        double sum = 0.0;
        for ( int k = 0; k < N; k++ )
        {
          sum = sum + a[i][k] * b[k][j];
        }
        c[i][j] = sum;
      }
    }

    /* Report the team size exactly once; without "single" every thread in
       the team would print this line. */
#   pragma omp single
    {
      printf ( " The number of threads used = %d\n", omp_get_num_threads ( ) );
    }
  }

  wtime = omp_get_wtime ( ) - wtime;

  printf ( " Elapsed seconds = %g\n", wtime );
  printf ( " C(100,100) = %g\n", c[99][99] );

  /* Terminate. */
  printf ( "\n" );
  printf ( "MXM_OPENMP:\n" );
  printf ( " Normal end of execution.\n" );
  printf ( "\n" );

  timestamp ( );

  return 0;
}
/*
  Purpose:
    TIMESTAMP prints the current local date and time to standard output as
    a single line in the form "DD Month YYYY HH:MM:SS AM/PM" (month name and
    AM/PM marker as produced by strftime for the current locale).

    If the current time cannot be converted or formatted, a placeholder
    line is printed instead so the output still contains one line.
*/
void timestamp ( void )
{
  /* Capacity of the formatted string, including the terminating NUL. */
  enum { TIME_SIZE = 40 };

  char time_buffer[TIME_SIZE];
  time_t now = time ( NULL );
  const struct tm *tm = localtime ( &now );

  /* localtime returns NULL when the time cannot be represented, and
     strftime returns 0 when the result does not fit, leaving the buffer
     contents indeterminate.  In either case the buffer must not be
     printed. */
  if ( tm == NULL
    || strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm ) == 0 )
  {
    printf ( "(time unavailable)\n" );
    return;
  }

  printf ( "%s\n", time_buffer );
}
答案 0 :(得分:0)
即使两台机器的配置完全相同,同一个程序的执行时间仍然可能不同。为什么?因为机器 1 上可能正在运行 200 个服务,而机器 2 上可能正在运行 225 个;这样机器 2 上的程序分到的处理器时间和资源就更少,从而导致运行耗时更长。
这完全取决于正在运行的服务(包括后台和前台服务)。