我是新手,我道歉,如果我写一些愚蠢的东西。
我在2.7GHz CPU上使用LAPACK。
我对运行时间的期望基于出版物中的图4.1 Performance and Accuracy of The Lapack...,以及Fanfan对此Stackoverflow Post的回答。
所提到的两台计算机(尤其是2.2 GHz Opteron)应该比我的CPU慢很多。然而,对于典型的随机1000X1000对称矩阵,Opteron似乎需要不到一秒的时间。我的CPU平均需要大约2.2秒,变化非常小。
我是否忽略了我的实施应具备的一些功能,以便联系Opteron?
我编译的方式是:
g ++ -Ddsyev = dsyev_ -o combo.x combo.cc -L / usr / local / lib -lgsl -llapack -lblas&& ./combo.x
我尝试过选项-O2和-O3,它们没有改变任何东西。
我用来衡量时间的C ++代码如下所示。我试图将official example here与一个有意义的大矩阵一起使用。多次这样做,只是为了确保我没有偶然得到任何特殊的随机矩阵。
#include <stdlib.h>
#include <stdio.h>
#include <fstream>
#include <vector>
#include <sys/time.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_matrix.h>
/* Parameters */
#define N 1000
#define REPS 100
#define LDA N
#define TOWERSIZE N
struct TOWER
{
gsl_matrix *matrix_random_covariance;
};
int COMPUTE_RANDOM_COVARIANCE(struct TOWER *tower);
int WRITE_RANDOM_COVARIANCE(struct TOWER *tower);
int read_covariance (std::vector<double> & data)
{
double tmp;
std::ifstream fin("random_covariance.data");
while(fin >> tmp)
{
data.push_back(tmp);
}
return 0;
}
/* DSYEV prototype */
extern "C"{
void dsyev( char* jobz, char* uplo, int* n, double* a, int* lda,
double* w, double* work, int* lwork, int* info );
}
/* Auxiliary routines prototypes */
extern "C"{
void print_matrix( char* desc, int m, int n, double* a, int lda );
}
/* Main program */
int main() {
std::vector<double> time_series;
double time_series_sum =0.0;
double time_series_average =0.0;
struct TOWER *tower;
tower = (struct TOWER *)calloc(1,sizeof(struct TOWER));
double* work;
for (uint repj = 0; repj < REPS; repj++)
{
COMPUTE_RANDOM_COVARIANCE(tower);
WRITE_RANDOM_COVARIANCE(tower);
printf( "-----------------------------------------------> Entry main.\n" );
/* Locals */
std::vector<double> data;
int n = N, lda = LDA, info, lwork;
double wkopt;
/* Local arrays */
double w[N];
static double a[LDA*N];
printf( "-----------------------------------------------> Point 0.\n" );
read_covariance(data);
printf( "-----------------------------------------------> Point 1.\n" );
std::copy(data.begin(), data.end(), a);
printf( "-----------------------------------------------> Point 2.\n" );
struct timeval tval_before, tval_after, tval_result;\
gettimeofday(&tval_before, NULL);
/* Executable statements */
printf( " DSYEV Example Program Results\n" );
/* Query and allocate the optimal workspace */
lwork = -1;
dsyev( "Vectors", "Upper", &n, a, &lda, w, &wkopt, &lwork, &info );
printf( "-----------------------------------------------> Point 4.\n" );
lwork = (int)wkopt;
work = (double*)malloc( lwork*sizeof(double) );
/* Solve eigenproblem */
dsyev( "Vectors", "Upper", &n, a, &lda, w, work, &lwork, &info );
/* Check for convergence */
if( info > 0 ) {
printf( "The algorithm failed to compute eigenvalues.\n" );
exit( 1 );
}
gettimeofday(&tval_after, NULL);
timersub(&tval_after, &tval_before, &tval_result);
printf("Time one diag / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);
time_series.push_back(tval_result.tv_sec);
time_series_sum = time_series_sum + tval_result.tv_sec + 0.000001*tval_result.tv_usec;
}
time_series_average = time_series_sum/REPS;
printf("Time Series Average = %e\n", time_series_average);
printf("Time Series Length = %u\n", REPS);
/* Print eigenvalues */
//print_matrix( "Eigenvalues", 1, n, w, 1 ); //nocheat!!
/* Print eigenvectors */
//print_matrix( "Eigenvectors (stored columnwise)", n, n, a, lda ); //nocheat!!
/* Free workspace */
free( (void*)work );
free(tower);
exit( 0 );
} /* End of DSYEV Example */
/* Auxiliary routine: printing a matrix */
void print_matrix( char* desc, int m, int n, double* a, int lda ) {
int i, j;
printf( "\n %.2f\n", desc );
for( i = 0; i < m; i++ ) {
for( j = 0; j < n; j++ ) printf( " %.2f", a[i+j*lda] );
printf( "\n" );
}
}
/* --- --- */
int COMPUTE_RANDOM_COVARIANCE(struct TOWER *tower) //relying on spline !!
{
int a,b;
double aux, correl;
const gsl_rng_type * T;
gsl_rng * r;
T = gsl_rng_default;
r = gsl_rng_alloc (T);
struct timeval tv;
srand((time(0))); // srand & time are built-in
unsigned long int s = random(); // gsl_rng_uniform will eventually
// want a non-negative "long" integer
gsl_rng_env_setup();
gsl_rng_set(r,s); // seed the random number generator;
printf("extreme_kappa: building random covariance matrix....\n");
//#pragma omp parallel
//{
tower->matrix_random_covariance = gsl_matrix_calloc(TOWERSIZE,TOWERSIZE);
for(a=0;a<TOWERSIZE;a++)
{
//#pragma omp for
for(b=a;b<TOWERSIZE;b++)
{
correl = gsl_ran_flat(r,0.0,1.0);
gsl_matrix_set(tower->matrix_random_covariance,a,b,correl);
gsl_matrix_set(tower->matrix_random_covariance,b,a,correl);
}
}
//} //PARALLEL LOOP!!
gsl_rng_free(r);
return(0);
}
/* --- --- */
/* --- --- */
int WRITE_RANDOM_COVARIANCE(struct TOWER *tower)
{
int a,b;
FILE *file;
char fname[256];
double aux;
printf("extreme_kappa: writing random covariances....\n");
sprintf(fname,"./random_covariance.data");
file = fopen(fname,"w+");
if(file == NULL)
{
printf("extreme_kappa: error opening file %s\n",fname);
exit(2);
}
for(a=0;a<TOWERSIZE;a++)
{
for(b=0;b<TOWERSIZE;b++)
{
aux = gsl_matrix_get(tower->matrix_random_covariance,a,b);
fprintf(file,"%e ",aux);
}
fprintf(file,"\n");
}
fclose(file);
return(0);
}
/* --- --- */
我会感激任何提示!