Question

我是新手，如果我写了一些愚蠢的东西，先说声抱歉。

我在2.7GHz CPU上使用LAPACK。

我对运行时间的预期基于论文 Performance and Accuracy of The Lapack... 中的图4.1，以及Fanfan在这篇Stack Overflow帖子中的回答。

所提到的两台计算机（尤其是2.2 GHz Opteron）应该比我的CPU慢很多。然而，对于典型的随机1000X1000对称矩阵，Opteron似乎需要不到一秒的时间。我的CPU平均需要大约2.2秒，变化非常小。

我是否忽略了我的实现应当具备的某些特性，以致没能达到Opteron的速度？

我编译的方式是：

g++ -Ddsyev=dsyev_ -o combo.x combo.cc -L/usr/local/lib -lgsl -llapack -lblas && ./combo.x

我尝试过选项-O2和-O3，它们没有改变任何东西。

我用来测量时间的C++代码如下所示。我尝试把这里的官方示例（official example）用在一个足够大的矩阵上，并重复运行多次，只是为了确保没有碰巧得到某个特殊的随机矩阵。

#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
#include <algorithm>
#include <fstream>
#include <vector>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_matrix.h>
/* Parameters */
/* Order of the square matrix handed to DSYEV. */
#define N 1000
/* Number of generate / write / read / diagonalize repetitions run by main. */
#define REPS 100
/* Leading dimension of the array passed to DSYEV; identical to N here. */
#define LDA N
/* Number of rows and columns of the GSL random matrix; identical to N here. */
#define TOWERSIZE N

/* Holder for the random matrix that is passed between the generate and write steps. */
struct TOWER {
    /* Set by COMPUTE_RANDOM_COVARIANCE, read by WRITE_RANDOM_COVARIANCE. */
    gsl_matrix *matrix_random_covariance;
};


/* Allocates a TOWERSIZE x TOWERSIZE symmetric random matrix and stores it in
   tower->matrix_random_covariance; returns 0. Defined further down in this file. */
int COMPUTE_RANDOM_COVARIANCE(struct TOWER *tower);
/* Writes tower->matrix_random_covariance as text to ./random_covariance.data;
   returns 0, or exits with status 2 when the file cannot be opened. */
int WRITE_RANDOM_COVARIANCE(struct TOWER *tower);

/*
 * Append every whitespace-separated number found in "random_covariance.data"
 * (looked up in the current working directory) to `data`.
 *
 * data: receives the values in file order; existing contents are kept.
 *
 * Returns 0 when the whole file was consumed, -1 when the file could not be
 * opened or when reading stopped before end-of-file (e.g. a token that is not
 * a number).  Values read before a failure remain in `data`.
 */
int read_covariance (std::vector<double> & data)
{
    std::ifstream fin("random_covariance.data");
    if (!fin.is_open())
    {
        return -1;
    }

    double tmp;
    while (fin >> tmp)
    {
        data.push_back(tmp);
    }

    /* Extraction ends either at end-of-file (success) or at the first token
       that cannot be parsed as a double (failure). */
    return fin.eof() ? 0 : -1;
}

/* DSYEV prototype */
/*
 * LAPACK symmetric eigensolver, declared with C linkage.  The build command
 * shown with this program maps the name to the Fortran symbol via
 * -Ddsyev=dsyev_.
 *
 * jobz and uplo are declared const so that callers may pass string literals,
 * which ISO C++ does not allow for a plain char* parameter; callers passing
 * writable buffers keep working unchanged.
 */
extern "C"{
void dsyev( const char* jobz, const char* uplo, int* n, double* a, int* lda,
                double* w, double* work, int* lwork, int* info );
}
/* Auxiliary routines prototypes */
/* Prints the label desc and then the m x n matrix stored in a with leading
   dimension lda.  Declared with C linkage; defined further down in this file. */
extern "C"{ 
void print_matrix( char* desc, int m, int n, double* a, int lda );
}

/* Main program */
int main() {
        std::vector<double> time_series;
    double time_series_sum =0.0;
    double time_series_average =0.0;

        struct TOWER *tower;
        tower = (struct TOWER *)calloc(1,sizeof(struct TOWER));
            double* work;
    for (uint repj = 0; repj < REPS; repj++)
        {
        COMPUTE_RANDOM_COVARIANCE(tower);
        WRITE_RANDOM_COVARIANCE(tower);
        printf( "-----------------------------------------------> Entry main.\n" );
        /* Locals */
        std::vector<double> data;
        int n = N, lda = LDA, info, lwork;
        double wkopt;
        /* Local arrays */
    double w[N];
    static double a[LDA*N];
        printf( "-----------------------------------------------> Point 0.\n" );
    read_covariance(data);
        printf( "-----------------------------------------------> Point 1.\n" );
        std::copy(data.begin(), data.end(), a);
        printf( "-----------------------------------------------> Point 2.\n" );

    struct timeval tval_before, tval_after, tval_result;\
    gettimeofday(&tval_before, NULL);

    /* Executable statements */
        printf( " DSYEV Example Program Results\n" );
        /* Query and allocate the optimal workspace */
        lwork = -1;
        dsyev( "Vectors", "Upper", &n, a, &lda, w, &wkopt, &lwork, &info );
        printf( "-----------------------------------------------> Point 4.\n" );
        lwork = (int)wkopt;
        work = (double*)malloc( lwork*sizeof(double) );
        /* Solve eigenproblem */
        dsyev( "Vectors", "Upper", &n, a, &lda, w, work, &lwork, &info );
        /* Check for convergence */
        if( info > 0 ) {
                printf( "The algorithm failed to compute eigenvalues.\n" );
                exit( 1 );
        }
            gettimeofday(&tval_after, NULL);
            timersub(&tval_after, &tval_before, &tval_result);
            printf("Time one diag / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);
                time_series.push_back(tval_result.tv_sec);
        time_series_sum = time_series_sum + tval_result.tv_sec + 0.000001*tval_result.tv_usec;
        }

        time_series_average = time_series_sum/REPS;
        printf("Time Series Average = %e\n", time_series_average);
        printf("Time Series Length = %u\n", REPS);
        /* Print eigenvalues */
        //print_matrix( "Eigenvalues", 1, n, w, 1 ); //nocheat!!
        /* Print eigenvectors */
        //print_matrix( "Eigenvectors (stored columnwise)", n, n, a, lda ); //nocheat!!
        /* Free workspace */
        free( (void*)work );
    free(tower);
        exit( 0 );
} /* End of DSYEV Example */

/* Auxiliary routine: printing a matrix */
/*
 * Print the label `desc` on its own line, followed by the m x n matrix `a`,
 * one matrix row per output line with two decimals per entry.
 *
 * a is indexed as a[i + j*lda] for row i and column j, i.e. consecutive rows
 * of one column are adjacent and lda is the distance between columns.
 */
void print_matrix( char* desc, int m, int n, double* a, int lda ) {
        int i, j;
        /* desc is a string: it must be printed with %s, not a floating-point conversion */
        printf( "\n %s\n", desc );
        for( i = 0; i < m; i++ ) {
                for( j = 0; j < n; j++ ) {
                        printf( " %.2f", a[i+j*lda] );
                }
                printf( "\n" );
        }
}
/* ---  --- */
/*
 * Build a new TOWERSIZE x TOWERSIZE symmetric matrix whose entries are drawn
 * with gsl_ran_flat(r, 0.0, 1.0) and store it in
 * tower->matrix_random_covariance.
 *
 * tower: receives the newly allocated matrix.  Any pointer previously stored
 *        in the field is overwritten without being freed, so the caller is
 *        responsible for releasing each matrix with gsl_matrix_free().
 *
 * The GSL generator is seeded from the libc random() stream, which is itself
 * seeded from the clock once, on the first call.  Seeding only once keeps
 * calls made within the same second from producing identical matrices.
 *
 * Always returns 0.
 */
int COMPUTE_RANDOM_COVARIANCE(struct TOWER *tower)
{
    static int libc_rng_seeded = 0;

    /* Must run before gsl_rng_default is read, otherwise the environment
       settings it picks up cannot influence the generator allocated below. */
    gsl_rng_env_setup();
    gsl_rng *r = gsl_rng_alloc(gsl_rng_default);

    /* random() is seeded by srandom(), not by srand(). */
    if (!libc_rng_seeded)
    {
        srandom((unsigned int)time(NULL));
        libc_rng_seeded = 1;
    }
    unsigned long int s = (unsigned long int)random();
    gsl_rng_set(r, s); // seed the random number generator

    printf("extreme_kappa: building random covariance matrix....\n");

    tower->matrix_random_covariance = gsl_matrix_calloc(TOWERSIZE,TOWERSIZE);

    /* Draw the upper triangle (diagonal included) and mirror each value so
       the matrix is symmetric. */
    for (size_t a = 0; a < TOWERSIZE; a++)
    {
        for (size_t b = a; b < TOWERSIZE; b++)
        {
            const double correl = gsl_ran_flat(r,0.0,1.0);
            gsl_matrix_set(tower->matrix_random_covariance,a,b,correl);
            gsl_matrix_set(tower->matrix_random_covariance,b,a,correl);
        }
    }

    gsl_rng_free(r);
    return(0);
}
/* ---  --- */
/* --- --- */
int WRITE_RANDOM_COVARIANCE(struct TOWER *tower)
{
    int a,b;
    FILE *file;
    char fname[256];
    double aux;

    printf("extreme_kappa: writing random covariances....\n");

    sprintf(fname,"./random_covariance.data");
    file = fopen(fname,"w+");
    if(file == NULL)
    {
        printf("extreme_kappa: error opening file %s\n",fname);
        exit(2);
    }

    for(a=0;a<TOWERSIZE;a++)
    {
        for(b=0;b<TOWERSIZE;b++)
        {
            aux = gsl_matrix_get(tower->matrix_random_covariance,a,b);
            fprintf(file,"%e ",aux);
        }
        fprintf(file,"\n");
    }

    fclose(file);
    return(0);
}
/* --- --- */

我会感激任何提示！

LAPACK在我的系统上运行缓慢吗？如果是这样，为什么？

0 个答案: