用C语言实现PCA

时间:2016-11-10 15:06:36

标签: c matlab pca

我正在尝试优化代码。我需要用 C 语言实现 PCA,并且其结果要与 Matlab 中 pca 的结果一致。我参考链接中的 pca 示例,使用 GSL 库实现了 PCA。以下是代码:

#include <assert.h>
#include <stdio.h>

#include <gsl/gsl_blas.h>
#include <gsl/gsl_eigen.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_statistics.h>
gsl_matrix* pca(const gsl_matrix* data, unsigned int L);
/*
 * Demo driver: fills a 5x5 matrix with the values 1..25 in row-major order,
 * prints it, reduces it to 4 principal components with pca() and prints the
 * projected data.
 */
int main(void) {
    const size_t rows = 5;
    const size_t cols = 5;
    gsl_matrix *m = gsl_matrix_alloc(rows, cols); /* create a matrix */

    /* initialise the matrix with consecutive integers */
    int count = 1;
    for (size_t i = 0; i < rows; i++) {
        for (size_t j = 0; j < cols; j++) {
            gsl_matrix_set(m, i, j, count);
            count++;
        }
    }

    printf("Matrix m\n");
    for (size_t i = 0; i < rows; i++) {
        for (size_t j = 0; j < cols; j++) {
            printf("%f ", gsl_matrix_get(m, i, j));
        }
        printf("\n");
    }

    /* pca() returns a freshly allocated matrix that this function owns. */
    gsl_matrix *red_data = pca(m, 4);
    const size_t r = red_data->size1;
    const size_t c = red_data->size2;
    printf("rows = %zu, columns = %zu \n", r, c);
    for (size_t i = 0; i < r; i++) {
        for (size_t k = 0; k < c; k++) {
            printf("%lf  ", gsl_matrix_get(red_data, i, k));
        }
        printf("\n");
    }

    /* Release both matrices; the original leaked them. */
    gsl_matrix_free(red_data);
    gsl_matrix_free(m);
    return 0;
}


gsl_matrix* pca(const gsl_matrix* data, unsigned int L)
{
    /*
    @param data - matrix of data vectors, MxN matrix, each column is a data vector, M - dimension, N - data vector count
    @param L - dimension reduction
    */
    assert(data != NULL);
    assert(L > 0 && L < data->size2);
    unsigned int i;
    unsigned int rows = data->size1;
    unsigned int cols = data->size2;
    gsl_vector* mean = gsl_vector_alloc(rows);

    for(i = 0; i < rows; i++) {
        gsl_vector_set(mean, i, gsl_stats_mean(data->data + i * cols, 1, cols));
    }

    // Get mean-substracted data into matrix mean_substracted_data.
    gsl_matrix* mean_substracted_data = gsl_matrix_alloc(rows, cols);
    gsl_matrix_memcpy(mean_substracted_data, data);
    for(i = 0; i < cols; i++) {
        gsl_vector_view mean_substracted_point_view = gsl_matrix_column(mean_substracted_data, i);
        gsl_vector_sub(&mean_substracted_point_view.vector, mean);
    }
    gsl_vector_free(mean);

    // Compute Covariance matrix
    gsl_matrix* covariance_matrix = gsl_matrix_alloc(rows, rows);
    gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0 / (double)(cols - 1), mean_substracted_data, mean_substracted_data, 0.0, covariance_matrix);
    gsl_matrix_free(mean_substracted_data);

    // Get eigenvectors, sort by eigenvalue.
    gsl_vector* eigenvalues = gsl_vector_alloc(rows);
    gsl_matrix* eigenvectors = gsl_matrix_alloc(rows, rows);
    gsl_eigen_symmv_workspace* workspace = gsl_eigen_symmv_alloc(rows);
    gsl_eigen_symmv(covariance_matrix, eigenvalues, eigenvectors, workspace);
    gsl_eigen_symmv_free(workspace);
    gsl_matrix_free(covariance_matrix);

    // Sort the eigenvectors
    gsl_eigen_symmv_sort(eigenvalues, eigenvectors, GSL_EIGEN_SORT_ABS_DESC);
    gsl_vector_free(eigenvalues);

    // Project the original dataset
    gsl_matrix* result = gsl_matrix_alloc(L, cols);
    gsl_matrix_view L_eigenvectors = gsl_matrix_submatrix(eigenvectors, 0, 0, rows, L);
    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, &L_eigenvectors.matrix, data, 0.0, result);
    gsl_matrix_free(eigenvectors);

    // Result is n LxN matrix, each column is the original data vector with reduced dimension from M to L
    return result;
}  

我使用了以下命令:

$ gcc pca.c -o pca -lgsl -lgslcblas -lm
$ ./pca

我输入了一个 5×5 的矩阵,主成分个数设为 4,并将结果与 Matlab 的结果进行了比较,两者并不一致。我该怎么做才能得到与 Matlab 一致的结果?或者,是否有用 C 语言实现(不一定使用 GSL 库)且结果与 Matlab 一致的 PCA 代码?谢谢。

0 个答案:

没有答案