我正在尝试优化代码。我需要在C中实现PCA,其结果与Matlab中的PCA相当。我使用链接pca从gsl库实现了PCA。以下是代码:
#include <assert.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_eigen.h>
#include <gsl/gsl_blas.h>
gsl_matrix* pca(const gsl_matrix* data, unsigned int L);
int main() {
int k = 0, i = 0,j,rows,cols,count = 1,r,c;
gsl_matrix *red_data,*m;
rows=5;cols=5;
m = gsl_matrix_alloc(rows,cols); /* create a matrix */
/* initialise a matrix */
for (i=0;i<rows;i++)
{
for (j=0;j<cols;j++)
{
gsl_matrix_set(m,i,j,count);
count++;
}
}
printf("Matrix m\n");
for (i=0;i<rows;i++)
{
for (j=0;j<cols;j++)
{
printf("%f ",gsl_matrix_get(m,i,j));
}
printf("\n");
}
red_data = pca(m,4);
r = red_data->size1;
c = red_data->size2;
printf("rows = %d, columns = %d \n",r,c);
for (i = 0; i<r; i++)
{
for(k = 0; k < c; k++)
{
printf("%lf ",gsl_matrix_get(red_data,i,k));
}
printf("\n");
}
return 0;
}
gsl_matrix* pca(const gsl_matrix* data, unsigned int L)
{
/*
@param data - matrix of data vectors, MxN matrix, each column is a data vector, M - dimension, N - data vector count
@param L - dimension reduction
*/
assert(data != NULL);
assert(L > 0 && L < data->size2);
unsigned int i;
unsigned int rows = data->size1;
unsigned int cols = data->size2;
gsl_vector* mean = gsl_vector_alloc(rows);
for(i = 0; i < rows; i++) {
gsl_vector_set(mean, i, gsl_stats_mean(data->data + i * cols, 1, cols));
}
// Get mean-substracted data into matrix mean_substracted_data.
gsl_matrix* mean_substracted_data = gsl_matrix_alloc(rows, cols);
gsl_matrix_memcpy(mean_substracted_data, data);
for(i = 0; i < cols; i++) {
gsl_vector_view mean_substracted_point_view = gsl_matrix_column(mean_substracted_data, i);
gsl_vector_sub(&mean_substracted_point_view.vector, mean);
}
gsl_vector_free(mean);
// Compute Covariance matrix
gsl_matrix* covariance_matrix = gsl_matrix_alloc(rows, rows);
gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0 / (double)(cols - 1), mean_substracted_data, mean_substracted_data, 0.0, covariance_matrix);
gsl_matrix_free(mean_substracted_data);
// Get eigenvectors, sort by eigenvalue.
gsl_vector* eigenvalues = gsl_vector_alloc(rows);
gsl_matrix* eigenvectors = gsl_matrix_alloc(rows, rows);
gsl_eigen_symmv_workspace* workspace = gsl_eigen_symmv_alloc(rows);
gsl_eigen_symmv(covariance_matrix, eigenvalues, eigenvectors, workspace);
gsl_eigen_symmv_free(workspace);
gsl_matrix_free(covariance_matrix);
// Sort the eigenvectors
gsl_eigen_symmv_sort(eigenvalues, eigenvectors, GSL_EIGEN_SORT_ABS_DESC);
gsl_vector_free(eigenvalues);
// Project the original dataset
gsl_matrix* result = gsl_matrix_alloc(L, cols);
gsl_matrix_view L_eigenvectors = gsl_matrix_submatrix(eigenvectors, 0, 0, rows, L);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, &L_eigenvectors.matrix, data, 0.0, result);
gsl_matrix_free(eigenvectors);
// Result is n LxN matrix, each column is the original data vector with reduced dimension from M to L
return result;
}
我使用了以下命令:
$ gcc pca.c -o pca -lgsl -lgslcblas -lm
$ ./pca
我给出了5 X 5矩阵,没有:主成分= 4。 我将结果与matlab进行了比较。它们不匹配。我该怎么做以获得可比较的结果?或者是否有任何PCA代码在C(不一定是gsl库)中实现匹配matlab结果。谢谢你。