如何使用gsl计算多项式回归数据点?

时间:2016-04-09 21:24:39

标签: c polynomial-math gsl

(免责声明:我的数学非常糟糕,而且我是JavaScript背景,所以如有任何不准确之处,我先表示歉意,并会尽力纠正。)

Rosetta代码上的example显示了如何使用gsl计算系数。这是代码:

polifitgsl.h:

// Click handler bound to every element that matches '.addName1b' at bind time.
// It flips the clicked element's label and shows/hides the '.addName1' element(s).
// NOTE(review): this jQuery snippet looks unrelated to the GSL discussion around it — confirm.
$('.addName1b').click(function() {
    var $this = $(this);
    // Toggle the marker class; its presence afterwards decides which label is shown.
    $this.toggleClass('addName1b');
    if ($this.hasClass('addName1b')) {
        $this.text('Add name');
    } else {
        $this.text('Change name');
    }
    // Show the '.addName1' element(s) if hidden, hide them if visible.
    $('.addName1').toggle();
    // NOTE(review): the argument below is only a placeholder comment, so nothing
    // meaningful is selected yet; presumably the input's text is meant to go here.
    $(/*TEXT FROM INPUT*/).appendTo(".p1");
});

polifitgsl.cpp:

/*
 * Declaration of the GSL-based least-squares polynomial fit.
 *
 * The include guard deliberately avoids a leading underscore followed by an
 * uppercase letter: such identifiers are reserved for the implementation.
 */
#ifndef POLIFITGSL_H
#define POLIFITGSL_H
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>

/**
 * Fit a polynomial to the samples (dx[i], dy[i]) by linear least squares.
 *
 * @param obs     number of samples read from dx and dy
 * @param degree  number of coefficients to compute, i.e. polynomial order + 1
 * @param dx      sample abscissas, at least obs entries
 * @param dy      sample ordinates, at least obs entries
 * @param store   receives degree coefficients; store[j] multiplies x^j
 * @return true once the coefficients have been written to store
 */
bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store); /* n, p */
#endif /* POLIFITGSL_H */

main.cpp(注意我用自己的x替换了x的样本编号):

#include "polifitgsl.h"

bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store) /* n, p */
{
  gsl_multifit_linear_workspace *ws;
  gsl_matrix *cov, *X;
  gsl_vector *y, *c;
  double chisq;

  int i, j;

  X = gsl_matrix_alloc(obs, degree);
  y = gsl_vector_alloc(obs);
  c = gsl_vector_alloc(degree);
  cov = gsl_matrix_alloc(degree, degree);

  for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
      gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
  }

  ws = gsl_multifit_linear_alloc(obs, degree);
  gsl_multifit_linear(X, y, c, cov, &chisq, ws);

  /* store result ... */
  for(i=0; i < degree; i++)
  {
    store[i] = gsl_vector_get(c, i);
  }

  gsl_multifit_linear_free(ws);
  gsl_matrix_free(X);
  gsl_matrix_free(cov);
  gsl_vector_free(y);
  gsl_vector_free(c);
  return true; /* we do not "analyse" the result (cov matrix mainly)
          to know if the fit is "good" */
}

这是输出:

#include <stdio.h>

#include "polifitgsl.h"

/* Number of samples handed to polynomialfit(). The arrays below hold 20
   values each, so only the first 11 of them take part in the fit. */
#define NP 11
/* Sample abscissas. */
double x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
/* Sample ordinates, one per entry of x. */
double y[] = {98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9};

/* Number of coefficients to fit: 3 coefficients (x^0, x^1, x^2) describe a
   quadratic, not a cubic. */
#define DEGREE 3
/* Receives the fitted coefficients, lowest power first. */
double coeff[DEGREE];

/* Fit the sample data and print every coefficient on its own line,
   lowest power first. */
int main()
{
  int k = 0;

  polynomialfit(NP, DEGREE, x, y, coeff);

  while (k < DEGREE) {
    printf("%lf\n", coeff[k]);
    k++;
  }

  return 0;
}

这样我就得到了系数。但我真正想要的是实际的拟合点。在JavaScript中,我使用regression包来计算这些点:

98.030909
-0.016182
0.000909

产生:

var regression = require('regression');

/**
 * Run a polynomial regression over `values`, using each value's position in
 * the array as its x coordinate.
 *
 * @param {Array} values - y samples; the x of values[i] is i
 * @param {number} degree - degree passed through to the regression library
 * @returns whatever the regression library returns for the fit
 */
var calculateRegression = function(values, degree) {
    var points = [];
    var total = values.length;
    var idx;

    // Build the [x, y] pairs the regression library is given as input.
    for (idx = 0; idx < total; idx += 1) {
        points.push([idx, values[idx]]);
    }

    // Delegate the actual fit to the library and hand its result back as-is.
    return regression('polynomial', points, degree);
};

// Sample measurements; calculateRegression pairs each one with its array index.
var y = [98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9];

// Fit with degree 3 and print the object returned by the library.
console.log(calculateRegression(y, 3));

(注意JavaScript中存在浮点问题,因此数字不完全准确。)

{ equation: [ 98.02987916431594, -0.017378390369880512, 0.0015748071645344357, -0.00005721503635571101 ], points: [ [ 0, 98.02987916431594 ], [ 1, 98.01401836607424 ], [ 2, 98.00096389194348 ], [ 3, 97.9903724517055 ], [ 4, 97.98190075514219 ], [ 5, 97.97520551203543 ], [ 6, 97.96994343216707 ], [ 7, 97.96577122531896 ], [ 8, 97.96234560127297 ], [ 9, 97.959323269811 ], [ 10, 97.95636094071487 ], [ 11, 97.95311532376647 ], [ 12, 97.94924312874768 ], [ 13, 97.94440106544033 ], [ 14, 97.93824584362629 ], [ 15, 97.93043417308745 ], [ 16, 97.92062276360569 ], [ 17, 97.90846832496283 ], [ 18, 97.89362756694074 ], [ 19, 97.87575719932133 ] ], string: 'y = 0x^3 + 0x^2 + -0.02x + 98.03' } 这是我想要使用gsl生成的内容。有没有办法可以做到这一点?

1 个答案:

答案 0 :(得分:1)

我遇到了同样的问题,于是在上述答案的基础上添加了一个直接计算的解决方案(不使用gsl),该方案取自 http://www.cplusplus.com/forum/general/181580/

下面,您将找到一个独立的测试程序,其中包含基于gsl和直接计算的解决方案。

我做了一些性能分析:在我的系统上,直接计算的速度快了约65倍,令人印象深刻——对相应函数各调用1000次,gsl版本耗时5.22s,而直接计算只需0.08s。

顺便说一下,直接计算使用的是克莱姆法则(Cramer's rule),因此您应该小心病态(ill-conditioned)数据。通常情况下我会避免使用克莱姆法则,但是对于3x3的方程组,使用完整的线性方程组求解器在我看来有点小题大做。

/*
* =====================================================================================
*
*       Filename:  polyfit.cpp
*
*    Description:  Least squares fit of second order polynomials 
*                  Test program using gsl and direct calculation
*        Version:  1.0
*        Created:  2017-07-17 09:32:55
*       Compiler:  gcc
*
*         Author:  Bernhard Brunner, brb_blog@epr.ch
*     References:  This code was merged, adapted and optimized from these two sources:
*                  http://www.cplusplus.com/forum/general/181580/
*                  https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points
*                  http://brb.epr.ch/blog/blog:least_squares_regression_of_parabola
*          Build:  compile and link using 
*                  g++    -c -o polifitgsl.o polifitgsl.cpp
*                  gcc  polifitgsl.o -lgsl -lm -lblas -o polifitgsl
* 
*      Profiling:
*                  valgrind --tool=callgrind ./polifitgsl
*                  kcachegrind
*                  
*                  polynomialfit        takes 5.22s for 1000 calls
*                  findQuadCoefficients takes 0.08s for 1000 calls
*                   65x faster
* =====================================================================================
*/

#include <stdio.h>
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>

/* Least-squares polynomial fit via GSL; defined further down in this file. */
bool polynomialfit(int obs, int degree, 
        double *dx, double *dy, double *store); /* n, p */
/* Sample abscissas: the integers 0 through 19. */
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
/* Sample ordinates, one per entry of x. */
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
            97.97, 97.96, 97.94, 97.96, 97.96,
            97.97, 97.97, 97.94, 97.94, 97.94,
            97.92, 97.96, 97.9,  97.85, 97.9 };

/* Number of samples, derived from the size of x. */
#define NP (sizeof(x)/sizeof(double)) // 20

/* Number of fitted coefficients; 3 corresponds to a second-order polynomial. */
#define DEGREE 3
/* Shared output buffer for the fitted coefficients, lowest power first. */
double coeff[DEGREE];

/**
 * Least-squares fit of a parabola y = coef[2]*x^2 + coef[1]*x + coef[0],
 * solved directly with Cramer's rule on the 3x3 normal equations.
 *
 * All sums are accumulated in double precision, so fractional abscissas are
 * used as given (no truncation to int) and x^4 cannot overflow an int.
 *
 * @param timeArray   sample abscissas, at least PointsNum entries
 * @param valueArray  sample ordinates, at least PointsNum entries
 * @param coef        receives 3 coefficients, lowest power first
 * @param critPoint   receives the abscissa of the parabola's vertex, -b/(2a);
 *                    not meaningful when the fitted quadratic term is zero
 * @param PointsNum   number of samples
 * @return false when the system determinant is (nearly) zero, in which case
 *         coef and critPoint are left untouched; true otherwise
 */
bool findQuadCoefficients(double timeArray[], double valueArray[], double *coef, double &critPoint, int PointsNum){
    // Sxy denotes sum(t^x * value^y) over all samples.
    const double S00 = PointsNum;
    double S10 = 0, S20 = 0, S30 = 0, S40 = 0;
    double S01 = 0, S11 = 0, S21 = 0;

    for (int i = 0; i < PointsNum; i++) {
        const double value = valueArray[i];
        const double t  = timeArray[i];
        const double t2 = t * t;
        const double t3 = t2 * t;
        const double t4 = t3 * t;

        S40 += t4;
        S30 += t3;
        S20 += t2;
        S10 += t;

        S01 += value;
        S11 += value * t;
        S21 += value * t2;
    }

    const double S20squared = S20 * S20;

    // 2x2 minors of the (symmetric) normal-equation matrix, M_ij = M_ji.
    const double M11 = S20*S00 - S10*S10;
    const double M21 = S30*S00 - S20*S10;
    const double M22 = S40*S00 - S20squared;
    const double M31 = S30*S10 - S20squared;
    const double M32 = S40*S10 - S20*S30;

    const double discriminant = S40*M11 - S30*M21 + S20*M31;
    // fabs, not abs: abs may resolve to the integer overload and truncate.
    if (fabs(discriminant) < .00000000001) return false;

    const double Da = S21*M11
                     -S11*M21
                     +S01*M31;

    const double Db = -S21*M21
                      +S11*M22
                      -S01*M32;

    const double Dc =   S40*(S20*S01 - S10*S11)
                      - S30*(S30*S01 - S10*S21)
                      + S20*(S30*S11 - S20*S21);

    coef[2] = Da/discriminant;
    coef[1] = Db/discriminant;
    coef[0] = Dc/discriminant;

    // -b/(2a) = -(Db/discriminant) / (2*(Da/discriminant)) = -Db/(2*Da)
    critPoint = -Db/(2*Da);

    return true;
}

bool polynomialfit(int obs, int degree, 
        double *dx, double *dy, double *store) /* n, p */
{
gsl_multifit_linear_workspace *ws;
gsl_matrix *cov, *X;
gsl_vector *y, *c;
double chisq;

int i, j;

X = gsl_matrix_alloc(obs, degree);
y = gsl_vector_alloc(obs);
c = gsl_vector_alloc(degree);
cov = gsl_matrix_alloc(degree, degree);

for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
        gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
}

ws = gsl_multifit_linear_alloc(obs, degree);
gsl_multifit_linear(X, y, c, cov, &chisq, ws);

/* store result ... */
for(i=0; i < degree; i++) {
    store[i] = gsl_vector_get(c, i);
}

gsl_multifit_linear_free(ws);
gsl_matrix_free(X);
gsl_matrix_free(cov);
gsl_vector_free(y);
gsl_vector_free(c);
return true; /* we do not "analyse" the result (cov matrix mainly)
        to know if the fit is "good" */
}

/*
 * Print the DEGREE coefficients, then one line per sample with the sample
 * index, the measured y value and the value of the fitted quadratic.
 * The quadratic is evaluated at the sample index itself, which coincides
 * with the abscissa for this file's data because x[i] == i.
 */
void testcoeff(double *coeff)
{
    printf ("\n polynomial coefficients\n");
    int k = 0;
    while (k < DEGREE) {
        printf ("  coeff[%d] : %11.7lf\n", k, coeff[k]);
        ++k;
    }
    putchar ('\n');

    printf (" computed values:\n\n   x, yi, yip\n");
    for (unsigned n = 0; n < NP; ++n) {
        const double fitted = n*n*coeff[2] + n*coeff[1] + coeff[0];
        printf ("%2u,%.7lf,%.7lf\n", n, y[n], fitted);
    }
    putchar ('\n');
}

/*
 * Run both fitting routines ITER times each (the repetition exists for
 * profiling) and print the coefficients each of them produced.
 * Returns 1 as soon as either routine reports failure, so that stale
 * coefficients or an unset critical point are never printed.
 */
int main (void)
{
    #define ITER 1000
    bool ok = true;

    for (int i=0; ok && i< ITER; i++) {
        ok = polynomialfit (NP, DEGREE, x, y, coeff);
    }
    if (!ok) {
        fprintf(stderr, "polynomialfit failed\n");
        return 1;
    }
    testcoeff(coeff);

    /* Initialised so it is never read indeterminate. */
    double sx = 0.0;
    for (int i=0; ok && i< ITER; i++) {
        ok = findQuadCoefficients(x, y, coeff, sx, NP);
    }
    if (!ok) {
        fprintf(stderr, "findQuadCoefficients failed\n");
        return 1;
    }
    printf("critical point %lf\n", sx);
    testcoeff(coeff);
    return 0;
}

参考文献: