Go语言的线性回归库

时间:2013-05-07 14:58:46

标签: statistics go linear-regression

我正在寻找一个使用MLE或LSE实现线性回归的Go库。 有人见过吗?

有这个统计资料库,但它似乎没有我需要的东西: https://github.com/grd/statistics

谢谢!

4 个答案:

答案 0 :(得分:4)

我使用渐变下降实现了以下功能,它只给出了系数,但是它采用了任意数量的解释变量并且相当准确:

package main

import "fmt"

func calc_ols_params(y []float64, x[][]float64, n_iterations int, alpha float64) []float64 {

    thetas := make([]float64, len(x))

    for i := 0; i < n_iterations; i++ {

        my_diffs := calc_diff(thetas, y, x)

        my_grad := calc_gradient(my_diffs, x)

        for j := 0; j < len(my_grad); j++ {
            thetas[j] += alpha * my_grad[j]
        }
    }
    return thetas
}

func calc_diff (thetas []float64, y []float64, x[][]float64) []float64 {
    diffs := make([]float64, len(y))
    for i := 0; i < len(y); i++ {
        prediction := 0.0
        for j := 0; j < len(thetas); j++ {
            prediction += thetas[j] * x[j][i]
        }
        diffs[i] = y[i] - prediction
    }
    return diffs
}

func calc_gradient(diffs[] float64, x[][]float64) []float64 {
    gradient := make([]float64, len(x))
    for i := 0; i < len(diffs); i++ {
        for j := 0; j < len(x); j++ {
            gradient[j] += diffs[i] * x[j][i]
        }
    }
    for i := 0; i < len(x); i++ {
        gradient[i] = gradient[i] / float64(len(diffs))
    }

    return gradient
}

func main(){
    y := []float64 {3,4,5,6,7}
    x := [][]float64 {{1,1,1,1,1}, {4,3,2,1,3}}

    thetas := calc_ols_params(y, x, 100000, 0.001)

    fmt.Println("Thetas : ", thetas)

    y_2 := []float64 {1,2,3,4,3,4,5,4,5,5,4,5,4,5,4,5,6,5,4,5,4,3,4}

    x_2 := [][]float64 {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
                            {4,2,3,4,5,4,5,6,7,4,8,9,8,8,6,6,5,5,5,5,5,5,5},
                    {4,1,2,3,4,5,6,7,5,8,7,8,7,8,7,8,7,7,7,7,7,6,5},
                    {4,1,2,5,6,7,8,9,7,8,7,8,7,7,7,7,7,7,6,6,4,4,4},}

    thetas_2 := calc_ols_params(y_2, x_2, 100000, 0.001)

    fmt.Println("Thetas_2 : ", thetas_2)

}

结果:

Thetas :  [6.999959251448524 -0.769216974483968]
Thetas_2 :  [1.5694174539341945 -0.06169183063112409 0.2359981255871977 0.2424327101610395]

go playground

我使用python.pandas检查了我的搜索结果,结果非常接近:

In [24]: from pandas.stats.api import ols

In [25]: df = pd.DataFrame(np.array(x).T, columns=['x1','x2','x3','y'])

In [26]: from pandas.stats.api import ols

In [27]: x = [
     [4,2,3,4,5,4,5,6,7,4,8,9,8,8,6,6,5,5,5,5,5,5,5],
     [4,1,2,3,4,5,6,7,5,8,7,8,7,8,7,8,7,7,7,7,7,6,5],
     [4,1,2,5,6,7,8,9,7,8,7,8,7,7,7,7,7,7,6,6,4,4,4]
     ]

In [28]: y = [1,2,3,4,3,4,5,4,5,5,4,5,4,5,4,5,6,5,4,5,4,3,4]

In [29]: x.append(y)

In [30]: df = pd.DataFrame(np.array(x).T, columns=['x1','x2','x3','y'])

In [31]: ols(y=df['y'], x=df[['x1', 'x2', 'x3']])
Out[31]: 

-------------------------Summary of Regression Analysis-------------------------

Formula: Y ~ <x1> + <x2> + <x3> + <intercept>

Number of Observations:         23
Number of Degrees of Freedom:   4

R-squared:         0.5348
Adj R-squared:     0.4614

Rmse:              0.8254

F-stat (3, 19):     7.2813, p-value:     0.0019

Degrees of Freedom: model 3, resid 19

-----------------------Summary of Estimated Coefficients------------------------
      Variable       Coef    Std Err     t-stat    p-value    CI 2.5%   CI 97.5%
--------------------------------------------------------------------------------
            x1    -0.0618     0.1446      -0.43     0.6741    -0.3453     0.2217
            x2     0.2360     0.1487       1.59     0.1290    -0.0554     0.5274
            x3     0.2424     0.1394       1.74     0.0983    -0.0309     0.5156
     intercept     1.5704     0.6331       2.48     0.0226     0.3296     2.8113
---------------------------------End of Summary---------------------------------

In [34]: df_1 = pd.DataFrame(np.array([[3,4,5,6,7], [4,3,2,1,3]]).T, columns=['y', 'x'])

In [35]: df_1
Out[35]: 
   y  x
0  3  4
1  4  3
2  5  2
3  6  1
4  7  3

[5 rows x 2 columns]

In [36]: ols(y=df_1['y'], x=df_1['x'])
Out[36]: 

-------------------------Summary of Regression Analysis-------------------------

Formula: Y ~ <x> + <intercept>

Number of Observations:         5
Number of Degrees of Freedom:   2

R-squared:         0.3077
Adj R-squared:     0.0769

Rmse:              1.5191

F-stat (1, 3):     1.3333, p-value:     0.3318

Degrees of Freedom: model 1, resid 3

-----------------------Summary of Estimated Coefficients------------------------
      Variable       Coef    Std Err     t-stat    p-value    CI 2.5%   CI 97.5%
--------------------------------------------------------------------------------
             x    -0.7692     0.6662      -1.15     0.3318    -2.0749     0.5365
     intercept     7.0000     1.8605       3.76     0.0328     3.3534    10.6466
---------------------------------End of Summary---------------------------------


In [37]: df_1 = pd.DataFrame(np.array([[3,4,5,6,7], [4,3,2,1,3]]).T, columns=['y', 'x'])

In [38]: ols(y=df_1['y'], x=df_1['x'])
Out[38]: 

-------------------------Summary of Regression Analysis-------------------------

Formula: Y ~ <x> + <intercept>

Number of Observations:         5
Number of Degrees of Freedom:   2

R-squared:         0.3077
Adj R-squared:     0.0769

Rmse:              1.5191

F-stat (1, 3):     1.3333, p-value:     0.3318

Degrees of Freedom: model 1, resid 3

-----------------------Summary of Estimated Coefficients------------------------
      Variable       Coef    Std Err     t-stat    p-value    CI 2.5%   CI 97.5%
--------------------------------------------------------------------------------
             x    -0.7692     0.6662      -1.15     0.3318    -2.0749     0.5365
     intercept     7.0000     1.8605       3.76     0.0328     3.3534    10.6466
---------------------------------End of Summary---------------------------------

答案 1 :(得分:3)

实现LSE(最小平方误差)线性回归非常简单。

Here是JavaScript中的一个实现 - 移植到Go应该是微不足道的。


Here是一个(未经测试的)端口:

package main

import "fmt"

type Point struct {
    X float64
    Y float64
}

func linearRegressionLSE(series []Point) []Point {

    q := len(series)

    if q == 0 {
        return make([]Point, 0, 0)
    }

    p := float64(q)

    sum_x, sum_y, sum_xx, sum_xy := 0.0, 0.0, 0.0, 0.0

    for _, p := range series {
        sum_x += p.X
        sum_y += p.Y
        sum_xx += p.X * p.X
        sum_xy += p.X * p.Y
    }

    m := (p*sum_xy - sum_x*sum_y) / (p*sum_xx - sum_x*sum_x)
    b := (sum_y / p) - (m * sum_x / p)

    r := make([]Point, q, q)

    for i, p := range series {
        r[i] = Point{p.X, (p.X*m + b)}
    }

    return r
}

func main() {
    // ...
}

答案 2 :(得分:1)

有一个名为gostat的项目,它有一个贝叶斯软件包,可以进行线性回归。

不幸的是文档有点缺乏,因此您可能必须阅读代码以了解如何使用它。我自己稍微涉足了它,但没有触及贝叶斯包装。

答案 3 :(得分:1)

我的绅士AS75(在线)线性回归算法的端口用Go(golang)编写。它做正常的OLS普通最小二乘回归。在线部分意味着它可以处理无限数据行:如果您习惯于提供(nxp)设计矩阵,这有点不同:您调用Includ()n次,(或者如果您获得更多数据则更多) ,每次给它一个p值的向量。这有效地处理了n变大的情况,你可能不得不从磁盘中传输数据,因为它不会全部适合内存。

https://github.com/glycerine/zettalm