VBA中Lasso回归系数的标准误差

时间:2018-06-07 18:14:00

标签: excel-vba regression lasso vba excel

我找到了这个很好的代码(感谢https://quantmacro.wordpress.com/2016/01/03/lasso-regression-in-vba/) 从这个能够找到我的套索回归系数的代码中,我还想提取系数的标准误差(此后它将使我能够计算我的tstat及其p值)

有人可以帮助我将这个初始代码(对于系数)转换为一个代码,它会给我这个系数的标准误差

提前感谢您的帮助

Function Lasso_Regression(Y As Variant, X As Variant, Lambda As Double, Optional Constant As Boolean = True) As Variant

' tolerance for convergence
Dim Tol As Double: Tol = 0.001
' Maximum Iteration
Dim MaxIt As Integer:   MaxIt = 500

Dim NRows As Integer: NRows = X.Rows.Count
Dim NCol As Integer: NCol = X.Columns.Count
Dim Cnst As Integer

If Constant = True Then
    Cnst = 1
Else
    Cnst = 0
End If

ReDim target(NRows, 1) As Double
ReDim NewX(NRows, NCol + Cnst) As Double
Dim ii As Integer:       ii = 1
Dim jj As Integer:       jj = 1

'--create Matrix with intercept term included


For ii = 1 To NRows
    NewX(ii, 1) = 1
    target(ii, 1) = Y(ii)
    For jj = 1 To NCol
        NewX(ii, jj + Cnst) = X(ii, jj)
    Next jj
Next ii

'------precompute factor normalization factor


ReDim norm_factor(NCol + Cnst) As Double
For jj = 1 To (NCol + Cnst)
    norm_factor(jj) = Application.SumProduct(Application.Index(NewX, 0, jj), Application.Index(NewX, 0, jj))
Next jj

'------inititialaistion weights vector Theta

ReDim old_theta(NCol + Cnst) As Double
ReDim new_theta(NCol + Cnst) As Double

For ii = 1 To (NCol + Cnst)
    old_theta(ii) = Rnd - 0.5
    new_theta(ii) = old_theta(ii)
Next ii
ReDim theta_diff(NCol + Cnst) As Double
Dim max_theta_diff As Double: max_theta_diff = 1000000

Dim bb As Integer: bb = 1
'has to be variant due to matrix computaton
Dim rho As Variant

' main loop of the coordinate

Do While max_theta_diff > Tol And bb <= MaxIt
'----cycle through each feature to update theta

    For ii = 1 To (NCol + Cnst)
        ' Set old weight equal to new weight
        old_theta(ii) = new_theta(ii)

        '--------calculate rho x


        With Application
        ReDim model_fit_without_feature(NRows) As Variant
        model_fit_without_feature = myMsubstract(.MMult(NewX, .Transpose(new_theta)), .MMult(.Index(NewX, 0, ii), new_theta(ii)))

        ReDim residuals(NRows) As Variant: residuals = myMsubstract(target, model_fit_without_feature)
        rho = .MMult(.Transpose(.Index(NewX, 0, ii)), residuals)
        End With

        '-----------calculate theta update one at a time

        If Cnst = 1 And ii = 1 Then
            new_theta(ii) = rho(1) / norm_factor(ii)
        ElseIf rho(1) < -0.5 * Lambda Then
            new_theta(ii) = (rho(1) + 0.5 * Lambda) / norm_factor(ii)
        ElseIf rho(1) > 0.5 * Lambda Then
            new_theta(ii) = (rho(1) - 0.5 * Lambda) / norm_factor(ii)
        Else
            new_theta(ii) = 0
        End If

        '-----calculate change in theta coefficient from previous update


        theta_diff(ii) = Abs((old_theta(ii) - new_theta(ii)) / (old_theta(ii) + 0.000001))

    Next ii

    '----- algo continue until max % change in the theta is less than tol

    max_theta_diff = Application.Max(theta_diff)
    bb = bb + 1

Loop

Lasso_Regression = new_theta()

End Function

Function myMsubstract(M As Variant, N As Variant) As Variant

Dim minrow As Integer:  minrow = LBound(M, 1)
Dim maxrow As Integer:  maxrow = UBound(M, 1)
Dim mincol As Integer:  mincol = LBound(M, 2)
Dim maxcol As Integer:  maxcol = UBound(M, 2)

Dim output() As Variant:    ReDim output(minrow To maxrow, mincol To maxcol)
Dim ii As Integer
Dim jj As Integer

For ii = minrow To maxrow
    For jj = mincol To maxcol
        output(ii, jj) = M(ii, jj) - N(ii, jj)
    Next jj
Next ii

myMsubstract = output

End Function

0 个答案:

没有答案