答案 0 :(得分:3)
如果一直没有人回答,很可能是因为目前没有现成的支持 :)。原因很可能是:通常人们更倾向于使用 Lasso 等正则化方法,而不是前向/后向逐步选择。正则化方法已有现成的支持,例如 Regression.jl。
但是,编写自己的逐步选择非常简单:
using DataFrames
using RDatasets
using StatsBase
using GLM
"""
    compose(lhs::Symbol, rhs::AbstractVector{Symbol})

Build a `Formula` whose left-hand side is `lhs` and whose right-hand side is
the call expression `1 + rhs[1] + rhs[2] + ...`.

The literal `1` is always the first term, so an empty `rhs` yields a
right-hand side containing only that constant.
"""
function compose(lhs::Symbol, rhs::AbstractVector{Symbol})
    # Splat the predictors straight into the `+` call after the constant term.
    rhs_expr = Expr(:call, :+, 1, rhs...)
    return Formula(lhs, rhs_expr)
end
"""
    step(df, lhs::Symbol, rhs::AbstractVector{Symbol}, forward::Bool, use_aic::Bool)

Perform one round of stepwise selection starting from the predictor set `rhs`.

# Arguments
- `df`: the data passed to `lm` and queried with `names`.
- `lhs`: the response variable.
- `rhs`: the predictors currently in the model.
- `forward`: when `true`, try adding each column of `df` that is neither `lhs`
  nor already in `rhs`; when `false`, try dropping each element of `rhs`.
- `use_aic`: when `true`, score models with `aic`; otherwise with `bic`.

# Returns
A tuple `(best_rhs, improved)`. `best_rhs` is the candidate set with the
strictly lowest criterion value, or `rhs` itself when no candidate beats the
current model. `improved` tells whether such a candidate was found.
"""
function step(df, lhs::Symbol, rhs::AbstractVector{Symbol},
              forward::Bool, use_aic::Bool)
    candidates = forward ? setdiff(names(df), [lhs; rhs]) : rhs
    criterion = use_aic ? aic : bic

    # Nothing left to add (forward) or remove (backward).
    if isempty(candidates)
        return (rhs, false)
    end

    # Fit a linear model on the given predictors and score it.
    score(predictors) = criterion(lm(compose(lhs, predictors), df))

    # The current model is the baseline every candidate has to beat.
    best_score = score(rhs)
    best_rhs = rhs
    improved = false

    for candidate in candidates
        trial = forward ? [rhs; candidate] : setdiff(rhs, [candidate])
        trial_score = score(trial)
        # Strict comparison: on ties the earlier winner is kept.
        if trial_score < best_score
            best_score, best_rhs, improved = trial_score, trial, true
        end
    end

    return (best_rhs, improved)
end
"""
    stepwise(df, lhs::Symbol, forward::Bool, use_aic::Bool)

Run stepwise selection for the response `lhs` on `df` and return the fitted
`lm` model for the selected predictors.

Forward selection (`forward == true`) starts from no predictors; backward
selection starts from every column of `df` except `lhs`. `step` is applied
repeatedly until it reports no improvement. `use_aic` is forwarded to `step`
to choose the selection criterion.

The final model is fitted with the selected predictors in sorted order.
"""
function stepwise(df, lhs::Symbol, forward::Bool, use_aic::Bool)
    selected = forward ? Symbol[] : setdiff(names(df), [lhs])

    # Keep stepping for as long as the last step changed the predictor set.
    improved = true
    while improved
        selected, improved = step(df, lhs, selected, forward, use_aic)
    end

    return lm(compose(lhs, sort(selected)), df)
end
上面函数的两个关键参数是 forward(进行前向选择还是后向选择)和 use_aic(使用 AIC 还是 BIC 作为准则)。当然,这一切都很容易修改。该实现没有针对速度进行优化,但在简单的情况下应该足够用了。
以下是如何使用它:
# Load the `swiss` dataset from the "datasets" package of RDatasets.
# NOTE(review): `[2:end]` presumably selects columns 2 onward (single-index
# column selection in the DataFrames version this was written for) — confirm
# against the installed DataFrames version.
df = dataset("datasets", "swiss")[2:end]
# Arguments after the response symbol are (forward, use_aic).
# Forward selection scored with BIC.
stepwise(df, :Fertility, true, false)
# Forward selection scored with AIC.
stepwise(df, :Fertility, true, true)
# Backward selection scored with AIC.
stepwise(df, :Fertility, false, true)
# Backward selection scored with BIC.
stepwise(df, :Fertility, false, false)
(所有选项都返回相同的模型并与R中的参考示例一致)