我想通过自动算法减少线性模型,该算法的规则基于每个系数的t检验的p值。当我包含分类变量及其与其他预测变量的交互时,就会出现问题。您可以在下面找到有关mtcars
数据集的修改版本的两个示例。两者都使用我的算法:第一个不包括分类变量,一切都很顺利。第二个将变量am
转换为因子,算法失败。它进入了一个无限循环。
select_variables.lm()函数:
select_variables.lm = function( model, data, pval = 0.05 ) {
cat( "---\n" )
message( "pval threshold set to: ", pval )
cat( "---\n" )
max = max( summary( model )$coefficients[
2:nrow( summary( model )$coefficients ), 4 ] )
if ( pval > max ) {
stop( "pval is too high to start" )
}
to_delete = names( which.max( summary( model )$coefficients[
2:nrow( summary( model )$coefficients ), 4 ] ) )
mod = update( model, paste0( "~ . - ", to_delete ), evaluate = FALSE )
mod = eval.parent( mod )
while ( max > pval ) {
max = max( summary( mod )$coefficients[
2:nrow( summary( mod )$coefficients ), 4 ] )
to_delete = names( which.max( summary( mod )$coefficients[
2:nrow( summary( mod )$coefficients ), 4 ] ) )
mod = update( mod, paste0( "~ . - ", to_delete ), evaluate = FALSE )
mod = eval.parent( mod )
cat( "deleting", to_delete, "with a pval of:", max, "\n" )
}
return( invisible( mod ) )
}
示例1:
# saving the data
data_no_factors = mtcars
# building a silly model
model1 = lm( hp ~ qsec + cyl + mpg + disp + drat + wt + qsec + vs
+ am + gear + carb + am:qsec, data = data_no_factors )
summary( model1 )
# reducing the silly model
model1_reduced = select_variables.lm( model1, data = data_no_factors )
summary( model1_reduced )
示例2:
# saving the data and converting am to a factor
data_factor = mtcars
data_factor$am = as.factor( data_factor$am )
levels( data_factor$am ) = c( "no", "yes" )
# building another silly model
model2 = lm( hp ~ qsec + cyl + mpg + disp + drat + wt + qsec + vs
+ am + gear + carb + am:qsec, data = data_factor )
summary( model2 )
# reducing the new silly model: this will cause an infinite loop
model2_reduced = select_variables.lm( model2, data = data_factors )
summary( model2_reduced )
希望提出的问题很好,任何提示都将受到赞赏。 感谢