我正试图在零膨胀负二项式回归上使用插入符号的递归特征消除。我根据github教程创建了一组自定义函数:http://topepo.github.io/caret/rfe.html
以下是我的自定义功能:
rfZeroinfl <- list(
# Explicit default to add print statement
summary = function(data, lev=NULL, model=NULL){
print("SUMMARY FUNCTION CALLED")
if (is.character(data$obs))
data$obs <- factor(data$obs, levels = lev)
postResample(data[, "pred"], data[, "obs"])
},
fit = function(x, y, first, last, ...){
print("FIT FUNCTION CALLED")
library(pscl)
tmp <- if (is.data.frame(x))
x
else as.data.frame(x)
tmp$y <- y
zeroinfl(y ~ ., data = tmp, dist = "negbin", EM = TRUE)
#zeroinfl(y ~ ., data = tmp, dist = "negbin")
},
pred = function(object, x) {
print("PRED FUNCTION CALLED")
predict(object, x)
},
rank = function(object, x, y) {
print("RANK FUNCTION CALLED")
coefs <- object$coefficients
coefs.both <- c(coefs$count, coefs$zero)
# Find smallest coefficient for either count or zero process
# for every variable in the analysis
print(paste("NUM COEFS IS: ", toString(length(coefs.both))))
coefs_agg <- aggregate(coefs,
by = list(names(t)),
fun = function(x_coefs){
x_out <- min(abs(x_coefs))
return(x_out)
})
colnames(coefs_agg) <- c("var", "coeffs")
coefs_sort <- coefs_agg[order(-coeffs_agg),]
print(head(coefs_sort))
coefs_sort
},
# Explicit default to add print statement
selectSize = function(x, metric, maximize){
print ("SIZE FUNCTION CALLED")
best <- if (maximize)
which.max(x[, metric])
else which.min(x[, metric])
min(x[best, "Variables"])
},
# Explicit default to add print statement
selectVar = function(y, size){
print ("VAR FUNCTION CALLED")
finalImp <- ddply(y[, c("Overall", "var")], .(var),
function(x) mean(x$Overall, na.rm = TRUE))
names(finalImp)[2] <- "Overall"
finalImp <- finalImp[order(finalImp$Overall, decreasing = TRUE),
]
as.character(finalImp$var[1:size])
})
以下是我打电话给他们的方式:
zinb <- read.csv("http://www.ats.ucla.edu/stat/data/fish.csv")
z_x <- zinb[,c("nofish", "livebait", "camper", "persons")]
z_y <- zinb$count
z_ctrl <- rfeControl(functions = rfZeroinfl, method="repeatedcv", repeats=5, verbose=TRUE)
z_Profile <- rfe(z_x, z_y, sizes=c(3, 4), rfeControl=z_ctrl)
以下是最终输出和错误消息:
[1] "FIT FUNCTION CALLED"
+(rfe) fit Fold10.Rep5 size: 4
[1] "FIT FUNCTION CALLED"
-(rfe) fit Fold10.Rep5 size: 4
[1] "PRED FUNCTION CALLED"
+(rfe) imp Fold10.Rep5
[1] "RANK FUNCTION CALLED"
[1] "NUM COEFS IS: 10"
Error in { :
task 1 failed - "system is computationally singular: reciprocal condition number = 8.59945e-18"
输出似乎暗示某些矩阵在某处是不可逆的,但我无法弄清楚如何达到这一点。有没有其他人有尝试将这些东西放在一起的经验?提前感谢任何建议或想法。