我有一个大型数据集,已按年份拆分,并为每一年分别创建了训练集(train)和测试集(test):
# Break the full data set into a named list holding one data frame per
# distinct value of its `year` column.
Y <- split(x = dat_all, f = dat_all$year)
#' Randomly split one data frame into train and test subsets
#'
#' Every row is independently assigned to the train subset with probability
#' 0.7 and to the test subset with probability 0.3, so the resulting sizes are
#' approximately (not exactly) 70/30. Call `set.seed()` beforehand for a
#' reproducible split.
#'
#' @param x A data frame (here: one element of `Y`, i.e. one year of data).
#' @return A list with two data frames, `train` and `test`, which together
#'   contain every row of `x` exactly once.
create_sets <- function(x) {
  # 1 = train, 2 = test; one independent draw per row
  train_set <- sample(2, nrow(x), replace = TRUE, prob = c(0.7, 0.3))
  # Return the list directly instead of relying on the invisible value of
  # assign(), which only worked by accident and hid the result at the console.
  list(
    train = x[train_set == 1, ],
    test = x[train_set == 2, ]
  )
}
# Apply the train/test split to every per-year data frame; the result keeps
# the names of Y, so each year is reachable as Ylist[["<year>"]]$train / $test.
Ylist <- lapply(X = Y, FUN = create_sets)
要访问其中的每个元素,请使用 Ylist$'2016'$train
我已经写好了计算准确率比(Accuracy Ratio,AR)的代码,可以对列表中的每个元素单独运行,但我想找到一种一次性处理所有年份的方法,以避免重复大量代码(共有16年的数据)
以下是我目前如何创建一年的准确率
# Accuracy ratio (AR) for a single year (2016), on its train and test splits.
# Relies on objects defined outside this snippet: `Ylist` (per-year train/test
# splits), `modf` (fitted model handed to predict()), `ModFit()` and
# `rcorr.cens()` (presumably Hmisc::rcorr.cens -- confirm it is attached).

# ---- Train split ----
val_train <- Ylist[["2016"]][["train"]]
val_train$pred <- predict(modf, newdata = val_train)
# plogis(p) equals exp(p) / (1 + exp(p)) but does not become NaN when exp(p)
# overflows for large predictions.
val_train$probs <- plogis(val_train$pred)
x <- data.frame(rcorr.cens(val_train$probs, val_train$default_flag))
# Second statistic returned by rcorr.cens() (Dxy per the Hmisc docs -- confirm)
train_AR <- x[2, 1]
train_AR

# ---- Test split ----
# Use the same year's test split; a bare `test` object is not defined anywhere
# in this snippet.
val_test <- Ylist[["2016"]][["test"]]
# NOTE(review): `modfull` is never used below -- predictions still come from
# `modf`. The call is kept because ModFit() is not visible here and may have
# side effects; drop it if it does not.
modfull <- ModFit(val_test)
val_test$pred <- predict(modf, newdata = val_test)
val_test$probs <- plogis(val_test$pred)
x <- data.frame(rcorr.cens(val_test$probs, val_test$default_flag))
test_AR <- x[2, 1]
test_AR

# ---- Summary table ----
# c(a, b, ) with a trailing comma is an error in R ("argument 3 is empty").
AR_Logistic1 <- c(train_AR, test_AR)
AR_Logistic2 <- c(train_AR, test_AR) # just in to see if table works
AccuracyRatio <- rbind(AR_Logistic1, AR_Logistic2)
# Only two columns exist (train, test); assigning a third name ("All") fails
# with a dimnames length error, and no "All" value is computed here.
colnames(AccuracyRatio) <- c("Train", "Test")
AccuracyRatio
补充说明:我想对整个列表逐年运行准确率比(AR)的计算,然后输出每一年训练集和测试集的AR。
非常感谢任何帮助
答案 0 :(得分:0)
使用 lapply 并将AR计算包装在一个函数中,您可以按如下方式汇总输出。
没有样本数据,我无法测试,但如果您遇到任何错误,请告知我们。
#' Accuracy ratios (AR) for one train/test pair
#'
#' Scores the `train` and `test` data frames with the model `modf` (looked up
#' outside this function), converts the predictions to probabilities with the
#' logistic function and takes the second statistic returned by
#' `rcorr.cens()` as the accuracy ratio (Dxy per the Hmisc docs -- confirm).
#'
#' @param yearDat A list with elements `train` and `test` (e.g.
#'   `Ylist$'2016'`), each a data frame containing a `default_flag` column and
#'   whatever predictors `modf` needs. The default refers to an object named
#'   `listInput` that must exist in the calling session if the argument is
#'   omitted.
#' @return If `train` and `test` share exactly one value in their `year`
#'   column: a data frame with columns `yearName`, `Train` and `Test`.
#'   Otherwise (no `year` column, or several years): the plain 2 x 2 matrix
#'   with columns `Train` and `Test`. Both rows (`AR_Logistic1`,
#'   `AR_Logistic2`) currently hold the same values; the second row is a
#'   placeholder kept from the original table layout.
fn_Calc_AR <- function(yearDat = listInput) {
  # yearDat <== Ylist$'2016'
  trainDat <- yearDat[["train"]]
  testDat <- yearDat[["test"]]

  # Helper: accuracy ratio of `modf` on a single data frame.
  calc_ar <- function(dat) {
    pred <- predict(modf, newdata = dat)
    # plogis(p) equals exp(p) / (1 + exp(p)) but does not become NaN when
    # exp(p) overflows for large predictions.
    probs <- plogis(pred)
    stats <- rcorr.cens(probs, dat$default_flag)
    stats[[2L]]
  }

  train_AR <- calc_ar(trainDat)

  # NOTE(review): `modfull` is never used -- predictions come from `modf`.
  # The call is kept because ModFit() is not visible here and may have side
  # effects; drop it if it does not.
  modfull <- ModFit(testDat)
  test_AR <- calc_ar(testDat)

  # Naming the elements gives the matrix its column names directly; only two
  # columns exist, so a third name ("All") would raise a dimnames error.
  AR_Logistic1 <- c(Train = train_AR, Test = test_AR)
  AR_Logistic2 <- AR_Logistic1 # just in to see if table works
  AccuracyRatio <- rbind(AR_Logistic1, AR_Logistic2)

  # Take the year label from the data itself: lapply() does not pass element
  # names to the function, and the names of the rcorr.cens() result are
  # statistic labels, not years.
  yearName <- unique(c(
    as.character(trainDat[["year"]]),
    as.character(testDat[["year"]])
  ))

  if (length(yearName) == 1L) {
    data.frame(yearName = yearName, AccuracyRatio, stringsAsFactors = FALSE)
  } else {
    AccuracyRatio
  }
}
汇总输出:
# Run the AR calculation on every year's train/test pair and stack the
# per-year results row-wise into a single table.
AR_Summary <- do.call(what = rbind, args = lapply(X = Ylist, FUN = fn_Calc_AR))
汇总数据集:
# Pool all years: stack every year's train rows, and separately every year's
# test rows, into one data frame each.
aggregateTrain <- do.call(rbind, lapply(Ylist, function(x) x$train))
aggregateTest <- do.call(rbind, lapply(Ylist, function(x) x$test))
aggregateList <- list(train = aggregateTrain, test = aggregateTest)
# fn_Calc_AR() expects ONE list holding both `train` and `test`. Iterating over
# aggregateList with lapply() would hand it a bare data frame, for which
# `$train` / `$test` are NULL, so the list is passed whole in a single call.
AR_AggregateSummary <- fn_Calc_AR(aggregateList)