将函数应用于列表并输出结果

时间:2017-02-22 15:41:22

标签: r

我有一个大型数据集,我按年份将其拆分,并为每一年分别创建训练集(train)和测试集(test):

# Partition the full data set into a named list with one data frame per year.
Y <- split(x = dat_all, f = dat_all$year)
# Randomly partition the rows of one data frame into a training and a test set.
#
# Args:
#   x: A data frame (here: the rows belonging to a single year, i.e. one
#      element of Y).
#
# Returns:
#   A list with two data frames, `train` and `test`. Each row is assigned
#   independently, to `train` with probability 0.7 and to `test` with
#   probability 0.3, so the split is only approximately 70/30. Call set.seed()
#   beforehand if the split has to be reproducible.
create_sets <- function(x) {
  # One label per row: 1 = training set, 2 = test set.
  set_id <- sample(2, nrow(x), replace = TRUE, prob = c(0.7, 0.3))

  # Return the list directly (visibly) instead of going through assign().
  # drop = FALSE keeps the pieces data frames even when x has a single column.
  list(
    train = x[set_id == 1, , drop = FALSE],
    test = x[set_id == 2, , drop = FALSE]
  )
}
# Build the train/test split for every year; the list names stay the years.
Ylist <- lapply(X = Y, FUN = create_sets)

要访问其中的每个元素,请使用 Ylist$'2016'$train

我已经写了一段计算准确率比(Accuracy Ratio,AR)的代码,可以对列表中的每个元素单独运行,但我想找到一种一次性完成的方法,以省去大量重复代码(共有 16 年的数据)。

以下是我目前计算某一年准确率比(AR)的方式:

# Accuracy ratio (AR) of the fitted model `modf` on the 2016 training and test
# sets. Relies on objects created elsewhere: `modf`, `Ylist`, `ModFit` and
# rcorr.cens() (from the Hmisc package).

# --- Training set -----------------------------------------------------------
val_train <- Ylist[["2016"]]$train
# The predictions are treated as log-odds below -- assumes `modf` is a
# logistic model predicted on the link scale; TODO confirm.
val_train$pred <- predict(modf, newdata = val_train)
# plogis(p) equals exp(p) / (1 + exp(p)) but does not overflow to NaN when p
# is large.
val_train$probs <- plogis(val_train$pred)
x <- data.frame(rcorr.cens(val_train$probs, val_train$default_flag))
# Row "Dxy" (Somers' D, the second element returned by rcorr.cens) is the AR.
train_AR <- x["Dxy", 1]
train_AR

# --- Test set ---------------------------------------------------------------
# `test` was never defined in this snippet; use the 2016 test split so that it
# matches the training data above.
test <- Ylist[["2016"]]$test
# NOTE(review): `modfull` is not used below (predictions come from `modf`);
# the call is kept because ModFit is defined elsewhere -- confirm it is needed.
modfull <- ModFit(test)
val_test <- test
val_test$pred <- predict(modf, newdata = test)
val_test$probs <- plogis(val_test$pred)
x <- data.frame(rcorr.cens(val_test$probs, val_test$default_flag))
test_AR <- x["Dxy", 1]
test_AR

# --- Summary table ----------------------------------------------------------
# c(train_AR, test_AR, ) with a trailing comma is an error in R
# ("argument 3 is empty"), so the vectors hold exactly the two computed values.
AR_Logistic1 <- c(train_AR, test_AR)
AR_Logistic2 <- c(train_AR, test_AR) # just in to see if table works
AccuracyRatio <- rbind(AR_Logistic1, AR_Logistic2)
# Only two columns exist; assigning a third name ("All") would fail with a
# dimnames length error.
colnames(AccuracyRatio) <- c("Train", "Test")
AccuracyRatio

澄清一下:我想对整个列表中的每一年都运行上面的准确率比计算,然后输出每一年训练集和测试集各自的 AR。

非常感谢任何帮助

1 个答案:

答案 0 :(得分:0)

使用lapply并将AR计算包装在函数中,您可以按如下方式汇总输出。 没有样本数据,我无法测试,但如果您遇到任何错误,请告知我们。

# Accuracy ratio of the global model `modf` on a single data set.
#
# Args:
#   dat: Data frame with the predictors `modf` needs and a `default_flag`
#        outcome column.
#
# Returns:
#   The "Dxy" element (Somers' D) of rcorr.cens() as a single number.
.fn_Calc_AR_one <- function(dat) {
  # Treated as log-odds -- assumes predict() returns the link scale of a
  # logistic model; TODO confirm against how `modf` was fitted.
  log_odds <- predict(modf, newdata = dat)
  # plogis(p) equals exp(p) / (1 + exp(p)) without overflowing to NaN.
  probs <- plogis(log_odds)
  rcorr.cens(probs, dat$default_flag)[["Dxy"]]
}

# Compute the train and test accuracy ratio (AR) for one train/test split.
#
# Relies on objects defined elsewhere: the fitted model `modf`, `ModFit`, and
# rcorr.cens() (Hmisc).
#
# Args:
#   yearDat: A list with two data frames, `train` and `test`, e.g.
#            Ylist$'2016'.
#
# Returns:
#   A one-row data frame with columns
#     yearName - the year as a string when `train` and `test` together carry
#                exactly one distinct value in a `year` column, otherwise NA;
#     Train    - AR on yearDat$train;
#     Test     - AR on yearDat$test.
#   The shape is identical for every input, so the results of
#   lapply(Ylist, fn_Calc_AR) can be stacked with do.call(rbind, ...).
#
# Raises:
#   An error when `yearDat` is not a list holding data frames `train` and
#   `test` (for example when a bare data frame is passed by mistake).
fn_Calc_AR <- function(yearDat = listInput) {
  has_sets <- is.list(yearDat) &&
    is.data.frame(yearDat[["train"]]) &&
    is.data.frame(yearDat[["test"]])
  if (!has_sets) {
    stop(
      "`yearDat` must be a list with data frames `train` and `test`.",
      call. = FALSE
    )
  }

  trainDat <- yearDat[["train"]]
  testDat <- yearDat[["test"]]

  train_AR <- .fn_Calc_AR_one(trainDat)

  # NOTE(review): the result of ModFit() is never used (predictions come from
  # `modf`); the call is kept because ModFit is defined elsewhere -- confirm
  # whether it is still needed.
  modfull <- ModFit(testDat)
  test_AR <- .fn_Calc_AR_one(testDat)

  # lapply() does not pass the list names to the function, so the year label
  # is read from the data itself rather than from names().
  years <- unique(c(
    as.character(trainDat[["year"]]),
    as.character(testDat[["year"]])
  ))
  yearName <- if (length(years) == 1L) years else NA_character_

  data.frame(
    yearName = yearName,
    Train = train_AR,
    Test = test_AR,
    stringsAsFactors = FALSE
  )
}

汇总输出:

# Run the AR calculation for every year and stack the per-year results.
AR_Summary <- do.call(what = rbind, args = lapply(X = Ylist, FUN = fn_Calc_AR))

汇总数据集:

# Pool the per-year splits into one overall training set and one overall test
# set by stacking the rows of every year.
aggregateTrain <- do.call(rbind, lapply(Ylist, function(x) x[["train"]]))
aggregateTest <- do.call(rbind, lapply(Ylist, function(x) x[["test"]]))

aggregateList <- list(train = aggregateTrain, test = aggregateTest)

# fn_Calc_AR() expects a list holding `train` and `test`, so it is called once
# on the whole list. Applying it to each element of aggregateList would hand
# it a bare data frame with no `train`/`test` entries.
AR_AggregateSummary <- fn_Calc_AR(aggregateList)