我正在尝试在 R 中拟合一个 ctree 模型。在同一个会话中把数据划分为训练集和测试集并进行预测时,效果很好。但是,当我先保存模型(saveRDS),然后在另一个会话中加载模型(readRDS),并且这次只读入测试数据进行预测时,准确率却降低了。我不明白为什么会这样。
示例代码:
# Training session: fit a ctree model on the selected columns and persist it
# to disk so that it can be scored later.
# NOTE(review): neither the package providing ctree() nor the creation of
# `trainData` is visible in this snippet -- presumably both come from earlier
# in the session; confirm.
features <- c(
  "critical", "BU", "DCHAIN_SPEC_STATUS", "DPRIO",
  "KEY_MARKET", "SALES_DOCUMENT_TYPE"
)

# `critical` is the response; every other column in `features` is a predictor.
Model.ctree <- ctree(critical ~ ., data = trainData[, features])
saveRDS(Model.ctree, file = "mod.rds")

# The workspace is deliberately left intact: rm(list = ls()) neither detaches
# packages nor resets options, so it cannot stand in for a fresh R session.
# Restart R to check how the saved model behaves in a new session.
Then, in a different session, I load the saved model and score the test data:
# Scoring session: load the persisted tree, score the test file, and
# summarise ranking quality (ROC curve, AUC, and a percentile gains table).
# NOTE(review): the package that provides ctree() and its predict() method
# must be attached in this session too; it is not loaded in this snippet.
library(ROCR)

super_model <- readRDS("mod.rds")
testData <- read.csv("luck.csv")

features <- c(
  "critical", "BU", "DCHAIN_SPEC_STATUS", "DPRIO",
  "KEY_MARKET", "SALES_DOCUMENT_TYPE"
)

# Fail early with a readable message if the file lacks a model column.
missing_cols <- setdiff(features, names(testData))
if (length(missing_cols) > 0) {
  stop(
    "luck.csv is missing columns: ", paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# NOTE(review): read.csv() derives column types (and any factor levels) from
# luck.csv alone. Presumably the categorical predictors have to be encoded
# exactly as they were in the training data for the tree's splits to apply
# as intended -- verify against the training session.
score_data <- testData[, features]
predictCtree <- predict(super_model, newdata = score_data)
predictNode <- predict(super_model, newdata = score_data, type = "node")

# ROC curve and area under it, using `critical` as the label.
pr <- prediction(predictCtree, testData$critical)
prf <- performance(pr, measure = "tpr", x.measure = "fpr")
plot(prf)
auc <- performance(pr, measure = "auc")
auc <- auc@y.values[[1]]
auc

testData$predictCtree <- predictCtree
testData$TerminalNode <- predictNode
summary(testData$predictCtree)

# Rank rows from highest to lowest predicted score.
newData <- testData[order(-testData$predictCtree), ]
rows <- nrow(newData)
rank <- seq_len(rows)  # seq_len() is empty for zero rows; seq(1, 0) is not
newData$rank <- rank

# Assign each row to a percentile bin of its rank. The breakpoints are
# computed once; rightmost.closed = TRUE keeps the last-ranked row in the
# final bin instead of giving it a one-row bin of its own.
bin_breaks <- quantile(newData$rank, probs = seq(0, 1, by = 0.01))
bin <- findInterval(newData$rank, bin_breaks, rightmost.closed = TRUE)

(intervalLength <- table(bin, dnn = NULL))
(hits <- tapply(newData$critical, bin, sum))
(cutoff <- round(tapply(newData$predictCtree, bin, min), 3))

# Share (in percent) of all `critical` cases captured by each bin, and the
# running total. This is the original ratio
#   100 * (hits / intervalLength) / (sum(critical) / intervalLength)
# simplified; rounding is applied only to the final values, so it no longer
# distorts the result or produces a zero denominator.
gain <- 100 * hits / sum(newData$critical)
(Percentiles <- round(gain, 2))
(cumPercentile <- round(cumsum(gain), 2))