# Any help would be greatly appreciated; I have been stuck on this for several weeks. :(
# Packages ----
# Install only the packages that are not available yet. Reinstalling all of
# them on every run is slow, needs network access, and can fail when a package
# is already loaded in the session.
requiredPkgs <- c("klaR", "caret", "e1071", "gmodels", "gbm", "foreach")
isInstalled <- vapply(
  requiredPkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(isInstalled)) {
  install.packages(requiredPkgs[!isInstalled], dependencies = TRUE)
}

# Attach the packages used by the rest of the script.
library(klaR)
library(caret)
library(e1071)
library(gmodels)
library(gbm)
library(foreach)
# Load the grading data.
grading <- read.csv("~/PA_DataFinal/GradingData160315.csv")

# Draw a 1% sample stratified on FailPass.
# NOTE(review): stratified() is not provided by any package loaded in this
# script (it looks like splitstackshape::stratified) -- confirm that it is
# attached or sourced before this line runs.
dfstrat <- stratified(grading, "FailPass", .01)

# Keep a copy of the sample on disk. The target folder is created first
# because save() fails when the directory does not exist.
dfstratFile <- "c:/Users/gillisn/Documents/PA_DataFinal/RResults/GradingRResults/iteration 1/dfstrat.rda"
dir.create(dirname(dfstratFile), recursive = TRUE, showWarnings = FALSE)
save(dfstrat, file = dfstratFile)
# Split the sample 75:25 into a training set and a test set.
# FailPass is the response variable and drives the partition.
# The seed is fixed so the same rows are selected on every run.
set.seed(1)
inTrainingSet <- createDataPartition(dfstrat$FailPass, p = 0.75, list = FALSE)
trainSet <- dfstrat[inTrainingSet, ]
testSet <- dfstrat[-inTrainingSet, ]
# Predictors and labels.
# Per the original note, the data has 48 columns and the response, FailPass,
# is the last one; the remaining 47 columns are the predictors.
# The response is removed by name rather than by position (-48) so the
# predictor set stays correct even if the column order changes.

# x, y: training predictors and training labels.
x <- trainSet
x$FailPass <- NULL
y <- as.factor(trainSet$FailPass)

# i, j: test predictors and test labels.
i <- testSet
i$FailPass <- NULL
j <- as.factor(testSet$FailPass)
# Resampling scheme shared by the models: 25 resamples. No method is given,
# so trainControl() falls back to its default (the bootstrap).
bootControl <- trainControl(number = 25)

# Tuning grids ----
# gbm (boosted trees): caret tunes n.trees, interaction.depth, shrinkage and
# n.minobsinnode, and current caret versions reject a grid that lacks any of
# them. n.minobsinnode is held at 10, which is gbm's own default.
gbmGrid <- expand.grid(
  interaction.depth = (1:5) * 2,
  n.trees = (1:10) * 25,
  shrinkage = .1,
  n.minobsinnode = 10
)

# nb (naive Bayes): caret tunes fL, usekernel and adjust. adjust = 1 leaves
# the kernel bandwidth unchanged and is irrelevant while usekernel is FALSE.
nbGrid <- expand.grid(fL = 0, usekernel = FALSE, adjust = 1)

# svmRadial: no custom grid is defined; the SVM is tuned through tuneLength
# below. The statement that originally stood here,
#   svmGrid >- expandGrid(.sigma=, .c=)
# was not valid R (`>-` instead of `<-`, expandGrid() instead of
# expand.grid(), and no values). If a grid is wanted, supply real values:
#   svmGrid <- expand.grid(sigma = <values>, C = <values>)

set.seed(2)

# Train the models ----
# Naive Bayes. tuneGrid must be the grid object itself, not its name as a
# string. trControl is passed explicitly so all three models visibly share
# the same resampling scheme.
nbFit <- train(
  x, y,
  method = "nb",
  trControl = bootControl,
  tuneGrid = nbGrid
)

# SVM with a radial kernel, trying 10 candidate tuning values.
# scaled = FALSE is forwarded through train()'s `...` to the underlying
# fitting function.
svmFit <- train(
  x, y,
  method = "svmRadial",
  tuneLength = 10,
  trControl = bootControl,
  scaled = FALSE
)

# GBM over the grid defined above. verbose and bag.fraction are forwarded
# through train()'s `...` to the underlying fitting function.
gbmFit <- train(
  x, y,
  method = "gbm",
  trControl = bootControl,
  verbose = FALSE,
  bag.fraction = 0.5,
  tuneGrid = gbmGrid
)
# Predict the training data with each fitted model.
# predict() is applied to every model via lapply() instead of being called on
# the list itself, so this does not depend on a predict() method for plain
# lists being available. Without newdata, each call returns the predictions
# for the data the model was trained on.
models <- list(svm = svmFit, nb = nbFit, gbm = gbmFit)
trainPred <- lapply(models, predict)

# Show the predictions (one element per model, named like `models`).
trainPred