希望你还好!
从今天早上开始,我一直在尝试解决下面这条错误消息:
“Error: `data` and `reference` should be factors with the same levels.”
(即:`data` 和 `reference` 必须是具有相同水平(levels)的因子。)
我不明白为什么会出现这个错误,我已经尝试过使用 as.factor(),但问题依旧。
提前感谢社区!
# Classification tree workflow: fit a tree on a training subset, prune it using
# cross-validated misclassification, and evaluate predictions on the held-out
# rows with a confusion matrix.
# NOTE(review): `dataset` is not created in this script; it is assumed to be a
# data frame already in the workspace containing `Classification` and the
# predictor columns named in the formula below -- confirm.
library(tree)
library(caret)

# Convert only the response column to a factor. Calling as.factor() on the
# whole data frame does not return a data frame, so the row subsetting below
# could not work. A factor response is what makes tree() fit a classification
# tree and what lets predict(type = "class") return class labels.
dataset$Classification <- as.factor(dataset$Classification)

set.seed(1)
train <- sample(seq_len(nrow(dataset)), 92) # 92 training rows (~80% per the original note)
dataset.test <- dataset[-train, ]
dataset.train <- dataset[train, ]
# Take the reference labels from the data frame column; a bare `Classification`
# only exists if the data frame has been attach()ed.
Classification.test <- dataset$Classification[-train]

tree.dataset <- tree(
  Classification ~ Age + BMI + Glucose + Insulin + HOMA + Leptin +
    Adiponectin + Resistin + MCP.1,
  data = dataset,
  subset = train
)
summary(tree.dataset)
plot(tree.dataset)
text(tree.dataset, pretty = 0, cex = 0.6)

tree.pred <- predict(tree.dataset, dataset.test, type = "class")
table(tree.pred, Classification.test) # author's note on the original run: 68%

# Cross-validation
set.seed(3)
cv.dataset <- cv.tree(tree.dataset, FUN = prune.misclass)
plot(cv.dataset)
cv.dataset$dev

# Several tree sizes can tie for the minimum deviance; prune.misclass() needs a
# single `best` value, so keep the smallest (simplest) of the tied sizes.
best.size <- min(cv.dataset$size[cv.dataset$dev == min(cv.dataset$dev)])
best.size

# Refitting: prune the full tree down to the selected size
cv.dataset.pruned <- prune.misclass(tree.dataset, best = best.size)
summary(cv.dataset.pruned)
# A tree pruned down to its root cannot be drawn, so skip plotting in that case.
if (inherits(cv.dataset.pruned, "singlenode")) {
  message("Pruned tree is a single node; nothing to plot.")
} else {
  plot(cv.dataset.pruned)
  # Label the pruned tree itself (the original referenced an undefined
  # `prune.dataset` object here).
  text(cv.dataset.pruned, pretty = 0, cex = 0.8)
}
# plot(cv.dataset$size, cv.dataset$dev, type = "b") # inversely proportional
# prediction_tree <- predict(cv.dataset.pruned)

# type = "class" returns a factor of predicted labels carrying the response
# levels; type = "vector" would return a matrix of class probabilities, which
# confusionMatrix() cannot compare against factor reference labels.
prediction_tree <- predict(
  cv.dataset.pruned,
  newdata = dataset.test,
  type = "class"
)

# confusionMatrix() requires `data` (predictions) and `reference` (true labels)
# to be factors with the same levels. Both now derive from
# dataset$Classification, which removes the error
# "`data` and `reference` should be factors with the same levels".
confusionMatrix(data = prediction_tree, reference = Classification.test)