我想在同一张图中同时绘制模型的训练数据集和测试数据集,但不知道该怎么做。我已经试过 klaR::partimat,效果很好:
# Simulate a reproducible three-class training set (20 points per class) and
# draw the LDA partition plot with klaR, then overlay the training points.
set.seed(123)

# NOTE: the six rnorm() calls are evaluated in this exact order (all of x,
# then all of y); reordering them would change the simulated data.
df.train <- data.frame(
  x = c(
    rnorm(20, mean = 1, sd = 1),
    rnorm(20, mean = 2, sd = 1),
    rnorm(20, mean = 2, sd = 1)
  ),
  y = c(
    rnorm(20, mean = 2, sd = 1),
    rnorm(20, mean = 1, sd = 1),
    rnorm(20, mean = 3, sd = 1)
  ),
  z = factor(rep(1:3, each = 20), levels = 1:3)
)

# Light palette for the predicted regions, dark palette for the observations;
# both are indexed by class (1, 2, 3).
col_light <- c("lightblue", "lightgreen", "pink")
col_dark <- c("blue", "darkgreen", "red4")

# Partition plot of the LDA classifier.
# NOTE(review): gs = NA and col.mean = NA presumably suppress partimat's own
# observation symbols and class-mean markers - confirm against the klaR docs.
klaR::partimat(
  z ~ x + y,
  data = df.train,
  method = "lda",
  prec = 250,
  image.colors = col_light,
  gs = NA,
  col.mean = NA
)

# Overlay the training observations as filled squares, coloured by true class.
with(df.train, points(x, y, pch = 15, col = col_dark[z]))
我可以“手工”做同样的事情:
# Fit the LDA on the full training set.
mdl1 <- MASS::lda(z ~ x + y, data = df.train)

# Regular 100 x 100 prediction grid over x in [0, 3] and y in [0, 5];
# x varies slowest, y cycles fastest.
df.test <- data.frame(
  x = rep(seq(0, 3, length.out = 100), each = 100),
  y = rep(seq(0, 5, length.out = 100), times = 100)
)

# Predicted class at every grid point, re-encoded as a factor with the fixed
# level set 1:3 so that all three classes are always represented.
df.test$z <- local({
  predicted <- predict(mdl1, df.test)[["class"]]
  factor(as.numeric(as.character(predicted)), levels = 1:3)
})

# Show the decision regions as a dense cloud of grid points coloured by the
# predicted class.
ggplot(mapping = aes(x = x, y = y, color = z)) +
  geom_point(data = df.test) +
  scale_color_manual(values = col_light)
Q1:我如何在这里用ggplot通过col_dark方案添加训练点?
可以用 bootstrap 方法得到置信度为 95% 的预测区域:
# Resampling step: refit the LDA on 100 random subsets of the training data
# and record the class that each refit predicts at every grid point of
# df.test. The predictions are appended to df.test as factor columns
# z1, z2, ..., z100.
#
# NOTE: the subsets are drawn WITHOUT replacement (54 training rows each), so
# strictly speaking this is subsampling rather than a classical bootstrap.
#
# The predictions are collected in a list and bound to df.test once, instead
# of growing df.test with cbind() on every iteration. The refits live inside
# the anonymous function, so the full-data model `mdl1` is no longer
# overwritten by the last resampled fit.
n_resamples <- 100
subset_size <- 54

boot_pred <- lapply(seq_len(n_resamples), function(j) {
  # One sample() call per replicate, in replicate order, so the random
  # stream is consumed exactly as in a plain loop.
  keep <- sample(seq_len(nrow(df.train)), size = subset_size, replace = FALSE)
  fit <- MASS::lda(z ~ x + y, data = df.train[keep, ])
  predicted <- predict(fit, df.test)[["class"]]
  factor(as.numeric(as.character(predicted)), levels = c(1:3))
})
names(boot_pred) <- paste0("z", seq_len(n_resamples))

df.test <- cbind(df.test, as.data.frame(boot_pred))
#' Most frequent value (mode) of a vector
#'
#' @param x A vector (atomic or factor).
#' @return The value of `x` that occurs most often. Ties are resolved in
#'   favour of the value that appears first in `x`. For zero-length input the
#'   result is `NA` of the type of `x`.
mfv <- function(x) {
  distinct <- unique(x)
  # Map every element to the position of its value in `distinct`, then count
  # how often each position occurs.
  counts <- tabulate(match(x, distinct))
  # which.max() returns the first maximum, i.e. the earliest-seen value wins.
  distinct[which.max(counts)]
}
# Summarise the resampled predictions per grid point and keep only the points
# whose predicted class is stable across resamples.
#
# Fixes relative to the previous version:
#   * the agreement count compared against `df.test$z0`, a column that does
#     not exist (NULL), so it could not be computed;
#   * the vote was taken over columns 3:ncol (which includes the full-model
#     prediction `z`), while the count used the hard-coded slice 3:102; both
#     now use exactly the resample columns z1, z2, ...;
#   * `z` is kept as a factor with levels 1:3 and the palette is named by
#     level, so a class keeps its colour even when it has no confident points.

# Resample prediction columns are the ones named "z<positive integer>".
boot_cols <- grep("^z[1-9][0-9]*$", names(df.test), value = TRUE)
stopifnot(length(boot_cols) > 0)

# Character matrix of votes: one row per grid point, one column per resample.
boot_votes <- as.matrix(df.test[, boot_cols, drop = FALSE])

# Modal (most frequently predicted) class per grid point.
modal_class <- apply(boot_votes, 1, mfv)

# Number of resamples that agree with the modal class. The vector is recycled
# down the columns, so each row is compared with its own modal class.
df.test$prob <- rowSums(boot_votes == modal_class)
df.test$z <- factor(modal_class, levels = c(1:3))

# Keep grid points where more than 95 resamples agree (the threshold is a
# count and assumes 100 resamples).
df.test.1 <- df.test[df.test$prob > 95, c("x", "y", "z")]

ggplot(mapping = aes(x = x, y = y, color = z)) +
  geom_point(data = df.test.1) +
  scale_color_manual(values = setNames(col_light, levels(df.test.1$z)))
Q2:是否有更好的方法绘制预测区域(例如,用多边形代替点)?