如何在R中的PCA图中添加多个置信椭圆?

时间:2012-08-22 17:18:43

标签: r pca

我对R和统计数据非常陌生,并且无法在PCA图中添加多个置信椭圆。

我的兴趣在于用95%置信度椭圆突出显示PCA图中的潜在分组/聚类。我已经尝试在R中使用dataEllipse函数,但是我无法弄清楚如何在PCA图中添加具有不同中心的多个椭圆(中心将位于看似包含聚类的各个点,在这种情况下是岩石源和岩性工具可能是由该来源制成的。)

感谢您对此提供的任何帮助! {

lithic_final <- LITHIC.DATASHEET.FOR.R.COMPLETE.FORMAT
lithic_final

pca1 <- princomp(lithic_final); pca1

lithic_source <- c("A1", "A1", "A1", "A1", "A2","A2", "A2", "A3","A3","A3","B","B","B","B","B","B","C","C","C","C","C","C","C","D","D","D","D","D","D","D","D","E","E","E","E","E","E","E","E","F","F","G","G","G","G","H","H","H","H","H","H","H","I1","I1","I1","I2","I2","I2","I2","I2","J1","J1","J2","J2","J2","J2","J2","J2","J2","J2","J2","K","K","K","K","K","K","K","L","L","L","L","L","L","L","L","L","L","L","L","L","L","BB1","BB1","BB1","FC","FC","FC","JRPP","JRPP","JRPP","BB2","BB2","BB2","BB2","MWP","MWP","MWP","MWP","RPO","RPO","RPO")

lithic_source

summary(pca1)

plot(pca1)

#Plotting the scores with the Lithic Source Info
round(pca1$scores[,1:2], 2)
pca_scores <-round(pca1$scores[,1:2], 2)
plot(pca1$scores[,1], pca1$scores[,2], type="n")
text(pca1$scores[,1], pca1$scores[,2],labels=abbreviate(lithic_source, minlength=3), cex=.45)



#Plotting PCA Scores of EACH SAMPLE for PCA 2 and 3 with Lithic Source Info
round(pca1$scores[,2:3], 2)
pca2_3_scores <-round(pca1$scores[,2:3], 2)
plot(pca1$scores[,2], pca1$scores[,3], type="n")
text(pca1$scores[,2], pca1$scores[,3], labels=abbreviate(lithic_source, minlength=3), cex=.45)

#Plotting PCA Scores of EACH SAMPLE for PCA 3 and 4 with Lithic Source Info
round(pca1$scores[,3:4], 2)
pca3_4_scores <-round(pca1$scores[,3:4], 2)
plot(pca1$scores[,3], pca1$scores[,4], type="n")
text(pca1$scores[,3], pca1$scores[,4], labels=abbreviate(lithic_source, minlength=3), cex=.45)

#Plotting PCA Scores of EACH SAMPLE for PCA 1 and 3 with Lithic Source Info
round(pca1$scores[,1:3], 2)
pca1_3_scores <-round(pca1$scores[,1:3], 2)
plot(pca1$scores[,1], pca1$scores[,3], type="n")
text(pca1$scores[,1], pca1$scores[,3], labels=abbreviate(lithic_source, minlength=3), cex=.45)

#Plotting PCA Scores of EACH SAMPLE for PCA 1 and 4 with Lithic Source Info
round(pca1$scores[,1:4], 2)
pca1_4_scores <-round(pca1$scores[,1:4], 2)
plot(pca1$scores[,1], pca1$scores[,4], type="n")
text(pca1$scores[,1], pca1$scores[,4], labels=abbreviate(lithic_source, minlength=3), cex=.45)

#TRYING TO GET ELLIPSES ADDED TO PCA 1 and 4 scores
dataEllipse(pca1$scores[,1], pca1$scores[,4],centers=12,add=TRUE,levels=0.9, plot.points=FALSE)


structure(list(Ca.K12 = c(418L, 392L, 341L, 251L, 297L, 238L, 
258L, 5L, 2L, 37L), Cr.K12 = c(1L, 12L, 15L, 6L, 9L, 6L, 35L, 
7L, 45L, 32L), Cu.K12 = c(89L, 96L, 81L, 63L, 88L, 103L, 104L, 
118L, 121L, 90L), Fe.K12 = c(18627L, 18849L, 18413L, 12893L, 
17757L, 17270L, 16198L, 2750L, 4026L, 3373L), K.K12 = c(20L, 
23L, 28L, 0L, 34L, 17L, 45L, 102L, 150L, 147L), Mn.K12 = c(205L, 
212L, 235L, 120L, 216L, 212L, 246L, 121L, 155L, 115L), Nb.K12 = c(139L, 
119L, 154L, 91L, 122L, 137L, 137L, 428L, 414L, 428L), Rb.K12 = c(99L, 
42L, 79L, 49L, 210L, 243L, 168L, 689L, 767L, 705L), Sr.K12 = c(3509L, 
3766L, 3481L, 2715L, 2851L, 2668L, 2695L, 202L, 220L, 217L), 
    Ti.K12 = c(444L, 520L, 431L, 293L, 542L, 622L, 531L, 82L, 
    129L, 84L), Y.K12 = c(135L, 121L, 105L, 74L, 144L, 79L, 85L, 
    301L, 326L, 379L), Zn.K12 = c(131L, 133L, 108L, 78L, 124L, 
    111L, 114L, 81L, 78L, 59L), Zr.K12 = c(1348L, 1479L, 1333L, 
    964L, 1506L, 1257L, 1296L, 3967L, 4697L, 4427L)), .Names = c("Ca.K12", 
"Cr.K12", "Cu.K12", "Fe.K12", "K.K12", "Mn.K12", "Nb.K12", "Rb.K12", 
"Sr.K12", "Ti.K12", "Y.K12", "Zn.K12", "Zr.K12"), row.names = c(NA, 
10L), class = "data.frame")

2 个答案:

答案 0 :(得分:6)

如果你专注于你的问题而不是所有无关紧要的东西,我想你会收到更快的回复。您向我们提供了绘制一系列与您的问题无关的主要组件的命令。问题是,你如何按组绘制省略号? 10行和3组的样本数据没有用,因为3点不足以绘制数据省略号。您正在使用package car中的dataEllipse函数,它对您的问题有最简单的答案:

首先,一个可重复的例子:

set.seed(42) # so you can get the same numbers I get
source_a <- data.frame(X1=rnorm(25, 50, 5), X2=rnorm(25, 40, 5))
source_b <- data.frame(X1=rnorm(25, 20, 5), X2=rnorm(25, 40, 5))
source_c <- data.frame(X1=rnorm(25, 35, 5), X2=rnorm(25, 25, 5))
lithic_dat <- rbind(source_a, source_b, source_c)
lithic_source <- c(rep("a", 25), rep("b", 25), rep("c", 25))

使用scatterplot()绘制省略号并添加文字:

scatterplot(X2~X1 | lithic_source, data=lithic_dat, pch="", smooth=FALSE, 
     reg.line=FALSE, ellipse=TRUE, levels=.9)
text(lithic_dat$X1, lithic_dat$X2, lithic_source, cex=.75)

可以调整Scatterplot来做你想做的一切,但它也是 可以在不使用椭圆的情况下绘制椭圆:

sources <- unique(lithic_source) # vector of the different sources
plot(lithic_dat$X1, lithic_dat$X1, type="n")
text(lithic_dat$X1, lithic_dat$X2, lithic_source, cex=.75)
for (i in sources) with(lithic_dat, dataEllipse(X1[lithic_source==i],
     X2[lithic_source==i], levels=.9, plot.points=FALSE))

这适用于您的主要组件和任何其他数据。

答案 1 :(得分:3)

这是一个简单的解决方案,使用一个名为ggbiplot的包(在github上可用)和Iris数据。我希望这就是你要找的东西。

struct