需要帮助:在R的PCA图中按变量着色并添加椭圆

时间:2016-03-24 20:27:00

标签: r plot pca

我是R的新手,我正试图弄清楚如何使用它来做PCA。到目前为止,我已经尝试了两种方法,但两者都存在问题。

以下是第一个代码:

library(FactoMineR)

# Step 1: run a scaled PCA keeping three dimensions and export the
# coordinates of the individuals to a CSV file.
my.data.7 <- read.csv("Principal Component Analysis Input 2.csv", header = TRUE)
head(my.data.7)
PCA_result_2 <- PCA(my.data.7, scale.unit = TRUE, ncp = 3, graph = TRUE)
result <- PCA_result_2$ind$coord
write.csv(result, file = "PCA ind_coord 2.csv")

# Step 2: read the table used for plotting (presumably the exported
# coordinates plus the CDS and Root.skin.colour columns -- confirm against
# the actual file). attach() is deliberately not used: every column is
# accessed explicitly through `$`, so nothing is placed on the search path.
my.data.8 <- read.csv("Principal Component Analysis (second part) 2.csv")
head(my.data.8)
pc1 <- my.data.8$Dim1
pc2 <- my.data.8$Dim2
pc3 <- my.data.8$Dim3
acc <- my.data.8$CDS
colour <- my.data.8$Root.skin.colour

# Map every group to its plotting colour BY NAME. Indexing a colour vector
# with the factor's integer codes follows the alphabetical level order
# (Orange, Purple, Red, White, Yellow), which silently pairs the wrong colour
# with each group. A named lookup is independent of the level order and also
# works when read.csv() returns character columns instead of factors.
# "White" is drawn in black so the points stay visible on a white background.
colour_map <- c(
  Orange = "orange",
  Purple = "purple",
  Red = "red",
  White = "black",
  Yellow = "yellow"
)
point_col <- unname(colour_map[as.character(colour)])

plot(pc1, pc3, col = point_col, pch = 19)

# The legend makes the group-to-colour assignment verifiable on the plot.
legend(
  "topright",
  legend = names(colour_map),
  col = colour_map,
  pch = 19,
  title = "Root skin colour"
)

绘图输出为:Dim1 与 Dim3 的散点图

我的问题是:1)我怎么知道颜色已被正确分配;2)如何在图中添加椭圆。

这是我的第二种方法的代码(我使用这个网站寻求帮助,但仍然卡住了):

# One-time setup: install devtools and ggord only when they are missing,
# instead of reinstalling them on every run of the script.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)
if (!requireNamespace("ggord", quietly = TRUE)) {
  install_github("fawda123/ggord")
}
library(ggord)
library(FactoMineR)

# attach() is deliberately not used: the grouping column is accessed
# explicitly through `$` below.
my.data.9 <- read.csv("hello.csv")
head(my.data.9)

# Scaled PCA on columns 2 to 5, keeping three dimensions.
woo <- PCA(my.data.9[, 2:5], scale.unit = TRUE, ncp = 3, graph = TRUE)

# Ordination plot of the PCA result, grouped by root skin colour.
ggord(woo, my.data.9$Root.skin.colour)

绘图输出:Dim1 与 Dim2 的图

我的问题是我想要像以前一样绘制第1和第3维度,而不是第1和第2维度,但不知道如何指定。另外我不明白如何改变颜色。

这看起来是否正朝着正确的方向前进?如果可能的话,我真的很感激能得到一些帮助。我不知道自己在做什么;我本应该在写论文,而不是仍然卡在分析上。另外很抱歉文件用了像“hello”这样奇怪的名字,我当时感到很沮丧。

以下是您要求提供的信息

# Console transcript: dput() dump of my.data.8, usable to rebuild the data
# frame without the CSV file. It has 69 rows and five columns:
#   CDS              - factor of accession identifiers ("CDS010" ... "CDS092")
#   Dim1, Dim2, Dim3 - numeric coordinates
#   Root.skin.colour - factor with levels Orange, Purple, Red, White, Yellow
#                      (alphabetical order, i.e. integer codes 1L to 5L)
> dput(my.data.8)
structure(list(CDS = structure(1:69, .Label = c("CDS010", "CDS011", 
"CDS012", "CDS013", "CDS015", "CDS016", "CDS017", "CDS019", "CDS020", 
"CDS021", "CDS022", "CDS023", "CDS024", "CDS027", "CDS028", "CDS029", 
"CDS030", "CDS031", "CDS032", "CDS033", "CDS034", "CDS035", "CDS036", 
"CDS037", "CDS038", "CDS039", "CDS040", "CDS042", "CDS043", "CDS044", 
"CDS045", "CDS046", "CDS047", "CDS048", "CDS049", "CDS050", "CDS051", 
"CDS052", "CDS053", "CDS054", "CDS056", "CDS058", "CDS059", "CDS060", 
"CDS061", "CDS062", "CDS064", "CDS066", "CDS067", "CDS068", "CDS070", 
"CDS072", "CDS073", "CDS075", "CDS076", "CDS078", "CDS079", "CDS080", 
"CDS081", "CDS082", "CDS083", "CDS084", "CDS085", "CDS086", "CDS087", 
"CDS089", "CDS090", "CDS091", "CDS092"), class = "factor"), Dim1 = c(0.989923706, 
1.002847033, -0.323384931, 0.0280602, -2.103144589, -1.1750233, 
-0.297369615, -1.285073349, -1.18724867, -2.440381033, 2.451488481, 
0.432753586, -0.324628407, 1.83987238, 1.082150477, 1.222767528, 
0.648419317, 1.17034895, -0.959949524, 0.405826882, 3.578749912, 
1.315904789, -0.69599653, -2.650500936, 2.847954059, -1.124700789, 
-1.345309845, -1.571390397, 0.808331242, -0.126459344, 1.978484169, 
-0.372882529, -1.941508494, -1.895565455, -2.308079318, -2.734023717, 
-3.326982705, -0.62297258, 0.4400687, 0.878134622, -0.143118506, 
2.902361971, 1.852738657, 1.318157841, 1.525866109, 0.527018259, 
-2.17646324, -0.938267968, -0.663267011, -1.626999833, -0.725444227, 
4.181058153, -1.663567082, -0.797809065, -0.660857937, 1.275243335, 
-1.246799754, -0.658948097, 3.148052501, 1.22737428, 5.770370659, 
-0.659363823, 0.201377447, -0.250249239, -3.29492153, -2.525333499, 
0.451643578, -2.285229864, -2.05602107), Dim2 = c(0.114080736, 
0.189737473, 0.289738365, 0.15686147, -0.65967629, -0.618998916, 
0.752658445, -0.187202662, 0.601081452, -0.488843082, -0.461435771, 
0.376119902, 0.054640472, -0.352416385, -0.61155099, 0.287520862, 
2.072955276, 1.368287549, -1.598022058, -2.35115053, 0.362478564, 
-1.16829247, -1.161712522, 0.193574061, 0.582209805, 0.090423462, 
0.272987178, 0.762263319, 0.164563899, 1.271976678, 2.169652432, 
-0.304444502, 0.333864962, 0.086432067, 0.03375057, 0.42547905, 
-0.332663346, 0.230207958, 0.416122611, 0.807386059, -0.622165091, 
0.688807153, -0.419360229, -0.024587973, 0.099352776, 0.593489815, 
-0.571526951, -0.587510558, 1.141107254, 0.341089899, -0.234002113, 
-0.675011549, 0.523417802, 0.570074523, -0.2595101, -0.537050791, 
0.691005207, -0.259618613, -0.525167633, -0.181669151, -1.681387716, 
0.150321845, 0.528057749, 0.704124706, -1.3872153, 0.22736727, 
0.459455992, -0.278329399, -0.183119019), Dim3 = c(0.048353514, 
-0.025653037, 0.014924755, -0.00167208, -0.060333422, 0.020764933, 
0.043057079, 0.002591265, 0.02799806, -0.01339572, 0.292361681, 
0.094879935, 0.020261073, -0.147776529, -0.09613908, 0.015407622, 
0.505027604, -0.011850932, 0.162592304, 0.331023774, -0.276654985, 
0.13868844, -0.000362491, -0.124098518, -0.03418057, 0.055507617, 
-0.044387737, 0.08246021, -0.023457465, 0.070215547, 0.090226544, 
0.13499154, -0.115205136, -0.012187001, 0.016720661, -0.112492876, 
-0.156433429, 0.03202894, 0.064274887, -0.121235242, 0.014234763, 
-0.167012976, -0.063685493, 0.045759055, -0.047058619, -0.113183437, 
-0.077100994, 0.039550025, 0.003385481, 0.044618291, 0.025128582, 
0.047673827, -0.112599294, -0.096384527, 0.031407524, 0.095437746, 
0.037062126, 0.026957783, -0.181217407, 0.411980154, -0.189412218, 
-0.042853115, 0.040207038, -0.040919986, -0.082645255, -0.148945175, 
0.030315385, -0.094242334, -0.05746962), Root.skin.colour = structure(c(1L, 
5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 3L, 4L, 1L, 4L, 
4L, 4L, 4L, 1L, 1L, 2L, 4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 1L, 1L, 1L, 1L, 4L, 2L, 3L, 5L, 1L, 2L, 5L, 4L, 1L, 1L, 
1L, 1L, 2L, 4L, 1L, 5L, 1L, 1L, 1L, 3L, 1L, 3L, 5L, 4L, 1L, 1L, 
4L, 1L, 2L, 1L), .Label = c("Orange", "Purple", "Red", "White", 
"Yellow"), class = "factor")), .Names = c("CDS", "Dim1", "Dim2", 
"Dim3", "Root.skin.colour"), class = "data.frame", row.names = c(NA, 
-69L))

1 个答案:

答案 0 :(得分:0)

您可以考虑使用 factoextra 包进行可视化。它可以接受不同包生成的 PCA 模型结果(可能需要稍作格式调整)。此外,您可以在以下网站上找到关于 PCA 可视化的实用教程:STHDA: Principal Component Methods in R: Practical Guide。下面是一个使用您提供的数据的简短代码示例,帮助您入门。希望这正是您所需要的,并能帮助您完成任务。

library(factoextra)

# Use the data frame from the question (the dput() output of my.data.8).
# The bare name `data` is never assigned in this snippet, so it would resolve
# to the utils::data() function and subsetting it would fail.
pca_input <- my.data.8

# Apply PCA to the numeric part of the data. prcomp()'s argument is spelled
# `scale.` (with a trailing dot); it is written out in full rather than
# relying on partial argument matching.
data.pca <- prcomp(
  pca_input[, c("Dim1", "Dim2", "Dim3")],
  center = TRUE,
  scale. = TRUE
)

str(data.pca) # in $x you can see that there are 3 components

fviz_pca_biplot(
  data.pca,
  # choose the components to plot, here 1 and 3
  axes = c(1, 3),
  geom = c("point", "text"),
  addEllipses = TRUE,
  # colour the individual points by an additional factor, here the
  # Root.skin.colour column; factor() keeps it a grouping variable even when
  # the column was read in as character
  col.ind = factor(pca_input[["Root.skin.colour"]]),
  var.axes = TRUE
)