这看起来是个很简单的问题,但我一直没能解决!
我取了 iris(鸢尾花)数据集的数值列,然后按如下方式将其标准化:
# Keep only the four numeric measurement columns of iris (columns 1 to 4).
newiris <- iris[seq_len(4)]
# Standardize every column with scale(), then wrap the resulting matrix
# back into a data frame.
iris.norm <- data.frame(scale(newiris))
head(iris.norm)
Sepal.Length Sepal.Width Petal.Length Petal.Width
1 -0.8976739 1.01560199 -1.335752 -1.311052
2 -1.1392005 -0.13153881 -1.335752 -1.311052
3 -1.3807271 0.32731751 -1.392399 -1.311052
4 -1.5014904 0.09788935 -1.279104 -1.311052
5 -1.0184372 1.24503015 -1.335752 -1.311052
6 -0.5353840 1.93331463 -1.165809 -1.048667
# Run PCA on the standardized measurements.
pccomp <- prcomp(iris.norm)
# Compute the summary once, keep it in `a`, and display it.
a <- summary(pccomp)
a
# Transpose the importance table so that each principal component becomes a
# row (as in the printed table below); note that t() returns a matrix.
df <- t(as.data.frame(a$importance))
df
## Standard deviation Proportion of Variance Cumulative Proportion
## PC1 1.7083611 0.72962 0.72962
## PC2 0.9560494 0.22851 0.95813
## PC3 0.3830886 0.03669 0.99482
## PC4 0.1439265 0.00518 1.00000
现在把行名(rownames)转换为 df 的一列,使各主成分的名称(PC1、PC2……)成为第一列,以便后续操作:
library(tibble)
library(dplyr)
# Move the row names (PC1, PC2, ...) into a leading "PrinComp" column.
# All rows are kept on purpose: piping the result through head() before
# assigning would cap `df` at six rows, so a PCA with more than six components
# would silently lose PCs before the cumulative-proportion filter runs.
df <- rownames_to_column(as.data.frame(df), var = "PrinComp")
df
## PrinComp Standard deviation Proportion of Variance Cumulative Proportion
## 1 PC1 1.7083611 0.72962 0.72962
## 2 PC2 0.9560494 0.22851 0.95813
## 3 PC3 0.3830886 0.03669 0.99482
## 4 PC4 0.1439265 0.00518 1.00000
# Pick the names of the PCs whose cumulative proportion is still below 96%.
# The names live in the first column of `df`; which() turns the comparison
# into row positions.
pcs <- as.character(df[[1]][which(df$`Cumulative Proportion` < 0.96)]) # cumulative prop less than 96%
pcs
## [1] "PC1" "PC2"
现在我以静态(手工写死)的方式创建一个主成分数据框:取出按上述条件(累积比例 < 0.96)选出的前两个主成分的得分向量:
# Score vectors of the first two principal components, picked by hand.
x1 <- pccomp$x[, "PC1"]
x2 <- pccomp$x[, "PC2"]
# Bind them column-wise into a two-column matrix with columns x1 and x2.
pcdf <- cbind(x1 = x1, x2 = x2)
head(pcdf)
## x1 x2
## [1,] -2.257141 -0.4784238
## [2,] -2.074013 0.6718827
## [3,] -2.356335 0.3407664
## [4,] -2.291707 0.5953999
## [5,] -2.381863 -0.6446757
## [6,] -2.068701 -1.4842053
我的问题是:一旦根据"累积比例小于阈值"(上例中为 0.96)的条件确定了要保留的主成分,怎样才能动态地创建上面的 pcdf 数据框?
答案 0 :(得分:1)
您可以对 df 的 `Cumulative Proportion` 列运行 while 循环:只要累积比例仍小于所需的阈值,就继续追加对应主成分的得分。
# Collect the scores of the leading principal components whose cumulative
# proportion of variance is still below `threshold`.
threshold <- 0.96
pcdf <- list()
i <- 1
# The bounds check comes first so that && short-circuits: without it, a
# threshold above every cumulative proportion would index past the last row,
# produce NA, and make while() fail with "missing value where TRUE/FALSE
# needed".
while (i <= nrow(df) && df$`Cumulative Proportion`[i] < threshold) {
  pcdf[[i]] <- pccomp$x[, i]
  i <- i + 1
}
pcdf <- as.data.frame(pcdf)
# seq_along() yields no names at all when no PC qualified, whereas
# 1:ncol(pcdf) would yield c(1, 0) and break the assignment.
names(pcdf) <- paste0("x", seq_along(pcdf))
输出
> head(pcdf)
x1 x2
1 -2.257141 -0.4784238
2 -2.074013 0.6718827
3 -2.356335 0.3407664
4 -2.291707 0.5953999
5 -2.381863 -0.6446757
6 -2.068701 -1.4842053
以 threshold = 0.999 运行同样的代码,得到:
> head(pcdf)
x1 x2 x3
1 -2.257141 -0.4784238 0.12727962
2 -2.074013 0.6718827 0.23382552
3 -2.356335 0.3407664 -0.04405390
4 -2.291707 0.5953999 -0.09098530
5 -2.381863 -0.6446757 -0.01568565
6 -2.068701 -1.4842053 -0.02687825
**更新**
假设您已经知道想要的主成分个数 i,那么可以使用
# Take the first i score columns with one matrix subset. seq_len() copes with
# i = 0 and drop = FALSE keeps a matrix when i = 1; no `pcdf` object is needed.
a <- pccomp$x[, seq_len(i), drop = FALSE]
# Remove the PC1, PC2, ... column labels so the columns print as [,1], [,2], ...
colnames(a) <- NULL
来代替整个 while 循环部分。
所以对于i = 2你得到
> head(a)
[,1] [,2]
[1,] -2.257141 -0.4784238
[2,] -2.074013 0.6718827
[3,] -2.356335 0.3407664
[4,] -2.291707 0.5953999
[5,] -2.381863 -0.6446757
[6,] -2.068701 -1.4842053
其中a是你的结果。
答案 1 :(得分:1)
假设您总是想要至少一个主成分(PC),下面是一个单行版本:
# Keep the leading PCs whose cumulative proportion is below p, but never fewer
# than one: max(1L, ...) makes the "at least one PC" rule explicit instead of
# relying on 1:0 happening to select column 1. drop = FALSE keeps the result a
# matrix even when a single PC is selected.
p <- 0.96
pccomp$x[, seq_len(max(1L, sum(df$`Cumulative Proportion` < p))), drop = FALSE] # first two PCs
p <- 0.75
pccomp$x[, seq_len(max(1L, sum(df$`Cumulative Proportion` < p))), drop = FALSE] # first PC
答案 2 :(得分:0)
对上面给出的出色解决方案做一点补充:
# NOTE(review): `df1` is not defined in the visible code; presumably it is the
# PrinComp table built earlier as `df` -- confirm before running.
# Names of the PCs whose cumulative proportion is below 96%, read from the
# first column of the table.
pcs <- as.character(df1[[1]][which(df1$`Cumulative Proportion` < 0.96)]) # cumulative prop less than 96%
pcs
## [1] "PC1" "PC2"
i <- length(pcs) # we get the no of PCs fulfilling the cum prop condition
# Take the first i score columns with one matrix subset. seq_len() copes with
# i = 0 and drop = FALSE keeps a matrix when i = 1; no `pcdf` object is needed.
a <- pccomp$x[, seq_len(i), drop = FALSE]
# Remove the PC1, PC2, ... column labels so the columns print as [,1], [,2], ...
colnames(a) <- NULL
head(a)
> head(a)
[,1] [,2]
[1,] -2.257141 -0.4784238
[2,] -2.074013 0.6718827
[3,] -2.356335 0.3407664
[4,] -2.291707 0.5953999
[5,] -2.381863 -0.6446757
[6,] -2.068701 -1.4842053
完成!