Question

尊敬的StackOverflow社区：

我试图制作一个p值矩阵，该矩阵对应于我通过获取相关值获得的矩阵

以下是我的数据（为简单起见，仅做5行，对于具有50行的每个数据框，我的实际数据是3列）。

FG_Smooth <- data.frame(FS_1 = c(0.43, 0.33, 3.47, 5.26, 1.09), FS2 = c(0.01, 0.02, 6.86, 3.27, 0.86), FS_3 = c(0.07, 0.36, 1.91, 5.61, 0.84), row.names = c("Group_3", "Thermo", "Embryophyta", "Flavo", "Cyclo")) 
FMG_Smooth <- data.frame(GS_1 = c(1.13, 1.20, 0.52, 2.81, 0.70), GS_2 = c(1.18, 1.7, 0.42, 2.93, 0.78), GS_3 = c(1.17, 1.11, 0.60, 3.10, 0.87), row.names = c("Proline", "Trigonelline", "L-Lysine", "Nioctine", "Caffeate"))                         

library(Hmisc)
rcorr(t(FG_Smooth), t(FMG_Smooth), type = "pearson")

但我收到此错误：

rcorr（t（FG_Smooth），t（FMG_Smooth），type =“ pearson”）中的错误：必须有> 4个观察结果

我每个人只有3个生物样本-因此，我无法在多个帖子中使用建议多花费时间的rcorr命令。 rcorr命令为您提供1）相关矩阵； 2）相关的p值。

因此，为了回避此问题，我已经执行了以下操作：如其他帖子中所建议：

library(stats)
cor(t(FG_Smooth), t(FMG_Smooth), method = "pearson")

这有效并且给出了我所有相关性的矩阵。

我的下一步是在相关矩阵中找到与每个值关联的p值。函数cor.test仅给出总体p值，这不是我所需要的。

仔细阅读多个帖子后-我碰到了这个帖子： rcorr() function for correlations

我按照说明进行操作：

tblcols <- expand.grid(1:ncol(FG_Smooth), 1:ncol(FMG_Smooth))
cfunc <- function(var1, var2) {
  cor.test(FG_Smooth[,var1], FMG_Smooth[,var2], method="pearson")
}

res <- mapply(function(a,b) {
  cfunc(var1 = a, var2 = b)
}, tblcols$Var1, tblcols$Var2)

head(res)
[,1]        [,2]        [,3]        [,4]        [,5]        [,6]        [,7]       
statistic   1.324125    -0.1022017  2.422883    0.9131595   -0.3509424  1.734178    1.53494    
parameter   3           3           3           3           3           3           3          
p.value     0.2773076   0.9250449   0.09392613  0.4284906   0.74883     0.1812997   0.2223626  
estimate    0.6073388   -0.05890371 0.8135079   0.4663678   -0.1985814  0.7075406   0.663238   
null.value  0           0           0           0           0           0           0          
alternative "two.sided" "two.sided" "two.sided" "two.sided" "two.sided" "two.sided" "two.sided"
            [,8]         [,9]       
statistic   -0.009291327 2.880821   
parameter   3            3          
p.value     0.99317      0.06348644 
estimate    -0.005364273 0.8570256  
null.value  0            0          
alternative "two.sided"  "two.sided"

这仅给我9个p值，而不是与cor命令获得的每个相关值相对应的p值矩阵。在此示例中，它将是p值的5x5矩阵，因为cor命令会生成5x5的相关值矩阵。

有没有区别。做到这一点的方法？

Answer 1

这是一个tidyverse解决方案，它创建所有感兴趣的对，然后为每个对执行一个cor.test，并提取相关值和相应的p值。

# example data
FG_Smooth <- data.frame(FS_1 = c(0.43, 0.33, 3.47, 5.26, 1.09), FS2 = c(0.01, 0.02, 6.86, 3.27, 0.86), FS_3 = c(0.07, 0.36, 1.91, 5.61, 0.84), row.names = c("Group_3", "Thermo", "Embryophyta", "Flavo", "Cyclo")) 
FMG_Smooth <- data.frame(GS_1 = c(1.13, 1.20, 0.52, 2.81, 0.70), GS_2 = c(1.18, 1.7, 0.42, 2.93, 0.78), GS_3 = c(1.17, 1.11, 0.60, 3.10, 0.87), row.names = c("Proline", "Trigonelline", "L-Lysine", "Nioctine", "Caffeate"))                         

library(tidyverse)

expand.grid(v1 = row.names(FG_Smooth),                                # create combinations of names
            v2 = row.names(FMG_Smooth)) %>%
  tbl_df() %>%                                                        # use for visualisation purpose
  mutate(cor_test = map2(v1, v2, ~cor.test(unlist(FG_Smooth[.x,]),    # perform the correlation test for each pair and store it
                                           unlist(FMG_Smooth[.y,]))), 
         cor_value = map_dbl(cor_test, "estimate"),                   # get the correlation value from the test
         cor_p_value = map_dbl(cor_test, "p.value"))                  # get the p value from the test

# # A tibble: 25 x 5
#   v1          v2           cor_test    cor_value cor_p_value
#   <fct>       <fct>        <list>          <dbl>       <dbl>
# 1 Group_3     Proline      <S3: htest>    -0.998     0.0367 
# 2 Thermo      Proline      <S3: htest>    -0.592     0.596  
# 3 Embryophyta Proline      <S3: htest>     0.390     0.745  
# 4 Flavo       Proline      <S3: htest>    -0.544     0.634  
# 5 Cyclo       Proline      <S3: htest>    -0.966     0.167  
# 6 Group_3     Trigonelline <S3: htest>    -0.492     0.673  
# 7 Thermo      Trigonelline <S3: htest>    -0.998     0.0396 
# 8 Embryophyta Trigonelline <S3: htest>     0.985     0.109  
# 9 Flavo       Trigonelline <S3: htest>    -1.000     0.00188
#10 Cyclo       Trigonelline <S3: htest>    -0.305     0.803  
# # ... with 15 more rows

v1和v2是将为相关性测试创建对的数据集的行名，cor_test列具有每对相关性测试对象，{{1} }具有提取的相关系数，cor_value具有提取的p值。

如果将以上输出另存为数据框，则可以轻松调整形状。例如，如果将其另存为cor_p_value，则可以得到一个p值为5x5的数据帧，如下所示：

也使用d %>% select(v1, v2, cor_p_value) %>% spread(v2, cor_p_value) # # A tibble: 5 x 6 # v1 Caffeate `L-Lysine` Nioctine Proline Trigonelline # <fct> <dbl> <dbl> <dbl> <dbl> <dbl> # 1 Cyclo 0.309 0.995 0.351 0.167 0.803 # 2 Embryophyta 0.779 0.0931 0.737 0.745 0.109 # 3 Flavo 0.890 0.204 0.848 0.634 0.00188 # 4 Group_3 0.439 0.875 0.481 0.0367 0.673 # 5 Thermo 0.928 0.242 0.886 0.596 0.0396软件包的替代版本是：

broom

，它为您提供了library(tidyverse) library(broom) expand.grid(v1 = row.names(FG_Smooth), v2 = row.names(FMG_Smooth)) %>% tbl_df() %>% mutate(cor_test = map2(v1, v2, ~tidy(cor.test(unlist(FG_Smooth[.x,]), unlist(FMG_Smooth[.y,]))))) %>% unnest() # # A tibble: 25 x 8 # v1 v2 estimate statistic p.value parameter method alternative # <fct> <fct> <dbl> <dbl> <dbl> <int> <chr> <chr> # 1 Group_3 Proline -0.998 -17.3 0.0367 1 Pearson's product-moment correlation two.sided # 2 Thermo Proline -0.592 -0.735 0.596 1 Pearson's product-moment correlation two.sided # 3 Embryophyta Proline 0.390 0.423 0.745 1 Pearson's product-moment correlation two.sided # 4 Flavo Proline -0.544 -0.648 0.634 1 Pearson's product-moment correlation two.sided # 5 Cyclo Proline -0.966 -3.73 0.167 1 Pearson's product-moment correlation two.sided # 6 Group_3 Trigonelline -0.492 -0.565 0.673 1 Pearson's product-moment correlation two.sided # 7 Thermo Trigonelline -0.998 -16.0 0.0396 1 Pearson's product-moment correlation two.sided # 8 Embryophyta Trigonelline 0.985 5.78 0.109 1 Pearson's product-moment correlation two.sided # 9 Flavo Trigonelline -1.000 -339. 0.00188 1 Pearson's product-moment correlation two.sided #10 Cyclo Trigonelline -0.305 -0.320 0.803 1 Pearson's product-moment correlation two.sided # # ... with 15 more rows格式的相关性测试对象。您需要使用列tidy（相关系数）和estimate。

Answer 2

当前设置会引起一些问题：

首先，您的cor.test必须使用转置版本和t()：

cor.test(t(FG_Smooth)[,var1], t(FMG_Smooth)[,var2], method="pearson")

第二，cor.test返回一个元素列表，您只需要提取p.value项：

cor.test(t(FG_Smooth)[,var1], t(FMG_Smooth)[,var2], method="pearson")$p.value

因此，考虑将mapply结果绑定到具有所需colnames和rownames（即dimnames）的5 X 5矩阵中：

P值矩阵

# COMBINATION PAIRS
tblcols <- expand.grid(1:ncol(t(FG_Smooth)), 1:ncol(t(FMG_Smooth)))

# COR TEST FUNCTION
cfunc <- function(var1, var2) {
  cor.test(t(FG_Smooth)[,var1], t(FMG_Smooth)[,var2], method="pearson")$p.value
}

# P-VALUE MATRIX BUILD
matrix(mapply(cfunc, tblcols$Var1, tblcols$Var2),
       ncol = ncol(t(FG_Smooth)), nrow = ncol(t(FMG_Smooth)),
       dimnames = list(colnames(t(FG_Smooth)), colnames(t(FMG_Smooth))))

#               Proline Trigonelline   L-Lysine  Nioctine  Caffeate
# Group_3     0.0367145  0.672775387 0.87489349 0.4808196 0.4392690
# Thermo      0.5964129  0.039648033 0.24176614 0.8860530 0.9276037
# Embryophyta 0.7450881  0.109027230 0.09309087 0.7373778 0.7789284
# Flavo       0.6341827  0.001878145 0.20399625 0.8482831 0.8898338
# Cyclo       0.1669023  0.802963162 0.99491874 0.3506318 0.3090812

相关矩阵

并且可以肯定的是，如果我们使用$estimate，则矩阵构建将复制cor()调用：

t_FG <- t(FG_Smooth)
t_FMG <- t(FMG_Smooth)

cfunc <- function(var1, var2) {
  cor.test(t_FG[,var1], t_FMG[,var2], method="pearson")$estimate
}

# COR MATRIX BUILD
m <- matrix(mapply(cfunc, tblcols$Var1, tblcols$Var2),
            ncol = ncol(t_FG), nrow = ncol(t_FMG),
            dimnames = list(colnames(t_FG), colnames(t_FMG)))

cor(t(FG_Smooth), t(FMG_Smooth), method = "pearson")
#                Proline Trigonelline     L-Lysine   Nioctine   Caffeate
# Group_3     -0.9983375   -0.4916671  0.195254411 -0.7280867 -0.7712447
# Thermo      -0.5923344   -0.9980613  0.928751644  0.1780333  0.1134750
# Embryophyta  0.3898002    0.9853709 -0.989327898 -0.4009247 -0.3403212
# Flavo       -0.5435196   -0.9999956  0.949098001  0.2360668  0.1721863
# Cyclo       -0.9658300   -0.3045869 -0.007981547 -0.8521212 -0.8844400

m
#                Proline Trigonelline     L-Lysine   Nioctine   Caffeate
# Group_3     -0.9983375   -0.4916671  0.195254411 -0.7280867 -0.7712447
# Thermo      -0.5923344   -0.9980613  0.928751644  0.1780333  0.1134750
# Embryophyta  0.3898002    0.9853709 -0.989327898 -0.4009247 -0.3403212
# Flavo       -0.5435196   -0.9999956  0.949098001  0.2360668  0.1721863
# Cyclo       -0.9658300   -0.3045869 -0.007981547 -0.8521212 -0.8844400

获得皮尔逊相关矩阵的p值矩阵

2 个答案: