我有两个数据框:
# Example data set A: one row per gene, one numeric column per sample.
# The object is a named list of equal-length columns that carries the
# tibble classes ("tbl_df", "tbl") on top of "data.frame".
setA_df <- structure(
  list(
    gene_symbol = c(
      "Wif1", "Wisp1", "Wnt11", "Wnt6", "Wnt9a",
      "Xxylt1", "Zbp1", "Zbtb7c", "Zc2hc1c", "Zfp300",
      "Zfp36", "Zfp367", "Zfp54", "Zfp612", "Zfp748",
      "Zfp783", "Zfp791", "Zic1", "Zic3", "Zic4"
    ),
    Sample1 = c(
      6, 1420, 7, 2, 47,
      290, 13, 34, 4, 16,
      165, 545, 100, 9, 160,
      1, 15, 0, 0, 0
    ),
    Sample2 = c(
      0, 2617, 25, 1, 105,
      206, 4, 83, 25, 4,
      187, 159, 63, 34, 147,
      6, 4, 4, 0, 1
    )
  ),
  # c(NA, -20L) is the compact form for automatic row names 1..20.
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Show the table.
setA_df
#> gene_symbol Sample1 Sample2
#> 1 Wif1 6 0
#> 2 Wisp1 1420 2617
#> 3 Wnt11 7 25
#> 4 Wnt6 2 1
#> 5 Wnt9a 47 105
#> 6 Xxylt1 290 206
#> 7 Zbp1 13 4
#> 8 Zbtb7c 34 83
#> 9 Zc2hc1c 4 25
#> 10 Zfp300 16 4
#> 11 Zfp36 165 187
#> 12 Zfp367 545 159
#> 13 Zfp54 100 63
#> 14 Zfp612 9 34
#> 15 Zfp748 160 147
#> 16 Zfp783 1 6
#> 17 Zfp791 15 4
#> 18 Zic1 0 4
#> 19 Zic3 0 0
#> 20 Zic4 0 1
# Example data set B: the same 20 genes as rows, with one numeric column
# per tissue (Blood, Bone). The object is a named list of equal-length
# columns that carries the tibble classes on top of "data.frame".
setB_df <- structure(
  list(
    gene_symbol = c(
      "Wif1", "Wisp1", "Wnt11", "Wnt6", "Wnt9a",
      "Xxylt1", "Zbp1", "Zbtb7c", "Zc2hc1c", "Zfp300",
      "Zfp36", "Zfp367", "Zfp54", "Zfp612", "Zfp748",
      "Zfp783", "Zfp791", "Zic1", "Zic3", "Zic4"
    ),
    Blood = c(
      991.833, 104.167, 52.5, 4.833, 33.333,
      163.667, 131.333, 7, 77.667, 52.5,
      222.333, 10.833, 63.5, 0.167, 184.922,
      20.167, 17.333, 0.833, 20.833, 0
    ),
    Bone = c(
      4282.167, 642.333, 5.667, 66.667, 16.667,
      143.167, 112.667, 61.667, 45.833, 24.833,
      1070.333, 41.833, 140.402, 13.667, 156.167,
      84.613, 5.333, 44.667, 3.167, 18.167
    )
  ),
  # c(NA, -20L) is the compact form for automatic row names 1..20.
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Show the table.
setB_df
#> gene_symbol Blood Bone
#> 1 Wif1 991.833 4282.167
#> 2 Wisp1 104.167 642.333
#> 3 Wnt11 52.500 5.667
#> 4 Wnt6 4.833 66.667
#> 5 Wnt9a 33.333 16.667
#> 6 Xxylt1 163.667 143.167
#> 7 Zbp1 131.333 112.667
#> 8 Zbtb7c 7.000 61.667
#> 9 Zc2hc1c 77.667 45.833
#> 10 Zfp300 52.500 24.833
#> 11 Zfp36 222.333 1070.333
#> 12 Zfp367 10.833 41.833
#> 13 Zfp54 63.500 140.402
#> 14 Zfp612 0.167 13.667
#> 15 Zfp748 184.922 156.167
#> 16 Zfp783 20.167 84.613
#> 17 Zfp791 17.333 5.333
#> 18 Zic1 0.833 44.667
#> 19 Zic3 20.833 3.167
#> 20 Zic4 0.000 18.167
我想要做的是计算所有成对列的Pearson相关性(gene_symbol
除外)。最终的df如下所示:
Sample1 Blood -0.01524773 #cor(setA_df$Sample1,setB_df$Blood)
Sample2 Blood -0.003247626
Sample1 Bone 0.03889127
Sample2 Bone 0.06420447
如何用dplyr实现? 实际上,每组都有更多的列。
答案 0 :(得分:2)
这给出了定义的输出,但如上所述,dplyr在这里并没有增加多少。
library(reshape2)
library(dplyr)
# Drop the gene identifier from both tables so only the numeric columns
# remain, correlate every column of setA_df with every column of setB_df
# (cor() uses the Pearson method by default), then reshape the resulting
# correlation matrix into long format: one row per column pair.
melt(
  cor(
    select(setA_df, -gene_symbol),
    select(setB_df, -gene_symbol)
  )
)
# Var1 Var2 value
# 1 Sample1 Blood -0.015247732
# 2 Sample2 Blood -0.003247626
# 3 Sample1 Bone 0.038891268
# 4 Sample2 Bone 0.064204473