我需要根据列名和每列上的值重塑数据框。如何使用reshape(或其他任何库,例如dplyr)来实现这一目标?
我相信melt函数将很有用,但我对此不太熟悉。
df <- data.frame(
Group1 = c("EMX1", "EXO_C3L4", "FAF2P1", "FAM224A","GATC",
"FAM43A", "FAT4", "EXO_FEZF1-AS1"),
AVG_EXPR = rnorm(8),
Group2 = c("EXO_BRPF3", "AFS", "IJAS", "CCDC187",
"CCDC200", "CCDC7", "CCL27", "CD6"),
AVG_EXPR = rnorm(8))
> df
Group1 AVG_EXPR Group2 AVG_EXPR.1
1 EMX1 -0.4792520 EXO_BRPF3 -0.8802157
2 EXO_C3L4 -0.4820114 AFS -0.2135738
3 FAF2P1 1.0326328 IJAS -0.6290447
4 FAM224A 1.1318738 CCDC187 1.9813584
5 GATC -0.5019679 CCDC200 0.6749088
6 FAM43A -2.0718576 CCDC7 -0.4217126
7 FAT4 -0.1194380 CCL27 -0.3552010
8 EXO_FEZF1-AS1 1.4106888 CD6 0.4217045
所以我的新data.frame看起来像:
df2 <- data.frame(
Gene = c("EMX1", "EXO_C3L4", "FAF2P1", "FAM224A","GATC",
"FAM43A", "FAT4", "EXO_FEZF1-AS1","EXO_BRPF3", "AFS", "IJAS", "CCDC187",
"CCDC200", "CCDC7", "CCL27", "CD6"),
AVG_EXPR = c(df$AVG_EXPR,df$AVG_EXPR.1),
Group = c(rep("Group1",8),rep("Group2",8))
)
> df2
Gene AVG_EXPR Group
1 EMX1 -0.4792520 Group1
2 EXO_C3L4 -0.4820114 Group1
3 FAF2P1 1.0326328 Group1
4 FAM224A 1.1318738 Group1
5 GATC -0.5019679 Group1
6 FAM43A -2.0718576 Group1
7 FAT4 -0.1194380 Group1
8 EXO_FEZF1-AS1 1.4106888 Group1
9 EXO_BRPF3 -0.8802157 Group2
10 AFS -0.2135738 Group2
11 IJAS -0.6290447 Group2
12 CCDC187 1.9813584 Group2
13 CCDC200 0.6749088 Group2
14 CCDC7 -0.4217126 Group2
15 CCL27 -0.3552010 Group2
16 CD6 0.4217045 Group2
答案 0 :(得分:1)
一个选项是pivot_longer
的开发版本中的tidyr
library(tidyr)
library(dplyr)
library(tibble)
nm1 <- sub("\\.?\\d+$", "", names(df))
names(df) <- paste0(nm1, ":", ave(seq_along(nm1), nm1, FUN = seq_along))
df %>%
rownames_to_column('rn') %>%
pivot_longer(-rn, names_to= c(".value", "Group"), names_sep= ":")
答案 1 :(得分:1)
您可以使用基数R的reshape
。诀窍是嵌套的varying
和cbind
的{{1}}列。
id
reshape(cbind(id=1:nrow(DF), DF), varying=list(c(2, 4), c(3, 5)), direction="long",
timevar="Group", v.names=c("Gene", "AVG_EXPR"), times=c("Group1", "Group2"))
# id Group Gene AVG_EXPR
# 1.Group1 1 Group1 EMX1 0.55748718
# 2.Group1 2 Group1 EXO_C3L4 -0.71399573
# 3.Group1 3 Group1 FAF2P1 0.72595733
# 4.Group1 4 Group1 FAM224A 0.78106399
# 5.Group1 5 Group1 GATC 0.33296728
# 6.Group1 6 Group1 FAM43A -1.14699031
# 7.Group1 7 Group1 FAT4 0.64475462
# 8.Group1 8 Group1 EXO_FEZF1-AS1 0.88621529
# 1.Group2 1 Group2 EXO_BRPF3 -0.62445044
# 2.Group2 2 Group2 AFS 0.14577195
# 3.Group2 3 Group2 IJAS -1.15083794
# 4.Group2 4 Group2 CCDC187 0.95483371
# 5.Group2 5 Group2 CCDC200 -1.55217630
# 6.Group2 6 Group2 CCDC7 -0.51225644
# 7.Group2 7 Group2 CCL27 0.52270643
# 8.Group2 8 Group2 CD6 -0.08751736
答案 2 :(得分:1)
您可以将两个dplyr
呼叫与bind_rows
合并。
df %>% select(gene=Group1,AVG_EXPR) %>% mutate(group='group 1') %>%
bind_rows(.,df %>% select(gene=Group2,AVG_EXPR=AVG_EXPR.1) %>% mutate(group='group 2'))