我有一个df:
# Literal definition of the example data frame: 20 rows and six columns.
#   chrom  - factor with 11 levels; every row here uses level 2 ("2L")
#   pos    - integer
#   nFreq, n_dist, tFreq, t_dist - numeric
# The expression is not assigned to a name here; assign it to `df` before
# running code that refers to `df`.
structure(list(chrom = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2.11E+14",
"2L", "2R", "3L", "3R", "mitochondrion_genome", "rDNA", "Unmapped_Scaffold_58_D1862",
"X", "X3X4_mapped_Scaffold_14_D1732", "X3X4_mapped_Scaffold_6_D1712"
), class = "factor"), pos = c(32476L, 56682L, 95341L, 161983L,
190298L, 214264L, 344985L, 424235L, 424262L, 424295L, 431370L,
444713L, 449545L, 464397L, 464407L, 465414L, 465416L, 465424L,
483832L, 523201L), nFreq = c(48, 61, 43, 53, 38, 52, 44, 55,
55, 56, 64, 42, 31, 31, 30, 44, 47, 45, 53, 40), n_dist = c(2,
11, 7, 3, 12, 2, 6, 5, 5, 6, 14, 8, 19, 19, 20, 6, 3, 5, 3, 10
), tFreq = c(90, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 86,
87, 0, 0, 0, 100, 100), t_dist = c(40, 50, 50, 50, 50, 50, 50,
50, 50, 50, 50, 50, 50, 36, 37, 50, 50, 50, 50, 50)), .Names = c("chrom",
"pos", "nFreq", "n_dist", "tFreq", "t_dist"), row.names = c(NA,
20L), class = "data.frame")
看起来像这样:
chrom pos nFreq n_dist tFreq t_dist
2L 32476 48 2 90 40
2L 56682 61 11 100 50
2L 95341 43 7 0 50
2L 161983 53 3 0 50
2L 190298 38 12 0 50
2L 214264 52 2 0 50
我正在尝试使用gather将其转换为如下所示的长数据框:
chrom pos freq dist condition
2L 32476 48 2 norm
2L 32476 90 40 tum
2L 56682 61 11 norm
2L 56682 100 50 tum
2L 95341 43 7 norm
2L 95341 0 50 tum
我不知道如何在 gather 中指定两个以上的新列名,下面是我目前的尝试:
library(tidyr)
# Collapse the columns nFreq through t_dist into key/value pairs: the key
# column (named "freq") holds the original column names as a factor, and the
# value column (named "dist") holds the corresponding values.
long_df <- tidyr::gather(
  df,
  key = freq, value = dist,
  nFreq:t_dist,
  factor_key = TRUE
)
它会产生:
chrom pos freq dist
1 2L 32476 nFreq 48
2 2L 56682 nFreq 61
3 2L 95341 nFreq 43
4 2L 161983 nFreq 53
5 2L 190298 nFreq 38
6 2L 214264 nFreq 52
如何调整这段代码以得到所需的输出?
答案 0 :(得分:0)
这里是一种解决方案:先用 gather 将所有测量列转换为长格式,然后按条件分为两部分(norm 和 tum),分别用 spread 展开为宽格式,最后合并。
library(tidyr)
library(dplyr)
# Reshape every measurement column (all but chrom and pos) into key/value
# pairs.
new <- gather(df, "key", "value", -c(1:2))
# Split the long data by condition and spread each part back to wide format.
norm <- spread(new[new$key %in% c("nFreq", "n_dist"), ], key, value) %>%
  mutate(condition = "norm")
tum <- spread(new[new$key %in% c("tFreq", "t_dist"), ], key, value) %>%
  mutate(condition = "tum")
# Give both parts the same column names. Rename by name rather than by
# position: spread() lays out the new columns in sorted key order, which
# depends on the locale ("n_dist" can sort before "nFreq"), so assigning
# names() positionally can silently swap freq and dist.
norm <- select(norm, chrom, pos, freq = nFreq, dist = n_dist, condition)
tum <- select(tum, chrom, pos, freq = tFreq, dist = t_dist, condition)
# Combine the two parts and order rows by position; order() leaves ties in
# their original order, so each norm row stays ahead of its tum row.
answer <- rbind(norm, tum)
answer <- answer[order(answer$pos), ]
这可行,但我相信可以简化。