我有一个看起来像这样的data.frame:
chr1:29250635-29582124
chr1:46026531-46214183
chr1:46554517-46718374
chr1:51008171-51235816
chr1:63862069-64092146
chr1:78052717-78289590
chr1:85066633-85177704
chr1:94639336-94839130
chr1:97229888-97509581
chr1:98053562-98445381
chr1:114057822-114381300
chr1:115116433-115378793
我想把这一列拆分成三列,如下所示:
CHR BP1 BP2
chr1 29250635 29582124
chr1 46026531 46214183
chr1 46554517 46718374
chr1 51008171 51235816
chr1 63862069 64092146
我该怎么做?
答案 0 :(得分:2)
我们可以使用separate
library(tidyr)
# Split V1 ("chr:start-end") into chromosome, start and end columns.
# An explicit `sep` restricts the split to ":" and "-": the default splits on
# every run of non-alphanumeric characters, so a name such as "chr1_random"
# would be broken apart. `convert = TRUE` turns BP1/BP2 into integers instead
# of leaving them as character strings.
separate(df1, V1, into = c("CHR", "BP1", "BP2"), sep = "[:-]", convert = TRUE)
# CHR BP1 BP2
#1 chr1 29250635 29582124
#2 chr1 46026531 46214183
#3 chr1 46554517 46718374
#4 chr1 51008171 51235816
#5 chr1 63862069 64092146
#6 chr1 78052717 78289590
#7 chr1 85066633 85177704
#8 chr1 94639336 94839130
#9 chr1 97229888 97509581
#10 chr1 98053562 98445381
#11 chr1 114057822 114381300
#12 chr1 115116433 115378793
或使用cSplit
library(splitstackshape)
# cSplit() splits V1 on "-" or ":" (a regex, hence fixed = FALSE).
# setnames() is a data.table function; it is called with an explicit namespace
# because loading splitstackshape does not necessarily attach data.table.
# setnames() renames in place and returns its result invisibly, so the
# trailing [] is what makes the renamed table print.
data.table::setnames(
  cSplit(df1, "V1", "-|:", fixed = FALSE),
  c("CHR", "BP1", "BP2")
)[]
或者使用 base R 的方法:先用 sub 把分隔符统一成单个分隔符,再用 read.table 读取。
# Replace the ":" with "-" so every field is delimited by the same character,
# then let read.table() parse the strings as "-"-separated text.
read.table(
  text = sub(":", "-", df1[["V1"]], fixed = TRUE),
  sep = "-",
  header = FALSE,
  col.names = c("CHR", "BP1", "BP2"),
  stringsAsFactors = FALSE
)
# CHR BP1 BP2
#1 chr1 29250635 29582124
#2 chr1 46026531 46214183
#3 chr1 46554517 46718374
#4 chr1 51008171 51235816
#5 chr1 63862069 64092146
#6 chr1 78052717 78289590
#7 chr1 85066633 85177704
#8 chr1 94639336 94839130
#9 chr1 97229888 97509581
#10 chr1 98053562 98445381
#11 chr1 114057822 114381300
#12 chr1 115116433 115378793
# Example input: a data frame with a single character column, V1, holding
# region strings of the form "chr:start-end".
df1 <- data.frame(
  V1 = c(
    "chr1:29250635-29582124",
    "chr1:46026531-46214183",
    "chr1:46554517-46718374",
    "chr1:51008171-51235816",
    "chr1:63862069-64092146",
    "chr1:78052717-78289590",
    "chr1:85066633-85177704",
    "chr1:94639336-94839130",
    "chr1:97229888-97509581",
    "chr1:98053562-98445381",
    "chr1:114057822-114381300",
    "chr1:115116433-115378793"
  ),
  stringsAsFactors = FALSE
)