我想把数据框中每个名称拆分开,并把拆分出的各部分填入对应的列中。例如,给定以下数据框:
# Example label vectors: each label repeated a fixed number of times.
N0H0 <- rep("N0H0", times = 3)
N1H1 <- rep("N1H1", times = 5)
N0H05 <- rep("N0H05", times = 4)
Out20 <- rep("20_Out", times = 2)
Out10 <- rep("10_Out", times = 4)
In5 <- rep("5_In", times = 3)
In15 <- rep("15_In", times = 3)

# 12-row input: Field and Border hold the combined labels; the remaining
# four columns are NA placeholders to be filled from the split pieces.
df <- data.frame(
  Field = c(N0H0, N1H1, N0H05),
  Border = c(Out20, Out10, In5, In15),
  N = NA,
  H = NA,
  Number = NA,
  Out.In = NA
)
并希望获得以下输出:
# Expected result: Field split into N / H, Border split into Number / Out.In.
# Field and Border reuse the label vectors built for the input data frame.
output <- data.frame(
  Field = c(N0H0, N1H1, N0H05),
  Border = c(Out20, Out10, In5, In15),
  N = rep(c("N0", "N1", "N0"), times = c(3, 5, 4)),
  H = rep(c("H0", "H1", "H05"), times = c(3, 5, 4)),
  Number = rep(c("20", "10", "5", "15"), times = c(2, 4, 3, 3)),
  Out.In = rep(c("Out", "In"), times = c(6, 6))
)
答案 0 :(得分:1)
我们可以使用 tidyr 中的 extract 和 separate 来拆分这些列
library(dplyr)
library(tidyr)
# Split Field into N / H and Border into Number / Out.In, keeping the
# original Field and Border columns alongside the new ones.
df %>%
  # Drop the NA placeholder columns so the new columns do not clash with them.
  select(Field, Border) %>%
  # Group 1: leading non-digits plus the first run of digits (e.g. "N0");
  # group 2: whatever follows (e.g. "H05").
  extract(Field, into = c("N", "H"), "^([^0-9]*\\d+)(.*)", remove = FALSE) %>%
  # The default `sep` splits on runs of non-alphanumeric characters ("_" here).
  separate(Border, into = c("Number", "Out.In"), remove = FALSE) %>%
  # Reorder columns to match `output`; all_of() replaces the deprecated
  # select_(.dots = ...) standard-evaluation interface.
  select(all_of(names(output)))
# Field Border N H Number Out.In
#1 N0H0 20_Out N0 H0 20 Out
#2 N0H0 20_Out N0 H0 20 Out
#3 N0H0 10_Out N0 H0 10 Out
#4 N1H1 10_Out N1 H1 10 Out
#5 N1H1 10_Out N1 H1 10 Out
#6 N1H1 10_Out N1 H1 10 Out
#7 N1H1 5_In N1 H1 5 In
#8 N1H1 5_In N1 H1 5 In
#9 N0H05 5_In N0 H05 5 In
#10 N0H05 15_In N0 H05 15 In
#11 N0H05 15_In N0 H05 15 In
#12 N0H05 15_In N0 H05 15 In
或者使用 base R:用 lapply 遍历前两列,用 sub 在第一段数字后插入逗号作为分隔符,用 read.csv 读取这些字符串,再用 cbind 合并列表中的各个元素,并把结果赋值给 df 中除前两列以外的列
# For each of the first two columns, put a comma after the first run of digits
# (dropping any underscores that follow it) and parse the result as two-field
# CSV. The four resulting columns are bound together and written into every
# column of df except the first two.
df[-(1:2)] <- do.call(
  cbind,
  lapply(df[1:2], function(x) {
    read.csv(
      text = sub("(\\d+)_*", "\\1,", x),
      header = FALSE,
      # Keep every piece as text: without this, read.csv type-converts the
      # fields, so "20" becomes integer 20, "05" would lose its leading zero,
      # and a piece such as "T" would be read as logical.
      colClasses = "character"
    )
  })
)
df
# Field Border N H Number Out.In
#1 N0H0 20_Out N0 H0 20 Out
#2 N0H0 20_Out N0 H0 20 Out
#3 N0H0 10_Out N0 H0 10 Out
#4 N1H1 10_Out N1 H1 10 Out
#5 N1H1 10_Out N1 H1 10 Out
#6 N1H1 10_Out N1 H1 10 Out
#7 N1H1 5_In N1 H1 5 In
#8 N1H1 5_In N1 H1 5 In
#9 N0H05 5_In N0 H05 5 In
#10 N0H05 15_In N0 H05 15 In
#11 N0H05 15_In N0 H05 15 In
#12 N0H05 15_In N0 H05 15 In
答案 1 :(得分:1)
使用 base R 函数,您可以尝试:
# Work on a copy of df so the input data frame is left untouched.
output <- df

# Border looks like "<number>_<Out|In>": split on the literal underscore and
# stack the pieces row-wise into columns 5 and 6 (Number, Out.In).
output[, 5:6] <- do.call(
  rbind,
  strsplit(as.character(df[["Border"]]), "_", fixed = TRUE)
)

# Field looks like "N<digits>H<digits>": split on the literal "H" and store
# the pieces in columns 3 and 4 (N, H).
output[, 3:4] <- do.call(
  rbind,
  strsplit(as.character(df[["Field"]]), "H", fixed = TRUE)
)

# The split consumed the "H" separator, so put it back in front of the H part.
output[["H"]] <- paste0("H", output[["H"]])
output
Field Border N H Number Out.In
1 N0H0 20_Out N0 H0 20 Out
2 N0H0 20_Out N0 H0 20 Out
3 N0H0 10_Out N0 H0 10 Out
4 N1H1 10_Out N1 H1 10 Out
5 N1H1 10_Out N1 H1 10 Out
6 N1H1 10_Out N1 H1 10 Out
7 N1H1 5_In N1 H1 5 In
8 N1H1 5_In N1 H1 5 In
9 N0H05 5_In N0 H05 5 In
10 N0H05 15_In N0 H05 15 In
11 N0H05 15_In N0 H05 15 In
12 N0H05 15_In N0 H05 15 In