在R中拆分名称并将各部分插入新列

时间:2017-01-17 16:09:33

标签: r dataframe split pattern-matching

我想将数据框中的每个名称拆分开,并将各部分插入相应的列中。示例如下,假设有以下数据框:

    # Example labels, each repeated once per observation.
    N0H0  <- rep("N0H0", times = 3)
    N1H1  <- rep("N1H1", times = 5)
    N0H05 <- rep("N0H05", times = 4)
    Out20 <- rep("20_Out", times = 2)
    Out10 <- rep("10_Out", times = 4)
    In5   <- rep("5_In", times = 3)
    In15  <- rep("15_In", times = 3)

    # Input data frame: Field and Border each hold 12 combined labels; the
    # other four columns are NA placeholders for the pieces to be split out.
    df <- data.frame(
      Field  = c(N0H0, N1H1, N0H05),
      Border = c(Out20, Out10, In5, In15),
      N      = NA,
      H      = NA,
      Number = NA,
      Out.In = NA
    )

并希望获得以下输出:

     # Desired result: Field is split into its N and H parts, Border into its
     # numeric prefix (Number) and its Out/In suffix (Out.In).
     output <- data.frame(
       Field  = c(N0H0, N1H1, N0H05),
       Border = c(Out20, Out10, In5, In15),
       N      = rep(c("N0", "N1", "N0"), times = c(3, 5, 4)),
       H      = rep(c("H0", "H1", "H05"), times = c(3, 5, 4)),
       Number = rep(c("20", "10", "5", "15"), times = c(2, 4, 3, 3)),
       Out.In = rep(c("Out", "In"), each = 6)
     )

2 个答案:

答案 0 :(得分:1)

我们可以使用tidyr中的extract和separate来拆分列

library(dplyr)
library(tidyr)

# Split Field into N/H and Border into Number/Out.In, keeping both source
# columns (remove = FALSE) and dropping the NA placeholder columns first.
df %>%
  select(Field, Border) %>%
  # Capture group 1: leading non-digits plus the first run of digits ("N0");
  # capture group 2: everything after it ("H05").
  extract(
    Field,
    into = c("N", "H"),
    regex = "^([^0-9]*\\d+)(.*)",
    remove = FALSE
  ) %>%
  # No sep given: separate() falls back to its default separator, which
  # splits these labels at the underscore.
  separate(Border, into = c("Number", "Out.In"), remove = FALSE) %>%
  # Reorder explicitly. The deprecated select_(.dots = names(output)) also
  # required the expected-result object `output` to exist beforehand.
  select(Field, Border, N, H, Number, Out.In)
#    Field Border  N   H Number Out.In
#1   N0H0 20_Out N0  H0     20    Out
#2   N0H0 20_Out N0  H0     20    Out
#3   N0H0 10_Out N0  H0     10    Out
#4   N1H1 10_Out N1  H1     10    Out
#5   N1H1 10_Out N1  H1     10    Out
#6   N1H1 10_Out N1  H1     10    Out
#7   N1H1   5_In N1  H1      5     In
#8   N1H1   5_In N1  H1      5     In
#9  N0H05   5_In N0 H05      5     In
#10 N0H05  15_In N0 H05     15     In
#11 N0H05  15_In N0 H05     15     In
#12 N0H05  15_In N0 H05     15     In

或者使用base R:用lapply遍历前2列,用sub在第一段数字之后插入分隔符,用read.csv读取处理后的字符串,再用cbind合并list中的各个元素,并将结果赋值回除前2列之外的列
# For each of the first two columns: put a comma after the first run of
# digits (swallowing any underscores that follow it), parse the result as
# two-field CSV, then bind the pieces column-wise into columns 3 onward.
df[-(1:2)] <- do.call(
  cbind,
  lapply(df[1:2], function(x) {
    read.csv(
      text = sub("(\\d+)_*", "\\1,", x),
      header = FALSE,
      # Keep every piece as text: without this, read.csv() type-guesses the
      # numeric prefix to integer, which would drop a leading zero such as
      # "05" and differ from the string columns in the expected output.
      colClasses = "character",
      stringsAsFactors = FALSE
    )
  })
)
df
#   Field Border  N   H Number Out.In
#1   N0H0 20_Out N0  H0     20    Out
#2   N0H0 20_Out N0  H0     20    Out
#3   N0H0 10_Out N0  H0     10    Out
#4   N1H1 10_Out N1  H1     10    Out
#5   N1H1 10_Out N1  H1     10    Out
#6   N1H1 10_Out N1  H1     10    Out
#7   N1H1   5_In N1  H1      5     In
#8   N1H1   5_In N1  H1      5     In
#9  N0H05   5_In N0 H05      5     In
#10 N0H05  15_In N0 H05     15     In
#11 N0H05  15_In N0 H05     15     In
#12 N0H05  15_In N0 H05     15     In

答案 1 :(得分:1)

使用基本R功能,您可以尝试:

# Work on a copy so df itself is left unchanged.
output <- df

# "20_Out" -> c("20", "Out"); stack the pieces row-wise into a 2-column matrix
# and write it into columns 5 and 6 (Number, Out.In).
border_parts <- strsplit(as.character(df$Border), "_")
output[, 5:6] <- do.call(rbind, border_parts)

# "N0H05" -> c("N0", "05"); same idea for columns 3 and 4 (N, H).
field_parts <- strsplit(as.character(df$Field), "H")
output[, 3:4] <- do.call(rbind, field_parts)

# strsplit() consumed the "H" delimiter, so put it back in front of the H part.
output$H <- paste0("H", output$H)
output
   Field Border  N   H Number Out.In
1   N0H0 20_Out N0  H0     20    Out
2   N0H0 20_Out N0  H0     20    Out
3   N0H0 10_Out N0  H0     10    Out
4   N1H1 10_Out N1  H1     10    Out
5   N1H1 10_Out N1  H1     10    Out
6   N1H1 10_Out N1  H1     10    Out
7   N1H1   5_In N1  H1      5     In
8   N1H1   5_In N1  H1      5     In
9  N0H05   5_In N0 H05      5     In
10 N0H05  15_In N0 H05     15     In
11 N0H05  15_In N0 H05     15     In
12 N0H05  15_In N0 H05     15     In