根据列中的值拆分行

时间:2018-05-09 09:34:46

标签: r

我有一个包含 5 列的表格。对于 a、b、c、d 四列中含有多个非缺失值的行,我需要把它拆分成多行(每个值单独占一行),并把每一行的 total 替换为该值与原 total 的乘积。

输入:

# Build the example input from an inline CSV literal and print it.
data <- read_delim(
  "a,b,c,d,total\n1,NA,NA,NA,10\nNA,0.5,0.5,NA,20\n0.2,0.3,NA,0.5,30\n",
  delim = ","
)
data
    a     b     c     d     total
1   1    NA    NA     NA      10
2  NA    0.5   0.5    NA      20
3  0.2   0.3   NA     0.5     30

期望的输出:

# Expected result: one row per non-missing value in a-d, with `total`
# scaled by that value.
# The literal must not contain a space before "NA": read_delim() does not
# trim whitespace by default, so " NA" would be read as text and turn
# column `a` into a character column.
desired_output <- read_delim(
  "a,b,c,d,total\n1,NA,NA,NA,10\nNA,0.5,NA,NA,10\nNA,NA,0.5,NA,10\n0.2,NA,NA,NA,6\nNA,0.3,NA,NA,9\nNA,NA,NA,0.5,15\n",
  delim = ","
)
desired_output
  a      b     c     d     total
1 1      NA    NA    NA      10
2 NA     0.5   NA    NA      10
3 NA     NA    0.5   NA      10
4 0.2    NA    NA    NA       6
5 NA     0.3   NA    NA       9
6 NA     NA    NA    0.5     15

过滤需要拆分的行

# Keep only the rows holding at least two non-missing values in the first
# four columns; these are the rows that have to be split.
combined <- data %>%
  dplyr::filter(
    rowSums(!is.na(.[, 1:4])) >= 2
  )
combined
      a     b     c     d total
1  NA     0.5   0.5  NA      20
2   0.2   0.3  NA     0.5    30

1 个答案:

答案 0 :(得分:3)

您可以尝试tidyverse方法

library(tidyverse)

# Split every input row into one output row per non-missing value in a-d,
# replacing `total` with `total * value` (stored as `total_new`).
data %>%
  # Remember which input row each value came from.
  rownames_to_column() %>%
  # Long format: one record per (input row, column) pair.
  pivot_longer(-c(rowname, total), names_to = "k", values_to = "v") %>%
  # `id` gives every long record its own output row, so equal values in the
  # same input row (e.g. 0.5 and 0.5) are not merged when widening again.
  mutate(total_new = total * v, id = row_number()) %>%
  select(-total) %>%
  # Wide format again: each output row has exactly one of a-d filled in.
  pivot_wider(names_from = k, values_from = v) %>%
  # Drop the rows that came from missing cells. Testing for NA rather than
  # for a positive row sum keeps rows whose value is 0 or negative.
  filter(if_any(a:d, ~ !is.na(.x))) %>%
  select(-id)
# A tibble: 6 x 6
  rowname total_new     a     b     c     d
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl>
1 1              10   1    NA    NA    NA  
2 2              10  NA     0.5  NA    NA  
3 2              10  NA    NA     0.5  NA  
4 3               6   0.2  NA    NA    NA  
5 3               9  NA     0.3  NA    NA  
6 3              15  NA    NA    NA     0.5