使用其他列中的范围填充一列

时间:2018-08-13 22:49:59

标签: r dplyr

这是我的数据框。对于section_start和section_end之间的所有page_nums,我想使用section_num值创建fill_section_num。例如,如果page_nums在5到6之间,则section_num是2。因此,在fill_section_num中,我希望第5和第6行使用2,依此类推。

# Sample data, built column by column (all columns are double).
# page_nums is the page index; section_start / section_end / section_num
# describe one section per row where present; fill_section_num is the expected
# answer: the section_num whose [section_start, section_end] range contains
# page_nums.
df <- tibble(
  page_nums = as.double(1:10),
  section_start = c(NA, NA, 2, 5, 7, 8, NA, NA, NA, NA),
  section_end = c(NA, NA, 4, 6, 7, 10, NA, NA, NA, NA),
  section_num = c(NA, NA, 1, 2, 3, 4, NA, NA, NA, NA),
  fill_section_num = c(NA, 1, 1, 1, 2, 2, 3, 4, 4, 4)
)

我打算做这样的事情,但是失败了。你能帮忙吗?

 # Attempt from the question. between() is evaluated row by row here, so each
 # page_nums is only compared with the section_start/section_end stored on its
 # OWN row. It never looks up a range stored on a different row, which is what
 # the desired fill_section_num requires (e.g. page 5 belongs to the range 5-6
 # that is listed on the page-4 row).
 # NOTE(review): some dplyr versions appear to require scalar bounds in
 # between() - confirm against the installed version.
 df <- df %>% 
           mutate(fill_section_num = if_else(between(page_nums, section_start, section_end), section_num, NA_real_))

所需的输出是使用R代码计算的fill_section_num列。

2 个答案:

答案 0 :(得分:1)

一种做法:

# Example data with shortened column names: `pages` is the page number, and
# `start` / `end` / `section` give a section's first page, last page and id.
df <- data.frame(
  pages = seq(3L, 6L),
  start = c(2, 5, 7, 8),
  end = c(4, 6, 7, 10),
  section = seq_len(4L)
)

# Expand one section into the pages it covers.
#
# x: numeric vector of length 3 holding c(start page, end page, section id).
# Returns a two-column matrix with columns `pages` (every page from start to
# end) and `fill_section` (the section id repeated once per page).
myfun <- function(x) {
  page_seq <- seq(x[1], x[2])
  # The column is named `fill_section` so that it matches the name used in the
  # description and in the printed result; the repeat count is taken from the
  # sequence itself so both columns always have the same length.
  cbind(
    pages = page_seq,
    fill_section = rep(x[3], times = length(page_seq))
  )
}

# Expand every row's (start, end, section) triple into a page/section matrix.
# lapply() always returns a list; apply() would silently simplify its result
# to a single matrix whenever all sections span the same number of pages,
# which breaks the rbind step below.
temp_list <- lapply(seq_len(nrow(df)), function(i) {
  myfun(c(df$start[i], df$end[i], df$section[i]))
})
# Stack the per-section matrices into one long page -> section lookup table.
temp_df <- data.frame(do.call(rbind, temp_list))

# Merge the lookup table back onto the original data frame by page number:
result <- merge(df, temp_df, by = "pages")

检查结果是否合理:

result
  pages start end section fill_section
      3     2   4       1            1
      4     5   6       2            1
      5     7   7       3            2
      6     8  10       4            2

答案 1 :(得分:1)

您可以先把每一行的 section_start:section_end 展开成页码序列,然后取消嵌套(unnest),最后将结果与原表自身连接:

# Expand each section's start:end range into one row per page, then join the
# expanded (page, section) pairs back onto df by page number.
df %>%
  mutate(nn = map2(section_start, section_end,
                   # rows without a section range contribute a single NA page
                   ~ if (any(is.na(c(.x, .y)))) NA_integer_ else .x:.y)) %>%
  # name the list-column explicitly: a bare unnest() is deprecated in
  # tidyr >= 1.0 and emits a warning
  unnest(nn) %>%
  select(nn, section_num) %>%
  right_join(df, by = c(nn = "page_nums")) %>%
  # move the looked-up section (section_num.x) to the last column
  select(-section_num.x, everything())
# A tibble: 10 x 6
      nn section_start section_end section_num.y fill_section_num section_num.x
   <dbl>         <dbl>       <dbl>         <dbl>            <dbl>         <dbl>
 1     1            NA          NA            NA               NA            NA
 2     2            NA          NA            NA                1             1
 3     3             2           4             1                1             1
 4     4             5           6             2                1             1
 5     5             7           7             3                2             2
 6     6             8          10             4                2             2
 7     7            NA          NA            NA                3             3
 8     8            NA          NA            NA                4             4
 9     9            NA          NA            NA                4             4
10    10            NA          NA            NA                4             4

上面的最后一列是您要寻找的。