使用 tidyr 按组递减取值来补全(complete)并填充(fill)序列

时间:2018-11-13 22:18:55

标签: r dplyr tidyr fill complete

我有一个相当复杂的问题:我希望首先补全缺失的行并填入相关数据。下一步更复杂:按逐渐减少的取值个数填充序列,直到达到最小值为止。例如以下数据:

# Example input for the question: 26 rows keyed by the nested ids
# (id, id2, id3). Rows whose clstr_number / value are NA are the ones that
# still have to be completed and filled.
# The "spec" attribute (class "col_spec") is carried over unchanged from the
# original dput() output so the object is reproduced exactly.
dat <- local({
  # Empty collector object of the given type, as stored in the "spec" attribute.
  collector <- function(type) {
    structure(list(), class = c(paste0("collector_", type), "collector"))
  }

  structure(
    list(
      id = rep(1L, 26L),
      id2 = rep(c(1L, 2L), times = c(15L, 11L)),
      id3 = c(
        rep("ID_1", 5L), paste0("ID_", 2:11),
        rep("ID_12", 4L), paste0("ID_", 13:19)
      ),
      n_clstr = rep(
        c(5L, 3L, 2L, 1L, 4L, 3L, 2L, 1L),
        times = c(9L, 2L, 1L, 3L, 7L, 1L, 1L, 2L)
      ),
      clstr_number = c(
        1:5, rep(NA_integer_, 7L), rep(1L, 3L),
        1:4, rep(NA_integer_, 5L), rep(1L, 2L)
      ),
      value = c(
        0.35, 0.43, 0.51, 0.57, 1, rep(NA_real_, 7L), rep(1, 3L),
        0.2, 0.62, 0.79, 1, rep(NA_real_, 5L), rep(1, 2L)
      )
    ),
    row.names = c(NA, -26L),
    class = c("tbl_df", "tbl", "data.frame"),
    spec = structure(
      list(
        cols = list(
          id = collector("integer"),
          id2 = collector("integer"),
          id3 = collector("character"),
          n_clstr = collector("integer"),
          clstr_number = collector("integer"),
          value = collector("double")
        ),
        default = collector("guess")
      ),
      class = "col_spec"
    )
  )
})

因此您可以看到数据按嵌套ID分组。为了简洁起见,我没有提供额外的id,但是会有多个。因此,我设想的步骤是:

  1. 按 `c(id, id2, id3)` 分组
  2. 基于 `n_clstr` 补全 `clstr_number`,并赋值 `clstr_number = seq(max(n_clstr) - n_clstr + 1, max(n_clstr), 1)`(例如最大值为 5、`n_clstr` 为 3 时得到 3、4、5)
  3. 从上方填充相应的值
  4. 随着 `n_clstr` 减小,只用最大的 n 个 `value` 值进行填充(n 即该行的 `n_clstr`)

但是我找不到正确的语法来实现这种复杂程度的补全(complete)和填充(fill),也不知道如何填充这些逐渐缩小的集合。最终数据应如下所示:

# Expected result after completing and filling `dat`: 59 rows, one per
# (id, id2, id3, clstr_number) combination.
# Two fixes relative to the originally posted literal:
#   * the third column is named `id3` (it was `X3`), matching the input data
#     and the grouping described in the question;
#   * the id3 labels of rows 53-57 are realigned with n_clstr / clstr_number:
#     ID_16 has n_clstr = 3 (three rows) and ID_17 has n_clstr = 2 (two rows).
dat_final <- structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L), id2 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), 
  id3 = c("ID_1", "ID_1", "ID_1", "ID_1", "ID_1", "ID_2", "ID_2", 
  "ID_2", "ID_2", "ID_2", "ID_3", "ID_3", "ID_3", "ID_3", "ID_3", 
  "ID_4", "ID_4", "ID_4", "ID_4", "ID_4", "ID_5", "ID_5", "ID_5", 
  "ID_5", "ID_5", "ID_6", "ID_6", "ID_6", "ID_7", "ID_7", "ID_7", 
  "ID_8", "ID_8", "ID_9", "ID_10", "ID_11", "ID_12", "ID_12", 
  "ID_12", "ID_12", "ID_13", "ID_13", "ID_13", "ID_13", "ID_14", 
  "ID_14", "ID_14", "ID_14", "ID_15", "ID_15", "ID_15", "ID_15", 
  "ID_16", "ID_16", "ID_16", "ID_17", "ID_17", "ID_18", "ID_19"
  ), n_clstr = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
  5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 
  3L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 
  4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 2L, 
  2L, 1L, 1L), clstr_number = c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 
  3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 
  3L, 4L, 5L, 3L, 4L, 5L, 3L, 4L, 5L, 4L, 5L, 1L, 1L, 1L, 1L, 
  2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 
  2L, 3L, 4L, 3L, 4L, 1L, 1L), value = c(0.35, 0.43, 0.51, 
  0.57, 1, 0.35, 0.43, 0.51, 0.57, 1, 0.35, 0.43, 0.51, 0.57, 
  1, 0.35, 0.43, 0.51, 0.57, 1, 0.35, 0.43, 0.51, 0.57, 1, 
  0.51, 0.57, 1, 0.51, 0.57, 1, 0.57, 1, 1, 1, 1, 0.2, 0.62, 
  0.79, 1, 0.2, 0.62, 0.79, 1, 0.2, 0.62, 0.79, 1, 0.2, 0.62, 
  0.79, 1, 0.62, 0.79, 1, 0.79, 1, 1, 1)), row.names = c(NA, 
-59L), class = c("tbl_df", "tbl", "data.frame"), spec = structure(list(
  cols = list(id = structure(list(), class = c("collector_integer", 
  "collector")), id2 = structure(list(), class = c("collector_integer", 
  "collector")), id3 = structure(list(), class = c("collector_character", 
  "collector")), n_clstr = structure(list(), class = c("collector_integer", 
  "collector")), clstr_number = structure(list(), class = c("collector_integer", 
  "collector")), value = structure(list(), class = c("collector_double", 
  "collector"))), default = structure(list(), class = c("collector_guess", 
  "collector"))), class = "col_spec"))

0 个答案:

没有答案