Question

我正在尝试找到一种简洁的 dplyr 解决方案，把下面这个数据框：

# Example input: one row per rule clause. Lift and Coverage are repeated on
# every row that belongs to the same rule (4 rows for Rule 1, 3 for Rule 2).
Rule <- rep(c("Rule 1", "Rule 2"), times = c(4, 3))
Condition <- c(
  "1 of 4", "2 of 4", "3 of 4", "4 of 4",
  "1 of 3", "2 of 3", "3 of 3"
)
Clause <- c(
  "Temperature > 60", "Temperature < 90", "Rain = 0", "Wind < 20",
  "Temperature > 55", "Temperature < 85", "Rain <= 2"
)
Lift <- rep(c("1.30", "1.60"), times = c(4, 3))
Coverage <- rep(c("20%", "35%"), times = c(4, 3))
DF <- data.frame(Rule, Condition, Clause, Lift, Coverage)

转换成下面这个数据框：

# Desired output: a blank separator row follows Rule 1, and Lift / Coverage
# are filled in only on the last row of each rule.
Rule <- c(rep("Rule 1", 4), "", rep("Rule 2", 3))
Condition <- c(
  "1 of 4", "2 of 4", "3 of 4", "4 of 4",
  "",
  "1 of 3", "2 of 3", "3 of 3"
)
Clause <- c(
  "Temperature > 60", "Temperature < 90", "Rain = 0", "Wind < 20",
  "",
  "Temperature > 55", "Temperature < 85", "Rain <= 2"
)
Lift <- c(rep("", 3), "1.30", rep("", 3), "1.60")
Coverage <- c(rep("", 3), "20%", rep("", 3), "35%")
Result <- data.frame(Rule, Condition, Clause, Lift, Coverage)

请注意，结果中新增了用于分隔各条规则的空白行，并且重复的 Lift 和 Coverage 指标已被删除：仅在每条规则的最后一行保留 Lift 和 Coverage 的值。

Answer 1

您可以先创建一个空白行，用来插入到每个 Rule 之后：

# One-row data frame of empty strings that carries exactly the column names of
# DF; it serves as the blank separator row appended after each rule.
# check.names = FALSE stops data.frame() from rewriting non-syntactic or
# duplicated column names, so the columns always line up with DF when the rows
# are bound together. stringsAsFactors = FALSE keeps the blanks as character
# (not factor) on R versions older than 4.0 as well.
empty_df <- data.frame(matrix('', nrow = 1, ncol = ncol(DF), 
                       dimnames = list(NULL, names(DF))),
                       check.names = FALSE, stringsAsFactors = FALSE)

按每个唯一的 Rule 拆分数据，将 Lift 和 Coverage 列中的重复值替换为空白，在每组末尾追加 empty_df，然后合并结果。

library(dplyr)

# Split DF into one data frame per rule, blank out the repeated Lift/Coverage
# values inside each rule, append a blank separator row after every rule, and
# stack the pieces back into a single data frame.
DF %>%
  # group_split() returns the groups sorted by key, which would reorder the
  # rules (e.g. "Rule 10" before "Rule 2"). Grouping on the index of each
  # rule's first appearance keeps the rules in their original order;
  # .keep = FALSE drops that helper column from the pieces again.
  group_split(rule_order = match(Rule, unique(Rule)), .keep = FALSE) %>%
  purrr::map_df(function(rule_df) {
    rule_df %>%
      # duplicated(fromLast = TRUE) flags every repeat except the last
      # occurrence, so a value shared by the whole rule survives only on the
      # rule's last row.
      mutate(across(c(Lift, Coverage),
                    ~replace(., duplicated(., fromLast = TRUE), ''))) %>%
      bind_rows(empty_df)
  }) %>%
  # Remove the blank separator row that follows the last rule.
  slice(-n())

#    Rule     Condition Clause             Lift   Coverage
#  <chr>    <chr>     <chr>              <chr>  <chr>   
#1 "Rule 1" "1 of 4"  "Temperature > 60" ""     ""      
#2 "Rule 1" "2 of 4"  "Temperature < 90" ""     ""      
#3 "Rule 1" "3 of 4"  "Rain = 0"         ""     ""      
#4 "Rule 1" "4 of 4"  "Wind < 20"        "1.30" "20%"   
#5 ""       ""        ""                 ""     ""      
#6 "Rule 2" "1 of 3"  "Temperature > 55" ""     ""      
#7 "Rule 2" "2 of 3"  "Temperature < 85" ""     ""      
#8 "Rule 2" "3 of 3"  "Rain <= 2"        "1.60" "35%"

使用Dplyr处理数据框

1 个答案: