我正在尝试找到一种简洁的 dplyr 解决方案来转换下面这个数据框:
# Example input: one row per rule clause. Lift and Coverage carry the same
# value on every row belonging to a given rule.
Rule <- rep(c('Rule 1', 'Rule 2'), times = c(4, 3))
Condition <- c(paste(1:4, 'of 4'), paste(1:3, 'of 3'))
Clause <- c(
  'Temperature > 60', 'Temperature < 90', 'Rain = 0', 'Wind < 20',
  'Temperature > 55', 'Temperature < 85', 'Rain <= 2'
)
Lift <- rep(c('1.30', '1.60'), times = c(4, 3))
Coverage <- rep(c('20%', '35%'), times = c(4, 3))
DF <- data.frame(Rule, Condition, Clause, Lift, Coverage)
转换成如下数据框:
# Expected output: the same clauses, with an all-blank row between the two
# rules and Lift/Coverage filled in only on each rule's final row.
Rule <- c(rep('Rule 1', 4), '', rep('Rule 2', 3))
Condition <- c(paste(1:4, 'of 4'), '', paste(1:3, 'of 3'))
Clause <- c(
  'Temperature > 60', 'Temperature < 90', 'Rain = 0', 'Wind < 20',
  '',
  'Temperature > 55', 'Temperature < 85', 'Rain <= 2'
)
Lift <- c(rep('', 3), '1.30', rep('', 3), '1.60')
Coverage <- c(rep('', 3), '20%', rep('', 3), '35%')
Result <- data.frame(Rule, Condition, Clause, Lift, Coverage)
请注意,结果中新增了用于分隔各条规则的空白行,并且重复的 Lift 和 Coverage 指标已被删除,仅在每条规则的最后一行保留 Lift 和 Coverage。
答案 0 :(得分:2)
您可以先创建一个空白行,用于追加到每个 Rule 的数据之后:
# A single all-blank row carrying the same column names as DF.
empty_df <- data.frame(
  matrix(
    '',
    nrow = 1,
    ncol = length(DF),
    dimnames = list(NULL, colnames(DF))
  )
)
按每个唯一的 Rule 拆分数据,将 Lift 和 Coverage 列中的重复值替换为空白,在每组末尾追加 empty_df,然后合并结果。
library(dplyr)

# Split DF into one tibble per Rule, blank every Lift/Coverage value that
# occurs again further down within the same piece, append the blank row to
# each piece, and stack the pieces back together.
DF %>%
  group_split(Rule) %>%
  purrr::map_df(function(rule_rows) {
    blanked <- mutate(
      rule_rows,
      across(
        c(Lift, Coverage),
        function(values) replace(values, duplicated(values, fromLast = TRUE), '')
      )
    )
    bind_rows(blanked, empty_df)
  }) %>%
  # Drop the blank row that trails the final rule.
  slice(-n())
# Rule Condition Clause Lift Coverage
# <chr> <chr> <chr> <chr> <chr>
#1 "Rule 1" "1 of 4" "Temperature > 60" "" ""
#2 "Rule 1" "2 of 4" "Temperature < 90" "" ""
#3 "Rule 1" "3 of 4" "Rain = 0" "" ""
#4 "Rule 1" "4 of 4" "Wind < 20" "1.30" "20%"
#5 "" "" "" "" ""
#6 "Rule 2" "1 of 3" "Temperature > 55" "" ""
#7 "Rule 2" "2 of 3" "Temperature < 85" "" ""
#8 "Rule 2" "3 of 3" "Rain <= 2" "1.60" "35%"