使用菱形数据集,我试图运行回归模型,以比较颜色“ D”与每个切割级别内的其他每种颜色(在回归模型中未指定交互作用)。
为此,我尝试创建按cut和color分组的嵌套数据框,但是在每个组中,我都希望使用适当cut的参考颜色“ D”。
以下代码无法实现我想要的功能,因为每组颜色都不包含颜色“ D”:
library(tidyverse)
> diamonds %>%
+ group_by(cut, color) %>%
+ nest() %>% arrange(cut, color)
# A tibble: 35 x 3
cut color data
<ord> <ord> <list>
1 Fair D <tibble [163 x 8]>
2 Fair E <tibble [224 x 8]>
3 Fair F <tibble [312 x 8]>
4 Fair G <tibble [314 x 8]>
5 Fair H <tibble [303 x 8]>
6 Fair I <tibble [175 x 8]>
7 Fair J <tibble [119 x 8]>
8 Good D <tibble [662 x 8]>
9 Good E <tibble [933 x 8]>
10 Good F <tibble [909 x 8]>
# ... with 25 more rows
下面的代码可以完成工作,但是我正在寻找一个tidyverse版本:
data_fair_de = diamonds %>% filter(cut=="Fair" & color %in% c("D", "E")) %>% mutate( grouping_var = "data_fair_de" )
data_fair_df = diamonds %>% filter(cut=="Fair" & color %in% c("D", "F")) %>% mutate( grouping_var = "data_fair_df" )
data_fair_dg = diamonds %>% filter(cut=="Fair" & color %in% c("D", "G")) %>% mutate( grouping_var = "data_fair_dg" )
data_fair_dh = diamonds %>% filter(cut=="Fair" & color %in% c("D", "H")) %>% mutate( grouping_var = "data_fair_dh" )
data_fair_di = diamonds %>% filter(cut=="Fair" & color %in% c("D", "I")) %>% mutate( grouping_var = "data_fair_di" )
data_fair_dj = diamonds %>% filter(cut=="Fair" & color %in% c("D", "J")) %>% mutate( grouping_var = "data_fair_dj" )
data_good_de = diamonds %>% filter(cut=="Good" & color %in% c("D", "E")) %>% mutate( grouping_var = "data_good_de " )
data_good_df = diamonds %>% filter(cut=="Good" & color %in% c("D", "F")) %>% mutate( grouping_var = "data_good_df " )
data_good_dg = diamonds %>% filter(cut=="Good" & color %in% c("D", "G")) %>% mutate( grouping_var = "data_good_dg " )
data_good_dh = diamonds %>% filter(cut=="Good" & color %in% c("D", "H")) %>% mutate( grouping_var = "data_good_dh " )
data_good_di = diamonds %>% filter(cut=="Good" & color %in% c("D", "I")) %>% mutate( grouping_var = "data_good_di " )
data_good_dj = diamonds %>% filter(cut=="Good" & color %in% c("D", "J")) %>% mutate( grouping_var = "data_good_dj " )
data_very_de = diamonds %>% filter(cut=="Very Good" & color %in% c("D", "E")) %>% mutate( grouping_var = "data_very_de " )
data_very_df = diamonds %>% filter(cut=="Very Good" & color %in% c("D", "F")) %>% mutate( grouping_var = "data_very_df " )
data_very_dg = diamonds %>% filter(cut=="Very Good" & color %in% c("D", "G")) %>% mutate( grouping_var = "data_very_dg " )
data_very_dh = diamonds %>% filter(cut=="Very Good" & color %in% c("D", "H")) %>% mutate( grouping_var = "data_very_dh " )
data_very_di = diamonds %>% filter(cut=="Very Good" & color %in% c("D", "I")) %>% mutate( grouping_var = "data_very_di " )
data_very_dj = diamonds %>% filter(cut=="Very Good" & color %in% c("D", "J")) %>% mutate( grouping_var = "data_very_dj " )
data_premium_de = diamonds %>% filter(cut=="Premium" & color %in% c("D", "E")) %>% mutate( grouping_var = "data_premium_de " )
data_premium_df = diamonds %>% filter(cut=="Premium" & color %in% c("D", "F")) %>% mutate( grouping_var = "data_premium_df " )
data_premium_dg = diamonds %>% filter(cut=="Premium" & color %in% c("D", "G")) %>% mutate( grouping_var = "data_premium_dg " )
data_premium_dh = diamonds %>% filter(cut=="Premium" & color %in% c("D", "H")) %>% mutate( grouping_var = "data_premium_dh " )
data_premium_di = diamonds %>% filter(cut=="Premium" & color %in% c("D", "I")) %>% mutate( grouping_var = "data_premium_di " )
data_premium_dj = diamonds %>% filter(cut=="Premium" & color %in% c("D", "J")) %>% mutate( grouping_var = "data_premium_dj " )
data_ideal_de = diamonds %>% filter(cut=="Ideal" & color %in% c("D", "E")) %>% mutate( grouping_var = "data_ideal_de " )
data_ideal_df = diamonds %>% filter(cut=="Ideal" & color %in% c("D", "F")) %>% mutate( grouping_var = "data_ideal_df " )
data_ideal_dg = diamonds %>% filter(cut=="Ideal" & color %in% c("D", "G")) %>% mutate( grouping_var = "data_ideal_dg " )
data_ideal_dh = diamonds %>% filter(cut=="Ideal" & color %in% c("D", "H")) %>% mutate( grouping_var = "data_ideal_dh " )
data_ideal_di = diamonds %>% filter(cut=="Ideal" & color %in% c("D", "I")) %>% mutate( grouping_var = "data_ideal_di " )
data_ideal_dj = diamonds %>% filter(cut=="Ideal" & color %in% c("D", "J")) %>% mutate( grouping_var = "data_ideal_dj " )
bind_rows(
data_fair_de , data_fair_df , data_fair_dg , data_fair_dh , data_fair_di , data_fair_dj ,
data_good_de , data_good_df , data_good_dg , data_good_dh , data_good_di , data_good_dj ,
data_very_de , data_very_df , data_very_dg , data_very_dh , data_very_di , data_very_dj ,
data_premium_de, data_premium_df, data_premium_dg, data_premium_dh, data_premium_di, data_premium_dj,
data_ideal_de , data_ideal_df , data_ideal_dg , data_ideal_dh , data_ideal_di , data_ideal_dj ) %>%
group_by(grouping_var) %>%
nest()
答案 0 :(得分:1)
library(tidyverse)
# function to get data based on your filter
f = function(xx,yy,zz) {diamonds %>% filter(cut==xx & color %in% c(yy,zz))}
expand(diamonds, cut, color, color_D="D") %>% # create all combinations of interest
mutate_all(as.character) %>% # update to character variables
rowwise() %>% # for each row
mutate(data = list(f(cut, color, color_D))) %>% # apply your function
ungroup() %>% # forget the grouping
filter(color != color_D) # exclude cases where pair of colours is {D,D}
# # A tibble: 30 x 4
# cut color color_D data
# <chr> <chr> <chr> <list>
# 1 Fair E D <tibble [387 x 10]>
# 2 Fair F D <tibble [475 x 10]>
# 3 Fair G D <tibble [477 x 10]>
# 4 Fair H D <tibble [466 x 10]>
# 5 Fair I D <tibble [338 x 10]>
# 6 Fair J D <tibble [282 x 10]>
# 7 Good E D <tibble [1,595 x 10]>
# 8 Good F D <tibble [1,571 x 10]>
# 9 Good G D <tibble [1,533 x 10]>
# 10 Good H D <tibble [1,364 x 10]>
# # ... with 20 more rows
答案 1 :(得分:1)
如果要将D添加到每个组,则只需在每个组后面附加适当的“ D”
df <- diamonds %>%
group_by(cut, color) %>%
nest() %>%
arrange(cut, color) %>%
ungroup()%>%
group_by(cut) %>%
mutate(append_data = map(data, ~rbind(.x, data[[which(row_number() == 1)]])))
df
# cut color data append_data
# 1 Fair D <tibble [163 × 8]> <tibble [326 × 8]>
# 2 Fair E <tibble [224 × 8]> <tibble [387 × 8]>
# 3 Fair F <tibble [312 × 8]> <tibble [475 × 8]>
# 4 Fair G <tibble [314 × 8]> <tibble [477 × 8]>
# 5 Fair H <tibble [303 × 8]> <tibble [466 × 8]>
# # ... with 30 more rows
这应该为您提供一个新的嵌套列,每个组也具有D。
答案 2 :(得分:1)
我们可以按照您的想法在嵌套的data.frames上循环,然后按组添加颜色D
的行,然后删除D
行:
diamonds %>%
nest(-cut,-color) %>%
group_by(cut) %>%
mutate(data = map(data, ~ bind_rows(data[[which(color=="D")]], .x))) %>%
ungroup %>%
filter(color != "D") %>%
arrange(cut, color)
# # A tibble: 30 x 3
# cut color data
# <ord> <ord> <list>
# 1 Fair E <tibble [387 x 8]>
# 2 Fair F <tibble [475 x 8]>
# 3 Fair G <tibble [477 x 8]>
# 4 Fair H <tibble [466 x 8]>
# 5 Fair I <tibble [338 x 8]>
# 6 Fair J <tibble [282 x 8]>
# 7 Good E <tibble [1,595 x 8]>
# 8 Good F <tibble [1,571 x 8]>
# 9 Good G <tibble [1,533 x 8]>
# 10 Good H <tibble [1,364 x 8]>
# # ... with 20 more rows