我需要根据多个因子列的取值组合(包括这些列的各种子组合)对 data.frame 进行子集化。此外,因子列的数量可能会发生变化,因此该方法需要足够灵活,能够接受不同数量的列。在一个简单的例子中,我已经弄清楚了如何生成这些变量组合,但还无法高效地对 data.frame 进行子集化。有什么想法吗?
# Set up an example table with three character columns.
# data.table() comes from the data.table package, so it must be attached
# before the first call.
library(data.table)

# NOTE: the vector named `c` shares its name with base::c(); calls such as
# c("a", "b") still resolve to the function, but the name is easy to misread.
a <- c("a", "b", "b", "b", "e")
b <- c("b", "c", "b", "b", "f")
c <- c("c", "d", "b", "b", "g")
df <- data.table(a = a, b = b, c = c)
# Build a table of the unique value combinations to subset on.
#
# For every contiguous column range i..j of the de-duplicated data, take those
# columns and stack the pieces; fill = TRUE pads the columns outside the range
# with NA. Duplicate rows are dropped at the end, so each row of `df_combos`
# describes one combination (NA meaning "column not part of it").
#
# NOTE(review): only contiguous column ranges are generated (for columns
# a, b, c the pair {a, c} never appears) -- confirm that this is intended.
df_unique <- unique(df)
n_cols <- ncol(df_unique)

# Collect the pieces in a preallocated list and bind once, instead of growing
# a table with rbind() on every iteration.
col_ranges <- vector("list", n_cols * (n_cols + 1L) / 2L)
k <- 0L
for (i in seq_len(n_cols)) {
  # Start at i: a reversed range (j < i) selects the same columns as its
  # forward counterpart and would only add rows that get de-duplicated away.
  for (j in i:n_cols) {
    k <- k + 1L
    col_ranges[[k]] <- df_unique[, i:j, with = FALSE]
  }
}
df_combos <- unique(rbindlist(col_ranges, fill = TRUE))
rm(df_unique)
# Loop over every combination in `df_combos` and extract the matching rows
# of `df` into `df_sub`.
combos_out <- data.table()
# seq_len() yields an empty sequence when `df_combos` has no rows, so the loop
# body is skipped instead of running with i = 1 and i = 0.
for (i in seq_len(nrow(df_combos))) {
  df_combos_sub <- df_combos[i, ]
  # Columns that are NA in this row are treated as not belonging to the
  # combination and are dropped. A genuine NA value in the data would be
  # dropped here as well.
  in_combo <- vapply(
    df_combos_sub,
    function(x) !all(is.na(x)),
    logical(1)
  )
  df_combos_sub <- df_combos_sub[, which(in_combo), with = FALSE]
  # Joining on the remaining columns keeps only the rows of `df` whose values
  # match this combination.
  df_sub <- merge(df, df_combos_sub, by = colnames(df_combos_sub))
  # interesting code here that performs analysis on the subsets
}