我有 5 个条件,每个条件可以存在(= 1)或不存在(= 0):
# Reproducible toy data: five 0/1 condition indicators, 30 draws each.
set.seed(101)
df <- data.frame(
  setNames(
    # The columns are drawn one after another, so the random stream is
    # consumed in the order alfa, beta, gamma, delta, epsilon.
    replicate(
      5,
      sample(c(0, 1), size = 30, replace = TRUE),
      simplify = FALSE
    ),
    c("alfa", "beta", "gamma", "delta", "epsilon")
  )
)
我想根据这些条件存在与否的所有可能组合($2^5 = 32$ 种)生成一组虚拟变量。具体来说,我想验证以下条件是否同时存在:
alfa + beta;
alfa + gamma;
alfa + delta;
alfa + epsilon;
alfa + beta + gamma;
alfa + beta + gamma + delta + epsilon;
# Truth table of presence/absence patterns: one row per pattern and one
# logical column per column of df.
v <- as.matrix(
  expand.grid(replicate(ncol(df), c(FALSE, TRUE), simplify = FALSE))
)
head(v)
Var1 Var2 Var3 Var4 Var5
[1,] TRUE FALSE FALSE FALSE FALSE
[2,] FALSE TRUE FALSE FALSE FALSE
[3,] TRUE TRUE FALSE FALSE FALSE
[4,] FALSE FALSE TRUE FALSE FALSE
[5,] TRUE FALSE TRUE FALSE FALSE
[6,] FALSE TRUE TRUE FALSE FALSE
# One logical selector per row of v, labelled with the dot-joined names of
# the df columns that the row switches on.
indexes <- setNames(
  lapply(seq_len(nrow(v)), function(i) v[i, ]),
  vapply(
    seq_len(nrow(v)),
    function(i) paste(names(df)[v[i, ]], collapse = "."),
    character(1)
  )
)
不幸的是,我卡在了这里。
我需要根据上述组合生成 27 个虚拟变量(32 − 5 = 27)。
# Reproducible toy data: five 0/1 condition indicators, 30 draws each.
set.seed(101)
df <- data.frame(
  alfa = sample(c(0, 1), 30, replace = TRUE),
  beta = sample(c(0, 1), 30, replace = TRUE),
  gamma = sample(c(0, 1), 30, replace = TRUE),
  delta = sample(c(0, 1), 30, replace = TRUE),
  epsilon = sample(c(0, 1), 30, replace = TRUE)
)

#' Add one exclusive dummy column per combination of conditions
#'
#' For every presence/absence pattern over `cols` that switches on at least
#' `min_size` conditions, a 0/1 column is appended to `data`. The column is 1
#' only for rows that match the pattern exactly, i.e. the listed conditions
#' equal 1 and all remaining conditions in `cols` equal 0. A row with a
#' missing value yields `NA` unless another condition already rules the
#' pattern out.
#'
#' @param data A data frame holding the 0/1 condition columns.
#' @param cols Character vector naming the condition columns in `data`.
#' @param min_size Smallest number of simultaneously present conditions that
#'   gets its own column; must be at least 1.
#' @return `data` with the new columns appended. Columns are named by joining
#'   the present condition names with "." and are ordered with the first
#'   entry of `cols` varying fastest.
add_combination_dummies <- function(data, cols, min_size = 2L) {
  stopifnot(
    is.data.frame(data),
    is.character(cols),
    length(cols) >= 1L,
    all(cols %in% names(data)),
    min_size >= 1
  )

  # Every 0/1 pattern over the conditions, one row per pattern.
  patterns <- as.matrix(expand.grid(rep(list(c(0, 1)), length(cols))))
  patterns <- patterns[rowSums(patterns) >= min_size, , drop = FALSE]

  labels <- vapply(
    seq_len(nrow(patterns)),
    function(i) paste(cols[patterns[i, ] == 1], collapse = "."),
    character(1)
  )
  dummies <- lapply(seq_len(nrow(patterns)), function(i) {
    # Compare each condition column with the value the pattern requires and
    # combine the comparisons with an elementwise AND.
    hits <- Reduce(
      `&`,
      Map(function(col, value) data[[col]] == value, cols, patterns[i, ])
    )
    as.numeric(hits)
  })

  if (length(labels) > 0L) {
    data[labels] <- dummies
  }
  data
}

# Captured before `n` is added so that it holds only the five conditions.
condition_cols <- names(df)

# Count the number of coexistent conditions per row.
df$n <- rowSums(df[condition_cols], na.rm = TRUE)

# One exclusive dummy for each combination of two or more conditions.
df <- add_combination_dummies(df, condition_cols)
答案 0 :(得分:1)
也许您可以使用 `stats::model.matrix`,并在模型公式中使用 `^` 运算符返回所有(高阶)变量交互,从而实现您想要的效果:
library(stats)

# Five logical condition indicators, 30 unseeded random draws each.
df <- data.frame(
  setNames(
    replicate(
      5,
      sample(c(TRUE, FALSE), size = 30, replace = TRUE),
      simplify = FALSE
    ),
    c("alfa", "beta", "gamma", "delta", "epsilon")
  )
)

# `.^5` requests every column of df plus all of their interactions up to
# order five; the resulting column names are shown by colnames().
df_dummy <- stats::model.matrix(object = ~ .^5, data = df)
colnames(df_dummy)
#> [1] "(Intercept)"
#> [2] "alfaTRUE"
#> [3] "betaTRUE"
#> [4] "gammaTRUE"
#> [5] "deltaTRUE"
#> [6] "epsilonTRUE"
#> [7] "alfaTRUE:betaTRUE"
#> [8] "alfaTRUE:gammaTRUE"
#> [9] "alfaTRUE:deltaTRUE"
#> [10] "alfaTRUE:epsilonTRUE"
#> [11] "betaTRUE:gammaTRUE"
#> [12] "betaTRUE:deltaTRUE"
#> [13] "betaTRUE:epsilonTRUE"
#> [14] "gammaTRUE:deltaTRUE"
#> [15] "gammaTRUE:epsilonTRUE"
#> [16] "deltaTRUE:epsilonTRUE"
#> [17] "alfaTRUE:betaTRUE:gammaTRUE"
#> [18] "alfaTRUE:betaTRUE:deltaTRUE"
#> [19] "alfaTRUE:betaTRUE:epsilonTRUE"
#> [20] "alfaTRUE:gammaTRUE:deltaTRUE"
#> [21] "alfaTRUE:gammaTRUE:epsilonTRUE"
#> [22] "alfaTRUE:deltaTRUE:epsilonTRUE"
#> [23] "betaTRUE:gammaTRUE:deltaTRUE"
#> [24] "betaTRUE:gammaTRUE:epsilonTRUE"
#> [25] "betaTRUE:deltaTRUE:epsilonTRUE"
#> [26] "gammaTRUE:deltaTRUE:epsilonTRUE"
#> [27] "alfaTRUE:betaTRUE:gammaTRUE:deltaTRUE"
#> [28] "alfaTRUE:betaTRUE:gammaTRUE:epsilonTRUE"
#> [29] "alfaTRUE:betaTRUE:deltaTRUE:epsilonTRUE"
#> [30] "alfaTRUE:gammaTRUE:deltaTRUE:epsilonTRUE"
#> [31] "betaTRUE:gammaTRUE:deltaTRUE:epsilonTRUE"
#> [32] "alfaTRUE:betaTRUE:gammaTRUE:deltaTRUE:epsilonTRUE"
由 reprex package(v0.3.0)创建于 2019-06-16

展开后的矩阵 `df_dummy` 包含所有 31 种可能的交互组合(截距除外)的独热编码列。若要去掉截距,请将模型公式替换为 `~ .^5 + 0` 或 `~ .^5 - 1`。请注意,只需将 `5` 替换为 `df` 的列数,即可轻松地将其扩展到更多变量。
编辑:上面的代码得到的并不是各变量组合的“排他性”存在(即仅这些变量存在、其余变量均不存在,类似于已编辑问题中的手动编码)。为此,您可以尝试:
# Subtracting `.^4` drops every term below order five and `- 1` drops the
# intercept, so only the five-way term remains; its columns (listed below)
# are named after complete TRUE/FALSE patterns across all five variables.
df_dummy <- stats::model.matrix(object = ~ .^5 - .^4 - 1, data = df)
colnames(df_dummy)
#> [1] "alfaFALSE:betaFALSE:gammaFALSE:deltaFALSE:epsilonFALSE"
#> [2] "alfaTRUE:betaFALSE:gammaFALSE:deltaFALSE:epsilonFALSE"
#> [3] "alfaFALSE:betaTRUE:gammaFALSE:deltaFALSE:epsilonFALSE"
#> [4] "alfaTRUE:betaTRUE:gammaFALSE:deltaFALSE:epsilonFALSE"
#> [5] "alfaFALSE:betaFALSE:gammaTRUE:deltaFALSE:epsilonFALSE"
#> [6] "alfaTRUE:betaFALSE:gammaTRUE:deltaFALSE:epsilonFALSE"
#> [7] "alfaFALSE:betaTRUE:gammaTRUE:deltaFALSE:epsilonFALSE"
#> [8] "alfaTRUE:betaTRUE:gammaTRUE:deltaFALSE:epsilonFALSE"
#> [9] "alfaFALSE:betaFALSE:gammaFALSE:deltaTRUE:epsilonFALSE"
#> [10] "alfaTRUE:betaFALSE:gammaFALSE:deltaTRUE:epsilonFALSE"
#> [11] "alfaFALSE:betaTRUE:gammaFALSE:deltaTRUE:epsilonFALSE"
#> [12] "alfaTRUE:betaTRUE:gammaFALSE:deltaTRUE:epsilonFALSE"
#> [13] "alfaFALSE:betaFALSE:gammaTRUE:deltaTRUE:epsilonFALSE"
#> [14] "alfaTRUE:betaFALSE:gammaTRUE:deltaTRUE:epsilonFALSE"
#> [15] "alfaFALSE:betaTRUE:gammaTRUE:deltaTRUE:epsilonFALSE"
#> [16] "alfaTRUE:betaTRUE:gammaTRUE:deltaTRUE:epsilonFALSE"
#> [17] "alfaFALSE:betaFALSE:gammaFALSE:deltaFALSE:epsilonTRUE"
#> [18] "alfaTRUE:betaFALSE:gammaFALSE:deltaFALSE:epsilonTRUE"
#> [19] "alfaFALSE:betaTRUE:gammaFALSE:deltaFALSE:epsilonTRUE"
#> [20] "alfaTRUE:betaTRUE:gammaFALSE:deltaFALSE:epsilonTRUE"
#> [21] "alfaFALSE:betaFALSE:gammaTRUE:deltaFALSE:epsilonTRUE"
#> [22] "alfaTRUE:betaFALSE:gammaTRUE:deltaFALSE:epsilonTRUE"
#> [23] "alfaFALSE:betaTRUE:gammaTRUE:deltaFALSE:epsilonTRUE"
#> [24] "alfaTRUE:betaTRUE:gammaTRUE:deltaFALSE:epsilonTRUE"
#> [25] "alfaFALSE:betaFALSE:gammaFALSE:deltaTRUE:epsilonTRUE"
#> [26] "alfaTRUE:betaFALSE:gammaFALSE:deltaTRUE:epsilonTRUE"
#> [27] "alfaFALSE:betaTRUE:gammaFALSE:deltaTRUE:epsilonTRUE"
#> [28] "alfaTRUE:betaTRUE:gammaFALSE:deltaTRUE:epsilonTRUE"
#> [29] "alfaFALSE:betaFALSE:gammaTRUE:deltaTRUE:epsilonTRUE"
#> [30] "alfaTRUE:betaFALSE:gammaTRUE:deltaTRUE:epsilonTRUE"
#> [31] "alfaFALSE:betaTRUE:gammaTRUE:deltaTRUE:epsilonTRUE"
#> [32] "alfaTRUE:betaTRUE:gammaTRUE:deltaTRUE:epsilonTRUE"
由reprex package(v0.3.0)
创建于2019-06-16