我有一个包含3个变量的DF:
unix# VAGRANT_LOG=info vagrant up >vagrant.log
unix# VAGRANT_LOG=info vagrant up | tee vagrant.log
power-shell> vagrant up --debug &> vagrant.log
power-shell> vagrant up --debug | Tee-Object -FilePath ".\vagrant.log"
我提出了这个(不完整的)代码:
# Reproducible example data: 100 rows, three columns.
set.seed(101)
df <- data.frame(
  phd = sample(c("yes", "no"), size = 100, replace = TRUE),  # "yes" / "no"
  age = sample(23:45, size = 100, replace = TRUE),           # integers 23..45
  gre = sample(130:170, size = 100, replace = TRUE)          # integers 130..170
)
我在编写此代码时遇到了麻烦。我参考了几个帖子。
答案 0 :(得分:3)
我认为您把这个问题想得过于复杂了(或者说过度概念化了)。这个逻辑不需要分支树,只需把三个选择的结果粘贴成一个答案即可。下面是一个 tidyverse 版本,一步即可完成全部 3 个选择。
library(dplyr)

# Build the label in one step: each of the three tests contributes one piece
# and paste() joins the pieces with single spaces.
# Note: the age piece is the empty string for age >= 30, so those rows get two
# consecutive spaces between the PhD piece and the score piece.
df2 <- df %>%
  mutate(
    phd.status = paste(
      if_else(phd == "yes", "PhD", "No_PhD"),
      if_else(age < 30, "30yr", ""),
      if_else(gre < 151, "low score", "high score"),
      sep = " "
    )
  )

# Show the first rows of the result.
head(df2)
phd age gre phd.status
1 yes 39 132 PhD low score
2 yes 34 166 PhD high score
3 yes 32 153 PhD high score
4 yes 33 132 PhD low score
5 yes 43 132 PhD low score
6 yes 27 169 PhD 30yr high score
答案 1 :(得分:1)
类似的事情应该起作用:
# Label every row with its leaf of the decision tree:
#   phd == "yes"  -> split on age (< 30), then on GRE (< 151)
#   otherwise     -> split on GRE (< 151) only
# Fix: the age >= 30 branch used to repeat the "PhD 30yr ..." labels, which
# made PhD holders aged 30 or more indistinguishable from those under 30.
# That branch now yields the "PhD +30yr ..." labels.
df$phd.status <- with(df, ifelse(
  phd == "yes",
  ifelse(
    age < 30,
    ifelse(gre < 151, "PhD 30yr low score", "PhD 30yr high score"),
    ifelse(gre < 151, "PhD +30yr low score", "PhD +30yr high score")
  ),
  ifelse(gre < 151, "No PhD low score", "No PhD high score")
))
答案 2 :(得分:1)
严格遵循图表时:
# Walk the decision tree from the root; every leaf is a finished label.
df$phd.state <- with(df, ifelse(
  phd != "no",
  # phd is anything other than "no": split on age first, then on GRE.
  ifelse(
    age < 30,
    ifelse(gre < 151, "PhD 30yr low score", "PhD 30yr high score"),
    ifelse(gre < 151, "PhD +30yr low score", "PhD +30yr high score")
  ),
  # phd is "no": only the GRE score decides the label.
  ifelse(gre < 151, "No PhD low score", "No PhD high score")
))
答案 3 :(得分:1)
首先,用 ifelse() 语句可以很容易地把每个变量改成图中的标签。按照该算法,判断的顺序并不重要,因此只用 mutate(variable = ifelse()) 就足够了。
如果要得到所给算法中的输出,可以在 mutate() 之后使用 tidyr::unite()。通过 sep = " ",unite 合并后的三个值之间会各有一个空格。
library(tidyverse)

# Recode each column into its text label in place, then glue all columns of
# the data frame into a single `status` column separated by one space.
df %>%
  mutate(
    phd = ifelse(phd == "yes", "PhD", "No PhD"),
    age = ifelse(age < 30, "30yr", "+30yr"),
    gre = ifelse(gre < 151, "low score", "high score")
  ) %>%
  unite("status", sep = " ")
#> status
#> 1 PhD 30yr high score
#> 2 PhD 30yr high score
#> 3 No PhD +30yr low score
#> 4 No PhD +30yr low score
#> 5 PhD +30yr high score
#> 6 PhD +30yr low score
#> 7 No PhD 30yr high score
#> 8 PhD 30yr low score
#> 9 No PhD +30yr high score
#> 10 No PhD +30yr low score
#> 11 No PhD 30yr low score
#> 12 No PhD 30yr high score
#> 13 No PhD +30yr high score
#> 14 No PhD +30yr low score
#> 15 PhD +30yr low score
#> 16 No PhD +30yr high score
#> 17 No PhD +30yr low score
#> 18 PhD +30yr high score
#> 19 PhD +30yr high score
#> 20 PhD +30yr low score
#> 21 No PhD +30yr low score
#> 22 No PhD 30yr low score
#> 23 PhD 30yr low score
#> 24 No PhD +30yr low score
#> 25 No PhD 30yr low score
#> 26 No PhD +30yr low score
#> 27 PhD +30yr high score
#> 28 PhD +30yr low score
#> 29 PhD +30yr high score
#> 30 No PhD 30yr high score
#> 31 PhD +30yr low score
#> 32 PhD +30yr low score
#> 33 PhD 30yr low score
#> 34 PhD 30yr high score
#> 35 No PhD +30yr low score
#> 36 No PhD +30yr high score
#> 37 PhD +30yr low score
#> 38 No PhD +30yr low score
#> 39 PhD +30yr high score
#> 40 No PhD +30yr low score
#> 41 PhD +30yr low score
#> 42 PhD 30yr high score
#> 43 No PhD +30yr low score
#> 44 PhD +30yr low score
#> 45 PhD +30yr low score
#> 46 PhD 30yr low score
#> 47 PhD +30yr low score
#> 48 No PhD +30yr low score
#> 49 No PhD 30yr high score
#> 50 PhD +30yr low score
#> 51 PhD 30yr low score
#> 52 PhD +30yr low score
#> 53 No PhD +30yr low score
#> 54 No PhD +30yr high score
#> 55 PhD 30yr low score
#> 56 No PhD 30yr high score
#> 57 No PhD +30yr high score
#> 58 No PhD +30yr high score
#> 59 No PhD +30yr low score
#> 60 No PhD +30yr low score
#> 61 No PhD +30yr low score
#> 62 No PhD +30yr high score
#> 63 No PhD +30yr high score
#> 64 No PhD +30yr low score
#> 65 No PhD +30yr low score
#> 66 No PhD +30yr high score
#> 67 PhD 30yr low score
#> 68 PhD +30yr high score
#> 69 No PhD 30yr high score
#> 70 No PhD +30yr high score
#> 71 PhD +30yr low score
#> 72 No PhD +30yr high score
#> 73 No PhD +30yr high score
#> 74 No PhD +30yr low score
#> 75 No PhD +30yr high score
#> 76 PhD +30yr low score
#> 77 PhD +30yr high score
#> 78 PhD +30yr high score
#> 79 No PhD +30yr low score
#> 80 No PhD +30yr low score
#> 81 No PhD +30yr low score
#> 82 No PhD +30yr low score
#> 83 No PhD +30yr low score
#> 84 No PhD 30yr low score
#> 85 PhD +30yr low score
#> 86 No PhD 30yr low score
#> 87 PhD 30yr high score
#> 88 PhD 30yr high score
#> 89 PhD +30yr high score
#> 90 PhD 30yr low score
#> 91 No PhD 30yr low score
#> 92 No PhD 30yr high score
#> 93 No PhD +30yr high score
#> 94 PhD 30yr high score
#> 95 PhD +30yr low score
#> 96 No PhD +30yr low score
#> 97 PhD +30yr low score
#> 98 PhD +30yr high score
#> 99 PhD 30yr low score
#> 100 PhD +30yr low score
这些值就是图中的值。
答案 4 :(得分:0)
对二叉树的最终状态进行分类的一种简单方法是执行以下操作:
# Encode each test as one decimal digit (hundreds = PhD, tens = age < 30,
# ones = GRE < 151); the sum is a distinct code for every combination of the
# three tests. Rows are then sorted by that code.
# Note: phd, age and gre are overwritten with their digit values in df2.
df2 <- df %>%
  mutate(
    phd = ifelse(phd == "yes", 100, 0),
    age = ifelse(age < 30, 10, 0),
    gre = ifelse(gre < 151, 1, 0),
    bucket = phd + age + gre
  ) %>%
  arrange(bucket)
存储桶值为您提供每种可能状态的输出。