我想根据其他多个变量的条件判断来创建一个新变量。我使用了几个嵌套的 `if_else` 语句,但只有一部分条件按我的预期求值。
以下是一些示例数据的输出:
# Sample data as dput() output, one column per entry.
# The column name `Cultivation Type` is kept on a single line: a line break
# inside the backticks would become part of the column name.
structure(
  list(
    `Cultivation` = c(
      "No", "No", "Yes", "Yes", "No", "Yes",
      "No", "No", "No", "No", "Yes", "Yes"
    ),
    `Processing` = c(
      "No", "No", "Yes", "Yes", "No", "No",
      "No", "No", "No", "No", "No", "Yes"
    ),
    `Federal Sales` = c(
      "No", "No", "Yes", "Yes", "Yes", "Yes",
      "No", "No", "No", "No", "Yes", "Yes"
    ),
    `Cultivation Type` = c(
      NA, NA, "Standard", "Standard", NA, "Micro",
      NA, NA, NA, NA, "Nursery", "Standard"
    ),
    `Processing Type` = c(
      NA, NA, "Standard", "Standard", NA, NA,
      NA, NA, NA, NA, NA, "Standard"
    ),
    `Type` = c(
      NA, NA, "Standard", "Standard", NA, "Micro",
      NA, NA, NA, NA, NA, "Standard"
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -12L)
)
这是我正在使用的代码:
# Build DF.2 from DF.1 with a "Type" column derived from nested if_else() calls.
# The checks are case-insensitive substring matches (tolower() + str_detect()),
# tried in this order:
#   1. "micro" in `Cultivation Type` or `Processing Type`    -> "Micro"
#   2. "standard" in `Cultivation Type` or `Processing Type` -> "Standard"
#   3. "nursery" in `Cultivation Type`                       -> "Nursery"
#   4. anything else                                         -> "Other"
# NOTE(review): both type columns contain NA in the sample data; a condition
# that evaluates to NA makes if_else() return NA for that row instead of
# moving on to the next branch.
DF.2 <- DF.1 %>%
dplyr::mutate("Type" = if_else(str_detect(tolower(`Cultivation Type`),
"micro") |
str_detect(tolower(`Processing Type`), "micro"), "Micro",
if_else(str_detect(tolower(`Cultivation Type`), "standard") |
str_detect(tolower(`Processing Type`), "standard"), "Standard",
if_else(str_detect(tolower(`Cultivation Type`), "nursery"),
"Nursery","Other"))))
前两个条件能够正确求值,Type 变量得到 "Standard" 或 "Micro";但 "Nursery" 和 "Other" 没有被求值,得到的是 NA。
答案 0 :(得分:1)
在您的情况下,最好使用 `case_when` 而不是 `if_else`。此处所有 NA 都会得到 "Other":
library(dplyr)
library(stringr)
# Classify each row with case_when(): conditions are tried from top to bottom
# and the first one that is TRUE decides the value. A condition that evaluates
# to NA does not match, so rows with missing type columns fall through to the
# final TRUE ~ "Other" catch-all.
DF.2 <- DF.1 %>%
  mutate(
    Type = case_when(
      # "micro" in either type column (case-insensitive)
      str_detect(tolower(`Cultivation Type`), "micro") |
        str_detect(tolower(`Processing Type`), "micro") ~ "Micro",
      # "standard" in either type column
      str_detect(tolower(`Cultivation Type`), "standard") |
        str_detect(tolower(`Processing Type`), "standard") ~ "Standard",
      # "nursery" is only looked for in the cultivation type
      str_detect(tolower(`Cultivation Type`), "nursery") ~ "Nursery",
      TRUE ~ "Other"
    )
  )
输出:
> DF.2
# A tibble: 12 x 6
Cultivation Processing `Federal Sales` `Cultivation Type` `Processing Type` Type
<chr> <chr> <chr> <chr> <chr> <chr>
1 No No No NA NA Other
2 No No No NA NA Other
3 Yes Yes Yes Standard Standard Standard
4 Yes Yes Yes Standard Standard Standard
5 No No Yes NA NA Other
6 Yes No Yes Micro NA Micro
7 No No No NA NA Other
8 No No No NA NA Other
9 No No No NA NA Other
10 No No No NA NA Other
11 Yes No Yes Nursery NA Nursery
12 Yes Yes Yes Standard Standard Standard
>
答案 1 :(得分:0)
我们需要对代码做一些修改,使条件只返回 TRUE/FALSE,因为 NA 元素只会返回 NA,这可能会导致问题
library(dplyr)
# Nested if_else() classification that is safe against NA.
# str_detect() returns NA for NA input, and if_else() returns NA when its
# condition is NA. coalesce(..., FALSE) turns each NA test into FALSE on a
# per-column basis, so a match in one column still counts when the other
# column is NA (e.g. `Cultivation Type` = "Micro", `Processing Type` = NA).
DF.1 %>%
  dplyr::mutate(
    "Type" = if_else(
      coalesce(str_detect(tolower(`Cultivation Type`), "micro"), FALSE) |
        coalesce(str_detect(tolower(`Processing Type`), "micro"), FALSE),
      "Micro",
      if_else(
        coalesce(str_detect(tolower(`Cultivation Type`), "standard"), FALSE) |
          coalesce(str_detect(tolower(`Processing Type`), "standard"), FALSE),
        "Standard",
        if_else(
          coalesce(str_detect(tolower(`Cultivation Type`), "nursery"), FALSE),
          "Nursery",
          "Other"
        )
      )
    )
  )
# A tibble: 12 x 6
#   Cultivation Processing `Federal Sales` `Cultivation Type` `Processing Type` Type    
#   <chr>       <chr>      <chr>           <chr>              <chr>             <chr>   
# 1 No          No         No              <NA>               <NA>              Other   
# 2 No          No         No              <NA>               <NA>              Other   
# 3 Yes         Yes        Yes             Standard           Standard          Standard
# 4 Yes         Yes        Yes             Standard           Standard          Standard
# 5 No          No         Yes             <NA>               <NA>              Other   
# 6 Yes         No         Yes             Micro              <NA>              Micro   
# 7 No          No         No              <NA>               <NA>              Other   
# 8 No          No         No              <NA>               <NA>              Other   
# 9 No          No         No              <NA>               <NA>              Other   
#10 No          No         No              <NA>               <NA>              Other   
#11 Yes         No         Yes             Nursery            <NA>              Nursery 
#12 Yes         Yes        Yes             Standard           Standard          Standard
或者,如果想沿用与 OP 相同的代码,只需先把名称以 "Type" 结尾的各列中的 NA 替换为一个占位值,转换完成后再把该占位值改回 NA 即可
# Same nested if_else() logic as in the question, wrapped in a placeholder
# round trip so that the conditions never evaluate to NA.
DF.1 %>%
  # Step 1: in every column whose name ends with "Type", replace NA with the
  # placeholder string "new" (it matches none of the patterns below).
  mutate_at(vars(ends_with("Type")), replace_na, "new") %>%
  # Step 2: classify; first matching branch wins, "Other" is the fallback.
  dplyr::mutate(
    Type = if_else(
      str_detect(tolower(`Cultivation Type`), "micro") |
        str_detect(tolower(`Processing Type`), "micro"),
      "Micro",
      if_else(
        str_detect(tolower(`Cultivation Type`), "standard") |
          str_detect(tolower(`Processing Type`), "standard"),
        "Standard",
        if_else(
          str_detect(tolower(`Cultivation Type`), "nursery"),
          "Nursery",
          "Other"
        )
      )
    )
  ) %>%
  # Step 3: turn the placeholder back into NA in the same set of columns.
  mutate_at(vars(ends_with("Type")), na_if, "new")
如果我们对其他更简单的选项感兴趣,那么另一个选择是创建键/值数据集,然后进行模糊连接