我有一个数据框:
# Raw column vectors for the example data.
a <- c("yes", "yes", "no", "yes", "no")
b <- c("brown", "grey", "white", "grey", NA)
c <- c(7, 6, NA, 10, 8)
d <- c("male", "female", "female", "male", "female")
# Assemble the example data frame, naming the columns at construction time.
Zoo <- data.frame(animal = a, colour = b, age = c, gender = d)
animal colour age gender
yes brown 7 male
yes grey 6 female
no white NA female
yes grey 10 male
no NA 8 female
如果 animal 列的值为 "no",我想把该行其他列中所有非 NA 的值改为 "NL"(non-logical,即"不合逻辑")。我可以像下面这样一次处理一列:
# Work on plain character values so "NL" can be stored in the column.
Zoo$colour <- as.character(Zoo$colour)
# TRUE where the animal flag is "no" and a colour is recorded.
needs_nl <- Zoo$animal == "no" & !is.na(Zoo$colour)
# Replace the flagged entries with "NL"; all other entries are kept.
Zoo$colour <- ifelse(needs_nl, "NL", Zoo$colour)
对每一列都这样处理之后,最终得到:
animal colour age gender
yes brown 7 male
yes grey 6 female
no NL NA NL
yes grey 10 male
no NA NL NL
我确信有更高效的方法可以完成这项工作。有这样的方法吗?谢谢!
答案 0 :(得分:3)
这是另一种方式。请注意,我使用stringsAsFactors = FALSE
创建了一个data.frame,因为在此设置中使用因子级别非常繁琐。完成后,您可以自由地将字符列转换为因子。
基本上,此代码遍历每一行;对于 animal 为 "no" 的行,找出值不是 NA 的列,并把这些值替换为 "NL"。
# Raw column vectors for the example data.
a <- c("yes", "yes", "no", "yes", "no")
b <- c("brown", "grey", "white", "grey", NA)
c <- c(7, 6, NA, 10, 8)
d <- c("male", "female", "female", "male", "female")
# Keep text columns as character so "NL" can be written without factor levels.
zoo <- data.frame(
  animal = a, color = b, age = c, gender = d,
  stringsAsFactors = FALSE
)

# Columns eligible for replacement: everything except the "animal" flag.
# Computed once and used for both the NA test and the assignment, so the
# two always refer to the same columns wherever "animal" sits.
other_cols <- which(colnames(zoo) != "animal")

# seq_len() yields an empty sequence for a zero-row frame (1:0 would not).
for (i in seq_len(nrow(zoo))) {
  # isTRUE() skips rows whose animal value is NA instead of erroring in if().
  if (isTRUE(zoo[i, "animal"] == "no")) {
    # Which of the eligible columns hold a non-NA value in row i.
    find.el <- as.vector(!is.na(zoo[i, other_cols, drop = FALSE]))
    if (any(find.el)) {
      # Writing "NL" into a numeric column turns that column into character.
      zoo[i, other_cols[find.el]] <- "NL"
    }
  }
}
animal color age gender
1 yes brown 7 male
2 yes grey 6 female
3 no NL <NA> NL
4 yes grey 10 male
5 no <NA> NL NL
答案 1 :(得分:0)
对于多列的情况,我们可以使用 data.table 中高效的 set 函数:
library(data.table)
# Turn Zoo into a data.table so that set() can be used on it.
setDT(Zoo)
# Rows flagged "no"; the animal column itself is never modified below.
is_no <- Zoo[["animal"]] == "no"
for (col_name in names(Zoo)[-1]) {
  # Store the column as character first so it can hold "NL".
  as_text <- as.character(Zoo[[col_name]])
  set(Zoo, i = NULL, j = col_name, value = as_text)
  # Overwrite only the non-NA entries of the "no" rows.
  hit_rows <- which(is_no & !is.na(as_text))
  set(Zoo, i = hit_rows, j = col_name, value = "NL")
}
Zoo
# animal colour age gender
#1: yes brown 7 male
#2: yes grey 6 female
#3: no NL NA NL
#4: yes grey 10 male
#5: no NA NL NL
注意:由于使用了 set,这种方法应该非常高效。
或者,我们也可以使用优雅的 tidyverse 语法:
library(dplyr)
# For columns 2-4, replace every non-NA value in rows where animal is "no"
# with "NL". A plain function is passed instead of the deprecated funs()
# wrapper; replace() coerces a numeric column to character when "NL" is
# written into it.
Zoo %>%
  mutate_at(
    2:4,
    function(x) replace(x, Zoo[["animal"]] == "no" & !is.na(x), "NL")
  )
# animal colour age gender
#1 yes brown 7 male
#2 yes grey 6 female
#3 no NL <NA> NL
#4 yes grey 10 male
#5 no <NA> NL NL
# Benchmark data: Zoo's rows repeated 1e5 times. seq_len() is used instead
# of 1:nrow(Zoo) so a zero-row Zoo yields no row indices at all.
Zoo1 <- Zoo[rep(seq_len(nrow(Zoo)), 1e5), ]
# Separate copies so each timed approach starts from the same data
# (copy() comes from data.table, loaded above via library(data.table)).
Zoo2 <- copy(Zoo1)
Zoo3 <- copy(Zoo2)
# Time the data.table set() approach on the enlarged data Zoo2.
system.time({
  setDT(Zoo2)
  for (nm in names(Zoo2)[-1]) {
    # Store the column as character first so it can hold "NL".
    set(Zoo2, i = NULL, j = nm, value = as.character(Zoo2[[nm]]))
    # The row mask must come from Zoo2 itself. Using the 5-row Zoo here only
    # gave the right rows through vector recycling.
    set(Zoo2, i = which(Zoo2[["animal"]] == "no" & !is.na(Zoo2[[nm]])),
        j = nm, value = "NL")
  }
})
# user system elapsed
# 0.40 0.01 0.42
# Time the dplyr approach on the enlarged data Zoo3. The result is not
# assigned; only the elapsed time matters here. A plain function replaces
# the deprecated funs() wrapper.
system.time({
  Zoo3 %>%
    mutate_at(
      2:4,
      function(x) replace(x, Zoo3[["animal"]] == "no" & !is.na(x), "NL")
    )
})
#user system elapsed
# 0.42 0.03 0.46
# Time the row-by-row base R loop on the enlarged data Zoo1.
# The loop is intentionally left as written so the timing reflects it.
system.time({
for (i in 1:nrow(Zoo1)) {
# Only rows whose animal value is "no" are modified.
if (Zoo1[i, "animal"] == "no") {
# Flag the non-NA entries of row i among the columns other than "animal".
find.el <- !is.na(Zoo1[i, which(colnames(Zoo1) != "animal")])
# Overwrite the flagged entries of row i with "NL".
# NOTE(review): 2:ncol(Zoo1) lines up with find.el only if "animal" is column 1.
Zoo1[, 2:ncol(Zoo1)][i, find.el] <- "NL"
}
}
})
# user system elapsed
# 2086.49 577.51 2686.83
# Build Zoo from the vectors a, b, c, d, keeping text columns as character.
Zoo <- data.frame(
  animal = a,
  colour = b,
  age = c,
  gender = d,
  stringsAsFactors = FALSE
)