我想按行统计选定列中 NA 的个数,并将结果保存到新列中。我希望使用 dplyr 的 mutate() 函数实现此目的。
它应该如何运作:
loop for each row i in test{
test$SUM <-sum(is.na(test[i,1:2]))
test$SUM2 <-sum(is.na(test[i,3:4]))
test$SUM3 <-sum(is.na(test[i,5:6]))
}
使用的数据:
# Example data: six numeric columns (BIEZ_01 .. BIEZ_06) with six rows each.
# NAs occur in BIEZ_01, BIEZ_03, BIEZ_05 and BIEZ_06.
test <- data.frame(
  BIEZ_01 = c(59000, 61462,    NA, 33000, 30840, 36612),
  BIEZ_02 = c( 5060, 55401, 33000, 33000, 30840, 28884),
  BIEZ_03 = c(   NA, 60783, 20000, 20000,    NA, 19248),
  BIEZ_04 = c(22100, 59885, 15000, 15000, 20840, 10000),
  BIEZ_05 = c(   NA, 59209, 15000, 15000, 20840,    NA),
  BIEZ_06 = c( 4400,  6109,    NA,   500, 10840, 10000)
)
答案 0 :(得分:1)
以下是使用 apply 函数的解决方案:
# Walk over the rows of `test`; for each row, count the NAs found at column
# positions 1-2, 3-4 and 5-6.
NA_counts <- apply(
  X = test,
  MARGIN = 1,
  FUN = function(row_values) {
    missing <- is.na(row_values)
    c(
      SUM1 = sum(missing[1:2]),
      SUM2 = sum(missing[3:4]),
      SUM3 = sum(missing[5:6])
    )
  }
)
# apply() yields one column per input row, so transpose before binding the
# counts next to the original columns.
cbind(test, t(NA_counts))
答案 1 :(得分:1)
# Add one NA-count column per consecutive pair of columns of `test`.
# paste0() is used so the new names are syntactic ("SUM1", not "SUM 1").
# The number of pairs is fixed before the loop starts, so the SUM columns
# added along the way are never counted themselves.
stopifnot(ncol(test) %% 2 == 0)
for (pair in seq_len(ncol(test) %/% 2)) {
  pair_cols <- c(2 * pair - 1, 2 * pair)
  test[[paste0("SUM", pair)]] <- rowSums(is.na(test[pair_cols]))
}
#   BIEZ_01 BIEZ_02 BIEZ_03 BIEZ_04 BIEZ_05 BIEZ_06 SUM1 SUM2 SUM3
# 1   59000    5060      NA   22100      NA    4400    0    1    1
# 2   61462   55401   60783   59885   59209    6109    0    0    0
# 3      NA   33000   20000   15000   15000      NA    1    0    1
# 4   33000   33000   20000   15000   15000     500    0    0    0
# 5   30840   30840      NA   20840   20840   10840    0    1    0
# 6   36612   28884   19248   10000      NA   10000    0    0    1
这有点“整洁”:
library(tidyverse)
# Split the column positions of `test` into consecutive pairs, append a
# SUM<n> column holding the per-row NA count to each pair, then bind the
# pieces side by side.
# mutate() with an injected name creates the new column; the superseded
# mutate_at() is meant for transforming columns that already exist.
seq_len(ncol(test)) %>%
  split((. - 1) %/% 2 + 1) %>%
  imap(function(cols, group_id) {
    pair <- test[cols]
    mutate(pair, !!paste0("SUM", group_id) := rowSums(is.na(pair)))
  }) %>%
  bind_cols()
# BIEZ_01 BIEZ_02 SUM1 BIEZ_03 BIEZ_04 SUM2 BIEZ_05 BIEZ_06 SUM3
# 1 59000 5060 0 NA 22100 1 NA 4400 1
# 2 61462 55401 0 60783 59885 0 59209 6109 0
# 3 NA 33000 1 20000 15000 0 15000 NA 1
# 4 33000 33000 0 20000 15000 0 15000 500 0
# 5 30840 30840 0 NA 20840 1 20840 10840 0
# 6 36612 28884 0 19248 10000 0 NA 10000 1
这将是非常整洁的版本:
# Long-format approach: reshape to one row per (rowid, column), tag each
# column with the index of the pair it belongs to, count NAs per row and
# pair, reshape back to wide and bind the counts to the original data.
# The pair index comes from the column's position in `test`, so it does not
# depend on the alphabetical order of the column names.
test %>%
  rowid_to_column("rowid") %>%
  pivot_longer(-rowid, names_to = "key", values_to = "value") %>%
  mutate(SUM = ceiling(match(key, names(test)) / 2)) %>%
  group_by(rowid, SUM) %>%
  summarize(sum_val = sum(is.na(value))) %>%
  ungroup() %>%
  pivot_wider(
    names_from = SUM,
    values_from = sum_val,
    names_prefix = "SUM"
  ) %>%
  select(-rowid) %>%
  bind_cols(test, .)
# BIEZ_01 BIEZ_02 BIEZ_03 BIEZ_04 BIEZ_05 BIEZ_06 SUM1 SUM2 SUM3
# 1 59000 5060 NA 22100 NA 4400 0 1 1
# 2 61462 55401 60783 59885 59209 6109 0 0 0
# 3 NA 33000 20000 15000 15000 NA 1 0 1
# 4 33000 33000 20000 15000 15000 500 0 0 0
# 5 30840 30840 NA 20840 20840 10840 0 1 0
# 6 36612 28884 19248 10000 NA 10000 0 0 1
我还尝试使用 nest 将列每 2 个分为一组,并在嵌套结果上使用 map_dfc 来添加新列,但由于 .key 参数采用非标准求值,我没能把 reduce 与 nest 结合起来使用……否则代码会更短、更易读。
答案 2 :(得分:1)
另一个选择
# For every consecutive pair of columns of `test`, count the NAs in each row.
# One result column is produced per pair.
# vapply() pins the result type and length; `test[cols]` (rather than
# `test[, cols]`) always stays a data frame, so rowSums() also works when a
# group holds a single column.
NA.counts <- vapply(
  split(seq_len(ncol(test)), ceiling(seq_len(ncol(test)) / 2)),
  function(cols) rowSums(is.na(test[cols])),
  FUN.VALUE = numeric(nrow(test))
)
如果您想使用 tidyverse 添加这些列,可以这样做:
library(tidyverse)
# Group the column positions of `test` into consecutive pairs, count the NAs
# per row within each pair, and bind the counts to `test`. cbind() expands
# the named list into columns NA.counts.1, NA.counts.2, ...
# `test[cols]` keeps a data frame even for a single-column group.
test %>%
  cbind(
    NA.counts = seq_len(ncol(test)) %>%
      split(ceiling(. / 2)) %>%
      map(function(cols) rowSums(is.na(test[cols])))
  )
# BIEZ_01 BIEZ_02 BIEZ_03 BIEZ_04 BIEZ_05 BIEZ_06 NA.counts.1 NA.counts.2 NA.counts.3
# 1 59000 5060 NA 22100 NA 4400 0 1 1
# 2 61462 55401 60783 59885 59209 6109 0 0 0
# 3 NA 33000 20000 15000 15000 NA 1 0 1
# 4 33000 33000 20000 15000 15000 500 0 0 0
# 5 30840 30840 NA 20840 20840 10840 0 1 0
# 6 36612 28884 19248 10000 NA 10000 0 0 1
正如 @Moody_Mudskipper 指出的那样,如果您想直接修改数据框,cbind 是不必要的。您可以使用:
# Modify `test` in place: add one SUM<n> column per consecutive pair of
# columns, holding the per-row NA count of that pair.
# The number of names is derived from the number of groups produced by
# split(), so names and values always line up.
test[paste0("SUM", seq_len(ceiling(ncol(test) / 2)))] <-
  seq_len(ncol(test)) %>%
  split(ceiling(. / 2)) %>%
  map(function(cols) rowSums(is.na(test[cols])))