按条件将所有行值转换为NA

时间:2014-01-22 20:21:12

标签: r

我想按条件逐行把10列的值转换为NA:如果某个[1 row, 10 columns]范围内的有效(非NA)观测值个数小于4,则该范围内的所有值都必须转换为NA。

# TRUE when x contains fewer than 4 non-NA values.
sum(!is.na(x)) < 4

但是,我目前最多只能做到把这些列变成TRUE/FALSE。我漏掉了什么?

我现在的代码:

# Attempt: for each 10-column group (i = 2 and i = 13, i.e. columns 2:11 and
# 13:22), blank out the rows that have fewer than 4 non-NA values in the group.
for (i in seq(2,22,11)) {
# NOTE: the anonymous function ends with an assignment, so its value is the
# right-hand side (a single TRUE/FALSE) rather than the modified x. apply()
# therefore returns one logical per row, and that logical vector is what gets
# written into the ten columns.
test[,i:(i+9)] <- apply(test[,i:(i+9)], 1, function(x) {is.na(x) <- sum(!is.na(x)) < 4})
}

只针对单个[1 row, 10 columns]组合时,代码如下:

# Single-group version: if row 1 has fewer than 4 non-NA values in columns
# 13:22, the right-hand side is TRUE and all ten cells are set to NA;
# otherwise it is FALSE and nothing is changed.
is.na(test[1,13:22]) <- sum(!is.na(test[1,13:22])) < 4

我的数据(10x22数据框):

structure(list(a = c(23.02522869, 22.75698125, 23.25638988, 23.4618945, 
22.79093387, 23.03077455, 22.79532129, 22.88151619, 22.55282018, 
22.82755365), b = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), c = c(25.1671462, 
24.50974654, 24.37007385, 25.79732045, 25.97977955, 27.75533066, 
25.61258476, 24.68043644, 25.06214083, 24.14456558), d = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), e = c(23.20319674, 23.01461971, 23.44023974, 
23.43422643, 22.74593993, 23.10000038, 22.84586729, 22.76342647, 
22.8174631, 22.90336953), f = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), g = c(22.21603961, 22.13781649, 21.77009694, 21.97247332, 
20.35237025, 21.3321401, 20.67065374, 20.37071737, 20.35318725, 
21.46554323), h = c(21.41724009, 22.76705639, 21.98989349, 21.87247294, 
20.50963963, 20.83750966, 21.52251395, 20.53171348, 20.54405661, 
21.23782679), i = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), j = c(22.83327369, 
22.67193019, 23.45060031, 23.46845388, 23.08248018, 23.96606992, 
23.07933679, 23.02442296, 22.37349943, 22.90674004), k = c(23.14628986, 
23.61461919, 23.35085341, 23.8013399, 22.76644009, 23.06428044, 
23.3082666, 23.06342696, 23.05868003, 23.20000076), `1` = c(25.45000076, 
25.45000076, 25.45000076, 25.45000076, 25.45000076, 25.45000076, 
25.45000076, 25.45000076, 25.45000076, 25.45000076), `2` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), `3` = c(26.85134277, 27.53970357, 27.51278, 
28.3514299, 27.78354957, 27.10130081, 26.80624001, 26.21709938, 
26.94341987, 27.314133), `4` = c(31.19462648, 31.15539722, 31.21277968, 
31.05590996, 30.81308378, 31.25366008, 31.49064028, 31.04006683, 
31.10000038, 31.38852975), `5` = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), `6` = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `7` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), `8` = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
`9` = c(32.5, 32.23973073, 32.59473101, 32.74366054, 32.57046727, 
31.45634057, 31.95467871, 31.40423339, 31.66785945, 32.32585331
), `10` = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `11` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_)), .Names = c("a", "b", "c", 
"d", "e", "f", "g", "h", "i", "j", "k", "1", "2", "3", "4", "5", 
"6", "7", "8", "9", "10", "11"), row.names = c(NA, 10L), class = "data.frame")

预期输出如下:

structure(list(a = c(23.02522869, 22.75698125, 23.25638988, 23.4618945, 
22.79093387, 23.03077455, 22.79532129, 22.88151619, 22.55282018, 
22.82755365), b = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), c = c(25.1671462, 
24.50974654, 24.37007385, 25.79732045, 25.97977955, 27.75533066, 
25.61258476, 24.68043644, 25.06214083, 24.14456558), d = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), e = c(23.20319674, 23.01461971, 23.44023974, 
23.43422643, 22.74593993, 23.10000038, 22.84586729, 22.76342647, 
22.8174631, 22.90336953), f = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), g = c(22.21603961, 22.13781649, 21.77009694, 21.97247332, 
20.35237025, 21.3321401, 20.67065374, 20.37071737, 20.35318725, 
21.46554323), h = c(21.41724009, 22.76705639, 21.98989349, 21.87247294, 
20.50963963, 20.83750966, 21.52251395, 20.53171348, 20.54405661, 
21.23782679), i = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), j = c(22.83327369, 
22.67193019, 23.45060031, 23.46845388, 23.08248018, 23.96606992, 
23.07933679, 23.02442296, 22.37349943, 22.90674004), k = c(23.14628986, 
23.61461919, 23.35085341, 23.8013399, 22.76644009, 23.06428044, 
23.3082666, 23.06342696, 23.05868003, 23.20000076), `1` = c(25.45000076, 
25.45000076, 25.45000076, 25.45000076, 25.45000076, 25.45000076, 
25.45000076, 25.45000076, 25.45000076, 25.45000076), `2` = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), `3` = c(NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA), `4` = c(NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), `5` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
`6` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `7` = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), `8` = c(NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA), `9` = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), `10` = c(NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), `11` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA)), .Names = c("a", "b", "c", "d", "e", "f", "g", "h", 
"i", "j", "k", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", 
"11"), row.names = c(NA, 10L), class = "data.frame")

1 个答案:

答案 0 :(得分:1)

# For each 10-column group (columns 2:11 and 13:22), set the whole group to NA
# in every row that has fewer than 4 non-NA values within that group.
for (i in seq(2, 22, 11)) {
  cols <- i:(i + 9)
  # Count valid values per row inside this group only, not across all columns.
  too_few <- rowSums(!is.na(test[, cols, drop = FALSE])) < 4
  test[too_few, cols] <- NA
}

接下来尝试:

# Demo on a 10 x 100 random matrix containing six 10-column groups.
test <- matrix(runif(100 * 10), ncol = 100)
test[9, 28:37] <- NA
idx <- c(6:15, 17:26, 28:37, 39:48, 50:59, 61:70)
# Handle each consecutive run of 10 columns in idx as its own group, so the
# valid-value count is taken per group rather than over all 60 columns.
for (cols in split(idx, ceiling(seq_along(idx) / 10))) {
  # Rows with fewer than 4 non-NA values in this group are blanked entirely.
  too_few <- rowSums(!is.na(test[, cols, drop = FALSE])) < 4
  test[too_few, cols] <- NA
}
test