我想按行、有条件地把 10 列的值转换为 NA。对于每个 [1 row, 10 columns] 范围,如果其中有效观测值(非 NA)的个数小于 4,则该范围内的所有值都必须转换为 NA。判断条件为:
sum(!is.na(x)) < 4
但是,我目前最多只能做到把这些列的值变成 TRUE 或 FALSE。我漏掉了什么?
我现在的代码:
# Question's attempt: for each 10-column group (start columns 2 and 13, from
# seq(2, 22, 11)), blank out the rows that have fewer than 4 non-NA values.
# NOTE(review): this does not achieve that. The anonymous function's last
# expression is the replacement call `is.na(x) <- <logical>`; the value of an
# assignment is its right-hand side, so each row yields a single TRUE/FALSE
# rather than the modified row. apply() therefore returns one logical per row,
# and that vector is written over the whole 10-column slice.
for (i in seq(2,22,11)) {
test[,i:(i+9)] <- apply(test[,i:(i+9)], 1, function(x) {is.na(x) <- sum(!is.na(x)) < 4})
}
仅针对一个 [1 row, 10 columns] 组合时,代码是这样的:
# Single-case version: row 1, columns 13:22 of `test`. The right-hand side is
# one logical (TRUE when fewer than 4 of those values are non-NA). `is.na<-`
# sets the elements selected by that index to NA, so FALSE selects nothing and
# leaves the slice unchanged.
# NOTE(review): a lone TRUE is presumably recycled across the whole slice,
# blanking all 10 values -- confirm on the real data.
is.na(test[1,13:22]) <- sum(!is.na(test[1,13:22])) < 4
我的数据(10x22数据框):
structure(list(a = c(23.02522869, 22.75698125, 23.25638988, 23.4618945,
22.79093387, 23.03077455, 22.79532129, 22.88151619, 22.55282018,
22.82755365), b = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), c = c(25.1671462,
24.50974654, 24.37007385, 25.79732045, 25.97977955, 27.75533066,
25.61258476, 24.68043644, 25.06214083, 24.14456558), d = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), e = c(23.20319674, 23.01461971, 23.44023974,
23.43422643, 22.74593993, 23.10000038, 22.84586729, 22.76342647,
22.8174631, 22.90336953), f = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), g = c(22.21603961, 22.13781649, 21.77009694, 21.97247332,
20.35237025, 21.3321401, 20.67065374, 20.37071737, 20.35318725,
21.46554323), h = c(21.41724009, 22.76705639, 21.98989349, 21.87247294,
20.50963963, 20.83750966, 21.52251395, 20.53171348, 20.54405661,
21.23782679), i = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), j = c(22.83327369,
22.67193019, 23.45060031, 23.46845388, 23.08248018, 23.96606992,
23.07933679, 23.02442296, 22.37349943, 22.90674004), k = c(23.14628986,
23.61461919, 23.35085341, 23.8013399, 22.76644009, 23.06428044,
23.3082666, 23.06342696, 23.05868003, 23.20000076), `1` = c(25.45000076,
25.45000076, 25.45000076, 25.45000076, 25.45000076, 25.45000076,
25.45000076, 25.45000076, 25.45000076, 25.45000076), `2` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), `3` = c(26.85134277, 27.53970357, 27.51278,
28.3514299, 27.78354957, 27.10130081, 26.80624001, 26.21709938,
26.94341987, 27.314133), `4` = c(31.19462648, 31.15539722, 31.21277968,
31.05590996, 30.81308378, 31.25366008, 31.49064028, 31.04006683,
31.10000038, 31.38852975), `5` = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), `6` = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `7` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), `8` = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
`9` = c(32.5, 32.23973073, 32.59473101, 32.74366054, 32.57046727,
31.45634057, 31.95467871, 31.40423339, 31.66785945, 32.32585331
), `10` = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `11` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_)), .Names = c("a", "b", "c",
"d", "e", "f", "g", "h", "i", "j", "k", "1", "2", "3", "4", "5",
"6", "7", "8", "9", "10", "11"), row.names = c(NA, 10L), class = "data.frame")
预期输出如下:
structure(list(a = c(23.02522869, 22.75698125, 23.25638988, 23.4618945,
22.79093387, 23.03077455, 22.79532129, 22.88151619, 22.55282018,
22.82755365), b = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), c = c(25.1671462,
24.50974654, 24.37007385, 25.79732045, 25.97977955, 27.75533066,
25.61258476, 24.68043644, 25.06214083, 24.14456558), d = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), e = c(23.20319674, 23.01461971, 23.44023974,
23.43422643, 22.74593993, 23.10000038, 22.84586729, 22.76342647,
22.8174631, 22.90336953), f = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), g = c(22.21603961, 22.13781649, 21.77009694, 21.97247332,
20.35237025, 21.3321401, 20.67065374, 20.37071737, 20.35318725,
21.46554323), h = c(21.41724009, 22.76705639, 21.98989349, 21.87247294,
20.50963963, 20.83750966, 21.52251395, 20.53171348, 20.54405661,
21.23782679), i = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), j = c(22.83327369,
22.67193019, 23.45060031, 23.46845388, 23.08248018, 23.96606992,
23.07933679, 23.02442296, 22.37349943, 22.90674004), k = c(23.14628986,
23.61461919, 23.35085341, 23.8013399, 22.76644009, 23.06428044,
23.3082666, 23.06342696, 23.05868003, 23.20000076), `1` = c(25.45000076,
25.45000076, 25.45000076, 25.45000076, 25.45000076, 25.45000076,
25.45000076, 25.45000076, 25.45000076, 25.45000076), `2` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), `3` = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), `4` = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), `5` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
`6` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `7` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), `8` = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), `9` = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), `10` = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), `11` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA)), .Names = c("a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
"11"), row.names = c(NA, 10L), class = "data.frame")
答案 0 :(得分:1)
# For every 10-column group (starting at columns 2 and 13, as in the question),
# set the whole group to NA in each row that has fewer than 4 valid (non-NA)
# observations inside that group.
for (start in seq(2, 22, 11)) {
  cols <- start:(start + 9)
  # Count valid values per row within this group only; counting NAs over the
  # full row would mix in columns that belong to other groups.
  too_few <- rowSums(!is.na(test[, cols, drop = FALSE])) < 4
  # Skip the assignment when no row qualifies, to avoid a zero-row replacement.
  if (any(too_few)) {
    test[too_few, cols] <- NA
  }
}
接下来尝试:
# Demo on a 10 x 100 numeric matrix with one fully missing 10-column block.
test <- matrix(runif(100 * 10), ncol = 100)
test[9, 28:37] <- NA
# Column indices of the six 10-column groups to check.
idx <- c(6:15, 17:26, 28:37, 39:48, 50:59, 61:70)
# Split the flat index vector into consecutive groups of 10 columns so each
# group is evaluated on its own.
groups <- split(idx, ceiling(seq_along(idx) / 10))
for (cols in groups) {
  # Rows with fewer than 4 valid (non-NA) values inside this group.
  too_few <- rowSums(!is.na(test[, cols, drop = FALSE])) < 4
  if (any(too_few)) {
    # Blank the entire group for those rows; all other cells stay untouched.
    test[too_few, cols] <- NA
  }
}
test