我正在尝试用R填充缺失的值。
如果所有其他值均为0,那么我想用0填充缺失值。
下面显示一个示例。在此数据中,c 列中除 NA 之外的所有值均为0。因此,我想用0填充这些 NA。
# Reproducible example: two random columns plus an all-zero column that has
# missing values in rows 2, 4 and 8.
set.seed(1000)
a <- rnorm(10)
b <- rnorm(10)
c <- replace(numeric(10), c(2, 4, 8), NA)
test <- cbind(a, b, c)
a b c
[1,] 0.1901328 0.6141360 0
[2,] -0.9884426 0.6508993 NA
[3,] -0.9783197 2.1059862 0
[4,] -1.8584651 0.4354903 NA
[5,] 0.6623067 1.6382126 0
[6,] -1.2542872 0.1370791 0
[7,] -1.9971880 1.9302738 0
[8,] 1.9417941 0.0449239 NA
[9,] 1.7046508 1.0726263 0
[10,] -0.7289351 -2.8374912 0
我找不到很好的代码示例。你能给我一个好的建议吗?
答案 0 :(得分:1)
在基础R(base R)中,我们可以使用apply逐列检查该列中除NA之外的所有值是否均为0,如果是,则将缺失值替换为0。
# Column by column: when every non-missing value equals 0, fill the NAs
# with 0; any other column is returned untouched.
apply(test, 2, function(col) {
  only_zeros <- all(col == 0, na.rm = TRUE)
  if (only_zeros) {
    col[is.na(col)] <- 0
  }
  col
})
# a b c
# [1,] -0.4458 -0.9824 0
# [2,] -1.2059 -0.5545 0
# [3,] 0.0411 0.1214 0
# [4,] 0.6394 -0.1209 0
# [5,] -0.7866 -1.3360 0
# [6,] -0.3855 0.1701 0
# [7,] -0.4759 0.1551 0
# [8,] 0.7198 0.0249 0
# [9,] -0.0185 -2.0466 0
#[10,] -1.3731 0.2132 0
答案 1 :(得分:1)
一种快速的方法,前提是您所有的列都是数值型:
# TRUE for each column whose count of zeros equals its count of non-missing
# entries, i.e. every observed value in that column is 0.
colSums(test == 0, na.rm = TRUE) == colSums(!is.na(test))
a b c
FALSE FALSE TRUE
我们更改TRUE列
# Indices of the columns whose non-missing entries are all 0.
wh <- which(colSums(test == 0, na.rm = TRUE) == colSums(!is.na(test)))
# Overwrite the missing entries of each of those columns with 0.
for (i in wh) {
  test[, i] <- replace(test[, i], is.na(test[, i]), 0)
}
a b c
[1,] -0.44577826 -0.98242783 0
[2,] -1.20585657 -0.55448870 0
[3,] 0.04112631 0.12138119 0
[4,] 0.63938841 -0.12087232 0
[5,] -0.78655436 -1.33604105 0
[6,] -0.38548930 0.17005748 0
[7,] -0.47586788 0.15507872 0
[8,] 0.71975069 0.02493187 0
[9,] -0.01850562 -2.04658541 0
[10,] -1.37311776 0.21315411 0
答案 2 :(得分:1)
使用data.table中的setnafill,可以分两步进行——先检查哪些列全为0,然后填充它们:
library(data.table)
test <- data.table(test)
# Flag the columns whose non-missing values are all 0. Testing `x != 0`
# hands any() a logical vector, so it does not warn about coercing a double
# argument to logical the way any(x, na.rm = TRUE) does on numeric columns.
# vapply() pins the result to one logical per column.
empty_cols <- vapply(
  test,
  function(x) !any(x != 0, na.rm = TRUE),
  logical(1)
)
# use setnafill to do the replacement in-place
setnafill(test, type = "const", fill = 0, cols = which(empty_cols))
# print the updated table
test[]
# a b c
# 1: -0.44577826 -0.98242783 0
# 2: -1.20585657 -0.55448870 0
# 3: 0.04112631 0.12138119 0
# 4: 0.63938841 -0.12087232 0
# 5: -0.78655436 -1.33604105 0
# 6: -0.38548930 0.17005748 0
# 7: -0.47586788 0.15507872 0
# 8: 0.71975069 0.02493187 0
# 9: -0.01850562 -2.04658541 0
# 10: -1.37311776 0.21315411 0
答案 3 :(得分:0)
一个dplyr选项可能是:
或者:
# Columns that contain nothing but 0 and NA get their NAs replaced by 0;
# every other column passes through unchanged.
test %>%
  as.data.frame() %>%
  mutate_if(
    function(x) all(x %in% c(0, NA)),
    function(x) replace(x, is.na(x), 0)
  )
a b c
1 -0.44577826 -0.98242783 0
2 -1.20585657 -0.55448870 0
3 0.04112631 0.12138119 0
4 0.63938841 -0.12087232 0
5 -0.78655436 -1.33604105 0
6 -0.38548930 0.17005748 0
7 -0.47586788 0.15507872 0
8 0.71975069 0.02493187 0
9 -0.01850562 -2.04658541 0
10 -1.37311776 0.21315411 0
答案 4 :(得分:0)
首先将test转换为数据框(data frame),以便使用$运算符访问列:
# Rebuild the example data: two random columns plus an all-zero column with
# missing values in rows 2, 4 and 8, then store it as a data frame.
set.seed(1000)
a <- rnorm(10)
b <- rnorm(10)
c <- replace(numeric(10), c(2, 4, 8), NA)
test <- data.frame(cbind(a, b, c))
将变量test$c转换为因子,并把c中为NA的位置赋为级别“0”:
# Turn column c into a factor; its observed values give the single level "0".
test$c <- factor(test$c)
# Assign that existing level to the entries that are still missing.
test$c <- replace(test$c, is.na(test$c), "0")
检查test数据集,确认NA已被替换为“0”:
test
a b c
-0.44577826 -0.98242783 0
-1.20585657 -0.55448870 0
0.04112631 0.12138119 0
0.63938841 -0.12087232 0
-0.78655436 -1.33604105 0
-0.38548930 0.17005748 0
-0.47586788 0.15507872 0
0.71975069 0.02493187 0
-0.01850562 -2.04658541 0
-1.37311776 0.21315411 0
答案 5 :(得分:0)
单行基本选项:
# Zero-fill only the columns whose non-missing values are all 0. Counting the
# non-zero entries (rather than summing the values) keeps a column such as
# c(1, -1, NA), whose sum is also 0, from being overwritten.
test[, colSums(test != 0, na.rm = TRUE) == 0] <- 0
还有dplyr
library(dplyr)
# Select the columns whose non-missing values are all 0 and replace their NAs
# with 0. Testing for non-zero entries (rather than a zero sum) avoids
# matching columns whose values merely cancel out, e.g. c(1, -1, NA).
test %>%
  as_tibble() %>%
  mutate_if(
    ~ !any(. != 0, na.rm = TRUE),
    ~ replace(., is.na(.), 0)
  )