Question

我正在尝试用R填充缺失的值。

如果所有其他值均为0，那么我想用0填充缺失值。

下面给出一个示例。在这份数据中，c 列除 NA 之外的所有值均为 0，因此我想用 0 来填充其中的 NA。

# Reproducible example: two random columns plus an all-zero column that
# has missing values in rows 2, 4 and 8.
set.seed(1000)
a <- rnorm(10)
b <- rnorm(10)
c <- numeric(10)
is.na(c) <- c(2, 4, 8)
test <- cbind(a, b, c)

               a          b  c
 [1,]  0.1901328  0.6141360  0
 [2,] -0.9884426  0.6508993 NA
 [3,] -0.9783197  2.1059862  0
 [4,] -1.8584651  0.4354903 NA
 [5,]  0.6623067  1.6382126  0
 [6,] -1.2542872  0.1370791  0
 [7,] -1.9971880  1.9302738  0
 [8,]  1.9417941  0.0449239 NA
 [9,]  1.7046508  1.0726263  0
[10,] -0.7289351 -2.8374912  0

我找不到很好的代码示例。你能给我一个好的建议吗？

Answer 1

在 base R（基础 R）中，我们可以使用 apply 逐列检查：如果某一列中所有非缺失值都为 0，就把该列的缺失值替换为 0。

# Column by column: when every non-missing entry is zero, the missing
# entries are set to zero too; any other column is returned unchanged.
apply(test, 2, function(col) {
  if (!all(col == 0, na.rm = TRUE)) {
    return(col)
  }
  replace(col, is.na(col), 0)
})

#            a       b c
# [1,] -0.4458 -0.9824 0
# [2,] -1.2059 -0.5545 0
# [3,]  0.0411  0.1214 0
# [4,]  0.6394 -0.1209 0
# [5,] -0.7866 -1.3360 0
# [6,] -0.3855  0.1701 0
# [7,] -0.4759  0.1551 0
# [8,]  0.7198  0.0249 0
# [9,] -0.0185 -2.0466 0
#[10,] -1.3731  0.2132 0

Answer 2

一种快速的方法（前提是所有列都是数值型）：

# A column qualifies when it has no observed (non-NA) entry that differs
# from zero, i.e. all of its observed entries are zero.
colSums(!is.na(test) & test != 0) == 0
    a     b     c 
FALSE FALSE  TRUE

然后只修改结果为 TRUE 的那些列：

# Indices of the columns whose observed (non-NA) entries are all zero
wh <- which(colSums(test == 0, na.rm = TRUE) == colSums(!is.na(test)))

# Overwrite the missing cells of each such column with zero
for (col in wh) {
  na_rows <- is.na(test[, col])
  test[na_rows, col] <- 0
}

                a           b c
 [1,] -0.44577826 -0.98242783 0
 [2,] -1.20585657 -0.55448870 0
 [3,]  0.04112631  0.12138119 0
 [4,]  0.63938841 -0.12087232 0
 [5,] -0.78655436 -1.33604105 0
 [6,] -0.38548930  0.17005748 0
 [7,] -0.47586788  0.15507872 0
 [8,]  0.71975069  0.02493187 0
 [9,] -0.01850562 -2.04658541 0
[10,] -1.37311776  0.21315411 0

Answer 3

使用 data.table 中的 setnafill 可以分两步完成：先检查哪些列的值全为 0，然后填充这些列：

library(data.table)
test <- data.table(test)

# Flag the columns that contain no non-zero observed value.
# Comparing with `!= 0` produces a logical vector, so any() is never handed
# doubles and no double->logical coercion warning is raised; vapply() pins
# the result to exactly one logical per column.
empty_cols <- vapply(
  test,
  function(x) !any(x != 0, na.rm = TRUE),
  logical(1)
)

# use setnafill to do the replacement in-place, only on the flagged columns
setnafill(test, type = "const", fill = 0, cols = which(empty_cols))
test[]
#               a           b c
#  1: -0.44577826 -0.98242783 0
#  2: -1.20585657 -0.55448870 0
#  3:  0.04112631  0.12138119 0
#  4:  0.63938841 -0.12087232 0
#  5: -0.78655436 -1.33604105 0
#  6: -0.38548930  0.17005748 0
#  7: -0.47586788  0.15507872 0
#  8:  0.71975069  0.02493187 0
#  9: -0.01850562 -2.04658541 0
# 10: -1.37311776  0.21315411 0

Answer 4

一个 dplyr 选项可能是：

或者：

# dplyr provides %>%, mutate(), across() and where(); load it so the snippet
# runs on its own.
library(dplyr)

# Select the columns made up solely of 0 and NA, then turn their NAs into 0.
# across() + where() is the current replacement for the superseded
# mutate_if().
test %>%
  as.data.frame() %>%
  mutate(across(
    where(function(x) all(x %in% c(0, NA))),
    function(x) replace(x, is.na(x), 0)
  ))

             a           b c
1  -0.44577826 -0.98242783 0
2  -1.20585657 -0.55448870 0
3   0.04112631  0.12138119 0
4   0.63938841 -0.12087232 0
5  -0.78655436 -1.33604105 0
6  -0.38548930  0.17005748 0
7  -0.47586788  0.15507872 0
8   0.71975069  0.02493187 0
9  -0.01850562 -2.04658541 0
10 -1.37311776  0.21315411 0

Answer 5

首先将 test 转换为数据框（data frame），以便使用 $ 运算符：

   # Rebuild the example data: two random columns plus an all-zero column
   # with missing values in rows 2, 4 and 8.
   set.seed(1000)
   a <- rnorm(10)
   b <- rnorm(10)
   c <- numeric(10)
   is.na(c) <- c(2, 4, 8)

   # Bind the columns into a matrix and turn it into a data frame
   test <- as.data.frame(cbind(a, b, c))

如果c为“ NA”，则将变量test$c转换为因子并创建级别“ 0”

# Turn column c into a factor (its only observed value, 0, becomes the
# level "0"), then assign that level to the missing entries.
c_factor <- factor(test$c)
c_factor[is.na(c_factor)] <- "0"
test$c <- c_factor

查看 test 数据集，确认其中的“NA”已被替换为“0”：

test

     a           b      c
-0.44577826 -0.98242783 0
-1.20585657 -0.55448870 0
0.04112631  0.12138119  0
0.63938841  -0.12087232 0
-0.78655436 -1.33604105 0
-0.38548930 0.17005748  0
-0.47586788 0.15507872  0
0.71975069  0.02493187  0
-0.01850562 -2.04658541 0
-1.37311776 0.21315411  0

Answer 6

单行的 base R 写法：

# Zero-fill only the columns whose observed (non-NA) values are all zero.
# Counting the non-zero entries, rather than summing the values, leaves
# columns such as c(-1, 1, NA) -- whose values also sum to 0 -- untouched.
test[, colSums(test != 0, na.rm = TRUE) == 0] <- 0

dplyr 中也有类似的思路：

library(dplyr)

# Pick the columns whose observed (non-NA) values are all zero and replace
# their NAs with 0. Testing the values themselves, rather than their sum,
# avoids matching columns whose non-zero values merely cancel out.
# across() + where() replaces the superseded mutate_if().
test %>%
  as_tibble() %>%
  mutate(across(
    where(function(x) all(x == 0, na.rm = TRUE)),
    function(x) replace(x, is.na(x), 0)
  ))

在 R 中，如果所有其他值均为 0，则用 0 填充缺失值

6 个答案: