根据行的条件用值替换NA

时间:2019-02-13 11:35:12

标签: r

一个五列的表格(“id”、“othermood_v”、“rass_v”、“gcs_v”和“cam_v”)约有52000行。最后一列“cam_v”是类标签,应有0、1、2三个取值;但目前“cam_v”列中只有1、2和NA。我想根据另外三列“othermood_v”、“rass_v”和“gcs_v”把NA替换为0或1:只要同一行中这三列有任意一列的值为1,cam_v就标记为1,否则标记为0。我尝试按如下条件遍历数据:
 if df$othermood_v>0|df$rass_v>0|df$gcs_v >0, then df$cam_v=1 else 
  0, rm NA = True

if (df$othermood_v+df$rass_v+df$gcs_v) >0, then cam_v=1 else 0

但是我不知道如何使它工作。有什么建议么?顺便说一句,id现在是唯一的。谢谢。

id  othermood_v rass_v  gcs_v   cam_v
100078  0   0   0   NA
100079  0   0   0   NA
100081  0   0   0   NA
100085  1   1   0   NA
100087  1   1   0   NA
100088  1   0   0   NA
100091  1   1   1   2
100094  0   1   0   NA
100095  1   0   0   NA
100096  0   0   0   NA
100098  1   1   1   2
100099  0   1   0   NA
100102  1   0   0   NA
100103  1   0   0   NA
100104  1   1   0   2
100106  0   0   0   NA
100108  1   0   0   NA
100109  1   0   0   NA
100112  1   0   0   NA
100113  1   1   1   1
100114  1   0   0   NA
100116  1   0   0   NA
100117  1   0   0   NA
100118  0   1   0   NA

table screenshot

3 个答案:

答案 0 :(得分:1)

我们先创建一个逻辑向量来标记cam_v中为NA的行,然后用rowSums判断这些行的第2至4列中是否存在大于0的值,并据此把NA替换为1或0

# Flag the rows whose 'cam_v' label is still missing.
na_rows <- is.na(df1$cam_v)
# For those rows only, count how many of columns 2 to 4 hold a positive
# value; a non-zero count means at least one indicator is set.
hit_count <- rowSums(df1[na_rows, 2:4] > 0)
# Write 1 where an indicator is set and 0 otherwise; rows that already
# carry a label are left untouched.
df1$cam_v[na_rows] <- as.integer(hit_count > 0)

数据

# Sample data: 24 rows, all columns integer. 'cam_v' is the class label and
# is NA wherever it has not been assigned yet.
df1 <- data.frame(
  id = c(
    100078L, 100079L, 100081L, 100085L, 100087L, 100088L,
    100091L, 100094L, 100095L, 100096L, 100098L, 100099L,
    100102L, 100103L, 100104L, 100106L, 100108L, 100109L,
    100112L, 100113L, 100114L, 100116L, 100117L, 100118L
  ),
  othermood_v = c(
    0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L,
    1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L
  ),
  rass_v = c(
    0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L,
    0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L
  ),
  gcs_v = c(
    0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L,
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L
  ),
  cam_v = c(
    NA, NA, NA, NA, NA, NA, 2L, NA, NA, NA, 2L, NA,
    NA, NA, 2L, NA, NA, NA, NA, 1L, NA, NA, NA, NA
  )
)

答案 1 :(得分:1)

使用dplyr的解决方案:
library(dplyr)
# Keep every existing cam_v label; where it is NA, derive it from the three
# indicator columns: 1 if their sum is positive, 0 otherwise.
df_clean <- mutate(
  df,
  cam_v = ifelse(
    is.na(cam_v),
    ifelse((othermood_v + rass_v + gcs_v) > 0, 1, 0),
    cam_v
  )
)
> df_clean
       id othermood_v rass_v gcs_v cam_v
1  100078           0      0     0     0
2  100079           0      0     0     0
3  100081           0      0     0     0
4  100085           1      1     0     1
5  100087           1      1     0     1
6  100088           1      0     0     1
7  100091           1      1     1     2
8  100094           0      1     0     1
9  100095           1      0     0     1
10 100096           0      0     0     0
11 100098           1      1     1     2
12 100099           0      1     0     1
13 100102           1      0     0     1
14 100103           1      0     0     1
15 100104           1      1     0     2
16 100106           0      0     0     0
17 100108           1      0     0     1
18 100109           1      0     0     1
19 100112           1      0     0     1
20 100113           1      1     1     1
21 100114           1      0     0     1
22 100116           1      0     0     1
23 100117           1      0     0     1
24 100118           0      1     0     1

数据

通常,最好用dput(head(data, 20))来提供示例数据,方便他人直接复现。我用下面的代码把您贴出的表格转换成了数据框:

# Whitespace-separated sample table copied from the question; the first line
# holds the column names and NA marks a missing class label.
sample_text <-
  "id  othermood_v rass_v  gcs_v   cam_v
  100078  0   0   0   NA
  100079  0   0   0   NA
  100081  0   0   0   NA
  100085  1   1   0   NA
  100087  1   1   0   NA
  100088  1   0   0   NA
  100091  1   1   1   2
  100094  0   1   0   NA
  100095  1   0   0   NA
  100096  0   0   0   NA
  100098  1   1   1   2
  100099  0   1   0   NA
  100102  1   0   0   NA
  100103  1   0   0   NA
  100104  1   1   0   2
  100106  0   0   0   NA
  100108  1   0   0   NA
  100109  1   0   0   NA
  100112  1   0   0   NA
  100113  1   1   1   1
  100114  1   0   0   NA
  100116  1   0   0   NA
  100117  1   0   0   NA
  100118  0   1   0   NA"
# Parse the text into a data frame, using the first line as the header.
df <- read.table(header = TRUE, text = sample_text)

答案 2 :(得分:0)

您的方法已经很接近了,只需稍微改变一下写法即可。以下代码应该可以工作:

# Fill in only the missing cam_v labels: 1 when any of the three indicator
# columns is positive, 0 otherwise. The is.na() guard keeps labels that are
# already present (1 or 2); without it every labelled row would be
# overwritten with 1 or 0.
df$cam_v <- ifelse(
  is.na(df$cam_v),
  ifelse(df$othermood_v > 0 | df$rass_v > 0 | df$gcs_v > 0, 1, 0),
  df$cam_v
)
相关问题