在两种条件下替换NA值

时间:2014-07-24 12:55:38

标签: r

我有这个数据:

       Pclass          Name    Sex   Age     Fare family

56       1            Mr   male    NA  35.5000      0
2        1           Mrs female 38.00  71.2833      1
3        3          Miss female 26.00   7.9250      0
890      1            Mr   male 26.00  30.0000      0
861      3            Mr   male 41.00  14.1083      2
864      3          Miss female    NA  69.5500     10
862      2            Mr   male 21.00  11.5000      1

我想在此条件下替换年龄NA值:

If Name == Mr & Pclass == 1 Then Age = 41

我该怎么做?

1 个答案:

答案 0 :(得分:1)

 # Sample passenger table; the row names keep the original record ids.
 dat <- data.frame(
   Pclass = c(1L, 1L, 3L, 1L, 3L, 3L, 2L),
   Name = c("Mr", "Mrs", "Miss", "Mr", "Mr", "Miss", "Mr"),
   Sex = c("male", "female", "female", "male", "male", "female", "male"),
   Age = c(NA, 38, 26, 26, 41, NA, 21),
   Fare = c(35.5, 71.2833, 7.925, 30, 14.1083, 69.55, 11.5),
   family = c(0L, 1L, 0L, 0L, 2L, 10L, 1L),
   row.names = c("56", "2", "3", "890", "861", "864", "862"),
   stringsAsFactors = FALSE
 )

 # Fill Age with 41 only where all three conditions hold:
 # the title is "Mr", the class is 1, and the age is currently missing.
 dat[dat$Name == "Mr" & dat$Pclass == 1 & is.na(dat$Age), "Age"] <- 41

 dat
 #    Pclass Name    Sex Age    Fare family
 #56       1   Mr   male  41 35.5000      0   ##NA in Age got replaced by 41
 #2        1  Mrs female  38 71.2833      1
 #3        3 Miss female  26  7.9250      0
 #890      1   Mr   male  26 30.0000      0
 #861      3   Mr   male  41 14.1083      2
 #864      3 Miss female  NA 69.5500     10
 #862      2   Mr   male  21 11.5000      1

更新

假设您的数据集如下(其中 Name 列是因子,并且部分取值带有多余的前导/尾随空格):

# Same passengers as before, but Name is now a factor whose "Mr" levels
# carry stray leading/trailing whitespace ("  Mr" and "  Mr ").
dat <- structure(
  list(
    Pclass = c(1L, 1L, 3L, 1L, 3L, 3L, 2L),
    Name = structure(
      c(2L, 4L, 1L, 2L, 3L, 1L, 2L),
      .Label = c("Miss", "  Mr", "  Mr ", "Mrs"),
      class = "factor"
    ),
    Sex = structure(
      c(2L, 1L, 1L, 2L, 2L, 1L, 2L),
      .Label = c("female", "male"),
      class = "factor"
    ),
    Age = c(NA, 38, 26, 26, 41, NA, 21),
    Fare = c(35.5, 71.2833, 7.925, 30, 14.1083, 69.55, 11.5),
    family = c(0L, 1L, 0L, 0L, 2L, 10L, 1L)
  ),
  .Names = c("Pclass", "Name", "Sex", "Age", "Fare", "family"),
  class = "data.frame",
  row.names = c("56", "2", "3", "890", "861", "864", "862")
)

# No level is exactly "Mr", so the mask below is all FALSE and Age is
# left untouched.
dat$Age[with(dat, Name == "Mr" & Pclass == 1 & is.na(Age))] <- 41 # will not work
levels(dat$Name)
# [1] "Miss"  "  Mr"  "  Mr " "Mrs"

# Convert the factor to character and strip the surrounding whitespace with
# base R's trimws(), so the comparison against "Mr" can match.
dat$Name <- trimws(as.character(dat$Name))
dat$Age[with(dat, Name == "Mr" & Pclass == 1 & is.na(Age))] <- 41

head(dat, 1)
#   Pclass Name  Sex Age Fare family
#56      1   Mr male  41 35.5      0