'忽略'索引条件下的缺失数据

时间:2014-01-31 14:09:15

标签: r missing-data

我正在尝试创建一个索引,如果满足条件,该索引会增加1。如果没有丢失的数据,代码似乎可以工作。但是,如果缺少数据,索引也会变为“NA”。我怎样才能避免这种情况(基本上忽略了丢失的数据)?

我试过 na.rm = TRUE / FALSE,但据我所知,这个参数只在函数调用(例如 sum())中起作用,对直接的向量加法无效。我想应该有一个相当直接的解决办法吧?非常感谢。

这是我的代码:

# Seed the index: 0 everywhere, 1 where lelecsystem equals 3. Rows whose
# lelecsystem is NA are left at 0 (which() drops NA comparisons).
IAEP$PSIndex <- 0
IAEP$PSIndex[which(IAEP$lelecsystem == 3)] <- 1

# Add one point per satisfied condition. Each comparison yields TRUE/FALSE/NA,
# and an NA in any single term makes the whole sum for that row NA.
IAEP$PSIndex <- IAEP$PSIndex +
  (IAEP$govstruct == 3) +
  (IAEP$reservedseat == 2) +
  (IAEP$uppub == 1) +
  (IAEP$bankpol == 1) +
  (IAEP$execveto == 1 & IAEP$legveto == 1)

以下是一些示例数据:

# Sample data (dput output): 10 rows, row names 1474-1483, years 1993-2002,
# all with cowc factor code 18 (level "BOS"). In the first three rows every
# indicator column except lelecsystem and uppub is NA; the stored PSIndex
# column holds the NA results that the question wants to avoid.
# The structure() call already sets class = "data.frame".
IAEP <- as.data.frame(structure(list(cowc = structure(c(18L, 18L, 18L, 18L, 18L, 18L, 
18L, 18L, 18L, 18L), .Label = c("AFG", "ALB", "ALG", "ANG", "ARG", 
"ARM", "AUL", "AUS", "AZE", "BAH", "BEL", "BEN", "BFO", "BHU", 
"BLR", "BNG", "BOL", "BOS", "BOT", "BRA", "BUI", "BUL", "CAM", 
"CAN", "CAO", "CDI", "CEN", "CHA", "CHL", "CHN", "COL", "COM", 
"CON", "COS", "CRO", "CUB", "CYP", "CZE", "CZR", "DEN", "DJI", 
"DOM", "DRC", "DRV", "ECU", "EGY", "EQG", "ERI", "EST", "ETH", 
"ETM", "FIJ", "FIN", "FRN", "GAB", "GAM", "GDR", "GFR", "GHA", 
"GMY", "GNB", "GRC", "GRG", "GUA", "GUI", "GUY", "HAI", "HON", 
"HUN", "IND", "INS", "IRE", "IRN", "IRQ", "ISR", "ITA", "JAM", 
"JOR", "JPN", "KEN", "KUW", "KYR", "KZK", "LAO", "LAT", "LBR", 
"LEB", "LES", "LIB", "LIT", "MAA", "MAC", "MAG", "MAL", "MAS", 
"MAW", "MEX", "MLD", "MLI", "MON", "MOR", "MYA", "MZM", "NAM", 
"NEP", "NEW", "NIC", "NIG", "NIR", "NOR", "NTH", "OMA", "PAK", 
"PAN", "PAR", "PER", "PHI", "PNG", "POL", "POR", "PRK", "QAT", 
"ROK", "ROM", "RUS", "RVN", "RWA", "SAF", "SAL", "SAU", "SEN", 
"SIE", "SIN", "SLO", "SLV", "SOL", "SOM", "SPN", "SRI", "SUD", 
"SWA", "SWD", "SWZ", "SYR", "TAJ", "TAW", "TAZ", "THI", "TKM", 
"TOG", "TRI", "TUN", "TUR", "UAE", "UGA", "UKG", "UKR", "URU", 
"USA", "UZB", "VEN", "YAR", "YEM", "YPR", "YUG", "ZAM", "ZIM"
), class = "factor"), year = 1993:2002, PSIndex = c(NA, NA, NA, 
4, 4, 4, 4, 4, 4, 4), lelecsystem = c(3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L), govstruct = c(NA, NA, NA, 3L, 3L, 3L, 3L, 3L, 
3L, 3L), courtexec = c(NA, NA, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), reservedseat = c(NA, NA, NA, 2L, 2L, 2L, 2L, 2L, 2L, 2L), 
    uppub = c(1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), bankpol = c(NA, 
    NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L), execveto = c(NA, NA, 
    NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L), legveto = c(NA, NA, NA, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("cowc", "year", 
"PSIndex", "lelecsystem", "govstruct", "courtexec", "reservedseat", 
"uppub", "bankpol", "execveto", "legveto"), row.names = 1474:1483, class = "data.frame"))

更新/澄清:例如在1993年至1995年期间,该指数为NA,因为govstruct等变量缺失。我希望此时索引不是NA而是2,因为索引的2个条件得到了满足(lelecsystem == 3 和 uppub == 1)。

cowc    year    PSIndex lelecsystem govstruct   courtexec   reservedseat    uppub   bankpol execveto    legveto
1   BOS 1993    NA  3   NA  NA  NA  1   NA  NA  NA
2   BOS 1994    NA  3   NA  NA  NA  1   NA  NA  NA
3   BOS 1995    NA  3   NA  NA  NA  1   NA  NA  NA
4   BOS 1996    4   3   3   0   2   0   1   0   1
5   BOS 1997    4   3   3   0   2   0   1   0   1
6   BOS 1998    4   3   3   0   2   0   1   0   1
7   BOS 1999    4   3   3   0   2   0   1   0   1
8   BOS 2000    4   3   3   0   2   0   1   0   1
9   BOS 2001    4   3   3   0   2   0   1   0   1
10  BOS 2002    4   3   3   0   2   0   1   0   1

1 个答案:

答案 0 :(得分:2)

这是一个解决方案:

# Build one logical column per index condition (rows = observations).
# A comparison against a missing value yields NA rather than TRUE/FALSE.
# lelecsystem == 3 is included so the total matches the question's index,
# which starts at 1 for those rows.
conds <- cbind(
  IAEP$lelecsystem == 3,
  IAEP$govstruct == 3,
  IAEP$reservedseat == 2,
  IAEP$uppub == 1,
  IAEP$bankpol == 1,
  IAEP$execveto == 1 & IAEP$legveto == 1
)

# Count the satisfied conditions per row. na.rm = TRUE drops NA entries, so a
# condition that cannot be evaluated counts as "not met" instead of turning
# the whole index into NA.
IAEP$PSIndex <- rowSums(conds, na.rm = TRUE)