我正在尝试创建一个指数:每满足一个条件,该指数就增加1。如果没有缺失数据,代码可以正常工作。但是,只要有数据缺失,指数也会变成“NA”。我怎样才能避免这种情况(也就是直接忽略缺失的数据)?
我试过 na.rm = TRUE / FALSE,但据我所知,它只能作为函数的参数使用。我想应该有一个相当直接的解决方案吧?非常感谢。
这是我的代码:
# Build PSIndex as a running count of the conditions each row satisfies.
# Every row starts at 0 and is set to 1 where lelecsystem equals 3.
IAEP$PSIndex <- 0
IAEP$PSIndex[IAEP$lelecsystem==3] <- 1
# Each comparison below yields a logical vector that is coerced to 0/1 when
# added to the running total. A comparison against NA yields NA, and adding NA
# turns the total into NA, so a row where any of these comparisons is NA ends
# up with an NA index.
IAEP$PSIndex <- IAEP$PSIndex + (IAEP$govstruct==3)
IAEP$PSIndex <- IAEP$PSIndex + (IAEP$reservedseat==2)
IAEP$PSIndex <- IAEP$PSIndex + (IAEP$uppub==1)
IAEP$PSIndex <- IAEP$PSIndex + (IAEP$bankpol==1)
# This condition counts only when both veto variables equal 1.
IAEP$PSIndex <- IAEP$PSIndex + (IAEP$execveto==1 & IAEP$legveto==1)
以下是一些示例数据:
# Sample data: ten rows for the years 1993-2002, all with cowc level 18 ("BOS").
# The first three rows have NA in govstruct, courtexec, reservedseat, bankpol,
# execveto and legveto; the stored PSIndex column is NA for those rows as well.
# NOTE(review): this looks like dput() output of a subset of a larger data
# frame (row names 1474:1483, full factor level set) -- confirm.
IAEP <- as.data.frame(structure(list(cowc = structure(c(18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L), .Label = c("AFG", "ALB", "ALG", "ANG", "ARG",
"ARM", "AUL", "AUS", "AZE", "BAH", "BEL", "BEN", "BFO", "BHU",
"BLR", "BNG", "BOL", "BOS", "BOT", "BRA", "BUI", "BUL", "CAM",
"CAN", "CAO", "CDI", "CEN", "CHA", "CHL", "CHN", "COL", "COM",
"CON", "COS", "CRO", "CUB", "CYP", "CZE", "CZR", "DEN", "DJI",
"DOM", "DRC", "DRV", "ECU", "EGY", "EQG", "ERI", "EST", "ETH",
"ETM", "FIJ", "FIN", "FRN", "GAB", "GAM", "GDR", "GFR", "GHA",
"GMY", "GNB", "GRC", "GRG", "GUA", "GUI", "GUY", "HAI", "HON",
"HUN", "IND", "INS", "IRE", "IRN", "IRQ", "ISR", "ITA", "JAM",
"JOR", "JPN", "KEN", "KUW", "KYR", "KZK", "LAO", "LAT", "LBR",
"LEB", "LES", "LIB", "LIT", "MAA", "MAC", "MAG", "MAL", "MAS",
"MAW", "MEX", "MLD", "MLI", "MON", "MOR", "MYA", "MZM", "NAM",
"NEP", "NEW", "NIC", "NIG", "NIR", "NOR", "NTH", "OMA", "PAK",
"PAN", "PAR", "PER", "PHI", "PNG", "POL", "POR", "PRK", "QAT",
"ROK", "ROM", "RUS", "RVN", "RWA", "SAF", "SAL", "SAU", "SEN",
"SIE", "SIN", "SLO", "SLV", "SOL", "SOM", "SPN", "SRI", "SUD",
"SWA", "SWD", "SWZ", "SYR", "TAJ", "TAW", "TAZ", "THI", "TKM",
"TOG", "TRI", "TUN", "TUR", "UAE", "UGA", "UKG", "UKR", "URU",
"USA", "UZB", "VEN", "YAR", "YEM", "YPR", "YUG", "ZAM", "ZIM"
), class = "factor"), year = 1993:2002, PSIndex = c(NA, NA, NA,
4, 4, 4, 4, 4, 4, 4), lelecsystem = c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), govstruct = c(NA, NA, NA, 3L, 3L, 3L, 3L, 3L,
3L, 3L), courtexec = c(NA, NA, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), reservedseat = c(NA, NA, NA, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
uppub = c(1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), bankpol = c(NA,
NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L), execveto = c(NA, NA,
NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L), legveto = c(NA, NA, NA,
1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("cowc", "year",
"PSIndex", "lelecsystem", "govstruct", "courtexec", "reservedseat",
"uppub", "bankpol", "execveto", "legveto"), row.names = 1474:1483, class = "data.frame"))
更新/澄清:例如在1993年至1995年期间,该指数为NA,因为govstruct缺失。我希望这几年的指数不是NA,而是2,因为其中有2个条件得到满足(lelecsystem == 3 和 uppub == 1)。
cowc year PSIndex lelecsystem govstruct courtexec reservedseat uppub bankpol execveto legveto
1 BOS 1993 NA 3 NA NA NA 1 NA NA NA
2 BOS 1994 NA 3 NA NA NA 1 NA NA NA
3 BOS 1995 NA 3 NA NA NA 1 NA NA NA
4 BOS 1996 4 3 3 0 2 0 1 0 1
5 BOS 1997 4 3 3 0 2 0 1 0 1
6 BOS 1998 4 3 3 0 2 0 1 0 1
7 BOS 1999 4 3 3 0 2 0 1 0 1
8 BOS 2000 4 3 3 0 2 0 1 0 1
9 BOS 2001 4 3 3 0 2 0 1 0 1
10 BOS 2002 4 3 3 0 2 0 1 0 1
答案 0 :(得分:2)
这是一个解决方案:
# Build one logical column per index condition. A comparison against a missing
# value yields NA, and those NA cells are dropped when each row is summed.
# lelecsystem == 3 is included because the index being reproduced starts at 1
# for those rows; leaving it out would make every index one too low.
conds <- cbind(
  IAEP$lelecsystem == 3,
  IAEP$govstruct == 3,
  IAEP$reservedseat == 2,
  IAEP$uppub == 1,
  IAEP$bankpol == 1,
  # Counts only when both veto variables equal 1.
  IAEP$execveto == 1 & IAEP$legveto == 1
)
# Count the conditions that are TRUE in each row, treating NA as "not met".
# rowSums() is the vectorised equivalent of apply(conds, 1, sum).
IAEP$PSIndex <- rowSums(conds, na.rm = TRUE)