我的两个大数据集如下:
df1=data.frame(subject = c(rep(1, 11), rep(2, 10)), day =c(1,1,1,1,1,2,3,15,15,15,15,1,1,1,1,2,3,15,15,15,15),stime=c('4/16/2012 6:25','4/16/2012 7:01','4/16/2012 17:22','4/16/2012 17:45','4/16/2012 18:13','4/18/2012 6:50','4/19/2012 6:55','5/1/2012 6:28','5/1/2012 7:00','5/1/2012 16:28','5/1/2012 17:00','4/23/2012 5:56','4/23/2012 6:30','4/23/2012 16:55','4/23/2012 17:20','4/25/2012 6:32','4/26/2012 6:28','5/8/2012 5:54','5/8/2012 6:30','5/8/2012 15:55','5/8/2012 16:30'))
df2=data.frame(subject = c(rep(1, 10), rep(2, 10)), day=c(1,1,2,2,3,3,9,9,15,15,1,1,2,2,3,3,9,9,15,15),dtime=c('4/16/2012 6:15','4/16/2012 15:16','4/18/2012 7:15','4/18/2012 21:45','4/19/2012 7:05','4/19/2012 23:17','4/28/2012 7:15','4/28/2012 21:12','5/1/2012 7:15','5/1/2012 15:15','4/23/2012 6:45','4/23/2012 16:45','4/25/2012 6:45','4/25/2012 21:30','4/26/2012 6:45','4/26/2012 22:00','5/2/2012 7:00','5/2/2012 22:00','5/8/2012 6:45','5/8/2012 15:45'))
运行以下代码后,R一直给我这样的错误,我将stringsAsFators设置更改为FALSE并将日期时间列转换为字符,但错误仍然存在。所以我删除了'stime'和'dtime'中缺少值的行,但错误没有消失。我使用了traceback()并且还列出了输出。有人可以就如何避免错误提出一些想法吗?
Error in if (as.POSIXct(x["stime"], format = "%m/%d/%Y %H:%M") < as.POSIXct(choices[2], : missing value where TRUE/FALSE needed
df4 <- df1
df4$dtime <- apply(df4, 1, function(x){
choices <- df2[ df2$subject==as.numeric(x["subject"]) &
df2$day==as.numeric(x["day"]) , "dtime"]
if( as.POSIXct(x["stime"], format="%m/%d/%Y %H:%M") <
as.POSIXct(choices[1],format="%m/%d/%Y %H:%M") ) {
choices[1]
}else{ choices[2] }
} )
traceback()
2: FUN(newX[, i], ...)
1: apply(fullset1_1, 1, function(x) {
choices <- adar_1[adar_1$SID1A == as.numeric(x["subject"]) &
adar_1$DYS1N == as.numeric(x["day"]), "dtime"]
if (as.POSIXct(x["stime"], format = "%m/%d/%Y %H:%M") < as.POSIXct(choices[2],
format = "%m/%d/%Y %H:%M")) {
choices[1]
}
else {
choices[2]
}
})