我有一些模型输出,我想要集成回原始数据文件。我能够使用嵌套的ifelse()来做到这一点,但是我想要一种方法来概括流程,以便我可以将它作为跨多个数据集的批处理来运行。这就是我最初的尝试。
模型输出对应于时间块,而每个原始数据点与离散时间相关联。
我决定一次手动运行一天(这是一天中一个参数的示例),并且这个非常大且丑陋的ifelse能够正确地聚合数据。
# Assign a phase label to every row of track: each nested ifelse() tests whether
# track$Time lies in the half-open block [start[i], end[i]) of phaseTable1 and,
# if so, takes phaseTable1$phase[i]; otherwise it falls through to the next block.
# The chain is hard-coded for exactly 35 rows of phaseTable1.
# NOTE(review): the final "else" value is phaseTable1$phase[35], so any Time that
# matches none of the 35 blocks is also labelled with phase 35 - confirm intended.
track[,"phase"]= ifelse((phaseTable1$start[1]<=track$Time)& (track$Time< phaseTable1$end[1]), phaseTable1$phase[1],
ifelse((phaseTable1$start[2]<=track$Time)& (track$Time< phaseTable1$end[2]), phaseTable1$phase[2],
ifelse((phaseTable1$start[3]<=track$Time)& (track$Time< phaseTable1$end[3]), phaseTable1$phase[3],
ifelse((phaseTable1$start[4]<=track$Time)& (track$Time< phaseTable1$end[4]), phaseTable1$phase[4],
ifelse((phaseTable1$start[5]<=track$Time)& (track$Time< phaseTable1$end[5]), phaseTable1$phase[5],
ifelse((phaseTable1$start[6]<=track$Time)& (track$Time< phaseTable1$end[6]), phaseTable1$phase[6],
ifelse((phaseTable1$start[7]<=track$Time)& (track$Time< phaseTable1$end[7]), phaseTable1$phase[7],
ifelse((phaseTable1$start[8]<=track$Time)& (track$Time< phaseTable1$end[8]), phaseTable1$phase[8],
ifelse((phaseTable1$start[9]<=track$Time)& (track$Time< phaseTable1$end[9]), phaseTable1$phase[9],
ifelse((phaseTable1$start[10]<=track$Time)& (track$Time< phaseTable1$end[10]), phaseTable1$phase[10],
ifelse((phaseTable1$start[11]<=track$Time)& (track$Time< phaseTable1$end[11]), phaseTable1$phase[11],
ifelse((phaseTable1$start[12]<=track$Time)& (track$Time< phaseTable1$end[12]), phaseTable1$phase[12],
ifelse((phaseTable1$start[13]<=track$Time)& (track$Time< phaseTable1$end[13]), phaseTable1$phase[13],
ifelse((phaseTable1$start[14]<=track$Time)& (track$Time<phaseTable1$end[14]), phaseTable1$phase[14],
ifelse((phaseTable1$start[15]<=track$Time)& (track$Time< phaseTable1$end[15]), phaseTable1$phase[15],
ifelse((phaseTable1$start[16]<=track$Time)& (track$Time< phaseTable1$end[16]), phaseTable1$phase[16],
ifelse((phaseTable1$start[17]<=track$Time)& (track$Time< phaseTable1$end[17]), phaseTable1$phase[17],
ifelse((phaseTable1$start[18]<=track$Time)& (track$Time< phaseTable1$end[18]), phaseTable1$phase[18],
ifelse((phaseTable1$start[19]<=track$Time)& (track$Time< phaseTable1$end[19]), phaseTable1$phase[19],
ifelse((phaseTable1$start[20]<=track$Time)& (track$Time< phaseTable1$end[20]), phaseTable1$phase[20],
ifelse((phaseTable1$start[21]<=track$Time)& (track$Time< phaseTable1$end[21]), phaseTable1$phase[21],
ifelse((phaseTable1$start[22]<=track$Time)& (track$Time< phaseTable1$end[22]), phaseTable1$phase[22],
ifelse((phaseTable1$start[23]<=track$Time)& (track$Time< phaseTable1$end[23]), phaseTable1$phase[23],
ifelse((phaseTable1$start[24]<=track$Time)& (track$Time< phaseTable1$end[24]), phaseTable1$phase[24],
ifelse((phaseTable1$start[25]<=track$Time)& (track$Time< phaseTable1$end[25]), phaseTable1$phase[25],
ifelse((phaseTable1$start[26]<=track$Time)& (track$Time< phaseTable1$end[26]), phaseTable1$phase[26],
ifelse((phaseTable1$start[27]<=track$Time)& (track$Time< phaseTable1$end[27]), phaseTable1$phase[27],
ifelse((phaseTable1$start[28]<=track$Time)& (track$Time< phaseTable1$end[28]), phaseTable1$phase[28],
ifelse((phaseTable1$start[29]<=track$Time)& (track$Time< phaseTable1$end[29]), phaseTable1$phase[29],
ifelse((phaseTable1$start[30]<=track$Time)& (track$Time< phaseTable1$end[30]), phaseTable1$phase[30],
ifelse((phaseTable1$start[31]<=track$Time)& (track$Time< phaseTable1$end[31]), phaseTable1$phase[31],
ifelse((phaseTable1$start[32]<=track$Time)& (track$Time< phaseTable1$end[32]), phaseTable1$phase[32],
ifelse((phaseTable1$start[33]<=track$Time)& (track$Time< phaseTable1$end[33]), phaseTable1$phase[33],
ifelse((phaseTable1$start[34]<=track$Time)& (track$Time< phaseTable1$end[34]), phaseTable1$phase[34],
ifelse((phaseTable1$start[35]<=track$Time)& (track$Time< phaseTable1$end[35]), phaseTable1$phase[35],phaseTable1$phase[35]
)))))))))))))))))))))))))))))))))))
这很有效,但是它非常笨拙,嵌套条件的数量在数据中每天都有所不同。
我尝试将其重新设计成更实用的循环
# Attempt 1: iterate over the rows of phaseTable1 and test each time block.
# The condition compares block j against the whole track$Time vector, so `if`
# is handed a logical vector instead of a single TRUE/FALSE value.
# NOTE(review): the body uses `==` (a comparison), so nothing is ever assigned
# to track$tau even when the condition is TRUE.
for ( j in 1:nrow(phaseTable1)){
if((phaseTable1$start[j]<=track$Time)&(track$Time< phaseTable1$end[j])){track$tau== phaseTable1$tau[j]}
}
并不断收到此警告,导致无法汇总数据
In if ((phaseTable1$start[j] <= track$Time) & (track$Time < ... the condition has length > 1 and only the first element will be used
我再次尝试了这个
# Attempt 2: vectorised ifelse() inside a loop over the rows of phaseTable1.
# Every iteration assigns the complete track$phase column, writing "" for all
# rows outside block j, so labels written by earlier iterations are overwritten.
# NOTE(review): the line below has two more closing parentheses than opening
# ones and uses `phaseTable1$star [j]` rather than `start` - verify against the
# code that was actually run.
for ( j in 1:nrow(phaseTable1)){
track$phase<-ifelse(((phaseTable1$star [j]<=track$Time)&(track$Time< phaseTable1$end[j])), phaseTable1$phase[j],"")))
}
新列出现在数据框中,但它们是空的。
我再次尝试使用博客文章中推荐的thatssorandom包中的包装器,这也导致了错误。
# Attempt 3: the same per-block test, written with the ie()/i()/e() wrappers.
# NOTE(review): ie(), i() and e() are not defined in this snippet; presumably
# they come from the package mentioned in the surrounding text - confirm their
# semantics before relying on this.
# The assignment `track$phase<- phaseTable1$phase[j]` is passed as an argument
# to i(); it targets the whole track$phase column, not only the matching rows.
for ( j in 1:nrow(phaseTable1)){
ie(
i(((phaseTable1$start[j]<=track$Time)&(track$Time< phaseTable1$end[j])),track$phase<- phaseTable1$phase[j]),
e("na"))
}
我是否犯了明显的错误,或者是否有其他方案可以实现我的目标?我承认自己是一个相对业余的用户;我已经查阅了其他关于ifelse的论坛问题,但仍然无法弄清楚哪里做错了。我已经有一个可以工作的循环,能够对数据框中的每一天运行模型。如果能让上面这个循环正常运行,我就可以把它嵌套进第一个循环,从而批量聚合数据。非常感谢任何有关解决方案的见解!
答案 0 :(得分:0)
由于没有可用的示例数据集,下面用构造的数据演示如何使用findInterval
# Demo lookup table: 24 consecutive one-hour blocks starting at
# 2017-08-07 00:00:00, each labelled with one lower-case letter.
block_start <- seq(as.POSIXct("2017-08-07 00:00:00"), by = "hour", length.out = 24)
df1 <- data.frame(
  start = block_start,
  end = block_start + 3600,
  phase = letters[seq_along(block_start)],
  stringsAsFactors = FALSE
)
# Two sample timestamps to classify.
query_times <- as.POSIXct(c("2017-08-07 02:38:24", "2017-08-07 21:59:59"))
# For each timestamp, findInterval() gives the index of the last block start
# that is not later than it; that index selects the matching phase label.
v <- findInterval(query_times, df1$start)
df1$phase[v]
[1] "c" "v"
除非间隔之间存在间隙,否则不需要结束时间
对于第一个错误,请查看?&
`&` 和 `&&` 表示逻辑 AND,`|` 和 `||` 表示逻辑 OR。较短的形式以与算术运算符大致相同的方式逐元素进行比较。较长的形式从左到右求值,只检查每个向量的第一个元素,并且只在结果确定之前继续求值。较长的形式适用于程序控制流,通常是 if 子句中的首选。
第二个错误:错误phaseTable1$star [j]
应为phaseTable1$start[j]
第三个错误:错误i
应为if
答案 1 :(得分:0)
我找到了一个似乎有效的解决方案。不得不重新思考我如何设置循环。
# Copy the model outputs (model, phase, tau, eta) of each time block in
# phaseTable1 onto every row of track whose Time lies in [start, end).
# Rows of track that fall in no block keep their existing values (NA when the
# column did not exist beforehand). If blocks overlap, the later row of
# phaseTable1 wins, as in a row-by-row loop.
out_cols <- c("model", "phase", "tau", "eta")

# Create missing output columns up front so the indexed assignments below
# always operate on a full-length column.
for (col in out_cols) {
  if (is.null(track[[col]])) {
    track[[col]] <- rep(NA, nrow(track))
  }
}

# seq_len() makes the loop a no-op for an empty phaseTable1 (1:0 would not).
for (j in seq_len(nrow(phaseTable1))) {
  # Evaluate the block membership once for all rows of track; which() drops
  # rows whose Time is NA instead of failing on them.
  in_block <- which(
    phaseTable1$start[j] <= track$Time & track$Time < phaseTable1$end[j]
  )
  for (col in out_cols) {
    track[[col]][in_block] <- phaseTable1[[col]][j]
  }
}