我想把一段代码向量化(如果可行的话):这段代码在 for 循环中使用了带多个条件的 ifelse 语句,但我遇到了困难,也没有找到遇到完全相同问题的人。以下是我的数据示例:
> build[1:25, ]
truckid readdate_from route_from to_readdate to_route distance_travelled TTTfrom toTTT timebtwpnts
1 100284 2015-03-13 00:05:00 2015-03-13 00:05:20 0.349115293 6206 6206 20
2 100284 2015-03-13 00:05:20 2015-03-13 00:15:00 9.345457793 6206 2420 580
3 100284 2015-03-13 00:15:00 2015-03-13 00:20:00 I15 4.609846782 2420 2420 300
4 100284 2015-03-13 00:20:00 I15 2015-03-13 00:34:34 I15 6.097244827 2420 2420 874
5 100284 2015-03-13 00:34:34 I15 2015-03-13 00:36:52 I15 0.112831213 2420 2420 138
6 100284 2015-03-13 00:36:52 I15 2015-03-13 00:37:52 I15 0.051011669 2420 2420 60
7 100284 2015-03-13 00:37:52 I15 2015-03-13 00:50:00 I15 8.244393968 2420 2420 728
8 100284 2015-03-13 00:50:00 I15 2015-03-13 00:57:06 7.900808243 2420 6188 426
9 100284 2015-03-13 00:57:06 2015-03-13 00:58:14 1.298096162 6188 6019 68
10 100284 2015-03-13 00:58:14 2015-03-13 01:03:30 1.782517401 6019 6188 316
11 100284 2015-03-13 01:03:30 2015-03-13 01:05:18 0.002000153 6188 6188 108
12 100284 2015-03-13 01:05:18 2015-03-13 01:25:34 0.052831508 6188 6188 1216
13 100284 2015-03-13 01:25:34 2015-03-13 01:35:00 6.471993106 6188 6183 566
14 100284 2015-03-13 01:35:00 2015-03-13 01:35:20 0.398436396 6183 6183 20
15 100665 2015-03-01 20:46:00 2015-03-01 20:51:00 4.916207734 6180 6188 300
16 100665 2015-03-01 20:51:00 2015-03-01 20:56:00 4.977879172 6188 6019 300
17 100665 2015-03-01 20:56:00 2015-03-01 20:57:22 I15 1.339368219 6019 2420 82
18 100665 2015-03-01 20:57:22 I15 2015-03-01 21:01:00 I15 3.489645997 2420 2420 218
19 100665 2015-03-01 21:01:00 I15 2015-03-01 21:06:00 I15 5.181996399 2420 2420 300
20 100665 2015-03-01 21:06:16 I15 2015-03-01 21:11:00 I15 4.193124975 2420 2420 284
21 100665 2015-03-01 21:11:00 I15 2015-03-01 21:16:00 I15 3.247763697 2420 2420 300
22 100665 2015-03-01 21:16:00 I15 2015-03-01 21:21:00 I15 4.074412199 2420 2420 300
23 100665 2015-03-01 21:21:00 I15 2015-03-01 21:26:00 I15 4.282198968 2420 2420 300
24 100665 2015-03-01 21:26:00 I15 2015-03-01 21:28:48 2.661758135 2420 6205 168
25 100665 2015-03-01 21:28:48 2015-03-01 21:31:00 2.281483570 6205 6030 132
mins speedbtwpnts stopmove
1 0.3333333 62.84075269 1
2 9.6666667 58.00628975 1
3 5.0000000 55.31816138 1
4 14.5666667 25.11450958 1
5 2.3000000 2.94342294 0
6 1.0000000 3.06070012 0
7 12.1333333 40.76898116 1
8 7.1000000 66.76739360 1
9 1.1333333 68.72273800 1
10 5.2666667 20.30716027 1
11 1.8000000 0.06667178 0
12 20.2666667 0.15640907 0
13 9.4333333 41.16462046 1
14 0.3333333 71.71855135 1
15 5.0000000 58.99449281 1
16 5.0000000 59.73455007 1
17 1.3666667 58.80153156 1
18 3.6333333 57.62718160 1
19 5.0000000 62.18395678 1
20 4.7333333 53.15228842 1
21 5.0000000 38.97316437 1
22 5.0000000 48.89294639 1
23 5.0000000 51.38638762 1
24 2.8000000 57.03767432 1
25 2.2000000 62.22227919 1
我已成功改写了以下代码:
# Original (slow) version: derive `starttime` and the `pretrip` key per row.
# One numeric slot per row of `build`.
res <- numeric(nrow(build))
# Timed loop over all rows. Row 1, and every row whose truckid differs from
# the previous row's, takes its own readdate_from; every other row copies the
# value already stored for the previous row.
system.time(for (i in 1:length(build$truckid)){
# Placeholder value; always overwritten by one of the branches below.
res[i] <- i
# ifelse() is used with a scalar test purely for the assignment side effect
# of whichever branch gets evaluated; its return value is discarded.
ifelse(i == 1,
res[i] <- build$readdate_from[i],
ifelse(build$truckid[i] != build$truckid[i-1],
res[i] <- build$readdate_from[i],
res[i] <- res[i-1]))
})
build$starttime <- res
# pretrip is the string "<truckid>_<starttime>".
build <- within(build,
pretrip <- paste(truckid,
starttime,
sep = "_"))
改写后的代码为:
# Faster version: start from every row's own readdate_from (as numeric) and
# only overwrite the rows that continue the previous row's group.
res <- as.numeric(build$readdate_from)
# cond[i] is TRUE when column 1 of row i equals column 1 of row i - 1
# (column 1 is truckid in the dput() shown in the question); row 1 is FALSE.
cond <- c(FALSE, (build[-nrow(build), 1] == build[-1, 1]))
# The loop is still needed because res[i - 1] may itself have been overwritten
# in the previous iteration, so the first value of a run is carried forward.
system.time(for (i in 1:nrow(build)){
# ifelse() is used only for the side effect of the selected assignment;
# the FALSE branch (res[i] <- res[i]) leaves the value unchanged.
ifelse(cond[i],
res[i] <- res[i-1],
res[i] <- res[i])
})
build$starttime <- res
# pretrip is the string "<truckid>_<starttime>".
build <- within(build,
pretrip <- paste(truckid,
starttime,
sep = "_"))
改写后的版本运行速度快10倍左右。我现在想对下面这段代码做同样的处理,它在一个for循环中包含多个ifelse条件:
# Compute `timestopped`: a running total of `mins` over consecutive rows with
# stopmove == 0 inside the same pretrip; the branches are listed below.
res <- numeric(nrow(build))
system.time(for (i in 1:length(build$pretrip)){
# Placeholder value; always overwritten by one of the branches below.
res[i] <- i
# Nested scalar ifelse() calls are used only for the assignment side effect
# of the branch that gets evaluated. In order:
#   1. first row                               -> mins[i]
#   2. same pretrip, previous moving, now stop -> mins[i]  (a stop begins)
#   3. same pretrip, previous stop, now stop   -> round(res[i-1] + mins[i], 5)
#   4. different pretrip, now stop             -> mins[i]
#   5. anything else                           -> 0
ifelse(i == 1,
res[i] <- build$mins[i],
ifelse(build$pretrip[i] == build$pretrip[i-1] && build$stopmove[i-1] == 1 && build$stopmove[i] == 0,
res[i] <- build$mins[i],
ifelse(build$pretrip[i] == build$pretrip[i-1] && build$stopmove[i] == 0 && build$stopmove[i-1] == 0,
res[i] <- round(res[i-1] + build$mins[i], 5),
ifelse(build$pretrip[i] != build$pretrip[i-1] && build$stopmove[i] == 0,
res[i] <- build$mins[i],
res[i] <- 0))))
})
build$timestopped <- res
这段代码实现了我想要的效果:一旦出现停止状态(stopmove 为 0),就开始累计停止的时间:
> build[1:25, ]
truckid readdate_from route_from to_readdate to_route distance_travelled TTTfrom toTTT timebtwpnts
1 100284 2015-03-13 00:05:00 2015-03-13 00:05:20 0.349115293 6206 6206 20
2 100284 2015-03-13 00:05:20 2015-03-13 00:15:00 9.345457793 6206 2420 580
3 100284 2015-03-13 00:15:00 2015-03-13 00:20:00 I15 4.609846782 2420 2420 300
4 100284 2015-03-13 00:20:00 I15 2015-03-13 00:34:34 I15 6.097244827 2420 2420 874
5 100284 2015-03-13 00:34:34 I15 2015-03-13 00:36:52 I15 0.112831213 2420 2420 138
6 100284 2015-03-13 00:36:52 I15 2015-03-13 00:37:52 I15 0.051011669 2420 2420 60
7 100284 2015-03-13 00:37:52 I15 2015-03-13 00:50:00 I15 8.244393968 2420 2420 728
8 100284 2015-03-13 00:50:00 I15 2015-03-13 00:57:06 7.900808243 2420 6188 426
9 100284 2015-03-13 00:57:06 2015-03-13 00:58:14 1.298096162 6188 6019 68
10 100284 2015-03-13 00:58:14 2015-03-13 01:03:30 1.782517401 6019 6188 316
11 100284 2015-03-13 01:03:30 2015-03-13 01:05:18 0.002000153 6188 6188 108
12 100284 2015-03-13 01:05:18 2015-03-13 01:25:34 0.052831508 6188 6188 1216
13 100284 2015-03-13 01:25:34 2015-03-13 01:35:00 6.471993106 6188 6183 566
14 100284 2015-03-13 01:35:00 2015-03-13 01:35:20 0.398436396 6183 6183 20
15 100665 2015-03-01 20:46:00 2015-03-01 20:51:00 4.916207734 6180 6188 300
16 100665 2015-03-01 20:51:00 2015-03-01 20:56:00 4.977879172 6188 6019 300
17 100665 2015-03-01 20:56:00 2015-03-01 20:57:22 I15 1.339368219 6019 2420 82
18 100665 2015-03-01 20:57:22 I15 2015-03-01 21:01:00 I15 3.489645997 2420 2420 218
19 100665 2015-03-01 21:01:00 I15 2015-03-01 21:06:00 I15 5.181996399 2420 2420 300
20 100665 2015-03-01 21:06:16 I15 2015-03-01 21:11:00 I15 4.193124975 2420 2420 284
21 100665 2015-03-01 21:11:00 I15 2015-03-01 21:16:00 I15 3.247763697 2420 2420 300
22 100665 2015-03-01 21:16:00 I15 2015-03-01 21:21:00 I15 4.074412199 2420 2420 300
23 100665 2015-03-01 21:21:00 I15 2015-03-01 21:26:00 I15 4.282198968 2420 2420 300
24 100665 2015-03-01 21:26:00 I15 2015-03-01 21:28:48 2.661758135 2420 6205 168
25 100665 2015-03-01 21:28:48 2015-03-01 21:31:00 2.281483570 6205 6030 132
mins speedbtwpnts stopmove starttime pretrip timestopped
1 0.3333333 62.84075269 1 1426205100 100284_1426205100 0.3333333
2 9.6666667 58.00628975 1 1426205100 100284_1426205100 0.0000000
3 5.0000000 55.31816138 1 1426205100 100284_1426205100 0.0000000
4 14.5666667 25.11450958 1 1426205100 100284_1426205100 0.0000000
5 2.3000000 2.94342294 0 1426205100 100284_1426205100 2.3000000
6 1.0000000 3.06070012 0 1426205100 100284_1426205100 3.3000000
7 12.1333333 40.76898116 1 1426205100 100284_1426205100 0.0000000
8 7.1000000 66.76739360 1 1426205100 100284_1426205100 0.0000000
9 1.1333333 68.72273800 1 1426205100 100284_1426205100 0.0000000
10 5.2666667 20.30716027 1 1426205100 100284_1426205100 0.0000000
11 1.8000000 0.06667178 0 1426205100 100284_1426205100 1.8000000
12 20.2666667 0.15640907 0 1426205100 100284_1426205100 22.0666700
13 9.4333333 41.16462046 1 1426205100 100284_1426205100 0.0000000
14 0.3333333 71.71855135 1 1426205100 100284_1426205100 0.0000000
15 5.0000000 58.99449281 1 1425242760 100665_1425242760 0.0000000
16 5.0000000 59.73455007 1 1425242760 100665_1425242760 0.0000000
17 1.3666667 58.80153156 1 1425242760 100665_1425242760 0.0000000
18 3.6333333 57.62718160 1 1425242760 100665_1425242760 0.0000000
19 5.0000000 62.18395678 1 1425242760 100665_1425242760 0.0000000
20 4.7333333 53.15228842 1 1425242760 100665_1425242760 0.0000000
21 5.0000000 38.97316437 1 1425242760 100665_1425242760 0.0000000
22 5.0000000 48.89294639 1 1425242760 100665_1425242760 0.0000000
23 5.0000000 51.38638762 1 1425242760 100665_1425242760 0.0000000
24 2.8000000 57.03767432 1 1425242760 100665_1425242760 0.0000000
25 2.2000000 62.22227919 1 1425242760 100665_1425242760 0.0000000
然而,这种写法扩展性不好,我也想不出如何一次性把多个条件向量化。我目前最接近的尝试如下;它只针对第一个条件,而且结果并不是我想要的,因为 build$cond 的每个元素都是 FALSE:
# Attempt to express the first loop condition as logical vectors.
# cond1[i]: column 14 of row i equals column 14 of row i - 1 (column 14
# appears to be pretrip, going by the printed column order); row 1 is FALSE.
cond1 <- c(FALSE, (build[-nrow(build), 14] == build[-1, 14]))
build$cond1 <- cond1
# NOTE(review): build[-1, 12] holds rows 2..n, so after prepending FALSE,
# cond2[i] tests column 12 (stopmove) of row i itself, not of row i - 1.
# Testing the previous row would need build[-nrow(build), 12].
cond2 <- c(FALSE, (build[-1, 12] == 1))
build$cond2 <- cond2
# cond3[i]: column 12 (stopmove) of row i is 0.
cond3 <- c((build[ , 12] == 0))
build$cond3 <- cond3
# As written, cond2 and cond3 test the same row for == 1 and == 0, so they
# can never both be TRUE and `cond` is FALSE for every row.
cond <- cond1 & cond2 & cond3
build$cond <- cond
非常感谢任何帮助。
编辑:在下方添加 dput() 的输出:
dput(build[1:25,])
structure(list(truckid = c(100284L, 100284L, 100284L, 100284L,
100284L, 100284L, 100284L, 100284L, 100284L, 100284L, 100284L,
100284L, 100284L, 100284L, 100665L, 100665L, 100665L, 100665L,
100665L, 100665L, 100665L, 100665L, 100665L, 100665L, 100665L
), readdate_from = structure(c(1426205100, 1426205120, 1426205700,
1426206000, 1426206874, 1426207012, 1426207072, 1426207800, 1426208226,
1426208294, 1426208610, 1426208718, 1426209934, 1426210500, 1425242760,
1425243060, 1425243360, 1425243442, 1425243660, 1425243976, 1425244260,
1425244560, 1425244860, 1425245160, 1425245328), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), route_from = structure(c(1L, 1L, 1L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L), .Label = c("", "I10", "I15", "I17",
"I19", "I40", "I8"), class = "factor"), to_readdate = structure(c(1426205120,
1426205700, 1426206000, 1426206874, 1426207012, 1426207072, 1426207800,
1426208226, 1426208294, 1426208610, 1426208718, 1426209934, 1426210500,
1426210520, 1425243060, 1425243360, 1425243442, 1425243660, 1425243960,
1425244260, 1425244560, 1425244860, 1425245160, 1425245328, 1425245460
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), to_route = structure(c(1L,
1L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L), .Label = c("", "I10", "I15",
"I17", "I19", "I40", "I8"), class = "factor"), distance_travelled = c(0.3491152927,
9.3454577932, 4.6098467817, 6.0972448268, 0.1128312127, 0.0510116686,
8.2443939681, 7.9008082426, 1.2980961622, 1.7825174014, 0.0020001533,
0.052831508, 6.4719931061, 0.3984363964, 4.9162077343, 4.9778791724,
1.3393682188, 3.4896459968, 5.1819963986, 4.1931249754, 3.2477636975,
4.0744121993, 4.2821989681, 2.6617581347, 2.2814835704), TTTfrom = c(6206,
6206, 2420, 2420, 2420, 2420, 2420, 2420, 6188, 6019, 6188, 6188,
6188, 6183, 6180, 6188, 6019, 2420, 2420, 2420, 2420, 2420, 2420,
2420, 6205), toTTT = c(6206, 2420, 2420, 2420, 2420, 2420, 2420,
6188, 6019, 6188, 6188, 6188, 6183, 6183, 6188, 6019, 2420, 2420,
2420, 2420, 2420, 2420, 2420, 6205, 6030), timebtwpnts = c(20,
580, 300, 874, 138, 60, 728, 426, 68, 316, 108, 1216, 566, 20,
300, 300, 82, 218, 300, 284, 300, 300, 300, 168, 132), mins = c(0.333333333333333,
9.66666666666667, 5, 14.5666666666667, 2.3, 1, 12.1333333333333,
7.1, 1.13333333333333, 5.26666666666667, 1.8, 20.2666666666667,
9.43333333333333, 0.333333333333333, 5, 5, 1.36666666666667,
3.63333333333333, 5, 4.73333333333333, 5, 5, 5, 2.8, 2.2), speedbtwpnts = c(62.840752686,
58.0062897508966, 55.3181613804, 25.1145095840732, 2.94342294,
3.060700116, 40.7689811609341, 66.7673935994366, 68.7227379988235,
20.3071602691139, 0.0666717766666667, 0.156409069736842, 41.1646204628269,
71.718551352, 58.9944928116, 59.7345500688, 58.8015315570732,
57.6271815985321, 62.1839567832, 53.1522884205634, 38.97316437,
48.8929463916, 51.3863876172, 57.037674315, 62.2222791927273),
stopmove = c(1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = c("truckid", "readdate_from",
"route_from", "to_readdate", "to_route", "distance_travelled",
"TTTfrom", "toTTT", "timebtwpnts", "mins", "speedbtwpnts", "stopmove"
), row.names = c(NA, 25L), class = "data.frame")
答案 0 :(得分:2)
首先做一个基本的整理:
# Compute `timestopped` with plain if/else instead of nested scalar ifelse():
# a running total of `mins` over consecutive stopped rows (stopmove == 0)
# within one pretrip, and 0 for moving rows.
n <- nrow(build)
# TRUE where the row itself is a stop.
stopped <- build$stopmove == 0
# TRUE where row i has the same pretrip as row i - 1 (never for row 1).
same_trip <- c(FALSE, build$pretrip[-1] == build$pretrip[-n])
# cond1: a stop begins here (previous row moving, this row stopped).
cond1 <- c(FALSE, build$stopmove[-n] == 1 & stopped[-1])
# cond2: a stop continues here (previous row stopped, this row stopped).
cond2 <- c(FALSE, build$stopmove[-n] == 0 & stopped[-1])

res <- numeric(n)
# Row 1 has no predecessor; the original loop always stores its `mins`.
if (n > 0) {
  res[1] <- build$mins[1]
}
# Start at row 2 so the value stored for row 1 is not overwritten.
for (i in seq_len(n)[-1]) {
  if (same_trip[i]) {
    if (cond1[i]) {
      res[i] <- build$mins[i]
    } else if (cond2[i]) {
      # The only branch that needs the previously computed value.
      res[i] <- round(res[i - 1] + build$mins[i], 5)
    } else {
      res[i] <- 0
    }
  } else {
    # First row of a new pretrip: counts only if it is itself a stop.
    if (stopped[i]) {
      res[i] <- build$mins[i]
    } else {
      res[i] <- 0
    }
  }
}
build$timestopped <- res
现在就能看清真正的难点了:在 res[i] <- round(res[i-1] + build$mins[i], 5) 这一步需要用到前一个值;而在其他所有情况下,都可以直接对向量使用 ifelse 函数。因此我的建议是:
# Compute `timestopped` with everything vectorised except the running total,
# which needs the previously computed element.
n <- nrow(build)
# TRUE where the row itself is a stop.
stopped <- build$stopmove == 0
# TRUE where row i has the same pretrip as row i - 1 (never for row 1).
same_trip <- c(FALSE, build$pretrip[-1] == build$pretrip[-n])
# cond2: a stop continues here (previous row stopped, this row stopped).
cond2 <- c(FALSE, build$stopmove[-n] == 0 & stopped[-1])

# Every stopped row starts from its own `mins` and every moving row from 0.
# This covers both "a stop begins" and "new pretrip starts with a stop";
# rows that continue a stop are extended in the loop below.
res <- ifelse(stopped, build$mins, 0)
# Row 1 has no predecessor; the original loop always stores its `mins`.
if (n > 0) {
  res[1] <- build$mins[1]
}
for (i in seq_len(n)[-1]) {
  if (same_trip[i] && cond2[i]) {
    # Add this row's minutes to the total accumulated so far in the stop.
    res[i] <- round(res[i - 1] + build$mins[i], 5)
  }
}
build$timestopped <- res
如果您能确定 cond2 不会连续两次为真,那么这一步也可以转换为向量操作;否则基本上只能保留这个循环。(也许可以用 cumsum 的技巧来解决,但我认为不行)
修改
我认为最好的解决方案是将for循环减少到仅仅覆盖你需要的情况:
# Visit only the rows where a stop continues within the same pretrip; each one
# extends the running total stored in the previous row. which() yields the
# TRUE positions in ascending order and drops NA entries.
for (i in which(same_trip & cond2)) {
  res[i] <- round(res[i - 1] + build$mins[i], 5)
}
我认为不可能有更快(并且仍然易于理解)的解决方案。
答案 1 :(得分:0)
我相信你根本不需要循环。
当第i行和第(i - 1)行具有相同的卡车ID时,向量cond
将为TRUE
,否则将为FALSE
。然后,您不必迭代cond,只需使用
# Vectorised single-step shift: where `cond` is TRUE take the value one
# position earlier in `res`, otherwise keep the current value.
# `res` is a plain vector, so its size comes from length(); nrow() returns
# NULL for a vector and would make the index expression fail.
# The leading 0 is only a placeholder for the non-existent res[0].
# NOTE(review): the shifted values come from `res` as it was before this call,
# so a run of several consecutive TRUEs is not cascaded the way the loop's
# res[i] <- res[i-1] is; confirm this matches the intended result.
res <- ifelse(cond, c(0, res[-length(res)]), res)
ifelse()适用于矢量。
0 被用作该向量的第一个元素,因为 res[0] 未定义(所以 cond[1] 为 FALSE 或未定义);如果改用 NA,结果也会是 NA。
这样即可得到等效的行为:对于 cond 为 TRUE 的每一行,返回 res 的前一个值;对于 cond 为 FALSE 的所有行,返回 res 本身的值。