for循环中的ifelse语句向量化

时间:2015-06-23 19:56:20

标签: r

我想把一段代码向量化:这段代码在for循环中使用了带有多个条件的ifelse语句。我在这一步遇到了困难(也不确定是否可行),并且没有找到遇到完全相同问题的人。以下是我的数据示例:

> build[1:25, ]
   truckid       readdate_from route_from         to_readdate to_route distance_travelled TTTfrom toTTT timebtwpnts
1   100284 2015-03-13 00:05:00            2015-03-13 00:05:20                 0.349115293    6206  6206          20
2   100284 2015-03-13 00:05:20            2015-03-13 00:15:00                 9.345457793    6206  2420         580
3   100284 2015-03-13 00:15:00            2015-03-13 00:20:00      I15        4.609846782    2420  2420         300
4   100284 2015-03-13 00:20:00        I15 2015-03-13 00:34:34      I15        6.097244827    2420  2420         874
5   100284 2015-03-13 00:34:34        I15 2015-03-13 00:36:52      I15        0.112831213    2420  2420         138
6   100284 2015-03-13 00:36:52        I15 2015-03-13 00:37:52      I15        0.051011669    2420  2420          60
7   100284 2015-03-13 00:37:52        I15 2015-03-13 00:50:00      I15        8.244393968    2420  2420         728
8   100284 2015-03-13 00:50:00        I15 2015-03-13 00:57:06                 7.900808243    2420  6188         426
9   100284 2015-03-13 00:57:06            2015-03-13 00:58:14                 1.298096162    6188  6019          68
10  100284 2015-03-13 00:58:14            2015-03-13 01:03:30                 1.782517401    6019  6188         316
11  100284 2015-03-13 01:03:30            2015-03-13 01:05:18                 0.002000153    6188  6188         108
12  100284 2015-03-13 01:05:18            2015-03-13 01:25:34                 0.052831508    6188  6188        1216
13  100284 2015-03-13 01:25:34            2015-03-13 01:35:00                 6.471993106    6188  6183         566
14  100284 2015-03-13 01:35:00            2015-03-13 01:35:20                 0.398436396    6183  6183          20
15  100665 2015-03-01 20:46:00            2015-03-01 20:51:00                 4.916207734    6180  6188         300
16  100665 2015-03-01 20:51:00            2015-03-01 20:56:00                 4.977879172    6188  6019         300
17  100665 2015-03-01 20:56:00            2015-03-01 20:57:22      I15        1.339368219    6019  2420          82
18  100665 2015-03-01 20:57:22        I15 2015-03-01 21:01:00      I15        3.489645997    2420  2420         218
19  100665 2015-03-01 21:01:00        I15 2015-03-01 21:06:00      I15        5.181996399    2420  2420         300
20  100665 2015-03-01 21:06:16        I15 2015-03-01 21:11:00      I15        4.193124975    2420  2420         284
21  100665 2015-03-01 21:11:00        I15 2015-03-01 21:16:00      I15        3.247763697    2420  2420         300
22  100665 2015-03-01 21:16:00        I15 2015-03-01 21:21:00      I15        4.074412199    2420  2420         300
23  100665 2015-03-01 21:21:00        I15 2015-03-01 21:26:00      I15        4.282198968    2420  2420         300
24  100665 2015-03-01 21:26:00        I15 2015-03-01 21:28:48                 2.661758135    2420  6205         168
25  100665 2015-03-01 21:28:48            2015-03-01 21:31:00                 2.281483570    6205  6030         132
         mins speedbtwpnts stopmove
1   0.3333333  62.84075269        1
2   9.6666667  58.00628975        1
3   5.0000000  55.31816138        1
4  14.5666667  25.11450958        1
5   2.3000000   2.94342294        0
6   1.0000000   3.06070012        0
7  12.1333333  40.76898116        1
8   7.1000000  66.76739360        1
9   1.1333333  68.72273800        1
10  5.2666667  20.30716027        1
11  1.8000000   0.06667178        0
12 20.2666667   0.15640907        0
13  9.4333333  41.16462046        1
14  0.3333333  71.71855135        1
15  5.0000000  58.99449281        1
16  5.0000000  59.73455007        1
17  1.3666667  58.80153156        1
18  3.6333333  57.62718160        1
19  5.0000000  62.18395678        1
20  4.7333333  53.15228842        1
21  5.0000000  38.97316437        1
22  5.0000000  48.89294639        1
23  5.0000000  51.38638762        1
24  2.8000000  57.03767432        1
25  2.2000000  62.22227919        1

我已经成功改写了下面这段代码(这是原始版本):

# Trip start time per row: the readdate_from of the first row in each run of
# consecutive rows that share a truckid, stored as the plain numeric value of
# the POSIXct timestamp.
res <- numeric(nrow(build))
# seq_len() instead of 1:length(): the loop body is skipped for an empty build.
system.time(for (i in seq_len(nrow(build))) {
  # The conditions are scalars and the branches are assignments, so plain
  # if/else is used; `||` short-circuits, so row 0 is never looked up.
  if (i == 1 || build$truckid[i] != build$truckid[i - 1]) {
    # First row, or the truck changed: a new run starts here.
    res[i] <- build$readdate_from[i]
  } else {
    # Same truck as the previous row: carry its start time forward.
    res[i] <- res[i - 1]
  }
})
build$starttime <- res

# Trip key of the form "<truckid>_<starttime>".
build$pretrip <- paste(build$truckid, build$starttime, sep = "_")

改写后的版本如下:

# Start from every row's own timestamp (numeric value of the POSIXct column).
res <- as.numeric(build$readdate_from)
n_rows <- nrow(build)
# cond[i] is TRUE when row i has the same truckid as row i - 1. Row 1 has no
# predecessor, so it is FALSE. The column is addressed by name rather than by
# position so the comparison does not depend on column order.
cond <- c(FALSE, build$truckid[-1] == build$truckid[-n_rows])
# Only rows that continue a run need work. Visiting them in increasing order
# carries the first timestamp of each run through the whole run. which() also
# skips NA comparisons and yields nothing for an empty build.
system.time(for (i in which(cond)) {
  res[i] <- res[i - 1]
})
build$starttime <- res

# Trip key of the form "<truckid>_<starttime>".
build$pretrip <- paste(build$truckid, build$starttime, sep = "_")

改写后的版本运行速度快了大约10倍。现在我想对下面这段代码做同样的处理,它在一个for循环中包含多个ifelse条件:

# timestopped: running total of `mins` over consecutive stopped rows
# (stopmove == 0) within one pretrip; all other rows get 0, except row 1,
# which always takes its own mins.
# Note: a missing pretrip/stopmove value now stops with an error in if()
# instead of silently leaving the row index in res.
res <- numeric(nrow(build))
# seq_along() instead of 1:length(): nothing runs for an empty build.
system.time(for (i in seq_along(build$pretrip)) {
  if (i == 1) {
    # No previous row to compare against.
    res[i] <- build$mins[i]
  } else {
    same_trip <- build$pretrip[i] == build$pretrip[i - 1]
    stopped_now <- build$stopmove[i] == 0
    if (same_trip && build$stopmove[i - 1] == 1 && stopped_now) {
      # The truck has just stopped: start a new total.
      res[i] <- build$mins[i]
    } else if (same_trip && stopped_now && build$stopmove[i - 1] == 0) {
      # Still stopped: extend the running total (needs the previous result).
      res[i] <- round(res[i - 1] + build$mins[i], 5)
    } else if (!same_trip && stopped_now) {
      # A new trip that begins with a stop.
      res[i] <- build$mins[i]
    } else {
      res[i] <- 0
    }
  }
})
build$timestopped <- res

这段代码能得到我想要的结果:一旦stopmove为0(表示停车),就开始累计停车时间:

> build[1:25, ]
   truckid       readdate_from route_from         to_readdate to_route distance_travelled TTTfrom toTTT timebtwpnts
1   100284 2015-03-13 00:05:00            2015-03-13 00:05:20                 0.349115293    6206  6206          20
2   100284 2015-03-13 00:05:20            2015-03-13 00:15:00                 9.345457793    6206  2420         580
3   100284 2015-03-13 00:15:00            2015-03-13 00:20:00      I15        4.609846782    2420  2420         300
4   100284 2015-03-13 00:20:00        I15 2015-03-13 00:34:34      I15        6.097244827    2420  2420         874
5   100284 2015-03-13 00:34:34        I15 2015-03-13 00:36:52      I15        0.112831213    2420  2420         138
6   100284 2015-03-13 00:36:52        I15 2015-03-13 00:37:52      I15        0.051011669    2420  2420          60
7   100284 2015-03-13 00:37:52        I15 2015-03-13 00:50:00      I15        8.244393968    2420  2420         728
8   100284 2015-03-13 00:50:00        I15 2015-03-13 00:57:06                 7.900808243    2420  6188         426
9   100284 2015-03-13 00:57:06            2015-03-13 00:58:14                 1.298096162    6188  6019          68
10  100284 2015-03-13 00:58:14            2015-03-13 01:03:30                 1.782517401    6019  6188         316
11  100284 2015-03-13 01:03:30            2015-03-13 01:05:18                 0.002000153    6188  6188         108
12  100284 2015-03-13 01:05:18            2015-03-13 01:25:34                 0.052831508    6188  6188        1216
13  100284 2015-03-13 01:25:34            2015-03-13 01:35:00                 6.471993106    6188  6183         566
14  100284 2015-03-13 01:35:00            2015-03-13 01:35:20                 0.398436396    6183  6183          20
15  100665 2015-03-01 20:46:00            2015-03-01 20:51:00                 4.916207734    6180  6188         300
16  100665 2015-03-01 20:51:00            2015-03-01 20:56:00                 4.977879172    6188  6019         300
17  100665 2015-03-01 20:56:00            2015-03-01 20:57:22      I15        1.339368219    6019  2420          82
18  100665 2015-03-01 20:57:22        I15 2015-03-01 21:01:00      I15        3.489645997    2420  2420         218
19  100665 2015-03-01 21:01:00        I15 2015-03-01 21:06:00      I15        5.181996399    2420  2420         300
20  100665 2015-03-01 21:06:16        I15 2015-03-01 21:11:00      I15        4.193124975    2420  2420         284
21  100665 2015-03-01 21:11:00        I15 2015-03-01 21:16:00      I15        3.247763697    2420  2420         300
22  100665 2015-03-01 21:16:00        I15 2015-03-01 21:21:00      I15        4.074412199    2420  2420         300
23  100665 2015-03-01 21:21:00        I15 2015-03-01 21:26:00      I15        4.282198968    2420  2420         300
24  100665 2015-03-01 21:26:00        I15 2015-03-01 21:28:48                 2.661758135    2420  6205         168
25  100665 2015-03-01 21:28:48            2015-03-01 21:31:00                 2.281483570    6205  6030         132
         mins speedbtwpnts stopmove  starttime           pretrip timestopped
1   0.3333333  62.84075269        1 1426205100 100284_1426205100   0.3333333
2   9.6666667  58.00628975        1 1426205100 100284_1426205100   0.0000000
3   5.0000000  55.31816138        1 1426205100 100284_1426205100   0.0000000
4  14.5666667  25.11450958        1 1426205100 100284_1426205100   0.0000000
5   2.3000000   2.94342294        0 1426205100 100284_1426205100   2.3000000
6   1.0000000   3.06070012        0 1426205100 100284_1426205100   3.3000000
7  12.1333333  40.76898116        1 1426205100 100284_1426205100   0.0000000
8   7.1000000  66.76739360        1 1426205100 100284_1426205100   0.0000000
9   1.1333333  68.72273800        1 1426205100 100284_1426205100   0.0000000
10  5.2666667  20.30716027        1 1426205100 100284_1426205100   0.0000000
11  1.8000000   0.06667178        0 1426205100 100284_1426205100   1.8000000
12 20.2666667   0.15640907        0 1426205100 100284_1426205100  22.0666700
13  9.4333333  41.16462046        1 1426205100 100284_1426205100   0.0000000
14  0.3333333  71.71855135        1 1426205100 100284_1426205100   0.0000000
15  5.0000000  58.99449281        1 1425242760 100665_1425242760   0.0000000
16  5.0000000  59.73455007        1 1425242760 100665_1425242760   0.0000000
17  1.3666667  58.80153156        1 1425242760 100665_1425242760   0.0000000
18  3.6333333  57.62718160        1 1425242760 100665_1425242760   0.0000000
19  5.0000000  62.18395678        1 1425242760 100665_1425242760   0.0000000
20  4.7333333  53.15228842        1 1425242760 100665_1425242760   0.0000000
21  5.0000000  38.97316437        1 1425242760 100665_1425242760   0.0000000
22  5.0000000  48.89294639        1 1425242760 100665_1425242760   0.0000000
23  5.0000000  51.38638762        1 1425242760 100665_1425242760   0.0000000
24  2.8000000  57.03767432        1 1425242760 100665_1425242760   0.0000000
25  2.2000000  62.22227919        1 1425242760 100665_1425242760   0.0000000

然而,这种写法的扩展性不好,我也想不出如何一次性把多个条件向量化。我目前最接近的尝试如下;它只处理了第一个条件,而且结果也不是我想要的,因为build$cond在每一行都返回FALSE:

# Column 14 of build holds pretrip and column 12 holds stopmove (see the
# printed data frame). Every flag vector is also stored on build.
last_row <- nrow(build)
trip_key <- build[[14]]
stop_flag <- build[[12]]

# cond1[i]: row i has the same pretrip as row i - 1 (row 1 is FALSE).
cond1 <- c(FALSE, trip_key[-1] == trip_key[-last_row])
build$cond1 <- cond1

# cond2[i]: for i >= 2 this tests stopmove of row i itself (the first row is
# dropped, not the last one), so it is not a "previous row" test.
cond2 <- c(FALSE, stop_flag[-1] == 1)
build$cond2 <- cond2

# cond3[i]: row i is a stop.
cond3 <- stop_flag == 0
build$cond3 <- cond3

# cond2 and cond3 test the same row for 1 and for 0, so they are never both
# TRUE and the combined flag is never TRUE.
cond <- cond1 & cond2 & cond3
build$cond <- cond

非常感谢任何帮助。

编辑:在下方添加了dput()的输出

dput(build[1:25,])
structure(list(truckid = c(100284L, 100284L, 100284L, 100284L, 
100284L, 100284L, 100284L, 100284L, 100284L, 100284L, 100284L, 
100284L, 100284L, 100284L, 100665L, 100665L, 100665L, 100665L, 
100665L, 100665L, 100665L, 100665L, 100665L, 100665L, 100665L
), readdate_from = structure(c(1426205100, 1426205120, 1426205700, 
1426206000, 1426206874, 1426207012, 1426207072, 1426207800, 1426208226, 
1426208294, 1426208610, 1426208718, 1426209934, 1426210500, 1425242760, 
1425243060, 1425243360, 1425243442, 1425243660, 1425243976, 1425244260, 
1425244560, 1425244860, 1425245160, 1425245328), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), route_from = structure(c(1L, 1L, 1L, 
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 1L), .Label = c("", "I10", "I15", "I17", 
"I19", "I40", "I8"), class = "factor"), to_readdate = structure(c(1426205120, 
1426205700, 1426206000, 1426206874, 1426207012, 1426207072, 1426207800, 
1426208226, 1426208294, 1426208610, 1426208718, 1426209934, 1426210500, 
1426210520, 1425243060, 1425243360, 1425243442, 1425243660, 1425243960, 
1425244260, 1425244560, 1425244860, 1425245160, 1425245328, 1425245460
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), to_route = structure(c(1L, 
1L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L), .Label = c("", "I10", "I15", 
"I17", "I19", "I40", "I8"), class = "factor"), distance_travelled = c(0.3491152927, 
9.3454577932, 4.6098467817, 6.0972448268, 0.1128312127, 0.0510116686, 
8.2443939681, 7.9008082426, 1.2980961622, 1.7825174014, 0.0020001533, 
0.052831508, 6.4719931061, 0.3984363964, 4.9162077343, 4.9778791724, 
1.3393682188, 3.4896459968, 5.1819963986, 4.1931249754, 3.2477636975, 
4.0744121993, 4.2821989681, 2.6617581347, 2.2814835704), TTTfrom = c(6206, 
6206, 2420, 2420, 2420, 2420, 2420, 2420, 6188, 6019, 6188, 6188, 
6188, 6183, 6180, 6188, 6019, 2420, 2420, 2420, 2420, 2420, 2420, 
2420, 6205), toTTT = c(6206, 2420, 2420, 2420, 2420, 2420, 2420, 
6188, 6019, 6188, 6188, 6188, 6183, 6183, 6188, 6019, 2420, 2420, 
2420, 2420, 2420, 2420, 2420, 6205, 6030), timebtwpnts = c(20, 
580, 300, 874, 138, 60, 728, 426, 68, 316, 108, 1216, 566, 20, 
300, 300, 82, 218, 300, 284, 300, 300, 300, 168, 132), mins = c(0.333333333333333, 
9.66666666666667, 5, 14.5666666666667, 2.3, 1, 12.1333333333333, 
7.1, 1.13333333333333, 5.26666666666667, 1.8, 20.2666666666667, 
9.43333333333333, 0.333333333333333, 5, 5, 1.36666666666667, 
3.63333333333333, 5, 4.73333333333333, 5, 5, 5, 2.8, 2.2), speedbtwpnts = c(62.840752686, 
58.0062897508966, 55.3181613804, 25.1145095840732, 2.94342294, 
3.060700116, 40.7689811609341, 66.7673935994366, 68.7227379988235, 
20.3071602691139, 0.0666717766666667, 0.156409069736842, 41.1646204628269, 
71.718551352, 58.9944928116, 59.7345500688, 58.8015315570732, 
57.6271815985321, 62.1839567832, 53.1522884205634, 38.97316437, 
48.8929463916, 51.3863876172, 57.037674315, 62.2222791927273), 
    stopmove = c(1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = c("truckid", "readdate_from", 
"route_from", "to_readdate", "to_route", "distance_travelled", 
"TTTfrom", "toTTT", "timebtwpnts", "mins", "speedbtwpnts", "stopmove"
), row.names = c(NA, 25L), class = "data.frame")

2 个答案:

答案 0 :(得分:2)

首先做一些基本的整理:

n_rows <- nrow(build)

# Result vector, preallocated with zeros. Row 1 has no predecessor and simply
# takes its own mins; the guard keeps res empty for an empty build.
res <- numeric(n_rows)
if (n_rows > 0) {
  res[1] <- build$mins[1]
}

# Each flag compares row i with row i - 1; position 1 is padded with FALSE.
same_trip <- c(FALSE, build$pretrip[-1] == build$pretrip[-n_rows])
# cond1: previous row moving (1), current row stopped (0) -> a stop begins.
cond1 <- c(FALSE, build$stopmove[-n_rows] == 1 & build$stopmove[-1] == 0)
# cond2: previous and current row both stopped -> the stop continues.
cond2 <- c(FALSE, build$stopmove[-n_rows] == 0 & build$stopmove[-1] == 0)

# Start at row 2 so the value set for row 1 above is not overwritten.
for (i in seq_len(n_rows)[-1]) {
  if (same_trip[i]) {
    if (cond1[i]) {
      res[i] <- build$mins[i]
    } else if (cond2[i]) {
      # The only branch that depends on the previously computed element.
      res[i] <- round(res[i - 1] + build$mins[i], 5)
    } else {
      res[i] <- 0
    }
  } else {
    # First row of a new trip: counts only if the truck is stopped.
    if (build$stopmove[i] == 0) {
      res[i] <- build$mins[i]
    } else {
      res[i] <- 0
    }
  }
}
build$timestopped <- res

现在可以清楚地看到真正的难点:在 res[i] <- round(res[i-1] + build$mins[i], 5) 这一步,你需要用到前一个元素的值;而在其他所有情况下,都可以直接对整个向量使用ifelse函数。因此我的建议是:

n_rows <- nrow(build)

# Each flag compares row i with row i - 1; position 1 is padded with FALSE.
same_trip <- c(FALSE, build$pretrip[-1] == build$pretrip[-n_rows])
# cond2: previous and current row both stopped -> the stop continues.
cond2 <- c(FALSE, build$stopmove[-n_rows] == 0 & build$stopmove[-1] == 0)

# Vectorised part: every stopped row starts from its own mins, every moving
# row is 0. The whole stopmove column is tested here, not a single element.
# "Stop begins" (previous row moving, current row stopped) is already covered
# by stopmove == 0, so it needs no separate flag.
res <- ifelse(build$stopmove == 0, build$mins, 0)
# Row 1 has no predecessor and always takes its own mins.
if (n_rows > 0) {
  res[1] <- build$mins[1]
}

# Sequential part: rows that continue a stop within the same trip add the
# previous (already final) total. which() keeps the indices in increasing
# order and is empty-safe.
for (i in which(same_trip & cond2)) {
  res[i] <- round(res[i - 1] + build$mins[i], 5)
}
build$timestopped <- res

如果你能确定cond2不会连续两次为真,那么这一步也可以改写成向量操作;否则一般就只能保留这个循环了。(也许可以用cumsum的技巧来实现,但我觉得不太可能)

修改

我认为最好的解决方案是将for循环减少到仅仅覆盖你需要的情况:

# Visit only the rows that continue a stop within the same trip. which()
# returns their indices in increasing order, drops NA flags, and returns
# nothing for empty input, so no 1:length() sequence is needed.
for (i in which(same_trip & cond2)) {
  res[i] <- round(res[i - 1] + build$mins[i], 5)
}

我认为不可能有更快(并且仍然易于理解)的解决方案。

答案 1 :(得分:0)

我相信你根本不需要循环。

当第i行和第(i - 1)行具有相同的卡车ID时,向量cond将为TRUE,否则将为FALSE。然后,您不必迭代cond,只需使用

# res is a plain vector, so its size comes from length(); nrow() is NULL here.
# prev_res[i] is res[i - 1]; position 1 is padded with 0, which is never
# selected as long as cond[1] is FALSE.
prev_res <- c(0, res[-length(res)])
# NOTE(review): this copies each value down by one row only. For a run of
# more than two rows, use res[which(!cond)[cumsum(!cond)]] to carry the first
# value of the run through the whole run.
res <- ifelse(cond, prev_res, res)

ifelse()适用于矢量。

该向量用0作为第一个元素,因为res[0]没有定义(cond[1]为FALSE,其对应的前一个值不存在);如果改用NA来占位,相应的乘积也会是NA。

这样应当得到与循环等效的行为:对于cond为TRUE的行,返回res的前一个值;对于cond为FALSE的行,返回res本身。