有效地编写代码而不是使用for循环

时间:2017-08-16 13:56:24

标签: r

以下是玩具示例。实际上,我将模拟6000次蒙特卡罗复制的数据并计算每次复制的St,并且在每次复制中l的长度将很大。如何有效地编写代码以减少运行时间。

time <- c(6,6,6,6,7,9,10,10,11,13,16,17,19,20,22,23,25,32,32,34,35)
cens <- c(1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0)
l <- length(time)

n <- NULL
d <- NULL
St <- NULL

n[1] <- sum(time[1]<=time)
d[1] <- sum(time==time[1] & cens==1)
St[1] <- (n[1]-d[1])/n[1]

for(i in 2:l){

   if(time[i]==time[i-1]){

      n[i]  <- n[i-1]
      d[i]  <- d[i-1]
      St[i] <- St[i-1]

 } else{

    n[i] <- sum(time[i]<=time)
    d[i] <- sum(time==time[i] & cens==1)
    St[i] <- St[i-1] * ((n[i]-d[i])/n[i])
 }

 }# end of for loop

 fit <- data.frame(ti=time, ni=n, di=d, St )

编辑:生成数据

set.seed(5)

l <- 500
time <- round(runif(l,3,38))
cens <- round(runif(l,0,1))

n <- NULL
d <- NULL
St <- NULL

n[1] <- sum(time[1]<=time)
d[1] <- sum(time==time[1] & cens==1)
St[1] <- (n[1]-d[1])/n[1]

for(i in 2:l){

   if(time[i]==time[i-1]){

      n[i]  <- n[i-1]
      d[i]  <- d[i-1]
      St[i] <- St[i-1]

 } else{

    n[i] <- sum(time[i]<=time)
    d[i] <- sum(time==time[i] & cens==1)
    St[i] <- St[i-1] * ((n[i]-d[i])/n[i])
 }

 }# end of for loop

 fit <- data.frame(ti=time, ni=n, di=d, St )

2 个答案:

答案 0 :(得分:4)

你应该避免循环,并在编译(矢量化)代码中尽可能多地做。以下内容应该相当快,因为​​它是矢量化的:

library(data.table)
library(zoo)

DT <- data.table(time, cens)

#sum cens by time, this is why I use data.table but you could also use dplyr
DT[, d := sum(cens == 1L), by = time]

#calculate n and St
DT[, c("n", "St") := {
 #find time changes
 dn <- c(TRUE, diff(time) > 0)
 #calculate remaining length for time changing points
 nt <- length(time) - which(dn) + 1
 #vector of NA values
 n <- rep(NA, length(time))
 #fill in nt values
 n[dn] <- nt
 #vector of NA values
 St <- rep(NA, length(time))
 #fill in St values for time change points
 St[dn] <- cumprod(((n - d) / n)[dn])
 #last observation carried forward
 list(na.locf(n), na.locf(St))
}]
#     time cens d  n        St
#  1:    6    1 3 21 0.8571429
#  2:    6    1 3 21 0.8571429
#  3:    6    1 3 21 0.8571429
#  4:    6    0 3 21 0.8571429
#  5:    7    1 1 17 0.8067227
#  6:    9    0 0 16 0.8067227
#  7:   10    1 1 15 0.7529412
#  8:   10    0 1 15 0.7529412
#  9:   11    0 0 13 0.7529412
# 10:   13    1 1 12 0.6901961
# 11:   16    1 1 11 0.6274510
# 12:   17    0 0 10 0.6274510
# 13:   19    0 0  9 0.6274510
# 14:   20    0 0  8 0.6274510
# 15:   22    1 1  7 0.5378151
# 16:   23    1 1  6 0.4481793
# 17:   25    0 0  5 0.4481793
# 18:   32    0 0  4 0.4481793
# 19:   32    0 0  4 0.4481793
# 20:   34    0 0  2 0.4481793
# 21:   35    0 0  1 0.4481793
#     time cens d  n        St

答案 1 :(得分:1)

myF <- function(x) {
  require(data.table)

  d <- data.table(time, cens)

  tt <- function(x, y)sum(x <= y)
  tt <- Vectorize(tt, vectorize.args = "x")
  d[, nnew := tt(time, time)]

  tt2 <- function(x, y, cens)sum(x == y & cens == 1)
  tt2 <- Vectorize(tt2, vectorize.args = "x")
  d[, dnew := tt2(time, time, cens)]

  d[, multi := (nnew - dnew) / nnew]
  d[duplicated(time), multi := 1]
  d[, St := cumprod(multi)]
  d[, multi := NULL][, cens := NULL]
  setnames(d, "time", "ti")
  setnames(d, "nnew", "ni")
  setnames(d, "dnew", "di")
  d[]
}
> myF()
    ti ni di        St
 1:  6 21  3 0.8571429
 2:  6 21  3 0.8571429
 3:  6 21  3 0.8571429
 4:  6 21  3 0.8571429
 5:  7 17  1 0.8067227
 6:  9 16  0 0.8067227
 7: 10 15  1 0.7529412
 8: 10 15  1 0.7529412
 9: 11 13  0 0.7529412
10: 13 12  1 0.6901961
11: 16 11  1 0.6274510
12: 17 10  0 0.6274510
13: 19  9  0 0.6274510
14: 20  8  0 0.6274510
15: 22  7  1 0.5378151
16: 23  6  1 0.4481793
17: 25  5  0 0.4481793
18: 32  4  0 0.4481793
19: 32  4  0 0.4481793
20: 34  2  0 0.4481793
21: 35  1  0 0.4481793
    ti ni di        St
> all.equal(fit, as.data.frame(myF()))
[1] TRUE