我有以下代码:
library(dplyr)
Q = 10000
span = 1995:2016
time = rep(span,times = Q, each= Q)
id = rep(1:Q,times=length(span))
s1 = rep(rnorm(Q,0,1),times=length(span))
gdp = rep(rnorm(Q,0,1),times=length(span))
e = rep(rnorm(Q,0,1),times=length(span))
dfA = data.frame(id,time,s1,e,gdp)
mgr = double()
stp = 10
for(K in seq(10,Q,stp)){
gr = double()
for(t in span){
wt1 = dfA %>% filter(time == t-1) %>%
arrange(desc(s1)) %>% mutate(w= s1/gdp)
zt1 = dfA %>% filter(time == t-1) %>% mutate(z1 = log(s1/e))
zt = dfA %>% filter(time == t) %>% mutate(z = log(s1/e))
gt = left_join(zt1,zt,by="name") %>%
mutate(g = z-z1) %>% select(name,g) %>% na.omit()
a = left_join(wt1,gt,by="name") %>% na.omit()
a = a %>% mutate(id = 1:length(a$name)) %>%
filter(id <= Q) %>% mutate(gbar = mean(g)) %>%
filter(id <= K) %>% mutate(sck = g-gbar,
gamma = w*sck)
gr = append(gr, sum(a$gamma))
}
mgr = append(mgr,mean(gr))
}
其中dfA是包含ID变量和时间变量等的数据帧。由于时间变量的范围是1995年至2016年,并且K是带有步骤10的序列,因此我诉诸append()
分别存储gr
和mgr
。问题是计算时间太长。
所以我的问题是:有什么方法可以避免使用append()
来填充向量gr
和mgr
,从而减少计算代码所花费的时间?
答案 0 :(得分:0)
您可以启动具有固定长度的'gr'和'mgr'向量,而不仅仅是将它们初始化为double并让R在每次迭代中对其进行扩展。优点是向量的内存是预先分配的,您不必重新定义整个变量mgr / gr。
## initiate vectors with set length
mgr <- double(length = length(seq(10,Q,stp)))
gr <- double(length = length(1995:2016))
# fill the positions in each iteration
outerIteration <- (K - 10) / stp
innerIteration <- t - 1994
gr[innerIteration] <- sum(a$gamma)
# take the mean for each block of length 21 (2016 - 1995)
mgr[outerIteration] <- mean(gr[(outerIteraion -1)*21 + 1 : outerIteration*21])