Question

我有这个循环，我必须将其应用于一个非常大的数据集。但它很慢。有人可以提出一些方法来加快速度吗？

###  calculate distance ###
# For every row i of `data`, look at all rows j that
#   * carry the same value in column 21 and in column 2,
#   * pass the date-interval check built from columns 37/38 (and 27, see the
#     NOTE below), i.e. the two records are present at the same time,
#   * lie at a non-zero distance of at most `limit` from row i, as returned
#     by gcd.hf() for columns 36/35 (gcd.hf is defined outside this snippet).
# Results, one row per observation:
#   distsum - number of such rows (physicians within the limit)
#   grp1sum - sum of column 5 over those rows (registered patients)

n_obs <- nrow(data)

# Extract a whole column as a plain vector, for both data frames and matrices.
col_of <- function(k) {
  if (is.data.frame(data)) data[[k]] else data[, k]
}

# Pull every column that is used out of `data` once. Repeated data[i, k]
# look-ups inside the double loop are the dominant cost of the original code.
code_col   <- col_of(21)
key_col    <- col_of(2)
value_col  <- col_of(5)
coord_a    <- col_of(36)
coord_b    <- col_of(35)
date_from  <- col_of(37)
date_to    <- col_of(38)
# NOTE(review): the second half of the interval check reads column 27 while
# the first half reads 37/38. This looks like a typo for column 37 - confirm
# against the data dictionary. Column 27 is kept to preserve current results.
date_other <- col_of(27)

# Rows whose column-21 code is one of these use the tighter limit of 5,
# all other rows use 15. Computed once for all rows instead of per iteration.
short_limit_codes <- c(101, 155, 147, 185)
limits <- ifelse(code_col %in% short_limit_codes, 5, 15)

# matrices for results
distsum <- matrix(NA_real_, nrow = n_obs, ncol = 1)
grp1sum <- matrix(NA_real_, nrow = n_obs, ncol = 1)

# loop over each observation; seq_len() yields no iterations for empty data,
# whereas 1:nrow(data) would iterate over c(1, 0)
for (i in seq_len(n_obs)) {
  limit <- limits[i]

  # Cheap filters evaluated for all rows j at once: same code, same key, and
  # not ruled out by the date comparison. which() also drops rows where the
  # comparison is NA instead of failing on them.
  no_overlap <- date_from[i] > date_to | date_to[i] < date_other
  candidates <- which(
    code_col == code_col[i] & key_col == key_col[i] & !no_overlap
  )

  # reset counters
  dist_count <- 0
  grp1_count <- 0

  # gcd.hf() is called one pair at a time because it is not visible here and
  # may not be vectorised; only the pre-filtered candidates are measured.
  for (j in candidates) {
    distance <- gcd.hf(coord_a[i], coord_b[i], coord_a[j], coord_b[j])
    # distance != 0 excludes the row itself and any record at the same spot
    if (distance <= limit && distance != 0) {
      dist_count <- dist_count + 1             # count physicians in limit
      grp1_count <- grp1_count + value_col[j]  # sum registered patients
    }
  }

  distsum[i, ] <- dist_count
  grp1sum[i, ] <- grp1_count
}

在R中加速双循环

0 个答案: