我有下面这个循环,必须把它应用到一个非常大的数据集上,但它运行得很慢。有人能提出一些加快速度的方法吗?
### calculate distance ###

# For every row of `data`, count the rows that share its column-21 code and
# column-2 value, pass the date-window test, and lie within the distance
# limit; also total column 5 over those rows.
#
# Arguments:
#   data     Data frame or matrix addressed by column position
#            (columns 2, 5, 21, 27, 35, 36, 37 and 38 are read).
#   dist_fun Distance function called as dist_fun(a1, b1, a2, b2). It is only
#            ever given one pair of points at a time, so it does not have to
#            be vectorised.
# Returns:
#   A list with two one-column numeric matrices, `distsum` (match count) and
#   `grp1sum` (column-5 total), each with one row per row of `data`.
#
# Pairs whose filter conditions or distance evaluate to NA are skipped
# rather than stopping with an error.
nearby_totals <- function(data, dist_fun) {
  n_obs <- nrow(data)

  # Extract each column once as a plain vector: repeated `data[i, k]` lookups
  # on a data frame are what makes the naive double loop so slow.
  pull_col <- function(k) if (is.data.frame(data)) data[[k]] else data[, k]

  code <- pull_col(21)
  key <- pull_col(2)
  weight <- pull_col(5)
  win_start <- pull_col(37)
  win_end <- pull_col(38)
  # NOTE(review): the second date comparison reads column 27, not 37. That
  # looks like a possible typo for an interval-overlap test, but it is kept
  # as in the original script -- confirm against the data dictionary.
  other_start <- pull_col(27)
  # presumably longitude (36) and latitude (35); passed to dist_fun in the
  # same argument order as before -- TODO confirm
  coord_a <- pull_col(36)
  coord_b <- pull_col(35)

  # Codes 101, 155, 147 and 185 use the tighter limit of 5, all others 15
  # (same unit as whatever dist_fun returns).
  limits <- ifelse(code %in% c(101, 155, 147, 185), 5, 15)

  distsum <- matrix(NA_real_, nrow = n_obs, ncol = 1)
  grp1sum <- matrix(NA_real_, nrow = n_obs, ncol = 1)

  for (i in seq_len(n_obs)) {
    # Vectorised replacement for the inner loop's filter: same code, same
    # key, and neither of the two date exclusions applies.
    excluded <- win_start[i] > win_end | win_end[i] < other_start
    candidates <- which(code == code[i] & key == key[i] & !excluded)

    # Distances are only computed for the rows that survived the filter.
    dists <- vapply(
      candidates,
      function(j) dist_fun(coord_a[i], coord_b[i], coord_a[j], coord_b[j]),
      numeric(1)
    )

    # A distance of exactly 0 (the row itself, or a co-located row) is
    # excluded, as in the original condition.
    near <- candidates[which(dists <= limits[i] & dists != 0)]

    distsum[i, 1] <- length(near)
    # `0 +` forces double arithmetic, matching the original running total.
    grp1sum[i, 1] <- sum(0 + weight[near])
  }

  list(distsum = distsum, grp1sum = grp1sum)
}

# matrices for results
nearby <- nearby_totals(data, gcd.hf)
distsum <- nearby$distsum
grp1sum <- nearby$grp1sum