请任何人帮助我让这个功能更快地运行:
st=c(0 ,0, 9,39,44 ,100, 0, 0, 8,26 ,100, 0, 0, 6, 9,16,20,24,29,35,37,47,54,73 ,100, 0, 0, 6,35,44 ,100, 0, 0,10,16,27,40,51,91, 100, 0, 0,3, 7,28,69,71,75, 100, 0, 0,19 ,100, 0, 0, 7,24,29,35 ,100, 0, 0, 8,11,14,15,18,31,32,33,50,53,56,62,79,80,82,87,88,89, 100, 0, 0, 2,7,31,34,40 ,100, 0, 0,10,41,51,76 ,100, 0, 0, 4,32,41,46 ,100, 0, 0,19,26,59,76,83,88,92 ,100, 0, 0,11,27,51, 100, 0, 0, 5, 7,45,56,78,3 ,100, 0, 0, 3,12,23,46,53,72 ,100)
id=c(1:length(st))
data=cbind(id,st)
a=gRbase::combnPrim(c(0:100), 4, simplify = T)
c=a[,a[1,]==0]
d=c[,c[4,]==100]
list_d=c(unname(as.data.frame(d)))
p=c()
f=function(dataf,dec,...){
cc<-vector("list", 3)
dataf=data.frame(dataf)
for(j in 1:3){
cc[[j]] <-c(dec[j],dec[j+1]);
for(k in 1:nrow(dataf)){
if(round(dataf[k,"st"],digits=3)>= round(cc[[j]][1],digits=3) && round(dataf[k,"st"],digits=3) <= round(cc[[j]][2],digits=3)){
dataf[k,"p"]=j
}
}
}
return(dataf)
}
l=vector("list",length(list_d))
library(doSNOW)
cluster = makeCluster(4, type = "SOCK")
registerDoSNOW(cluster)
system.time(l<-foreach(i= 1:length(list_d)) %dopar% f(data,list_d[[i]]))
stopCluster(cluster)
这是分析的结果:
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
答案 0 :(得分:1)
据我所知,你的函数确定某个值在哪个区间内,dataf $ st包含值,dec包含区间的值。
在这种情况下,这有效:
# original function
f=function(dataf,dec,...){
cc<-vector("list", 3)
dataf=data.frame(dataf)
for(j in 1:3){
cc[[j]] <-c(dec[j],dec[j+1]);
for(k in 1:nrow(dataf)){
if(round(dataf[k,"st"],digits=3)>= round(cc[[j]][1],digits=3) && round(dataf[k,"st"],digits=3) <= round(cc[[j]][2],digits=3)){
dataf[k,"p"]=j
}
}
}
return(dataf)
}
# my proposal
f2=function(dataf,dec,...){
dec <- round(dec,digits=3)
dataf=data.frame(dataf)
dataf$p <- sapply(round(dataf$st,digits=3), function(x){match(TRUE,x<dec)-1})
dataf$p[dataf$p==0] <- NA
return(dataf)
}
dec <- 1:4
df1 <- data.frame(st = sample(1:50000,1000)/10000,stringsAsFactors=FALSE)
original <- f(df1,dec)
new <- f2(df1,dec)
all.equal(original,new) # TRUE, results are the same
# now let's check the speed
library(microbenchmark)
microbenchmark(original = f(df1,dec),
new = f2(df1,dec),
times = 10)
# Unit: milliseconds
# expr min lq mean median uq max neval
# original 82.025627 82.600381 84.834773 84.059917 86.214891 89.83188 10
# new 2.115691 2.142234 2.234287 2.192843 2.289462 2.49650 10
快40倍