我跟随这篇关于"网球普通对手"的文章,我的目标是以最有效的方式编写脚本。下面你可以找到我的代码,但速度很慢。为了计算1匹配的结果,我的笔记本电脑花费了120秒或更少,我有一个150k行的数据集来计算。
文章:https://core.ac.uk/download/pdf/82518495.pdf
需要您的帮助来清理和改进我的代码。任何建议都表示赞赏
tableA:https://1drv.ms/u/s!At-ZKKnf0H4jafxCX96NLxu00nc
tableB:https://1drv.ms/u/s!At-ZKKnf0H4javHgoPjzfCMXhg4
data_tennis_co:https://1drv.ms/u/s!At-ZKKnf0H4jaJyNkYrr8muff8k
data_tennis_co = read.table("test_co.csv", header=FALSE, sep=",", fill = TRUE)
A = read.table("tableA.csv", header=FALSE, sep=",", fill = TRUE)
B = read.table("tableB.csv", header=FALSE, sep=",", fill = TRUE)
#BASIC FUNCTIONS
G<-function(p){res<- p^4*(15-4*p-((10*p^2)/(1-2*p*(1-p))))}
d<- function(p,q) {res<- p*q*(1-(p*(1-q)+(1-p)*q))^-1}
TB <- function(p,q) {res <- foreach(i = seq_along(1:28), .combine = sum) %dopar% {tb<-A[i,1]*p^A[i,2]*(1-p)^A[i,3]*q^A[i,4]*(1-q)^A[i,5]*d(p,q)^A[i,6]}}
S <- function(p,q) {res <- foreach(i = seq_along(1:21), .combine = rbind) %dopar% {s<-B[i,1]*G(p)^B[i,2]*(1-G(p))^B[i,3]*G(q)^B[i,4]*(1-G(q))^B[i,5]*(G(p)*G(q)+(G(p)*(1-G(q))+(1-G(p))*G(q))*TB(p,q))^B[i,6]} sum(res)}
M3 <- function(p,q) {res <- S(p,q)^2*(1+2*(1-S(p,q)))}
DELTA_AB <- function(spwAC,rpwAC,spwBC,rpwBC) {res <- (spwAC-(1-rpwAC))-(spwBC-(1-rpwBC))}
PR<- function(spwAC,rpwAC,spwBC,rpwBC) {res <- (M3(0.6+DELTA_AB(spwAC,rpwAC,spwBC,rpwBC),(1-0.6))+M3(0.6,(1-(0.6-DELTA_AB(spwAC,rpwAC,spwBC,rpwBC)))))/2}
#COMMON OPPONENTS
MAL<-function(id1,id2){
prova<- subset(data_tennis_co, V3 == 1 & V4==2)
previous<-subset(data_tennis_co, V2 < prova$V2)
s1 <- subset(previous, V3 == 1 | V4==1)
s2 <- subset(previous, V3 ==2 | V4==2)
s1$opp <- ifelse(s1$V3==1, s1$V4, s1$V3)
s2$opp <- ifelse(s2$V3==2, s2$V4, s2$V3)
inn<- intersect(s1$opp,s2$opp)
common1<-s1[s1$opp %in% inn,]
common2<-s2[s2$opp %in% inn,]
# fare media se id non unico
COM <- merge(common1, common2,by=c("opp"))
COM$OMALLEY <- unlist(mapply(PR, COM$V5.x, COM$V6.x, COM$V7.y, COM$V8.y))
COM$OMALLEY[is.nan(COM$OMALLEY)] <- 0.5
return(tryCatch(sum(COM$OMALLEY)/nrow(COM), error=function(e) NaN))
}
tic()
RESA<-MAL(1,2)
toc()