我是一名 R 语言新手,编写了一个脚本:先计算出一些统计结果,最后将其与对目标变量进行随机置换(shuffle)后得到的结果向量进行比较。该结果向量包含 n 次模拟的统计结果。随着模拟次数的增加(我希望至少运行 10,000 次模拟),运行时间超出了我的预期。我已经尝试通过修改代码来提升性能,但仍希望有人能帮我进一步优化。相关代码如下。
#CREATE DATA
require(plyr)
# Number of times the shuffle simulation is repeated.
Simulations <- 10001
# Variation labels, with the trial and response counts for each, in the same
# order.
Variation <- c("Control", "A", "B", "C")
Trials <- c(727, 724, 723, 720)
Response <- c(35, 55, 44, 38)
ConfLevel <- 0.95

# PERFORM INITIAL CALCS
# Fail early on inconsistent inputs instead of producing a malformed data set.
stopifnot(
  length(Trials) == length(Variation),
  length(Response) == length(Variation),
  all(Response >= 0),
  all(Response <= Trials)
)
# Non-responses are derived rather than hard-coded, so they can never disagree
# with Trials and Response.
NonResponse <- Trials - Response
Data <- data.frame(Variation, NonResponse, Response, Trials)

# One row per trial: Response is 0 for a non-response and 1 for a response.
# Groups are laid out in sorted Variation order, the same layout the earlier
# ddply()-based construction produced, so seeded runs give the same rows.
total <- local({
  group_order <- order(Variation)
  data.frame(
    Variation = rep(Data$Variation[group_order], times = Trials[group_order]),
    Response = unlist(lapply(
      group_order,
      function(i) rep(c(0, 1), times = c(NonResponse[i], Response[i]))
    ))
  )
})
# Shuffle the row order; seq_len() stays correct even for a zero-row frame,
# where 1:nrow() would yield c(1, 0).
total <- total[sample(seq_len(nrow(total))), ]
#CREATE FUNCTION TO PERFORM SIMULATIONS
#' Run one shuffle of the response and return the smallest p-value
#'
#' Shuffles the `Response` column of `x` once, counts how many shuffled
#' responses equal 1 within each variation, and compares every non-control
#' variation against "Control" with a two-sided `prop.test()` without
#' continuity correction.
#'
#' Reads `Data` (columns `Variation`, `Response`, `Trials`; one row per
#' variation) and `ConfLevel` from the enclosing environment.
#'
#' Side effect, kept for backward compatibility: every call assigns `VP1`
#' (shuffled counts) and `VP2` (observed counts from `Data`) with `<<-`.
#'
#' @param x Data frame with one row per trial and the columns `Variation`
#'   and `Response` (1 marks a response).
#' @return A single numeric value: the minimum p-value over all
#'   variation-versus-control tests for this shuffle.
targetshuffle <- function(x) {
  # Exactly one sample() call per run keeps seeded runs reproducible.
  shuffled <- sample(x[, "Response"])

  variation <- Data$Variation
  control_idx <- which(variation == "Control")
  challenger_idx <- which(variation != "Control")
  if (length(control_idx) != 1L || length(challenger_idx) == 0L) {
    stop(
      "`Data` must hold exactly one \"Control\" row and at least one other ",
      "variation",
      call. = FALSE
    )
  }

  # Shuffled responses per variation, aligned with the rows of `Data`.
  # tabulate() reports 0 for a variation that drew no responses, so no NA can
  # leak into prop.test() and silently change the test being run.
  hits <- x[, "Variation"][which(shuffled == 1)]
  shuffled_response <- tabulate(
    match(hits, variation),
    nbins = length(variation)
  )
  trials <- Data$Trials

  # NOTE(review): these globals are only written here, never read. They are
  # kept, with their original column names, in case code elsewhere reads them;
  # that includes the "ControlReponse" misspelling.
  VP1 <<- data.frame(
    ResponseShuffled = shuffled_response[challenger_idx],
    ControlResponseShuffled = shuffled_response[control_idx],
    Trials = trials[challenger_idx],
    ControlTotal = trials[control_idx]
  )
  VP2 <<- data.frame(
    Response = Data$Response[challenger_idx],
    ControlReponse = Data$Response[control_idx],
    Trials = trials[challenger_idx],
    ControlTotal = trials[control_idx]
  )

  # One two-sample proportion test per non-control variation.
  p_values <- vapply(
    challenger_idx,
    function(i) {
      prop.test(
        x = c(shuffled_response[i], shuffled_response[control_idx]),
        n = c(trials[i], trials[control_idx]),
        alternative = "two.sided",
        conf.level = ConfLevel,
        correct = FALSE
      )$p.value
    },
    numeric(1)
  )
  min(p_values)
}
# RUN THE SIMULATIONS
# Evaluate targetshuffle(total) `Simulations` times and stack the returned
# values row-wise, one row per run.
sim_result <- do.call(
  rbind,
  replicate(Simulations, targetshuffle(total), simplify = FALSE)
)
答案 0 :(得分:1)
另外,需要注意的一点是代码中创建了大量数据框。每创建一次数据框,R 都会复制其组成对象中的全部数据。如果结果的维度可以预先确定,可以考虑在函数开头预先分配空矩阵,然后在计算过程中逐步填充。