我构建了一个闪亮的网络APP,该界面具有不错的界面,可以计算出区分测试组所需的样本量。我想建立一个可视化电源工作原理的模拟,我的理解是,电源是将测试组彼此区分开的概率。使用内置的功率计算器,我发现当%CV = 15,%Improvement = 20和power = 80%时,我需要进行10次重复来区分2个样本组。当我尝试使用随机数生成器对此进行仿真时,我平均获得40-55%的幂。请参见下面的代码。
cv <- 15 #coefficient of variance is the standard deviation divided by the mean
percent_increase <- 20 #percent increase to detect
mean1 <- 40
mean2 <- mean1 + (mean1*(percent_increase/100))
sd1 <- (cv*mean1)/100
sd2 <- (cv*mean2)/100
pooled_sd <- sqrt((sd1^2 + sd2^2)/2)
difference <- (percent_increase/100)*mean1
pwrt <- power.t.test(delta=difference,sd=sd1,power=0.8,sig.level = .05, type="two.sample", alternative = "two.sided")
print(paste("Number of replicates needed is", pwrt$n))
#Simulate sample populations and tag which samples are different from each other.
record_test <- c()
for(i in 1:1000){
rep_sequence <- seq(2,50, by=4)
data_list <- list() # empty data list
for(r in 1:length(rep_sequence)){
d <- rnorm(rep_sequence[r], mean = mean1, sd = pooled_sd)
d2 <- rnorm(rep_sequence[r], mean = mean2, sd = pooled_sd)
df <- data.frame("value"=d, "sample"="Sample 1")
df2 <- data.frame("value"=d2, "sample"="Sample 2")
df3 <- rbind(df, df2)
df3$rep_n <- rep_sequence[r]
data_list[[r]] <- df3
}
all_data <- do.call(rbind, data_list)
all_data_summ <- all_data %>%
group_by(sample, rep_n) %>%
summarise(N= n(),
mean = mean(value),
sd = sd(value)
) %>%
ungroup() %>%
mutate(se = sd / sqrt(N),
ci_lower = mean - qnorm(0.975)*se,
ci_upper = mean + qnorm(0.975)*se,
#ci_lower = mean - qt(1 - (0.05/2), N -1)* se,
#ci_upper = mean + qt(1 - (0.05/2), N -1)* se,
)
different_tag <- (all_data_summ %>% filter(sample == 'Sample 1') %>% select(N, ci_upper) ) %>% #Sample 1 set
left_join(all_data_summ %>% filter(sample == 'Sample 2') %>% select(N, ci_lower), by=c("N"="N")) %>% #Sample 2 set
rename(ci_upper_s1 = ci_upper, ci_lower_s2 = ci_lower) %>%
mutate(different = ifelse(ci_lower_s2 > ci_upper_s1 ,'yes', 'no'))
all_data_summ1 <- all_data_summ %>%
left_join(different_tag %>% select(N, different), by=c("N"="N"))
replicate_n <- 10 #at n=10 the power should be 80%
test_result <- all_data_summ1[all_data_summ1$rep_n == replicate_n, ]$different[[1]]
record_test <- c(record_test, test_result)
ggplot(all_data_summ1, aes(rep_n, mean)) +
geom_errorbar(aes(x=rep_n, ymin=ci_lower, ymax= ci_upper, group=sample),position=position_dodge(width=1.5), width=1.5,size=1, colour="red") +
geom_point(data= filter(all_data_summ1, different == "yes"), colour="black", size=8,stroke=2, aes(rep_n,mean,group=sample),position=position_dodge(width=1.5), shape=0) +
geom_point(position=position_dodge(width=1.5), size=4, pch=21, aes(fill=sample)) +
scale_x_continuous(breaks = rep_sequence) +
labs(x="Replication", y="Average", title= paste0('Sample Means with 95% Confidence Interval Bars at CV=',cv, '%')) +
theme_gray(base_size = 25) +
theme(plot.title = element_text(hjust = 0.5))
}
print(table(record_test)/length(record_test))