我需要把3种处理分配到一块田地的36个小区上,每种处理重复12次。每个小区都有一个固定的环境变量值。
我如何找到最佳的处理分配方案,使各处理的环境变量的均值和标准差尽可能相等?
一些虚拟数据:
# Dummy data: 36 plots, 3 treatments with 12 replicates each, and one
# fixed environmental value per plot.
# The column names `var` and `environment` are the ones the rest of the
# script (and the printed output below) refer to.
n_plots <- 36
df1 <- data.frame(
  plot = seq_len(n_plots),
  # Treatment labels 1, 2, 3 repeated explicitly to fill all plots
  # (instead of relying on data.frame() recycling a length-3 vector).
  var = rep(1:3, times = n_plots / 3),
  # Random draws, so the values differ between runs unless a seed is set.
  environment = rnorm(n_plots, 9300, 3000)
)
head(df1)
plot var environment
1 1 1 8594.994
2 2 2 9737.577
3 3 3 8111.464
4 4 1 8367.244
5 5 2 5965.046
6 6 3 7664.372
下面的代码基本上只是暴力随机搜索,但我认为我的问题组合数太多、计算量太大,不太适合这种方法。总结一下,我想使各处理之间环境变量均值的差异以及标准差尽可能小。请参见箱线图以便理解。我希望这至少是一个有趣的问题,也欢迎任何解决该问题的想法!谢谢。
library(dplyr)
# Boxplot by var to show the distribution before optimization ----
# Per-treatment mean and standard deviation of the environmental variable,
# computed in a single grouped summary. The results are stored under their
# own names so the base functions mean() and sd() are not shadowed.
stats_before <- df1 %>%
  group_by(var) %>%
  summarise(mean_env = mean(environment), sd_env = sd(environment))

# Sum of the per-treatment standard deviations.
sum_sd_before <- sum(stats_before$sd_env)

# Distance of treatment 1's mean from the means of treatments 2 and 3.
diff_means_before <-
  abs(stats_before$mean_env[1] - stats_before$mean_env[2]) +
  abs(stats_before$mean_env[1] - stats_before$mean_env[3])

boxplot(
  environment ~ var,
  data = df1,
  main = paste(
    "Before Optimization Sum-SD:", round(sum_sd_before),
    "Diff_means:", round(diff_means_before)
  ),
  xlab = "treatment", ylab = "environment"
)
# Random-search optimisation of the treatment layout ----
# Every iteration shuffles the treatment labels (`var`) over the plots and
# scores the resulting layout with two numbers:
#   diff_means: |mean_1 - mean_k| summed over all other treatments k
#   sum_sd:     sum of the per-treatment standard deviations
# A layout becomes the new best only if BOTH scores are strictly lower than
# those of the current best.

# Number of random layouts to try; lower this for a quick run.
n_iter <- 10000000L

# Score one layout. `values` are the environmental values, `groups` the
# treatment labels (same length). Returns c(diff_means = , sum_sd = ).
# base::mean / stats::sd are referenced explicitly so that global objects
# named `mean` or `sd` cannot be picked up instead of the functions.
score_layout <- function(values, groups) {
  by_group <- split(values, groups)
  group_means <- vapply(by_group, base::mean, numeric(1))
  group_sds <- vapply(by_group, stats::sd, numeric(1))
  c(
    diff_means = sum(abs(group_means[1] - group_means[-1])),
    sum_sd = sum(group_sds)
  )
}

# Best scores / layout found so far. Starting from Inf makes the first
# shuffle the initial best, so `best_setup` always exists after the loop.
var_1 <- Inf
sd_1 <- Inf
best_setup <- NULL

for (i in seq_len(n_iter)) {
  # Randomize treatment position over experimental units
  df1$var <- sample(df1$var)
  score <- score_layout(df1$environment, df1$var)

  # Accept only if both criteria improve (scalar test, hence `&&`)
  if (score[["diff_means"]] < var_1 && score[["sum_sd"]] < sd_1) {
    # Save better values
    var_1 <- score[["diff_means"]]
    sd_1 <- score[["sum_sd"]]
    # Save improved experimental design
    best_setup <- df1
    # Give visual output of the better design
    boxplot(
      environment ~ var,
      data = best_setup,
      main = paste(
        "nth Run:", i,
        "Sum-SD:", round(sd_1),
        "Diff_means:", round(var_1)
      ),
      xlab = "var", ylab = "environment"
    )
  }
}

# Give visual output of the best experimental design found. The title uses
# the scores stored for `best_setup`, not those of the last random shuffle.
boxplot(
  environment ~ var,
  data = best_setup,
  main = paste(
    "After Optimization Sum-SD:", round(sd_1),
    "Diff_means:", round(var_1)
  ),
  xlab = "var", ylab = "environment"
)