通过找到最佳排列来优化数据框中组的均值和标准差

时间:2018-10-03 14:15:17

标签: r optimization mean standard-deviation nonlinear-optimization

上下文

我必须在一块试验田的36个地块上分配3种处理,每种处理重复12次。每个地块都有固定的环境变量。

问题

我如何找到最佳的处理分配方案,以使各处理的环境变量的均值和标准差尽可能相等?

数据

一些虚拟数据:

 # Dummy data: 36 plots, 3 treatments with 12 replicates each, and one fixed
# environmental covariate per plot. The column names (`var`, `environment`)
# match the printed output and the optimisation code that follows.
df1 <- data.frame(
  plot = seq(1, 36, by = 1),
  # Treatments 1, 2, 3 repeated explicitly instead of relying on recycling.
  var = rep(1:3, times = 12),
  environment = rnorm(36, mean = 9300, sd = 3000)
)
head(df1)
    plot var environment
1    1   1    8594.994
2    2   2    9737.577
3    3   3    8111.464
4    4   1    8367.244
5    5   2    5965.046
6    6   3    7664.372

我的朴素方法

这基本上只是蛮力搜索,但我认为我的问题组合数太多、计算量太大,不适合这种方法。总结一下,我想使各处理之间均值和标准差的差异最小。请参见箱线图以便理解。我希望这至少是一个有趣的问题,也欢迎任何解决该问题的想法!谢谢。

library(dplyr)    
# Brute-force search for a balanced treatment layout.
#
# The treatment labels in df1$var are shuffled repeatedly; a shuffle is kept
# when it is strictly better than the best layout so far on BOTH metrics:
#   * sum_sd     - sum of the per-treatment standard deviations of environment
#   * diff_means - |m1 - m2| + |m1 - m3|, the distance of the other treatment
#                  means from the mean of the first treatment
# The best layout found is stored in `best_setup`; df1 itself is not modified.

# Compute both balance metrics for one assignment of `groups` to `values`.
# Returns a named numeric vector with elements `sum_sd` and `diff_means`.
balance_stats <- function(values, groups) {
  group_means <- tapply(values, groups, mean)
  group_sds <- tapply(values, groups, sd)
  c(
    sum_sd = sum(group_sds),
    # Distance of every other group mean from the first group's mean.
    diff_means = sum(abs(group_means[[1]] - group_means[-1]))
  )
}

# Draw the per-treatment boxplot of `setup`, reporting `stats` in the title.
plot_setup <- function(setup, label, stats, xlab) {
  boxplot(
    environment ~ var,
    data = setup,
    main = paste(
      label,
      "Sum-SD:", round(stats[["sum_sd"]]),
      "Diff_means:", round(stats[["diff_means"]])
    ),
    xlab = xlab,
    ylab = "environment"
  )
}

# Baseline: the layout currently stored in df1. Starting from it guarantees
# that `best_setup` and `best_stats` exist even if no shuffle improves on it.
best_setup <- df1
best_stats <- balance_stats(df1$environment, df1$var)
plot_setup(best_setup, "Before Optimization", best_stats, xlab = "treatment")

# Number of random layouts to try.
n_iter <- 10000000L

for (i in seq_len(n_iter)) {
  # Random permutation of the treatment labels over the experimental units.
  candidate <- sample(df1$var)
  candidate_stats <- balance_stats(df1$environment, candidate)

  # Keep the candidate only if it improves both metrics at once.
  if (candidate_stats[["diff_means"]] < best_stats[["diff_means"]] &&
        candidate_stats[["sum_sd"]] < best_stats[["sum_sd"]]) {
    best_stats <- candidate_stats
    best_setup$var <- candidate
    # Visual feedback for every improved design.
    plot_setup(best_setup, paste("nth Run:", i), best_stats, xlab = "var")
  }
}

# Final plot: the title reports the metrics of the best design found, not
# those of the last random shuffle.
plot_setup(best_setup, "After Optimization", best_stats, xlab = "var")

0 个答案:

没有答案