Question

所以我的最终目标是绘制一个包含多个 95% 置信区间的图，分为 2 个组，如下例所示：

Text

我找到了这个代码：https://rpubs.com/nati147/703463

但是如何在图中添加分组比较？

> 编写一个函数 `CI_95`，输入一个样本值向量，并输出该样本的 95% 置信区间。您可以使用 `margin_error_95` 函数。

' Prints (to the Immediate window) the worksheet row numbers of the last
' nValues cells that stay visible in column 1 of Sheet1's used range after
' filtering field 1 for "A123". The filter is removed again before the rows
' are collected; the visible-cell range captured beforehand is what is walked.
' NOTE(review): if fewer than nValues cells are visible, the leading slots of
' the result keep their initial value 0 - confirm this is acceptable.
Sub FilteredAdvanced()
    Const nValues As Long = 3 'amount of rows you want to find from the end

    Dim lastRows() As Long
    Dim visibleCells As Range
    Dim slot As Long
    Dim areaIdx As Long
    Dim rowIdx As Long
    Dim k As Long

    ReDim lastRows(nValues - 1) As Long

    ' Apply the filter, remember which cells survived it, then clear it.
    With Sheet1
        .AutoFilterMode = False
        .UsedRange.AutoFilter 1, "A123"
        Set visibleCells = .UsedRange.Columns(1).SpecialCells(xlCellTypeVisible)
        .UsedRange.AutoFilter 'remove filter
    End With

    ' Fill the array back to front while walking areas and rows bottom-up,
    ' so the printed row numbers end up in ascending sheet order.
    slot = nValues - 1
    For areaIdx = visibleCells.Areas.Count To 1 Step -1
        With visibleCells.Areas(areaIdx)
            For rowIdx = .Rows.Count To 1 Step -1
                lastRows(slot) = .Rows(rowIdx).Row
                slot = slot - 1
                If slot < 0 Then Exit For
            Next rowIdx
        End With
        ' Leave the outer loop as well once every slot has been filled.
        If slot < 0 Then Exit For
    Next areaIdx

    For k = 0 To nValues - 1
        Debug.Print lastRows(k)
    Next k
End Sub

> 编写一个名为 `margin_error_95` 的函数，它接受一个样本值向量，并输出 95% 置信水平下的误差幅度（margin of error）。

# 95% confidence interval for the mean of a sample, using a supplied sigma.
#
# sample_vals: numeric vector of sample values.
# sig:         standard deviation handed on to margin_error_95().
#
# Returns mean(sample_vals) -/+ the margin of error, i.e. c(lower, upper)
# when the margin is a single number.
CI_95 <- function(sample_vals, sig) {
  error <- margin_error_95(sample_vals, sig)
  # Bare final expression (not an assignment) so the interval is returned
  # visibly and prints when the function is called at top level.
  mean(sample_vals) + c(-1, 1) * error
}

运行图：

# Margin of error of a 95% confidence interval: 1.96 * sig / sqrt(n).
#
# sample_vals: vector of sample values; only its length n is used.
# sig:         standard deviation used in the formula.
#
# Returns the margin of error as a number.
margin_error_95 <- function(sample_vals, sig) {
  n <- length(sample_vals)
  # Bare final expression (not an assignment) so the value is returned
  # visibly and prints when the function is called at top level.
  1.96 * (sig / sqrt(n))
}

# Simulate B normal samples and plot the 95% confidence interval of each.
#
# seed: value passed to set.seed() so the figure is reproducible.
# B:    number of simulated samples (one interval per sample).
# n:    size of each sample.
# mu:   population mean used for the simulation and the reference line.
# sig:  population standard deviation used for the simulation and the CIs.
#
# Each sample mean is drawn as a point at height i, with its interval as a
# horizontal segment: default foreground colour when the interval contains
# mu, red when it does not. A blue vertical line marks mu.
# Called for its plotting side effect.
plot_CI_95 <- function(seed, B = 100, n = 30, mu = 5, sig = 1.2) {
  idx <- seq_len(B)

  set.seed(seed)
  # Draw every sample once and keep c(mean, lower, upper) per column. This
  # yields the same numbers as re-seeding and re-simulating for each of the
  # three quantities, without tripling the work.
  sims <- vapply(
    idx,
    function(i) {
      samp <- rnorm(n, mean = mu, sd = sig)
      c(mean(samp), range(CI_95(samp, sig)))
    },
    numeric(3)
  )
  means <- sims[1, ]
  x_0 <- sims[2, ] # lower bounds
  x_1 <- sims[3, ] # upper bounds

  plot(means, idx,
    pch = 20,
    xlim = c(mu - sig, mu + sig),
    ylim = c(0, B + 1),
    xlab = "sample means",
    ylab = "index of the CI",
    main = paste(B, "Confidence intervals")
  )

  # Inclusive coverage test in base R, so dplyr::between() is not required.
  covers <- x_0 <= mu & mu <= x_1
  segments(x_0, idx, x_1, idx,
    col = ifelse(covers, par("fg"), "red"),
    lwd = 2
  )

  abline(v = mu, col = "blue") # vertical line at the population mean
}

Text

Answer 1

这是一个解决方案，不过可能还需要根据您的喜好进一步调整。我保留了 `plot_CI_95` 函数的总体结构，但添加了一个遍历各组的循环。这意味着 `mu` 和 `sig` 变量现在必须包含多个值：如果要显示组间差异，每个组对应一个值。此外还新增了颜色等若干图形参数。结果如下所示。

为了避免两个组的区间相互重叠，可以调整以下参数：1) `png` 函数中的 `height`（增大该值会使各组之间的间距更大）；2) `offset` 参数（可以增大到约 0.3）；3) `segments` 函数中的 `lwd`（值越小，线条越细）。使用 `png` 或类似函数直接保存图形，便于微调出所需的外观。

library(dplyr)

# 95% confidence interval for a sample mean, given a standard deviation.
#
# sample_vals: numeric vector of sample values.
# sig:         standard deviation forwarded to margin_error_95().
#
# Returns mean(sample_vals) -/+ the margin of error, i.e. c(lower, upper)
# when the margin is a single number.
CI_95 <- function(sample_vals, sig) {
  error <- margin_error_95(sample_vals, sig)
  # End on the expression itself rather than an assignment, so the result
  # is visible (auto-printed) when the function is called interactively.
  mean(sample_vals) + c(-1, 1) * error
}


# Margin of error for a 95% confidence interval: 1.96 * sig / sqrt(n).
#
# sample_vals: vector of sample values; only its length n enters the formula.
# sig:         standard deviation used in the formula.
#
# Returns the margin of error as a number.
margin_error_95 <- function(sample_vals, sig) {
  n <- length(sample_vals)
  # End on the expression itself rather than an assignment, so the result
  # is visible (auto-printed) when the function is called interactively.
  1.96 * (sig / sqrt(n))
}

# Open a PNG graphics device for the figure: 3 x 7 inches, res = 1000.
# Plotting calls made after this go to "group_plot.png" until the device
# is closed.
png(
  filename = "group_plot.png",
  width = 3,
  height = 7,
  units = "in",
  res = 1000
)

# Plot simulated 95% confidence intervals for several groups side by side.
#
# seed:         base random seed; group j is simulated under seed + j so the
#               groups get different random samples.
# B:            number of simulated samples (intervals) per group.
# n:            size of each sample.
# mu, sig:      population mean / standard deviation, one value per group
#               (must have the same length).
# offset:       vertical shift around each index used to separate groups.
# colsuse:      one colour per group.
# group_labels: legend labels, one per group.
#
# For each group the sample means are drawn as points, each interval as a
# horizontal segment (group colour when it contains the group's mu, red
# otherwise) and a vertical line in the group colour marks mu.
# Called for its plotting side effect.
plot_CI_95 <- function(seed,
                       B = 100,
                       n = 30,
                       mu = c(5, 4.5),
                       sig = c(1.2, 1),
                       offset = 0.25,
                       colsuse = c("steelblue", "gold"),
                       group_labels = c("Male", "Female")) {
  n_groups <- length(mu)
  stopifnot(
    n_groups >= 1,
    length(sig) == n_groups,
    length(colsuse) >= n_groups,
    length(group_labels) >= n_groups
  )

  idx <- seq_len(B)
  groups <- seq_len(n_groups)
  # Spread the groups evenly from +offset down to -offset. With two groups
  # this is +offset / -offset; with more groups no two share a position.
  y_shift <- seq(offset, -offset, length.out = n_groups)

  # Empty canvas whose x-range covers mu -/+ sig of every group, so no
  # group is clipped when the groups differ a lot.
  plot(NULL,
    xlim = range(mu - sig, mu + sig),
    ylim = c(0, B + 1),
    xlab = "sample means",
    ylab = "index of the CI",
    main = paste(B, "Confidence intervals")
  )

  for (j in groups) {
    set.seed(seed + j)
    # Draw every sample once and keep c(mean, lower, upper) per column.
    sims <- vapply(
      idx,
      function(i) {
        samp <- rnorm(n, mean = mu[j], sd = sig[j])
        c(mean(samp), range(CI_95(samp, sig[j])))
      },
      numeric(3)
    )
    lower <- sims[2, ]
    upper <- sims[3, ]
    y <- idx + y_shift[j]

    points(sims[1, ], y, pch = 20, col = colsuse[j])

    # Inclusive coverage test; intervals that miss mu[j] are drawn in red.
    covers <- lower <= mu[j] & mu[j] <= upper
    segments(lower, y, upper, y,
      col = ifelse(covers, colsuse[j], "red"),
      lwd = 1
    )

    abline(v = mu[j], col = colsuse[j]) # group's population mean
  }

  # Keep the legend clipped to the plot region; restore the caller's
  # setting when the function exits.
  old_par <- par(xpd = FALSE)
  on.exit(par(old_par), add = TRUE)
  legend("topright",
    legend = group_labels[groups],
    lty = 1,
    col = colsuse[groups],
    cex = 0.5
  )
}

# Draw the grouped confidence-interval figure with seed 1, then close the
# current graphics device.
plot_CI_95(seed = 1)

dev.off()

垂直 95% 置信区间图 2 组比较

1 个答案: