Question

我编写了代码来绘制 A/B 测试各个变体的密度曲线。我想给每条曲线下方的区域加上阴影（半透明填充），让图形更易读。我目前使用的是 matplot，但我知道 ggplot 可能是更好的选择。

有什么想法吗？感谢。

# A/B experiment results: one row per variation.
conv_data <- data.frame(
  VarNames = c("Variation 1", "Variation 2", "Variation 3"), # variation labels
  NumSuccess = c(1, 90, 899),                                # conversions
  NumTrials = c(10, 100, 1070)                               # total trials
)
# Failures (no conversion) are the trials that were not successes.
conv_data[["NumFailures"]] <-
  conv_data[["NumTrials"]] - conv_data[["NumSuccess"]]
num_var <- nrow(conv_data)        # number of variations
plot_col <- rainbow(n = num_var)  # one plot colour per variation

# Evaluate Beta densities for several variations on a common grid.
#
# Arguments:
#   n_var    Number of variations (density columns) to compute.
#   s        Numeric vector of success counts; s[j] belongs to variation j.
#   f        Numeric vector of failure counts; f[j] belongs to variation j.
#   n_points Number of equally spaced grid points on [0, 1] (default 100).
#
# Returns a numeric matrix with n_points rows and n_var + 1 columns:
# column 1 is the grid, column j + 1 is dbeta(grid, s[j] + 1, f[j] + 1).
get_density_data <- function(n_var, s, f, n_points = 100) {
    # Grid includes both end points, so the step is 1 / (n_points - 1);
    # with the default 100 points that is 1/99, not 0.01.
    x <- seq(0, 1, length.out = n_points)
    dens_data <- matrix(data = NA_real_, nrow = length(x), ncol = n_var + 1)
    dens_data[, 1] <- x

    # seq_len() yields an empty loop for n_var == 0, whereas 1:n_var would
    # iterate over c(1, 0) and index past the last column.
    for (j in seq_len(n_var)) {
        # +1 on successes and failures corresponds to a uniform prior
        dens_data[, j + 1] <- dbeta(x, s[j] + 1, f[j] + 1)
    }
    dens_data
}

# Density matrix: column 1 is the grid, the remaining columns one per variation.
density_data <- get_density_data(
  num_var, conv_data$NumSuccess, conv_data$NumFailures
)

# Plot every density as a solid line; the x grid is shown as a percentage and
# the y axis is not drawn.
matplot(
  x = density_data[, 1] * 100, y = density_data[, -1],
  type = "l", lty = 1, col = plot_col,
  xlab = "Conversion Rate %", ylab = "Probability Density", yaxt = "n"
)
legend(x = "topleft", legend = conv_data$VarNames, col = plot_col, lwd = 1)

这会生成如下图形：[图片]

Answer 1

# A/B experiment results: one row per variation.
conv_data <- data.frame(
  VarNames = c("Variation 1", "Variation 2", "Variation 3"), # variation labels
  NumSuccess = c(1, 90, 899),                                # conversions
  NumTrials = c(10, 100, 1070)                               # total trials
)
# Failures (no conversion) are the trials that were not successes.
conv_data$NumFailures <- with(conv_data, NumTrials - NumSuccess)
num_var <- nrow(conv_data)        # number of variations
plot_col <- rainbow(n = num_var)  # one plot colour per variation

# Evaluate Beta densities for several variations on a common grid.
#
# Arguments:
#   n_var    Number of variations (density columns) to compute.
#   s        Numeric vector of success counts; s[j] belongs to variation j.
#   f        Numeric vector of failure counts; f[j] belongs to variation j.
#   n_points Number of equally spaced grid points on [0, 1] (default 100).
#
# Returns a numeric matrix with n_points rows and n_var + 1 columns:
# column 1 is the grid, column j + 1 is dbeta(grid, s[j] + 1, f[j] + 1).
get_density_data <- function(n_var, s, f, n_points = 100) {
  # Grid includes both end points, so the step is 1 / (n_points - 1);
  # with the default 100 points that is 1/99, not 0.01.
  x <- seq(0, 1, length.out = n_points)
  dens_data <- matrix(data = NA_real_, nrow = length(x), ncol = n_var + 1)
  dens_data[, 1] <- x

  # seq_len() yields an empty loop for n_var == 0, whereas 1:n_var would
  # iterate over c(1, 0) and index past the last column.
  for (j in seq_len(n_var)) {
    # +1 on successes and failures corresponds to a uniform prior
    dens_data[, j + 1] <- dbeta(x, s[j] + 1, f[j] + 1)
  }
  dens_data
}

# Density matrix: column 1 is the grid, the remaining columns one per variation.
density_data <- get_density_data(
  num_var, conv_data$NumSuccess, conv_data$NumFailures
)

# Plot every density as a solid line; the x grid is shown as a percentage and
# the y axis is not drawn.
matplot(
  x = density_data[, 1] * 100, y = density_data[, -1],
  type = "l", lty = 1, col = plot_col,
  xlab = "Conversion Rate %", ylab = "Probability Density", yaxt = "n"
)
legend(x = "topleft", legend = conv_data$VarNames, col = plot_col, lwd = 1)

## Added part: shade the area under every curve.
# Each polygon runs left-to-right along the curve and then right-to-left along
# y = 0, so the x outline and the zero baseline are the same for all curves.
x_outline <- c(density_data[, 1] * 100, rev(density_data[, 1] * 100))
baseline <- rep(0, nrow(density_data))
for (k in seq_along(plot_col)) {
  polygon(
    x = x_outline,
    y = c(density_data[, k + 1], baseline),
    # same hue as the line, 25% opacity so overlapping areas stay visible
    col = adjustcolor(plot_col[k], alpha.f = .25)
  )
}

[图片]

Answer 2

我自己找到了答案：

# NOTE(review): `conversion_data` is not defined anywhere in this snippet --
# presumably it holds one row of sampled values per variation; confirm.
# NOTE(review): ggplot(), aes() and geom_density() come from ggplot2, which is
# not attached here; run library(ggplot2) first.
# Transpose so each row of `conversion_data` becomes a column, then stack the
# columns into long format (`values` = observations, `ind` = source column).
df <- as.data.frame(t(conversion_data))
dfs <- stack(df)
# One semi-transparent density per `ind` group, coloured and filled by group.
ggplot(dfs, aes(x = values, group = ind, colour = ind, fill = ind)) +
  geom_density(alpha = 0.3)

R 密度图 - 填充曲线下方的区域

2 个答案: