我编写了代码来绘制A / B测试变体的密度数据。我想通过阴影(填充稍微透明)改善每条曲线下方的区域。我目前正在使用matplot,但了解ggplot可能是更好的选择。
有什么想法吗?感谢。
# Setup data frame - these are results from an A/B experiment
conv_data = data.frame(
VarNames = c("Variation 1", "Variation 2", "Variation 3") # Set variation names
,NumSuccess = c(1,90,899) # Set number of successes / conversions
,NumTrials = c(10,100,1070) # Set number of trials
)
conv_data$NumFailures = conv_data$NumTrials - conv_data$NumSuccess # Set number of failures [no conversions]
num_var = NROW(conv_data) # Set total number of variations
plot_col = rainbow(num_var) # Set plot colors
get_density_data <- function(n_var, s, f) {
x = seq(0,1,length.out=100) # 0.01,0.02,0.03...1
dens_data = matrix(data = NA, nrow=length(x), ncol=(n_var+1))
dens_data[,1] = x
# set density data
for(j in 1:n_var) {
# +1 to s[], f[] to ensure uniform prior
dens_data[,j+1] = dbeta(x, s[j]+1, f[j]+1)
}
return(dens_data)
}
density_data = get_density_data(num_var, conv_data$NumSuccess, conv_data$NumFailures)
matplot(density_data[,1]*100, density_data[,-1], type = "l", lty = 1, col = plot_col, ylab = "Probability Density", xlab = "Conversion Rate %", yaxt = "n")
legend("topleft", col=plot_col, legend = conv_data$VarNames, lwd = 1)
这会产生以下情节:
答案 0 :(得分:1)
# Setup data frame - these are results from an A/B experiment
conv_data = data.frame(
VarNames = c("Variation 1", "Variation 2", "Variation 3") # Set variation names
,NumSuccess = c(1,90,899) # Set number of successes / conversions
,NumTrials = c(10,100,1070) # Set number of trials
)
conv_data$NumFailures = conv_data$NumTrials - conv_data$NumSuccess # Set number of failures [no conversions]
num_var = NROW(conv_data) # Set total number of variations
plot_col = rainbow(num_var) # Set plot colors
get_density_data <- function(n_var, s, f) {
x = seq(0,1,length.out=100) # 0.01,0.02,0.03...1
dens_data = matrix(data = NA, nrow=length(x), ncol=(n_var+1))
dens_data[,1] = x
# set density data
for(j in 1:n_var) {
# +1 to s[], f[] to ensure uniform prior
dens_data[,j+1] = dbeta(x, s[j]+1, f[j]+1)
}
return(dens_data)
}
density_data = get_density_data(num_var, conv_data$NumSuccess, conv_data$NumFailures)
matplot(density_data[,1]*100, density_data[,-1], type = "l",
lty = 1, col = plot_col, ylab = "Probability Density",
xlab = "Conversion Rate %", yaxt = "n")
legend("topleft", col=plot_col, legend = conv_data$VarNames, lwd = 1)
## and add this part
for (ii in seq_along(plot_col))
polygon(c(density_data[, 1] * 100, rev(density_data[, 1] * 100)),
c(density_data[, ii + 1], rep(0, nrow(density_data))),
col = adjustcolor(plot_col[ii], alpha.f = .25))
答案 1 :(得分:0)
能够回答自己的问题:
df = as.data.frame(t(conversion_data))
dfs = stack(df)
ggplot(dfs, aes(x=values)) + geom_density(aes(group=ind, colour=ind, fill=ind), alpha=0.3)