ggplot2 boxplot:中位数的水平条?

时间:2011-11-25 12:17:16

标签: r ggplot2 boxplot

我想通过在中位数处添加一个粗条来使ggplot2 boxplot更有意义(因此,如果中位数等于下四分位数或上四分位数中的任何一个,则可以检测到它相等)。我最近发现了一个Kohske的帖子: Can I get boxplot notches in ggplot2? 但我不知道如何给“横杆”一个“高度”。然后我试了一下 使用矩形但它也不起作用。这是一个最小的例子:

require(ggplot2) 
require(reshape2) 
require(plyr) 
set.seed(1) 
## parameters 
p1 <- c(5, 20, 100) 
p2 <- c("f1", "f2", "f3", "f4", "f5") 
p3 <- c("g1","g2","g3","g4","g5") 
N <- 1000 
## lengths 
l1 <- length(p1) 
l2 <- length(p2) 
l3 <- length(p3) 
## build result array containing the measurements 
arr <- array(rep(NA, l1*l2*l3*N), dim=c(l1, l2, l3, N), 
         dimnames=list( 
         p1=p1, 
         p2=p2, 
         p3=p3, 
         N=1:N)) 
for(i in 1:l1){ 
    for(j in 1:l2){ 
        for(k in 1:l3){ 
            arr[i,j,k,] <- i+j+k+runif(N, min=-4, max=4) 
        } 
    } 
} 

arr <- arr + rexp(3*5*5*N) 
## create molten data 
mdf <- melt(arr, formula = . ~ p1 + p2 + p3 + N) # create molten data frame 
## confidence interval calculated by `boxplot.stats` 
f <- function(x){ 
    ans <- boxplot.stats(x) 
    data.frame(x=x, y=ans$stats[3], ymin=ans$conf[1], ymax=ans$conf[2]) 
} 

## (my poor) trial 
ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) + 
stat_summary(fun.data=f, geom="rectangle", colour=NA, fill="black", 
xmin=x-0.36, xmax=x+0.36, ymin=max(y-0.2, ymin), ymax=min(y+0.2, 
ymax)) + facet_grid(p2 ~ p1, scales = "free_y") 


**SOLUTION** (after the discussion with Kohske below):
f <- function(x, height){
    ans <- median(x)
    data.frame(y=ans, ymin=ans-height/2, ymax=ans+height/2)
}
p <- ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="crossbar", height=0.5, colour=NA,
         fill="black", width=0.78) +
facet_grid(p2 ~ p1, scales = "free_y")
pdf()
print(p)
dev.off()

**UPDATE** Hmmm... it's not that trivial. The following example shows that the "height" of the crossbar should be adapted to the y-axis scale, otherwise it might be overseen.

require(ggplot2)
require(reshape2)
require(plyr)
set.seed(1)
## parameters
p1 <- c(5, 20, 100)
p2 <- c("f1", "f2", "f3", "f4", "f5")
p3 <- c("g1","g2","g3","g4","g5")
N <- 1000
## lengths
l1 <- length(p1)
l2 <- length(p2)
l3 <- length(p3)
## build result array containing the measurements
arr <- array(rep(NA, l1*l2*l3*N), dim=c(l1, l2, l3, N),
     dimnames=list(
     p1=p1,
     p2=p2,
     p3=p3,
     N=1:N))
for(i in 1:l1){
    for(j in 1:l2){
        for(k in 1:l3){
            arr[i,j,k,] <- i+j^4+k+runif(N, min=-4, max=4)
        }
    } 
}
arr <- arr + rexp(3*5*5*N)
arr[1,2,5,] <- arr[1,2,5,]+30
arr[1,5,3,] <- arr[1,5,3,]+100

## create molten data
mdf <- melt(arr, formula = . ~ p1 + p2 + p3 + N) # create molten data frame

f <- function(x, height){
    ans <- median(x)
    data.frame(y=ans, ymin=ans-height/2, ymax=ans+height/2)
}

## plot
p <- ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="crossbar", height=0.7, colour=NA,
         fill="black", width=0.78) +
facet_grid(p2 ~ p1, scales = "free_y")
pdf()
print(p)
dev.off()

1 个答案:

答案 0 :(得分:7)

这是一个例子:

f <- function(x, height) {
 ans <- median(x)
 data.frame(ymin = ans-height/2, ymax = ans+height/2, y = ans)
}

df <- data.frame(x=gl(2,6), y=c(1,1,1,1,3,3, 1,1,3,3,3,3))
ggplot(df, aes(x, y)) + geom_boxplot() + 
 stat_summary(fun.data = f, geom = "crossbar", height = 0.1,
  colour = NA, fill = "skyblue", width = 0.8, alpha = 0.5)

enter image description here

如果你只想改变外表,那么这里是一个快速的黑客,我不建议,

df <- data.frame(x=gl(2,6), y=c(c(1,1,1,1,3,3), c(1,1,3,3,3,3)*10))
ggplot(df, aes(x, y)) + geom_boxplot() + facet_grid(x~.)

gs <- grid.gget("geom_boxplot", grep = T)
if (inherits(gs, "grob")) gs <- list(gs)
gss <- llply(gs, function(g) g$children[[length(g$children)]])

l_ply(gss, function(g) grid.edit(g$name, grep=T, just = c("left", "center"), height = unit(0.05, "native"), gp = gpar(fill = "skyblue", alpha = 0.5, col = NA)))

enter image description here