目前我倾向于使用不带须线的箱线图,只显示第1、第2、第3四分位数,再加上偏离样本中位数超过(例如)1个总体标准差的数据点。
但是,我需要把这个图表拿给几位老师看,以了解他们最喜欢哪一种,所以想把我的图与普通的箱线图进行比较。然而,无论是只有一个异常值,还是(例如)有5个异常值取相同的值,普通箱线图看起来都完全一样。在这种情况下,这是一个无法接受的缺陷。
# Example data: 60 rows with a numeric `value` and a two-level factor
# `places` ("a" / "b"), written out in dput() form.
test <-structure(list(value = c(3, 5, 3, 3, 6, 4, 5, 4, 6, 4, 6, 4,
4, 6, 5, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 5, 6, 6, 4, 3, 5, 4,
6, 5, 6, 4, 5, 5, 3, 4, 4, 6, 4, 4, 5, 5, 3, 4, 5, 8, 8, 8, 8,
9, 6, 6, 7, 6, 9), places = structure(c(1L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 1L), .Label = c("a", "b"), class = "factor")), .Names = c("value",
"places"), row.names = c(NA, -60L), class = "data.frame")
# Baseline: ordinary box plot of value per place. Rows 55 and 60 are both
# ("a", 9), so those two points are drawn at exactly the same position.
ggplot(test, aes(x=places,y=value))+geom_boxplot()
这里有两个异常值(“a”,9) - 但只显示了一个“点”。
所以我的问题:如何抖动异常值。而且 - 你会为这种数据建议什么样的显示?
答案 0 :(得分:8)
# Replace the draw method of the proto-based GeomBoxplot object so that
# outlier points can be jittered horizontally.
#
# Arguments:
#   .               the proto object itself.
#   data            per-box data; the columns x, xmin, xmax, colour, size,
#                   fill, alpha, lower, middle, upper, ymin, ymax and the
#                   list column `outliers` are read below.
#   ...             forwarded to the GeomPoint/GeomPath/GeomRect draw calls.
#   outlier.colour, outlier.shape, outlier.size
#                   appearance of the outlier points.
#   outlier.jitter  horizontal jitter width for the outlier points
#                   (0 = no jitter, the default).
#
# Returns a named grobTree holding the outlier points, both whiskers, the box
# and the median line.
GeomBoxplot$draw <- function(., data, ..., outlier.colour = "black",
                             outlier.shape = 16, outlier.size = 2,
                             outlier.jitter = 0) {
  defaults <- with(data, data.frame(
    x = x, xmin = xmin, xmax = xmax,
    colour = colour, size = size, linetype = 1, group = 1,
    alpha = 1, fill = alpha(fill, alpha), stringsAsFactors = FALSE
  ))
  # Two identical rows: one per end point of a whisker segment.
  defaults2 <- defaults[c(1, 1), ]

  # The original condition was `length(data$outliers[[1]] >= 1)`, which takes
  # the length of a logical vector; the comparison belongs outside length().
  if (!is.null(data$outliers) && length(data$outliers[[1]]) >= 1) {
    # NOTE(review): `pp` was used but never defined in this snippet. It is
    # reconstructed here as a horizontal-only jitter driven by
    # `outlier.jitter`, the only otherwise-unused argument -- confirm.
    pp <- position_jitter(width = outlier.jitter, height = 0)
    # NOTE(review): `.scale` is not defined inside this function; kept as in
    # the original call -- confirm it is not needed by pp$adjust().
    p <- pp$adjust(
      data.frame(
        x = data$x[rep(1, length(data$outliers[[1]]))],
        y = data$outliers[[1]]
      ),
      .scale
    )
    outliers_grob <- GeomPoint$draw(data.frame(
      x = p$x, y = p$y, colour = I(outlier.colour),
      shape = outlier.shape, alpha = 1, size = outlier.size,
      fill = NA
    ), ...)
  } else {
    outliers_grob <- NULL
  }

  with(data, ggname(.$my_name(), grobTree(
    outliers_grob,
    GeomPath$draw(data.frame(y = c(upper, ymax), defaults2), ...),
    GeomPath$draw(data.frame(y = c(lower, ymin), defaults2), ...),
    GeomRect$draw(data.frame(ymax = upper, ymin = lower, defaults), ...),
    GeomRect$draw(data.frame(ymax = middle, ymin = middle, defaults), ...)
  )))
}
ggplot(test, aes(x=places,y=value))+geom_boxplot(outlier.jitter=0.05)
# Alternative that leaves GeomBoxplot itself untouched: derive a new proto
# geom from it, give it its own draw method, and expose it through an
# accessor function geom_boxplot_jitter_outlier().
GeomBoxplotJitterOutlier <- proto(GeomBoxplot, {
  draw <- function(., data, ..., outlier.colour = "black", outlier.shape = 16,
                   outlier.size = 2, outlier.jitter = 0) {
    # copy the body of function 'draw' above and paste here.
  }

  objname <- "boxplot_jitter_outlier"
  desc <- "Box and whiskers plot with jittered outlier"
  guide_geom <- function(.) "boxplot_jitter_outlier"
})
geom_boxplot_jitter_outlier <- GeomBoxplotJitterOutlier$build_accessor()
ggplot(test, aes(x=places,y=value))+geom_boxplot_jitter_outlier(outlier.jitter=0.05)
答案 1 :(得分:6)
似乎已接受的答案不再适用,因为ggplot2已更新。 经过网上搜索后,我发现了以下内容:http://comments.gmane.org/gmane.comp.lang.r.ggplot2/3616 - 看看Winston Chang的回复 -
geom_boxplot(outlier.colour = NA)
# Return the elements of `y` that lie outside the box plot fences.
#
# Arguments:
#   y     numeric vector.
#   coef  multiple of the interquartile range added beyond the first and
#         third quartiles to form the fences (default 1.5).
#
# Returns a vector (not a data frame) holding the values of `y` below
# Q1 - coef * IQR or above Q3 + coef * IQR, in their original order; the
# result has length 0 when nothing lies outside the fences.
find_outliers <- function(y, coef = 1.5) {
  qs <- c(0, 0.25, 0.5, 0.75, 1)
  stats <- as.numeric(quantile(y, qs))
  # stats[2] and stats[4] are the first and third quartiles.
  iqr <- diff(stats[c(2, 4)])
  outliers <- y < (stats[2] - coef * iqr) | y > (stats[4] + coef * iqr)
  y[outliers]
}
# NOTE(review): ddply() and .() below are plyr functions and ggplot() /
# geom_*() are ggplot2 functions; no library(plyr) or library(ggplot2) call
# is visible in this snippet -- confirm both are attached before running.
library(MASS) # Use the birthwt data set from MASS
# Find the outliers for each level of 'smoke'
# (calls find_outliers() on the lwt column within each smoke group)
outlier_data <- ddply(birthwt, .(smoke), summarise, lwt = find_outliers(lwt))
# This draws an ordinary box plot
ggplot(birthwt, aes(x = factor(smoke), y = lwt)) + geom_boxplot()
# This draws the outliers using geom_dotplot
# (the box plot's own outlier points are given colour NA, and the rows of
# outlier_data are drawn as a separate dot plot layer)
ggplot(birthwt, aes(x = factor(smoke), y = lwt)) +
geom_boxplot(outlier.colour = NA) +
#also consider:
# geom_jitter(alpha = 0.5, size = 2)+
geom_dotplot(data = outlier_data, binaxis = "y",
stackdir = "center", binwidth = 4)
答案 2 :(得分:2)
你可以用 geom_jitter 来实现这一点。但请注意,geom_boxplot 本身已经为异常值绘制了点,因此为了避免同一个点被显示两次,需要使用 geom_boxplot(outlier.shape = NA) 来关闭箱线图自带的异常值点。
# Example data: 60 rows with a numeric `value` and a two-level factor
# `places` ("a" / "b"). The trailing backslashes that wrapped the original
# lines were removed because they are not valid R line continuations.
test <- structure(list(value = c(3, 5, 3, 3, 6, 4, 5, 4, 6, 4, 6, 4,
4, 6, 5, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 5, 6, 6, 4, 3, 5, 4,
6, 5, 6, 4, 5, 5, 3, 4, 4, 6, 4, 4, 5, 5, 3, 4, 5, 8, 8, 8, 8,
9, 6, 6, 7, 6, 9), places = structure(c(1L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 1L), .Label = c("a", "b"), class = "factor")),
.Names = c("value", "places"), row.names = c(NA, -60L), class = "data.frame")
# adding a level that you will use later for giving colors
l <- rep(c(10,20,30,40,50,60), 10)
# The plots below map colour to a `levels` column of `test`, which the
# original snippet never created; attach `l` to the data frame here.
# NOTE(review): stored as a factor so the colours form discrete groups --
# confirm that a discrete (not continuous) colour scale is what was intended.
test$levels <- as.factor(l)
# [1]
# original plot
ggplot(test, aes(x=places,y=value))+geom_boxplot()
# [2]
# plot with outlier from boxplot and the points jittered to see
# distribution (outliers and the same point from position jitter would be
# counted twice for each different height)
ggplot(data=test, aes(x=places, y=value)) + geom_boxplot() + geom_jitter(position=position_jitter(width=0.1, height=0))
# [3]
# make wider the jitter to avoid overplotting because there are a lot
# of points with the same value, also remove the outliers from boxplot
# (they are plotted with the geom_jitter anyway)
ggplot(data=test, aes(x=places, y=value)) + geom_boxplot(outlier.shape = NA) +
  geom_jitter(position=position_jitter(width=0.3, height=0))
# [4]
# adding colors to the points to see if there is a sub-pattern in the distribution
ggplot(data=test, aes(x=places, y=value)) + geom_boxplot(outlier.shape = NA) +
  geom_jitter(position=position_jitter(width=0.3, height=0), aes(colour=levels))
# [5]
# adding a bit of vertical jittering
# jittering (a good option for a less discrete datasets)
ggplot(data=test, aes(x=places, y=value)) + geom_boxplot(outlier.shape = NA) +
  geom_jitter(position=position_jitter(width=0.3, height=0.05), aes(colour=levels))
# [6]
# finally remember that position_jitter makes a jittering of a 40% of
# the resolution of the data, so if you forget the height=0 you will
# have a total different picture
# NOTE(review): the original statement ended on a dangling `+`; the missing
# layer is reconstructed as a geom_jitter() with default jitter, which is the
# case the comment above describes -- confirm.
ggplot(data=test, aes(x=places, y=value)) + geom_boxplot(outlier.shape = NA) +
  geom_jitter(aes(colour=levels))
答案 3 :(得分:1)
# Box plot whose built-in outlier points are drawn fully transparent,
# overlaid with jittered points for the rows of `test` whose value exceeds 8.
g <- ggplot(test, aes(x = places, y = value))
g +
  geom_boxplot(outlier.colour = rgb(0, 0, 0, 0)) +
  geom_point(
    data = test[test$value > 8, ],
    position = position_jitter(width = .4)
  )
答案 4 :(得分:1)
# Draw one box plot group with jittered outlier points.
#
# The box, whiskers and median are drawn by ggplot2::GeomBoxplot$draw_group();
# the point grob it produces for the outliers is then swapped for one built
# from jittered coordinates.
#
# Arguments:
#   data, panel_params, coord, ...
#       passed on to ggplot2::GeomBoxplot$draw_group().
#   outlier.jitter.width
#       horizontal jitter amount; NULL means half of (xmax - xmin).
#   outlier.jitter.height
#       vertical jitter amount (default 0 = none).
#   outlier.colour, outlier.fill, outlier.shape, outlier.size,
#   outlier.stroke, outlier.alpha
#       appearance of the outlier points; a NULL value falls back to the
#       corresponding column of the first row of `data`.
#
# Returns the grob produced by GeomBoxplot$draw_group(), unchanged when it has
# no point child, otherwise with that child replaced.
#
# NOTE(review): the two jitter arguments and both `return(boxplot_grob)`
# statements were missing from the original snippet although the body relies
# on them; they are reconstructed here -- confirm the defaults.
DrawGeomBoxplotJitterOutlier <- function(data, panel_params, coord, ...,
                                         outlier.jitter.width = NULL,
                                         outlier.jitter.height = 0,
                                         outlier.colour = NULL,
                                         outlier.fill = NULL,
                                         outlier.shape = 19,
                                         outlier.size = 1.5,
                                         outlier.stroke = 0.5,
                                         outlier.alpha = NULL) {
  boxplot_grob <- ggplot2::GeomBoxplot$draw_group(data, panel_params, coord, ...)
  point_grob <- grep("geom_point.*", names(boxplot_grob$children))
  # No point child means there are no outliers to jitter.
  if (length(point_grob) == 0) {
    return(boxplot_grob)
  }

  # Scalar "first non-NULL" helper (plain if/else instead of ifelse()).
  ifnotnull <- function(x, y) if (is.null(x)) y else x

  if (is.null(outlier.jitter.width)) {
    outlier.jitter.width <- (data$xmax - data$xmin) / 2
  }

  x <- data$x[1]
  y <- data$outliers[[1]]
  # A single outlier cannot overlap another one, so it is left in place.
  if (outlier.jitter.width > 0 && length(y) > 1) {
    x <- jitter(rep(x, length(y)), amount = outlier.jitter.width)
  }
  if (outlier.jitter.height > 0 && length(y) > 1) {
    y <- jitter(y, amount = outlier.jitter.height)
  }

  # The original also passed a second `fill = NA` argument, which
  # data.frame() would only have renamed to an extra `fill.1` column;
  # it is dropped here.
  outliers <- data.frame(
    x = x, y = y,
    colour = ifnotnull(outlier.colour, data$colour[1]),
    fill = ifnotnull(outlier.fill, data$fill[1]),
    shape = ifnotnull(outlier.shape, data$shape[1]),
    size = ifnotnull(outlier.size, data$size[1]),
    stroke = ifnotnull(outlier.stroke, data$stroke[1]),
    alpha = ifnotnull(outlier.alpha, data$alpha[1]),
    stringsAsFactors = FALSE
  )
  boxplot_grob$children[[point_grob]] <-
    ggplot2::GeomPoint$draw_panel(outliers, panel_params, coord)
  boxplot_grob
}

# ggproto geom that inherits from GeomBoxplot and overrides only draw_group.
# NOTE(review): the parent object was missing from the original ggproto()
# call; ggplot2::GeomBoxplot is assumed because the draw function delegates
# to it -- confirm.
GeomBoxplotJitterOutlier <- ggplot2::ggproto("GeomBoxplotJitterOutlier",
                                             ggplot2::GeomBoxplot,
                                             draw_group = DrawGeomBoxplotJitterOutlier)

# Layer constructor for GeomBoxplotJitterOutlier.
#
# Arguments:
#   mapping, data, stat, position, show.legend, inherit.aes
#       passed to ggplot2::layer().
#   ...
#       further layer parameters (e.g. the outlier.* appearance arguments).
#   outlier.jitter.width, outlier.jitter.height
#       jitter amounts handed to DrawGeomBoxplotJitterOutlier(); both default
#       to 0, i.e. no jitter.
#   na.rm
#       passed as a layer parameter.
#
# Returns the layer created by ggplot2::layer().
geom_boxplot_jitter_outlier <- function(mapping = NULL, data = NULL,
                                        stat = "boxplot", position = "dodge",
                                        ..., outlier.jitter.width = 0,
                                        outlier.jitter.height = 0,
                                        na.rm = FALSE, show.legend = NA,
                                        inherit.aes = TRUE) {
  ggplot2::layer(
    geom = GeomBoxplotJitterOutlier, mapping = mapping, data = data,
    stat = stat, position = position, show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      na.rm = na.rm,
      outlier.jitter.width = outlier.jitter.width,
      outlier.jitter.height = outlier.jitter.height,
      ...
    )
  )
}