突出显示ggplot2 stat_qq输出中的点

时间:2018-01-27 18:19:13

标签: r ggplot2

我正在尝试根据顺序统计量(order statistics),在 ggplot 的 stat_qq 输出中突出显示选定的点:

# Simulated data: four samples, each holding 100 standard-normal draws.
ydata <- data.frame(
  sample = rep(c("Sample 1", "Sample 2", "Sample 3", "Sample 4"), each = 100),
  x = rnorm(400)
)

# Sort the values within each sample, then record each value's rank (1..100)
# inside its own sample.
ydata <- ydata[order(ydata$sample, ydata$x), ]
ydata$order <- rep(seq_len(100), times = 4)

# Q-Q plot of every sample in its own facet. The x axis is labelled in
# standard deviations together with the upper-tail percentage at each break.
# The text layer labels the selected order statistics; it places each label at
# x = qnorm(pnorm(x)), which simply recovers the sample value x itself.
ggplot(ydata, aes(sample = x)) +
  stat_qq() +
  facet_wrap(~sample) +
  scale_x_continuous(
    breaks = -2:2,
    labels = function(x) {
      paste0(x, " \n [", 100 * signif(pnorm(-2:2, lower.tail = FALSE), 2), "%]")
    }
  ) +
  theme_bw(base_size = 14, base_family = "sans") +
  labs(
    title = "Four Samples of 100 Observations From Normal Distribution",
    caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
    y = "Sample Value",
    x = "Standard Deviation\n[%exceeding]"
  ) +
  geom_text(
    data = subset(ydata, order %in% c(2, 16, 50, 84, 98)),
    mapping = aes(x = qnorm(pnorm(x)), y = x, label = order),
    nudge_y = 1
  )

产生了这个:

enter image description here

显然,我的文本标签没有标注在正确的点上(即第2、16、50、84和98个点)。我还希望能把这些实际的点用红色突出显示。非常感谢任何建议。

2 个答案:

答案 0 :(得分:1)

您可以在 geom_point 中使用 stat = "qq",然后用保存在新变量中的颜色来着色:

# Simulated data: four samples, each holding 100 standard-normal draws.
ydata <- data.frame(
  sample = rep(c("Sample 1", "Sample 2", "Sample 3", "Sample 4"), each = 100),
  x = rnorm(400)
)

# Sort within each sample and store the within-sample rank (1..100).
ydata <- ydata[order(ydata$sample, ydata$x), ]
ydata$order <- rep(seq_len(100), times = 4)

# Per-row point colour (red for the selected ranks, black otherwise) and
# per-row label text (the rank for selected rows, empty string otherwise).
is_marked <- ydata$order %in% c(2, 16, 50, 84, 98)
ydata$highlight <- ifelse(is_marked, "#FF0000", "#000000")
ydata$order_txt <- ifelse(is_marked, ydata$order, "")

# Q-Q plot per sample with the selected order statistics drawn in red and
# labelled with their rank.
#
# `color` and `label` are supplied as whole vectors outside aes(), so they are
# applied to the computed Q-Q points by position. NOTE(review): this relies on
# ydata already being sorted by sample and then by x, so that row i of ydata
# lines up with the i-th computed point -- confirm before reordering ydata.
ggplot(ydata, aes(sample = x)) +
  geom_point(color = ydata$highlight, stat = "qq") +
  geom_text(label = ydata$order_txt, stat = "qq", nudge_y = 1) +
  facet_wrap(~sample) +
  scale_x_continuous(
    breaks = -2:2,
    # Build each label from the break values the scale passes in, rather than
    # from a second hard-coded copy of the breaks, so the two cannot drift.
    labels = function(x) {
      paste0(x, " \n [", 100 * signif(pnorm(x, lower.tail = FALSE), 2), "%]")
    }
  ) +
  theme_bw(base_size = 14, base_family = "sans") +
  labs(
    title = "Four Samples of 100 Observations From Normal Distribution",
    caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
    y = "Sample Value",
    x = "Standard Deviation\n[%exceeding]"
  )

答案 1 :(得分:1)

您可以在 ggplot 之外计算 qq 值,并新建一列分组变量,把各点分为"突出显示"和"不突出显示"两组。然后用 geom_point 绘制这些点,并把该分组变量映射到颜色(colour)美学上。例如:

library(tidyverse)

# Reproducible example data: fix the RNG seed before drawing the values.
set.seed(2)
ydata <- data.frame(
  sample = rep(c("Sample 1", "Sample 2", "Sample 3", "Sample 4"), each = 100),
  x = rnorm(400)
)

# Sort within each sample and store the within-sample rank (1..100).
ydata <- ydata[order(ydata$sample, ydata$x), ]
ydata$order <- rep(seq_len(100), times = 4)

# Ranks (order-statistic indices) of the points to highlight.
pts <- c(2, 16, 50, 84, 98)

# Compute the theoretical normal quantiles outside ggplot, separately for each
# sample, and flag the rows whose rank is in `pts` ("A" = highlight, "B" = rest).
# The augmented data is then piped straight into ggplot.
ydata %>%
  group_by(sample) %>%
  mutate(xq = qqnorm(x, plot.it = FALSE)$x) %>%
  ungroup() %>%
  mutate(group = ifelse(order %in% pts, "A", "B")) %>%
  ggplot(aes(xq, x, colour = group)) +
  geom_point(size = 1) +
  # Only highlighted points get a visible label; the rest get an empty string.
  geom_text(aes(label = ifelse(group == "A", order, "")),
            nudge_y = 1, size = 3) +
  facet_wrap(~sample) +
  theme_bw(base_size = 14, base_family = "sans") +
  # Name the colours so the mapping does not depend on the order of the levels.
  scale_colour_manual(values = c(A = "red", B = "black")) +
  # "none" replaces the deprecated `guides(colour = FALSE)` form.
  guides(colour = "none")

enter image description here

作为替代方案,还有一种快速的变通办法:使用 ggplot_build 在原始图中突出显示特定的点(请注意,标签相对于被突出显示的点的位置并不完全正确):

# Row indices to recolour in the built layer data: the five selected ranks,
# repeated at offsets 0, 100, 200 and 300.
# NOTE(review): assumes layer 1 holds 100 points per sample, in sample order.
pts <- as.vector(outer(c(2, 16, 50, 84, 98), seq(0, 300, 100), "+"))

# Build the plot so its computed layer data can be edited
# (assumes the plot has been assigned to the object p).
pb <- ggplot_build(p)

# Recolour the selected points in the first layer.
pb$data[[1]]$colour[pts] <- "red"

# Recolour every label in the second layer.
pb$data[[2]]$colour <- "red"

# Turn the edited build back into a drawable gtable and draw it.
p <- ggplot_gtable(pb)
plot(p)

enter image description here