在 R 的 ggplot 循环中标记前10%的值

时间:2019-12-06 11:45:30

标签: r ggplot2

更新:我已将问题缩小到循环变量 i。如果我在下面的代码中只使用某一个特定的列(“b”),它就可以正常工作。有谁知道 i 出了什么问题,以及为什么循环无法正常工作?

我有一个很大的数据集,我在其中遍历多列来创建图形。我想在图中为每一列 i 标记数值位于前10%的点(之后还想标记最低的10%)。我已经尝试了很多方法,但仍无法弄清楚如何只标记这些最高的值。标签文本应取自 `label` 列。

在示例中,我使用gghighlight进行了尝试,但这不起作用。

## loading packages
library("ggplot2")
library("purrr")
library("ggbeeswarm")
library("gghighlight")

## creating data
# Group membership follows a fixed five-subject pattern, repeated three times.
group <- rep(c("Control", "PAD", "Control", "PAD", "PAD"), times = 3)
label <- seq_len(15)

# Four simulated, integer-valued measurements; drawn in the order b, c, d, e
# so the random number stream is consumed in the same sequence every time.
b <- round(runif(n = 15, min = 1, max = 7))
c <- round(runif(n = 15, min = 1, max = 3))
d <- round(runif(n = 15, min = 3, max = 8))
e <- round(runif(n = 15, min = 1, max = 5))

# Only subjects 2, 7, 14 and 15 have an event; everyone else has none.
event <- rep("no event", 15)
event[c(2, 7, 14, 15)] <- "event"

df <- data.frame(group, label, b, c, d, e, event)
df

# The helper vectors are no longer needed once they are columns of `df`.
rm(group, label, b, c, d, e, event)

# add color coding to the dataset
# Every row is assigned one plotting category derived from `group` and `event`:
#   group == "Control"                      -> "Control"
#   group == "PAD" and event == "event"     -> "PAD with event"
#   group == "PAD" and event == "no event"  -> "PAD without event"
# Rows that match none of the rules keep the placeholder value "color".
#
# The assignment is vectorised instead of looping over `1:dim(df)[1]`, which
# iterates over c(1, 0) when `df` has no rows. No loop index is created, so
# there is nothing to clean up with rm() afterwards.
df$color <- rep("color", nrow(df))

# which() discards NA comparisons, so rows with a missing group or event
# simply keep the placeholder instead of raising an error.
df$color[which(df$group == "Control")] <- "Control"
df$color[which(df$group == "PAD" & df$event == "event")] <- "PAD with event"
df$color[which(df$group == "PAD" & df$event == "no event")] <-
  "PAD without event"
## Draw one box plot + beeswarm per measurement column and label the points
## that fall in the top 10% of that column with their `label` value.

# Iterate over the numeric measurement columns only. `names(df)[1:4]` picked
# group, label, b and c, i.e. two non-measurement columns and only half of
# the measurements.
measure_cols <- setdiff(
  names(df)[vapply(df, is.numeric, logical(1))],
  "label"
)

for (i in measure_cols) {
  # The cutoff must come from the column currently being plotted (`i`), not
  # from a hard-coded column. `>=` is used because the values are rounded and
  # heavily tied: with a strict `>` the largest values often equal the
  # quantile and nothing would be labelled.
  cutoff <- quantile(df[[i]], 0.9, type = 2, na.rm = TRUE)
  top_rows <- df[which(df[[i]] >= cutoff), , drop = FALSE]

  p <- ggplot(df, aes(x = group, y = .data[[i]])) +
    geom_boxplot(show.legend = FALSE) +
    geom_beeswarm(aes(color = color), size = 2) +
    scale_color_manual(
      values = c(
        "Control" = "#107f40",
        "PAD with event" = "#D85622",
        "PAD without event" = "#2D416D"
      )
    ) +
    # Only the pre-filtered top rows are passed to the text layer, so every
    # other point stays unlabelled; the text comes from the `label` column.
    geom_text(data = top_rows, aes(label = label), vjust = -0.8)

  # ggplot objects are not auto-printed inside a for loop; without an explicit
  # print() the loop runs silently and no figure is drawn.
  print(p)
}

我没有收到错误,但它标记了所有的点,而不是只标记前10%的值。此外,标签显示的不是 `label` 列的内容,而是分组名称。
下面的示例图展示了我期望的标签效果(仅标记前10%的值):example

2 个答案:

答案 0 :(得分:1)

以下是问题所要求的。
它首先将数据重新格式化,然后在构面中绘制所有箱须图。
要点是使用ggrepel::geom_label_repel将标签放置在远离这些点的位置。

library(tidyverse)
library(ggplot2)
library(ggrepel)

# Reshape to long format: one row per subject and measurement column, with the
# column name in `key` and its value in `value`. pivot_longer() replaces the
# superseded gather().
# `flag` marks values at or above the 90th percentile within each
# group/measurement combination. The grouping is dropped again with ungroup()
# so later operations on `df2` are not silently performed per group.
df2 <- df %>%
  select(-event) %>%
  pivot_longer(
    cols = -c(group, label, color),
    names_to = "key",
    values_to = "value"
  ) %>%
  group_by(group, key) %>%
  mutate(flag = value >= quantile(value, 0.9, type = 2, na.rm = TRUE)) %>%
  ungroup()

# The flagged rows feed both the point layer and the label layer, so they are
# computed once here instead of re-filtering `df2` inside each layer.
top_points <- df2 %>%
  filter(flag) %>%
  distinct()

# One facet per measurement column; only the flagged points are drawn and
# labelled, with ggrepel pushing the labels away from the points.
g <- ggplot(df2, aes(x = group, y = value, color = color)) +
  geom_boxplot(show.legend = FALSE) +
  scale_color_manual(
    values = c(
      "Control" = "#107f40",
      "PAD with event" = "#D85622",
      "PAD without event" = "#2D416D"
    )
  ) +
  geom_point(data = top_points, show.legend = FALSE) +
  geom_label_repel(
    data = top_points,
    aes(label = label),
    color = "black"
  ) +
  facet_wrap(~ key)

g

enter image description here

数据创建代码。

我通过设置RNG种子重新发布问题中的代码,以使其可重现。

set.seed(1234)  # Pin the RNG state so the simulated values are reproducible

# Study design: the five-subject group pattern occurs three times in a row.
group <- rep_len(c("Control", "PAD", "Control", "PAD", "PAD"), length.out = 15)
label <- seq_len(15L)

# Simulated measurements, rounded to whole numbers. The draws happen in the
# order b, c, d, e; changing that order would change the generated data.
b <- round(runif(15, min = 1, max = 7))
c <- round(runif(15, min = 1, max = 3))
d <- round(runif(15, min = 3, max = 8))
e <- round(runif(15, min = 1, max = 5))

# Start from "no event" everywhere and switch on the four event subjects.
event <- replace(rep("no event", 15), c(2, 7, 14, 15), "event")

df <- data.frame(group, label, b, c, d, e, event)
df

# Remove the temporary vectors now that `df` holds all of them.
rm(group, label, b, c, d, e, event)

# Derive the plotting category for every row from `group` and `event`.
# Rows that match none of the three rules keep the placeholder "color".
#
# This is done with vectorised assignments rather than a row loop over
# `1:dim(df)[1]`: that sequence is c(1, 0) for a data frame without rows, so
# the loop would run on rows that do not exist.
df$color <- rep("color", nrow(df))

# which() turns each condition into row positions and drops NA comparisons,
# so a missing group or event leaves the placeholder untouched.
df$color[which(df$group == "Control")] <- "Control"
df$color[which(df$group == "PAD" & df$event == "event")] <- "PAD with event"
df$color[which(df$group == "PAD" & df$event == "no event")] <-
  "PAD without event"

答案 1 :(得分:0)

 # Start the Gradle wrapper tasks `runSuite1` and `runSuite2` as background
 # jobs (trailing `&`), so the second command is launched without waiting for
 # the first one to finish.
 # NOTE(review): these commands look unrelated to the R question above —
 # confirm this answer belongs on this page.
 ./gradlew runSuite1 -i --rerun-tasks &
 ./gradlew runSuite2 -i --rerun-tasks &