Plotly:在boxplot中用样本名称注释异常值

时间:2017-11-27 19:15:59

标签: r ggplot2 plotly boxplot

我正在尝试使用ggplot和数据集airquality创建一个箱线图(boxplot),其中Month位于x轴,Ozone值位于y轴。我的目标是为该图添加注释,使得当鼠标悬停在异常值点上时,除臭氧值外还会显示样本名称(Sample):

library(tidyverse)
library(plotly)
library(datasets)
# Load the built-in airquality data set.
data(airquality)

# add months: turn the Month column into a factor with readable labels
airquality$Month <- factor(airquality$Month,
                           labels = c("May", "Jun", "Jul", "Aug", "Sep"))

# add sample names: one "Sample_<row number>" identifier per row
# (seq_len() replaces the redundant seq(1:n) construction)
airquality$Sample <- paste0("Sample_", seq_len(nrow(airquality)))

# boxplot of Ozone by Month, converted to a plotly object and displayed
p <- ggplot(airquality, aes(x = Month, y = Ozone)) +
  geom_boxplot()
p <- plotly_build(p)
p

这是生成的图:

enter image description here

默认情况下,当我将鼠标悬停在每个箱体上时,会显示x轴变量对应的基本汇总统计信息。但是,我还希望看到异常值对应的是哪个样本。例如,当鼠标悬停在五月上时,它会显示异常值115,但不会显示该值实际上来自Sample_30。

如何将Sample变量添加到异常值点,以便显示异常值和样本名称?

4 个答案:

答案 0 :(得分:0)

我在https://github.com/ropensci/plotly/issues/887上找到了解决方法

可以尝试如下代码:

 library(plotly)

 # Flag the outliers of each month's box rather than of the pooled Ozone
 # values, so the labelled markers correspond to the per-month boxes drawn
 # below. boxplot.stats() applies the default 1.5 * IQR whisker rule used by
 # boxplot().
 # NOTE(review): plotly computes its own quartiles, so borderline points may
 # differ slightly from the outliers plotly itself draws -- confirm visually.
 is_outlier <- as.logical(ave(
   airquality$Ozone, airquality$Month,
   FUN = function(v) v %in% boxplot.stats(v)$out
 ))

 # The mask contains no NA (%in% never returns NA), so rows with a missing
 # Ozone value are simply not selected instead of becoming all-NA rows.
 outliers <- airquality[is_outlier, ]

 # One box per month plus a marker layer whose hover text is the Day of each
 # outlier; x and y are taken from the plot_ly() call.
 plot_ly(airquality, y = ~Ozone, x = ~Month, type = "box") %>%
   add_markers(data = outliers, text = ~Day)

答案 1 :(得分:0)

用下面的方法几乎可以实现:

library(ggplot2)
library(plotly)
library(datasets)
# Load the built-in airquality data set.
data(airquality)
# add months: turn the Month column into a factor with readable labels
airquality$Month <- factor(airquality$Month,
                           labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names: one "Sample_<row number>" identifier per row
# (seq_len() replaces the redundant seq(1:n) construction)
airquality$Sample <- paste0("Sample_", seq_len(nrow(airquality)))
# boxplot, converted to a plotly widget
gg <- ggplot(airquality, aes(x = Month, y = Ozone)) +
  geom_boxplot()
ggly <- ggplotly(gg)
# add hover info: one label (sample, month, ozone) per row of airquality
hoverinfo <- with(airquality, paste0("sample: ", Sample, "</br></br>",
                                     "month: ", Month, "</br>",
                                     "ozone: ", Ozone))
# Attach the labels to the first trace of the widget.
ggly$x$data[[1]]$text <- hoverinfo
# NOTE(review): "boxes" looks like a value of the box trace's `hoveron`
# attribute rather than a `hoverinfo` flag -- confirm against the plotly
# reference; this may be related to the hover glitch described below.
ggly$x$data[[1]]$hoverinfo <- c("text", "boxes")

ggly

enter image description here

不幸的是,悬停对第一个箱线图不起作用……

答案 2 :(得分:0)

我已经设法通过Shiny实现了这一目标。

library(plotly)
library(shiny)
library(htmlwidgets)
library(datasets)

# Prepare data ----
# Load the built-in airquality data set.
data(airquality)
# add months: turn the Month column into a factor with readable labels
airquality$Month <- factor(airquality$Month,
                           labels = c("May", "Jun", "Jul", "Aug", "Sep"))
# add sample names: one "Sample_<row number>" identifier per row
# (seq_len() replaces the redundant seq(1:n) construction)
airquality$Sample <- paste0("Sample_", seq_len(nrow(airquality)))

# Plotly on hover event ----
# JavaScript source, one vector element per line, that the server passes to
# onRender(). It registers two plotly event handlers on the widget element.
addHoverBehavior <- local({
  # plotly_hover: when exactly one point is hovered, hide the elements with
  # class 'hovertext' (presumably plotly's own hover labels) and publish the
  # point's pixel position, data coordinates and text as Shiny inputs.
  hover_handler <- c(
    "  el.on('plotly_hover', function(data) {",
    "    if(data.points.length==1){",
    "      $('.hovertext').hide();",
    "      Shiny.setInputValue('hovering', true);",
    "      var d = data.points[0];",
    "      Shiny.setInputValue('left_px', d.xaxis.d2p(d.x) + d.xaxis._offset);",
    "      Shiny.setInputValue('top_px', d.yaxis.l2p(d.y) + d.yaxis._offset);",
    "      Shiny.setInputValue('dx', d.x);",
    "      Shiny.setInputValue('dy', d.y);",
    "      Shiny.setInputValue('dtext', d.text);",
    "    }",
    "  });"
  )
  # plotly_unhover: tell Shiny that nothing is hovered any more.
  unhover_handler <- c(
    "  el.on('plotly_unhover', function(data) {",
    "    Shiny.setInputValue('hovering', false);",
    "  });"
  )
  c("function(el, x){", hover_handler, unhover_handler, "}")
})

# Shiny app ----
# UI: a style sheet for the custom tooltip in the page head, followed by a
# relatively positioned wrapper that holds the plot and the tooltip rendered
# by the server (the tooltip itself is absolutely positioned via .arrow_box).
ui <- fluidPage(
  # Tooltip box with an arrow on its left edge (http://www.cssarrowplease.com/)
  tags$head(tags$style("
               .arrow_box {
                    position: absolute;
                  pointer-events: none;
                  z-index: 100;
                  white-space: nowrap;
                  background: rgb(54,57,64);
                  color: white;
                  font-size: 14px;
                  border: 1px solid;
                  border-color: rgb(54,57,64);
                  border-radius: 1px;
               }
               .arrow_box:after, .arrow_box:before {
                  right: 100%;
                  top: 50%;
                  border: solid transparent;
                  content: ' ';
                  height: 0;
                  width: 0;
                  position: absolute;
                  pointer-events: none;
               }
               .arrow_box:after {
                  border-color: rgba(136, 183, 213, 0);
                  border-right-color: rgb(54,57,64);
                  border-width: 4px;
                  margin-top: -4px;
               }
               .arrow_box:before {
                  border-color: rgba(194, 225, 245, 0);
                  border-right-color: rgb(54,57,64);
                  border-width: 10px;
                  margin-top: -10px;
               }")),
  tags$div(
    plotlyOutput("myplot"),
    uiOutput("hover_info"),
    style = "position:relative"
  )
)

# Server logic: draws the boxplot widget and renders the custom tooltip from
# the inputs published by the JavaScript hover handlers.
server <- function(input, output){
  output$myplot <- renderPlotly({
    # Tag every row with its index and pass it through the `ids` aesthetic so
    # the ids stored on the trace can be used to pick the matching labels.
    airquality[[".id"]] <- seq_len(nrow(airquality))
    row_labels <- with(airquality, paste0("<b> sample: </b>", Sample, "<br/>",
                                          "<b> month: </b>", Month, "<br/>",
                                          "<b> ozone: </b>", Ozone))
    boxes <- ggplot(airquality, aes(x=Month, y=Ozone, ids=.id)) + geom_boxplot()
    widget <- ggplotly(boxes, tooltip = "y")
    trace_ids <- widget$x$data[[1]]$ids
    widget$x$data[[1]]$text <- row_labels[trace_ids]
    # Register the JavaScript hover/unhover handlers on the rendered widget.
    onRender(widget, addHoverBehavior)
  })
  output$hover_info <- renderUI({
    hovering <- isTRUE(input$hovering)
    if(hovering){
      # 4 = border-width of the :after arrow
      left_px <- input$left_px + 4 + 5
      # 24 = line-height/2 * number of lines; 2 = padding; 1 = border thickness
      top_px <- input$top_px - 24 - 2 - 1
      position <- paste0("left: ", left_px, "px;",
                         "top: ", top_px, "px;")
      tags$div(
        class = "arrow_box", style = position,
        tags$p(HTML(input$dtext),
               style="margin: 0; padding: 2px; line-height: 16px;")
      )
    }
  })
}

# Launch the app.
shinyApp(ui = ui, server = server)

enter image description here

答案 3 :(得分:0)

此方法可以得到相同的结果,但悬停时不显示箱线图的汇总统计信息。做法是去掉boxplot图层上的离群值及其悬停信息,然后仅针对离群值叠加一个带悬停信息的geom_point图层。所绘制的离群值的定义见 here 的说明。在处理更复杂的图形(例如并排分组的箱线图)时,此方法比其他解决方案更合适。有趣的是,对于这份数据,ggplotly生成的箱线图与ggplot生成的图并不相同:ggplotly中8月的上须线(upper whisker)比ggplot中8月的上须线延伸得更远。

library(dplyr)
library(plotly)
library(datasets)
library(ggplot2)
# Load the built-in airquality data set.
data(airquality)

# TRUE for values beyond `coef` * IQR below the first or above the third
# quartile of `x`; NA where `x` is NA.
is_iqr_outlier <- function(x, coef = 1.5) {
  quartiles <- quantile(x, c(1 / 4, 3 / 4), na.rm = TRUE, names = FALSE)
  fence <- coef * (quartiles[2] - quartiles[1])
  x < quartiles[1] - fence | x > quartiles[2] + fence
}

# manipulate data
mydata <- airquality %>%
  mutate(
    # add months
    Month = factor(Month, labels = c("May", "Jun", "Jul", "Aug", "Sep")),
    # add sample names
    Sample = paste0("Sample_", seq_len(n()))
  ) %>%
  # label if outlier sample by Month; the fences are built on the quartiles
  # (1/4 and 3/4), not on the 1/3 and 2/3 quantiles
  group_by(Month) %>%
  mutate(
    OutlierFlag = ifelse(is_iqr_outlier(Ozone), "Outlier", "NotOutlier")
  ) %>%
  ungroup()

# boxplot, with the flagged outliers overlaid as a separate point layer that
# carries the sample name and ozone value for the tooltip
p <- ggplot(mydata, aes(x = Month, y = Ozone)) +
  geom_boxplot() +
  geom_point(
    data = mydata %>% filter(OutlierFlag == "Outlier"),
    aes(group = Month, label1 = Sample, label2 = Ozone),
    size = 2
  )

output <- ggplotly(p, tooltip = c("label1", "label2"))

# makes boxplot outliers invisible and hover info off, so that only the
# overlaid point layer responds to hovering
for (i in seq_along(output$x$data)) {
  # identical() also copes with a trace that has no `type` field
  if (identical(output$x$data[[i]]$type, "box")) {
    output$x$data[[i]]$marker$opacity <- 0
    output$x$data[[i]]$hoverinfo <- "none"
  }
}

# print end result of plotly graph
output

boxplot with outlier ID hover