我需要在 ggplot2 中创建 -log10 p 值的 QQ 图,其中由 137 个点组成的子集("目标")要用黄色突出显示,颜色取自我正在使用的色盲友好调色板 cbbPalette。我不能改用其他软件包来实现,因为最终我需要用 gridExtra 包中的 grid.arrange 把多个 ggplot2 的 QQ 图组合到一个网格中。
设定:
library(ggplot2)
library(reshape2)
# Part of the colour-blind-friendly palette: gold for targets, black for the rest.
# The colours are named so that scale_colour_manual() matches them to the
# Category values by name; an unnamed vector is matched by level order, which
# would give "Non-Target" the gold colour.
cbbPalette <- c(Target = "#E69F00", `Non-Target` = "#000000")
set.seed(100)
数据由100,137个p值组成,其中137个为目标:
# Simulated p-values: 100,000 uniform draws followed by 137 targets
# (132 draws from [1e-7, 1] plus five hand-picked very small values).
p_values <- c(
  runif(100000, min = 0, max = 1),
  runif(132, min = 1e-7, max = 1),
  c(6e-20, 6e-19, 7e-9, 7.5e-9, 4e-8)
)

# Labels for the p-values: two random letters followed by a 4-digit number.
# These labels are NOT guaranteed to be unique.
names_letters <-
  do.call(paste0, replicate(2, sample(LETTERS, 100137, TRUE), FALSE))
names <- paste0(names_letters, sprintf("%04d", sample(9999, 100137, TRUE)))
targets <- names[100001:100137] # last 137 are targets

# Category is assigned by position *before* shuffling. Matching on the label
# (`names %in% targets`) would also flag any non-target that happens to share
# a label with a target, giving more than 137 "Target" rows.
df <- data.frame(
  p_values = p_values,
  names = names,
  Category = rep(c("Non-Target", "Target"), times = c(100000, 137)),
  stringsAsFactors = FALSE
)

# Shuffle the rows so the targets are scattered randomly within the data.
df <- df[sample(nrow(df)), ]
数据外观:
head(df, 4)
p_values names Category
89863 0.4821147 NZ3385 Non-Target
20209 0.3998835 SQ3793 Non-Target
29200 0.7893478 ZT5497 Non-Target
71623 0.3459360 QF5311 Non-Target
使用 reshape2 将 df 融化(melt)为长格式,并加入观察值(o)和期望值(e)的 -log10 p 值:
# Long format: one row per p-value, keeping its label and category.
df.m <- melt(df, id.vars = c("names", "Category"))

# Order the ROWS by p-value before computing the quantiles. Sorting only the
# value vector and writing it back into unsorted rows would detach each
# observed value from its own names/Category, so the colouring by Category
# would mark arbitrary points instead of the real targets.
df.m <- df.m[order(df.m$value), ]
rownames(df.m) <- NULL

# Observed and expected -log10(p): the i-th smallest of n p-values is compared
# with i / n.
df.m$o <- -log10(df.m$value)
df.m$e <- -log10(seq_len(nrow(df.m)) / nrow(df.m))
融化后的 df 的外观:
head(df.m,4)
names Category variable value o e
1 NZ3385 Non-Target p_values 0.4821147 19.221849 5.000595
2 SQ3793 Non-Target p_values 0.3998835 18.221849 4.699565
3 ZT5497 Non-Target p_values 0.7893478 8.154902 4.523473
4 QF5311 Non-Target p_values 0.3459360 8.124939 4.398535
QQ 图:
# QQ plot of observed vs. expected -log10(p), coloured by Category, with the
# y = x reference line.
# All points are drawn in a single layer in row order, so the few Target
# points can end up hidden underneath Non-Target points.
df_qq <- ggplot(data = df.m, mapping = aes(x = e, y = o)) +
  geom_point(mapping = aes(colour = Category)) +
  scale_colour_manual(values = cbbPalette) +
  geom_abline(slope = 1, intercept = 0) +
  labs(
    x = "Theoretical -log[10](p)",
    y = "Observed -log[10](p)"
  )
然后我得到的 QQ 图并没有突出显示我的 137 个目标。
答案 0 :(得分:1)
你可以在绘制完非目标之后,用单独的一次 geom_point() 调用来绘制目标。(注意:紧接着的这段 Shiny 应用代码与本问题无关,似乎是页面抓取时被误插入到这里的。)
library(shiny)
library(ggplot2)
library(dplyr)
library(tidyverse)
## Only run examples in interactive R sessions
if (interactive()) {
  # UI: upload and filter controls in the sidebar; the filtered table and a
  # summary of the selected variable in the main panel.
  ui <- fluidPage(
    # App title ----
    titlePanel("Survey Data Analysis Template"),
    # Sidebar layout with input and output definitions ----
    sidebarLayout(
      # Sidebar panel for inputs ----
      sidebarPanel(
        # Input: Select a file ----
        # The server reads one path with read.csv(), so a single upload only.
        fileInput("file1", "Choose CSV File",
          multiple = FALSE,
          accept = c(
            "text/csv",
            "text/comma-separated-values,text/plain",
            ".csv"
          )
        ),
        # Horizontal line ----
        tags$hr(),
        # Input: Checkbox if file has header ----
        checkboxInput("header", "Header", TRUE),
        # Input: Select separator ----
        radioButtons("sep", "Separator",
          choices = c(
            Comma = ",",
            Semicolon = ";",
            Tab = "\t"
          ),
          selected = ","
        ),
        # Input: Select quotes ----
        radioButtons("quote", "Quote",
          choices = c(
            None = "",
            "Double Quote" = '"',
            "Single Quote" = "'"
          ),
          selected = '"'
        ),
        # Horizontal line ----
        tags$hr(),
        # Input: Select number of rows to display ----
        radioButtons("disp", "Display",
          choices = c(
            Head = "head",
            All = "all"
          ),
          selected = "head"
        ),
        # Include a Slider for Strata
        sliderInput("strata",
          "strata:",
          min = 1,
          max = 20,
          value = c(1, 20),
          step = 1
        ),
        # Select Variable from the selected Dataset.
        # No data exists when the UI is built, so the choices start empty and
        # are filled in by the server once a file has been uploaded.
        selectInput("vari", "Variable",
          choices = character(0)
        ),
        hr(),
        helpText("")
      ),
      ##########################
      # Main panel for displaying outputs ----
      mainPanel(
        # Output: Data file ----
        tableOutput("contents"),
        # Summary statistics for the selected variable within the chosen strata
        verbatimTextOutput("summary")
      )
    )
  )

  server <- function(input, output, session) {
    # Uploaded CSV, parsed with the options chosen in the sidebar.
    uploaded <- reactive({
      req(input$file1)
      read.csv(input$file1$datapath,
        header = input$header,
        sep = input$sep,
        quote = input$quote,
        stringsAsFactors = FALSE
      )
    })

    # Offer the columns of the uploaded file in the "Variable" drop-down.
    observeEvent(uploaded(), {
      updateSelectInput(session, "vari", choices = colnames(uploaded()))
    })

    # Rows whose Strata value lies inside the slider range (bounds inclusive).
    mytable <- reactive({
      data <- uploaded()
      validate(
        need(
          "Strata" %in% colnames(data),
          "The uploaded file must contain a 'Strata' column."
        )
      )
      in_range <- !is.na(data$Strata) &
        data$Strata >= input$strata[1] &
        data$Strata <= input$strata[2]
      data[in_range, , drop = FALSE]
    })

    # Output by Strata Filter: first rows only, or everything.
    # Outputs are defined at server level, not inside the reactive, where
    # they would sit after a return() and never be registered.
    output$contents <- renderTable({
      if (input$disp == "head") {
        head(mytable())
      } else {
        mytable()
      }
    })

    # Summary of the variable picked in the drop-down, computed on all
    # filtered rows (not just the displayed head).
    output$summary <- renderPrint({
      data <- mytable()
      req(input$vari %in% colnames(data))
      summary(data[, input$vari, drop = FALSE])
    })
  }

  # Run the app ----
  shinyApp(ui, server)
}
在非目标之后用单独的 geom_point() 调用绘制目标;geom 按添加的顺序依次绘制,因此目标最终位于顶层。
我还给调色板中的颜色加上了名称:
cbbPalette <- c(Target = "#E69F00", `Non-Target` = "#000000")
# QQ plot with two point layers: Non-Target rows first, Target rows second.
# Layers are drawn in the order they are added, so targets end up on top.
df_qq <- ggplot(data = df.m, mapping = aes(x = e, y = o)) +
  geom_abline(slope = 1, intercept = 0) +
  geom_point(
    mapping = aes(colour = Category),
    data = df.m[df.m$Category == "Non-Target", ]
  ) +
  geom_point(
    mapping = aes(colour = Category),
    data = df.m[df.m$Category == "Target", ]
  ) +
  scale_colour_manual(values = cbbPalette) +
  labs(
    x = "Theoretical -log[10](p)",
    y = "Observed -log[10](p)"
  )
给调色板命名可以确保:即使更改 geom_point() 调用的顺序,每个类别仍会对应正确的颜色,否则颜色会混淆。
结果:
答案 1 :(得分:1)
如果您希望避免把数据框拆分到两次 geom_point 调用中,可以先按 Category 列对数据排序,然后再将其传入 ggplot。对于这两个类别值,只需简单地使用 arrange:
# Sort the rows by Category before plotting; `...` stands for the aesthetics
# of your own plot.
ggplot(arrange(df.m, Category), ...)
这会按字母顺序排列数据:先是非目标观测,然后是目标观测。点按数据行的顺序绘制,因此目标类别中的点会位于顶层。
要更好地控制排序,可以将 Category 转换为因子并显式设置水平(levels),然后按因子顺序排列:
# Turn Category into a factor with "Target" as the first level, then sort in
# descending level order; `...` stands for the aesthetics of your own plot.
df.m %>%
  mutate(Category = fct_relevel(as.factor(Category), "Target")) %>%
  arrange(desc(Category)) %>%
  ggplot(...)
我使用的是 forcats 包中的 fct_relevel,因为它是操作因子水平的一种非常简单的方法;你也可以用基础 R 来设置水平的顺序。fct_relevel 会把 Target 水平放在第一位,所以我按 Category 降序排列,使目标仍然最后绘制。
希望有意义!