我有两个数据框,想把它们合并到同一张图中。下面先给出第一个数据框的可复现示例,然后是生成森林图的代码,该图按不同的变量组分面。如果 p.value < 0.05,则 `geom_point` 的点为实心填充,否则为空心。
library(tidyverse)
## Data frame: one row per outcome variable with its coefficient,
## confidence bounds and p-value.
df1 <- tibble(
  outcome.var = LETTERS[1:10],
  coefficient = c(-0.08, -0.01, -0.06, 0.02, 0.01, 0.02, -0.08, -0.1, 0.1, 0.2),
  conf.low = c(-0.12, -0.03, -0.09, 0.01, 0.02, 0.01, -0.10, -0.2, 0.05, 0.1),
  conf.high = c(-0.05, 0.02, -0.03, 0.03, -0.01, 0.04, -0.06, 0, 0.2, 0.3),
  p.value = c(0.01, 0.02, 0.05, 0.10, 0.02, 0.13, 0.11, 0.01, 0.01, 0.04)
)

df1 <- df1 %>%
  mutate(
    # Legend text for the point shape. A p-value of exactly 0.05 is not
    # "> 0.05", so it falls into the "P < 0.05" bucket.
    significant = ifelse(p.value > 0.05, "P > 0.05", "P < 0.05"),
    # Group id used for faceting: outcomes sharing a label share a panel.
    label = case_when(
      outcome.var %in% c("A", "B", "C") ~ "1",
      outcome.var %in% c("D", "E", "F") ~ "2",
      outcome.var %in% c("G", "H") ~ "3",
      outcome.var %in% c("I", "J") ~ "4"
    )
  )
# Convert outcome.var to a factor, set an explicit level order, and replace
# the single-letter codes with their display names.
df1$outcome.var <- df1$outcome.var %>%
  as.factor() %>%
  # Explicit ordering of the levels (grouped the same way as `label`).
  fct_relevel("B", "C", "A", "F", "D", "E", "H", "G", "J", "I") %>%
  # new name = old level
  fct_recode(
    "Bb" = "B",
    "Cc" = "C",
    "Aa" = "A",
    "Ff" = "F",
    "Dd" = "D",
    "Ee" = "E",
    "Hh" = "H",
    "Gg" = "G",
    "Jj" = "J",
    "Ii" = "I"
  ) %>%
  # Reverse the level order; presumably so the outcomes read top-to-bottom
  # once the axes are flipped in the plot (TODO confirm against the output).
  fct_rev()
# Forest plot of df1: one row per outcome, faceted by `label`, with the point
# shape encoding whether the p-value is above or below 0.05.
p1 <- ggplot(df1, aes(x = outcome.var, y = coefficient)) +
  # Interval from conf.low to conf.high; shape 32 is presumably a blank glyph,
  # so the visible marker comes from the geom_point() layer below.
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high), shape = 32) +
  geom_point(aes(shape = significant), fill = "white") +
  # Reference line at zero plus lighter dashed guide lines.
  geom_hline(
    yintercept = 0,
    colour = "grey40", size = 0.5, linetype = "solid"
  ) +
  geom_hline(
    yintercept = c(-0.15, -0.10, -0.05, 0.05, 0.10),
    colour = "grey85", size = 0.5, linetype = "longdash"
  ) +
  scale_y_continuous(name = "Coefficient") +
  scale_shape_manual(values = c(19, 21)) +
  scale_colour_manual(values = c("black", "black")) +
  xlab(NULL) +
  # Flip so the outcomes run along the vertical axis.
  coord_flip() +
  facet_grid(label ~ ., scales = "free", space = "free", switch = "x") +
  theme(
    panel.background = element_rect(fill = "grey95"),
    panel.spacing = unit(1, "lines"),
    legend.title = element_blank(),
    legend.text = element_text(face = "bold"),
    # Hide the facet strip text; the panels are only used for grouping.
    strip.text.y = element_blank()
  )
创建 `label` 变量是为了在图中启用正确的分面。
已更新,以便将变量转换为因子。
我有第二个数据框 `df2`,其结构与 `df1` 相同,只是具有不同的数据值。如何编写绘图代码,使其同时包含 `df1` 和 `df2` 的 `geom_point` 和 `geom_pointrange`,但 `df2` 的点使用不同的颜色?理想情况下,点和点范围应略微垂直偏移,以使它们在图上不重叠。
答案 0(得分:1)
您没有提供数据框 `df2`,所以让我创建一个包含随机值的数据框。
# Stand-in for the second data set: perturb df1's coefficients with random
# noise and rebuild a fixed-width interval around the new estimates.
df2 <- df1 %>%
  mutate(
    # Draw one offset per row. rnorm(1, ...) returns a single value that is
    # recycled, which would shift every coefficient by the same amount.
    coefficient = coefficient + rnorm(n(), sd = 0.1),
    # These use the already-updated coefficient from the line above.
    conf.low = coefficient - 0.05,
    conf.high = coefficient + 0.05,
    significant = ifelse(p.value > 0.05, "P > 0.05", "P < 0.05")
  )
现在我添加一个变量 `dataset` 来跟踪哪些值来自哪个数据集,并将两个数据框合并为一个名为 `df` 的数据框。
# Tag every row with the data set it came from, then stack both data frames
# into a single long table for plotting.
df1 <- mutate(df1, dataset = "original")
df2 <- mutate(df2, dataset = "alternative")
df <- bind_rows(df1, df2)
您的图表可以通过使用 `color` 美学,并在 `geom_pointrange()` 和 `geom_point()` 命令中使用 `position_dodge()` 来创建。
# Forest plot of both data sets: colour distinguishes the data set, and both
# the interval and the point layer are dodged by the same width so the two
# data sets sit side by side instead of overlapping.
ggplot(df, aes(x = outcome.var, y = coefficient, color = dataset)) +
  geom_pointrange(
    aes(ymin = conf.low, ymax = conf.high),
    shape = 32,
    position = position_dodge(width = 0.5)
  ) +
  geom_point(
    aes(shape = significant),
    fill = "white",
    position = position_dodge(width = 0.5)
  ) +
  # Reference line at zero plus lighter dashed guide lines.
  geom_hline(
    yintercept = 0,
    colour = "grey40", size = 0.5, linetype = "solid"
  ) +
  geom_hline(
    yintercept = c(-0.15, -0.10, -0.05, 0.05, 0.10),
    colour = "grey85", size = 0.5, linetype = "longdash"
  ) +
  scale_y_continuous(name = "Coefficient") +
  scale_shape_manual(values = c(19, 21)) +
  # No manual colour scale here (the all-black one is left disabled):
  # scale_colour_manual(values = c('black', 'black')) +
  xlab(NULL) +
  # Flip so the outcomes run along the vertical axis.
  coord_flip() +
  facet_grid(label ~ ., scales = "free", space = "free", switch = "x") +
  theme(
    panel.background = element_rect(fill = "grey95"),
    panel.spacing = unit(1, "lines"),
    legend.title = element_blank(),
    legend.text = element_text(face = "bold"),
    # Hide the facet strip text; the panels are only used for grouping.
    strip.text.y = element_blank()
  )