我试图在ggplot2中制作的geom_segment
图中添加使用geom_density_ridges
的均值。
library(dplyr)
library(ggplot2)
library(ggridges)
Fig1 <- ggplot(Figure3Data, aes(x = `hairchange`, y = `EffortGroup`)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1)
ingredients <- ggplot_build(Fig1) %>% purrr::pluck("data", 1)
density_lines <- ingredients %>%
group_by(group) %>% filter(density == mean(density)) %>% ungroup()
p <- ggplot(Figure3Data, aes(x = `hairchange`, y = `EffortGroup`)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1) +
scale_fill_gradientn( colours = c("#0000FF", "#FFFFFF", "#FF0000"),name =
NULL, limits=c(-2,2))+ coord_flip() +
theme_ridges(font_size = 20, grid=TRUE, line_size=1,
center_axis_labels=TRUE) +
scale_x_continuous(name='Average Self-Perceived Hair Change', limits=c(-2,2))+
ylab('Total SSM Effort (hours)')+
geom_segment(data =density_lines,
aes(x = x, y = ymin, xend = x, yend = ymin+density*scale*iscale))
print(p)
但是,我收到“错误:data
必须具有唯一的名称,但是具有重复的元素”。下面是一个没有我所拥有的数据集的图。有关如何修复代码的任何建议?
前35行数据如下:
structure(list(MonthsMassage = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 1, 1), MinutesPerDayMassage = c("0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "11-20 minutes daily",
"11-20 minutes daily", "11-20 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "11-20 minutes daily", "11-20 minutes daily"
), Minutes = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15),
hairchange = c(-1, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1,
-1, 0, 0, -1, 0, -1, 0, -1, -1, -1, -1, -1, 0, -1, -1, -1,
0, 1, -1, 0, 0, -1, 0), HairType1 = c("Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "other", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "other",
"other", "other", "Templefrontal", "Templefrontal", "other",
"Templefrontal", "other", "Templefrontal", "Templefrontal"
), HairType2 = c("other", "other", "other", "other", "other",
"other", "other", "other", "other", "Vertexthinning", "Vertexthinning",
"other", "Vertexthinning", "other", "other", "Vertexthinning",
"other", "Vertexthinning", "Vertexthinning", "other", "other",
"other", "Vertexthinning", "other", "Vertexthinning", "other",
"other", "other", "other", "other", "other", "Vertexthinning",
"other", "other", "other"), HairType3 = c("other", "Diffusethinning",
"other", "Diffusethinning", "other", "other", "Diffusethinning",
"Diffusethinning", "Diffusethinning", "other", "Diffusethinning",
"Diffusethinning", "other", "other", "Diffusethinning", "Diffusethinning",
"other", "Diffusethinning", "Diffusethinning", "Diffusethinning",
"other", "other", "other", "other", "other", "other", "other",
"other", "other", "Diffusethinning", "other", "other", "other",
"other", "other"), Effort = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 5, 5, 5, 5, 5, 7.5, 7.5), EffortGroup = c("<5",
"<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5",
"<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5",
"<5", "<5", "<5", "<5", "<5", "<5", "<5", "12.5", "12.5",
"12.5", "12.5", "12.5", "12.5", "12.5")), row.names = c(NA,
-35L), class = c("tbl_df", "tbl", "data.frame"))
答案 0 :(得分:2)
如果我理解正确,OP希望在密度等于每个脊线平均密度的位置绘制一条水平线。
表达式
density_lines <- ingredients %>%
group_by(group) %>% filter(density == mean(density)) %>% ungroup()
返回一个空的数据集,因为没有记录,其中density
的值与mean(density)
完全匹配。
但是,它确实可以针对整体最大值(但不适用于所有局部最大值)
density_lines <- ingredients %>%
group_by(group) %>% filter(density == max(density)) %>% ungroup()
给出
由于不存在完全匹配的内容,因此可以选择最接近的值
density_lines <- ingredients %>%
group_by(group) %>%
top_n(1, -abs(density - mean(density)))
绘制为
这将为每个脊线绘制一个线段,但我们希望在每个曲线分支中看到4个线段(那些相邻峰的最大值大于平均值的线段)。与
density_lines <- ingredients %>%
group_by(group) %>%
top_n(4, -abs(density - mean(density)))
我们得到
您可以在n
处使用top_n()
参数,但恕我直言,正确的方法是将每个峰线从峰到谷以及从峰到谷进行分组,以便为每条曲线获得一个分段分支。
或者,我们可以使用near()
函数进行过滤。此函数需要指定一个公差tol
,我们需要从数据集中进行计算:
density_lines <- ingredients %>%
group_by(group) %>%
filter(near(
density, mean(density),
tol = ingredients %>% summarise(0.25 * max(abs(diff(density)))) %>% pull()
))
对于经过精心选择的因素0.25
(尝试和错误),我们确实获得了
似乎我误解了OP的意图。现在,我们将尝试使用mean(density)
在geom_hline
处绘制一条垂直线(与coord_flip()
,geom_hline()
一起创建一条垂直线)。
同样,我们遵循OP的巧妙方法,从创建的图块中提取密度和比例因子。
# create plot object
Fig1 <- ggplot(Figure3Data, aes(x = hairchange, y = EffortGroup)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1) +
scale_fill_gradientn(
colours = c("#0000FF", "#FFFFFF", "#FF0000"),
name =
NULL,
limits = c(-2, 2)
) + coord_flip() +
theme_ridges(
font_size = 20,
grid = TRUE,
line_size = 1,
center_axis_labels = TRUE
) +
scale_x_continuous(name = 'Average Self-Perceived Hair Change', limits =
c(-2, 2)) +
ylab('Total SSM Effort (hours)')
# extract plot data and summarise
mean_density <-
ggplot_build(Fig1) %>%
purrr::pluck("data", 1) %>%
group_by(group) %>%
summarise(density = mean(density), scale = first(scale), iscale = first(iscale))
# add hline and plot
Fig1 +
geom_hline(aes(yintercept = group + density * scale * iscale),
data = mean_density)
OP的clarified
我想要的是每个人的平均自我感知的头发变化(y轴数据) 10条山脊线中的
这可以通过以下步骤实现:
EffortGroup
的平均自我感知的头发变化。approx()
每个EffortGroup
的平均自我感觉到的头发变化由
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange))
产生(对于OP数据的发布的子集):
EffortGroup x_mean <chr> <dbl> 1 <5 -0.643 2 12.5 -0.143
所有步骤在一起
# create plot object
Fig1 <- ggplot(Figure3Data, aes(x = hairchange, y = EffortGroup)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1) +
scale_fill_gradientn(
colours = c("#0000FF", "#FFFFFF", "#FF0000"),
name = NULL,
limits = c(-2, 2)) +
coord_flip() +
theme_ridges(
font_size = 20,
grid = TRUE,
line_size = 1,
center_axis_labels = TRUE) +
scale_x_continuous(name = 'Average Self-Perceived Hair Change',
limits = c(-2, 2)) +
ylab('Total SSM Effort (hours)')
density_lines <-
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange)) %>%
mutate(group = as.integer(factor(EffortGroup))) %>%
left_join(ggplot_build(Fig1) %>% purrr::pluck("data", 1),
on = "group") %>%
group_by(group) %>%
summarise(x_mean = first(x_mean),
density = approx(x, density, first(x_mean))$y,
scale = first(scale),
iscale = first(iscale))
# add segments and plot
Fig1 +
geom_segment(aes(x = x_mean,
y = group,
xend = x_mean,
yend = group + density * scale * iscale),
data = density_lines)
OP具有asked来适当地重新排列水平轴。这可以通过将EffortGroup
从类型character
强制转换为factor
来完成,其中因子级别是按预期顺序明确指定的:
# turn EffortGroup into factor with levels in desired order
lvls <- c("<5", "12.5", "22.5", "35", "50", "75", "105", "152", "210", "210+")
Figure3Data <-
Figure3Data %>%
mutate(EffortGroup = factor(EffortGroup, levels = lvls))
或者,可以通过
从给定的EffortGroup
值中直接导出Effort
# create Effort Group from scratch
lvls <- c("<5", "12.5", "22.5", "35", "50", "75", "105", "152", "210", "210+")
brks <- c(-Inf, 5, 12.5, 22.5, 35, 50, 75, 105, 152, 210, Inf)
Figure3Data <-
Figure3Data %>%
mutate(EffortGroup = cut(Effort, brks, lvls, right = FALSE))
无论如何,由于density_lines
已经是一个因素,因此必须修改EffortGroup
的计算:
density_lines <-
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange)) %>%
mutate(group = as.integer(EffortGroup)) %>% # remove call to factor() here
left_join( ...
使用OP (link)提供的完整数据集,绘图最终变为
每个EffortGroup
的平均自我感知的头发变化的位置由
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange))
# A tibble: 10 x 2 EffortGroup x_mean <fct> <dbl> 1 <5 -0.643 2 12.5 -0.393 3 22.5 -0.118 4 35 -0.0606 5 50 0.286 6 75 0 7 105 0.152 8 152 0.167 9 210 0.379 10 210+ 0.343