我想在一个图表中创建一个包含多列不同变量的R条形图。我只能使用以下代码执行2x2绘图:
# Cross-tabulate Gender against Education and plot the counts; barplot()
# stacks the rows of a matrix by default, hence gender stacked within education.
with(cut, barplot(table(y = Gender, x = Education)))
即使如此,性别也会在教育之上叠加。
我的样本数据集是:
# Sample data (looks like dput() output): a 20-row data.frame with seven
# character columns - Gender, Age, Employment, Marriage, Education, Residence
# and Experience.
# NOTE(review): the expression is not assigned to a name here; the question's
# code refers to the data as `cut` and the answer's code as `df`, so assign it
# accordingly before running either snippet.
structure(list(Gender = c("Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Female", "Male", "Male", "Male", "Male",
"Female", "Male", "Female", "Male", "Male", "Male", "Male"),
Age = c("45-54 yrs", "35-44 yrs", "25-34 yrs", "25-34 yrs",
"25-34 yrs", "45-54 yrs", "25-34 yrs", "25-34 yrs", "25-34 yrs",
"35-44 yrs", "18-24 yrs", "25-34 yrs", "25-34 yrs", "55-64 yrs",
"35-44 yrs", "35-44 yrs", "35-44 yrs", "45-54 yrs", "35-44 yrs",
"45-54 yrs"), Employment = c("Civil servant", "Private sector",
"Private sector", "Private sector", "Trader", "Civil servant",
"Private sector", "Private sector", "Private sector", "Civil servant",
"Student", "Student", "Civil servant", "Retired", "Self-employed",
"Private sector", "Civil servant", "Civil servant", "Private sector",
"Private sector"), Marriage = c("Married", "Married", "Married",
"Married", "Single, never married", "Married", "Married",
"Married", "Married", "Married", "Single, never married",
"Single, never married", "Married", "Married", "Married",
"Married", "Married", "Married", "Married", "Married"), Education = c("Advanced degree",
"Advanced degree", "Bachelor's degree", "Bachelor's degree",
"Secondary education", "Advanced degree", "Bachelor's degree",
"Bachelor's degree", "Secondary education", "Secondary education",
"Secondary education", "Secondary education", "Advanced degree",
"Bachelor's degree", "Basic education", "Advanced degree",
"Advanced degree", "Advanced degree", "Advanced degree",
"Advanced degree"), Residence = c("Ashanti", "Ashanti", "Ashanti",
"Ashanti", "Ashanti", "Brong-Ahafo", "Brong-Ahafo", "Brong-Ahafo",
"Brong-Ahafo", "Brong-Ahafo", "Brong-Ahafo", "Brong-Ahafo",
"Central", "Central", "Eastern", "Greater Accra", "Greater Accra",
"Greater Accra", "Greater Accra", "Greater Accra"), Experience = c("Never",
"Never", "Never", "Never", "Never", "Never", "Never", "Never",
"Never", "Never", "Never", "Never", "Never", "Never", "Never",
"Never", "Never", "Never", "Never", "Never")), .Names = c("Gender",
"Age", "Employment", "Marriage", "Education", "Residence", "Experience"
), row.names = c(NA, 20L), class = "data.frame")
答案 0 :(得分:1)
这是一种方法:
首先将数据转换为长格式,有两种选择:`reshape` 包中的 `melt`,或 `tidyr` 中的 `gather`。在这里,我将使用 `tidyverse` 库,它加载了许多有用的包。
library(tidyverse)
df %>%
gather(variable, value)
然后用 `ggplot2` 制作条形图:
ggplot()+
geom_bar(aes(x = variable, fill = value), color = "black" , position = "stack", show.legend = FALSE)
要添加文字注释,我们会生成 `geom_text` 图层;标签的位置由 `stat = "count"` 确定,它会计算与条形顶部对应的特殊变量 `..count..`;同时设置 `vjust = 1`,这样标签会落在条形段顶部的下方(即条形内部):
# Text layer: stat = "count" tallies the rows of each (variable, value) group,
# position = "stack" puts every label at the top of its bar segment, and
# vjust = 1 hangs the text just below that point.
geom_text(
  aes(x = variable, y = ..count.., label = value, group = value),
  stat = "count",
  position = "stack",
  vjust = 1
)
要在 y 轴上添加百分比标签,通常使用 `y = (..count..)/sum(..count..)`,但 `sum(..count..)` 是所有变量的计数总和,在这里并不合适,因此最简单的解决方案是手动设置刻度标签:
# Manual percent axis: the breaks are raw counts (0, 5, ..., 20) and the labels
# give the matching share of the 20 rows in the sample data.
scale_y_continuous(
  breaks = seq(0, 20, by = 5),
  labels = c("0%", "25%", "50%", "75%", "100%")
)
它看起来如何:
library(tidyverse)

# Complete plot: one stacked bar per variable, each segment labelled with the
# level it represents, and a manually labelled percent axis.
# `df` is expected to hold the 20-row sample data frame.
# NOTE: ggplot2 >= 3.4.0 deprecates the `..count..` notation in favour of
# `after_stat(count)`; it is kept here to match the accompanying explanation.
df %>%
  # Long format: one row per (variable, value) pair.
  gather(variable, value) %>%
  ggplot() +
  geom_bar(
    aes(x = variable, fill = value),
    color = "black", # stray leading space removed from the colour name
    position = "stack",
    show.legend = FALSE
  ) +
  # Labels sit at the top of each stacked segment (count stat + stack position);
  # vjust = 1 drops the text just inside the segment.
  geom_text(
    stat = "count",
    aes(x = variable, label = value, y = ..count.., group = value),
    position = "stack",
    vjust = 1
  ) +
  # Breaks are raw counts; with 20 rows they correspond to 0-100 %.
  scale_y_continuous(
    labels = c("0%", "25%", "50%", "75%", "100%"),
    breaks = c(0, 5, 10, 15, 20)
  )
另一个选项是使用 `y = ..count../sum(..count..)*7`,因为有 7 个变量(`sum(..count..)` 是全部 7 个变量的计数之和,乘以 7 之后每个条形的总高度恢复为 100%):
# Same plot with a computed percent axis. The share is multiplied by 7 (the
# number of variables) so that every bar totals 100 %, and the axis is then
# formatted with scales::percent.
df %>%
  gather(variable, value) %>%
  ggplot() +
  geom_bar(
    aes(x = variable, y = ..count.. / sum(..count..) * 7, fill = value),
    color = "black", # stray leading space removed from the colour name
    position = "stack",
    show.legend = FALSE
  ) +
  geom_text(
    stat = "count",
    aes(
      x = variable,
      label = value,
      y = ..count.. / sum(..count..) * 7,
      group = value
    ),
    position = "stack",
    vjust = 1
  ) +
  scale_y_continuous(labels = scales::percent) +
  # Blank out the y-axis title.
  ylab("")
相同的输出图
您甚至可以用 `mutate` 配合 `gsub` 和负向先行断言(negative lookahead),在标签中有条件地插入换行符:
# Percent-axis plot with wrapped labels: a `label` column is derived from
# `value` and used for the text layer instead of the raw value.
df %>%
  gather(variable, value) %>%
  # Replace every space that is NOT followed by "yrs" with a newline, so age
  # bands such as "45-54 yrs" stay on one line while other labels wrap.
  # perl = TRUE is required for the lookahead; spelled out because `T` is an
  # ordinary variable that can be reassigned.
  mutate(label = gsub(" (?!yrs)", "\n", value, perl = TRUE)) %>%
  ggplot() +
  geom_bar(
    aes(x = variable, y = ..count.. / sum(..count..) * 7, fill = value),
    color = "black", # stray leading space removed from the colour name
    position = "stack",
    show.legend = FALSE
  ) +
  geom_text(
    stat = "count",
    aes(
      x = variable,
      label = label,
      y = ..count.. / sum(..count..) * 7,
      group = value
    ),
    position = "stack",
    vjust = 1
  ) +
  scale_y_continuous(labels = scales::percent) +
  # Blank out the y-axis title.
  ylab("")