尽管我已经对 incomeLev 列中的所有 NA 值做了插补,并显式去除了 mentalHealth(堆叠条形图中的填充变量)中的所有 NA 值,但我的可视化结果中仍然出现了一个 NA 条形。
# Turn implicit NAs in mentalHealth into an explicit "Missing" factor level.
brfss2013[["mentalHealth"]] <- forcats::fct_explicit_na(
  brfss2013[["mentalHealth"]],
  na_level = "Missing"
)

# Make sure incomeLev is a factor, then keep only the rows where it is present.
brfss2013[["incomeLev"]] <- as.factor(brfss2013[["incomeLev"]])
brfss2013 <- brfss2013[!is.na(brfss2013[["incomeLev"]]), , drop = FALSE]
# Stacked bar chart of mentalHealth counts within each income bracket; every
# segment is labelled with its share of that bracket's respondents.
brfss2013 %>%
  # count_inc: number of rows in each income bracket (the percent denominator).
  add_count(incomeLev) %>%
  rename(count_inc = n) %>%
  # count_mentalHealth: rows per (income bracket, mentalHealth) combination.
  count(incomeLev, mentalHealth, count_inc) %>%
  rename(count_mentalHealth = n) %>%
  mutate(percent = count_mentalHealth / count_inc) %>%
  # Reorder the EXISTING levels instead of rebuilding the factor with a
  # hand-typed `levels =` vector: factor() turns every value that is not
  # listed in `levels` into NA, so a single spelling mismatch (the data has
  # "50-$75", the old vector had '50-$75k') produced an NA bar after the
  # earlier !is.na() filter had already run. Moving ">$75k" to the end yields
  # the intended low-to-high order without dropping any level.
  mutate(
    incomeLev = forcats::fct_relevel(incomeLev, ">$75k", after = Inf)
  ) %>%
  ggplot(aes(
    x = incomeLev,
    y = count_mentalHealth,
    group = mentalHealth
  )) +
  labs(x = "Annual Income", y = "Number of People") +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  # na.rm only silences the warning for removed rows; it does not drop an
  # NA category from the x axis.
  geom_col(aes(fill = mentalHealth), na.rm = TRUE) +
  # Using the scales package does the percent formatting for you
  geom_text(
    aes(label = scales::percent(percent)),
    position = position_stack(vjust = 0.5)
  ) +
  theme_minimal()
以下是我的数据示例:
# Sample data (dput output): a 10-row data.frame with the columns incomeLev,
# healtheat, X_age_g, employ1, renthom1, sex, physLev and mentalHealth.
# All columns are factors except healtheat, which is numeric.
# NOTE: the incomeLev levels are ">$75k", "0-$20k", "25-$35k", "35-$50k" and
# "50-$75" -- the last one has no trailing "k", so it does not match the
# '50-$75k' spelling used in the plotting code's `levels =` vector.
# mentalHealth already carries a "Missing" level.
brfss2013<-structure(list(incomeLev = structure(c(5L, 1L, 1L, 5L, 4L, 1L,
1L, 4L, 1L, 3L), .Label = c(">$75k", "0-$20k", "25-$35k", "35-$50k",
"50-$75"), class = "factor"), healtheat = c(4.66, 1.68, 2.37,
1.85, 2.5, 3, 3.66, 4.27, 2.72, 1.72), X_age_g = structure(c(5L,
4L, 5L, 5L, 6L, 4L, 3L, 5L, 4L, 6L), .Label = c("Age 18 to 24",
"Age 25 to 34", "Age 35 to 44", "Age 45 to 54", "Age 55 to 64",
"Age 65 or older"), class = "factor"), employ1 = structure(c(7L,
1L, 1L, 7L, 7L, 1L, 1L, 7L, 7L, 5L), .Label = c("Employed for wages",
"Self-employed", "Out of work for 1 year or more", "Out of work for less than 1 year",
"A homemaker", "A student", "Retired", "Unable to work"), class = "factor"),
renthom1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L), .Label = c("Own", "Rent", "Other arrangement"), class = "factor"),
sex = structure(c(2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("Male",
"Female"), class = "factor"), physLev = structure(c(3L, 1L,
3L, 1L, 2L, 1L, 2L, 1L, 2L, 2L), .Label = c("0-200", "200-500",
"500-1000", "1000-2000", "2000-4000", "4000-10000", ">10000"
), class = "factor"), mentalHealth = structure(c(5L, 1L,
1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L), .Label = c("Excellent",
"Good", "Ok", "Bad", "Very Bad", "Missing"), class = "factor")), row.names = c(NA,
10L), class = "data.frame")