条形图显示没有NA值时的NA条

时间:2019-02-24 18:07:17

标签: r ggplot2

尽管我已将所有NA值都估算到了我的IncomeLev列中,并从精神健康中明确删除了所有NA值(这在我的堆叠条形图中),但我的可视化仍显示了NA条形图。

brfss2013$mentalHealth <- forcats::fct_explicit_na(brfss2013$mentalHealth, na_level = "Missing")
brfss2013$incomeLev <- as.factor(brfss2013$incomeLev) 
brfss2013 <- subset(brfss2013, !is.na(incomeLev))
brfss2013 %>%
  add_count(incomeLev) %>%
  rename(count_inc = n) %>% 
  count(incomeLev, mentalHealth, count_inc) %>%
  rename(count_mentalHealth = n) %>% 
  mutate(percent= count_mentalHealth / count_inc) %>%
  mutate(incomeLev = factor(incomeLev,
                            levels=c('0-$20k','25-$35k','35-$50k','50-$75k','>$75k')))%>%
  ggplot(aes(x= incomeLev,
             y= count_mentalHealth,
             group= mentalHealth)) + 
  xlab('Annual Income')+ylab('Number of People')+
  geom_bar(aes(fill=mentalHealth), 
           stat="identity",na.rm=TRUE)+ 
  # Using the scales package does the percent formatting for you
  geom_text(aes(label = scales::percent(percent)),position = position_stack(vjust = 0.5))+
  theme_minimal()

以下是我的数据示例:

brfss2013<-structure(list(incomeLev = structure(c(5L, 1L, 1L, 5L, 4L, 1L, 
1L, 4L, 1L, 3L), .Label = c(">$75k", "0-$20k", "25-$35k", "35-$50k", 
"50-$75"), class = "factor"), healtheat = c(4.66, 1.68, 2.37, 
1.85, 2.5, 3, 3.66, 4.27, 2.72, 1.72), X_age_g = structure(c(5L, 
4L, 5L, 5L, 6L, 4L, 3L, 5L, 4L, 6L), .Label = c("Age 18 to 24", 
"Age 25 to 34", "Age 35 to 44", "Age 45 to 54", "Age 55 to 64", 
"Age 65 or older"), class = "factor"), employ1 = structure(c(7L, 
1L, 1L, 7L, 7L, 1L, 1L, 7L, 7L, 5L), .Label = c("Employed for wages", 
"Self-employed", "Out of work for 1 year or more", "Out of work for less than 1 year", 
"A homemaker", "A student", "Retired", "Unable to work"), class = "factor"), 
    renthom1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
    1L), .Label = c("Own", "Rent", "Other arrangement"), class = "factor"), 
    sex = structure(c(2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("Male", 
    "Female"), class = "factor"), physLev = structure(c(3L, 1L, 
    3L, 1L, 2L, 1L, 2L, 1L, 2L, 2L), .Label = c("0-200", "200-500", 
    "500-1000", "1000-2000", "2000-4000", "4000-10000", ">10000"
    ), class = "factor"), mentalHealth = structure(c(5L, 1L, 
    1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L), .Label = c("Excellent", 
    "Good", "Ok", "Bad", "Very Bad", "Missing"), class = "factor")), row.names = c(NA, 
10L), class = "data.frame")

0 个答案:

没有答案