从ggplot中的频率分布中删除缺失值

时间:2014-06-21 18:53:07

标签: r ggplot2 rstudio frequency-distribution stackedbarseries

我的数据

dsL<-readRDS("./Data/Derived/dsL.rds")
# color palette for the outcome
attcol8<-c("Never"="#4575b4",
           "Once or Twice"="#74add1",
           "Less than once/month"="#abd9e9",
           "About once/month"="#e0f3f8",
           "About twice/month"="#fee090",
           "About once/week"="#fdae61",
           "Several times/week"="#f46d43",
           "Everyday"="#d73027")

# view for one respondent
print (dsL[dsL$id==1,c("id","year","attend","attendF")])

   id year attend         attendF
1   1 1997     NA            <NA>
2   1 1998     NA            <NA>
3   1 1999     NA            <NA>
4   1 2000      1           Never
5   1 2001      6 About once/week
6   1 2002      2   Once or Twice
7   1 2003      1           Never
8   1 2004      1           Never
9   1 2005      1           Never
10  1 2006      1           Never
11  1 2007      1           Never
12  1 2008      1           Never
13  1 2009      1           Never
14  1 2010      1           Never
15  1 2011      1           Never

为每个测量波创建频率分布 有:

ds<- dsL
p<-ggplot(ds, aes(x=yearF, fill=attendF))
p<-p+ geom_bar(position="fill")
p<-p+ scale_fill_manual(values = attcol8,
                    name="Response category" )
p<-p+ scale_y_continuous("Prevalence: proportion of total",
                     limits=c(0, 1),
                     breaks=c(.1,.2,.3,.4,.5,.6,.7,.8,.9,1))
p<-p+ scale_x_discrete("Waves of measurement",
                   limits=as.character(c(2000:2011)))
p<-p+ labs(title=paste0("In the past year, how often have you attended a worship service?"))
p

enter image description here

缺失值用于计算要显示的总响应 研究中的自然减员。承认消耗不是 与结果测量显着相关,我们可以删除缺失 来自计算总回应的值并看一下 每个响应在每个时间点都得到认可的百分比。

问题是

可以做什么产生我刚刚描述的图表?并且做得最多 有效率的?我在各个地方试过na.rm = TRUE,但它没有去 特技。有什么想法吗?

ds<- dsL
### ???
p<-ggplot(ds, aes(x=yearF, fill=attendF))
p<-p+ geom_bar(position="fill")
p<-p+ scale_fill_manual(values = attcol8,
                    name="Response category" )
p<-p+ scale_y_continuous("Prevalence: proportion of total",
                     limits=c(0, 1),
                     breaks=c(.1,.2,.3,.4,.5,.6,.7,.8,.9,1))
p<-p+ scale_x_discrete("Waves of measurement",
                   limits=as.character(c(2000:2011)))
p<-p+ labs(title=paste0("In the past year, how often have you attended a worship service?"))
#p

更新

在@MrFlick解决方案之后:

ds<- dsL
p<-ggplot(subset(ds, !is.na(attendF)), aes(x=yearF, fill=attendF))
p<-p+ geom_bar(position="fill")
p<-p+ scale_fill_manual(values = attcol8,
                    name="Response category" )
p<-p+ scale_y_continuous("Prevalence: proportion of total",
                     limits=c(0, 1),
                     breaks=c(.1,.2,.3,.4,.5,.6,.7,.8,.9,1))
p<-p+ scale_x_discrete("Waves of measurement",
                   limits=as.character(c(2000:2011)))
p<-p+ labs(title=paste0("In the past year, how often have you attended a worship service?"))
#p

enter image description here

1 个答案:

答案 0 :(得分:3)

放置它们的最简单的地方是为剧情设置数据集

p <- ggplot(subset(ds, !is.na(attendF)), aes(x=yearF, fill=attendF))

我在这里创建了一些示例数据(这对初始问题很有帮助)并在子集化后重新运行绘图命令

ds<-data.frame(
    id=rep(1:100, each=4),
    yearF=factor(rep(2001:2004, 100)),
    attendF=sample(1:8, 400, T, c(.2,.2,.15,.10,.10, .20, .15, .02))
)
ds[sample(which(ds$year==2002), 5), "attendF"]<-NA
ds[sample(which(ds$year==2003), 15), "attendF"]<-NA
ds[sample(which(ds$year==2004), 40), "attendF"]<-NA

attcol8<-c("Never"="#4575b4",
           "Once or Twice"="#74add1",
           "Less than once/month"="#abd9e9",
           "About once/month"="#e0f3f8",
           "About twice/month"="#fee090",
           "About once/week"="#fdae61",
           "Several times/week"="#f46d43",
           "Everyday"="#d73027")
ds$attendF<-factor(ds$attendF, levels=1:8, labels=names(attcol8))

library(ggplot2)
p<-ggplot(subset(ds, !is.na(attendF)), aes(x=yearF, fill=attendF))
p<-p+ geom_bar(position="fill")
p<-p+ scale_fill_manual(values = attcol8,
                    name="Response category" )
p<-p+ scale_y_continuous("Prevalence: proportion of total",
                     limits=c(0, 1),
                     breaks=c(.1,.2,.3,.4,.5,.6,.7,.8,.9,1))
p<-p+ scale_x_discrete("Waves of measurement",
                   limits=as.character(c(2001:2004)))
p<-p+ labs(title=paste0("In the past year, how often have you attended a worship service?"))
p

这给出了以下情节

enter image description here