ggplot2 summarySE绘制小数据集 - 使用geom_point缺少点

时间:2017-06-06 17:29:08

标签: r plot ggplot2 time-series

我正在使用如下的小数据集:

Thermo  Chl     Month
1.53    1.53    May
3.63    1.53    June
3.83    2.33    July
3.84    2.33    July
5.08           August
5.50           August
5.04    1.55    September
5.90    1.70    September
3.75           September
3.63    1.70    September
5.90    1.70    September
9.00    2.50    October
11.03   1.03    October
11.00   1.00    October
9.00    2.37    October
9.00    2.03    November
14.96   2.04    November
7.13    15.95   January
13.03   9.00    February
9.00    1.03    April
9.00    2.50    May
8.97    2.50    May
4.88    2.50    June
4.88    2.50    June
3.63    2.50    July
4.88    1.03    July
3.83    2.33    August
5.53    2.30    August
7.20    2.37    September
7.17    2.33    September
7.17    2.33    September
7.20    2.37    September
7.17    2.33    September
7.17    2.33    September
7.17    1.08    September
7.17    2.33    September
7.17    1.03    September
7.17    2.33    September
7.17    2.33    September
7.17    1.03    September
9.00    1.03    October
9.00    2.37    October
8.97    5.50    October
11.00   5.50    October
13.00   10.00   November
13.00   10.00   December
11.00   15.17   January
11.00   1.03    February
9.00    2.33    March
5.50    1.03    April
3.83    2.37    May
3.87    2.33    June
5.50    3.83    June
3.83    2.33    July
5.50    3.37    July
7.17    2.37    July
7.17    2.30    August
7.17    2.33    September
9.00    2.33    September
9.03    3.83    September
9.00    3.83    September
9.00    2.33    September
9.00    2.37    September
7.17    2.33    September
7.17    4.00    September
7.17    3.50    September
7.17    2.33    September
7.20    2.33    September
7.17    3.83    September
7.17    2.33    October
7.17    2.30    October
7.17    2.33    October
7.17    2.33    October
8.68    2.33    October
7.20    2.33    October
9.00    3.87    October
9.00    5.50    October
11.00   5.50    November
13.00   1.03    November
11.00   3.83    November
9.00    3.83    December
6.03    13.00   March
8.00    15.67   March
5.97    0.70    April
5.57    2.40    May
7.25    1.03    May
5.50    2.33    June
5.50    3.00    July
4.70    2.00    July
5.50    2.33    August
5.50    2.33    August
9.00    2.33    September
9.00    1.03    September
9.00    2.33    September
7.13    2.37    September
7.17    2.33    September
7.17    2.33    September
7.17    2.33    September
7.17    2.43    September
7.20    1.07    September
7.17    2.00    September
7.17    2.33    September
7.17    2.00    September
7.17    1.03    September
7.13    2.33    October
7.17    2.33    October
7.17    2.30    October
7.17    2.33    October
9.00    3.83    October
13.00   2.33    October
14.97   3.83    October
14.93   2.03    October

当我使用summarySE和ggplot2函数进行绘图时,如下所示:

# Convert Month to a factor. No levels are given, so factor() derives them from
# the sorted unique values; if Month is a character column that means
# alphabetical order, not calendar order.
phyto.maxchl$Month <- factor(phyto.maxchl$Month)

# Summarise the "Thermocline" column per Month. summarySE() is not defined in
# this snippet (presumably Rmisc::summarySE -- confirm); the plot below reads
# the `Thermocline` and `se` columns of its result.
# NOTE(review): the sample data shown above has a column named Thermo, not
# Thermocline -- confirm the real column name.
phyto.se <- summarySE(phyto.maxchl, measurevar="Thermocline", groupvars=c("Month"))

# Plot one point per month with error bars of value -/+ se on a reversed y axis.
# NOTE(review): aes() refers to columns as phyto.se$...; bare column names are
# the usual form inside aes().
ggplot(phyto.se, aes(x=phyto.se$Month, y=phyto.se$Thermocline)) + 
  # NOTE(review): position_dodge() is called without a width here, while the
  # error bars below use position_dodge(.9).
  geom_point(position=position_dodge(),stat="identity") +
  # Error bars span the summarised value minus/plus `se`.
  geom_errorbar(aes(ymin=phyto.se$Thermocline-se, ymax=phyto.se$Thermocline+se),
                width=.2,            
                position=position_dodge(.9))+
  ylab("Thermocline Depth (m)")+
  xlab("Month")+
  theme_bw()+
  theme(axis.text.x=element_text(angle=70,size=15,vjust=0.60))+
  labs(title = "Thermocline Depth for 2013 - 2016")+
  # Reverse the y axis so larger values are drawn lower.
  scale_y_continuous(trans='reverse')+
  theme(legend.title=element_blank())

图中缺少了1月到4月的点。这是因为我的数据点不够多,无法满足计算标准误差的要求吗?

1 个答案:

答案 0 :(得分:0)

summarySE()如果遇到组内的任何NA,则会返回NA,除非提供na.rm = TRUE作为参数。

# Per-month summary of Chl with the default NA handling (na.rm is not set).
Rmisc::summarySE(data = df, measurevar = "Chl", groupvars = "Month")
#>        Month  N       Chl        sd        se         ci
#> 1      April  3  0.920000 0.1905256 0.1100000  0.4732918
#> 2     August  7        NA        NA        NA         NA
#> 3   December  2  6.915000 4.3628488 3.0850000 39.1986416
#> 4   February  2  5.015000 5.6356410 3.9850000 50.6342259
#> 5    January  2 15.560000 0.5515433 0.3900000  4.9554198
#> 6       July  9  2.362222 0.6471626 0.2157209  0.4974532
#> 7       June  6  2.503333 0.7439803 0.3037287  0.7807594
#> 8      March  3 10.333333 7.0584866 4.0752191 17.5342528
#> 9        May  6  2.055000 0.6229848 0.2543325  0.6537824
#> 10  November  6  4.071667 3.3103318 1.3514373  3.4739802
#> 11   October 24  2.747083 1.2873312 0.2627754  0.5435923
#> 12 September 42        NA        NA        NA         NA

相反:

# Per-month summary of Chl with missing values dropped before summarising.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
Rmisc::summarySE(df, "Chl", groupvars = "Month", na.rm = TRUE)
#>        Month  N       Chl         sd          se          ci
#> 1      April  3  0.920000 0.19052559 0.110000000  0.47329180
#> 2     August  5  2.318000 0.01643168 0.007348469  0.02040262
#> 3   December  2  6.915000 4.36284884 3.085000000 39.19864161
#> 4   February  2  5.015000 5.63564105 3.985000000 50.63422587
#> 5    January  2 15.560000 0.55154329 0.390000000  4.95541985
#> 6       July  9  2.362222 0.64716261 0.215720870  0.49745322
#> 7       June  6  2.503333 0.74398029 0.303728680  0.78075943
#> 8      March  3 10.333333 7.05848662 4.075219149 17.53425280
#> 9        May  6  2.055000 0.62298475 0.254332460  0.65378240
#> 10  November  6  4.071667 3.31033181 1.351437301  3.47398018
#> 11   October 24  2.747083 1.28733118 0.262775376  0.54359228
#> 12 September 41  2.246098 0.75104220 0.117293086  0.23705817

这就是你得到两个缺失值的原因。至于NA出现在1月和4月,而不是正确的8月和9月,可能是因为Month因子的值与其标签不一致。

# Put the Month levels into calendar order by rebuilding the factor.
# Do not use `levels(df$Month) <- month.name` for this: assigning to levels()
# renames the existing levels in place (they are sorted alphabetically by
# default), so e.g. "April" would be relabelled "January" and every row would
# end up under the wrong month.
df$Month <- factor(df$Month, levels = month.name)

应该修复它。

以下是您的原始代码,已去除NA问题,并对ggplot调用做了一些修正。特别是,不应该在aes()中使用df$Var,只写变量名就足够了。

library(ggplot2)

# Per-month N, mean, sd, se and ci of Chl, with missing values dropped.
df2 <- Rmisc::summarySE(df, "Chl", groupvars = "Month", na.rm = TRUE)

# Monthly mean of Chl with error bars of mean -/+ se, months in calendar order.
# NOTE(review): the axis label and title refer to thermocline depth although
# the plotted variable is Chl -- confirm which one is intended.
ggplot(df2, aes(x = factor(Month, levels = month.name), y = Chl)) +
  # Use the same explicit dodge width as the error bars; calling
  # position_dodge() without a width triggers a "Width not defined" warning.
  geom_point(position = position_dodge(width = 0.9)) +
  geom_errorbar(aes(ymin = Chl - se, ymax = Chl + se),
                width = 0.2,
                position = position_dodge(width = 0.9)) +
  ylab("Thermocline Depth (m)") +
  xlab("Month") +
  labs(title = "Thermocline Depth for 2013 - 2016") +
  # Reversed y axis: larger values are drawn lower.
  scale_y_reverse() +
  theme_bw() +
  # Tweaks must come after theme_bw(), which would otherwise overwrite them.
  theme(axis.text.x = element_text(angle = 70, size = 15, vjust = 0.60),
        legend.title = element_blank())

为了完整起见,我再展示另一种做法:不必预先计算摘要,可以把计算交给ggplot来完成:

library(ggplot2)

# Let ggplot compute the per-month summary itself instead of precomputing it.
# mean_se returns the mean together with mean -/+ one standard error; it is
# passed explicitly rather than relying on stat_summary()'s implicit default.
# na.rm = TRUE drops the missing Chl values without a warning.
# NOTE(review): the axis label and title refer to thermocline depth although
# the plotted variable is Chl -- confirm which one is intended.
ggplot(df, aes(factor(Month, levels = month.name), Chl)) +
  stat_summary(fun.data = mean_se, geom = "point", na.rm = TRUE,
               position = position_dodge(width = 0.9)) +
  stat_summary(fun.data = mean_se, geom = "errorbar", na.rm = TRUE,
               width = 0.2,
               position = position_dodge(width = 0.9)) +
  ylab("Thermocline Depth (m)") +
  xlab("Month") +
  labs(title = "Thermocline Depth for 2013 - 2016") +
  # Reversed y axis: larger values are drawn lower.
  scale_y_reverse() +
  theme_bw() +
  # Tweaks must come after theme_bw(), which would otherwise overwrite them.
  theme(axis.text.x = element_text(angle = 70, size = 15, vjust = 0.60),
        legend.title = element_blank())

数据:

# Example data: one row per observation with columns Thermo, Chl and Month.
# Missing Chl values are written as NA. The argument is spelled `header` in
# full instead of relying on partial matching of `h`.
df <- read.table(text = "Thermo  Chl     Month
1.53    1.53    May
             3.63    1.53    June
             3.83    2.33    July
             3.84    2.33    July
             5.08     NA      August
             5.50     NA      August
             5.04    1.55    September
             5.90    1.70    September
             3.75     NA      September
             3.63    1.70    September
             5.90    1.70    September
             9.00    2.50    October
             11.03   1.03    October
             11.00   1.00    October
             9.00    2.37    October
             9.00    2.03    November
             14.96   2.04    November
             7.13    15.95   January
             13.03   9.00    February
             9.00    1.03    April
             9.00    2.50    May
             8.97    2.50    May
             4.88    2.50    June
             4.88    2.50    June
             3.63    2.50    July
             4.88    1.03    July
             3.83    2.33    August
             5.53    2.30    August
             7.20    2.37    September
             7.17    2.33    September
             7.17    2.33    September
             7.20    2.37    September
             7.17    2.33    September
             7.17    2.33    September
             7.17    1.08    September
             7.17    2.33    September
             7.17    1.03    September
             7.17    2.33    September
             7.17    2.33    September
             7.17    1.03    September
             9.00    1.03    October
             9.00    2.37    October
             8.97    5.50    October
             11.00   5.50    October
             13.00   10.00   November
             13.00   10.00   December
             11.00   15.17   January
             11.00   1.03    February
             9.00    2.33    March
             5.50    1.03    April
             3.83    2.37    May
             3.87    2.33    June
             5.50    3.83    June
             3.83    2.33    July
             5.50    3.37    July
             7.17    2.37    July
             7.17    2.30    August
             7.17    2.33    September
             9.00    2.33    September
             9.03    3.83    September
             9.00    3.83    September
             9.00    2.33    September
             9.00    2.37    September
             7.17    2.33    September
             7.17    4.00    September
             7.17    3.50    September
             7.17    2.33    September
             7.20    2.33    September
             7.17    3.83    September
             7.17    2.33    October
             7.17    2.30    October
             7.17    2.33    October
             7.17    2.33    October
             8.68    2.33    October
             7.20    2.33    October
             9.00    3.87    October
             9.00    5.50    October
             11.00   5.50    November
             13.00   1.03    November
             11.00   3.83    November
             9.00    3.83    December
             6.03    13.00   March
             8.00    15.67   March
             5.97    0.70    April
             5.57    2.40    May
             7.25    1.03    May
             5.50    2.33    June
             5.50    3.00    July
             4.70    2.00    July
             5.50    2.33    August
             5.50    2.33    August
             9.00    2.33    September
             9.00    1.03    September
             9.00    2.33    September
             7.13    2.37    September
             7.17    2.33    September
             7.17    2.33    September
             7.17    2.33    September
             7.17    2.43    September
             7.20    1.07    September
             7.17    2.00    September
             7.17    2.33    September
             7.17    2.00    September
             7.17    1.03    September
             7.13    2.33    October
             7.17    2.33    October
             7.17    2.30    October
             7.17    2.33    October
             9.00    3.83    October
             13.00   2.33    October
             14.97   3.83    October
             14.93   2.03    October", header = TRUE)