在小提琴图上标注样本量

时间:2018-11-01 14:27:20

标签: r ggplot2

使用以下代码,我为大多数变量绘制了小提琴图,并对信息不足、无法绘制小提琴图的变量改用散点表示。我想在每个小提琴的右端标注样本量,但一直没有找到可行的方法。

#dataset
str(threats)
'data.frame':   60 obs. of  3 variables:
 $ threat         : Factor w/ 7 levels "weather","competition",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Species        : Factor w/ 5 levels "Bank","Barn",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ effect.abs     : int  18 13 0 43 43 0 23 13 14 16 ...

# Offset every value by 0.1 so that zeros stay plottable on the log10 axis
threats[["effect.abs1"]] <- threats[["effect.abs"]] + 0.1

# Threat/species combinations with insufficient information for a violin;
# these rows are drawn as individual points with geom_dotplot instead
threats.sub <- subset(
  threats,
  threat %in% c("competition", "insect_availability", "incidental_loss") |
    (threat == "disease" & Species == "Barn") |
    (threat == "weather" & Species %in% c("Cliff", "Purple")) |
    (threat == "predation" & Species == "Bank")
)

# Dots for the sparse groups, violins for the full data, one panel per threat
ggplot() +
  geom_dotplot(
    aes(x = Species, y = effect.abs1, fill = Species),
    data = threats.sub,
    binaxis = "y", stackdir = "center", binwidth = 0.1
  ) +
  geom_violin(
    aes(x = Species, y = effect.abs1, fill = Species),
    data = threats
  ) +
  # Flip the coordinates so the effect size runs along the horizontal axis
  coord_flip() +
  facet_wrap(
    ~threat,
    ncol = 2,
    strip.position = "left",
    labeller = labeller(threat = facet.labels)
  ) +
  # 0.1 is the offset stand-in for zero, so that break is labelled 0
  scale_y_log10(breaks = c(0.1, 1, 10, 100), labels = c(0, 1, 10, 100)) +
  labs(x = "Threat", y = "Absolute effect on adult survival (%)") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 9, colour = "black"),
    axis.title = element_text(size = 10, colour = "black"),
    # Species is already conveyed by the fill legend, so hide its axis labels
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_rect(colour = "black", size = 1),
    plot.margin = unit(c(.3, .3, .4, .4), "cm"),
    # Transparent strip; element_blank() was the alternative considered
    strip.background = element_rect(fill = NA, colour = NA),
    legend.position = "right"
  )

enter image description here

我尝试了以下解决方案(来自其他问题的回答),但只得到了一条错误消息。

# Summary function for stat_summary(): returns a named vector holding the
# mean of x (as `y`) and the number of elements in x (as `label`)
give.n <- function(x) {
  stats <- c(mean(x), length(x))
  names(stats) <- c("y", "label")
  stats
}
# NOTE(review): the ggplot() call above is given no default data or aes, and
# this layer supplies none of its own - presumably the cause of the error below
stat_summary(fun.data = give.n, geom = "text") # added to the ggplot code above
Error in if (empty(data)) { : missing value where TRUE/FALSE needed

如能就此问题提供任何帮助,我将不胜感激。我希望找到一种让 R 自动计算样本量的方法(而不是由我手动提供每个样本量),因为在生成此图时我还不断收到以下警告消息,我想借此再次确认所有数据都已正确显示。

Warning messages:
1: In max(data$density) : no non-missing arguments to max; returning -Inf
2: In max(data$density) : no non-missing arguments to max; returning -Inf
3: In max(data$density) : no non-missing arguments to max; returning -Inf

谢谢!


应要求提供数据:

# Literal definition of a 60-row data.frame with the factor columns `threat`
# (7 levels) and `Species` (5 levels) and the integer column `effect.abs`.
# Looks like dput() output for `threats` - TODO confirm. The expression is not
# assigned here; assign it to a name (e.g. threats <- structure(...)) to use it.
structure(list(threat = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
7L, 7L, 7L, 7L, 7L), .Label = c("weather", "competition", "incidental_loss", 
"contaminants", "insect_availability", "disease", "predation"
), class = "factor"), 
Species = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 5L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 
5L, 5L, 1L, 2L, 2L, 2L, 2L), .Label = c("Bank", "Barn", "Cliff", 
"Tree", "Purple"), class = "factor"), 
effect.abs = c(18L, 
13L, 0L, 43L, 43L, 0L, 23L, 13L, 14L, 16L, 18L, 29L, 0L, 40L, 
0L, 20L, 53L, 0L, 17L, 15L, 13L, 25L, 19L, 25L, 0L, 0L, 0L, 14L, 
20L, 0L, 0L, 0L, 0L, 4L, 1L, 0L, 1L, 1L, 1L, 1L, 12L, 0L, 30L, 
95L, 10L, 3L, 7L, 12L, 14L, 100L, 0L, 23L, 13L, 5L, 0L, 58L, 
20L, 4L, 9L, 0L)), row.names = c(NA, -60L), class = "data.frame")

2 个答案:

答案 0 :(得分:3)

解决此问题的一种方法是预先计算各组的 n,例如:

# Count the observations behind each violin: one row per threat/Species
# combination. Grouping by effect.abs1 as well would count each distinct
# value separately instead of the whole group.
summary_df <- threats %>%
  group_by(threat, Species) %>%
  summarise(
    n = n(),
    # largest value in the group, usable as the label position on the y axis
    effect.abs1 = max(effect.abs1)
  )

然后将其添加到图形中

# coord_flip() swaps the displayed axes but not the aesthetics: x is still
# Species and y the effect size, so the fixed position 100 belongs on y
+ geom_label(aes(x = Species, y = 100, label = n), data = summary_df)

答案 1 :(得分:0)

感谢 @Jack Brookes 的有用评论,让我有了解决这个问题的思路。这是我针对此问题的最终解决方案。

# Per threat/Species group across the full data: the sample size (n) and the
# largest offset effect value (maxE)
summary_df_all <- summarise(
  group_by(threats, threat, Species),
  n = n(),
  maxE = max(effect.abs1)
)

# The same summary for the dot-plotted subset; probability = 0 flags these
# groups as ones whose n should not be shown
summary_df_sub <- threats.sub %>%
  group_by(threat, Species) %>%
  summarise(n = n(), maxE = max(effect.abs1), probability = 0)

# Join the two summaries (shared columns get .x/.y suffixes), give groups that
# are absent from the subset probability 1, and keep only those groups
summary_df_violin <- summary_df_all %>%
  left_join(summary_df_sub, by = c("threat", "Species")) %>%
  mutate(probability = replace(probability, is.na(probability), 1)) %>%
  filter(probability > 0)

# Final plot: dots, violins, and the sample size written beside each violin
ggplot() +
  geom_dotplot(
    aes(x = Species, y = effect.abs1, colour = Species, fill = Species),
    data = threats.sub,
    binaxis = "y", stackdir = "center", binwidth = 0.09
  ) +
  geom_violin(
    aes(x = Species, y = effect.abs1, colour = Species, fill = Species),
    data = threats,
    size = 1.1
  ) +
  # Earlier attempt, kept for reference:
  # geom_label(aes(x = 100, y = effect.abs1, label = n), data = summary_df)
  # n.x and maxE.x are the full-data columns of the joined summary; the label
  # sits at the group's largest value, nudged further along the value axis
  geom_text(
    aes(x = Species, y = maxE.x, label = n.x),
    data = summary_df_violin,
    nudge_y = 0.2
  ) +
  coord_flip() +
  facet_wrap(
    ~threat,
    ncol = 2,
    strip.position = "left",
    labeller = labeller(threat = facet.labels)
  ) +
  # 0.1 is the offset stand-in for zero, so that break is labelled 0
  scale_y_log10(breaks = c(0.1, 1, 10, 100), labels = c(0, 1, 10, 100)) +
  labs(x = "Threat", y = "Absolute effect on adult survival (%)") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 9, colour = "black"),
    axis.title = element_text(size = 10, colour = "black"),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_rect(colour = "black", size = 1),
    plot.margin = unit(c(.3, .3, .4, .4), "cm"),
    strip.background = element_rect(fill = NA, colour = NA),
    strip.text = element_text(size = 9, colour = "black"),
    legend.position = "right"
  )

enter image description here