Question

我有一个如下所示的数据集：

# Example data: three replicates per treatment (A, B, C) on days 2, 5 and 9.
data <- data.frame(
  Day    = rep(c(2, 5, 9), each = 9),
  Treat  = rep(rep(c("A", "B", "C"), each = 3), times = 3),
  Length = c(2, 4, 3, 5, 3, 3, 8, 3, 7,
             3, 6, 7, 4, 7, 8, 8, 8, 8,
             10, 7, 5, 7, 8, 9, 19, 20, 12),
  Width  = seq(from = 1, to = 27, by = 1)
)
head(data)

我想在ggplot中创建两个折线图，X轴为“Day”，Y轴分别为“Length”和“Width”。在这两个图中，我希望每个“Treat”（处理组）各有一条单独的线，并且每个点都带有标准误（SE）误差线。

我尝试将数据转换成长格式：

# Long format: columns 1-2 (Day, Treat) are ids, columns 3-4 (Length, Width)
# are the measured variables.
data_long <- melt(data, id.vars = c(1, 2), measure.vars = c(3, 4))

是否有一种有效的方法，让ggplot计算出正确的汇总统计量，然后按组绘图？我尝试过把“Day”分别设为整数和因子，也尝试过分别针对“Length”和“Width”对原始数据集进行melt（转换为长格式）。非常感谢任何帮助！

Answer 1

这是一个更简单的解决方案，它可以避免绘图时各组相互重叠，并且误差线采用了更常用的95％置信区间：

require(ggplot2)

# Example data: days 2, 5 and 9, treatments A/B/C with three replicates each.
df <- data.frame(
  Day = rep(c(2, 5, 9), each = 9),
  Treat = rep(rep(c("A", "B", "C"), each = 3), times = 3),
  Length = c(
    2, 4, 3, 5, 3, 3, 8, 3, 7,
    3, 6, 7, 4, 7, 8, 8, 8, 8,
    10, 7, 5, 7, 8, 9, 19, 20, 12
  ),
  Width = seq(1, 27, by = 1)
)

# Mean "Length" for every Treat x Day combination.
plotDf <- aggregate(Length ~ Treat + Day, data = df, FUN = mean, na.rm = TRUE)

# Half-width of a normal-approximation 95% confidence interval for the mean,
# i.e. 1.96 * standard error.
# Note: drop or replace the 1.96 multiplier to change the interval (using 1
# instead gives plain standard-error bars).
# NAs are excluded from both the standard deviation and the sample size;
# counting them with length(x) would inflate n and make the bars too narrow.
find_se <- function(x) {
  n <- sum(!is.na(x))
  1.96 * sd(x, na.rm = TRUE) / sqrt(n)
}

# Per-group error-bar half-widths, computed with the same grouping formula and
# data as the means so the rows line up with plotDf.
# The result is assigned as a plain vector: wrapping it in setNames(..., "SE")
# would only label the first element "SE" (and the rest NA); the column name
# already comes from `plotDf$SE`.
plotDf$SE <- aggregate(Length ~ Treat + Day, data = df, FUN = find_se)$Length

# Draw the group means as lines and points with error bars; every layer shares
# one small horizontal dodge so the treatments do not overlap.
pd <- position_dodge(width = 0.2)
ggplot(plotDf, aes(x = Day, y = Length, colour = Treat)) +
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(
    aes(ymin = Length - SE, ymax = Length + SE),
    width = 0.3,
    position = pd
  )

图形输出：

Answer 2

我喜欢使用这里（here）定义的summarySE()函数来生成误差线，希望下面的代码能得到您想要的结果。

首先是汇总函数：

# Summarise one measure column per group: count, mean, standard deviation,
# standard error of the mean and confidence-interval half-width.
#
# Args:
#   data:          data frame holding the measure and grouping columns.
#   measurevar:    name (string) of the column to summarise.
#   groupvars:     character vector of grouping column names.
#   na.rm:         if TRUE, NAs are ignored in the count, mean and sd.
#   conf.interval: confidence level used for the `ci` column (default .95).
#   .drop:         forwarded unchanged to plyr::ddply().
#
# Returns:
#   A data frame with the grouping columns, N, the group mean (stored under
#   the name given by `measurevar`), sd, se and ci.
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  # Use plyr through its namespace rather than library(plyr): attaching a
  # package from inside a function alters the caller's search path and can
  # mask same-named functions from packages that were attached earlier.
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("summarySE() requires the 'plyr' package", call. = FALSE)
  }

  # New version of length which can handle NA's: if na.rm==TRUE, don't count
  # them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }

  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- plyr::ddply(data, groupvars, .drop=.drop,
                       .fun = function(xx, col) {
                         c(N    = length2(xx[[col]], na.rm=na.rm),
                           mean = mean   (xx[[col]], na.rm=na.rm),
                           sd   = sd     (xx[[col]], na.rm=na.rm)
                         )
                       },
                       measurevar
  )

  # Rename the "mean" column to the name of the measured variable
  datac <- plyr::rename(datac, c("mean" = measurevar))

  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval:
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult

  return(datac)
}

然后插入数据。

# Example data: 3 days x 3 treatments x 3 replicates (27 rows).
data <- data.frame(
  Day = rep(c(2, 5, 9), each = 9),
  Treat = rep(rep(c("A", "B", "C"), each = 3), times = 3),
  Length = c(
    2, 4, 3, 5, 3, 3, 8, 3, 7,
    3, 6, 7, 4, 7, 8, 8, 8, 8,
    10, 7, 5, 7, 8, 9, 19, 20, 12
  ),
  Width = seq(1, 27, by = 1)
)
# Per-group N, mean, sd, se and ci of Length for each Treat/Day pair.
summarized <- summarySE(
  data,
  measurevar = "Length",
  groupvars = c("Treat", "Day")
)
summarized

然后是ggplot本身。

# Mean Length over Day, one coloured line per Treat, with +/- 1 SE error bars.
ggplot(summarized, aes(x = Day, y = Length, colour = Treat)) +
  geom_errorbar(
    aes(ymin = Length - se, ymax = Length + se),
    width = 0.1
  ) +
  geom_line() +
  geom_point()

在ggplot2

2 个答案: