我有一个如下所示的数据集:
# Example data: 3 sampling days x 3 treatments x 3 replicates (27 rows).
data <- data.frame(
  Day = rep(c(2, 5, 9), each = 9),
  Treat = rep(rep(c("A", "B", "C"), each = 3), times = 3),
  Length = c(
    2, 4, 3, 5, 3, 3, 8, 3, 7,
    3, 6, 7, 4, 7, 8, 8, 8, 8,
    10, 7, 5, 7, 8, 9, 19, 20, 12
  ),
  Width = seq(from = 1, to = 27, by = 1)
)
# Preview the first six rows.
head(data)
我想用 ggplot 绘制两张折线图:X 轴为“Day”,Y 轴分别为“Length”和“Width”。在每张图中,我希望每个“Treat”(处理组)各有一条线,并且每个点都带有标准误(SE)误差线。
我尝试将数据转换成长格式:
# Reshape to long format: Day and Treat identify each row, while Length and
# Width are stacked into variable/value pairs.
data_long <- melt(
  data,
  id.vars = c("Day", "Treat"),
  measure.vars = c("Length", "Width")
)
有没有一种有效的方法,让 ggplot 先计算出正确的汇总统计量,然后再按组绘图?我尝试过把“Day”分别设为整数和因子,也尝试过分别针对“Length”和“Width”对原始数据集做 melt(转换为长格式)。非常感谢任何帮助!
答案 0 :(得分:1)
这是一个更简单的解决方案:它在绘图时有助于避免各组图形相互重叠,并且误差线采用了更常用的 95% 置信区间:
require(ggplot2)
# Example data: 3 sampling days x 3 treatments x 3 replicates (27 rows).
df <- data.frame(
  Day = rep(c(2, 5, 9), each = 9),
  Treat = rep(rep(c("A", "B", "C"), each = 3), times = 3),
  Length = c(
    2, 4, 3, 5, 3, 3, 8, 3, 7,
    3, 6, 7, 4, 7, 8, 8, 8, 8,
    10, 7, 5, 7, 8, 9, 19, 20, 12
  ),
  Width = seq(1, 27, 1)
)

# Mean "Length" for every Treat x Day combination.
plotDf <- aggregate(Length ~ Treat + Day, data = df, FUN = mean, na.rm = TRUE)

# Half-width of a normal-approximation 95% confidence interval of the mean,
# i.e. 1.96 * standard error. Remove or replace the `1.96 *` factor to get
# plain SE bars or a different confidence level.
# Missing values are excluded from both the standard deviation and the sample
# size so that the two stay consistent.
find_se <- function(x) {
  n_obs <- sum(!is.na(x))
  1.96 * sd(x, na.rm = TRUE) / sqrt(n_obs)
}

# Error-bar half-width per Treat x Day group. Both aggregate() calls use the
# same formula and data, so their rows line up and the result can be attached
# directly as a column. unname() keeps the column a plain numeric vector.
plotDf$SE <- unname(
  aggregate(Length ~ Treat + Day, data = df, FUN = find_se)[["Length"]]
)
# Plot the group means with their error bars. A small horizontal dodge keeps
# the treatments from being drawn on top of each other at the same Day.
pd <- position_dodge(width = 0.2)
ggplot(plotDf, aes(x = Day, y = Length, colour = Treat)) +
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(
    aes(ymin = Length - SE, ymax = Length + SE),
    width = 0.3,
    position = pd
  )
图形输出:
答案 1 :(得分:0)
我喜欢使用 summarySE() 函数(其定义见原回答中的链接)来计算误差线所需的统计量,希望下面的代码能生成您想要的结果。
首先是汇总函数:
#' Summarise a measurement by group: count, mean, sd, standard error and CI
#'
#' @param data A data frame.
#' @param measurevar Name (string) of the column to summarise.
#' @param groupvars Character vector of column names to group by.
#' @param na.rm If `TRUE`, missing values are ignored in the count, the mean
#'   and the standard deviation.
#' @param conf.interval Confidence level used for the `ci` column.
#' @param .drop Passed on to `plyr::ddply()`.
#' @return A data frame with the grouping columns, `N` (count), a column named
#'   after `measurevar` holding the group mean, `sd`, `se` (standard error of
#'   the mean) and `ci` (`se` times the t quantile for `conf.interval` with
#'   `N - 1` degrees of freedom).
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE,
                      conf.interval = .95, .drop = TRUE) {
  # Variant of length() that does not count NAs when na.rm is TRUE.
  length2 <- function(x, na.rm = FALSE) {
    if (na.rm) {
      sum(!is.na(x))
    } else {
      length(x)
    }
  }

  # plyr is called through its namespace instead of being attached with
  # library(): attaching inside a function changes the caller's search path,
  # and an unqualified rename() could resolve to another attached package's
  # function of the same name.
  # For each group's data frame, return a vector with N, mean and sd.
  datac <- plyr::ddply(
    data, groupvars,
    .drop = .drop,
    .fun = function(xx, col) {
      c(
        N = length2(xx[[col]], na.rm = na.rm),
        mean = mean(xx[[col]], na.rm = na.rm),
        sd = sd(xx[[col]], na.rm = na.rm)
      )
    },
    measurevar
  )

  # Rename the "mean" column to the name of the measured variable.
  datac <- plyr::rename(datac, c("mean" = measurevar))

  # Standard error of the mean.
  datac$se <- datac$sd / sqrt(datac$N)

  # Confidence interval multiplier for the standard error: the t quantile,
  # e.g. for conf.interval = .95 use .975 (above/below) with df = N - 1.
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult

  datac
}
然后载入数据并计算汇总统计量。
# Example data: 3 sampling days x 3 treatments x 3 replicates (27 rows).
data <- data.frame(
  Day = rep(c(2, 5, 9), each = 9),
  Treat = rep(rep(c("A", "B", "C"), each = 3), times = 3),
  Length = c(
    2, 4, 3, 5, 3, 3, 8, 3, 7,
    3, 6, 7, 4, 7, 8, 8, 8, 8,
    10, 7, 5, 7, 8, 9, 19, 20, 12
  ),
  Width = seq(from = 1, to = 27, by = 1)
)
# Summarise Length for every Treat x Day combination, then show the result.
summarized <- summarySE(
  data,
  measurevar = "Length",
  groupvars = c("Treat", "Day")
)
summarized
然后是ggplot本身。
# Mean Length over Day, one coloured line per treatment, with error bars
# extending one standard error (the `se` column) above and below each mean.
ggplot(
  data = summarized,
  mapping = aes(x = Day, y = Length, colour = Treat)
) +
  geom_errorbar(
    mapping = aes(ymin = Length - se, ymax = Length + se),
    width = 0.1
  ) +
  geom_line() +
  geom_point()