Question

我想根据多次试验的曲线估计一条“平均曲线”。我以前用 approx() 做过这件事，但那时我有一组固定的 x 轴值，y 是在这些值上测量的。

在此数据集中，x 和 y 的取值都是混杂的（即，没有一组测量 y 时所用的固定 x 值）。相反，每个试验的 x 值都不同。

在这种情况下，有没有办法对曲线求平均（并给出标准误差）？

或者：如何从不同的曲线中提取y值（对于一组固定的x值）并构造一个新的数据帧？

我提供了一个样本数据集（已融化（melt）为长格式），以及用于绘制各个试验曲线的代码。P1、P2、P3、P4、P5 是各个试验的名称/ID。

    > dput(head(dat,74))
structure(list(ID = structure(c(7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L), .Label = c("LCRA_P1", "LCRA_P2", 
"LCRA_P3", "LCRA_P4", "LCRA_P5", "LCRA_P6", "P1", "P2", "P3", 
"P4", "P5"), class = "factor"), Time = c(170L, 452L, 572L, 692L, 
812L, 932L, 1052L, 1172L, 1292L, 1412L, 1532L, 1652L, 1772L, 
1892L, 2012L, 2132L, 2252L, 54L, 290L, 410L, 530L, 650L, 770L, 
890L, 1010L, 1130L, 1250L, 1370L, 1490L, 1610L, 1730L, 1850L, 
1970L, 115L, 235L, 355L, 475L, 595L, 715L, 835L, 955L, 1075L, 
1195L, 1315L, 1435L, 1555L, 1675L, 1795L, 135L, 201L, 321L, 441L, 
561L, 681L, 801L, 921L, 1041L, 1161L, 1281L, 1401L, 100L, 251L, 
371L, 431L, 491L, 611L, 731L, 791L, 851L, 911L, 971L, 1031L, 
1091L, 1151L), I = c(154.5066034, 138.3819058, 104.8425346, 61.6283449, 
40.34374398, 35.18384073, 29.37894957, 40.34374398, 44.85865933, 
27.44398585, 31.9589012, 41.6337198, 54.53347792, 64.20829652, 
70.65817559, 66.78824815, 66.78824815, 154.5066034, 90.00781278, 
73.88311512, 62.2733328, 61.6283449, 57.75841746, 53.24350211, 
48.08359886, 55.17846583, 51.30853839, 42.92369561, 53.24350211, 
50.66355049, 54.53347792, 38.40878026, 54.53347792, 154.5066034, 
73.88311512, 62.2733328, 61.6283449, 57.75841746, 53.24350211, 
48.08359886, 55.17846583, 51.30853839, 42.92369561, 38.40878026, 
54.53347792, 37.79284177, 35.21289014, 39.08281758, 154.5066034, 
129.997063, 84.84790953, 51.30853839, 40.98873189, 33.24887701, 
29.37894957, 27.44398585, 33.24887701, 33.89386492, 31.9589012, 
31.9589012, 135.1569662, 85.49289744, 48.08359886, 48.08359886, 
22.2840826, 27.44398585, 49.37357467, 51.30853839, 31.9589012, 
28.73396167, 23.57405841, 21.63909469, 9.384324471, 25.50902213
)), .Names = c("ID", "Time", "I"), row.names = c(NA, 74L), class = "data.frame")

（包括绘图代码）

# Plot every trial (ID) as its own coloured curve: one point per measurement,
# joined by a line per trial. The original snippet never closed the ggplot()
# call, so it did not parse.
ggplot(dat, aes(x = Time, y = I, colour = ID)) +
  geom_point() +
  labs(x = "Time (Seconds)", y = "Infiltration (mm/hour)") +
  scale_x_continuous(breaks = seq(0, 2500, 100)) +
  scale_y_continuous(breaks = seq(0, 160, 10)) +
  geom_line(aes(group = ID))

为了求平均，我使用了这个：

# Mean +/- 1 standard error of I at each distinct Time value, drawn with the
# smooth geom. Arguments for mean_se() must be passed through `fun.args`;
# a bare `mult = 1` is not a stat_summary() parameter.
# NOTE(review): the summary is computed per unique x value, so it only averages
# across trials when they share Time values -- in the sample data shown above
# the trials appear to have no Time values in common; confirm for df2.
ggplot(df2, aes(x = Time, y = I)) +
  stat_summary(
    fun.data = "mean_se",
    fun.args = list(mult = 1),
    geom = "smooth"
  )

结果（下图）没有任何意义。

timers

Answer 1

我仍然不确定你想要的确切输出是什么，但这里有一些你可以调整使用的简单例子。我认为你在设置 aes 时，geom_smooth 仍然继承了颜色（colour）或分组（group）映射，这就是为什么你得到了很多条线。如果你想为不同的 ID 绘制线、点或其他 geom，同时又想要一条对所有 ID 求平均的平滑线，就需要把使用颜色或分组映射的图层与不需要这些映射的图层分开。

研究一下 stat_smooth 的参数——你可以通过很多方式指定它绘制的曲线，包括方法（method）和公式（formula），以及取决于方法的其他参数。注意（从 geom_smooth 输出的消息可以看出），观测数量较少时默认使用 LOESS（局部加权回归）曲线，这可能正是你要找的那种平均。

以下是几个你可以据此继续扩展的示例：

library(ggplot2)

# Example 1: points coloured per trial. Colour is mapped only inside
# geom_point(), so geom_smooth() treats all trials as a single group and
# draws one curve (with its default confidence band).
ggplot(df, aes(x = Time, y = I)) +
  geom_point(aes(color = ID)) +
  geom_smooth()
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Example 2: same points, but a straight-line (linear model) fit and no
# confidence band. TRUE/FALSE are spelled out because T/F can be reassigned.
ggplot(df, aes(x = Time, y = I)) +
  geom_point(aes(color = ID)) +
  geom_smooth(se = FALSE, method = "lm")

# Example 3: one semi-transparent line per trial plus a single smoother over
# all trials; the smaller span makes the fit follow the data more closely.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here to match the ggplot2 version this example was run with.
ggplot(df, aes(x = Time, y = I)) +
  geom_line(aes(group = ID), alpha = 0.5) +
  geom_smooth(size = 0.8, se = FALSE, span = 0.2)
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'

由reprex package（v0.2.0）创建于2018-06-14。

基于R中的多个曲线/数据集绘制平均曲线

1 个答案: