我有农场的产量数据(自变量)和各种营养素作为预测因子。我使用lm(y ~ ploy(x,3))
进行了单变量(立方)线性回归。然后我将预测变量(P)与产量相对应,并添加了最佳拟合曲线(图1)。然后,我如何找到该曲线的局部最大值,并在我的图中添加一个点,其中包括该拟合屈服的值(图2)?我查看了quantmod package中的findPeaks()
函数,但未能实现。
我的代码:
head(farm)
P <- farm$P
Yield <- farm$Yield
fit <- lm(Yield ~ poly(P,3), data=farm)
plot(P, Yield, col="lightblue", xlab = "P", ylab = "Yield", main="Farm - polynomial fit")
lines(sort(P), fitted(fit)[order(P)], col='darkred', type='l', cex=10)
我的数据:
farm <- structure(list(Yield = c(28.3818, 27.0422555556, 31.5444454545,
32.2084818182, 25.983, 43.49634, 53.3981333333, 59.19274, 61.23185,
63.83512, 64.5388, 63.08576, 63.83954, 62.7838333333, 64.10366,
67.5600666667, 67.6325, 68.0023, 63.02148, 64.0025666667, 64.19196,
60.4893, 65.2803333333, 62.28096, 59.1914, 59.37304, 58.6482333333,
58.08474, 58.98306, 61.52755, 61.9972, 62.0188833333, 61.09072,
58.44715, 61.06646, 59.6103833333, 60.57035, 67.5646, 67.85488,
71.5719571429, 50.30092, 22.80535, 62.26542, 62.0754333333, 58.46464,
62.7326833333, 65.53482, 63.3064666667, 63.68004, 63.5212166667,
64.65068, 66.19655, 66.0726, 66.6404666667, 65.02208, 62.1035666667,
61.78824, 61.0844166667, 60.1723, 61.6899333333, 57.8784166667,
56.8886666667, 59.13944, 57.26695, 69.5792666667, 61.9865166667,
55.7342, 53.7012857143, 56.9748166667, 56.8706333333, 61.3384166667,
52.87725, 29.2331888889, 15.5046, 42.943, 44.590325, 50.09525,
52.68065, 53.0983714286, 19.43875, 38.06708, 59.9217666667, 62.0287166667,
66.59496, 64.3986333333, 64.4089333333, 64.6951, 63.8205, 63.6122,
62.51384, 63.2565666667, 62.47745, 61.42234, 62.6233166667, 62.0730333333,
60.81996, 60.6490833333, 58.4331333333, 59.94638, 61.4119333333
), P = c(90L, 93L, 97L, 97L, 100L, 106L, 107L, 114L, 118L, 120L,
121L, 121L, 120L, 120L, 121L, 115L, 116L, 113L, 101L, 90L, 85L,
84L, 85L, 85L, 83L, 82L, 82L, 82L, 82L, 84L, 84L, 85L, 84L, 87L,
87L, 88L, 88L, 92L, 93L, 95L, 80L, 67L, 85L, 90L, 88L, 90L, 91L,
91L, 91L, 91L, 91L, 92L, 88L, 78L, 73L, 71L, 71L, 71L, 71L, 71L,
69L, 69L, 69L, 71L, 75L, 71L, 71L, 78L, 82L, 84L, 84L, 77L, 64L,
51L, 99L, 104L, 107L, 109L, 107L, 99L, 102L, 115L, 120L, 121L,
121L, 120L, 121L, 112L, 111L, 101L, 101L, 89L, 89L, 85L, 84L,
83L, 83L, 82L, 83L, 83L), Mg = c(666L, 667L, 668L, 668L, 668L,
670L, 670L, 671L, 672L, 672L, 673L, 673L, 673L, 673L, 673L, 645L,
645L, 636L, 594L, 553L, 535L, 534L, 534L, 534L, 534L, 534L, 534L,
534L, 534L, 534L, 534L, 543L, 540L, 570L, 568L, 576L, 576L, 577L,
577L, 577L, 574L, 572L, 575L, 576L, 576L, 576L, 577L, 577L, 577L,
577L, 577L, 577L, 574L, 567L, 565L, 564L, 564L, 564L, 564L, 564L,
564L, 564L, 564L, 564L, 565L, 564L, 553L, 519L, 509L, 509L, 509L,
508L, 505L, 502L, 668L, 669L, 670L, 670L, 670L, 668L, 669L, 672L,
672L, 673L, 673L, 673L, 673L, 636L, 636L, 594L, 594L, 553L, 553L,
534L, 534L, 534L, 534L, 534L, 534L, 534L), S = c(29L, 30L, 31L,
31L, 31L, 33L, 33L, 35L, 36L, 36L, 37L, 37L, 36L, 36L, 37L, 37L,
37L, 37L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L,
38L, 38L, 38L, 38L, 36L, 36L, 36L, 36L, 37L, 37L, 37L, 34L, 31L,
35L, 36L, 36L, 36L, 37L, 36L, 37L, 37L, 37L, 37L, 37L, 38L, 39L,
38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 39L, 38L, 37L, 37L,
37L, 38L, 38L, 36L, 33L, 30L, 31L, 32L, 33L, 34L, 33L, 31L, 32L,
35L, 36L, 37L, 37L, 36L, 37L, 37L, 37L, 38L, 38L, 38L, 38L, 38L,
38L, 38L, 38L, 38L, 38L, 38L), K = c(537L, 542L, 549L, 549L,
554L, 563L, 565L, 576L, 584L, 586L, 589L, 588L, 587L, 587L, 588L,
565L, 566L, 557L, 516L, 477L, 460L, 459L, 460L, 459L, 456L, 455L,
455L, 454L, 455L, 457L, 458L, 463L, 461L, 478L, 476L, 482L, 482L,
489L, 491L, 493L, 470L, 447L, 477L, 485L, 482L, 485L, 487L, 486L,
487L, 487L, 487L, 488L, 495L, 512L, 517L, 514L, 513L, 513L, 512L,
512L, 510L, 509L, 509L, 513L, 519L, 513L, 495L, 454L, 443L, 446L,
446L, 435L, 414L, 392L, 552L, 561L, 565L, 569L, 565L, 551L, 557L,
579L, 586L, 589L, 589L, 587L, 589L, 555L, 554L, 515L, 516L, 476L,
475L, 459L, 458L, 457L, 456L, 455L, 456L, 457L), Ca = c(4795L,
4796L, 4797L, 4797L, 4797L, 4799L, 4799L, 4800L, 4801L, 4801L,
4802L, 4802L, 4802L, 4802L, 4802L, 4779L, 4779L, 4771L, 4736L,
4701L, 4686L, 4685L, 4685L, 4685L, 4685L, 4685L, 4685L, 4685L,
4685L, 4685L, 4685L, 4789L, 4754L, 5135L, 5100L, 5204L, 5204L,
5205L, 5205L, 5205L, 5202L, 5200L, 5203L, 5204L, 5204L, 5204L,
5205L, 5205L, 5205L, 5205L, 5205L, 5205L, 5125L, 4886L, 4807L,
4806L, 4806L, 4806L, 4806L, 4806L, 4806L, 4806L, 4806L, 4806L,
4807L, 4806L, 4687L, 4329L, 4211L, 4211L, 4211L, 4210L, 4207L,
4204L, 4797L, 4798L, 4799L, 4799L, 4799L, 4797L, 4798L, 4801L,
4801L, 4802L, 4802L, 4802L, 4802L, 4771L, 4771L, 4736L, 4736L,
4701L, 4701L, 4685L, 4685L, 4685L, 4685L, 4685L, 4685L, 4685L
)), .Names = c("Yield", "P", "Mg", "S", "K", "Ca"), row.names = c(NA,
100L), class = "data.frame")
答案 0 :(得分:2)
fit
的系数给出了回归线的显式多项式方程。因此,您可以通过获取此多项式的一阶和二阶导数来解析最大值:
fit <- lm(Yield ~ poly(P,3, raw=TRUE), data=farm) # Note use of raw=TRUE, otherwise poly returns orthogonal polynomials
# Plot data points
with(farm, plot(P, Yield, col="lightblue", ylim=c(0, max(Yield)),
xlab = "P", ylab = "Yield", main="Farm - polynomial fit"))
# Add model fit
P = seq(min(farm$P), max(farm$P), length=1000)
pred = data.frame(P,
Yield=predict(fit, newdata=data.frame(P)))
with(pred, lines(P, Yield, col='darkred', type='l', cex=10))
# Vector of model coefficients
cf = coef(fit)
# First derivative of fit. This is just for Illustration; we won't plot this
# equation directly, but we'll find its roots to get the locations of
# local maxima and minima.
D1 = cf[2] + 2*cf[3]*pred$P + 3*cf[4]*pred$P^2
# Roots of first derivative (these are locations where first derivative = 0).
# Use quadratic formula to find roots.
D1roots = (-2*cf[3] + c(-1,1)*sqrt((2*cf[3])^2 - 4*3*cf[4]*cf[2]))/(2*3*cf[4])
# Second derivative at the two roots.
D2atD1roots = 2*cf[3] + 6*cf[4]*D1roots
# Local maxima are where the second derivative is negative
max_x = D1roots[which(D2atD1roots < 0)]
max_y = cf %*% max_x^(0:3)
# Plot local maxima
points(max_x, max_y, pch=16, col="red")
# Add values of Yield at local maxima
text(max_x, max_y, label=round(max_y,2), adj=c(0.5,-1), cex=0.8)
答案 1 :(得分:0)
由于您没有给出一个可重复性最小的示例,因此提供代码有点困难。但建议是寻找最接近零点的拟合曲线的导数。您可以简单地计算导数
curve <- fitted(fit)[order(P)]
derivative <- diff(curve)/(diff(order(P)))
然后查找函数的零。这里的approch可能是找到abs(derivative)