如何在R中找到并绘制多项式回归曲线的局部最大值?

时间:2018-01-29 16:32:28

标签: r plot regression

我有农场的产量数据(自变量)和各种营养素作为预测因子。我使用lm(y ~ ploy(x,3))进行了单变量(立方)线性回归。然后我将预测变量(P)与产量相对应,并添加了最佳拟合曲线(图1)。然后,我如何找到该曲线的局部最大值,并在我的图中添加一个点,其中包括该拟合屈服的值(图2)?我查看了quantmod package中的findPeaks()函数,但未能实现。

图1 - 我创建的图: enter image description here

图2 - 我想创建的图: enter image description here

我的代码:

head(farm)
P <- farm$P
Yield <- farm$Yield
fit <- lm(Yield ~ poly(P,3), data=farm)
plot(P, Yield, col="lightblue", xlab = "P", ylab = "Yield", main="Farm - polynomial fit")
lines(sort(P), fitted(fit)[order(P)], col='darkred', type='l', cex=10) 

我的数据:

farm <- structure(list(Yield = c(28.3818, 27.0422555556, 31.5444454545, 
32.2084818182, 25.983, 43.49634, 53.3981333333, 59.19274, 61.23185, 
63.83512, 64.5388, 63.08576, 63.83954, 62.7838333333, 64.10366, 
67.5600666667, 67.6325, 68.0023, 63.02148, 64.0025666667, 64.19196, 
60.4893, 65.2803333333, 62.28096, 59.1914, 59.37304, 58.6482333333, 
58.08474, 58.98306, 61.52755, 61.9972, 62.0188833333, 61.09072, 
58.44715, 61.06646, 59.6103833333, 60.57035, 67.5646, 67.85488, 
71.5719571429, 50.30092, 22.80535, 62.26542, 62.0754333333, 58.46464, 
62.7326833333, 65.53482, 63.3064666667, 63.68004, 63.5212166667, 
64.65068, 66.19655, 66.0726, 66.6404666667, 65.02208, 62.1035666667, 
61.78824, 61.0844166667, 60.1723, 61.6899333333, 57.8784166667, 
56.8886666667, 59.13944, 57.26695, 69.5792666667, 61.9865166667, 
55.7342, 53.7012857143, 56.9748166667, 56.8706333333, 61.3384166667, 
52.87725, 29.2331888889, 15.5046, 42.943, 44.590325, 50.09525, 
52.68065, 53.0983714286, 19.43875, 38.06708, 59.9217666667, 62.0287166667, 
66.59496, 64.3986333333, 64.4089333333, 64.6951, 63.8205, 63.6122, 
62.51384, 63.2565666667, 62.47745, 61.42234, 62.6233166667, 62.0730333333, 
60.81996, 60.6490833333, 58.4331333333, 59.94638, 61.4119333333
), P = c(90L, 93L, 97L, 97L, 100L, 106L, 107L, 114L, 118L, 120L, 
121L, 121L, 120L, 120L, 121L, 115L, 116L, 113L, 101L, 90L, 85L, 
84L, 85L, 85L, 83L, 82L, 82L, 82L, 82L, 84L, 84L, 85L, 84L, 87L, 
87L, 88L, 88L, 92L, 93L, 95L, 80L, 67L, 85L, 90L, 88L, 90L, 91L, 
91L, 91L, 91L, 91L, 92L, 88L, 78L, 73L, 71L, 71L, 71L, 71L, 71L, 
69L, 69L, 69L, 71L, 75L, 71L, 71L, 78L, 82L, 84L, 84L, 77L, 64L, 
51L, 99L, 104L, 107L, 109L, 107L, 99L, 102L, 115L, 120L, 121L, 
121L, 120L, 121L, 112L, 111L, 101L, 101L, 89L, 89L, 85L, 84L, 
83L, 83L, 82L, 83L, 83L), Mg = c(666L, 667L, 668L, 668L, 668L, 
670L, 670L, 671L, 672L, 672L, 673L, 673L, 673L, 673L, 673L, 645L, 
645L, 636L, 594L, 553L, 535L, 534L, 534L, 534L, 534L, 534L, 534L, 
534L, 534L, 534L, 534L, 543L, 540L, 570L, 568L, 576L, 576L, 577L, 
577L, 577L, 574L, 572L, 575L, 576L, 576L, 576L, 577L, 577L, 577L, 
577L, 577L, 577L, 574L, 567L, 565L, 564L, 564L, 564L, 564L, 564L, 
564L, 564L, 564L, 564L, 565L, 564L, 553L, 519L, 509L, 509L, 509L, 
508L, 505L, 502L, 668L, 669L, 670L, 670L, 670L, 668L, 669L, 672L, 
672L, 673L, 673L, 673L, 673L, 636L, 636L, 594L, 594L, 553L, 553L, 
534L, 534L, 534L, 534L, 534L, 534L, 534L), S = c(29L, 30L, 31L, 
31L, 31L, 33L, 33L, 35L, 36L, 36L, 37L, 37L, 36L, 36L, 37L, 37L, 
37L, 37L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 
38L, 38L, 38L, 38L, 36L, 36L, 36L, 36L, 37L, 37L, 37L, 34L, 31L, 
35L, 36L, 36L, 36L, 37L, 36L, 37L, 37L, 37L, 37L, 37L, 38L, 39L, 
38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 39L, 38L, 37L, 37L, 
37L, 38L, 38L, 36L, 33L, 30L, 31L, 32L, 33L, 34L, 33L, 31L, 32L, 
35L, 36L, 37L, 37L, 36L, 37L, 37L, 37L, 38L, 38L, 38L, 38L, 38L, 
38L, 38L, 38L, 38L, 38L, 38L), K = c(537L, 542L, 549L, 549L, 
554L, 563L, 565L, 576L, 584L, 586L, 589L, 588L, 587L, 587L, 588L, 
565L, 566L, 557L, 516L, 477L, 460L, 459L, 460L, 459L, 456L, 455L, 
455L, 454L, 455L, 457L, 458L, 463L, 461L, 478L, 476L, 482L, 482L, 
489L, 491L, 493L, 470L, 447L, 477L, 485L, 482L, 485L, 487L, 486L, 
487L, 487L, 487L, 488L, 495L, 512L, 517L, 514L, 513L, 513L, 512L, 
512L, 510L, 509L, 509L, 513L, 519L, 513L, 495L, 454L, 443L, 446L, 
446L, 435L, 414L, 392L, 552L, 561L, 565L, 569L, 565L, 551L, 557L, 
579L, 586L, 589L, 589L, 587L, 589L, 555L, 554L, 515L, 516L, 476L, 
475L, 459L, 458L, 457L, 456L, 455L, 456L, 457L), Ca = c(4795L, 
4796L, 4797L, 4797L, 4797L, 4799L, 4799L, 4800L, 4801L, 4801L, 
4802L, 4802L, 4802L, 4802L, 4802L, 4779L, 4779L, 4771L, 4736L, 
4701L, 4686L, 4685L, 4685L, 4685L, 4685L, 4685L, 4685L, 4685L, 
4685L, 4685L, 4685L, 4789L, 4754L, 5135L, 5100L, 5204L, 5204L, 
5205L, 5205L, 5205L, 5202L, 5200L, 5203L, 5204L, 5204L, 5204L, 
5205L, 5205L, 5205L, 5205L, 5205L, 5205L, 5125L, 4886L, 4807L, 
4806L, 4806L, 4806L, 4806L, 4806L, 4806L, 4806L, 4806L, 4806L, 
4807L, 4806L, 4687L, 4329L, 4211L, 4211L, 4211L, 4210L, 4207L, 
4204L, 4797L, 4798L, 4799L, 4799L, 4799L, 4797L, 4798L, 4801L, 
4801L, 4802L, 4802L, 4802L, 4802L, 4771L, 4771L, 4736L, 4736L, 
4701L, 4701L, 4685L, 4685L, 4685L, 4685L, 4685L, 4685L, 4685L
)), .Names = c("Yield", "P", "Mg", "S", "K", "Ca"), row.names = c(NA, 
100L), class = "data.frame")

2 个答案:

答案 0 :(得分:2)

fit的系数给出了回归线的显式多项式方程。因此,您可以通过获取此多项式的一阶和二阶导数来解析最大值:

fit <- lm(Yield ~ poly(P,3, raw=TRUE), data=farm)  # Note use of raw=TRUE, otherwise poly returns orthogonal polynomials

# Plot data points
with(farm, plot(P, Yield, col="lightblue", ylim=c(0, max(Yield)), 
                xlab = "P", ylab = "Yield", main="Farm - polynomial fit"))

# Add model fit
P = seq(min(farm$P), max(farm$P), length=1000)
pred = data.frame(P,
                  Yield=predict(fit, newdata=data.frame(P)))

with(pred, lines(P, Yield, col='darkred', type='l', cex=10))

# Vector of model coefficients
cf = coef(fit)

# First derivative of fit. This is just for Illustration; we won't plot this
#  equation directly, but we'll find its roots to get the locations of 
#  local maxima and minima.
D1 = cf[2] + 2*cf[3]*pred$P + 3*cf[4]*pred$P^2

# Roots of first derivative (these are locations where first derivative = 0).
#  Use quadratic formula to find roots.
D1roots = (-2*cf[3] + c(-1,1)*sqrt((2*cf[3])^2 - 4*3*cf[4]*cf[2]))/(2*3*cf[4])

# Second derivative at the two roots. 
D2atD1roots =  2*cf[3] + 6*cf[4]*D1roots

# Local maxima are where the second derivative is negative
max_x = D1roots[which(D2atD1roots < 0)]
max_y = cf %*% max_x^(0:3)

# Plot local maxima
points(max_x, max_y, pch=16, col="red")

# Add values of Yield at local maxima
text(max_x, max_y, label=round(max_y,2), adj=c(0.5,-1), cex=0.8)

enter image description here

答案 1 :(得分:0)

由于您没有给出一个可重复性最小的示例,因此提供代码有点困难。但建议是寻找最接近零点的拟合曲线的导数。您可以简单地计算导数

curve <- fitted(fit)[order(P)]
derivative <- diff(curve)/(diff(order(P)))

然后查找函数的零。这里的approch可能是找到abs(derivative)

的最小值