具有由AIC选择的最佳拟合曲线的散点图

时间:2014-07-11 21:26:38

标签: r plot curve loess

我正在尝试编写一个执行以下操作的函数: 1.生成和x,y散点图 2.添加黄土曲线 3.基于AIC模型拟合程序添加曲线,其中最佳拟合模型可以是线性,二次或三次。我只想为此步骤绘制一条线(即最佳拟合,而不是所有3种可能)。

我可以执行第1步和第2步,但不能使#3工作。我哪里错了?下面的示例数据,但我将在各种数据集上运行此函数,其中一些数据集的长度和NA会有所不同。

maindata = structure(c(1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 
1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 
1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 
1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 
1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, -1.54, 
1.5, 3.26, -2.79, 0.54, -0.51, -2.12, 1.83, -1.88, 0.47, -1.05, 
-2.16, -1.04, -1.77, -2.54, 1.67, -2.97, -2.58, NA, -0.08, 
2.05, 0.27, 2.18, 0.01, NA, -2.08, -0.42, -0.23, -1.58, -0.55, 
2.63, 0.38, 2.17, -3.09, 3.14, -3.01, -0.13, 2.38, 3.88, 1.14, 
2.54, 1.71, 2.86, -1.11, -1.98, -0.93, 1.03, 2.25, 1.18, -1.91, 
1, -0.09, 0.7, -1.35, -0.2, 1.35, 1.72, 0.72, -5.96, 2.95, -0.25, 
NA, 47, 40, NA, 20, 70, 80, 30, 33, 40, 71, 63, 25, 66, 41, 25, 
38, 18, 22, 60, 85, 30, 75, 25, 80, 65, 33, 85, 95, 45, 75, 19, 
75, 27, 13, 14, 15, 99, 22, 10, NA, 20, 35, 17, 55, 35, 70, 47, 
24, 45, 38, 50, 90, 60, 50, 100, 42, 34, 55, 10, 15, 90, NA), .Dim = c(62L, 
3L), .Dimnames = list(NULL, c("year", "x_values", "y_values")))


dd_plot = function(x, y, yaxis, xaxis) {
  subset.data = subset(maindata, x!="NA" & y!="NA"); 
  plot(subset.data$y~subset.data$x, pch = 20, ylab = yaxis, xlab=xaxis)
    lines(loess.smooth(subset.data$x, subset.data$y), col = "blue", lwd = 2, lty =2)
  fit = stepAIC(lm(subset.data$y~subset.data$x+I(subset.data$x^2)+I(subset.data$x^3)))
  lines(subset.data$x, predict(fit), col="red")
    legend("topleft", c("Lowess","Best Fit Polynomial"), lty = c(2,1), col=c("blue","red"), bty="n", xjust = -0.2)
}

dd_plot(y = y_values, x = x_values, yaxis = "Y_label", xaxis = "X_label")

2 个答案:

答案 0 :(得分:1)

嗯,你没有准确地描述哪个部分没有工作"或者它以何种方式与您的期望相匹配。您的示例有一堆不匹配的变量名称,这可能是您的真实数据没有,它在maindata中寻找xy实际上有x_valuesy_values。然后,您对dd_plot的调用引用了不存在的变量x_valuesy_values

但是我假设你已经过去了,那是在谈论用于最佳拟合多项式的简单曲线而不是简单的曲线球。问题是在绘制线条时,您的数据应按x值排序。 R只连接连续点。所以我想你想要

lines(subset.data$x[order(subset.data$x)], 
    predict(fit)[order(subset.data$x)], col="red")

而不是绘制未分类的数据。返回

enter image description here

答案 1 :(得分:1)

这里有一些不同的问题。从风格上讲,您通常希望使用数据框而不是您生成的“结构”。第二个问题是您将函数内部的数据与外部数据混合在一起。这并非严格错误,但通常会使解决问题更加困难。

更改那些解决了问题:

maindata <-
  data.frame(year=c(1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 
               1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 
               1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 
               1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 
               1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 
               2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013),
             x_values=c(-1.54, 1.5, 3.26, -2.79, 0.54, -0.51, -2.12, 1.83, -1.88, 0.47, -1.05, 
               -2.16, -1.04, -1.77, -2.54, 1.67, -2.97, -2.58, NA, -0.08, 
               2.05, 0.27, 2.18, 0.01, NA, -2.08, -0.42, -0.23, -1.58, -0.55, 
               2.63, 0.38, 2.17, -3.09, 3.14, -3.01, -0.13, 2.38, 3.88, 1.14, 
               2.54, 1.71, 2.86, -1.11, -1.98, -0.93, 1.03, 2.25, 1.18, -1.91, 
               1, -0.09, 0.7, -1.35, -0.2, 1.35, 1.72, 0.72, -5.96, 2.95, -0.25, 
               NA),
             y_values=c(47, 40, NA, 20, 70, 80, 30, 33, 40, 71, 63, 25, 66, 41, 25, 
               38, 18, 22, 60, 85, 30, 75, 25, 80, 65, 33, 85, 95, 45, 75, 19, 
               75, 27, 13, 14, 15, 99, 22, 10, NA, 20, 35, 17, 55, 35, 70, 47, 
               24, 45, 38, 50, 90, 60, 50, 100, 42, 34, 55, 10, 15, 90, NA))

dd_plot = function(x, y, ...) {
  subset.data = subset(data.frame(x, y), !is.na(x) & !is.na(y)); 
  plot(subset.data$y~subset.data$x,
       pch = 20, ...)
  lines(loess.smooth(subset.data$x, subset.data$y),
        col = "blue", lwd = 2, lty = 2)
  fit = stepAIC(lm(subset.data$y~subset.data$x+I(subset.data$x^2)+I(subset.data$x^3)))
  lines(subset.data$x, predict(fit), col="red")
  legend("topleft", c("Loess","Best Fit Polynomial"),
         lty = c(2,1), col=c("blue","red"), bty="n", xjust = -0.2)
}

dd_plot(y = maindata$y_values, x = maindata$x_values, ylab = "Y_label", xlab = "X_label")