改进对数对数刻度的视觉拟合

时间:2018-01-16 12:59:12

标签: r ggplot2 curve-fitting predict non-linear-regression

我正在尝试拟合模型以匹配特定曲线。曲线必须以log10标度绘制才有意义。

库:

library(data.table)
library(ggplot2)
library(minpack.lm)

数据:

selected_data <- structure(list(shear_rate = c(1000, 794.329, 630.957, 501.187, 398.107, 316.228, 251.189, 199.527, 158.488, 125.893, 100, 79.4327, 63.0956, 50.1188, 39.8107, 31.6228, 25.1188, 19.9526, 15.8489, 12.5893, 10, 7.94327, 6.30959, 5.01187, 3.98106, 3.16227, 2.51188, 1.99526, 1.58489, 1.25892, 0.999992, 0.794326, 0.630961, 0.501198, 0.39811, 0.31623, 0.251185, 0.199522, 0.158491, 0.125895, 0.0999949, 0.0794332, 0.0630955, 0.0501237, 0.0398085, 0.0316226, 0.0251186, 0.0199599, 0.0158465, 0.0125871, 0.00999673, 0.00794292, 0.00630626, 0.0050086, 0.00398398, 0.00316298, 0.00250931, 0.00199656, 0.0015839, 0.0012584, 0.000999712, 0.000792728, 0.00063262, 0.000508155, 0.000395346, 0.000317991, 0.000249344, 0.000196793, 0.000160254, 0.000127184, 0.000104187, 8.23117e-05, 6.47387e-05, 4.92171e-05, 4.36566e-05, 3.17533e-05, 1.73707e-05, 1.96217e-05, 7.36872e-06, 1.29909e-05, 1.1677e-05, 1.02765e-05, 8.38507e-06, 7.018e-06, 2.97303e-06, 1.43835e-06, 6.66051e-06), viscosity = c(0.078019, 0.0871012, 0.0978244, 0.110653, 0.125792, 0.142472, 0.160574, 0.180504, 0.202741, 0.228238, 0.257929, 0.293168, 0.334648, 0.383216, 0.440372, 0.508067, 0.588533, 0.684139, 0.798329, 0.934592, 1.09783, 1.29358, 1.52916, 1.81312, 2.15566, 2.57134, 3.07472, 3.68667, 4.43435, 5.34438, 6.45903, 7.82502, 9.50057, 11.5573, 14.0901,17.2101, 21.056, 25.8055, 31.6732, 38.9193, 47.895, 58.9914,72.7054, 89.6858, 110.719, 136.708, 168.881, 208.623, 258.156, 319.473, 395.938, 490.966, 610.17, 759.098, 944.226, 1177.97, 1472.51, 1837.24, 2301.57, 2880.84, 3609.73, 4529.95, 5655.8, 7014.35, 8985.18, 11142.5, 14172.5, 17910.1, 21937.4, 27576.4, 33589.4, 42421.1, 53821.7, 70652.6, 79480.7, 109057, 199001, 175881, 467589, 264795, 294139, 333755, 408477, 487371, 1149050, 2372100, 511717), 
sample_name = c("x", "x", "x", "x", "x", "x", "x", "x", "x","x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x",  "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x")), .Names = c("shear_rate", "viscosity", "sample_name"), class = c("data.table", "data.frame"), row.names = c(NA, -87L))

模特:

fitted_models <- lapply(split(selected_data, by = "sample_name"), function(d) {
d <- d[order(shear_rate)]
start_viscosity_zero <- mean(d[seq(5), viscosity])
start_viscosity_inf <- mean(d[seq(nrow(d) - 5, nrow(d)), viscosity])
d[, start_viscosity :=  (start_viscosity_zero - viscosity) / (viscosity - start_viscosity_inf)]
m1 <- lm(log(start_viscosity) ~ log(shear_rate), data = d)
start_C <- (coef(m1)[[1]])^(1/coef(m1)[[2]])
start_m <- coef(m1)[[2]]/10
nlsLM(viscosity ~ C * (shear_rate)^(m-1) + viscosity_inf,
    data = d,
    start = list(viscosity_inf = 24, C = 0.11, m = 0.4))
})

代码之间的一些:

selected_data[, prediction := predict(fitted_models[[ .BY[["sample_name"]] ]], .SD), by = "sample_name"]

情节:

p <- ggplot(data = selected_data, aes(x = shear_rate, y = viscosity, group = sample_name, colour = sample_name))
p <- p + geom_point()
p <- p + geom_line(aes(y = prediction))
p <- p + coord_trans(x = "log10", y = "log10")
p <- p + scale_colour_discrete("Sample")
p <- p + scale_fill_discrete(guide = FALSE)
p

这段代码给了我类似下面的图(上面的代码只生成了一个样本的图形,并且简化了):

ggplot

你可以马上看到我的问题。 &#39;徘徊&#39;预测曲线的原因是由于对数轴,我知道。但是,我该如何改变这条线以使它符合图形?

1 个答案:

答案 0 :(得分:3)

您的问题不是可视化问题。问题是你适合线性比例,并期望情节在对数尺度上看起来很好。那是行不通的。 (一般而言,对于按对数尺度变化的数据,拟合线性标度不是一个好主意。)

简单示例:

# create exponential decay data
set.seed(342)
x <- seq(0, 10, by = .5)
y <- exp(-x) * rnorm(length(x), mean = 1, sd = .3)
d <- data.frame(x, y)

# fit model to data
model <- nls(y ~ a*exp(-b*x), data = d, start = list(a = 1, b = 1))

# visualize in linear scale---looks good
xpred <- seq(0, 10, by = .1)
ypred <- predict(model, data.frame(x = xpred))
ggplot(d, aes(x, y)) + geom_point() + 
  geom_line(data = data.frame(x = xpred, y = ypred))

enter image description here

# visualize in log scale---oops
ggplot(d, aes(x, y)) + geom_point() + 
  geom_line(data = data.frame(x = xpred, y = ypred)) + scale_y_log10()

enter image description here