找到增长曲线的最大梯度

时间:2012-07-31 15:47:13

标签: r ggplot2 derivative

我使用ggplot2制作了一张包含四条生长曲线的图表。

如果有人想尝试,希望下面的代码能生成图表。

我想求出每条曲线的最大斜率,斜率按(比如)4个时间点的跨度来计算。

任何人都可以提出任何想法吗?

library(ggplot2)
# Growth-curve measurements: OD600 readings taken every 2 time units from
# 0 to 30 for four MMS concentrations (16 readings per concentration).
dat <- data.frame(
  # The same integer sampling grid is repeated once per concentration.
  TIME = rep(seq(0L, 30L, by = 2L), times = 4L),
  OD600 = c(
    # MMS = 0
    0.2202, 0.2177, 0.2199, 0.2471, 0.2834, 0.357, 0.4734, 0.647,
    0.898, 1.1959, 1.3765, 1.3978, 1.3948, 1.3928, 1.3961, 1.4018,
    # MMS = 0.005
    0.24, 0.2317, 0.2328, 0.2522, 0.2748, 0.3257, 0.4098, 0.5455,
    0.7387, 0.9904, 1.2516, 1.3711, 1.3713, 1.3703, 1.3686, 1.3761,
    # MMS = 0.01
    0.2266, 0.2219, 0.2245, 0.2401, 0.2506, 0.2645, 0.3018, 0.3484,
    0.4216, 0.5197, 0.666, 0.872, 1.1181, 1.2744, 1.3079, 1.2949,
    # MMS = 0.02
    0.2389, 0.2242, 0.2315, 0.2364, 0.2372, 0.2373, 0.2306, 0.2385,
    0.236, 0.2331, 0.2379, 0.2334, 0.2336, 0.2339, 0.2389, 0.2349
  ),
  # Each concentration labels one contiguous run of 16 rows.
  MMS = rep(c(0, 0.005, 0.01, 0.02), each = 16L)
)
# Plot OD600 against time with one coloured line per MMS concentration,
# using a log10 y-axis.
# The title is set with ggtitle(): opts() was deprecated and later removed
# from ggplot2, so the opts(title = ...) form fails on current versions.
graph <- ggplot(data = dat, aes(x = TIME, y = OD600))
graph +
  geom_line(aes(colour = factor(MMS)), alpha = 1) +
  ggtitle("Log growth curves: change in cell density with increasing concentrations of MMS") +
  scale_y_log10()

非常感谢

2 个答案:

答案 0 :(得分:5)

如果您不需要插值,@lockedoff 的解决方案很好,但您确定前两个浓度得到的时间都应该是14吗?

要获得更好的值,您应该找到拐点对应的时间,即二阶导数为零的位置。这对于真实数据来说可能很棘手,您应首先绘制导数曲线以确定是否可行。

你会注意到浓度0.02的曲线是没有希望的;如果这是我的实验,我会回到实验室检查这是否真的是0.02而不是0.2。如果确实是0.02,那么你手上的是一种非常不寻常的物质;请小心,如果没有很好的解释,审稿人会把稿件退回。

使用 predict.smooth.spline 计算导数,并用 uniroot 找到二阶导数 == 0 的点。

library(plyr)
# Equivalent degrees of freedom passed to smooth.spline(). Tune as needed:
# lower values smooth more heavily, higher values follow the data more closely.
smoothingDf <- 8

# Sanity check before root finding: evaluate the second derivative of each
# concentration's fitted spline on a 1-unit time grid and plot the curves.
deriv2 <- ddply(dat, .(MMS), function(grp) {
  fit <- smooth.spline(grp$TIME, grp$OD600, df = smoothingDf)
  time_grid <- 0:max(grp$TIME)
  # predict() returns list(x, y); data.frame() turns it into columns x and y.
  data.frame(predict(fit, time_grid, deriv = 2))
})
ggplot(data = deriv2, aes(x = x, y = y, colour = factor(MMS))) +
  geom_line()
# The 0.02 series gives no usable second-derivative signal, so drop it.
dat1 <- dat[dat$MMS != 0.02, ]

# For each remaining concentration, locate the time at which the second
# derivative of the fitted spline crosses zero (the inflection of the curve).
ld50 <- ddply(dat1, .(MMS), function(grp) {
  fit <- smooth.spline(grp$TIME, grp$OD600, df = smoothingDf)
  second_deriv <- function(t) predict(fit, t, deriv = 2)$y

  # Bracket the root between the grid times where the second derivative is
  # largest and smallest.
  time_grid <- seq(min(grp$TIME), max(grp$TIME))
  curvature <- second_deriv(time_grid)
  bracket_start <- time_grid[which.max(curvature)]
  bracket_end <- time_grid[which.min(curvature)]

  uniroot(second_deriv, lower = bracket_start, upper = bracket_end)$root
})

结果看起来不错,但没有0.02

    MMS       V1
1 0.000 16.23093
2 0.005 17.43714
3 0.010 22.29317

Second derivatives. Note that 0.02 is not useful

答案 1 :(得分:3)

这样的东西?

# For every MMS concentration, find the steepest rise in OD600 measured across
# a window of 3 sampling intervals (4 consecutive time points), together with
# the time at which that window starts.
# The result is a list-matrix with one row per concentration and the columns
# MMS, max_slope and time.
cbind(
  MMS = unique(dat$MMS),
  do.call(
    rbind,
    lapply(
      unique(dat$MMS),
      function(x) {
        window <- 3L # number of sampling intervals spanned by each slope
        tdat <- dat[dat$MMS == x, ]
        response <- tdat$OD600
        timepoints <- tdat$TIME
        n_points <- length(response)

        # With too few points no complete window exists; report NA instead of
        # indexing out of range.
        if (n_points <= window) {
          return(list(max_slope = NA_real_, time = NA))
        }

        # Element i is the slope from time point i to time point i + window.
        starts <- seq_len(n_points - window)
        rise <- response[starts + window] - response[starts]
        run <- timepoints[starts + window] - timepoints[starts]
        slopes <- rise / run

        # which.max() skips NA and picks the first maximum, so ties cannot
        # produce several times and no floating-point `==` is needed.
        steepest <- which.max(slopes)
        list(
          max_slope = slopes[steepest],
          time = timepoints[steepest]
        )
      }
    )
  )
)

给出:

     MMS   max_slope   time
[1,] 0     0.1215833   14  
[2,] 0.005 0.1176833   14  
[3,] 0.01  0.1014      20  
[4,] 0.02  0.002166667 2