用ggplot2重现drc :: plot.drc

时间:2016-07-10 07:27:49

标签: r ggplot2 logistic-regression drc

我想使用drc::plot.drc重现以下ggplot2图表。

enter image description here

df1 <-
      structure(list(TempV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L, 13L, 13L, 13L, 
    13L, 13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
    11L, 11L, 11L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L, 14L, 14L, 14L, 
    14L, 14L, 14L, 14L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
    12L), .Label = c("22.46FH-142", "27.59FH-142", "26.41FH-142", 
    "29.71FH-142", "31.66FH-142", "34.11FH-142", "33.22FH-142", "22.46FH-942", 
    "27.59FH-942", "26.41FH-942", "29.71FH-942", "31.66FH-942", "34.11FH-942", 
    "33.22FH-942"), class = "factor"), Start = c(0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 
    192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 
    0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 
    48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 
    192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 
    0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 
    48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L), End = c(24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf), 
        Germinated = c(0L, 0L, 0L, 0L, 3L, 67L, 46L, 12L, 101L, 221L, 
        0L, 0L, 0L, 0L, 57L, 50L, 44L, 31L, 32L, 236L, 0L, 0L, 0L, 
        31L, 68L, 50L, 31L, 34L, 29L, 207L, 0L, 0L, 8L, 30L, 31L, 
        55L, 27L, 22L, 4L, 273L, 0L, 0L, 46L, 64L, 16L, 8L, 15L, 
        15L, 20L, 266L, 0L, 0L, 0L, 0L, 4L, 13L, 63L, 51L, 147L, 
        172L, 0L, 0L, 4L, 26L, 92L, 31L, 91L, 14L, 7L, 185L, 0L, 
        0L, 0L, 0L, 0L, 32L, 59L, 36L, 50L, 273L, 0L, 0L, 0L, 4L, 
        13L, 32L, 42L, 52L, 42L, 265L, 0L, 0L, 0L, 6L, 22L, 40L, 
        57L, 44L, 73L, 208L, 0L, 1L, 2L, 24L, 55L, 41L, 68L, 24L, 
        33L, 202L, 0L, 0L, 18L, 31L, 26L, 30L, 61L, 25L, 58L, 201L, 
        0L, 0L, 36L, 54L, 33L, 55L, 12L, 27L, 55L, 178L, 0L, 0L, 
        6L, 28L, 26L, 31L, 53L, 48L, 33L, 225L)), .Names = c("TempV", 
    "Start", "End", "Germinated"), row.names = c(NA, -140L), class = "data.frame")

library(data.table)

dt1 <- data.table(df1)

library(drc)

dt1fm1 <- 
  drm(
        formula   = Germinated ~ Start + End
      , curveid   = TempV
  #   , pmodels   = 
  #   , weights   = 
      , data      = dt1
  #   , subset    = 
      , fct       = LL.2()
      , type      = "event"
      , bcVal     = NULL
      , bcAdd     = 0
  #   , start     =
      , na.action = na.fail
      , robust    = "mean"
      , logDose   = NULL
      , control   = drmc(
                            constr      = FALSE
                            , errorm      = TRUE
                            , maxIt       = 1500
                            , method      = "BFGS"
                            , noMessage   = FALSE
                            , relTol      = 1e-07
                            , rmNA        = FALSE
                            , useD        = FALSE
                            , trace       = FALSE
                            , otrace      = FALSE
                            , warnVal     = -1
                            , dscaleThres = 1e-15
                            , rscaleThres = 1e-15
                            )
      , lowerl    = NULL
      , upperl    = NULL
      , separate  = FALSE
      , pshifts   = NULL 
      )



## ----dt1fm1Plot1----
plot(
        x      = dt1fm1
    , xlab     = "Time (Hours)"
    , ylab     = "Proportion Germinated (\\%)"    
  # , ylab     = "Proportion Germinated (%)"    
    , add      = FALSE
    , level    = NULL
    , type     = "average" # c("average", "all", "bars", "none", "obs", "confidence")
    , broken   = FALSE
  # , bp
    , bcontrol = NULL
    , conName  = NULL
    , axes     = TRUE
    , gridsize = 100
    , log      = ""
  # , xtsty
    , xttrim   = TRUE
    , xt       = NULL
    , xtField    = NULL
    , xField     = "Time (Hours)"
    , xlim     = c(0, 200)
    , yt       = NULL
    , ytField    = NULL
    , yField     = "Proportion Germinated"
    , ylim     = c(0, 1.05)
    , lwd      = 1
    , cex      = 1.2
    , cex.axis = 1
    , col      = TRUE
  # , lty
  # , pch
    , legend     = TRUE
  # , legendText  
    , legendPos  = c(40, 1.1)
    , cex.legend = 0.6
    , normal     = FALSE
    , normRef    = 1
    , confidence.level = 0.95
    )


## ----dt1fm1Plot2----
dt1fm1Means1 <- dt1[, .(Germinated=mean(Germinated)/450), by=.(TempV, Start, End)]
dt1fm1Means2 <- dt1fm1Means1[, .(Start=Start, End=End, Cum_Germinated=cumsum(Germinated)), by=.(TempV)]
dt1fm1Means  <- data.table(dt1fm1Means2[End!=Inf], Pred=predict(object=dt1fm1))

dt1fm1Plot2 <- 
       ggplot(data= dt1fm1Means, mapping=aes(x=End, y=Cum_Germinated, group=TempV, color=TempV, shape=TempV)) + 
        geom_point() +
        geom_line(aes(y = Pred)) +
        scale_shape_manual(values=seq(0, 13)) +
        labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
        theme_bw() +
        scale_x_continuous(expand = c(0, 0), breaks = c(0, unique(dt1fm1Means$End))) +
        scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(100*x,"\\%")) +
      # scale_y_continuous(expand = c(0, 0), labels = percent) +
        expand_limits(x = c(0, max(dt1fm1Means$End)+20), y = c(0, max(dt1fm1Means$Pred)+0.1)) +
        theme(axis.title.x = element_text(size = 12, hjust = 0.54, vjust = 0),
              axis.title.y = element_text(size = 12, angle = 90,  vjust = 0.25))
print(dt1fm1Plot2)

enter image description here

问题

ggplot2输出中几乎没有差异。出现这些差异是因为predict函数以不同于模式中给定级别的模式提供输出。

被修改

实际上,drm函数更改了TempV级别的顺序,这从summary(dt1fm1)输出和drc::plot.drc输出图表中可以清楚地看出。

1 个答案:

答案 0 :(得分:3)

正如问题所述,有一个问题与drm改变因子水平的顺序有关。对这个混乱进行彻底改组证明比我预期的更棘手。

最后,我通过在每个因素级别调用drm函数一次来建立一个结果表,一次一个因子级别。

这种啰嗦的做法揭示了你的plot.drc和ggplot版本的第一个情节都不正确这一事实。

让我们首先将函数调用包装到另一个包装函数中的drm(),以便为每条跟踪重复调用它:

drcmod <- function(dt1){
  drm(formula   = Germinated ~ Start + End
    , curveid   = TempV
    , data      = dt1
    , fct       = LL.2()
    , type      = "event"
    , bcVal     = NULL
    , bcAdd     = 0
    , na.action = na.fail
    , robust    = "mean"
    , logDose   = NULL
    , control   = drmc(
      constr      = FALSE
      , errorm      = TRUE
      , maxIt       = 1500
      , method      = "BFGS"
      , noMessage   = FALSE
      , relTol      = 1e-07
      , rmNA        = FALSE
      , useD        = FALSE
      , trace       = FALSE
      , otrace      = FALSE
      , warnVal     = -1
      , dscaleThres = 1e-15
      , rscaleThres = 1e-15
    )
    , lowerl    = NULL
    , upperl    = NULL
    , separate  = FALSE
    , pshifts   = NULL 
  )
}

现在我们可以使用这个包装器依次将drc模型拟合到每个因子级别:

dt2 <- data.table()
for (i in 1:nlevels(dt1$TempV)) {
  dt <- dt1[TempV==levels(TempV)[i]]
  dt[, TempV:=as.character(TempV)]
  dt[, Germ_frac := mean(Germinated)/450, by=.(Start)]
  dt[, cum_Germinated := cumsum(Germ_frac)]
  dt[, Pred := c(predict(object=drcmod(dt)), NA)] 
  dt2 <- rbind(dt2, dt)
}

和情节:

ggplot(dt2[End != Inf], aes(x=End, y=cum_Germinated, group=TempV, color=TempV, shape=TempV)) + 
  geom_point() +
  geom_line(aes(y = Pred)) +
  scale_shape_manual(values=seq(0, 13)) +
  labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
  theme_bw()

enter image description here

修改

如果我们使用较少因子级别的数据子集运行问题中的原始代码,例如使用

dt1 <- dt1[TempV %in% levels(TempV)[1:5],]
dt1 <- droplevels(dt1)

所有图(OP中的2个版本和本答案中的版本)给出相同的结果。当使用大量因子水平时,似乎只会出现差异。 OP中的ggplot和plot.drc都给出了跟踪到因子级别的错误匹配这一事实表明问题最有可能出现在drm()函数中,而不是在plot.drc中。