如何将同一个函数应用于拆分后的多个数据框?

时间:2020-08-13 14:17:18

标签: r function loops split

我目前有一些代码,可以针对用 expand.grid 函数生成的一组参数,估计逻辑斯谛(logistic)增长曲线。

  library('spatstat')
  library('ggplot2')
  library('dplyr')
  library('reshape2')
  library("plyr")
  
  n <- 1000    # how many hosts are placed on the landscape
  dim <- 1000  # side length of the square observation window
  ## Scatter the hosts uniformly at random over a dim x dim window.
  landscape <- ppp(
    x = runif(n) * dim,
    y = runif(n) * dim,
    window = owin(xrange = c(0, dim), yrange = c(0, dim))
  )
  ## Mark every point; exactly one host, chosen at random, is TRUE (infected).
  marks(landscape) <- sample(rep(c(TRUE, FALSE), times = c(1, n - 1)))
  ## One row per host: its coordinates plus an integer id.
  data <- data.frame(x = landscape$x, y = landscape$y, id = seq_len(n))
  ## Both values are handed unchanged to tauLeapG() in the simulation loop.
  sigma <- 300
  delta.t <- 100
## Parameter grid: every theta x beta combination, two replicates each
## (raise the upper bound of `replicate` for more). `r` and `detection`
## are placeholder columns, all NA for now.
grid <- expand.grid(
  theta = c(.1, .5, 1, 5, 10),  # parameter 1
  beta = c(1, 5, 10, 20, 50),   # parameter 2
  replicate = 1:2,
  r = NA,
  detection = NA
)
## Order rows by descending theta, then descending beta.
grid <- arrange_at(grid, 1:2, desc)

## Run one simulation per row of `grid` and append each run's infection times
## to `data` as a column named 'simulation <row index>'.
##
## Times are gathered in a preallocated list and bound to `data` once after the
## loop, rather than growing `data` with cbind() on every iteration.
sim_times <- vector("list", nrow(grid))

for (i in seq_len(nrow(grid))) {  # seq_len() is a no-op for an empty grid
  ## run a simulation with the parameter values of row i
  ## (still to do per the original notes: identify a realised growth rate r
  ## and measure the first detection date)
  output <- tauLeapG(beta=grid[i,"beta"],
                     theta=grid[i,"theta"],
                     b=.4,
                     sigma=sigma,
                     delta.t=delta.t,
                     ppp=landscape)
  print(paste0("beta", grid[i,"beta"]))
  print(paste0("theta", grid[i,"theta"]))

  ## Second element of the simulation output, first two columns, rows ordered
  ## by the second column.
  ## NOTE(review): presumably location/time of infection per host, with the
  ## second column being `time` -- confirm against tauLeapG().
  infections <- output[[2]]
  temp <- infections[, 1:2][order(infections[, 2]), ]

  ## Fail loudly instead of letting a short vector be recycled silently.
  if (length(temp$time) != nrow(data)) {
    stop("simulation ", i, " returned ", length(temp$time),
         " infection times, expected ", nrow(data), call. = FALSE)
  }
  sim_times[[i]] <- temp$time

  ## print status
  cat('\nepidemic', i, 'simulated...')
}

if (length(sim_times) > 0) {
  names(sim_times) <- paste0('simulation ', seq_along(sim_times))
  ## optional = TRUE keeps the names verbatim (they contain a space).
  data <- cbind(data, as.data.frame(sim_times, optional = TRUE))
}

    ## `data` (built above) holds x, y, id plus one infection-time column per
    ## simulated epidemic.
    head(data)
    ## Fitting a logistic curve needs the number of infected hosts at each time,
    ## so first reshape to long format: one row per (host, simulation) pair.
    data_long <- melt(data, id.vars = c('x', 'y', 'id'), value.name = 'time', variable.name = 'sim')
    ## Shared time axis: every distinct infection time seen in any simulation
    ## (sort() drops NA values).
    times <- sort(unique(data_long$time))

    ## For each simulation, count the hosts infected at or before each time.
    ## vapply() pins the result to an integer vector even when `times` is empty;
    ## na.rm = TRUE stops a missing infection time from turning every count
    ## into NA.
    data_logistic <- data_long %>%
      group_by(sim) %>%
      do(data.frame(
        time = times,
        infected = vapply(times, function(x) sum(.$time <= x, na.rm = TRUE), integer(1))
      )) %>%
      ungroup()
    ## all curves have the same number of points
    ggplot(data_logistic) + geom_point(aes(x=time, y=infected, colour=sim))

    ## Logistic growth curve, used as the model to fit for r.
    ##
    ## t  : time(s) at which to evaluate the curve (vectorised)
    ## r  : growth rate
    ## K  : upper asymptote; the result is capped at K
    ## s  : shift added to t
    ## q0 : value of the curve at t + s == 0
    ##
    ## Returns a numeric vector of the same length as the recycled inputs.
    logis <- function(t, r, K=1, s=0, q0){
      ## Algebraically equal to
      ##   K*q0*exp(r*(t+s)) / (K + q0*(exp(r*(t+s)) - 1))
      ## but written with exp(-x): for large r*(t+s) the exponential underflows
      ## to 0 and the curve saturates at K, whereas exp(+x) overflows to Inf and
      ## Inf/Inf yields NaN, which pmin() cannot repair.
      pmin(K * q0 / (q0 + (K - q0) * exp(-r * (t + s))), K)
    }
 
    ## Objective function for the optimiser: the quantity to minimise.
    ##
    ## r  : candidate growth rate
    ## df : data with columns `time` and `infected`
    ##
    ## Returns the sum of squared differences between the logistic prediction
    ## (K = 1000, q0 = 1) and the observed `infected` counts.
    eval <- function(r, df){
      predicted <- logis(r=r, t=df$time, K=1000, q0=1)
      errors <- predicted - df$infected
      sum(errors^2)
    }

    ## One-dimensional search for r over [0, 0.1]; optimize() suffices because
    ## only one parameter is estimated (use optim() for several). The outer
    ## parentheses print the result.
    (opt <- optimize(f = eval, interval = c(0, 0.1), df = data_logistic))

    ## Fitted logistic curve evaluated on the shared time axis.
    pred_data <- data.frame(
      time = times,
      infected = logis(r = opt$minimum, t = times, K = 1000, q0 = 1)
    )

    ## Observed counts as small points, fitted curve as a thick red line.
    ggplot(data_logistic, aes(x = time, y = infected)) +
      geom_point(size = .2) +
      geom_line(data = pred_data, colour = "red", size = 2) +
      ggtitle("1000 simulations of the incidence of a pathogen (infected) over time (t)")

但是,我想做的是对参数值(theta 和 beta)相同的各个重复(replicate)分别应用逻辑斯谛拟合的优化函数。为此,我首先按这两个变量把数据框拆分成多个数据框。

## One data frame per theta x beta combination (levels named "<theta>.<beta>").
datasplit <- split(grid, grid[c("theta", "beta")])

我不知道如何将其余代码分别应用于每个数据帧,然后将估算值放回网格中,如下所示:

## Target layout: one row per theta x beta x replicate combination, with `r`
## and `detection` as NA placeholders to be filled with the estimates.
grid <- expand.grid(
  theta = c(.1, .5, 1, 5, 10),  # parameter 1
  beta = c(1, 5, 10, 20, 50),   # parameter 2
  replicate = seq_len(2),       # two replicates per combination
  r = NA,
  detection = NA
)

是否可以使用sapply()之类的东西?

0 个答案:

没有答案