我目前有一些代码,可以针对用 expand.grid 函数生成的一组参数组合,估计逻辑斯蒂(logistic)增长曲线。
library('spatstat')
library('ggplot2')
library('dplyr')
library('reshape2')
library("plyr")
## ---- Simulation setup ----
## NOTE: the variable `dim` shares its name with base::dim(); function calls
## such as dim(x) still resolve to the base function.
n <- 1000    # number of hosts
dim <- 1000  # side length of the square landscape

## Hosts placed uniformly at random inside a dim x dim window.
landscape <- ppp(
  x = runif(n) * dim,
  y = runif(n) * dim,
  window = owin(xrange = c(0, dim), yrange = c(0, dim))
)

## Mark the points: exactly one TRUE (the initially infected host) placed at a
## random position, all other hosts FALSE.
marks(landscape) <- sample(rep(c(TRUE, FALSE), times = c(1, n - 1)))

## One row per host: coordinates plus a running id.
data <- data.frame(x = landscape$x, y = landscape$y, id = seq_len(n))

## Values passed to tauLeapG() below under the same names.
sigma <- 300
delta.t <- 100

## Every combination of the two parameters, each repeated `replicate` times;
## `r` and `detection` are placeholders to be filled in later.
grid <- expand.grid(
  theta = c(.1, .5, 1, 5, 10),  # parameter 1
  beta = c(1, 5, 10, 20, 50),   # parameter 2
  replicate = 1:2,              # replicates (2 per parameter combination)
  r = NA,
  detection = NA
)

## Sort rows by the first two columns (theta, then beta) in descending order.
grid <- arrange_at(grid, 1:2, desc)
## ---- Run one simulated epidemic per row of `grid` ----
## The infection times of each run are collected in a preallocated list and
## bound to `data` once after the loop, instead of growing `data` with cbind()
## on every iteration.
n_id_cols <- ncol(data)  # columns already present before any simulation is added
sim_times <- vector("list", nrow(grid))

for (i in seq_len(nrow(grid))) {
  ## Run a simulation with the parameter values stored in row i of the grid.
  output <- tauLeapG(
    beta = grid[i, "beta"],
    theta = grid[i, "theta"],
    b = .4,
    sigma = sigma,
    delta.t = delta.t,
    ppp = landscape
  )
  print(paste0("beta", grid[i, "beta"]))
  print(paste0("theta", grid[i, "theta"]))

  ## Take the first two columns of the second element of the simulation output
  ## and order the rows by the second column; the `time` column is then stored.
  ## NOTE(review): presumably column 2 is the infection time -- confirm against
  ## tauLeapG(). After this reordering the stored values no longer line up with
  ## `data$id` row by row; that is harmless for counting infections over time
  ## but should be checked before using them per host.
  infections <- output[[2]]
  temp <- infections[, 1:2][order(infections[, 2]), ]
  sim_times[[i]] <- temp$time

  ## print status
  cat('\nepidemic', i, 'simulated...')
}

## Append one column per simulation, named "simulation <i>".
if (length(sim_times) > 0) {
  names(sim_times) <- paste0("simulation ", seq_along(sim_times))
  data <- do.call(cbind, c(list(data), sim_times))
  colnames(data)[-seq_len(n_id_cols)] <- names(sim_times)
}
## ---- Convert infection times to cumulative infection counts ----
## `data` holds one row per host and one column of infection times per
## simulation.
head(data)

## Long format: one row per (host, simulation) pair, with the simulation label
## in `sim` and the infection time in `time`.
data_long <- melt(
  data,
  id.vars = c("x", "y", "id"),
  variable.name = "sim",
  value.name = "time"
)

## Common time axis: every distinct infection time seen in any simulation.
times <- sort(unique(data_long$time))

## For each simulation, count the hosts infected at or before each time point.
## vapply() pins the result to an integer vector even when `times` is empty,
## where sapply() would silently return a list.
data_logistic <- data_long %>%
  group_by(sim) %>%
  do(data.frame(
    time = times,
    infected = vapply(times, function(x) sum(.$time <= x), integer(1))
  ))

## All curves share the same time points.
ggplot(data_logistic) +
  geom_point(aes(x = time, y = infected, colour = sim))
## Logistic growth curve, capped at the carrying capacity.
##
## Arguments:
##   t   time(s) at which to evaluate the curve (vectorised)
##   r   growth rate
##   K   carrying capacity (upper bound of the curve)
##   s   shift added to t
##   q0  value of the curve at t + s == 0
## Returns a numeric vector the same length as t.
##
## The curve K*q0*exp(r*(t+s)) / (K + q0*(exp(r*(t+s)) - 1)) is evaluated in the
## algebraically equivalent form K*q0 / (q0 + (K - q0)*exp(-r*(t+s))).
## In the first form exp() overflows to Inf for large r*(t+s), giving
## Inf/Inf = NaN, which pmin() passes through; in the second form the
## exponential underflows to 0 and the result is exactly K.
logis <- function(t, r, K=1, s=0, q0){
  decay <- exp(-r * (t + s))
  pmin(
    K * q0 / (q0 + (K - q0) * decay),
    K)  # keep the cap at K, e.g. for q0 > K
}
## Objective function for the optimiser: sum of squared errors between the
## logistic prediction (K = 1000, q0 = 1) and the observed counts.
##   r   candidate growth rate
##   df  data frame with columns `time` and `infected`
## NOTE: this function shares its name with base::eval(); it is kept as-is
## because the optimiser call refers to it by this name.
eval <- function(r, df){
  predicted <- logis(t = df$time, r = r, K = 1000, q0 = 1)
  residual <- predicted - df$infected
  sum(residual^2)
}
## ---- Fit the growth rate and plot the fitted curve ----
## One-dimensional search for r on [0, 0.1]; optimize() suffices for a single
## parameter (use optim() for more). The outer parentheses print the result.
(opt <- optimize(f = eval, df = data_logistic, interval = c(0, 0.1)))

## Predicted curve at the fitted r, evaluated on the common time axis.
pred_data <- data.frame(
  time = times,
  infected = logis(t = times, r = opt$minimum, K = 1000, q0 = 1)
)

## Observed counts as points, fitted logistic as a red line.
ggplot(data_logistic) +
  geom_point(aes(x = time, y = infected), size = .2) +
  geom_line(data = pred_data, aes(x = time, y = infected), colour = "red", size = 2) +
  ggtitle("1000 simulations of the incidence of a pathogen (infected) over time (t)")
但是,我真正想做的是:对参数取值(theta 和 beta)相同的各个重复(replicate)分别进行逻辑斯蒂拟合。为此,我首先按这两个变量把数据框拆分成多个数据框。
## Split the grid into one data frame per (theta, beta) combination; the list
## names have the form "<theta>.<beta>".
datasplit <- split(grid, grid[c("theta", "beta")])
我不知道如何把其余的代码分别应用到每个数据框上,然后把得到的估计值填回 grid(即下面的 r 列)中,如下所示:
## Target layout: every (theta, beta) combination, repeated once per replicate,
## with empty `r` and `detection` columns waiting to be filled in.
grid <- expand.grid(
  theta = c(0.1, 0.5, 1, 5, 10),  # parameter 1
  beta = c(1, 5, 10, 20, 50),     # parameter 2
  replicate = seq_len(2),         # replicates (2 per parameter combination)
  r = NA,
  detection = NA
)
是否可以用 sapply() 之类的函数来实现?