我有一个国家-年份(country-year)面板数据集。我想计算每个国家自事件发生以来经过的时间,以及事件的累计次数(之后我可以让这个计数随时间衰减)。我正在使用 doBy 包中的 timeSinceEvent 函数,该函数会返回一个包含我想要的值的数据框,但我无法把它应用到我的主数据框上。
# Example panel as a data.frame literal (looks like dput() output): 200 rows
# with columns ccode.a (integer country ID), year, and onset.a (0/1 values).
# NOTE(review): the value is not assigned to a name here, while the snippets
# that follow read a variable called `data` -- presumably this literal is meant
# to be stored as `data`; confirm.
structure(list(ccode.a = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L,
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L,
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L,
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L,
42L, 42L, 42L, 42L, 42L), year = c(1975, 1976, 1977, 1978, 1979,
1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 1976, 1977, 1978,
1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989,
1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 1976, 1977,
1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988,
1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 1976,
1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987,
1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975,
1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986,
1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985,
1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996,
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004), onset.a = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("ccode.a", "year",
"onset.a"), row.names = c(NA, 200L), class = "data.frame")
我尝试过使用它:
# Per-country worker: runs timeSinceEvent() on the onset.a / year columns of
# `x` and binds the first column of `x` (the country ID, which
# timeSinceEvent() does not carry over) in front of whatever it returns.
last.step <- function(x) {
  cbind(
    x[, 1],
    timeSinceEvent(x$onset.a, x$year)
  )
}
# Split `data` by country code, run last.step() on each piece, and stack the
# pieces with rbind().
result <- do.call(
  "rbind",
  by(data, INDICES = data$ccode.a, FUN = last.step)
)
以及
# Run timeSinceEvent() separately on each country's rows.
# The callback has to read from its own argument `x` (the per-country subset
# that by() hands it); reading from `data` instead would make every group
# recompute the same result on the whole, un-split panel.
test <- by(data, data$ccode.a, function(x) timeSinceEvent(x$onset.a, x$year))
但都没有成功。我逐步调试了这个函数,它本身似乎确实在做我想要的事情,所以我猜问题出在我调用它的方式上?
答案 0 :(得分:1)
在我看来,问题很简单:ccode.a==20 没有任何事件,因此 timeSinceEvent 应用于该子集时会返回 NULL。这意味着 last.step 对不同的 ccode.a 会返回维度不同的结果,因此 rbind 会失败。
这算不上一个完整的解决方案,但弄清楚问题出在哪里或许会有所帮助。
答案 1 :(得分:1)
由于存在空列,您应该使用 plyr 包中的 rbind.fill()。它会用 NA 填充缺失的列。
# Worker applied to one country's rows: computes the time-since-event table
# from the onset.a and year columns, then puts the first column of `x` (the
# country ID that timeSinceEvent() drops) in front of it.
last.step <- function(x) {
  since_event <- timeSinceEvent(x[["onset.a"]], x[["year"]])
  cbind(x[, 1], since_event)
}
# Run last.step() per country code and combine the pieces with rbind.fill().
result <- do.call(
  what = rbind.fill,
  args = by(data, INDICES = data$ccode.a, FUN = last.step)
)
然而,这不会返回“空”的 lists,即只含有 x[,1] 的那些。rbind.fill 只会合并其中包含 data.frame 的 lists。我不知道这是否是预期的行为,也不确定这是否是你想要的。
答案 2 :(得分:0)
最后我不得不稍微修改 doBy 包中的 timeSinceEvent。这是最终的代码。感谢 lselzer 指出 plyr 包中的 rbind.fill,也感谢 RoyalTS 指出当 yvar 参数全为零时 timeSinceEvent 会返回 NULL。
#' Time elapsed since the most recent event in a single series
#'
#' Unlike a helper that returns nothing when there are no events, this always
#' returns one row per input element: rows before the first event (or every
#' row, if `yvar` has no event at all) get `NA` in `run` and `tae`. Empty
#' input yields a zero-row data frame.
#'
#' @param yvar Numeric or logical event indicator. `NA` entries are treated
#'   as 0 (no event).
#' @param tvar Time values, one per element of `yvar`. Defaults to the
#'   positions `seq_along(yvar)`.
#' @return A data frame with columns `yvar` (the indicator with `NA` replaced
#'   by 0), `tvar`, `run` (cumulative sum of `yvar`, `NA` where it is still 0)
#'   and `tae` (`tvar` minus the `tvar` of the first row sharing the same
#'   `run` value, `NA` where `run` is 0).
panel.tse <- function(yvar, tvar = seq_along(yvar)) {
  if (!(is.numeric(yvar) || is.logical(yvar))) {
    stop("yvar must be either numeric or logical")
  }
  if (length(tvar) != length(yvar)) {
    stop("tvar must have the same length as yvar")
  }

  yvar[is.na(yvar)] <- 0
  run <- cumsum(yvar)

  # match(run, run) is, for every row, the index of the first row that has the
  # same cumulative count -- i.e. the row where the current run started. This
  # keeps the result aligned row-by-row and also works for empty input.
  tae <- tvar - tvar[match(run, run)]

  # Rows with a cumulative count of 0 have not seen any event yet.
  no_event_yet <- run == 0
  tae[no_event_yet] <- NA
  run[no_event_yet] <- NA

  data.frame(yvar = yvar, tvar = tvar, run = run, tae = tae)
}
# Worker for one country's rows: builds the panel.tse() table from the
# onset.a and year columns and binds the first column of `x` in front of it.
last.step <- function(x) {
  cbind(x[, 1], panel.tse(yvar = x$onset.a, tvar = x$year))
}
# Apply last.step() to each country-code subset of `data` and combine the
# resulting pieces with rbind.fill().
result <- do.call(
  rbind.fill,
  by(data = data, INDICES = data$ccode.a, FUN = last.step)
)