面板数据中自事件发生以来的时间

时间:2012-06-18 20:17:57

标签: r data-manipulation

我有一个“国家-年份”面板数据集。我想计算自事件发生以来经过的时间,以及每个国家的事件累计次数(之后我可以让它随时间衰减)。我正在使用doBy包中的timeSinceEvent函数,该函数会返回一个包含我所需值的数据框,但我无法把它应用到我的主数据框上。

structure(list(ccode.a = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 
42L, 42L, 42L, 42L, 42L), year = c(1975, 1976, 1977, 1978, 1979, 
1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 
1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 
2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 1976, 1977, 1978, 
1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 
1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 1976, 1977, 
1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 
1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 
2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 1976, 
1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 
1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 
1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 
1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 
1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 
1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 
1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004), onset.a = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("ccode.a", "year", 
"onset.a"), row.names = c(NA, 200L), class = "data.frame")

我尝试过使用它:

# Run timeSinceEvent() on one country's rows and re-attach the country ID.
# `x` is the subset of the panel that by() passes in for a single ccode.a.
# NOTE(review): according to the answers in this thread, timeSinceEvent()
# returns NULL when a group contains no events, so the per-country pieces can
# differ in shape -- confirm against the doBy documentation.
last.step <- function(x) {
  temp <- timeSinceEvent(x$onset.a, x$year)
  cbind(x[,1],temp) #timeSinceEvent cuts off the country ID
}
# Apply last.step to each ccode.a group and stack the pieces with rbind.
result <- do.call("rbind", by(data, data$ccode.a, last.step))

以及

# Second attempt: call timeSinceEvent() once per ccode.a group via by().
# NOTE(review): the anonymous function never uses its argument `x`; it reads
# data$onset.a and data$year, so every group is computed from the full data
# frame rather than from that group's own rows.
test <- by(data, data$ccode.a, function(x) timeSinceEvent(data$onset.a, data$year))

但都没有成功。我逐步调试了这个函数,它看起来确实在做我想要的事情,所以我猜问题出在我调用它的方式上?

3 个答案:

答案 0 :(得分:1)

在我看来,问题很简单:ccode.a==20没有任何事件,因此timeSinceEvent在应用于该子集时会返回NULL。这意味着last.step对不同的ccode.a会返回维度不同的结果,因此rbind会失败。

这算不上一个完整的解决方案,但弄清楚问题出在哪里也许会有帮助。

答案 1 :(得分:1)

由于存在空列,您应该使用plyr包中的rbind.fill()。它会用NA填充缺失的列。

# Per-country helper: compute the event timings for one by() group and bind
# the group's first column (the country ID) back in front of them.
last.step <- function(x) {
  # The timeSinceEvent() output carries no country ID, hence the cbind below.
  timings <- timeSinceEvent(x[["onset.a"]], x[["year"]])
  cbind(x[, 1], timings)
}
# Stack the per-country pieces with plyr's rbind.fill() instead of rbind();
# per the surrounding answer it fills columns missing from a piece with NA.
result <- do.call(rbind.fill, by(data, INDICES = data$ccode.a, FUN = last.step))

然而,这不会返回“空”的列表元素,即只含有x[,1]的那些;只有列表中包含data.frame的元素才会被rbind合并在一起。我不知道这是否是预期的行为,和/或是否是你想要的结果。

答案 2 :(得分:0)

最后我不得不稍微修改doBy包中的timeSinceEvent函数。下面是最终的代码。感谢lselzer指出plyr包中的rbind.fill,也感谢RoyalTS指出当yvar参数全为零时timeSinceEvent会返回NULL。

#' Time elapsed since the most recent event in a single series
#'
#' Trimmed-down variant of doBy's timeSinceEvent() that always returns a data
#' frame, even when the series contains no event at all or is empty, so the
#' per-group results of a panel can be stacked.
#'
#' @param yvar Numeric or logical vector; an event is marked by 1 / TRUE.
#'   `NA` entries are treated as 0 (no event).
#' @param tvar Numeric time values aligned with `yvar` (same length). Defaults
#'   to the positions `seq_along(yvar)`.
#' @return A data frame with one row per observation and the columns
#'   `yvar` (input with `NA` replaced by 0), `tvar`, `run` (running event
#'   count, `NA` before the first event) and `tae` (time since the start of
#'   the current run, `NA` before the first event).
panel.tse <- function(yvar, tvar = seq_along(yvar)) {
  # Scalar condition: use the short-circuit `||`, not the elementwise `|`.
  if (!(is.numeric(yvar) || is.logical(yvar))) {
    stop("yvar must be either numeric or logical")
  }
  # Mismatched lengths would otherwise be recycled into misaligned output.
  if (length(tvar) != length(yvar)) {
    stop("yvar and tvar must have the same length")
  }
  yvar[is.na(yvar)] <- 0

  # `run` counts the events seen so far; it is 0 until the first event.
  run <- cumsum(yvar)

  # match(run, run) gives, for every row, the index of the first row that has
  # the same run id, i.e. the row at which the current run started. This
  # replaces the per-run loop and also works for zero-length input.
  run_start <- match(run, run)
  tae <- tvar - tvar[run_start]

  # Rows before the first event have no "time since event".
  tae[run == 0] <- NA
  run[run == 0] <- NA

  data.frame(yvar = yvar, tvar = tvar, run = run, tae = tae)
}

# Per-country wrapper: run panel.tse() on one group's onset/year columns and
# bind the group's first column (the country ID) back in front of the result.
last.step <- function(x) {
  timings <- panel.tse(x[["onset.a"]], x[["year"]])
  cbind(x[, 1], timings)
}

# Apply last.step to every ccode.a group and stack the pieces with rbind.fill.
result <- do.call(rbind.fill, by(data, INDICES = data$ccode.a, FUN = last.step))