我有一个包含 id 和状态(status)的数据集。对每一行,我需要得到该 id 在过去四个季度(含当前季度)内 status 的最大值。例如,我创建了这张表:
library(data.table)
library(zoo)
# Sample input: one row per (id, quarter) carrying a numeric status.
# id "1" has eight consecutive quarters listed newest-first; id "2" has one row.
DT <- data.table(
  id = rep(c("1", "2"), times = c(8, 1)),
  date = as.yearqtr(c(
    "2015Q1", "2014Q4", "2014Q3", "2014Q2", "2014Q1",
    "2013Q4", "2013Q3", "2013Q2", "2015Q2"
  )),
  status = c(0, 0, 0, 0, 0, 0, 1, 0, 0)
)
我希望得到过去 4 个季度中 status 的最大值,即如下结果:
# Desired result: the sample table plus Max_status, the largest status seen
# for the same id within the trailing four quarters (current quarter included).
DT <- data.table(
  id = rep(c("1", "2"), times = c(8, 1)),
  date = as.yearqtr(c(
    "2015Q1", "2014Q4", "2014Q3", "2014Q2", "2014Q1",
    "2013Q4", "2013Q3", "2013Q2", "2015Q2"
  )),
  status = c(0, 0, 0, 0, 0, 0, 1, 0, 0),
  Max_status = c(0, 0, 0, 1, 1, 1, 1, 0, 0)
)
有没有人知道如何在 data.table 中简洁而高效地实现这一点?我尝试过下面的写法:
# Asker's attempt -- kept as posted; it does NOT produce the desired result.
# Date_1yPrior is each row's own quarter shifted back by 1 (one year for a
# yearqtr value).
DT[,Date_1yPrior:=date-1]
# Within each id, `date` and `Date_1yPrior` are compared element-wise, i.e. each
# row against its own shifted date, so the filter reduces to date == date - 1
# and never matches. max() then runs on an empty vector and yields -Inf (with a
# warning) for every row instead of a rolling maximum.
DT[,Max_Status:=max(status[date>=Date_1yPrior & date<=Date_1yPrior]),by='id']
但其中 i 和 j 的引用方式不正确。我更希望得到基于 data.table 的解决方案。
提前谢谢!
答案 0 :(得分:3)
使用zoo::rollapplyr
# Visit rows ordered by id and date, then, per id, take a right-aligned rolling
# maximum over the current row and the three rows before it.
# partial = TRUE lets the earliest rows of each id use however many rows exist.
# The window is counted in rows, so it equals "the last four quarters" only
# when every id has one row per consecutive quarter.
DT[
  order(id, date),
  max_status := rollapplyr(
    status,
    width = 4,
    FUN = max,
    partial = TRUE,
    fill = 0
  ),
  by = id
]
DT
#    id    date status max_status
# 1:  1 2015 Q1      0          0
# 2:  1 2014 Q4      0          0
# 3:  1 2014 Q3      0          0
# 4:  1 2014 Q2      0          1
# 5:  1 2014 Q1      0          1
# 6:  1 2013 Q4      0          1
# 7:  1 2013 Q3      1          1
# 8:  1 2013 Q2      0          0
# 9:  2 2015 Q2      0          0
答案 1 :(得分:1)
我认为这样可以解决问题:
library(zoo)
library(data.table)
# Numeric form of the yearqtr date: year + (quarter - 1) / 4, so one quarter is
# a step of 0.25. Those fractions are exact in binary floating point, which is
# what makes the exact lookups through match() below reliable.
DT[, date_num := as.numeric(date)]

# For every row, fetch the status recorded k quarters earlier for the same id.
# match() looks the quarter up by value, not by row position, and returns NA
# when the id has no row for that quarter, so gaps in the series are tolerated.
# NOTE: despite the "Next" prefix, these columns hold values from EARLIER
# quarters; the names are kept so existing references keep working.
DT[, NextStatus1Q := status[match(date_num - 0.25, date_num)], by = id]
DT[, NextStatus2Q := status[match(date_num - 0.5, date_num)], by = id]
DT[, NextStatus3Q := status[match(date_num - 0.75, date_num)], by = id]
DT[, NextStatus4Q := status[match(date_num - 1, date_num)], by = id]

# Row-wise maximum over the current quarter and the three before it.
# pmax() is vectorised, so no per-row grouping is needed; this assumes each
# (id, date) pair occurs on at most one row. The literal 0 together with
# na.rm = TRUE guarantees a non-NA result even when every lookup missed.
DT[, MaxStatus := pmax(NextStatus1Q, NextStatus2Q, NextStatus3Q, status, 0, na.rm = TRUE)]
# To also include the value from four quarters back (a five-quarter window in
# total), use this instead:
# DT[, MaxStatus := pmax(NextStatus1Q, NextStatus2Q, NextStatus3Q, NextStatus4Q, status, 0, na.rm = TRUE)]
思路是:先创建新列,分别查找 1–3 个季度之前的 status 值,然后与当前 status 一起取最大值。