根据之前行的属性选择行

时间:2013-12-05 12:26:33

标签: r date dataframe row

我有以下数据框:

# Sample data: one row per hospital stay.
#   hospNo        - stay identifier (character)
#   patID         - patient identifier (character)
#   admissionType - "acute" or "chronic"
#   dischargeDate - discharge date as a "yyyymmdd" string
hospNo <- as.character(seq_len(10))
patID <- c("1", "2", "2", "2", "2", "3", "4", "4", "4", "4")
admissionType <- c("chronic", "acute", "chronic", "chronic", "chronic",
                   "acute", "acute", "chronic", "chronic", "chronic")
dischargeDate <- c("20110101", "20100101", "20100106", "20100120", "20100314",
                   "20120607", "20120329", "20120402", "20120408", "20120421")

# Build the data frame directly instead of going through a character matrix
# (cbind); stringsAsFactors = FALSE keeps every column a plain character
# vector regardless of the R version's default.
HospData <- data.frame(
  hospNo = hospNo,
  patID = patID,
  admissionType = admissionType,
  dischargeDate = dischargeDate,
  stringsAsFactors = FALSE
)

我想为每位患者(patID)选出满足以下条件的住院记录行(hospNo):入院类型为慢性(chronic),并且其出院日期(dischargeDate)落在该患者某次急性(acute)入院的出院日期之后 14 天以内。

在本例中,符合条件的是慢性住院记录(hospNo)3、8 和 9。

有办法做到吗?我知道可以用 as.Date(x, format = "%Y%m%d") 把字符串转换为日期,但这里仍然贴出原始数据,因为也许有更好的计算方法。

非常感谢帮助!

2 个答案:

答案 0 :(得分:2)

library(zoo)
library(plyr)

# Parse the "yyyymmdd" strings once so that day arithmetic is possible.
HospData$dischargeDate <- as.Date(HospData$dischargeDate, format = "%Y%m%d")

# For each patient, keep the chronic stays discharged within 14 days after
# the most recent acute discharge.
out <- ddply(HospData, .(patID), function(x) {
  # na.locf() carries values forward in row order, so the patient's stays
  # must be in chronological order for "last acute discharge" to be right.
  x <- x[order(x$dischargeDate), ]

  # Work on plain day counts: ifelse() drops the Date class anyway, so doing
  # the conversion explicitly avoids relying on Date-minus-number arithmetic.
  dischargeDay <- as.numeric(x$dischargeDate)
  lastAcuteDischarge <- ifelse(x$admissionType == "acute", dischargeDay, NA_real_)
  lastAcuteDischarge <- na.locf(lastAcuteDischarge, na.rm = FALSE)
  daysSinceAcute <- dischargeDay - lastAcuteDischarge

  # "Within 14 days" includes day 14. which() discards NA comparisons (stays
  # with no earlier acute admission) instead of turning them into all-NA
  # rows, so no complete.cases() clean-up is needed afterwards.
  x[which(x$admissionType == "chronic" & daysSinceAcute <= 14), ]
})
out
#   hospNo patID admissionType dischargeDate
# 1      3     2       chronic    2010-01-06
# 2      8     4       chronic    2012-04-02
# 3      9     4       chronic    2012-04-08

答案 1 :(得分:1)

试试这个:

# Sample data: one row per hospital stay, dates as "yyyymmdd" strings.
hospNo <- as.character(seq_len(10))
patID <- c("1", "2", "2", "2", "2", "3", "4", "4", "4", "4")
admissionType <- c("chronic", "acute", "chronic", "chronic", "chronic",
                   "acute", "acute", "chronic", "chronic", "chronic")
dischargeDate <- c("20110101", "20100101", "20100106", "20100120", "20100314",
                   "20120607", "20120329", "20120402", "20120408", "20120421")
HospData <- data.frame(
  hospNo = hospNo,
  patID = patID,
  admissionType = admissionType,
  dischargeDate = dischargeDate,
  stringsAsFactors = FALSE
)

# Merge so that every stay sits on the same row as each acute discharge date
# of the same patient (dischargeDate.x = this stay, dischargeDate.y = acute).
acuteDischarges <- HospData[HospData$admissionType == "acute",
                            c("patID", "dischargeDate")]
HospData1 <- merge(HospData, acuteDischarges, by = "patID", all.x = TRUE)

# Keep only chronic stays of patients that have at least one acute admission.
HospData1 <- HospData1[HospData1$admissionType == "chronic" &
                         !is.na(HospData1$dischargeDate.y), ]

# Days from the acute discharge to the chronic discharge (difftime in days).
HospData1$duration <-
  as.Date(HospData1$dischargeDate.x, format = "%Y%m%d") -
  as.Date(HospData1$dischargeDate.y, format = "%Y%m%d")

# TRUE where `duration` lies in [0, maxDays] days.
#   duration - difftime vector (acute discharge -> chronic discharge)
#   maxDays  - length of the follow-up window in days, inclusive
# A negative duration means the chronic stay ended BEFORE the acute one, so
# it must not be selected; NA durations are treated as "not in the window".
withinWindow <- function(duration, maxDays = 14) {
  days <- as.numeric(duration, units = "days")
  !is.na(days) & days >= 0 & days <= maxDays
}

# Subset to the chronic stays within 14 days after an acute discharge.
HospData1[withinWindow(HospData1$duration), ]
#     patID hospNo admissionType dischargeDate.x dischargeDate.y duration
# 3     2      3       chronic        20100106        20100101   5 days
# 8     4      8       chronic        20120402        20120329   4 days
# 9     4      9       chronic        20120408        20120329  10 days