我有一个包含患者入院和出院日期的数据框(df),共有 4 列:
ID、admitDate(日期类型)、dcDate(日期类型)和 los(住院天数)。
$ admitDate : Date, format: "2009-09-19" "2010-01-24" "2010-09-30" ...
$ dcDate : Date, format: "2009-09-23" "2010-01-27" "2010-10-04" ...
$ los : num 4 3 4 25 6 3 6 2 2 3 ...
我需要能够知道在任意给定时间有多少患者(以及是哪些患者)在院。也就是说,我想我需要找出各患者住院时间(LOS)之间的重叠情况。以下是我对重叠的定义:(df$admitDate[x] <= df$dcDate[y]) & (df$admitDate[y] <= df$dcDate[x])
非常感谢任何帮助。
以下是前20位患者的输出结果:
> dput(head(df,20))
structure(list(Unit.Number = c(2013459L, 2013459L, 2047815L,
1362858L, 1331174L, 2068040L, 1363711L, 2175972L, 2036695L, 1426614L,
1403126L, 2083126L, 1334063L, 1349385L, 1404482L, 2175545L, 1296600L,
1293220L, 1336768L, 2148401L), admitDate = structure(c(14506,
14633, 14882, 15172, 14945, 15632, 15482, 15601, 16096, 15843,
16013, 15548, 15436, 15605, 16115, 15597, 15111, 15050, 15500,
15896), class = "Date"), dcDate = structure(c(14510, 14636, 14886,
15197, 14951, 15635, 15488, 15603, 16098, 15846, 16016, 15552,
15438, 15606, 16118, 15598, 15113, 15058, 15501, 15915), class = "Date"),
los = c(4, 3, 4, 25, 6, 3, 6, 2, 2, 3, 3, 4, 2, 1, 3, 1,
2, 8, 1, 19)), .Names = c("Unit.Number", "admitDate", "dcDate",
"los"), row.names = c(NA, 20L), class = "data.frame")
首先,我尝试了G. Grothendieck建议的代码:
# Build one entry per calendar day from the earliest admission to the latest
# discharge. min()/max() are called without na.rm, so a single NA date in
# either column makes the corresponding bound NA.
days <- seq(min(df$admitDate), max(df$dcDate), "day")
# Daily census: for every day, how many stays cover it and which patients
# those stays belong to.
no.patients <- data.frame(
Date = days,
# Count of stays whose [admitDate, dcDate] interval contains day d (both
# endpoints inclusive).
Num = sapply(days, function(d) sum(d >= df$admitDate & d <= df$dcDate)),
# Comma-separated Unit.Number values of the stays that contain day d; an
# empty string when no stay covers that day.
Patients = sapply(days, function(d)
toString(df$Unit.Number[d >= df$admitDate & d <= df$dcDate]))
)
以下是发生的事情:
> days <- seq(min(df$admitDate), max(df$dcDate), "day")
Error in seq.int(0, to0 - from, by) : 'to' cannot be NA, NaN or infinite
> no.patients <- data.frame(Date = d,
+ Num = sapply(days, function(d) sum(d >= df$admitDate & d <= df$dcDate)))
Error in data.frame(Date = d, Num = sapply(days, function(d) sum(d >= :
object 'd' not found
然后,我想也许我需要摆脱NA。所以这就是我所做的:
> df <- df[rowSums(is.na(df)) < 0, ]
再次尝试。这是我得到的:
> days <- seq(min(df$admitDate), max(df$dcDate), "day")
Error in seq.int(0, to0 - from, by) : 'to' cannot be NA, NaN or infinite
In addition: Warning messages:
1: In min.default(numeric(0), na.rm = FALSE) :
no non-missing arguments to min; returning Inf
2: In max.default(numeric(0), na.rm = FALSE) :
no non-missing arguments to max; returning -Inf
> no.patients <- data.frame(Date = d,
+ Num = sapply(days, function(d) sum(d >= df$admitDate & d <= df$dcDate)))
Error in data.frame(Date = d, Num = sapply(days, function(d) sum(d >= :
object 'd' not found
答案 0 :(得分:1)
试试这个:
# Daily in-hospital census built from `df` (columns Unit.Number, admitDate,
# dcDate). Produces:
#   days        - Date vector, one entry per calendar day from the first
#                 admission to the last discharge
#   no.patients - data frame with columns
#                   Date     the calendar day
#                   Num      number of stays covering that day
#                   Patients comma-separated Unit.Number of those stays

# Keep only stays with both dates present. A missing date cannot be placed on
# the calendar, and a single NA makes min()/max() return NA, which is what
# makes seq() fail with "'to' cannot be NA, NaN or infinite".
stays <- df[!is.na(df$admitDate) & !is.na(df$dcDate), ]
if (nrow(stays) == 0L) {
  stop("df has no rows with both admitDate and dcDate present", call. = FALSE)
}

days <- seq(min(stays$admitDate), max(stays$dcDate), by = "day")

# Logical mask over `stays`: TRUE where day `d` falls inside the stay, with
# the admission and discharge days both counted as in-hospital.
in_house <- function(d) d >= stays$admitDate & d <= stays$dcDate

# vapply() pins the result type of each column, so the shape of the output
# does not depend on the data the way it can with sapply().
no.patients <- data.frame(
  Date = days,
  Num = vapply(days, function(d) sum(in_house(d)), integer(1)),
  Patients = vapply(
    days,
    function(d) toString(stays$Unit.Number[in_house(d)]),
    character(1)
  )
)
结果如下:
> head(no.patients)
Date Num Patients
1 2009-09-19 1 2013459
2 2009-09-20 1 2013459
3 2009-09-21 1 2013459
4 2009-09-22 1 2013459
5 2009-09-23 1 2013459
6 2009-09-24 0
补充:已在每一行中加入患者列表,并修正了 df。
答案 1 :(得分:0)
这是另一种方式。这是一个根据进入/退出时间创建队列大小的过程,在这种情况下可用于计算患者数量:
# Sample data: 20 hospital stays, one row per admission. The two date columns
# are stored as day counts carrying the "Date" class.
df <- data.frame(
  Unit.Number = c(
    2013459L, 2013459L, 2047815L, 1362858L, 1331174L,
    2068040L, 1363711L, 2175972L, 2036695L, 1426614L,
    1403126L, 2083126L, 1334063L, 1349385L, 1404482L,
    2175545L, 1296600L, 1293220L, 1336768L, 2148401L
  ),
  admitDate = structure(
    c(
      14506, 14633, 14882, 15172, 14945, 15632, 15482, 15601, 16096, 15843,
      16013, 15548, 15436, 15605, 16115, 15597, 15111, 15050, 15500, 15896
    ),
    class = "Date"
  ),
  dcDate = structure(
    c(
      14510, 14636, 14886, 15197, 14951, 15635, 15488, 15603, 16098, 15846,
      16016, 15552, 15438, 15606, 16118, 15598, 15113, 15058, 15501, 15915
    ),
    class = "Date"
  ),
  los = c(4, 3, 4, 25, 6, 3, 6, 2, 2, 3, 3, 4, 2, 1, 3, 1, 2, 8, 1, 19)
)

# Event table: every stay contributes two rows, an admission (op = +1) and a
# discharge (op = -1). All admissions are listed first, then all discharges.
x <- data.frame(
  date = c(df$admitDate, df$dcDate),
  op = rep(c(1, -1), each = nrow(df)),
  Unit.Number = rep(df$Unit.Number, times = 2)
)

# Put the events in chronological order, then keep a running total of `op`.
# After each event, `cum` is the number of patients in hospital at that
# moment; on admission rows (op == 1) the row also names the patient who
# just arrived.
chronological <- order(x$date)
x <- x[chronological, ]
x$cum <- cumsum(x$op)

# Step plot of the concurrent-patient count over time.
plot(x$date, x$cum, type = "s")
下面是 'x' 的前几行:
> head(x,10)
date op Unit.Number cum
1 2009-09-19 1 2013459 1
21 2009-09-23 -1 2013459 0
2 2010-01-24 1 2013459 1
22 2010-01-27 -1 2013459 0
3 2010-09-30 1 2047815 1
23 2010-10-04 -1 2047815 0
5 2010-12-02 1 1331174 1
25 2010-12-08 -1 1331174 0
18 2011-03-17 1 1293220 1
38 2011-03-25 -1 1293220 0
>