找到具有特定值的下一行

时间:2015-08-02 22:36:09

标签: r loops

我的输入是前三列:ID COLLECTED_DATE Hyperkalemia

我的输出是最后三列:met_criteria my_row next_1_time_interval

met_criteria = Hyperkalemia == 3 | Hyperkalemia == 4(即 Hyperkalemia 的值为 3 或 4)

my_row = 在同一 ID 内,出现 3 或 4 值之后,下一个 Hyperkalemia == 1 的行

next_1_time_interval = 在同一 ID 内,3 或 4 值所在行与其后下一个值为 1 的行之间的时间间隔

> ROW ID COLLECTED_DATE Hyperkalemia met_criteria my_row next_1_time_interval
1  123  1   5/25/08 3:30            0           NA     NA                   NA
2  124  1   5/26/08 3:30            0           NA     NA                   NA
3  125  1   5/27/08 4:00            2           NA     NA                   NA
4  126  1   5/28/08 3:45            2           NA     NA                   NA
5  127  1   5/29/08 3:40            3         TRUE    248         5.590500e+04
6  128  1   5/29/08 6:40            2           NA     NA                   NA
7  129  1  5/29/08 20:15            2           NA     NA                   NA
8  144  1   6/13/08 4:00            0           NA     NA                   NA
9  145  1   6/14/08 4:10            0           NA     NA                   NA
10 146  1   6/15/08 4:45            4         TRUE    248         5.549592e+04
11 147  1   6/15/08 6:30            0           NA     NA                   NA
12 246  1 12/18/12 10:46            0           NA     NA                   NA
13 247  1  1/14/14 11:30            0           NA     NA                   NA
14 248  1 10/14/14 12:40            1           NA     NA                   NA
15 249  1   2/4/15 14:27            0           NA     NA                   NA
16 633  2   9/12/14 4:30            0           NA     NA                   NA
17 634  2   9/29/14 8:57            0           NA     NA                   NA
18 635  3   12/6/11 2:50            0           NA     NA                   NA
19 636  3  12/6/11 10:45            0           NA     NA                   NA
20 637  3  12/6/11 17:50            0           NA     NA                   NA
21 704  3   4/18/12 5:00            0           NA     NA                   NA
22 705  3   4/18/12 5:00            0           NA     NA                   NA
23 706  3  4/18/12 16:30            3         TRUE    708         8.333333e-01
24 707  3  4/18/12 16:30            4         TRUE    708         8.333333e-01
25 708  3  4/18/12 17:20            1           NA     NA                   NA
26 768  3   5/10/12 3:35            0           NA     NA                   NA
27 769  3   5/11/12 3:20            0           NA     NA                   NA
28 770  3   5/12/12 4:00            4         TRUE     NA                   NA
29 771  3   5/12/12 5:45            0           NA     NA                   NA
30 772  3   5/13/12 4:00            0           NA     NA                   NA
31 773  3   5/14/12 5:30            0           NA     NA                   NA
32 774  3   5/15/12 4:00            4         TRUE     NA                   NA
33 775  3   5/15/12 5:15            0           NA     NA                   NA
34 776  3   5/16/12 8:34            0           NA     NA                   NA
35 777  3   5/17/12 7:10            0           NA     NA                   NA
36 778  3   5/18/12 7:10            0           NA     NA                   NA
37 779  3   5/28/12 7:26            0           NA     NA                   NA
38 780  3   6/20/12 9:46            0           NA     NA                   NA
39 781  3   8/1/12 13:10            0           NA     NA                   NA
40 782  3 11/14/12 13:34            0           NA     NA                   NA
41 783  3  11/13/13 9:35            0           NA     NA                   NA
42 784  3    6/5/15 9:31            0           NA     NA                   NA

列出的输出是所需的输出。

需要关注的行是:第 127、146 行(Hyperkalemia 为 3 或 4)以及第 248 行(在 3 或 4 值之后 Hyperkalemia 为 1),这些行都属于同一个人的 ID。

另外:第 706、707 行(Hyperkalemia 为 3 或 4),以及同一 ID 内在 3 或 4 值之后 Hyperkalemia 为 1 的第 708 行。
    # Flag every row whose Hyperkalemia is 3 or 4, then scan forward for a row
    # whose Hyperkalemia is 1 and record that row's index and the elapsed hours.
    #
    # Sort by patient (MRN) and then by collection time.
    # NOTE(review): `arrange` is not defined in this snippet -- presumably
    # dplyr::arrange; confirm the package is attached.
    pots_1 <- arrange(pots_1, MRN, COLLECTED_DATE)

    # Replace missing Hyperkalemia values with 0 (presumably so the `if` and
    # `while` tests below do not receive NA).
    pots_1$Hyperkalemia[is.na(pots_1$Hyperkalemia)] <- 0
    # Output columns start as NA and are only filled in for qualifying rows.
    pots_1$met_criteria <- NA
    pots_1$next_1_time_interval <- NA
    pots_1$my_row <- NA 


   # data.frame = pots
   # for every patient  (sorted by mrn and collected date)
   for (mrn in unique(pots_1$MRN)){ 

      # for each row for each patient (sorted by collected date)
      # NOTE(review): `i` counts from 1 to the number of rows of this MRN, but
      # below it indexes pots_1 directly. Every pass of the outer loop therefore
      # re-examines the first rows of the whole data frame instead of the rows
      # that belong to `mrn`; only the patient stored first lines up.
      for (i in 1:length(which(pots_1$MRN == mrn))) {

       # number of rows for this mrn
       # (does not depend on `i`; it is recomputed on every iteration)
        mrn_max_row <- length(which(pots_1$MRN == mrn))

        # if Hyperkalemia = 3 or 4
        if(pots_1$Hyperkalemia[i] == 3 | pots_1$Hyperkalemia[i] == 4){
         # remember where the 3/4 value was found; my_row is the scan cursor
         my_start_row <- i
         my_row <- i
         pots_1$met_criteria[i] <- TRUE

         #for every row after 3 or  4 until I get to Hyperkalemia = 1
          while (pots_1$Hyperkalemia[my_row] != 1){
          my_row <- my_row+1 
          # stop once the cursor passes this patient's row count
          # NOTE(review): mrn_max_row is a count, while my_row is an absolute
          # row index into pots_1 -- the two only agree for the first patient.
          if (my_row > mrn_max_row) {
           break
          }

      } # ends while
      # The numeric Hyperkalemia value is used directly as the condition, so any
      # non-zero value passes; this is not an explicit `== 1` test.
      # NOTE(review): after `break`, my_row is mrn_max_row + 1, so this reads the
      # row just past the scanned range (NA if that is beyond the last row).
      if (pots_1$Hyperkalemia[my_row]) {
      pots_1$my_row[i] <- my_row
      # elapsed time in hours from the 3/4 row to the row the scan stopped on
      # (assumes COLLECTED_DATE is a date-time class -- TODO confirm)
      pots_1$next_1_time_interval[my_start_row] <- difftime(pots_1$COLLECTED_DATE[my_row], pots_1$COLLECTED_DATE[my_start_row], units = "hours")
      }
      # break only marks the first one if break is here

      } # ends if

    } # ends loop through mrn result rows

  } # ends loop through mrns

这段代码可以运行,但只对第一个 ID 有效。我尝试过许多方法让它适用于所有 ID,结果得到了不少有趣的错误 :) 例如,不区分 ID 就直接去找下一个值。

有人可以帮我解决整个ID集的问题吗?

这是用 R 写的。我知道在 R 中使用 for 循环和 while 语句并不常见,但我只有 7500 行数据,即使使用循环也运行得非常快。

1 个答案:

答案 0 :(得分:1)

以下是使用 data.table 的方法:
library(data.table)
# Turn x into a data.table by reference, then parse the collection timestamps
# (month/day/two-digit year, 24-hour clock) into POSIXct in place.
setDT(x)
x[, COLLECTED_DATE := as.POSIXct(COLLECTED_DATE, format = "%m/%d/%y %H:%M")]

简单的部分:

# A row meets the criteria when its Hyperkalemia value is 3 or 4.
x[, met_criteria := Hyperkalemia %in% c(3L, 4L)]

接下来,定义 my_row 的写法看起来有些复杂。如果想了解这个表达式是如何得出的,请查看本回答之前的编辑记录。

# For each 3/4 row, store the position in x of the next row of the same ID
# whose Hyperkalemia is 1 (NA when there is none, and NA on all other rows).
#   NA^(Hyperkalemia != 1) is 1 where Hyperkalemia == 1 (NA^0 is 1) and NA
#   elsewhere, so multiplying by .I keeps the row number only on the "1" rows.
#   na.locf(fromLast = TRUE) carries each of those row numbers backwards over
#   the NAs that precede it; na.rm = FALSE keeps unfilled NAs so the result has
#   one value per row of the group.
#   NA^(!met_criteria) then blanks every row that is not a 3/4 row.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
x[, my_row := zoo::na.locf(NA^(Hyperkalemia != 1) * .I,
                           fromLast = TRUE, na.rm = FALSE) *
      NA^(!met_criteria),
  by = ID]

现在next_1_time_interval非常容易:

# Hours from each row to the row referenced by my_row. No `by` is used, so
# my_row indexes COLLECTED_DATE over the whole table; rows where my_row is NA
# get an NA interval.
x[, next_1_time_interval := difftime(time1 = COLLECTED_DATE[my_row],
                                     time2 = COLLECTED_DATE,
                                     units = "hours")]
> x
    ROW ID      COLLECTED_DATE Hyperkalemia met_criteria my_row next_1_time_interval
 1: 123  1 2008-05-25 03:30:00            0        FALSE     NA             NA hours
 2: 124  1 2008-05-26 03:30:00            0        FALSE     NA             NA hours
 3: 125  1 2008-05-27 04:00:00            2        FALSE     NA             NA hours
 4: 126  1 2008-05-28 03:45:00            2        FALSE     NA             NA hours
 5: 127  1 2008-05-29 03:40:00            3         TRUE     14   5.590500e+04 hours
 6: 128  1 2008-05-29 06:40:00            2        FALSE     NA             NA hours
 7: 129  1 2008-05-29 20:15:00            2        FALSE     NA             NA hours
 8: 144  1 2008-06-13 04:00:00            0        FALSE     NA             NA hours
 9: 145  1 2008-06-14 04:10:00            0        FALSE     NA             NA hours
10: 146  1 2008-06-15 04:45:00            4         TRUE     14   5.549592e+04 hours
11: 147  1 2008-06-15 06:30:00            0        FALSE     NA             NA hours
12: 246  1 2012-12-18 10:46:00            0        FALSE     NA             NA hours
13: 247  1 2014-01-14 11:30:00            0        FALSE     NA             NA hours
14: 248  1 2014-10-14 12:40:00            1        FALSE     NA             NA hours
15: 249  1 2015-02-04 14:27:00            0        FALSE     NA             NA hours
16: 633  2 2014-09-12 04:30:00            0        FALSE     NA             NA hours
17: 634  2 2014-09-29 08:57:00            0        FALSE     NA             NA hours
18: 635  3 2011-12-06 02:50:00            0        FALSE     NA             NA hours
19: 636  3 2011-12-06 10:45:00            0        FALSE     NA             NA hours
20: 637  3 2011-12-06 17:50:00            0        FALSE     NA             NA hours
21: 704  3 2012-04-18 05:00:00            0        FALSE     NA             NA hours
22: 705  3 2012-04-18 05:00:00            0        FALSE     NA             NA hours
23: 706  3 2012-04-18 16:30:00            3         TRUE     25   8.333333e-01 hours
24: 707  3 2012-04-18 16:30:00            4         TRUE     25   8.333333e-01 hours
25: 708  3 2012-04-18 17:20:00            1        FALSE     NA             NA hours
26: 768  3 2012-05-10 03:35:00            0        FALSE     NA             NA hours
27: 769  3 2012-05-11 03:20:00            0        FALSE     NA             NA hours
28: 770  3 2012-05-12 04:00:00            4         TRUE     NA             NA hours
29: 771  3 2012-05-12 05:45:00            0        FALSE     NA             NA hours
30: 772  3 2012-05-13 04:00:00            0        FALSE     NA             NA hours
31: 773  3 2012-05-14 05:30:00            0        FALSE     NA             NA hours
32: 774  3 2012-05-15 04:00:00            4         TRUE     NA             NA hours
33: 775  3 2012-05-15 05:15:00            0        FALSE     NA             NA hours
34: 776  3 2012-05-16 08:34:00            0        FALSE     NA             NA hours
35: 777  3 2012-05-17 07:10:00            0        FALSE     NA             NA hours
36: 778  3 2012-05-18 07:10:00            0        FALSE     NA             NA hours
37: 779  3 2012-05-28 07:26:00            0        FALSE     NA             NA hours
38: 780  3 2012-06-20 09:46:00            0        FALSE     NA             NA hours
39: 781  3 2012-08-01 13:10:00            0        FALSE     NA             NA hours
40: 782  3 2012-11-14 13:34:00            0        FALSE     NA             NA hours
41: 783  3 2013-11-13 09:35:00            0        FALSE     NA             NA hours
42: 784  3 2015-06-05 09:31:00            0        FALSE     NA             NA hours
    ROW ID      COLLECTED_DATE Hyperkalemia met_criteria my_row next_1_time_interval