对每个id重复第一次观察

时间:2015-08-07 01:13:16

标签: r transformation dplyr tidyr

我有一个数据集如下

 Obs#      Id          Date              Med          Dosage      Result
  1        1567        01/03/2011        Acebutol     10mg        100.2
  2        1567        04/02/2011        Acebutol     10mg        98.6
  3        1567        08/14/2011        Oxaprozin    20mg        99.34
  4        1567        08/14/2011        Bayer        20mg        99.34
  5        7845        02/01/2011        Bayer        20mg        89.64
  6        7845        06/14/2011        Bayer        20mg        95.41 
  7        7845        01/06/2012        Bayer        50mg        89.92
  8        7845        01/06/2012        Acebutol     50mg        89.92
  9        7845        04/19/2012        Bayer        50mg        95.15
 10        7845        09/25/2012        Bayer        50mg        99.37
 11        1567        01/14/2012        Oxaprozin    20mg        89.34
 12        1567        05/12/2012        Oxaprozin    20mg        91.4


# Reproducible copy of the example data (dput-style output): a 12-row
# data.frame with columns Obs., Id, Date, Med, Dosage and Result.
# Date, Med and Dosage are stored as factors; Date holds "mm/dd/yyyy" strings.
# NOTE: the Dosage levels include " 20mg" and " 50mg" (with a leading space)
# as levels distinct from "20mg" and "50mg".
Test2 <- structure(list(Obs. = 1:12, Id = c(1567L, 1567L, 1567L, 1567L, 
7845L, 7845L, 7845L, 7845L, 7845L, 7845L, 1567L, 1567L), Date =               
structure(c(1L,5L, 9L, 9L, 4L, 8L, 2L, 2L, 6L, 10L, 3L, 7L), .Label =          
c("01/03/2011", "01/06/2012", "01/14/2012", "02/01/2011", "04/02/2011",     
"04/19/2012","05/12/2012", "06/14/2011", "08/14/2011", "09/25/2012"), class      
= "factor"),Med = structure(c(1L, 1L, 3L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
3L, 3L), .Label = c("Acebutol", "Bayer", "Oxaprozin"), class = "factor"), 
Dosage = structure(c(3L, 3L, 4L, 1L, 1L, 1L, 2L, 5L, 2L, 
2L, 4L, 4L), .Label = c(" 20mg", " 50mg", "10mg", "20mg", 
"50mg"), class = "factor"), Result = c(100.2, 98.6, 99.34, 
99.34, 89.64, 95.41, 89.92, 89.92, 95.15, 99.37, 89.34, 91.4
)), .Names = c("Obs.", "Id", "Date", "Med", "Dosage", "Result"
), class = "data.frame", row.names = c(NA, -12L))

我正在使用spread函数来转换此数据集,如下所示

library(dplyr)
library(tidyr)
library(doBy)
# Reshape the long medication records in Test2 into wide form: one row per
# Id / Date / Result, with one dosage column per medication (Test21), then
# rename Date to Date2, add an empty Date1 column, parse Date2 as a Date and
# sort by Id and Date2 (Test22).
Test2$X <- NULL # no-op unless the data carries an `X` column
Test2$Obs. <- NULL # drop the observation counter so it is not used as a key
# Dosage is a factor. Filling a factor with the number 0 is not type-safe
# (0 is not one of its levels, so the gaps can come back as NA). Convert to
# character first and fill with the string "0" so gaps are filled reliably.
Test21 <- Test2 %>%
  mutate(Dosage = as.character(Dosage)) %>%
  spread(Med, Dosage, fill = "0")
Test22 <- Test21 %>%
  rename(Date2 = Date) %>%
  mutate(Date1 = NA)
Test22$Date2 <- as.Date(Test22$Date2, format = "%m/%d/%Y")
Test22 <- orderBy(~ Id + Date2, data = Test22)

    Id      Date2        Result     Acebutol Bayer    Oxaprozin   Date1
    1567    2011-01-03   100.20     10mg     0        0           NA
    1567    2011-04-02   98.60      10mg     0        0           NA
    1567    2011-08-14   99.34      0        20mg     20mg        NA
    1567    2012-01-14   89.34      0        0        20mg        NA
    1567    2012-05-12   91.40      0        0        20mg        NA
    7845    2011-02-01   89.64      0        20mg     0           NA
    7845    2011-06-14   95.41      0        20mg     0           NA
    7845    2012-01-06   89.92      50mg     50mg     0           NA
    7845    2012-04-19   95.15      0        50mg     0           NA
    7845    2012-09-25   99.37      0        50mg     0           NA

我要做的是重复每个Id的第一次观察,并将重复出的那一行中的药物列(Acebutol、Bayer、Oxaprozin)设为0,如下所示

    Id      Date2        Result     Acebutol Bayer    Oxaprozin   Date1

  **1567    2011-01-03   100.20     0        0        0           NA

    1567    2011-01-03   100.20     10mg     0        0           NA
    1567    2011-04-02   98.60      10mg     0        0           NA
    1567    2011-08-14   99.34      0        20mg     20mg        NA
    1567    2012-01-14   89.34      0        0        20mg        NA
    1567    2012-05-12   91.40      0        0        20mg        NA

  **7845    2011-02-01   89.64      0        0        0           NA

    7845    2011-02-01   89.64      0        20mg     0           NA
    7845    2011-06-14   95.41      0        20mg     0           NA
    7845    2012-01-06   89.92      50mg     50mg     0           NA
    7845    2012-04-19   95.15      0        50mg     0           NA
    7845    2012-09-25   99.37      0        50mg     0           NA

不知道如何做到这一点,感谢任何帮助。

2 个答案:

答案 0 :(得分:0)

Something like this?

# For each Id, take a copy of its earliest-dated row, blank out the
# medication columns on that copy, and put it back together with the full
# wide table. The copies are bound first so that, after sorting by Id and
# Date2, each copy sits directly above the original row it was taken from.
TestNew <- Test22 %>%
  group_by(Id) %>%
  filter(row_number(Date2) == 1) %>% # earliest Date2 within each Id
  ungroup() %>% # do not leak the grouping into the result
  mutate(Acebutol = "0", Bayer = "0", Oxaprozin = "0") %>%
  # Bind the wide table (Test22), not the long input (Test2): Test2 has
  # Date / Med / Dosage columns and would add mismatched rows full of NA.
  bind_rows(Test22) %>%
  arrange(Id, Date2)

答案 1 :(得分:0)

# Duplicate the first row of every Id with its medication columns zeroed out
# and place it above the original rows.
# Relies on Test22 already being sorted by Date2 within Id, so that slice(1)
# picks the earliest observation.
Test22 %>%
  group_by(Id) %>%
  slice(1) %>% # select the first row of each ID
  ungroup() %>% # drop the grouping before recoding and binding
  # Recode the medication columns by name. mutate_each() / funs() are
  # deprecated in dplyr, and positional selection (4:6) on a grouped table
  # may be resolved differently across dplyr versions.
  mutate(Acebutol = "0", Bayer = "0", Oxaprozin = "0") %>%
  bind_rows(Test22) %>% # rbind the new zeroed-out top rows to the original
  arrange(Id, Date2, Result) # sort back to the preferred order

Source: local data frame [12 x 7]

     Id      Date2 Result Acebutol Bayer Oxaprozin Date1
1  1567 2011-01-03 100.20        0     0         0    NA
2  1567 2011-01-03 100.20     10mg    NA        NA    NA
3  1567 2011-04-02  98.60     10mg    NA        NA    NA
4  1567 2011-08-14  99.34       NA  20mg      20mg    NA
5  1567 2012-01-14  89.34       NA    NA      20mg    NA
6  1567 2012-05-12  91.40       NA    NA      20mg    NA
7  7845 2011-02-01  89.64        0     0         0    NA
8  7845 2011-02-01  89.64       NA  20mg        NA    NA
9  7845 2011-06-14  95.41       NA  20mg        NA    NA
10 7845 2012-01-06  89.92     50mg  50mg        NA    NA
11 7845 2012-04-19  95.15       NA  50mg        NA    NA
12 7845 2012-09-25  99.37       NA  50mg        NA    NA