Question

我是R的新手。我有一组数据，其中包含3组肺功能测量，每个观察下面给出3个相应的日期。我想使用R软件为每次观察提取斜率（肺功能下降），并在每个观察的新栏中插入 1.我该如何处理这个问题？
2.我的数据集是否以正确的格式排列？

ID      FEV1_Date11 FEV1_Date12 FEV1_Date13  DATE11     DATE12     DATE13
18105   1.35        1.25        1.04         6/9/1990   8/16/1991  8/27/1993
18200   0.87        0.85                     9/12/1991  3/11/1993   
18303   0.79                                 4/23/1992      
24204   4.05        3.95        3.99         6/8/1992   3/22/1993  11/5/1994
28102   1.19        1.04        0.96         10/31/1990 7/24/1991  6/27/1992
34104   1.03        1.16        1.15         7/25/1992  12/8/1993  12/7/1994
43108   0.92        0.83        0.79         6/23/1993  1/12/1994  1/11/1995
103114  2.43        2.28        2.16         6/5/1994   6/21/1995  4/7/1996
114101  0.73        0.59        0.6          6/25/1989  8/5/1990   8/24/1991

第1次观察的例子，斜率= 0.0003 感谢..

Answer 1

这是一个＆＃34; hacky＆＃34;解决方案，但如果我正确理解您的问题（可能需要一些澄清），这应该适用于您的情况。请注意，这有点特定于您的情况，因为列对应按您指定的顺序排列。

library(dplyr)
library(lubridate)

### Load Data
tdf <- read.table(header=TRUE, stringsAsFactors = FALSE, text = '
ID      FEV1_Date11 FEV1_Date12 FEV1_Date13  DATE11     DATE12     DATE13
18105   1.35        1.25        1.04         6/9/1990   8/16/1991  8/27/1993
18200   0.87        0.85        NA           9/12/1991  3/11/1993  NA
18303   0.79        NA          NA           4/23/1992  NA         NA
24204   4.05        3.95        3.99         6/8/1992   3/22/1993  11/5/1994
28102   1.19        1.04        0.96         10/31/1990 7/24/1991  6/27/1992
34104   1.03        1.16        1.15         7/25/1992  12/8/1993  12/7/1994
43108   0.92        0.83        0.79         6/23/1993  1/12/1994  1/11/1995
103114  2.43        2.28        2.16         6/5/1994   6/21/1995  4/7/1996
114101  0.73        0.59        0.6          6/25/1989  8/5/1990   8/24/1991') %>% tbl_df

#####################################
### Reshape the data by column pairs.
#####################################
### Function to reshape a single column pair
xform_data <- function(x) {
  df<-data.frame(tdf[,'ID'],
                 names(tdf)[x],
                 tdf[,names(tdf)[x]],
                 tdf[,names(tdf)[x+3]], stringsAsFactors = FALSE)
  names(df) <- c('ID', 'DateKey', 'Val', 'Date'); df
}
### Create a new data frame with the data in a deep format (i.e. reshaped)
### 'lapply' is used to reshape each pair of columns (date and value).
### 'lapply' returns a list of data frames (on df per pair) and 'bind_rows'
### combines them into one data frame.
newdf <-
  bind_rows(lapply(2:4, function(x) {xform_data(x)})) %>%
  mutate(Date = mdy(Date, tz='utc'))

#####################################
### Calculate the slopes per ID
#####################################
slopedf <-
  newdf %>%
  arrange(DateKey, Date) %>%
  group_by(ID) %>%
  do(slope = lm(Val ~ Date, data = .)$coefficients[[2]]) %>%
  mutate(slope = as.vector(slope)) %>%
  ungroup
slopedf
## # A tibble: 9 x 2
##       ID         slope
##    <int>         <dbl>
## 1  18105 -3.077620e-09
## 2  18200 -4.239588e-10
## 3  18303            NA
## 4  24204 -5.534095e-10
## 5  28102 -4.325210e-09
## 6  34104  1.690414e-09
## 7  43108 -2.490139e-09
## 8 103114 -4.645589e-09
## 9 114101 -1.924497e-09

##########################################
### Adding slope column to original data.
##########################################
> tdf %>% left_join(slopedf, by = 'ID')
## # A tibble: 9 x 8
##       ID FEV1_Date11 FEV1_Date12 FEV1_Date13     DATE11    DATE12    DATE13         slope
##    <int>       <dbl>       <dbl>       <dbl>      <chr>     <chr>     <chr>         <dbl>
## 1  18105        1.35        1.25        1.04   6/9/1990 8/16/1991 8/27/1993 -3.077620e-09
## 2  18200        0.87        0.85          NA  9/12/1991 3/11/1993      <NA> -4.239588e-10
## 3  18303        0.79          NA          NA  4/23/1992      <NA>      <NA>            NA
## 4  24204        4.05        3.95        3.99   6/8/1992 3/22/1993 11/5/1994 -5.534095e-10
## 5  28102        1.19        1.04        0.96 10/31/1990 7/24/1991 6/27/1992 -4.325210e-09
## 6  34104        1.03        1.16        1.15  7/25/1992 12/8/1993 12/7/1994  1.690414e-09
## 7  43108        0.92        0.83        0.79  6/23/1993 1/12/1994 1/11/1995 -2.490139e-09
## 8 103114        2.43        2.28        2.16   6/5/1994 6/21/1995  4/7/1996 -4.645589e-09
## 9 114101        0.73        0.59        0.60  6/25/1989  8/5/1990 8/24/1991 -1.924497e-09

Answer 2

如果我理解了这个问题，我想你想要在每次访问之间找到斜率：

library(dplyr)

group_by(df, ID) %>% 
  mutate_at(vars(starts_with("DATE")), funs(as.Date(., "%m/%d/%Y"))) %>% 
  do(data_frame(slope=diff(unlist(.[,2:4]))/diff(unlist(.[,5:7])),
                after_visit=1+(1:length(slope))))

## Source: local data frame [18 x 3]
## Groups: ID [9]
## 
##        ID         slope after_visit
##     <int>         <dbl>       <dbl>
## 1   18105 -2.309469e-04           2
## 2   18105 -2.830189e-04           3
## 3   18200 -3.663004e-05           2
## 4   18200            NA           3
## 5   18303            NA           2
## 6   18303            NA           3
## 7   24204 -3.484321e-04           2
## 8   24204  6.745363e-05           3
## 9   28102 -5.639098e-04           2
## 10  28102 -2.359882e-04           3
## 11  34104  2.594810e-04           2
## 12  34104 -2.747253e-05           3
## 13  43108 -4.433498e-04           2
## 14  43108 -1.098901e-04           3
## 15 103114 -3.937008e-04           2
## 16 103114 -4.123711e-04           3
## 17 114101 -3.448276e-04           2
## 18 114101  2.604167e-05           3

替代沉思：

group_by(df, ID) %>% 
  mutate_at(vars(starts_with("DATE")), funs(as.Date(., "%m/%d/%Y"))) %>% 
  do(data_frame(date=as.Date(unlist(.[,5:7]), origin="1970-01-01"), # in the event you wanted to keep the data less awful and have one observation per row, this preserves the Date class
                reading=unlist(.[,2:4]))) %>% 
  do(data_frame(slope=diff(.$reading)/unclass(diff(.$date))))

提取斜坡进行个别观察

2 个答案: