在R中合并每个id的time 0和time 1两行

时间:2017-05-17 22:51:38

标签: r

我有一个包含id和测量值的数据集。其中一部分测量在时间0进行,另一部分在时间1进行,因此这两行中各有一些缺失值。由于两者都是基线测量,我想把每个id的time 0和time 1两行合并为一行,并让新数据集的时间从1开始。我想不出该怎么做。下面用一些模拟数据来展示我的数据结构。

# Simulate an unbalanced longitudinal data set: N subjects, each with between
# 2 and 6 follow-up visits and five measurements (a-e) per visit, then add two
# partially observed baseline rows (time 0 and time 1) per subject.
set.seed(234)

N <- 3
# Number of follow-up visits per subject (note: this masks base::t here).
t <- sample(2:6, N, replace = TRUE)
id <- rep(seq_len(N), times = t)
n <- length(id)

# The rnorm() calls stay in the order a, b, c, d, e so the random stream is
# consumed in the same order as before.
x <- cbind(
  a = rnorm(n, 0, 1),
  b = rnorm(n, 0, 1),
  c = rnorm(n, 0, 1),
  d = rnorm(n, 0, 1),
  e = rnorm(n, 0, 1)
)

# Follow-up visits are numbered 2, 3, ..., t[i] + 1 within each subject.
# sequence() builds this for any N instead of hard-coding three subjects.
time <- sequence(t) + 1

x1 <- cbind(id, time, x)

######### Add missing data
# Baseline rows for ids 1-3: time 0 carries a, d, e and time 1 carries b, c, e.
baseline <- rbind(
  c(1, 0, 0.98, NA, NA, 0.71, 0.85),
  c(1, 1, NA, 0.85, 0.62, NA, 0.85),
  c(2, 0, 0.81, NA, NA, 0.68, 0.87),
  c(2, 1, NA, 0.97, 0.83, NA, 0.85),
  c(3, 0, 0.87, NA, NA, 0.72, 0.83),
  c(3, 1, NA, 0.98, 0.71, NA, 0.86)
)
x7 <- rbind(x1, baseline)

# Final matrix with the missing values, sorted by id and then by time.
newx <- x7[order(x7[, "id"], x7[, "time"]), ]

newx
       id time          a          b           c           d            e
  [1,]  1    0  0.9800000         NA          NA  0.71000000  0.850000000
  [2,]  1    1         NA  0.8500000  0.62000000          NA  0.850000000
  [3,]  1    2  0.7590390 -0.8716028 -0.30554099 -0.30528521  0.030963334
  [4,]  1    3  0.3713058  1.1876234  0.86956546 -0.28108275  0.669563187
  [5,]  1    4  0.5758514 -0.6672287 -1.06121591 -1.16458396 -0.140668367
  [6,]  1    5 -0.5703207  0.5383396 -0.09635967  0.09034109  1.281077794
  [7,]  1    6  0.1198567  0.4905632  0.47460932  1.01451692 -0.621039707
  [8,]  2    0  0.8100000         NA          NA  0.68000000  0.870000000
  [9,]  2    1         NA  0.9700000  0.83000000          NA  0.850000000
 [10,]  2    2  0.2095484 -1.0216529 -0.02671707  0.37160636  0.160315383
 [11,]  2    3 -0.1481357 -0.3726091  1.10167492  1.70677625 -0.860442148
 [12,]  2    4  0.6433900  1.3251178 -0.26842418  0.92790039  0.318602469
 [13,]  2    5  1.1348350 -0.7313432  0.01035965  1.05747589 -1.829611181
 [14,]  2    6  0.1995994  0.7625386  0.25897152 -1.05112649 -1.121045817
 [15,]  3    0  0.8700000         NA          NA  0.72000000  0.830000000
 [16,]  3    1         NA  0.9800000  0.71000000          NA  0.860000000
 [17,]  3    2  0.2987197  0.3275333 -0.39459737  2.48875683  0.002293782
 [18,]  3    3 -0.3191671 -1.1440187 -0.48873668 -0.32581308 -0.289496481

2 个答案:

答案 0 :(得分:3)

我不确定这是否是你想要的,因为你没有给出预期的结果。下面的做法使用zoo包中的na.locf,按相反方向(fromLast = TRUE)传递观测值,即用后面的值填充前面的NA;并使用dplyr的group_by和mutate_all按id对数据框进行操作。

library(dplyr)
library(zoo)

# Merge the two partial baseline rows (time 0 and time 1) of each id into a
# single row labelled time 1.
newx %>%
  as.data.frame() %>%
  group_by(id) %>%
  # Fill each NA with the next observed value within the same id, so the
  # time-0 row picks up the values measured at time 1.
  # na.rm = FALSE keeps every column at its original length; the default
  # (na.rm = TRUE) drops NAs that have no later value to fill from, which
  # makes mutate fail on groups with a trailing NA.
  mutate_all(na.locf, na.rm = FALSE, fromLast = TRUE) %>%
  ungroup() %>%
  # The time-1 row is now redundant; relabel the filled time-0 row as time 1.
  filter(time != 1) %>%
  mutate(time = if_else(time == 0, 1, time))

#       id  time          a          b           c           d            e
# 1      1     1  0.9800000  0.8500000  0.62000000  0.71000000  0.850000000
# 2      1     2  0.7590390 -0.8716028 -0.30554099 -0.30528521  0.030963334
# 3      1     3  0.3713058  1.1876234  0.86956546 -0.28108275  0.669563187
# 4      1     4  0.5758514 -0.6672287 -1.06121591 -1.16458396 -0.140668367
# 5      1     5 -0.5703207  0.5383396 -0.09635967  0.09034109  1.281077794
# 6      1     6  0.1198567  0.4905632  0.47460932  1.01451692 -0.621039707
# 7      2     1  0.8100000  0.9700000  0.83000000  0.68000000  0.870000000
# 8      2     2  0.2095484 -1.0216529 -0.02671707  0.37160636  0.160315383
# 9      2     3 -0.1481357 -0.3726091  1.10167492  1.70677625 -0.860442148
# 10     2     4  0.6433900  1.3251178 -0.26842418  0.92790039  0.318602469
# 11     2     5  1.1348350 -0.7313432  0.01035965  1.05747589 -1.829611181
# 12     2     6  0.1995994  0.7625386  0.25897152 -1.05112649 -1.121045817
# 13     3     1  0.8700000  0.9800000  0.71000000  0.72000000  0.830000000
# 14     3     2  0.2987197  0.3275333 -0.39459737  2.48875683  0.002293782
# 15     3     3 -0.3191671 -1.1440187 -0.48873668 -0.32581308 -0.289496481

答案 1 :(得分:1)

我们也可以使用data.table

library(data.table)
library(zoo)

# Merge the two partial baseline rows (time 0 and time 1) of each id into a
# single row labelled time 1.
dt <- as.data.table(newx)
# Fill backwards within each id while the time-1 rows are still present, so
# the time-0 NAs take the time-1 baseline values. Dropping time 1 first would
# fill them from the time-2 follow-up instead.
# na.rm = FALSE keeps unfillable NAs rather than dropping rows.
dt <- dt[, na.locf(.SD, na.rm = FALSE, fromLast = TRUE), by = id]
# The time-1 row is now redundant; relabel the filled time-0 row as time 1.
dt <- dt[time != 1]
dt[time == 0, time := 1][]
# id time          a          b           c           d            e
# 1:  1    1  0.9800000  0.8500000  0.62000000  0.71000000  0.850000000
# 2:  1    2  0.7590390 -0.8716028 -0.30554099 -0.30528521  0.030963334
# 3:  1    3  0.3713058  1.1876234  0.86956546 -0.28108275  0.669563187
# 4:  1    4  0.5758514 -0.6672287 -1.06121591 -1.16458396 -0.140668367
# 5:  1    5 -0.5703207  0.5383396 -0.09635967  0.09034109  1.281077794
# 6:  1    6  0.1198567  0.4905632  0.47460932  1.01451692 -0.621039707
# 7:  2    1  0.8100000  0.9700000  0.83000000  0.68000000  0.870000000
# 8:  2    2  0.2095484 -1.0216529 -0.02671707  0.37160636  0.160315383
# 9:  2    3 -0.1481357 -0.3726091  1.10167492  1.70677625 -0.860442148
#10:  2    4  0.6433900  1.3251178 -0.26842418  0.92790039  0.318602469
#11:  2    5  1.1348350 -0.7313432  0.01035965  1.05747589 -1.829611181
#12:  2    6  0.1995994  0.7625386  0.25897152 -1.05112649 -1.121045817
#13:  3    1  0.8700000  0.9800000  0.71000000  0.72000000  0.830000000
#14:  3    2  0.2987197  0.3275333 -0.39459737  2.48875683  0.002293782
#15:  3    3 -0.3191671 -1.1440187 -0.48873668 -0.32581308 -0.289496481