我有一个包含 id 和测量值的数据集。部分测量在 time 0 进行,另一部分在 time 1 进行,因此这两行中各有一些缺失值。由于两者都是基线测量,我想把每个 id 的 time 0 行和 time 1 行合并为一行,使新数据集的时间从 1 开始。我想不出该怎么做。为了说明我的数据是什么样子,下面给出一些模拟数据。
# Simulate an unbalanced longitudinal data set: N subjects, each with a random
# number (2 to 6) of follow-up visits, plus two partially observed baseline
# rows (time 0 and time 1) per subject. The final matrix is `newx`.
set.seed(234)
N <- 3
t <- sample(2:6, N, replace = TRUE) # number of follow-up visits per subject
id <- rep(seq_len(N), times = t)
n <- length(id)

# Five standard-normal measurements per visit. The columns are drawn in the
# order a, b, c, d, e so the random stream is consumed in a fixed order.
x <- cbind(
  a = rnorm(n, 0, 1),
  b = rnorm(n, 0, 1),
  c = rnorm(n, 0, 1),
  d = rnorm(n, 0, 1),
  e = rnorm(n, 0, 1)
)

# Follow-up times run 2, 3, ..., t[i] + 1 within subject i. sequence() builds
# this for any N instead of spelling out t[1], t[2], t[3] by hand.
time <- sequence(t) + 1
x1 <- cbind(id, time, x)

######### Add missing data
# Baseline rows for ids 1 to 3, laid out as id, time, a, b, c, d, e.
# At time 0 columns b and c are missing; at time 1 columns a and d are missing.
baseline <- rbind(
  c(1, 0, 0.98, NA, NA, 0.71, 0.85),
  c(1, 1, NA, 0.85, 0.62, NA, 0.85),
  c(2, 0, 0.81, NA, NA, 0.68, 0.87),
  c(2, 1, NA, 0.97, 0.83, NA, 0.85),
  c(3, 0, 0.87, NA, NA, 0.72, 0.83),
  c(3, 1, NA, 0.98, 0.71, NA, 0.86)
)
x7 <- rbind(x1, baseline)

# create a new dataframe with missing
# (sorted by id, then by time within id)
newx <- x7[order(x7[, 1], x7[, 2]), ]
newx
id time a b c d e
[1,] 1 0 0.9800000 NA NA 0.71000000 0.850000000
[2,] 1 1 NA 0.8500000 0.62000000 NA 0.850000000
[3,] 1 2 0.7590390 -0.8716028 -0.30554099 -0.30528521 0.030963334
[4,] 1 3 0.3713058 1.1876234 0.86956546 -0.28108275 0.669563187
[5,] 1 4 0.5758514 -0.6672287 -1.06121591 -1.16458396 -0.140668367
[6,] 1 5 -0.5703207 0.5383396 -0.09635967 0.09034109 1.281077794
[7,] 1 6 0.1198567 0.4905632 0.47460932 1.01451692 -0.621039707
[8,] 2 0 0.8100000 NA NA 0.68000000 0.870000000
[9,] 2 1 NA 0.9700000 0.83000000 NA 0.850000000
[10,] 2 2 0.2095484 -1.0216529 -0.02671707 0.37160636 0.160315383
[11,] 2 3 -0.1481357 -0.3726091 1.10167492 1.70677625 -0.860442148
[12,] 2 4 0.6433900 1.3251178 -0.26842418 0.92790039 0.318602469
[13,] 2 5 1.1348350 -0.7313432 0.01035965 1.05747589 -1.829611181
[14,] 2 6 0.1995994 0.7625386 0.25897152 -1.05112649 -1.121045817
[15,] 3 0 0.8700000 NA NA 0.72000000 0.830000000
[16,] 3 1 NA 0.9800000 0.71000000 NA 0.860000000
[17,] 3 2 0.2987197 0.3275333 -0.39459737 2.48875683 0.002293782
[18,] 3 3 -0.3191671 -1.1440187 -0.48873668 -0.32581308 -0.289496481
答案 0 :(得分:3)
我不确定这是否是你想要的,因为你没有给出预期的结果。下面使用 zoo 包中的 na.locf 向后滚动测量值(fromLast = TRUE),即用后面的值填充 NA,并使用 dplyr 包的 group_by 和 mutate_all 对数据框进行操作。
library(dplyr)
library(zoo)

# Merge the two baseline rows (time 0 and time 1) of each id into a single row
# relabelled as time 1. Relies on the rows being ordered by time within each
# id, which is how `newx` is sorted when it is built.
newx %>%
  data.frame() %>%
  group_by(id) %>%
  # Backward fill within each id: every NA takes the next later non-missing
  # value, so the time 0 row picks up the values recorded at time 1.
  # na.rm = FALSE keeps each column at its original length; the default
  # (na.rm = TRUE) drops NAs that cannot be filled, which makes mutate fail
  # with a size mismatch whenever a group's last row has a missing value.
  mutate_all(na.locf, na.rm = FALSE, fromLast = TRUE) %>%
  # The time 0 row now carries the full baseline, so the time 1 row is dropped
  # and the time 0 row is renamed to time 1.
  filter(time != 1) %>%
  mutate(time = if_else(time == 0, 1, time)) %>%
  # Return an ungrouped result so later verbs do not silently run per id.
  ungroup()
# id time a b c d e
# 1 1 1 0.9800000 0.8500000 0.62000000 0.71000000 0.850000000
# 2 1 2 0.7590390 -0.8716028 -0.30554099 -0.30528521 0.030963334
# 3 1 3 0.3713058 1.1876234 0.86956546 -0.28108275 0.669563187
# 4 1 4 0.5758514 -0.6672287 -1.06121591 -1.16458396 -0.140668367
# 5 1 5 -0.5703207 0.5383396 -0.09635967 0.09034109 1.281077794
# 6 1 6 0.1198567 0.4905632 0.47460932 1.01451692 -0.621039707
# 7 2 1 0.8100000 0.9700000 0.83000000 0.68000000 0.870000000
# 8 2 2 0.2095484 -1.0216529 -0.02671707 0.37160636 0.160315383
# 9 2 3 -0.1481357 -0.3726091 1.10167492 1.70677625 -0.860442148
# 10 2 4 0.6433900 1.3251178 -0.26842418 0.92790039 0.318602469
# 11 2 5 1.1348350 -0.7313432 0.01035965 1.05747589 -1.829611181
# 12 2 6 0.1995994 0.7625386 0.25897152 -1.05112649 -1.121045817
# 13 3 1 0.8700000 0.9800000 0.71000000 0.72000000 0.830000000
# 14 3 2 0.2987197 0.3275333 -0.39459737 2.48875683 0.002293782
# 15 3 3 -0.3191671 -1.1440187 -0.48873668 -0.32581308 -0.289496481
答案 1 :(得分:1)
我们也可以使用data.table
library(data.table)
library(zoo)

# Merge the two baseline rows (time 0 and time 1) of each id into a single row
# relabelled as time 1.
# The backward fill must run BEFORE the time 1 rows are dropped. Filtering
# first discards the measurements taken at time 1, and the time 0 row would
# then be filled from the time 2 visit instead.
as.data.table(newx)[
  , na.locf(.SD, fromLast = TRUE), by = id
][time != 1][time == 0, time := 1][]
# id time a b c d e
# 1: 1 1 0.9800000 0.8500000 0.62000000 0.71000000 0.850000000
# 2: 1 2 0.7590390 -0.8716028 -0.30554099 -0.30528521 0.030963334
# 3: 1 3 0.3713058 1.1876234 0.86956546 -0.28108275 0.669563187
# 4: 1 4 0.5758514 -0.6672287 -1.06121591 -1.16458396 -0.140668367
# 5: 1 5 -0.5703207 0.5383396 -0.09635967 0.09034109 1.281077794
# 6: 1 6 0.1198567 0.4905632 0.47460932 1.01451692 -0.621039707
# 7: 2 1 0.8100000 0.9700000 0.83000000 0.68000000 0.870000000
# 8: 2 2 0.2095484 -1.0216529 -0.02671707 0.37160636 0.160315383
# 9: 2 3 -0.1481357 -0.3726091 1.10167492 1.70677625 -0.860442148
#10: 2 4 0.6433900 1.3251178 -0.26842418 0.92790039 0.318602469
#11: 2 5 1.1348350 -0.7313432 0.01035965 1.05747589 -1.829611181
#12: 2 6 0.1995994 0.7625386 0.25897152 -1.05112649 -1.121045817
#13: 3 1 0.8700000 0.9800000 0.71000000 0.72000000 0.830000000
#14: 3 2 0.2987197 0.3275333 -0.39459737 2.48875683 0.002293782
#15: 3 3 -0.3191671 -1.1440187 -0.48873668 -0.32581308 -0.289496481