我正在开发一个模拟缺失数据并对采样数据运行回归的项目。这是我目前写出的代码。
library(MASS)
# Covariance matrix ----
# AR(1)-type structure: the covariance between variables i and j is
# 0.7^|i - j|, i.e. 1, .7, .49, .343, .2401 moving away from the diagonal.
sigma <- 0.7^abs(outer(1:5, 1:5, "-"))

# Generate the data ----
# 1000 draws from a 5-variate normal with means 5, 5.25, 5.5, 5.75 and 6.
# mvrnorm() is namespace-qualified so this step works even when MASS has not
# been attached. Call set.seed() beforehand if reproducible draws are needed.
data <- MASS::mvrnorm(
  n = 1000,
  mu = c(5, 5.25, 5.5, 5.75, 6),
  Sigma = sigma
)
# No split() step here: split() requires a grouping argument `f`, and nothing
# downstream uses a split version of `data`.
# Missing-data mechanism for MCAR ----
LogoddsratioMCAR <- -0.5
OddsRatioMCAR <- exp(LogoddsratioMCAR)
# Despite its name, OddsMCAR is odds / (1 + odds), i.e. a value in (0, 1).
OddsMCAR <- OddsRatioMCAR / (1 + OddsRatioMCAR)

# Retained fractions for columns 2 to 5: start at 1 - OddsMCAR and remove a
# further OddsMCAR share of what is left at each of the three later steps.
retained <- unlist(Reduce(
  function(kept, step) kept - OddsMCAR * kept,
  x = seq_len(3L),
  init = 1 - OddsMCAR,
  accumulate = TRUE
))
Probability2 <- retained[[1L]]
Probability3 <- retained[[2L]]
Probability4 <- retained[[3L]]
Probability5 <- retained[[4L]]
# Sampling from each column ----
dataframe <- as.data.frame(data)
n_rows <- nrow(dataframe)

# V1 is kept in full.
dataMCAR1 <- dataframe$V1

# V2 to V5 each keep floor(ProbabilityK * n_rows) values drawn without
# replacement. sample.int(n, k) draws the same indices as sample(1:n, k) but
# is also safe when n is 0; floor() makes the truncation of the fractional
# sample size explicit.
# NOTE(review): the indices come back in random order and the unsampled rows
# are dropped, so element i of these vectors does not correspond to row i of
# dataMCAR1. If the regression needs row-aligned observations, assign NA to
# the unsampled rows in place instead -- confirm the intent.
dataMCAR2 <- dataframe$V2[sample.int(n_rows, floor(Probability2 * n_rows))]
dataMCAR3 <- dataframe$V3[sample.int(n_rows, floor(Probability3 * n_rows))]
dataMCAR4 <- dataframe$V4[sample.int(n_rows, floor(Probability4 * n_rows))]
dataMCAR5 <- dataframe$V5[sample.int(n_rows, floor(Probability5 * n_rows))]
现在我需要在dataMCAR2到dataMCAR5的末尾补上NA,使这些向量具有相同的长度。我想把它们合并成一个数据框并对其运行回归。
我该如何把这些NA追加到各个向量中?
答案 0 :(得分:0)
这是一种方法。dataMCAR1的长度为1000,因此您需要让其他向量(例如dataMCAR2)也具有相同的长度。这里,我在lapply中把每个向量与若干个NA连接起来;然后用cbind把全部五个向量绑定在一起并创建数据框;最后,使用列表(即ana)中的名称作为数据框的列名。
# Collect every object whose name starts with dataMCAR<digit> from the
# current environment into a named list.
ana <- mget(ls(pattern = "^dataMCAR\\d+"))

# Pad to the length of the longest vector rather than a hard-coded 1000, so
# the code keeps working if the sample size changes. The extra 0L keeps max()
# well-defined when no vectors were found.
target_len <- max(lengths(ana), 0L)

# Append trailing NAs to each vector, bind the padded vectors column-wise and
# convert the result to a data frame.
bob <- as.data.frame(
  do.call(
    cbind,
    lapply(ana, function(x) c(x, rep(NA, times = target_len - length(x))))
  )
)

# Name the columns after the list elements.
colnames(bob) <- names(ana)
# dataMCAR1 dataMCAR2 dataMCAR3 dataMCAR4 dataMCAR5
#1 3.492947 6.702115 4.743988 6.330211 6.257005
#2 4.637356 5.322731 4.916232 6.209659 7.619699
#3 2.967167 4.397137 5.445473 6.632309 6.844667
#4 4.484144 4.814281 5.060921 5.357306 4.831958
#5 6.245234 5.471267 4.959116 5.975332 6.243439
#6 5.334700 4.122378 6.671627 6.529121 7.354149
#summary(bob)
# dataMCAR1 dataMCAR2 dataMCAR3 dataMCAR4 dataMCAR5
#Min. :1.465 Min. :2.141 Min. :2.223 Min. :3.253 Min. :3.249
#1st Qu.:4.334 1st Qu.:4.606 1st Qu.:4.886 1st Qu.:5.106 1st Qu.:5.412
#Median :5.005 Median :5.336 Median :5.616 Median :5.795 Median :6.064
#Mean :5.000 Mean :5.305 Mean :5.550 Mean :5.783 Mean :6.041
#3rd Qu.:5.657 3rd Qu.:5.957 3rd Qu.:6.225 3rd Qu.:6.487 3rd Qu.:6.697
#Max. :8.168 Max. :8.955 Max. :8.208 Max. :8.043 Max. :8.740
# NA's :378 NA's :613 NA's :759 NA's :850