我正在尝试用从同一列中随机抽样得到的值来填补缺失值。例如,对于变量“apple”,数据看起来像 (1,2,3,NA),我希望从 (1,2,3) 中随机抽取一个值来填补 NA。我想出了下面的代码,但我希望它从各自对应的列中抽样,而不是从整个数据集中抽样:
# Randomly impute the missing values of a vector from its own observed values.
#
# Args:
#   a: A vector (e.g. one column of a data frame) that may contain NA.
#
# Returns:
#   A copy of `a` in which every NA has been replaced by a value drawn at
#   random, with replacement, from the non-missing elements of `a`. If `a`
#   has no NA, or has no observed value to draw from, it is returned unchanged.
random.imp <- function(a) {
  missing <- is.na(a)
  n.missing <- sum(missing)
  a.obs <- a[!missing]
  # Nothing to fill, or nothing to draw from: hand the input back untouched
  # (sampling from an empty set would otherwise raise an error).
  if (n.missing == 0L || length(a.obs) == 0L) {
    return(a)
  }
  imputed <- a
  # Draw positions rather than values: sample(x, ...) interprets a single
  # number x >= 1 as 1:x, so sample(a.obs, ...) would invent values whenever
  # only one observation is available.
  picks <- sample.int(length(a.obs), n.missing, replace = TRUE)
  imputed[missing] <- a.obs[picks]
  imputed
}
# Impute each column from its own observed values. Passing the whole data
# frame to random.imp() at once would pool the observed values of all columns.
# `train.num[]` keeps the result a data frame.
train.num[] <- lapply(train.num, random.imp)
我认为 `sample` 之后的代码有问题,任何评论都会有所帮助!
答案 0 :(得分:2)
编辑:已更新以处理数据框
# MAKE THE DATA
# Example data frame with 10 rows: one label column plus three numeric
# columns whose values are drawn with replacement from small sets that
# include NA, so each numeric column may contain missing values.
train.num <- data.frame(
  var = sample(c("a", "b", "v", "h", "i"), 10, replace = TRUE),
  c1 = sample(c(NA, 1, 4, 8), 10, replace = TRUE),
  c2 = sample(c(NA, 2, 3, 0), 10, replace = TRUE),
  c3 = sample(c(NA, 5, 9, 11), 10, replace = TRUE)
)
# DEFINE THE FUNCTION
# Fill the NA entries of a vector with values drawn at random, with
# replacement, from the distinct non-missing values of that same vector.
#
# Args:
#   a: A vector (e.g. one column of a data frame) that may contain NA.
#
# Returns:
#   `a` with every NA replaced. Because the draw is made from the distinct
#   observed values, each distinct value is equally likely regardless of how
#   often it occurs. If `a` has no NA, or no observed value at all, it is
#   returned unchanged.
random.imp <- function(a) {
  is.missing <- is.na(a)
  n <- sum(is.missing)
  candidates <- unique(a[!is.missing])
  # Nothing to fill, or nothing to draw from (sampling from an empty set
  # would raise an error).
  if (n == 0L || length(candidates) == 0L) {
    return(a)
  }
  # Draw positions rather than values: sample(x, ...) interprets a single
  # number x >= 1 as 1:x, which would invent values when only one distinct
  # value has been observed.
  picks <- sample.int(length(candidates), n, replace = TRUE)
  a[is.missing] <- candidates[picks]
  a
}
# RUN THE FUNCTION COLWISE (in this case for columns 2-4)
# lapply() visits the data frame one column at a time, so each column keeps
# its own type; apply() would first coerce the data frame to a matrix.
train.num[2:4] <- lapply(train.num[2:4], random.imp)
答案 1 :(得分:0)
# Replace the NA entries of a vector with values drawn at random, with
# replacement, from the non-missing entries of that same vector.
#
# Args:
#   a: A vector (e.g. one column of a data frame) that may contain NA.
#
# Returns:
#   `a` with every NA replaced by a randomly chosen observed value. If `a`
#   has no NA, or no observed value to draw from, it is returned unchanged.
random.imp <- function(a) {
  na.mask <- is.na(a)
  observed <- a[!na.mask]
  # Nothing to fill, or nothing to draw from (sampling from an empty set
  # would raise an error).
  if (!any(na.mask) || length(observed) == 0L) {
    return(a)
  }
  # Draw positions rather than values: sample(x, ...) interprets a single
  # number x >= 1 as 1:x, which would invent values when only one observation
  # is available.
  picks <- sample.int(length(observed), size = sum(na.mask), replace = TRUE)
  a[na.mask] <- observed[picks]
  a
}
# Demo: a small data frame with missing values in both columns.
df <- data.frame(
  x = c(1, NA, 3, 4, NA),
  y = c(NA, 6, NA, 5, NA)
)
df
#    x  y
# 1  1 NA
# 2 NA  6
# 3  3 NA
# 4  4  5
# 5 NA NA

# Impute column by column; assigning into `df[]` keeps the data frame
# structure instead of replacing it with a plain list.
df[] <- lapply(X = df, FUN = random.imp)
df
# Example output (the draws are random, so results vary between runs):
#   x y
# 1 1 5
# 2 1 6
# 3 3 6
# 4 4 5
# 5 1 6