我研究缺失并且我试图在R中现有缺失的基础上随机降级数据集。基本上我想要做的是随机选择我的数据框中的位置并将它们更改为NA但仅当它们不是NA时。我已经编写了下面的函数,但是在较大的数据帧上它实际上很慢,甚至在运行了几个小时后它也没有生成解决方案。有没有人有更好的方法来随机降级数据(使用NA检查)或知道一种方法来提高我的功能效率? 非常感谢你!
degradefunction<- function(x,nrow1, ncol1,del.amount){
repeat{
#Generate random row numbers
rowsample<-sample(1:nrow1,del.amount,replace=TRUE)
#Generate random column numbers
colsample<-sample(1:ncol1, del.amount, replace=TRUE)
x.del<-x
#For each deletion, with a randomly selected row and column location
for (i in 1:del.amount){
#Test if the randomly selected location is not an NA
if(is.na(x.del[rowsample[i],colsample[i]])==FALSE)
#If it is not NA but is a number make it NA now
{x.del[rowsample[i],colsample[i]]<-NA}
#If our randomly selected location IS an NA
else {
#Get a new randomly selected location (row and column)
resamplenrow1<-sample(1:nrow1,1,replace=TRUE)
resamplencol1<-sample(1:ncol1,1,replace=TRUE)
#See if the new location is an NA
if(is.na(x.del[resamplenrow1,resamplencol1])==FALSE){
#If the randomly selected location is not an NA, make it an NA
x.del[resamplenrow1,resamplencol1]<-NA}}}
#Break the loop when the amount of data in the dataframe has the number of
#missing values we want (in a case starting with 0 NAs we want it to break
#when the number of NAs in x.del is equal to the del.amount, but when we
#do this with data including missingness we want the number of missing
#values to equal the deletion amount plus the number of NAs previously
#occurring in the data.
if(sum(is.na(x.del))==(del.amount + (sum(is.na(x))))){break}
}
#Give back the degraded data
return(x.del)
}
#Test the degrade function
#On data with 0 previous missingness
mypractice<-matrix(c(1,2,3, 4), nrow=10, ncol=4)
mypractice
mypractice<-as.data.frame(mypractice)
str(mypractice)
sum(is.na(mypractice))
newvarx<-degradefunction(mypractice, 10,4,16)
newvarx
sum(is.na(newvarx))
#The sum of missingness is now 16
#On data with previous missingness
mypractice<-matrix(c(1,2,3,NA), nrow=10, ncol=4)
mypractice
mypractice<-as.data.frame(mypractice)
str(mypractice)
sum(is.na(mypractice))
newvarx<-degradefunction(mypractice, 10,4,16)
newvarx
sum(is.na(newvarx))
#We get a total missingness (26) of the missingness we induced (16)
#and the missingness in the original data (10)
答案 0 :(得分:2)
这个怎么样?
degradefunction <- function(x, del.amount){
# 1) indicate which cells are NA (works with matrix or df)
preNAs <- is.na(x)
# 2) how many cells are eligible to be degraded?
OpenSpots <- prod(dim(x)) - sum(preNAs)
# 3) of these, select del.amount for replacement with NA
newNas <- sample(1:OpenSpots, size = del.amount, replace = FALSE)
# 4) impute these NAs, ignoring the original NAs
x[!preNAs][newNas] <- NA
x
}
degradefunction(mypractice,16)