样本数据
# Build a training set with two features and 100 observations.
# `revenue` is the class label; it is stored as a factor with fixed levels so
# that the training and test sets always share the same class levels.
train <- data.frame(
  salary = sample(2000:3000, 100, replace = TRUE),
  revenue = factor(
    sample(c("A", "B"), 100, replace = TRUE),
    levels = c("A", "B")
  )
)
# Set five randomly chosen salary values to NA.
train[sample(seq_len(nrow(train)), 5), "salary"] <- NA

# Build the test set the same way, again with five missing salaries.
test <- data.frame(
  salary = sample(2000:3000, 100, replace = TRUE),
  revenue = factor(
    sample(c("A", "B"), 100, replace = TRUE),
    levels = c("A", "B")
  )
)
test[sample(seq_len(nrow(test)), 5), "salary"] <- NA
数据转换
# Fit the preprocessing steps (median imputation, scaling, centering) on the
# training set only, so the test set is transformed with training statistics.
# The call is namespace-qualified so it works without attaching caret.
transformation <- caret::preProcess(
  train,
  method = c("medianImpute", "scale", "center")
)
# Apply the transformation fitted on the training set to both data sets.
transformed_train <- predict(transformation, newdata = train)
transformed_test <- predict(transformation, newdata = test)
交叉验证
# Resampling configuration passed to caret::train(): method "repeatedcv" with
# number = 5 and repeats = 5. Class probabilities are requested, hold-out
# predictions are saved, and progress is logged while training.
trControl <- caret::trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 5,
  classProbs = TRUE,
  savePredictions = TRUE,
  verboseIter = TRUE
)
建模
# Fit a k-nearest-neighbours classifier for `revenue` on all other columns,
# using the resampling scheme stored in `trControl`.
# NOTE(review): `transformed_train` has already been median-imputed, scaled and
# centered by the data-transformation step, so the preProcess steps requested
# here are applied a second time; kept to show the model-stage option.
model_knn <- caret::train(
  revenue ~ .,
  data = transformed_train,
  method = "knn",
  preProcess = c("scale", "center", "knnImpute"),
  trControl = trControl
)

# Compare test-set predictions with the observed classes.
# confusionMatrix() requires both arguments to be factors with the same
# levels, so the reference column is coerced using the prediction levels
# (this works whether `revenue` is stored as character or as factor).
predicted_revenue <- predict(model_knn, transformed_test)
observed_revenue <- factor(
  transformed_test$revenue,
  levels = levels(predicted_revenue)
)
caret::confusionMatrix(predicted_revenue, observed_revenue)
Context
以上是我给出的一个示例,说明如何使用 caret 包中的 preProcess 函数对数据进行插补(填补缺失值)。preProcess 也可以在建模阶段使用。
问题
我不想使用 caret 的预处理功能,而是想知道如何改用 mice 插补包来实现?我的想法是:先在训练集上进行插补,再把训练集上得到的插补结果应用到测试集上。
我想唯一需要更新的是数据转换部分。