由于 caret 中可用的某些机器学习方法没有内置的特征选择(don't have a built in feature selection),我正在尝试使用 rfe。
有人可以解释一下,除了训练时间和特征排名列表之外,train() 和 rfe() 之间的区别是什么?有没有只有 train() 能做而 rfe() 做不到的事情,反之亦然?
以下是我用来说明问题的示例:
# worker functions
# Run recursive feature elimination (RFE) around an SVM and evaluate the
# resulting model on the hold-out data.
#
# Args:
#   trainctrl: a trainControl() object, passed to rfe() as `trControl`
#     (with caretFuncs the extra arguments are handed on to train()).
#
# Reads `training`, `testing` and `subsets` from the enclosing (global)
# environment of this script.
#
# Returns: the confusionMatrix() of the test-set predictions against
#   testing$Species.
useRFE <- function(trainctrl) {
  # `repeats` is only meaningful for method = "repeatedcv"; it was ignored
  # under plain "cv", so it is not passed here.
  control <- rfeControl(
    functions = caretFuncs,
    method = "cv",
    number = 5,
    returnResamp = "final",
    verbose = TRUE
  )

  # Select predictors by name. The previous `1:ncol(iris) - 1` parsed as
  # `(1:ncol(iris)) - 1`, i.e. 0:4, and only worked because index 0 is
  # silently dropped.
  predictor_names <- setdiff(names(training), "Species")

  # run the RFE algorithm
  resultsRFE <- rfe(
    training[, predictor_names, drop = FALSE],
    training$Species,
    sizes = subsets,
    rfeControl = control,
    method = "svmRadial",
    metric = "Accuracy",
    trControl = trainctrl
  )

  # test
  predicted <- predict(resultsRFE, testing)
  # With caretFuncs, predict() returns a data frame with a `pred` column when
  # class probabilities are enabled, and a bare factor otherwise.
  predictedClasses <- if (is.data.frame(predicted)) {
    predicted$pred
  } else {
    predicted
  }

  # Author's observed result on one run: Balanced Accuracy = 100%
  confusionMatrix(data = predictedClasses, reference = testing$Species)
}
# Fit a radial-kernel SVM on every predictor with train() and score it on the
# hold-out data.
#
# Args:
#   trainctrl: a trainControl() object used as `trControl` for train().
#
# Reads `training` and `testing` from the enclosing (global) environment.
#
# Returns: the confusionMatrix() of the test-set predictions against
#   testing$Species.
useTrain <- function(trainctrl) {
  # fit on the training partition
  fitted_model <- train(
    Species ~ .,
    data = training,
    method = "svmRadial",
    metric = "Accuracy",
    trControl = trainctrl
  )

  # score the hold-out partition
  holdout_pred <- predict(fitted_model, testing)

  # Author's observed result on one run: Balanced Accuracy = 100%
  confusionMatrix(data = holdout_pred, reference = testing$Species)
}
# load library
library(caret)
# load iris data
data(iris)
# create datasets
inTrain <- createDataPartition(
  y = iris$Species,
  p = 0.75,
  list = FALSE
)
## The output is a set of integers for the rows of iris
## that belong in the training set.
# Plain `<-` is enough at top level; `<<-` is only needed to reach an
# enclosing scope from inside a function.
training <- iris[inTrain, ]
testing <- iris[-inTrain, ]
# Candidate feature-subset sizes: 1 .. number of predictors.
# (`1:ncol(iris)-1` parses as `(1:ncol(iris)) - 1` and produced 0:4.)
subsets <- seq_len(ncol(iris) - 1)
# set controls
# `repeats` only applies to method = "repeatedcv", so it is omitted for "cv";
# `verboseIter` is the full argument name (`verbose` relied on partial
# matching).
trainctrl <- trainControl(
  classProbs = TRUE,
  method = "cv",
  number = 5,
  verboseIter = TRUE
)
# compare
ptm <- proc.time()
useTrain(trainctrl = trainctrl)
proc.time() - ptm
# Accuracy : 0.9167
#   user  system elapsed
#   1.54    0.06   10.27
ptm <- proc.time()
useRFE(trainctrl = trainctrl)
proc.time() - ptm
# Accuracy : 0.9167
#   user  system elapsed
#   0.86    0.04   24.44
答案 0 :(得分:1)
train 使用所有预测变量作为输入来构建并调优模型(builds and tunes)。rfe 执行递归特征消除:在尝试优化模型性能的过程中逐步移除不同的预测变量(different predictors are removed)。您可以将 rfe 视为对 train 的一种包装。
这些方法的解释在website上。