我试图比较几种回归和分类算法的性能,但是“e1071”包中的svm函数不会按照我喜欢的方式工作。
所以这是我的代码:
library("mlbench")
library("e1071")
library("hydroGOF")
data(BostonHousing)
data = BostonHousing
rm(BostonHousing)
data = na.omit(data)
svm = function(j, data){
### some preparations
n = dim(data)[1]
### empty vector for CV
fold_j = numeric(n)
### assign numbers for every observation from 1 to 10 for CV
fold_svm = c(rep(1:10, floor(n/10)), 1:(n%%10))
### empty vector for infold CV
MSE_infold = numeric(10)
### empty vector for cross validated MSE
MSE = numeric(j)
for(j in 1:j){
print(paste0("SVM ", j))
### pick a sample for 10-fold CV
fold_j = sample(fold_svm, n, replace = FALSE)
for(i in 1:10){
print(paste0("CV ", i))
### define train data
traindat_svm = data[fold_j != i, ]
### define test data
newdat = data[fold_j == i, ]
### svm
svm_model = svm(medv ~ .,
data = traindat_svm,
cost = 1000,
gamma = 0.0001)
### predict values for response (medv = column 14) with svm
svm_pred = predict(svm_model, newdat[,-14])
### calculate standardized MSE infold
MSE_infold[i] = ((mse(svm_pred, newdat[,14])) / var(data$medv)) * 100
}
### calculate MSE
MSE[j] = mean(MSE_infold)
}
MSE
}
如果我使用我的功能会发生什么,比如说
svm(10, data)
是我收到以下错误消息:
Error in svm(medv ~ ., data = traindat_svm, cost = 1000, gamma = 1e-04) :
unused arguments (cost = 1000, gamma = 1e-04)
如果我单独执行每个命令而没有函数/ for循环,它可以很好地工作。
任何想法?
提前致谢,Niklas