我正在尝试使用knn
和我的数据集来构建caret
模型,其中True
(实际销售),DOW
(星期几)和{从{1}}到D1
(历史销售)。
D10
但是,当我编织文件时,它给了我library(caret)
library(reshape2)
library(dplyr)
library(tibble)
library(dummies)
#data
rm = matrix(rnorm(100*10, 10, 5), nrow = 100) %>% as.data.frame()
wide = cbind(
rnorm(100, 100, 1),
weekdays(seq(as.Date('2019/1/1'), by='day', length.out = 100)),
rm
)
colnames(wide) = c('true', 'DOW', paste0('D',1:10))
#preprocessing for knn
train.true = train[,1]
dow.tr = dummy(train$DOW, sep='.')
dow.te = dummy(test$DOW, sep='.')
k.train = cbind(train[, -c(2, nearZeroVar(train))], dow.tr)
k.test = cbind(test, dow.te)[,-2]
seq.knn.pre1 = rep(0, nrow(test))
for (i in 1:10){
this.train = k.train[, c((i+1):ncol(k.train))]
this.test = k.test[i, c((i+1):ncol(k.test))]
train.control = trainControl(method='repeatedcv', number=10, repeats = 1)
k = train(train.true~., method='knn', tuneLength = 8,
trControl=train.control, preProcess='scale',
data=data.frame(train.true, this.train))
seq.knn.pre1[i] = predict(k, this.test)
}
seq.knn.pre1 = cbind(true = test[,1], k.pred1 = seq.knn.pre1) %>% data.frame()
error
。
我猜测问题可能来自DOW虚拟变量。当我的模拟数据集不包含分类变量时,该代码会很好地结合在一起。我有可能在那里修复它吗?
任何建议都值得赞赏!