我用下面的代码运行 caret 包中的随机森林（random forest）模型：
library(quantmod)
library(Hmisc)
library(caret)
# Number of trailing rows held back; it sets how many cut-offs are evaluated.
daysback <- 1
# caret method names of interest.
mlmodels <- c("rf", "knn")

# Download the ^FTSE series and fill missing values by interpolation.
my.df <- na.approx(getSymbols(Symbols = "^FTSE", auto.assign = FALSE))

# Percentage change from column 1 to column 4 of each row
# (presumably open -> close; confirm against the getSymbols column order).
difvar <- 100 * (my.df[, 4] - my.df[, 1]) / my.df[, 1]

# Flatten to a plain data frame; the row names carry the observation dates.
data <- as.data.frame(difvar)
dframe <- data.frame(
  ds = as.Date(rownames(data)),
  y = factor(sign(data[[1]]))
)
# Add columns y1 ... y365, each holding the outcome shifted by i rows.
# Columns are assigned with `[[<-` on a computed name instead of building and
# re-parsing code strings with eval(parse(text = ...)).
for (i in seq_len(365)) {
  dframe[[paste0("y", i)]] <- Lag(dframe$y, i)
}
# Calendar features derived from the observation date.
dframe$year <- as.numeric(format(dframe$ds, "%Y"))
dframe$month <- as.numeric(format(dframe$ds, "%m"))
dframe$day <- as.numeric(format(dframe$ds, "%d"))
# Day of week coded 1 (Monday) .. 7 (Sunday). Derived from POSIXlt's numeric
# wday field (0 = Sunday) rather than matching weekdays() against English day
# names, which produces NA for every row under a non-English locale and would
# make complete.cases() below drop the whole data set.
dframe$dow <- as.numeric((as.POSIXlt(dframe$ds)$wday + 6) %% 7 + 1)
# Keep only rows without any missing value.
dframe <- dframe[complete.cases(dframe), ]
# First and last row index used as training cut-offs.
start <- nrow(dframe) - daysback
end <- nrow(dframe)
# dframe$y is a factor, and sign() raises an error on factors. Recover the
# numeric values from the level labels instead (going through as.character
# avoids returning the internal integer codes).
actual_sign <- data.frame(
  act = as.numeric(as.character(dframe$y)),
  date = dframe$ds
)
# Fit a random forest on an expanding window: one model per cut-off row j.
rf_f <- data.frame()
res <- list()

# Fail early with a clear message when too few complete rows are available;
# otherwise start:end would contain non-positive indices.
if (start < 1) {
  stop("Not enough complete rows in dframe for the requested daysback.",
       call. = FALSE)
}

for (j in start:end) {
  print(j)
  # Training data: every row up to and including the cut-off.
  dframetmp <- dframe[seq_len(j), ]
  # All columns except the date are used as predictors of y.
  m_rf <- train(y ~ . - ds, data = dframetmp, method = "rf")
  # Keep every fitted model, keyed by its cut-off row; previously each
  # iteration overwrote m_rf and only the last fit survived.
  res[[as.character(j)]] <- m_rf
}
可以看到，我的 y 和 x 都是分类变量：年、月、日、星期以及 y 的滞后值也都是分类的。除了用代码把这些变量全部转换为因子（factor）之外，我是否还需要创建虚拟变量（dummy variables）？有什么优化建议吗？
非常感谢 乔治