我正在测试在预测时间序列并使用扩展窗口进行评估时如何使用机器学习模型。首先,我只尝试一种模型,但想扩展到多种模型。我希望使用多达400个观测值来估计模型,然后通过一个观测值扩展窗口并在最长12个月的时间内进行预测。为了检验这一点,我使用了R中的经济数据集。我想绘制所有预测在它们出现在扩展窗口中时以及拟合值,火车和测试数据。我想要做的是像下面的链接中的图一样的图形:
请参阅下面的代码,了解到目前为止的尝试。感谢您提出的任何建议/改进或指导,以寻求更多有关此问题的见解。
data <- economics
fittedmat <- matrix(0, nrow = 1000, ncol = 1)
fcmat <- matrix(0, nrow = 1000, ncol = 12)
truemat <- matrix(0, nrow = 1000, ncol = 12)
errormat <- matrix(0, nrow = 1000, ncol = 12)
for(i in 400:(nrow(data)-12)){
# Construct data sets/ fit and eval windows
train_data <- data[1:i, ]
test_data <- data[(i+1):(i+12) , ]
# Fit model
glmnet_fit <- train(train_data %>% select(-date, -unemploy) %>% as.matrix(),
train_data %>% pull(unemploy),
method = "glmnet")
# Fitted values
fittedmat[(1:i), ] <- predict(glmnet_fit,
train_data %>% select(-date, -unemploy) %>% as.matrix())
# Predicted values using model
fcmat[i, ] <- as.vector(predict(glmnet_fit,
test_data %>% select(-date, -unemploy) %>% as.matrix()))
# Combine and evaluate
truemat[i, ] <- as.vector(test_data$unemploy)
errormat[i, ] <- truemat[i, ] - fcmat[i, ]
}
fcmat <- fcmat[-(1:399), ]
fcmat <- fcmat[-(164:601), ]
truemat <- truemat[-(1:399), ]
truemat <- truemat[-(164:601), ]
#datemat <- datemat[-(1:399), ]
#datemat <- datemat[-(164:601), ]
#class(datemat) <- "Date"
errormat <- errormat[-(1:399), ]
errormat <- errormat[-(164:601), ]
fittedmat <- fittedmat[-(563: 1000), ]
fittedmattest <- cbind(train_data, as.matrix(fittedmat))
names(fittedmattest)[7] <- "fitted"
ggplot() +
geom_line(fittedmattest, mapping = aes(x = date, y = as.matrix(fittedmat)),
color = "green") +
geom_line(fittedmattest, mapping = aes(x = date, y = unemploy),
color = "red"
)
fcmatTest <- as.data.frame(fcmat[(163), ])
colnames(fcmatTest) <- c("forecast")
dates <- as.data.frame(test_data$date)
fcmatTest <- as.data.frame(cbind(dates, fcmatTest))
colnames(fcmatTest) <- c("dates", "forecast")
ggplot() +
geom_line(data = filter(train_data, date > "2010-01-01"), mapping = aes(x = date, y = unemploy),
color = "green") +
geom_line(data = filter(fittedmattest, date > "2010-01-01"), mapping = aes(x = date, y = fitted),
color = "red") +
geom_line(data = fcmatTest, mapping = aes(x = dates, y = forecast),
color = "black", linetype = "dashed") +
geom_line(data = test_data, mapping = aes(x = date, y = unemploy),
color = "blue")