我已经用 Python 创建了一个 XGBoost 模型,但不知道如何用它进行预测。例如,FB Prophet 允许设置要预测的步数。请问:要让 XGBoost 向前预测 5 步,我应该运行什么代码?
我已经建立并评估了一个模型,我只需要了解如何使用它即可。
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, mean_absolute_error
plt.style.use('fivethirtyeight')

# Load the daily series. The first CSV column is parsed as dates and used as
# the index. The path is a raw string so the Windows backslashes are taken
# literally instead of being read as (invalid) escape sequences.
dfs = pd.read_csv(
    r'F:\TDG\Analysts\Ops Analyst\Files\885 OCtober 2016+ Daily.csv',
    index_col=[0],
    parse_dates=[0],
)

# Rows dated on or before split_date form the training set; later rows form
# the test set. Copies are taken so that columns added later do not write
# back into dfs.
split_date = '1/1/2018'
dfs_train = dfs.loc[dfs.index <= split_date].copy()
dfs_test = dfs.loc[dfs.index > split_date].copy()

# Plot both partitions on one chart, relabelling 'y' so the legend
# distinguishes the two sets.
_ = (
    dfs_test
    .rename(columns={'y': 'TEST SET'})
    .join(dfs_train.rename(columns={'y': 'TRAINING SET'}), how='outer')
    .plot(figsize=(15, 5), title='data', style='.')
)
def create_features(df, label=None):
"""
Creates time series features from datetime index
"""
df['date'] = df.index
df['hour'] = df['date'].dt.hour
df['dayofweek'] = df['date'].dt.dayofweek
df['quarter'] = df['date'].dt.quarter
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year
df['dayofyear'] = df['date'].dt.dayofyear
df['dayofmonth'] = df['date'].dt.day
df['weekofyear'] = df['date'].dt.weekofyear
X = df[['hour','dayofweek','quarter','month','year',
'dayofyear','dayofmonth','weekofyear']]
if label:
y = df[label]
return X, y
return X
# Build the calendar feature matrices and targets for both partitions.
X_train, y_train = create_features(dfs_train, label='y')
X_test, y_test = create_features(dfs_test, label='y')

# n_estimators is an upper bound on boosting rounds: early stopping below
# can end training sooner.
reg = xgb.XGBRegressor(n_estimators=1000)

# NOTE(review): newer xgboost releases expect early_stopping_rounds on the
# XGBRegressor constructor rather than on fit() -- confirm against the
# installed version before upgrading.
# NOTE(review): the test partition is part of eval_set, so it presumably
# drives the early-stopping decision -- confirm this is intended.
reg.fit(
    X_train,
    y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    early_stopping_rounds=50,
    verbose=False,  # set to True to see per-round training output
)

# Bar chart of per-feature importance for the fitted model.
_ = plot_importance(reg, height=0.9)
# Forecast on Test Set
# Store the model's predictions for the test rows next to the actual values.
dfs_test['y_Prediction'] = reg.predict(X_test)

# Stack test and training rows so actuals and predictions share one chart;
# training rows have no 'y_Prediction' value.
dfs_all = pd.concat([dfs_test, dfs_train], sort=False)
_ = dfs_all[['y', 'y_Prediction']].plot(figsize=(15, 5))

# Error metrics on the test partition. The results are bare expressions, so
# they are only shown when run interactively (e.g. as a notebook cell).
mean_squared_error(
    y_true=dfs_test['y'],
    y_pred=dfs_test['y_Prediction'],
)
mean_absolute_error(
    y_true=dfs_test['y'],
    y_pred=dfs_test['y_Prediction'],
)
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of y_pred against y_true.

    Both inputs are converted to NumPy arrays. The result is expressed in
    percent (the mean relative error multiplied by 100). Each error is
    divided by the matching y_true entry, so a zero in y_true causes a
    division by zero.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100
# MAPE (in percent) on the test partition; a bare expression, so the value
# is only shown when run interactively.
mean_absolute_percentage_error(
    y_true=dfs_test['y'],
    y_pred=dfs_test['y_Prediction'],
)