XGBOOST时间序列预测

时间:2019-09-10 15:44:58

标签: python time-series xgboost

我已经用 Python 创建了一个 XGBoost 模型,但不知道如何用它来做预测。例如,FB Prophet 允许设置要预测的步数。请问我应该运行什么代码,才能用 XGBoost 向前预测 5 个时间步?

我已经建立并评估了一个模型,我只需要了解如何使用它即可。

 import numpy as np # linear algebra
    import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
    import seaborn as sns
    import matplotlib.pyplot as plt
    import xgboost as xgb
    from xgboost import plot_importance, plot_tree
    from sklearn.metrics import mean_squared_error, mean_absolute_error
    plt.style.use('fivethirtyeight')

# Load the series; the first CSV column is parsed as dates and used as the index.
# The path is a raw string so the Windows backslashes are never treated as
# escape sequences (the resulting path is identical to the original literal).
dfs = pd.read_csv(
    r'F:\TDG\Analysts\Ops Analyst\Files\885 OCtober 2016+ Daily.csv',
    index_col=[0],
    parse_dates=[0],
)

# Chronological split: rows up to and including split_date form the training
# set, later rows form the test set. Copies are taken so that columns added
# later do not touch `dfs`.
split_date = '1/1/2018'
dfs_train = dfs.loc[dfs.index <= split_date].copy()
dfs_test = dfs.loc[dfs.index > split_date].copy()

# Plot both partitions on one axis, with the 'y' column relabelled per split.
_ = (
    dfs_test
    .rename(columns={'y': 'TEST SET'})
    .join(dfs_train.rename(columns={'y': 'TRAINING SET'}), how='outer')
    .plot(figsize=(15, 5), title='data', style='.')
)

def create_features(df, label=None):
    """
    Create calendar features from the datetime index of ``df``.

    The feature columns (plus a helper ``date`` column holding the index)
    are added to ``df`` in place.

    Args:
        df: DataFrame whose index holds datetimes.
        label: Optional name of the target column in ``df``.

    Returns:
        ``X`` (the eight feature columns) when ``label`` is not given,
        otherwise the tuple ``(X, y)`` where ``y`` is ``df[label]``.
    """
    df['date'] = df.index
    stamps = df['date'].dt

    df['hour'] = stamps.hour
    df['dayofweek'] = stamps.dayofweek
    df['quarter'] = stamps.quarter
    df['month'] = stamps.month
    df['year'] = stamps.year
    df['dayofyear'] = stamps.dayofyear
    df['dayofmonth'] = stamps.day
    # `.dt.weekofyear` no longer exists in current pandas; the ISO calendar
    # week is the same quantity. Cast to int64 because isocalendar() returns
    # a nullable UInt32 column.
    df['weekofyear'] = stamps.isocalendar().week.astype('int64')

    X = df[['hour', 'dayofweek', 'quarter', 'month', 'year',
            'dayofyear', 'dayofmonth', 'weekofyear']]
    if not label:
        return X
    return X, df[label]

# Build the feature matrix and target vector for each partition.
X_train, y_train = create_features(dfs_train, label='y')
X_test, y_test = create_features(dfs_test, label='y')

# early_stopping_rounds is configured on the estimator itself: passing it to
# fit() is deprecated and recent XGBoost releases reject it.
# NOTE(review): the test split is part of eval_set, so early stopping can see
# it; metrics reported on that same split may be optimistic.
reg = xgb.XGBRegressor(n_estimators=1000, early_stopping_rounds=50)
reg.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    verbose=False,  # Change verbose to True if you want to see it train
)

# Plot the importance of each feature in the fitted model.
_ = plot_importance(reg, height=0.9)

# Forecast on Test Set

# Predict the test period and store the result next to the actual values.
dfs_test['y_Prediction'] = reg.predict(X_test)

# Stack test and training rows; only the test rows carry a prediction.
dfs_all = pd.concat([dfs_test, dfs_train], sort=False)

# Actual series and predictions on a single chart.
_ = dfs_all[['y', 'y_Prediction']].plot(figsize=(15, 5))

# Mean squared error on the test partition.
mean_squared_error(
    y_true=dfs_test['y'],
    y_pred=dfs_test['y_Prediction'],
)

# Mean absolute error on the test partition.
mean_absolute_error(
    y_true=dfs_test['y'],
    y_pred=dfs_test['y_Prediction'],
)

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) in percent.

    Both inputs are converted to NumPy arrays first. Each error is taken
    relative to the corresponding actual value, so a zero in ``y_true``
    produces a division by zero.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100

# MAPE of the predictions on the test partition.
mean_absolute_percentage_error(
    y_true=dfs_test['y'],
    y_pred=dfs_test['y_Prediction'],
)

0 个答案:

没有答案