从Scikit-Optimize获得的超参数在优化之外运行不会提供相同的RMSE

时间:2019-01-17 05:34:34

标签: python-3.x optimization deep-learning lstm hyperparameters

我已经使用scikit-optimize包通过高斯过程(使用gp_minimize函数)进行了贝叶斯优化。我使用验证集进行了优化,并将最佳超参数值写入了csv文件。然后,我创建了一个脚本,从文件中读取最佳超参数,并将它们分配给模型超参数。然后,我训练模型并获得测试集的结果。作为测试步骤,在用最佳超参数训练模型之后,我没有使用测试集,而是使用验证集来评估模型,以查看是否获得与优化任务相同的RMSE值。事实证明,在优化之外使用相同参数在验证集上运行时,RMSE值要比优化中得到的差(我应该获得与优化中得到的相同的RMSE值,因为我基本上使用的是相同的超参数)。

当我检查测试脚本中分配给超参数的值时,发现在带小数的值中,小数部分的数字与csv中写入的数字不同。

我不明白问题是什么。任何帮助深表感谢。 另外请注意,小数位数很长。

我的测试代码和示例csv如下所示。

测试代码:

import tensorflow as tf
import matplotlib as mplt
mplt.use('agg')  # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
import csv
import atexit
from time import time, strftime, localtime
from datetime import timedelta
# Seed NumPy's global random number generator for this process.
np.random.seed(1)
# Seed TensorFlow at import time.
# NOTE(review): the model is built in the separate graph stored on RNNConfig.graph, and
# train_test() calls tf.set_random_seed(1) again inside that graph; presumably this call
# only affects the default graph -- confirm against the TensorFlow documentation.
tf.set_random_seed(1)


class RNNConfig():
    """Run configuration for the sales-prediction LSTM, held as class-level attributes.

    The attributes initialised to ``None`` are placeholders; the ``__main__`` section of
    this script assigns them (file paths and the hyperparameters read from the results
    CSV) on the module-level ``config`` instance before ``train_test()`` is called.
    """

    # TensorFlow graph the model is built in; created once, when the class body executes.
    graph = tf.Graph()
    # Width of the target/prediction vector (one value per sample).
    input_size = 1
    # Base names of the per-store CSV files; the script prepends a directory and appends '.csv'.
    fileNames = ['store2_1', 'store85_1', 'store259_1', 'store519_1', 'store725_1',
                 'store749_1',
                 'store934_1', 'store1019_1']
    # One entry per file in `fileNames` (selected with the same index): a list of
    # [min, max] pairs used by scale()/rescle(). The pairs are matched positionally to the
    # leading entries of `columns`, so only 'Sales' and 'DayOfWeek' have bounds here.
    column_min_max_all = [[[0, 11000], [1, 7]], [[0, 17000], [1, 7]], [[0, 23000], [1, 7]], [[0, 14000], [1, 7]],
                          [[0, 14000], [1, 7]], [[0, 15000], [1, 7]], [[0, 17000], [1, 7]], [[0, 25000], [1, 7]]]
    # DataFrame columns fed to the network; 'Sales' is first and is also the prediction target.
    columns = ['Sales', 'DayOfWeek', 'SchoolHoliday', 'Promo']
    # Number of input features per time step.
    features = len(columns)

    # --- hyperparameters, filled in from the hyperparameter CSV by the script entry point ---
    num_steps = None            # length of the input window (time steps per sample)
    lstm_size = None            # number of units passed to the LSTM cell
    batch_size = None           # samples per training batch
    init_learning_rate = None   # value fed for the learning rate during training
    learning_rate_decay = None  # decay_rate passed to tf.train.exponential_decay
    init_epoch = None           # decay_steps passed to tf.train.exponential_decay
    max_epoch = None            # number of passes over the training data
    hidden1_nodes = None        # units of the first dense layer
    hidden2_nodes = None        # units of the second dense layer
    dropout_rate = None         # dropout rate fed during training
    hidden1_activation = None   # activation callable (set via eval of the CSV string)
    hidden2_activation = None   # activation callable (set via eval of the CSV string)
    lstm_activation = None      # activation callable for the LSTM cell
    # --- per-run paths and scaling bounds, also filled in by the script entry point ---
    fileName = None             # full path of the store CSV to read
    column_min_max = None       # the entry of `column_min_max_all` for the selected store
    plotname = None             # output path of the prediction plot (PNG)
    writename = None            # output path of the true/predicted values CSV

# Single shared configuration instance read by every function in this module.
config = RNNConfig()



def secondsToStr(elapsed=None):
    """Render a duration, or the current local time when no duration is given.

    Args:
        elapsed: Number of seconds to format, or ``None``.

    Returns:
        ``str(timedelta(seconds=elapsed))`` (e.g. ``"1:01:01"``) when ``elapsed`` is
        given, otherwise the current local time as ``"YYYY-MM-DD HH:MM:SS"``.
    """
    if elapsed is not None:
        return str(timedelta(seconds=elapsed))
    return strftime("%Y-%m-%d %H:%M:%S", localtime())

def log(s, elapsed=None):
    """Print a timestamped message framed by two rows of '=' and followed by a blank line.

    Args:
        s: The message to print after the current timestamp.
        elapsed: Optional elapsed-time text; printed on its own line only when truthy.
    """
    banner = "=" * 40
    rows = [banner, "{!s} - {!s}".format(secondsToStr(), s)]
    if elapsed:
        rows.append("Elapsed time: {!s}".format(elapsed))
    # The trailing empty entry produces the blank line that closes the banner.
    rows.extend([banner, ""])
    print("\n".join(rows))

def endlog():
    """Log the "End Program" banner with the time elapsed since the module-level ``start``."""
    # `start` is the global set by the script entry point before the run begins.
    log("End Program", secondsToStr(time() - start))


def segmentation(data):
    """Cut a frame into sliding input windows and their next-step sales targets.

    Args:
        data: DataFrame containing at least the columns listed in ``config.columns``.

    Returns:
        A tuple ``(X, y)``: ``X`` holds one window of ``config.num_steps`` consecutive
        rows (each row being the ``config.features`` selected column values) per sample;
        ``y`` holds, for each window, a one-element list with the first selected column
        ('Sales') of the row that immediately follows the window.
    """
    width = config.features
    window = config.num_steps

    # Flatten the selected columns, row by row, into one long 1-D array.
    flat = np.array([value for row in data[config.columns].values for value in row])

    # Re-chunk the flat array into one feature vector per time step.
    steps = []
    for k in range(len(flat) // width):
        steps.append(np.array(flat[k * width: (k + 1) * width]))

    # Every position that still has a following row yields one (window, next row) pair.
    sample_count = len(steps) - window
    X = np.array([steps[first: first + window] for first in range(sample_count)])
    next_rows = np.array([steps[first + window] for first in range(sample_count)])

    # Keep only the sales value (first feature) of each target row.
    y = np.asarray([[row[0]] for row in next_rows])

    return X, y

def scale(data):
    """Return a min-max scaled copy of ``data``.

    Each ``[min, max]`` pair in ``config.column_min_max`` is matched positionally with the
    corresponding name in ``config.columns`` and that column is mapped to
    ``(value - min) / (max - min)``. Columns without a pair are left untouched.

    The input frame is not modified: callers pass slices of a larger DataFrame, and
    assigning columns on such a slice triggers pandas' chained-assignment warning and
    silently alters the caller's object.

    Args:
        data: DataFrame containing the columns to scale.

    Returns:
        A new DataFrame with the configured columns scaled.
    """
    scaled = data.copy()
    for column, bounds in zip(config.columns, config.column_min_max):
        low, high = bounds[0], bounds[1]
        scaled[column] = (scaled[column] - low) / (high - low)

    return scaled

def rescle(test_pred):
    """Map scaled predictions back to the original sales range.

    Inverts the min-max scaling of the first configured column using
    ``config.column_min_max[0]``.

    Args:
        test_pred: Iterable of scaled predictions.

    Returns:
        A list with one rescaled entry per element of ``test_pred``.
    """
    low = config.column_min_max[0][0]
    span = config.column_min_max[0][1] - low

    restored = []
    for pred in test_pred:
        restored.append((pred * span) + low)
    return restored


def pre_process():
    """Load the store CSV and build the train / validation / test arrays.

    Reads ``config.fileName``, removes closed days without sales, splits the rows into
    train, validation (June 2015) and test (July 2015) parts, min-max scales each part and
    segments it into windows with ``segmentation``.

    NOTE(review): the positional split assumes the rows are in chronological order with the
    June and July 2015 rows at the end of the file -- confirm against the data.

    Returns:
        ``(train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y,
        nonescaled_val_y)`` where the ``nonescaled_*`` targets come from the unscaled
        copies of the validation and test data.
    """
    store_data = pd.read_csv(config.fileName)

    # Drop rows where the store was closed and nothing was sold.
    store_data = store_data.drop(store_data[(store_data.Open == 0) & (store_data.Sales == 0)].index)
    # Disabled alternative: also drop open days with zero sales.
    # store_data = store_data.drop(store_data[(store_data.Open != 0) & (store_data.Sales == 0)].index)

    # Disabled: keep an unscaled copy of the whole data set.
    # original_data = store_data.copy()

    # Disabled: ratio-based split, replaced by the month-based split below.
    ## train_size = int(len(store_data) * (1.0 - test_ratio))

    # Validation = number of June 2015 rows, test = number of July 2015 rows,
    # training = everything before them.
    validation_len = len(store_data[(store_data.Month == 6) & (store_data.Year == 2015)].index)
    test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
    train_size = int(len(store_data) - (validation_len + test_len))

    train_data = store_data[:train_size]
    # The validation and test slices start `num_steps` rows early: segmentation() uses the
    # first `num_steps` rows only as input context, so the first target is the first row of
    # the actual validation / test period.
    validation_data = store_data[(train_size - config.num_steps): validation_len + train_size]
    test_data = store_data[((validation_len + train_size) - config.num_steps):]
    # Unscaled copies, taken BEFORE scale() is called because scale() assigns the scaled
    # columns onto the frame it receives.
    original_val_data = validation_data.copy()
    original_test_data = test_data.copy()

    # -------------- processing train data---------------------------------------
    scaled_train_data = scale(train_data)
    train_X, train_y = segmentation(scaled_train_data)

    # -------------- processing validation data---------------------------------------
    scaled_validation_data = scale(validation_data)
    val_X, val_y = segmentation(scaled_validation_data)

    # -------------- processing test data---------------------------------------
    scaled_test_data = scale(test_data)
    test_X, test_y = segmentation(scaled_test_data)

    # ----segmenting original validation data-----------------------------------------------
    # Only the unscaled targets are returned; the unscaled inputs are not used.
    nonescaled_val_X, nonescaled_val_y = segmentation(original_val_data)

    # ----segmenting original test data-----------------------------------------------
    nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)

    return train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y


def generate_batches(train_X, train_y, batch_size):
    """Yield consecutive ``(batch_X, batch_y)`` slices of the training data.

    The data is consumed in order, without shuffling; the last batch is shorter when the
    number of samples is not a multiple of ``batch_size``.

    Args:
        train_X: Sequence of input windows.
        train_y: Sequence of targets aligned with ``train_X``.
        batch_size: Maximum number of samples per batch.

    Yields:
        Tuples ``(batch_X, batch_y)``.

    Raises:
        AssertionError: If a window in a batch does not have ``config.num_steps`` steps.
    """
    full_batches, leftover = divmod(int(len(train_X)), batch_size)
    # A partial batch at the end still counts as one batch.
    total_batches = full_batches + 1 if leftover else full_batches

    for index in range(total_batches):
        lower = index * batch_size
        upper = lower + batch_size
        batch_X = train_X[lower:upper]
        batch_y = train_y[lower:upper]
        assert set(map(len, batch_X)) == {config.num_steps}
        yield batch_X, batch_y

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (in percent), ignoring zero targets.

    Both inputs are flattened and compared element by element; positions where the true
    value is 0 are skipped because the relative error is undefined there. A boolean mask
    is used instead of ``np.delete`` with the tuple from ``np.where``, which only removed
    the right entries for 1-D input.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predictions with the same number of elements.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the non-zero targets. The result
        is ``nan`` when every target is zero (mean of an empty selection).
    """
    y_true = np.array(y_true).ravel()
    y_pred = np.array(y_pred).ravel()

    keep = y_true != 0
    y_true = y_true[keep]
    y_pred = y_pred[keep]
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def RMSPE(y_true, y_pred):
    """Return the root mean square percentage error, as a fraction (not multiplied by 100).

    The error is taken relative to the TRUE value: ``sqrt(mean(((y_true - y_pred) /
    y_true) ** 2))``. The previous implementation divided by the prediction, which is not
    the percentage error and changes the score whenever predictions and targets differ.

    Args:
        y_true: Array-like of true values. Zero entries make the result ``inf``/``nan``;
            filter them out beforehand if they can occur.
        y_pred: Array-like of predictions, same shape as ``y_true``.

    Returns:
        The RMSPE reduced along axis 0 (a scalar for 1-D input).
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    relative_error = (y_true - y_pred) / y_true
    return np.sqrt(np.mean(np.square(relative_error), axis=0))

def plot(true_vals,pred_vals,name):
    """Save a line plot of predicted versus true sales as a PNG file.

    Exactly one figure is created and it is always closed again. The previous version
    opened an extra empty figure first that was never closed, so every call leaked one.

    Args:
        true_vals: Sequence of true sales values, one per day.
        pred_vals: Sequence of predicted sales values, same length as ``true_vals``.
        name: Output path of the PNG file.
    """
    fig = plt.figure(dpi=100, figsize=(20, 7))
    try:
        days = range(len(true_vals))
        plt.plot(days, pred_vals, label='pred sales')
        plt.plot(days, true_vals, label='truth sales')
        plt.legend(loc='upper left', frameon=False)
        plt.xlabel("day")
        plt.ylabel("sales")
        plt.grid(ls='--')
        plt.savefig(name, format='png', bbox_inches='tight', transparent=False)
    finally:
        # Close this specific figure even if plotting or saving fails.
        plt.close(fig)

def write_results(true_vals,pred_vals,name):
    """Write the true and predicted values side by side to a CSV file.

    Args:
        true_vals: Sequence of true values.
        pred_vals: Sequence of predictions; rows are produced by zipping both sequences,
            so the output has as many rows as the shorter one.
        name: Path of the CSV file to create (an existing file is overwritten).
    """
    # newline='' lets the csv writer control line endings itself; without it, platforms
    # that translate '\n' produce an empty line after every row.
    with open(name, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(zip(true_vals, pred_vals))


def _prefixed_path(path, prefix):
    """Return ``path`` with ``prefix`` prepended to its file name (directory unchanged)."""
    import os  # local import: the module's import header does not bring in `os`

    folder, filename = os.path.split(path)
    return os.path.join(folder, prefix + filename)


def _evaluate_split(sess, inputs, prediction, split_X, split_y, plot_path, results_path):
    """Predict one data split, save its plot and CSV, and print its error metrics.

    Args:
        sess: Session holding the trained (restored) variables.
        inputs: Input placeholder of the model.
        prediction: Output tensor of the model (scaled sales).
        split_X: Scaled input windows of the split.
        split_y: Unscaled true sales of the split (NumPy array).
        plot_path: Where to save the prediction plot.
        results_path: Where to save the true/predicted CSV.
    """
    # The dropout-rate placeholder is not fed, so it takes its default of 0.0 here.
    scaled_pred = sess.run(prediction, {inputs: split_X})

    # Back to the sales range, rounded to whole units, as a flat Python list.
    pred_vals = np.round(np.array(rescle(scaled_pred)), 0).astype(np.int32).flatten().tolist()
    true_vals = split_y.flatten().tolist()

    plot(true_vals, pred_vals, plot_path)
    write_results(true_vals, pred_vals, results_path)

    print("RMSE:", sqrt(mean_squared_error(true_vals, pred_vals)))
    print("MAE:", mean_absolute_error(true_vals, pred_vals))
    print("MAPE:", mean_absolute_percentage_error(true_vals, pred_vals))
    print("RMSPE:", RMSPE(true_vals, pred_vals))


def train_test():
    """Build the LSTM model, train it, then evaluate it on the validation and test sets.

    The model (LSTM -> two dense layers -> dropout -> linear layer with ReLU) is built in
    ``config.graph`` from the hyperparameters stored on ``config``. After training, the
    weights are saved to ``checkpoints_sales/sales_pred.ckpt``, restored in a fresh
    session and used to predict the validation split and then the test split.

    For each split the plot and the true/predicted CSV are written and RMSE, MAE, MAPE and
    RMSPE are printed. Test artifacts go to ``config.plotname`` / ``config.writename``;
    validation artifacts go to the same names prefixed with ``validation_`` (previously
    both splits wrote to the same paths, so the validation files were overwritten).
    """
    train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y = pre_process()

    # ------------------------ graph construction ------------------------------------
    with config.graph.as_default():

        tf.set_random_seed(1)

        inputs = tf.placeholder(tf.float32, [None, config.num_steps, config.features], name="inputs")
        targets = tf.placeholder(tf.float32, [None, config.input_size], name="targets")
        model_learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
        # Defaults to 0.0 so that no dropout is applied unless a rate is fed (training only).
        model_dropout_rate = tf.placeholder_with_default(0.0, shape=())
        global_step = tf.Variable(0, trainable=False)

        # NOTE(review): this rebinds `model_learning_rate` from the placeholder to the decayed
        # tensor, and the training loop below feeds that rebound name. Feeding the decayed
        # tensor directly appears to override its value, i.e. the optimizer would see the
        # constant `init_learning_rate` and the exponential decay would never take effect.
        # Left unchanged on purpose: the tuned hyperparameters presumably come from a run
        # with the same graph -- confirm against the optimisation script before fixing.
        model_learning_rate = tf.train.exponential_decay(learning_rate=model_learning_rate, global_step=global_step,
                                                         decay_rate=config.learning_rate_decay,
                                                         decay_steps=config.init_epoch, staircase=False)

        cell = tf.contrib.rnn.LSTMCell(config.lstm_size, state_is_tuple=True, activation=config.lstm_activation)

        val1, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)

        # Time-major layout so that the output of the last time step can be gathered.
        val = tf.transpose(val1, [1, 0, 2])
        last = tf.gather(val, int(val.get_shape()[0]) - 1, name="last_lstm_output")

        # NOTE(review): the activations are crossed (layer 1 uses hidden2_activation and
        # layer 2 uses hidden1_activation). Kept as-is because it must match whatever the
        # optimisation run used -- verify there.
        hidden1 = tf.layers.dense(last, units=config.hidden1_nodes, activation=config.hidden2_activation)
        hidden2 = tf.layers.dense(hidden1, units=config.hidden2_nodes, activation=config.hidden1_activation)

        # training=True keeps the dropout op active; the fed rate decides whether it drops.
        dropout = tf.layers.dropout(hidden2, rate=model_dropout_rate, training=True)

        weight = tf.Variable(tf.truncated_normal([config.hidden2_nodes, config.input_size]))
        bias = tf.Variable(tf.constant(0.1, shape=[config.input_size]))

        prediction = tf.nn.relu(tf.matmul(dropout, weight) + bias)

        loss = tf.losses.mean_squared_error(targets, prediction)
        optimizer = tf.train.AdamOptimizer(model_learning_rate)
        minimize = optimizer.minimize(loss, global_step=global_step)

        # Created after minimize() so the optimizer's variables are included as well.
        saver = tf.train.Saver()

    # --------------------training------------------------------------------------------

    with tf.Session(graph=config.graph) as sess:
        tf.set_random_seed(1)

        tf.global_variables_initializer().run()

        iteration = 1

        for epoch_step in range(config.max_epoch):

            for batch_X, batch_y in generate_batches(train_X, train_y, config.batch_size):
                train_data_feed = {
                    inputs: batch_X,
                    targets: batch_y,
                    model_learning_rate: config.init_learning_rate,
                    model_dropout_rate: config.dropout_rate
                }

                # Only the loss is needed; the raw LSTM output is no longer fetched.
                train_loss, _ = sess.run([loss, minimize], train_data_feed)

                if iteration % 5 == 0:
                    print("Epoch: {}/{}".format(epoch_step, config.max_epoch),
                          "Iteration: {}".format(iteration),
                          "Train loss: {:.6f}".format(train_loss))
                iteration += 1

        saver.save(sess, "checkpoints_sales/sales_pred.ckpt")

    # --------------------evaluation (validation, then test)----------------------------

    with tf.Session(graph=config.graph) as sess:
        saver.restore(sess, tf.train.latest_checkpoint('checkpoints_sales'))

        _evaluate_split(sess, inputs, prediction, val_X, nonescaled_val_y,
                        _prefixed_path(config.plotname, "validation_"),
                        _prefixed_path(config.writename, "validation_"))

        print("-------------------------------------------")

        _evaluate_split(sess, inputs, prediction, test_X, nonescaled_test_y,
                        config.plotname, config.writename)


if __name__ == '__main__':
    # Script entry point: load the tuned hyperparameters for one store, then train and
    # evaluate the model with them.

    start = time()

    # Register the end-of-run banner and announce the start BEFORE the long-running work;
    # previously both happened only after train_test() had already finished.
    atexit.register(endlog)
    log("Start Program")

    # Index of the store to process; it selects the file name, the scaling bounds and the
    # row of the hyperparameter CSV.
    # for i in range(len(config.fileNames)):

    i = 0

    config.fileName = '{}{}{}'.format('/home/suleka/sepre/', config.fileNames[i], '.csv')

    config.plotname = '{}{}{}'.format('Sales_Prediction_testset_without_zero_bsl_plot_', config.fileNames[i], '.png')
    config.writename = '{}{}{}'.format('Sales_Prediction_testset_without_zero_bsl_results_', config.fileNames[i],
                                       '.csv')

    config.column_min_max = config.column_min_max_all[i]

    # float_precision='round_trip' makes pandas parse each decimal to the nearest double,
    # the same value Python's float() gives; the default fast parser may differ in the
    # last digits for long decimals.
    hyperparameters = pd.read_csv('vali_without_zero_baseline_result.csv', header=None,
                                  float_precision='round_trip')

    # Column layout of the CSV: 0 = file name, 1..13 = hyperparameters, 14 = RMSE.
    # `.iloc[i, col]` replaces the deprecated `.iloc[i:, col].get_values()[0]`.
    # int()/float() turn NumPy scalars into plain Python numbers and also accept cells
    # that were read as whitespace-padded strings.
    config.num_steps = int(hyperparameters.iloc[i, 1])
    config.lstm_size = int(hyperparameters.iloc[i, 2])
    config.hidden2_nodes = int(hyperparameters.iloc[i, 3])
    config.hidden2_activation = hyperparameters.iloc[i, 4]
    config.hidden1_activation = hyperparameters.iloc[i, 5]
    config.hidden1_nodes = int(hyperparameters.iloc[i, 6])
    config.lstm_activation = hyperparameters.iloc[i, 7]
    config.init_epoch = int(hyperparameters.iloc[i, 8])
    config.max_epoch = int(hyperparameters.iloc[i, 9])
    config.learning_rate_decay = float(hyperparameters.iloc[i, 10])
    config.dropout_rate = float(hyperparameters.iloc[i, 11])
    config.batch_size = int(hyperparameters.iloc[i, 12])
    config.init_learning_rate = float(hyperparameters.iloc[i, 13])

    # The activations are stored as strings such as 'tf.nn.tanh'.
    # NOTE(review): eval() executes whatever text the CSV contains -- acceptable only
    # while that file is produced by our own optimisation run; do not use it on files
    # from an untrusted source.
    config.hidden1_activation = eval(config.hidden1_activation)
    config.hidden2_activation = eval(config.hidden2_activation)
    config.lstm_activation = eval(config.lstm_activation)

    train_test()

示例csv:

  

store2_1.csv,6,82,18,tf.nn.tanh,tf.nn.tanh,17,tf.nn.relu,17,66, 0.7208117865 ,0.1040798744,6, 0.0051688728,726.2278197328   
store85_1.csv,3,111,23,tf.nn.relu,tf.nn.relu,54,tf.nn.relu,5,107,0.7710698079,0.3024494235,46,0.0006901922,683.713975285

通过读取文件分配给python变量时,突出显示的值变为:

enter image description here

我怀疑这可能是为什么我没有获得与优化中相同的验证RMSE值的原因(与未获得正确的小数点值有关)。如果您有其他意见,请赐教。

0 个答案:

没有答案