训练DNN进行回归时的NaN损失和指标

时间:2020-07-22 08:08:35

标签: tensorflow keras dotnetnuke

我正在训练几种模型,以便尝试找到适合该工作的最佳超参数。

我的数据是7天不同的像素值,回归结果是第7天的像素值。

我希望它理解起来不会太复杂,但是我正尝试从scratch(从头)训练每个模型,因此我将获得在超参数之间进行比较的真实结果(因此keras.backend.clear_session())。

请参阅下面的代码:

import pickle
import pandas as pd
import numpy as np
import tensorflow
import os
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt


def load_data(interval):
    """Load the pickled feature matrix and target vector for one interval.

    Parameters
    ----------
    interval : int
        Interval length in days; selects the ``<interval>_d`` sub-directory
        under the pickle root.

    Returns
    -------
    tuple
        ``(X, y)`` exactly as stored in ``X_pixle.pickle`` / ``y_pixle.pickle``.
    """
    pickle_dir = 'C:/NDVI_DATA/pickle_pixel/' + str(interval) + '_d/'

    # Context managers guarantee the handles are closed; the original left
    # both files open for the lifetime of the process.
    with open(pickle_dir + 'X_pixle.pickle', 'rb') as picklx:
        X_inner = pickle.load(picklx)
    with open(pickle_dir + 'y_pixle.pickle', 'rb') as pickly:
        y_inner = pickle.load(pickly)

    return X_inner, y_inner


def optimizer_selector(name, learning_r):
    """Build a fresh Keras optimizer by name.

    Parameters
    ----------
    name : str
        One of ``'adam'``, ``'SGD'``, ``'adamax'`` or ``'adadelta'``.
    learning_r : float
        Learning rate passed to the optimizer constructor.

    Returns
    -------
    A newly constructed ``keras.optimizers`` instance.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported optimizers.  (The original
        fell through and raised an opaque ``UnboundLocalError`` instead.)
    """
    if name == 'adam':
        return keras.optimizers.Adam(learning_rate=learning_r)
    if name == 'SGD':
        # Nesterov momentum matches the original configuration.
        return keras.optimizers.SGD(learning_rate=learning_r, nesterov=True)
    if name == 'adamax':
        return keras.optimizers.Adamax(learning_rate=learning_r)
    if name == 'adadelta':
        return keras.optimizers.Adadelta(learning_rate=learning_r)
    raise ValueError('Unknown optimizer name: %r' % (name,))


# One row will be appended per trained hyper-parameter configuration.
RESULT_COLUMNS = ['Interval', 'Optimizer', 'LR', 'Epochs', 'Batch_size',
                  'Test_RMSE', 'History', 'Weights']
results = pd.DataFrame(columns=RESULT_COLUMNS)


def root_mean_squared_error(y_true, y_pred):
    """Root-mean-squared error between targets and predictions (Keras backend ops)."""
    squared_diff = K.square(y_true - y_pred)  # symmetric in its arguments
    return K.sqrt(K.mean(squared_diff))


# Root directory where per-configuration artifacts (checkpoints, saved models,
# results) are written.
model_dir = 'C:/Users/user/PycharmProjects/NDVI/models/1'
os.chdir(model_dir)

for inter_days in [5, 10, 15, 20]:
    os.chdir(model_dir)
    print("\nStarting interval: " + str(inter_days) + "\n")
    X, y = load_data(inter_days)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=20)
    print("X_train shape: ", X_train.shape)
    print("y_train shape: ", y_train.shape)
    print("X_test shape: ", X_test.shape)
    print("y_test shape: ", y_test.shape)

    # Build one model per interval.  clear_session() drops the previous graph
    # so state and layer names do not accumulate across intervals.
    keras.backend.clear_session()
    model = Sequential()
    model.add(Dense(units=128, kernel_initializer='normal', input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(units=256, kernel_initializer='normal', activation='relu'))
    model.add(Dense(units=256, kernel_initializer='normal', activation='relu'))
    model.add(Dense(units=256, kernel_initializer='normal', activation='relu'))
    model.add(Dense(units=1, kernel_initializer='normal', activation='linear'))

    model.compile(loss='mean_absolute_error', optimizer='adam',
                  metrics=[keras.metrics.RootMeanSquaredError(name='rmse')])
    # Snapshot the freshly initialized weights so every hyper-parameter
    # combination starts from the same initial point.
    Wsave = model.get_weights()

    # NOTE(review): a NaN loss on some intervals (as in the reported output)
    # is consistent with NaNs in the loaded data or diverging updates at
    # lr=0.01 — verify the inputs (e.g. np.isnan(X).any()) before training.
    for opti in ['adam', 'SGD', 'adamax', 'adadelta']:
        for lr in [0.01, 0.001, 0.0001]:
            curr_opti = optimizer_selector(opti, lr)
            model.compile(loss='mean_absolute_error', optimizer=curr_opti,
                          metrics=[keras.metrics.RootMeanSquaredError(name='rmse')])
            for epochs_run in [1, 2, 3]:
                for batch in [30, 50, 100, 200, 500]:
                    # Directory holding all artifacts for this configuration.
                    path = ('interval_' + str(inter_days) + '/' + opti + '/' + 'lr_' + str(lr)
                            + '/' + 'epochs_' + str(epochs_run) + '/' + 'batch_' + str(batch))
                    os.chdir(model_dir)
                    # exist_ok=True: re-running the sweep must not crash on a
                    # directory left over from a previous run.
                    os.makedirs(path, exist_ok=True)
                    os.chdir(model_dir + '/' + path)
                    print('\nStarting fit for ' + str(inter_days) + ' days using ' + opti + ' optimizer, learning rate: '
                          + str(lr) + ' epochs: ' + str(epochs_run) + ' and batch size: ' + str(batch))

                    # Reset weights so every fit starts from the same snapshot.
                    model.set_weights(Wsave)
                    model.summary()

                    # Checkpoint the best weights (by validation loss) of this run.
                    checkpoint_name = 'Weights--best.hdf5'
                    checkpoint = ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose=1, save_best_only=True,
                                                 mode='auto')
                    callbacks_list = [checkpoint]

                    # fit() returns the History object for THIS run; the original
                    # read model.history *before* fitting, recording stale data.
                    # Also pass callbacks_list directly — the original wrapped it
                    # in another list ([callbacks_list]), i.e. a list of lists.
                    history = model.fit(X_train, y_train, epochs=epochs_run, batch_size=batch, validation_split=0.2,
                                        callbacks=callbacks_list)
                    scores = model.evaluate(X_test, y_test)
                    weights = model.get_weights()

                    # Saving the model
                    model.save('saved_model')

                    # Record this configuration.  history.history is a plain
                    # dict of per-epoch metrics and pickles safely; the original
                    # also had a trailing comma that turned the dict into a
                    # 1-tuple.
                    model_summery = dict(Interval=inter_days, Optimizer=opti, LR=lr, Epochs=epochs_run,
                                         Batch_size=batch, Test_RMSE=scores[1], History=history.history,
                                         Weights=weights)

                    results = pd.concat([results, pd.DataFrame([model_summery])], ignore_index=True)


# Save all results to a pickle.
cwd = model_dir + '/' + 'results_pickle'
os.makedirs(cwd, exist_ok=True)  # the original chdir failed if it did not exist
os.chdir(cwd)
with open("results.pickle", 'wb') as results_pickle:
    pickle.dump(results, results_pickle)

我得到的输出是这样:

Starting fit for 5 days using SGD optimizer, learning rate: 0.01 epochs: 1 and batch size: 30
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 128)               1024      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               33024     
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_3 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 257       
=================================================================
Total params: 165,889
Trainable params: 165,889
Non-trainable params: 0
_________________________________________________________________
405/406 [============================>.] - ETA: 0s - loss: 0.1702 - rmse: 0.3044
Epoch 00001: val_loss improved from inf to 0.15796, saving model to Weights--best.hdf5
406/406 [==============================] - 5s 13ms/step - loss: 0.1702 - rmse: 0.3044 - val_loss: 0.1580 - val_rmse: 0.2883
204/204 [==============================] - 1s 5ms/step - loss: 0.1618 - rmse: 0.2921

Starting interval: 10

X_train shape:  (12308, 13)
y_train shape:  (12308, 1)
X_test shape:  (5276, 13)
y_test shape:  (5276, 1)

Starting fit for 10 days using adam optimizer, learning rate: 0.01 epochs: 1 and batch size: 30
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 128)               1792      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               33024     
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_3 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 257       
=================================================================
Total params: 166,657
Trainable params: 166,657
Non-trainable params: 0
_________________________________________________________________
326/329 [============================>.] - ETA: 0s - loss: nan - rmse: nan
Epoch 00001: val_loss did not improve from inf
329/329 [==============================] - 4s 13ms/step - loss: nan - rmse: nan - val_loss: nan - val_rmse: nan

0 个答案:

没有答案