使用MXNet后端时,Keras无法加载模型权重

时间:2017-10-11 22:48:57

标签: python tensorflow keras mxnet

我正在尝试填补一条半小时分辨率的碳通量时间序列中的缺失值。我想使用训练-测试-验证交叉验证来找出最简约的LSTM模型:先训练一个包含所有可用输入的模型,然后逐步剪除输入,直到分数不再改进。对于每个模型,我使用k-fold交叉验证将数据划分为90%训练集和10%验证集,然后在model.fit()中把训练集进一步划分为训练集和测试集。我使用早停(early stopping)来减少运行时间,并使用ModelCheckpoint保存最佳权重(即“val_loss”最低的那个epoch)。之后,我想加载这些模型权重,选用在测试集上表现最佳的那组,再计算留出的10%验证集上的验证分数(MSE)。

以下是我用来训练LSTM的一个可运行的代码示例,它包含9个输入因子和13个时间步(即每次观察之前的6小时历史)

import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations
from functools import partial
from multiprocessing import Pool
from sklearn.neural_network import MLPRegressor as MPR
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import RepeatedKFold
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping,ModelCheckpoint
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
# Cap TensorFlow's GPU memory grab at 90% of the card so other processes
# (or parallel workers) can share the GPU instead of TF reserving it all.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.9
session = tf.Session(config=config)

def TimeShape(rolls,X1):
    """Stack ``rolls`` lagged copies of a 2-D sample matrix into a 3-D array.

    Returns an array of shape (n_samples, rolls + 1, n_features) where
    slice [:, 0, :] is X1 unchanged and slice [:, k, :] is X1 shifted down
    by k rows.  np.roll wraps around, so the first k rows of each lagged
    slice come from the end of the series — callers are expected to trim
    those leading rows off afterwards.
    """
    n_samples, n_features = X1.shape
    stacked = np.zeros(shape=(n_samples, rolls + 1, n_features))
    # Lag 0 is the identity shift, so one loop covers every slice.
    for lag in range(rolls + 1):
        stacked[:, lag, :] = np.roll(X1, lag, axis=0)
    return stacked

def LSTM_Model(time_steps,inputs,load=None):
    """Build and compile a two-layer stacked LSTM regressor (Keras 1.x API).

    ``time_steps`` is the number of lags (the input window is time_steps+1),
    ``inputs`` is the number of predictor features.  ``load`` is currently
    unused.  Returns the compiled Sequential model.
    """
    NUM_GPU = 1 # or the number of GPUs available on your machine
    gpu_list = ['gpu(%d)' % i for i in range(NUM_GPU)]
    model = Sequential()
    model.add(LSTM(12, input_shape=(time_steps+1,inputs),return_sequences=True,init='normal', activation='tanh'))
    model.add(LSTM(6,init='normal', activation='tanh'))
    # Single linear output unit: the model regresses one flux value.
    model.add(Dense(1, init='normal',activation='linear'))
    # 'context' is an MXNet-backend-only compile kwarg. - Add if using MXNET
    model.compile(loss='mean_squared_error', optimizer='adam',context=gpu_list)
    return model

class LossHistory(keras.callbacks.Callback):
    """Keras callback that records per-epoch training and validation loss,
    so learning curves can be plotted after each fit() call."""

    def on_train_begin(self, logs=None):
        # Reset the histories at the start of every fit() call.
        self.train_losses = []
        self.test_losses = []

    def on_epoch_end(self, batch, logs=None):
        # FIX: original used the mutable default argument ``logs={}``,
        # which is shared across calls; use a None sentinel instead.
        logs = logs or {}
        self.train_losses.append(logs.get('loss'))
        self.test_losses.append(logs.get('val_loss'))

class LSTM_Optimize:
    """Fit and cross-validate an LSTM gap-filler for a half-hourly flux series.

    Loads the CSV at ``Path``, derives day-of-year / hour-of-day predictors,
    drops rows with a missing target, and standardizes the target ``y_var``.
    ``Run`` then trains an LSTM with repeated k-fold CV over a chosen set of
    predictor columns and prints the mean test-fold MSE.
    """
    def __init__(self,Path,y_var):
#       Read and prep the data: parse timestamps into a DatetimeIndex and
#       derive day-of-year / hour-of-day as float-valued predictors.
        self.Master = pd.read_csv(Path,delimiter = ',',header = 0,na_values = -9999)
        self.Master = self.Master.set_index(pd.DatetimeIndex(pd.to_datetime(self.Master['datetime'])))
        self.Master['DOY'] = self.Master.index.dayofyear*1.0
        self.Master['HR'] = self.Master.index.hour*1.0
        # Keep only rows where the target is finite, then fill remaining gaps
        # in the predictors (interpolate, then backfill / forward-fill).
        self.Data = self.Master[np.isfinite(self.Master[y_var])]
        self.Data = self.Data.interpolate().bfill()
        self.Data = self.Data.interpolate().ffill()
#       Normalize the Y variable.  The pipeline takes care of X but not Y;
#       the models were found to work better when Y is normalized as well.
        self.y = self.Data[y_var].values
        self.YStandard = StandardScaler()
        self.YScaled = self.YStandard.fit(self.y.reshape(-1, 1))
        Yscale = self.YScaled.transform(self.y.reshape(-1, 1))
        self.y = np.ndarray.flatten(Yscale)
        # Un-normalized copy of the target, kept for later comparison.
        self.Ytru = self.YScaled.inverse_transform(self.y.reshape(-1,1))

    def Run(self,Inputs):
        """Cross-validate the LSTM on the predictor columns ``Inputs``.

        Standardizes X, stacks 12 lagged copies (6 h of half-hourly history),
        then runs 10-fold CV repeated twice with early stopping and a
        best-weights checkpoint, plotting each fold's loss curves and
        printing the mean test-set MSE (on the normalized scale).
        """
        # Prepare the input data: scale X, add the lag axis, and trim the
        # first time_steps+1 rows (np.roll wraps data in from the series end).
        time_steps = 12
        X = self.Data[Inputs]
        input_shape = len(Inputs)
        self.XStandard = StandardScaler()
        self.XScaled= self.XStandard.fit(X)
        Xscale = self.XScaled.transform(X)
        Xscale = TimeShape(time_steps,Xscale)
        Xscale = Xscale[time_steps+1:,:,:]
        # NOTE(review): this truncates self.y in place, so calling Run() a
        # second time on the same instance would shorten y again.
        self.y = self.y[time_steps+1:]


        # Stop after 25 epochs with no val_loss improvement; checkpoint the
        # best (lowest val_loss) weights to disk for reloading after fit.
        ES = EarlyStopping(monitor='val_loss', min_delta=0.0, patience=25, verbose=1, mode='auto')
        CH = ModelCheckpoint(filepath='weights.hdf5',monitor='val_loss', verbose=0, save_best_only=True)
        HS=LossHistory()
        MSE = []
        kf = RepeatedKFold(n_splits=10,n_repeats=2)
        batch_size=25
        # NOTE(review): the model is built once and reused for every fold, so
        # trained weights carry over between folds — confirm this is intended.
        Mod = LSTM_Model(time_steps,input_shape)
        plt.figure(figsize = (7,7))
        for train,test in kf.split(Xscale,self.y):
            Mod.fit(Xscale[train],self.y[train],batch_size=batch_size, nb_epoch=1000,validation_split=0.1,
                    shuffle=True,callbacks=[ES,CH,HS],verbose=0)
            # NOTE(review): this first predict() result is overwritten two
            # lines below without being used — it appears redundant.
            Y = Mod.predict(Xscale[test],batch_size = batch_size)
            # Reload the checkpointed best weights before scoring the fold.
            # (This load_weights call is where the MXNet backend fails.)
            Mod.load_weights('weights.hdf5')
            Y = Mod.predict(Xscale[test],batch_size = batch_size)
            MSE.append(metrics.mean_squared_error(self.y[test],Y))
            # Dashed line: validation loss; solid line: training loss.
            plt.plot(HS.test_losses,linestyle='--')
            plt.plot(HS.train_losses)

        print(Mod.summary())
        print(np.asanyarray(MSE).mean())

Path = 'FluxData.csv'
# FIX: '% matplotlib inline' is an IPython/Jupyter magic, not Python syntax —
# as written it is a SyntaxError when this file runs as a plain script.
# Invoke the magic only when an IPython kernel is actually present.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # not running under IPython/Jupyter
start_time = time.time()
if __name__ == '__main__':
    # Candidate predictor columns for the CH4 flux model.
    CH4_Model = ['Sedge','Shrubby','Temp','VWC','ustar','wind_speed','air_pressure',
             'PPFD_Avg','NR_Wm2_Avg','AirTC_Avg']
    y_var = 'ch4_flux'
    Model = CH4_Model
    Best = LSTM_Optimize(Path,y_var)
    Best.Run(Model)
    print()
    print("--- %s seconds ---" % (time.time() - start_time))

以下是我的数据集的几行 - 实际系列有1000个观察值

datetime,co2_flux,ch4_flux,ustar,wind_speed,AirTC_Avg,air_pressure,AirTC_Min,RH,PPFD_Avg,NR_Wm2_Avg,VWC,Temp,Sedge,Shrubby
7/11/2016 8:00,-0.337747167,0.011732699,0.404379747,3.887986435,15.07,101118.6513,15.03,92.7,414.2,225.1,0.5895,7.950660426,0.001292044,0.823794007
7/11/2016 8:30,-1.021087283,0.010256442,0.424094541,3.94983083,14.89,101144.0926,14.84,92.8,339.7,177.1,0.5895,8.24119905,0.001058732,0.826866339
7/11/2016 9:00,-0.146511388,0.008503355,0.456274817,4.687202214,14.71,101177.3176,14.63,93.4,354.4,183.7,0.5895,8.146344257,0.000474955,0.84272365
7/11/2016 9:30,0.144368521,0.009458078,0.462915317,4.810986576,14.27,101203.9191,14.2,93.3,370.2,188.4,0.5895,7.995179025,0.00147768,0.854715683
7/11/2016 10:00,1.471425801,0.014895985,0.47095652,5.098075355,13.7,101235.9171,13.62,94.3,462.9,233.9,0.5895,7.521166721,4.64E-05,0.871581919
7/11/2016 10:30,0.889911286,0.01564225,0.487227522,4.969666239,13.13,101277.0195,13.04,96,309.9,155.2,0.5895,7.923818563,8.14E-06,0.880709962

当我使用TensorFlow后端运行时,一切顺利,能得到预期的结果。但是,如果我尝试使用MXNet后端运行,它在加载已保存的模型权重时失败,并得到如下回溯:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-1-14c6597a2feb> in <module>()
    114     Model = CH4_Model
    115     Best = LSTM_Optimize(Path,y_var)
--> 116     Best.Run(Model)
    117     print()
    118     print("--- %s seconds ---" % (time.time() - start_time))

<ipython-input-1-14c6597a2feb> in Run(self, Inputs)
     96                     shuffle=True,callbacks=[ES,CH,HS],verbose=0)
     97             Y = Mod.predict(Xscale[test],batch_size = batch_size)
---> 98             Mod.load_weights('weights.hdf5')
     99             Y = Mod.predict(Xscale[test],batch_size = batch_size)
    100             MSE.append(metrics.mean_squared_error(self.y[test],Y))

/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/engine/topology.py in load_weights(self, filepath, by_name)
   2718             self.load_weights_from_hdf5_group_by_name(f)
   2719         else:
-> 2720             self.load_weights_from_hdf5_group(f)
   2721 
   2722         if hasattr(f, 'close'):

/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/engine/topology.py in load_weights_from_hdf5_group(self, f)
   2804                         weight_values[0] = w
   2805                 weight_value_tuples += zip(symbolic_weights, weight_values)
-> 2806             K.batch_set_value(weight_value_tuples)
   2807 
   2808     def load_weights_from_hdf5_group_by_name(self, f):

/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/backend/mxnet_backend.py in batch_set_value(tuples)
   2205     """
   2206     for p, w in tuples:
-> 2207         set_value(p, w)
   2208 
   2209 

/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/backend/mxnet_backend.py in set_value(x, value)
   2193     if isinstance(value, Number):
   2194         value = [value]
-> 2195     x.bind(mx.nd.array(value))
   2196 
   2197 

/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in array(source_array, ctx, dtype)
   1295                 raise TypeError('source_array must be array like object')
   1296     arr = empty(source_array.shape, ctx, dtype)
-> 1297     arr[:] = source_array
   1298     return arr
   1299 

/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in __setitem__(self, key, value)
    384                 _internal._set_value(float(value), out=self)
    385             elif isinstance(value, (np.ndarray, np.generic)):
--> 386                 self._sync_copyfrom(value)
    387             else:
    388                 raise TypeError(

/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in _sync_copyfrom(self, source_array)
    556             print(self.shape)
    557             raise ValueError('Shape inconsistent: expected %s vs got %s'%(
--> 558                 str(self.shape), str(source_array.shape)))
    559         check_call(_LIB.MXNDArraySyncCopyFromCPU(
    560             self.handle,

ValueError: Shape inconsistent: expected () vs got (1,)

为什么我要使用MXNet?它似乎比TensorFlow更快,而我将不得不在许多具有不同输入、不同节点数和超参数的模型上执行训练-测试-验证。通过使用多进程并行训练多个不同的模型,我已经能够借助MXNet后端显著提高Keras模型的训练速度。但是,使用TensorFlow后端时,我尝试多进程会出现线程锁定错误。

对于上下文,我在p2.xlarge实例上使用深度学习AMI Ubuntu Linux - 2.3_Sep2017(ami-d6ee1dae)环境。

任何想法都将不胜感激!

1 个答案:

答案 0 :(得分:3)

MXNet后端LSTM层使用需要静态声明存储桶大小(长度)的MXNet bucketing module,但是,Keras和TF支持动态长度。因此,您会看到加载模型权重和形状不匹配的问题。

目前正在努力为MXNet后端添加Keras2支持,在Keras2中应该注意这个问题。https://github.com/deep-learning-tools/keras/tree/keras2_mxnet_backend https://github.com/keras-team/keras/issues/8697