我正在尝试对半小时分辨率的碳通量序列进行插补(gap-filling)。我想使用训练-测试-验证(train-test-validate)交叉验证来找出最简约的LSTM模型:先用所有可用输入训练一个模型,然后逐步剪除输入,直到得分不再提高。对于每个模型,我使用k折交叉验证将数据划分为90%的训练集和10%的验证集,然后在model.fit()中把训练集进一步划分为训练集和测试集。我使用提前停止(EarlyStopping)来缩短运行时间,并使用ModelCheckpoint保存最佳权重(即“val_loss”最低的那个epoch)。然后,我想加载这些模型权重,并用在测试集上表现最好的权重,计算模型在留出的10%数据集上的验证得分(MSE)。
以下是我训练LSTM的代码的一个可运行示例,其中包含10个输入因子(即代码中CH4_Model列出的变量)和13个时间步(当前观测及其之前6小时内的观测)
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations
from functools import partial
from multiprocessing import Pool
from sklearn.neural_network import MLPRegressor as MPR
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import RepeatedKFold
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping,ModelCheckpoint
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
# Build a TensorFlow session configuration and set the per-process GPU
# memory fraction option to 0.9.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.9
# NOTE(review): nothing in the visible code hands `session` to Keras (for
# example via keras.backend.set_session) -- confirm this setting actually
# applies to the session Keras trains in.
session = tf.Session(config=config)
def TimeShape(rolls,X1):
    """Stack lagged copies of a 2-D array into a 3-D array.

    Parameters
    ----------
    rolls : int
        Number of lagged copies to place after the unshifted data.
    X1 : numpy.ndarray
        2-D array indexed ``[row, column]``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(X1.shape[0], rolls + 1, X1.shape[1])``.
        Slice ``[:, 0, :]`` is ``X1`` itself and slice ``[:, k, :]`` is ``X1``
        rolled ``k`` rows along axis 0.

    Notes
    -----
    ``np.roll`` is circular: the first ``k`` rows of slice ``k`` hold values
    wrapped around from the end of ``X1``. A caller that treats rows as a
    time series must discard at least the first ``rolls`` rows of the result.
    """
    X = np.zeros(shape=(X1.shape[0], rolls + 1, X1.shape[1]))
    X[:, 0, :] = X1
    # range() is empty when rolls == 0, so no explicit guard is required.
    for lag in range(1, rolls + 1):
        X[:, lag, :] = np.roll(X1, lag, axis=0)
    return X
def LSTM_Model(time_steps,inputs,load=None):
    """Build and compile a stacked-LSTM regression network.

    The network consists of a 12-unit LSTM that returns its full sequence,
    a 6-unit LSTM, and a single-unit linear Dense output. It is compiled
    with mean-squared-error loss and the adam optimizer.

    Parameters
    ----------
    time_steps : int
        Number of lagged steps; the input window length is ``time_steps + 1``.
    inputs : int
        Number of features per step.
    load : optional
        Not used by this function.

    Returns
    -------
    The compiled Sequential model.
    """
    NUM_GPU = 1 # or the number of GPUs available on your machine
    gpu_list = ['gpu(%d)' % gpu for gpu in range(NUM_GPU)]

    window_shape = (time_steps + 1, inputs)
    stack = [
        LSTM(12, input_shape=window_shape, return_sequences=True,
             init='normal', activation='tanh'),
        LSTM(6, init='normal', activation='tanh'),
        Dense(1, init='normal', activation='linear'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    # `context` is the extra compile argument the original author added for
    # the MXNet backend.
    model.compile(loss='mean_squared_error', optimizer='adam', context=gpu_list)
    return model
class LossHistory(keras.callbacks.Callback):
    """Keras callback that records the loss values reported after each epoch.

    Attributes
    ----------
    train_losses : list
        Values of ``logs['loss']``, one per epoch.
    test_losses : list
        Values of ``logs['val_loss']``, one per epoch (``None`` when the key
        is absent from ``logs``).

    Both lists are re-created every time training begins, so after several
    consecutive fits they only describe the most recent one.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded histories at the start of training."""
        self.train_losses = []
        self.test_losses = []

    def on_epoch_end(self, batch, logs=None):
        """Append the current 'loss' and 'val_loss' entries of ``logs``.

        The first positional parameter keeps its original name ``batch``;
        it is not used here.
        """
        # Avoid a shared mutable default argument; treat a missing dict as empty.
        logs = logs or {}
        self.train_losses.append(logs.get('loss'))
        self.test_losses.append(logs.get('val_loss'))
class LSTM_Optimize:
    """Train and cross-validate an LSTM for one target column of a CSV file.

    ``__init__`` loads the data and standardizes the target; ``Run`` builds
    the lagged input tensor for a chosen set of input columns and evaluates
    the LSTM with repeated k-fold cross-validation.
    """

    def __init__(self,Path,y_var):
        """Load the CSV at ``Path`` and prepare the target column ``y_var``.

        Parameters
        ----------
        Path : str
            CSV file with a ``datetime`` column; ``-9999`` is read as missing.
        y_var : str
            Name of the target column.
        """
        # ** Read and prep data **
        self.Master = pd.read_csv(Path,delimiter = ',',header = 0,na_values = -9999)
        self.Master = self.Master.set_index(pd.DatetimeIndex(pd.to_datetime(self.Master['datetime'])))
        # Day-of-year and hour-of-day as float columns.
        self.Master['DOY'] = self.Master.index.dayofyear*1.0
        self.Master['HR'] = self.Master.index.hour*1.0
        # Keep only rows where the target is finite, then fill remaining gaps
        # in the other columns.
        self.Data = self.Master[np.isfinite(self.Master[y_var])]
        self.Data = self.Data.interpolate().bfill()
        self.Data = self.Data.interpolate().ffill()
        # ** Normalize the Y variable **
        # The target is standardized here; the inputs are standardized in Run.
        # The original author found the models work better with Y normalized.
        self.y = self.Data[y_var].values
        self.YStandard = StandardScaler()
        self.YScaled = self.YStandard.fit(self.y.reshape(-1, 1))
        Yscale = self.YScaled.transform(self.y.reshape(-1, 1))
        self.y = np.ndarray.flatten(Yscale)
        # Target mapped back to its original units, kept for reference.
        self.Ytru = self.YScaled.inverse_transform(self.y.reshape(-1,1))

    def Run(self,Inputs):
        """Cross-validate the LSTM using the columns listed in ``Inputs``.

        Parameters
        ----------
        Inputs : list of str
            Column names of ``self.Data`` to use as model inputs.

        Returns
        -------
        float
            Mean over all folds of the mean squared error between the
            standardized target and the predictions on the held-out fold.
            The same value is printed.

        Notes
        -----
        A new model and a new set of checkpoint/early-stopping callbacks are
        created for every fold. Sharing one model across folds would let
        weights trained on earlier folds (which contained the current
        held-out rows) leak into the evaluation, and a shared ModelCheckpoint
        would keep its best-so-far value across folds, so ``weights.hdf5``
        could hold weights saved during a previous fold.
        """
        # Preparing the input data
        time_steps = 12
        batch_size = 25
        X = self.Data[Inputs]
        input_shape = len(Inputs)
        self.XStandard = StandardScaler()
        self.XScaled = self.XStandard.fit(X)
        Xscale = self.XScaled.transform(X)
        Xscale = TimeShape(time_steps,Xscale)
        # Drop the leading rows, which contain lagged values that TimeShape
        # wrapped around from the end of the series.
        Xscale = Xscale[time_steps+1:,:,:]
        # Use a local, trimmed copy of the target: trimming self.y in place
        # would shorten it again on every call and misalign it with Xscale.
        y = self.y[time_steps+1:]

        HS = LossHistory()
        MSE = []
        kf = RepeatedKFold(n_splits=10,n_repeats=2)
        plt.figure(figsize = (7,7))
        for train,test in kf.split(Xscale,y):
            Mod = LSTM_Model(time_steps,input_shape)
            ES = EarlyStopping(monitor='val_loss', min_delta=0.0, patience=25, verbose=1, mode='auto')
            CH = ModelCheckpoint(filepath='weights.hdf5',monitor='val_loss', verbose=0, save_best_only=True)
            Mod.fit(Xscale[train],y[train],batch_size=batch_size, nb_epoch=1000,validation_split=0.1,
                    shuffle=True,callbacks=[ES,CH,HS],verbose=0)
            # Score the held-out fold with the checkpointed (lowest val_loss)
            # weights rather than the weights of the final epoch.
            Mod.load_weights('weights.hdf5')
            Y = Mod.predict(Xscale[test],batch_size = batch_size)
            MSE.append(metrics.mean_squared_error(y[test],Y))
            plt.plot(HS.test_losses,linestyle='--')
            plt.plot(HS.train_losses)
        # summary() prints the table itself; wrapping it in print() would
        # add a stray "None" line.
        Mod.summary()
        mean_mse = np.asanyarray(MSE).mean()
        print(mean_mse)
        return mean_mse
# CSV file passed to LSTM_Optimize below.
Path = 'FluxData.csv'
# Enable inline plotting when running under IPython/Jupyter. The bare
# "% matplotlib inline" magic is not valid Python syntax, so request it
# through the IPython API and skip it when no IPython shell is present.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
start_time = time.time()
if __name__ == '__main__':
    # Input columns used to model the target column.
    CH4_Model = ['Sedge','Shrubby','Temp','VWC','ustar','wind_speed','air_pressure',
                 'PPFD_Avg','NR_Wm2_Avg','AirTC_Avg']
    y_var = 'ch4_flux'
    Model = CH4_Model
    Best = LSTM_Optimize(Path,y_var)
    Best.Run(Model)
    print()
    # Wall-clock time elapsed since start_time was taken.
    print("--- %s seconds ---" % (time.time() - start_time))
以下是我的数据集中的几行——实际序列有1000个观测值
datetime,co2_flux,ch4_flux,ustar,wind_speed,AirTC_Avg,air_pressure,AirTC_Min,RH,PPFD_Avg,NR_Wm2_Avg,VWC,Temp,Sedge,Shrubby
7/11/2016 8:00,-0.337747167,0.011732699,0.404379747,3.887986435,15.07,101118.6513,15.03,92.7,414.2,225.1,0.5895,7.950660426,0.001292044,0.823794007
7/11/2016 8:30,-1.021087283,0.010256442,0.424094541,3.94983083,14.89,101144.0926,14.84,92.8,339.7,177.1,0.5895,8.24119905,0.001058732,0.826866339
7/11/2016 9:00,-0.146511388,0.008503355,0.456274817,4.687202214,14.71,101177.3176,14.63,93.4,354.4,183.7,0.5895,8.146344257,0.000474955,0.84272365
7/11/2016 9:30,0.144368521,0.009458078,0.462915317,4.810986576,14.27,101203.9191,14.2,93.3,370.2,188.4,0.5895,7.995179025,0.00147768,0.854715683
7/11/2016 10:00,1.471425801,0.014895985,0.47095652,5.098075355,13.7,101235.9171,13.62,94.3,462.9,233.9,0.5895,7.521166721,4.64E-05,0.871581919
7/11/2016 10:30,0.889911286,0.01564225,0.487227522,4.969666239,13.13,101277.0195,13.04,96,309.9,155.2,0.5895,7.923818563,8.14E-06,0.880709962
当我使用TensorFlow后端运行时,一切顺利,能够正常得到结果。但是,如果我改用MXNet后端运行,它就无法加载已保存的模型权重,并给出如下回溯信息:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-14c6597a2feb> in <module>()
114 Model = CH4_Model
115 Best = LSTM_Optimize(Path,y_var)
--> 116 Best.Run(Model)
117 print()
118 print("--- %s seconds ---" % (time.time() - start_time))
<ipython-input-1-14c6597a2feb> in Run(self, Inputs)
96 shuffle=True,callbacks=[ES,CH,HS],verbose=0)
97 Y = Mod.predict(Xscale[test],batch_size = batch_size)
---> 98 Mod.load_weights('weights.hdf5')
99 Y = Mod.predict(Xscale[test],batch_size = batch_size)
100 MSE.append(metrics.mean_squared_error(self.y[test],Y))
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/engine/topology.py in load_weights(self, filepath, by_name)
2718 self.load_weights_from_hdf5_group_by_name(f)
2719 else:
-> 2720 self.load_weights_from_hdf5_group(f)
2721
2722 if hasattr(f, 'close'):
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/engine/topology.py in load_weights_from_hdf5_group(self, f)
2804 weight_values[0] = w
2805 weight_value_tuples += zip(symbolic_weights, weight_values)
-> 2806 K.batch_set_value(weight_value_tuples)
2807
2808 def load_weights_from_hdf5_group_by_name(self, f):
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/backend/mxnet_backend.py in batch_set_value(tuples)
2205 """
2206 for p, w in tuples:
-> 2207 set_value(p, w)
2208
2209
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/backend/mxnet_backend.py in set_value(x, value)
2193 if isinstance(value, Number):
2194 value = [value]
-> 2195 x.bind(mx.nd.array(value))
2196
2197
/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in array(source_array, ctx, dtype)
1295 raise TypeError('source_array must be array like object')
1296 arr = empty(source_array.shape, ctx, dtype)
-> 1297 arr[:] = source_array
1298 return arr
1299
/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in __setitem__(self, key, value)
384 _internal._set_value(float(value), out=self)
385 elif isinstance(value, (np.ndarray, np.generic)):
--> 386 self._sync_copyfrom(value)
387 else:
388 raise TypeError(
/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in _sync_copyfrom(self, source_array)
556 print(self.shape)
557 raise ValueError('Shape inconsistent: expected %s vs got %s'%(
--> 558 str(self.shape), str(source_array.shape)))
559 check_call(_LIB.MXNDArraySyncCopyFromCPU(
560 self.handle,
ValueError: Shape inconsistent: expected () vs got (1,)
为什么我要使用MXNet?它似乎比TensorFlow更快,而我需要对许多具有不同输入、不同节点数和不同超参数的模型执行训练-测试-验证。通过使用多进程(multiprocessing)并行训练多个不同的模型,我已经能够借助MXNet后端显著提高Keras模型的训练速度。但是,使用TensorFlow后端时,我一尝试多进程处理就会出现线程锁定错误。
补充一下背景:我在p2.xlarge实例上使用Deep Learning AMI Ubuntu Linux - 2.3_Sep2017(ami-d6ee1dae)环境。
任何想法都将不胜感激!
答案 0 :(得分:3)
MXNet后端的LSTM层使用MXNet的bucketing module,它要求静态声明桶的大小(序列长度);而Keras和TensorFlow支持动态长度。因此,您在加载模型权重时会遇到形状不匹配的问题。
目前正在为MXNet后端添加Keras 2支持,这个问题应该会在Keras 2中得到解决。https://github.com/deep-learning-tools/keras/tree/keras2_mxnet_backend https://github.com/keras-team/keras/issues/8697