GridSearching an LSTM network - batch_size problem

Posted: 2019-07-05 09:08:03

Tags: python-3.x lstm gridsearchcv batchsize

I wrote code to apply the grid-search method to an LSTM network built with Keras. Everything seems to work fine, but something goes wrong when I pass the batch_size.

I tried to change the format of batch_size, but as far as I understand it must be a tuple.


#LSTM ok
from Methods.LSTM_1HL import LSTM_method
Yhat_train_LSTM, Yhat_test_LSTM = LSTM_method(X_train, X_test, Y_train, Y_test)

import numpy as np
from time import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, accuracy_score
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.wrappers.scikit_learn import KerasRegressor

def create_model(optimizer, hl1_nodes, input_shape):
    # creation of the NN - Electric Load
    # LSTM layers followed by other LSTM layer must have the parameter "return_sequences" set at True
    model = Sequential()
    model.add(LSTM(units = hl1_nodes , input_shape=input_shape, return_sequences=False))
    model.add(Dense(1, activation="linear"))  # output layer
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])
    model.summary()
    return model

def LSTM_method(X_train, X_test, Y_train, Y_test):

    # normalize X and Y data
    mmsx = MinMaxScaler()
    mmsy = MinMaxScaler()

    X_train = mmsx.fit_transform(X_train)
    X_test = mmsx.transform(X_test)
    Y_train = mmsy.fit_transform(Y_train)
    Y_test = mmsy.transform(Y_test)  

    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])


    # NN for Electric Load
    # LSTM  Input Shape
    time_steps = 1  # number of time-steps you are feeding a sequence (?)
    inputs_numb = X_train.shape[1]  # number of inputs
    input_shape=(time_steps, inputs_numb)


    model = KerasRegressor(build_fn=create_model,verbose=1)

    #GridSearch code
    start=time()
    optimizers = ['rmsprop', 'adam']
    epochs = np.array([100, 500, 1000])
    hl1_nodes = np.array([1, 10, 50])
    btcsz = np.array([1,X_train.shape[0]])


    param_grid = dict(optimizer=optimizers, hl1_nodes=hl1_nodes, input_shape=input_shape, nb_epoch=epochs, batch_size=btcsz)
    scoring = make_scorer(accuracy_score) #in order to use a metric as a scorer
    grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring = scoring)
    grid_result = grid.fit(X_train, Y_train)

    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    for params, mean_score, scores in grid_result.grid_scores_:
        print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))
    print("total time:",time()-start)


    # Predictions - Electric Load
    Yhat_train = grid_result.predict(X_train, verbose=0)
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    Yhat_test = grid_result.predict(X_test, verbose=0)


    # Denormalization - Electric Load
    Yhat_train = mmsy.inverse_transform(Yhat_train)
    Yhat_test = mmsy.inverse_transform(Yhat_test)
    Y_train = mmsy.inverse_transform(Y_train)
    Y_test = mmsy.inverse_transform(Y_test)

    return Yhat_train, Yhat_test

I get the following error:


TypeError                                 Traceback (most recent call last)
<ipython-input-...> in <module>
     10 #from Methods.LSTM_1HL import create_model
     11 
---> 12 Yhat_train_LSTM, Yhat_test_LSTM = LSTM_method(X_train, X_test, Y_train, Y_test)

c:\Users\ER180124\Code\LoadForecasting\Methods\LSTM_1HL.py in LSTM_method(X_train, X_test, Y_train, Y_test)
     62     scoring = make_scorer(accuracy_score) #in order to use a metric as a scorer
     63     grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring = scoring)
---> 64     grid_result = grid.fit(X_train, Y_train)
     65 
     66     print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
    720                 return results_container[0]
    721 
--> 722             self._run_search(evaluate_candidates)
    723 
    724         results = results_container[0]

~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
   1189     def _run_search(self, evaluate_candidates):
   1190         """Search all candidates in param_grid"""
-> 1191         evaluate_candidates(ParameterGrid(self.param_grid))
   1192 
   1193 

~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
    709                                for parameters, (train, test)
    710                                in product(candidate_params,
--> 711                                           cv.split(X, y, groups)))
    712 
    713                 all_candidate_params.extend(candidate_params)

~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None
    919 

~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 

~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):

~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    526             estimator.fit(X_train, **fit_params)
    527         else:
--> 528             estimator.fit(X_train, y_train, **fit_params)
    529 
    530     except Exception as e:

~\.conda\envs\PierEnv\lib\site-packages\keras\wrappers\scikit_learn.py in fit(self, x, y, **kwargs)
    139                 **self.filter_sk_params(self.build_fn.__call__))
    140         else:
--> 141             self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
    142 
    143         loss_name = self.model.loss

c:\Users\ER180124\Code\LoadForecasting\Methods\LSTM_1HL.py in create_model(optimizer, hl1_nodes, input_shape)
     19     # LSTM layers followed by other LSTM layer must have the parameter "return_sequences" set at True
     20     model = Sequential()
---> 21     model.add(LSTM(units = hl1_nodes , input_shape=input_shape, return_sequences=False))
     22     model.add(Dense(1, activation="linear"))  # output layer
     23     model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])

~\.conda\envs\PierEnv\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
     89                 warnings.warn('Update your `' + object_name + '` call to the ' +
     90                               'Keras 2 API: ' + signature, stacklevel=2)
---> 91             return func(*args, **kwargs)
     92         wrapper._original_function = func
     93         return wrapper

~\.conda\envs\PierEnv\lib\site-packages\keras\layers\recurrent.py in __init__(self, units, activation, recurrent_activation, use_bias, kernel_initializer, recurrent_initializer, bias_initializer, unit_forget_bias, kernel_regularizer, recurrent_regularizer, bias_regularizer, activity_regularizer, kernel_constraint, recurrent_constraint, bias_constraint, dropout, recurrent_dropout, implementation, return_sequences, return_state, go_backwards, stateful, unroll, **kwargs)
   2183                                    stateful=stateful,
   2184                                    unroll=unroll,
-> 2185                                    **kwargs)
   2186         self.activity_regularizer = regularizers.get(activity_regularizer)
   2187 

~\.conda\envs\PierEnv\lib\site-packages\keras\layers\recurrent.py in __init__(self, cell, return_sequences, return_state, go_backwards, stateful, unroll, **kwargs)
    406                              '(tuple of integers, '
    407                              'one integer per RNN state).')
--> 408         super(RNN, self).__init__(**kwargs)
    409         self.cell = cell
    410         self.return_sequences = return_sequences

~\.conda\envs\PierEnv\lib\site-packages\keras\engine\base_layer.py in __init__(self, **kwargs)
    145                     batch_size = None
    146                 batch_input_shape = (
--> 147                     batch_size,) + tuple(kwargs['input_shape'])
    148             self.batch_input_shape = batch_input_shape
    149 

TypeError: 'int' object is not iterable

I don't understand why, in the last part of the error message, I get "batch_size = None" when I defined a batch size that is a tuple.

1 answer:

Answer 0 (score: 0)

Well, I think I see your problem.

When doing the CV search, the parameter grid is generated from your parameter dictionary, most likely as the cross product of the possible configurations. Your parameter dictionary contains (time_steps, inputs_numb) for input_shape, which is really a sequence of two integers. So each candidate configuration gets an input-shape parameter of either time_steps or inputs_numb. In the last line of the stack trace, these then become (None,) + (time_steps) or (None,) + (inputs_numb), a tuple + int operation, which is invalid. Instead, you want the configuration space to contain only one possible input_shape.
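You can check this behaviour with sklearn's ParameterGrid directly. Below is a minimal sketch; the values 1 and 5 are placeholders, not anything from your data:

from sklearn.model_selection import ParameterGrid

# A bare tuple is treated as a sequence of two candidate values,
# so each grid point gets a single int as input_shape.
print(list(ParameterGrid({'input_shape': (1, 5)})))
# [{'input_shape': 1}, {'input_shape': 5}]

# A list holding one tuple yields a single candidate whose value
# is the whole tuple, which is what the LSTM layer expects.
print(list(ParameterGrid({'input_shape': [(1, 5)]})))
# [{'input_shape': (1, 5)}]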

What you should do is convert this line:

input_shape=(time_steps, inputs_numb)

to this:

input_shape=[(time_steps, inputs_numb)]
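For context, here is a minimal sketch of how your param_grid would then read (nb_epoch is kept from your original code, although the Keras 2 name for this parameter is epochs):

param_grid = dict(
    optimizer=optimizers,
    hl1_nodes=hl1_nodes,
    # a list with a single tuple: GridSearchCV now sees one candidate
    # input shape instead of iterating over its two integers
    input_shape=[(time_steps, inputs_numb)],
    nb_epoch=epochs,
    batch_size=btcsz,
)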