我刚开始了解 scikit-optimize 包,对贝叶斯优化还比较陌生,想把它用在我现有的卷积神经网络(CNN)上。我尝试用贝叶斯优化(Bayesian optimization)来寻找 CNN 的最佳超参数,但目前的实现无法正常工作。
我已经写了一版实现,可是代码跑不通,也不清楚究竟是哪一部分有问题。谁能指出正确的做法?有没有在卷积神经网络上用贝叶斯优化寻找最佳超参数的有效实现?有什么想法吗?
更新
我曾用 GridSearchCV 和 RandomizedSearchCV 来调优我的卷积神经网络。由于网络层数很深,GridSearchCV 运行了 2-3 天仍然无法完成优化。因此我想改用基于贝叶斯优化的新框架(例如 skopt、optuna)来寻找卷积神经网络的最佳参数和超参数。谁能针对 my current attempt 1 in colab 和 my attempt 2 in colab 给出可行的修正和有效的方法?有什么想法吗?
我当前的尝试:
下面是我目前使用 scikit-optimize 软件包进行贝叶斯优化的尝试。这是 my attempt in this colab,我在其中完成了在卷积神经网络上实现贝叶斯优化、寻找最佳超参数的全部实验:
### function returned to Bayesian Optimization
@use_named_args(dimensions=dimensions)
def bayes_opt(cnn_num_steps, cnn_init_epoch, cnn_max_epoch,
              cnn_learning_rate_decay, cnn_batch_size, cnn_dropout_rate, cnn_init_learning_rate):
    """Train the CNN once with the given hyperparameters and return its validation loss.

    This is the objective handed to the Bayesian optimizer: ``use_named_args``
    maps one sampled point of ``dimensions`` onto the keyword arguments below,
    and the scalar returned here is the value the optimizer works with.

    Args:
        cnn_num_steps: Stored in the global ``num_steps`` (not used directly here).
        cnn_init_epoch: Passed as ``decay_steps`` of the exponential decay schedule.
        cnn_max_epoch: Number of passes over the training batches.
        cnn_learning_rate_decay: ``decay_rate`` of the exponential decay schedule.
        cnn_batch_size: Batch size handed to ``generate_batches``.
        cnn_dropout_rate: Dropout rate fed to the model while training.
        cnn_init_learning_rate: Initial learning rate fed to the decay schedule.

    Returns:
        float: Mean of the per-batch mean-squared-error losses on the CIFAR-10
        test split, evaluated with dropout left at its default of 0.0.
    """
    global iteration, num_steps, init_epoch, max_epoch, learning_rate_decay, dropout_rate, init_learning_rate, batch_size

    # Publish the sampled values as module globals (other helpers may read them)
    # and normalise their dtypes.
    num_steps = np.int32(cnn_num_steps)
    batch_size = np.int32(cnn_batch_size)
    learning_rate_decay = np.float32(cnn_learning_rate_decay)
    init_epoch = np.int32(cnn_init_epoch)
    max_epoch = np.int32(cnn_max_epoch)
    dropout_rate = np.float32(cnn_dropout_rate)
    init_learning_rate = np.float32(cnn_init_learning_rate)

    # Start every trial from an empty graph so variables from previous trials
    # do not accumulate.
    tf.reset_default_graph()
    tf.set_random_seed(randomState)

    (train_X, train_y), (test_X, test_y) = cifar10.load_data()
    train_X = train_X.astype('float32') / 255.0
    test_X = test_X.astype('float32') / 255.0

    targets = tf.placeholder(tf.float32, [None, input_size], name="targets")
    # Keep the placeholder under its own name. The original rebound
    # `model_learning_rate` to the decay tensor and then fed that tensor, which
    # overrides the schedule so the decay never took effect.
    base_learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
    model_dropout_rate = tf.placeholder_with_default(0.0, shape=())
    global_step = tf.Variable(0, trainable=False)

    prediction = cnn(model_dropout_rate, base_learning_rate)

    decayed_learning_rate = tf.train.exponential_decay(
        learning_rate=base_learning_rate, global_step=global_step,
        decay_rate=learning_rate_decay, decay_steps=init_epoch, staircase=False)

    with tf.name_scope('loss'):
        model_loss = tf.losses.mean_squared_error(targets, prediction)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(decayed_learning_rate).minimize(
            model_loss, global_step=global_step)

    # NOTE(review): `inputs` is not defined in this function; it must be a
    # placeholder living in the current default graph (presumably created by
    # `cnn`) -- confirm.
    # The context manager releases the session's resources after each trial.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch_step in range(max_epoch):
            for batch_X, batch_y in generate_batches(train_X, train_y, batch_size):
                train_data_feed = {
                    inputs: batch_X,
                    targets: batch_y,
                    base_learning_rate: init_learning_rate,
                    model_dropout_rate: dropout_rate,
                }
                sess.run(train_step, train_data_feed)

        # Validation pass: no training op is run and dropout is not fed, so the
        # placeholder default of 0.0 applies. Evaluated in batches to avoid
        # pushing the whole test split through the graph at once.
        batch_losses = [
            sess.run(model_loss, {inputs: batch_X, targets: batch_y})
            for batch_X, batch_y in generate_batches(test_X, test_y, batch_size)
        ]

    # Plain mean of the per-batch losses; a shorter final batch is weighted the
    # same as the full ones.
    val_error = float(np.mean(batch_losses))
    return val_error
my current attempt in colab 仍然存在各种尚未解决的问题。有谁能提供一种可行的方法,用贝叶斯优化为非常深的卷积神经网络寻找最佳超参数吗?有什么想法吗?谢谢!
答案 0 :(得分:3)
我建议您使用 Keras Tuner 软件包来进行贝叶斯优化(Bayesian Optimization)。
下面是一个演示如何实现的小例子。
from kerastuner import HyperModel, Objective
import tensorflow as tf
from kerastuner.tuners import BayesianOptimization
# Create the keras tuner model.
class MyHyperModel(HyperModel):
    """Hypermodel for a text classifier with a tunable Conv1D and Dense stack.

    Layout: Embedding -> N x (Conv1D + MaxPool1D) -> GlobalMaxPool1D ->
    M x (Dense + Dropout) -> single sigmoid unit.
    """

    def build(self, hp):
        """Assemble and compile one candidate model from the sampled hyperparameters.

        Tuned values: ``num_layers`` (1-3 conv blocks), ``num_filters`` (32 or 64),
        ``num_layers_rnn`` (1-3 dense blocks), ``units`` (32-512 in steps of 32)
        and ``optimizer`` (Adam, Adadelta or Adamax).
        """
        layers = tf.keras.layers
        # `tokenizer` and `embedding_dim` come from the enclosing module.
        vocab_size = len(tokenizer.word_index) + 1

        net = tf.keras.Sequential()
        net.add(layers.Embedding(vocab_size, embedding_dim))

        # Convolutional feature extractor.
        conv_blocks = hp.Int('num_layers', 1, 3)
        for _ in range(conv_blocks):
            n_filters = hp.Choice('num_filters', values=[32, 64], default=64)
            net.add(layers.Conv1D(filters=n_filters,
                                  kernel_size=3,
                                  activation='relu',
                                  bias_initializer='glorot_uniform'))
            net.add(layers.MaxPool1D())
        net.add(layers.GlobalMaxPool1D())

        # Fully connected head.
        dense_blocks = hp.Int('num_layers_rnn', 1, 3)
        for _ in range(dense_blocks):
            width = hp.Int('units', min_value=32, max_value=512, step=32)
            net.add(layers.Dense(units=width, activation='relu'))
            net.add(layers.Dropout(0.2))
        net.add(layers.Dense(1, activation='sigmoid'))

        optimizer_name = hp.Choice('optimizer', values=['Adam', 'Adadelta', 'Adamax'])
        # `f1` is a metric function supplied by the enclosing module.
        net.compile(optimizer=optimizer_name,
                    loss='binary_crossentropy',
                    metrics=[f1])
        return net
模型类创建好之后,就可以通过以下代码开始超参数搜索(训练模型)。
# Instantiate the hypermodel and search it with Bayesian optimization,
# maximising the validation F1 score.
hypermodel = MyHyperModel()
tuner = BayesianOptimization(
    hypermodel,
    objective=Objective('val_f1', direction="max"),
    # Must stay below `max_trials`: the first `num_initial_points` trials are
    # sampled randomly, so the original value of 50 with only 15 trials meant
    # the Bayesian step was never reached.
    num_initial_points=5,
    max_trials=15,
    directory='./',
    project_name='real_or_not')
# Each trial trains for 10 epochs and is scored on the validation dataset.
tuner.search(train_dataset,
             epochs=10, validation_data=validation_dataset)
您可以在此link上查看文档。
我还附上了一个 Kaggle Notebook 的链接,其中演示了我自己编写的 Bayesian Optimization 示例,您可以直接动手运行该示例。如有任何其他问题,欢迎随时提出。
更新:16/08
您在评论中提到,希望使用 Bayesian Optimization 调整以下超参数。我会按下面的方式来解决这个问题。
import tensorflow as tf
from kerastuner import HyperModel, Objective
from kerastuner.tuners import BayesianOptimization
class MyHyperModel(HyperModel):
    """Hypermodel for a small image CNN with tunable depth, width, dropout and learning rate.

    Layout: Conv2D + MaxPooling2D -> N x (Conv2D + MaxPooling2D) -> Flatten ->
    M x (Dense + Dropout) -> single sigmoid unit, for 32x32x3 inputs.
    """

    def build(self, hp):
        """Build and compile one candidate model from the sampled hyperparameters.

        Tuned values: ``num_filters`` (32 or 64), ``num_layers`` (1-3 extra conv
        blocks), ``num_layers_rnn`` (1-3 dense blocks), ``units`` (32-512 in
        steps of 32), ``droup_out_rate`` (0.2, 0.4 or 0.5) and ``learning_rate``
        (1e-2, 1e-3 or 1e-4).
        """
        model = tf.keras.Sequential()
        # padding='same' keeps the spatial size through each convolution. With
        # the default 'valid' padding the feature map shrinks
        # 32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2, so the third extra 3x3 convolution
        # (num_layers == 3) has no valid output and the build fails.
        model.add(tf.keras.layers.Conv2D(filters=hp.Choice('num_filters', values=[32, 64], default=64),
                                         activation='relu',
                                         kernel_size=(3, 3),
                                         padding='same',
                                         bias_initializer='glorot_uniform',
                                         input_shape=(32, 32, 3)))
        model.add(tf.keras.layers.MaxPooling2D())

        for i in range(hp.Int('num_layers', 1, 3)):
            # 'num_filters' is the same hyperparameter as above, so every
            # convolution in a trial uses the same filter count.
            model.add(tf.keras.layers.Conv2D(filters=hp.Choice('num_filters', values=[32, 64], default=64),
                                             activation='relu',
                                             kernel_size=(3, 3),
                                             padding='same',
                                             bias_initializer='glorot_uniform'))
            model.add(tf.keras.layers.MaxPooling2D())

        model.add(tf.keras.layers.Flatten())

        for i in range(hp.Int('num_layers_rnn', 1, 3)):
            model.add(tf.keras.layers.Dense(units=hp.Int('units', min_value=32, max_value=512, step=32),
                                            activation='relu'))
            # The hyperparameter name is kept as originally spelled so existing
            # trial directories stay readable.
            model.add(tf.keras.layers.Dropout(rate=hp.Choice('droup_out_rate', values=[0.2, 0.4, 0.5], default=0.2)))

        # NOTE(review): a single sigmoid unit with binary cross-entropy only fits
        # binary labels. If the 32x32x3 images carry multi-class labels, this
        # head and the loss need to change -- confirm against the dataset.
        model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

        model.compile(
            optimizer=tf.keras.optimizers.Adam(
                hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
            loss='binary_crossentropy',
            metrics=['accuracy'])
        return model
class MyTuner(BayesianOptimization):
    """Bayesian-optimization tuner that also tunes ``batch_size`` and ``epochs``."""

    def run_trial(self, trial, *args, **kwargs):
        """Run one trial with a sampled batch size and epoch count.

        The two values are registered on the trial's hyperparameters and
        injected into the keyword arguments that the parent ``run_trial``
        receives.

        Returns:
            Whatever the parent ``run_trial`` returns. Newer KerasTuner releases
            read the trial's metrics from this value, so it must not be dropped;
            older releases return ``None`` and are unaffected.
        """
        # You can add additional HyperParameters for preprocessing and custom training loops
        # via overriding `run_trial`
        kwargs['batch_size'] = trial.hyperparameters.Int('batch_size', 32, 256, step=32)
        kwargs['epochs'] = trial.hyperparameters.Int('epochs', 10, 30)
        return super(MyTuner, self).run_trial(trial, *args, **kwargs)
# Instantiate the hypermodel and search it with the custom tuner, maximising
# validation accuracy.
hypermodel = MyHyperModel()
tuner = MyTuner(
    hypermodel,
    # The model is compiled with metrics=['accuracy'], which tf.keras (TF 2.x)
    # logs as 'val_accuracy' on the validation data. 'val_acc' is never logged
    # there, so the objective could not be found.
    objective=Objective('val_accuracy', direction="max"),
    # Must stay below `max_trials`: the first `num_initial_points` trials are
    # sampled randomly, so 50 initial points with 15 trials never reached the
    # Bayesian step.
    num_initial_points=5,
    max_trials=15,
    directory='./',
    project_name='cnn_bayesian_opt')
# `epochs` and `batch_size` are not passed here; MyTuner.run_trial supplies them.
tuner.search(train_dataset, validation_data=validation_dataset)
您还可以查看这个 GitHub issue(here),其中解释了如何调整 epochs 和 batch_size。
上面的代码将根据您的要求调整以下参数。
number_of_convolutional_filter
number_of_hidden_layer
drop_rate
learning_rate
batch_size
epochs
答案 1 :(得分:2)
Ax platform 是一个非常强大的工具,可以对深度神经网络进行贝叶斯优化。下面是我使用 ax 的方法:
构建CNN模型
!pip install ax-platform
from tensorflow.keras import models
from ax.service.managed_loop import optimize
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
# Load the CIFAR-10 train/test splits and one-hot encode both label arrays
# over 10 classes.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
y_train, y_test = (to_categorical(labels, 10) for labels in (y_train, y_test))
def build_model(opt, dropout, n_hidden=128):
    """Build and compile a small CNN classifier for 32x32x3 images with 10 classes.

    Args:
        opt: Optimizer passed straight to ``model.compile``.
        dropout: Dropout rate applied after the hidden dense layer.
        n_hidden: Width of the hidden dense layer. The original body read an
            undefined name ``n_hidden`` and raised ``NameError``; it is now a
            keyword parameter with a default, so existing two-argument calls
            keep working.

    Returns:
        The compiled Keras ``Sequential`` model, using categorical
        cross-entropy loss and an accuracy metric.
    """
    model = models.Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), input_shape=(32, 32, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(n_hidden))
    model.add(Activation('relu'))
    model.add(Dropout(dropout))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
训练CNN模型
下一步是训练CNN模型并返回其精度,该精度将用于贝叶斯优化:
def train_evaluate(param):
    """Train one model for a parameter set and return its accuracy.

    Args:
        param: Mapping with the keys ``"opt"``, ``"dropout"``, ``"epochs"`` and
            ``"batch_size"``.

    Returns:
        The second value returned by ``model.evaluate`` on ``(X_test, y_test)``,
        which is the accuracy metric the model was compiled with.
    """
    # Local import keeps this block self-contained; tensorflow.keras is already
    # a dependency of this file.
    from tensorflow.keras import backend as keras_backend

    mymodel = build_model(opt=param["opt"], dropout=param["dropout"])
    # validation_data is documented as a tuple (x_val, y_val); some TF releases
    # do not unpack a list the same way.
    mymodel.fit(X_train, y_train, epochs=param["epochs"], batch_size=param["batch_size"],
                verbose=1, validation_data=(X_test, y_test))
    acc = mymodel.evaluate(X_test, y_test)[1]
    print(param, acc)
    # Dropping the reference alone does not release Keras' global graph state;
    # clear it so memory does not grow from trial to trial.
    del mymodel
    keras_backend.clear_session()
    return acc
运行贝叶斯优化
# Run the Ax managed optimization loop for 10 trials. Every hyperparameter is
# declared as a categorical ("choice") parameter, and the number returned by
# train_evaluate is reported under the objective name "acc".
# NOTE(review): `minimize` is not passed, so the library's default optimization
# direction applies -- confirm it maximizes the objective.
best_parameters, values, experiment, model = optimize(
    parameters=[
        dict(name="opt", type="choice", values=['adam', 'rmsprop', 'sgd']),
        dict(name="dropout", type="choice", values=[0.0, 0.25, 0.50, 0.75, 0.99]),
        dict(name="epochs", type="choice", values=[10, 50, 100]),
        dict(name="batch_size", type="choice", values=[32, 64, 100, 128]),
    ],
    total_trials=10,
    objective_name="acc",
    evaluation_function=train_evaluate,
)
返回最佳参数
# Fetch the per-arm results recorded on the experiment as a DataFrame.
data = experiment.fetch_data()
df = data.df
# Take the first arm whose "mean" equals the highest observed mean, then look
# the arm object up by name.
best_arm_name = df.loc[df["mean"].eq(df["mean"].max()), "arm_name"].iloc[0]
best_arm = experiment.arms_by_name[best_arm_name]
print(best_parameters)
print(best_arm)
请注意,您可以按照上面展示的方式添加其他需要优化的参数,例如 learning_rate、num_hidden_layer。希望这能满足您的需求。如果还有其他问题,请告诉我。祝你好运!