I implemented an MLP in tensorflow-gpu 1.8.0 and I am using Hyperopt to find the best parameter configuration. A checkpoint file is created every time the loss function is further minimized. The checkpoint files are always overwritten, so at the end of the process I am left with only these files:
checkpoint
Model_1_checkpoint.ckpt.data-00000-of-00001
Model_1_checkpoint.ckpt.index
Model_1_checkpoint.ckpt.meta
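For reference, that file set is what a single tf.train.Saver save with max_to_keep=1 produces; roughly like this (a minimal sketch with a placeholder variable and path, not my actual model):

import tensorflow as tf

# Minimal sketch: one save produces exactly one .data-00000-of-00001,
# one .index and one .meta file, plus the "checkpoint" bookkeeping file;
# with max_to_keep=1, every later save overwrites the previous files.
w = tf.get_variable("w", shape=[2, 2])
saver = tf.train.Saver(max_to_keep=1)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, './Model_1_checkpoint.ckpt')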
Below I show the code that trains the MLP (inside a function):
import numpy as np
import tensorflow as tf
from sklearn import metrics

# Hyperparameters
n_step = np.round(parameters['step'], 3)
n_hidden = np.int(parameters['number_neurons'])
n_bias = np.round(parameters['bias'], 3)
n_batch = np.int(parameters['batch'])

# General variables
N_instances = xtrain_data_1_T60.shape[0]
N_input = xtrain_data_1_T60.shape[1]
N_classes = enc_ytrain_data_1_T60.shape[1]
N_epochs = 500
display_step = 100

# Reset graph
tf.reset_default_graph()

# Placeholders
X = tf.placeholder(name="Logs", dtype=tf.float32, shape=[None, N_input])
y = tf.placeholder(name="Facies", dtype=tf.float32, shape=[None, N_classes])

# MLP network architecture
input_layer = tf.layers.dense(X, units=N_input, activation=None,
                              kernel_initializer=tf.keras.initializers.glorot_normal(1969),
                              bias_initializer=tf.keras.initializers.Zeros())
hidden_layer = tf.layers.dense(input_layer, units=n_hidden, activation=tf.nn.tanh,
                               kernel_initializer=tf.keras.initializers.he_normal(1969),
                               bias_initializer=tf.keras.initializers.Constant(n_bias))
output_layer = tf.layers.dense(hidden_layer, units=N_classes, activation=tf.nn.softmax,
                               kernel_initializer=tf.keras.initializers.he_normal(1969),
                               bias_initializer=tf.keras.initializers.Zeros(), name="mlp_output")
loss_op = tf.reduce_mean(tf.keras.backend.binary_crossentropy(y, output_layer))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=n_step).minimize(loss_op)

# Initialize variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # Training loop
    for epoch in range(N_epochs):
        avg_cost = 0.
        total_batch = np.int(N_instances / n_batch)
        start_idx = 0
        end_idx = n_batch
        for i in range(total_batch):
            batchx = xtrain_data_1_T60[start_idx:end_idx, :]
            batchy = enc_ytrain_data_1_T60[start_idx:end_idx, :]
            _, c = sess.run([optimizer, loss_op], feed_dict={X: batchx, y: batchy})
            avg_cost += c / total_batch
            # Set next batch, clamping the last one to the data size
            start_idx += n_batch
            end_idx += n_batch
            if end_idx > N_instances:
                end_idx = N_instances
        if epoch % display_step == 0:
            print("Epoch : %03d/%03d cost : %.4f\n" % (epoch, N_epochs, avg_cost))
    print("Optimization finished\n")

    # Predict on the validation set and map back to facies labels (1-based)
    prediction_1 = sess.run(output_layer, feed_dict={X: xvalidation_data_1_V40})
    prediction_1 = prediction_1.argmax(axis=1) + 1

    # Initialize a saver to save the current best model
    saver = tf.train.Saver(max_to_keep=1)
    # Only check prediction results with 3 lithofacies; otherwise assign a dummy error and accuracy
    if len(np.unique(prediction_1)) == 3:
        error = 1. - metrics.recall_score(yvalidation_data_1_V40, prediction_1, average='micro')
        accuracy = metrics.accuracy_score(yvalidation_data_1_V40, prediction_1)
        global temp_error
        if error < temp_error:
            temp_error = error
            saver.save(sess, '{}/{}'.format(checkpoint_path, checkpoint_name))
            print("Best model saved in file: ", '{}/{}'.format(checkpoint_path, checkpoint_name))
            print()
    else:
        error = 3
        accuracy = 0.00
    print("Error: {}".format(error))
    print("Accuracy: {:.2%}".format(accuracy))
    print("Predicted number of lithofacies: {}\n".format(len(np.unique(prediction_1))))
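For context, this whole block lives inside the objective function that I hand to Hyperopt, along the lines of the sketch below (the function name train_mlp, the search-space labels, and the ranges are illustrative, not my exact setup; fmin minimizes the value the function returns, i.e. the error):

from hyperopt import fmin, hp, tpe

# Illustrative search space; the keys match the parameters dict above.
space = {
    'step': hp.uniform('step', 0.001, 0.5),
    'number_neurons': hp.quniform('number_neurons', 5, 50, 1),
    'bias': hp.uniform('bias', -1.0, 1.0),
    'batch': hp.quniform('batch', 16, 128, 1),
}
best = fmin(fn=train_mlp, space=space, algo=tpe.suggest, max_evals=100)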
Then, in the same script, I restore the saved checkpoint to compute predictions:
tf.reset_default_graph()

# Restore the best model and predict again
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph(checkpoint_path + "/" + checkpoint_name + ".meta")
    new_saver.restore(sess, checkpoint_path)
    best_model_1 = tf.get_default_graph()
    # Retrieve placeholder from restored graph
    X = best_model_1.get_tensor_by_name('Logs:0')
    # Retrieve output layer of MLP network to compute predictions
    pred = best_model_1.get_tensor_by_name('mlp_output/kernel:0')
    model_prob_density_1 = sess.run(pred, feed_dict={X: voting_data})
Unfortunately, the line "new_saver.restore(...)" raises the following error message:
NotFoundError (see above for traceback): Key dense/bias not found in checkpoint
	 [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
	 [[Node: save/RestoreV2/_9 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_14_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]]]
I went back to the first block of code and tried these changes:
a) saver = tf.train.Saver(max_to_keep=1) -> saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
b) saver = tf.train.Saver(max_to_keep=1) -> saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=1)
However, I still get the same error message.
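For completeness, this is the restore pattern I understand to be the standard one in TF 1.x (a minimal sketch reusing the checkpoint_path and checkpoint_name variables from above; 'mlp_output/Softmax:0' is my guess at the name of the softmax activation tensor), in case it helps pinpoint what I am doing differently:

import tensorflow as tf

tf.reset_default_graph()
with tf.Session() as sess:
    # Rebuild the graph from the .meta file, then load the weights.
    new_saver = tf.train.import_meta_graph(checkpoint_path + "/" + checkpoint_name + ".meta")
    # restore() expects the checkpoint prefix (e.g. ".../Model_1_checkpoint.ckpt"),
    # not the directory; tf.train.latest_checkpoint() returns that prefix.
    new_saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))
    graph = tf.get_default_graph()
    X = graph.get_tensor_by_name('Logs:0')
    # 'mlp_output/kernel:0' is only the layer's weight matrix; the
    # layer's output should be the activation tensor instead.
    pred = graph.get_tensor_by_name('mlp_output/Softmax:0')
    model_prob_density_1 = sess.run(pred, feed_dict={X: voting_data})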
Any suggestions?
Thanks a lot, Ivan