I am using a DNNRegressor as the fitness function of a genetic algorithm, so I need to run it thousands of times, saving a model on every iteration. A lightweight version of the code is shown below. The simulation runs fine up to a certain point, then the error below appears and the simulation stops. I am using TensorFlow 1.0.1; any suggestions on how to solve this would be greatly appreciated.
The error:

tensorflow.python.framework.errors_impl.InvalidArgumentError: Unsuccessful TensorSliceReader constructor: Failed to get matching files on /home/xxxx/workspace2/NNEvolution/4_15_136/6/model.ckpt-1500: Resource exhausted: /home/xxxx/workspace2/NNEvolution/4_15_136/6
    [[Node: save/RestoreV2_5 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/RestoreV2_5/tensor_names, save/RestoreV2_5/shape_and_slices)]]
    [[Node: save/RestoreV2_7/_7 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_6_save/RestoreV2_7", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]]

InvalidArgumentError (see above for traceback): Unsuccessful TensorSliceReader constructor: Failed to get matching files on /home/xxxx/workspace2/NNEvolution/4_15_136/6/model.ckpt-1500: Resource exhausted: /home/xxxx/workspace2/NNEvolution/4_15_136/6
    [[Node: save/RestoreV2_5 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/RestoreV2_5/tensor_names, save/RestoreV2_5/shape_and_slices)]]
    [[Node: save/RestoreV2_7/_7 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_6_save/RestoreV2_7", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]]

Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)
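Each call to the fitness function builds a fresh DNNRegressor with its own model_dir, Saver and checkpoint files, so my guess (not something the traceback states) is that some OS resource, such as open file descriptors, runs out after thousands of evaluations. In case it helps diagnose, here is a small Linux-only sketch I can run alongside the simulation to watch descriptor usage; the helper name report_fd_usage is mine:

import os
import resource

def report_fd_usage():
    """Print this process's open-file count against its rlimits (Linux)."""
    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    open_fds = len(os.listdir("/proc/self/fd"))  # one entry per open descriptor
    print("open fds: %d (soft limit %d, hard limit %d)" % (open_fds, soft, hard))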
Here is the code in detail:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import tensorflow as tf
import shutil
tf.logging.set_verbosity(tf.logging.INFO)
def FitnessFunction(hhLayer, epoch, pathModel, train_set, val_set, test_set,
                    every_n_steps, early_stopping_rounds, nnExperiment,
                    gaExperiment, gaGeneration, globalIndCounter):
    avgMSE = 0

    config1 = tf.ConfigProto()
    config1.gpu_options.allow_growth = True  # note: built but never passed to the estimator

    # Specify that all features have real-valued data.
    feature_columns = [tf.contrib.layers.real_valued_column("")]

    for nnExp in range(0, nnExperiment):
        validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
            val_set.data,
            val_set.target,
            every_n_steps=every_n_steps,
            early_stopping_metric="loss",
            early_stopping_metric_minimize=False,  # probably should be True, since "loss" is minimized
            early_stopping_rounds=early_stopping_rounds)

        # Each individual/repetition gets its own model directory.
        strName = str(gaExperiment) + "_" + str(gaGeneration) + "_" + str(globalIndCounter)
        indPath = os.path.join(os.path.sep, pathModel, strName, str(nnExp))

        # Create the model.
        mlp = tf.contrib.learn.DNNRegressor(
            feature_columns=feature_columns,
            hidden_units=hhLayer,
            model_dir=indPath,
            activation_fn=tf.nn.relu,
            enable_centered_bias=True,
            dropout=0.01,
            config=tf.contrib.learn.RunConfig(gpu_memory_fraction=0.30,
                                              save_checkpoints_secs=43200))

        # Train the model.
        mlp.fit(x=train_set.data, y=train_set.target, steps=epoch,
                monitors=[validation_monitor], batch_size=50)

        # Evaluate on the test set and accumulate the loss.
        MSE = mlp.evaluate(x=test_set.data, y=test_set.target)["loss"]
        avgMSE = avgMSE + MSE
        del mlp

    return avgMSE / nnExperiment
if __name__ == "__main__":
    # Data sets (Boston housing CSVs located next to this script).
    IRIS_TRAINING = os.path.join(os.path.dirname(__file__), "boston_train.csv")
    IRIS_VAL = os.path.join(os.path.dirname(__file__), "boston_val.csv")
    IRIS_TEST = os.path.join(os.path.dirname(__file__), "boston_test.csv")

    train_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TRAINING, target_dtype=np.float, features_dtype=np.float)
    val_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_VAL, target_dtype=np.float, features_dtype=np.float)
    test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TEST, target_dtype=np.float, features_dtype=np.float)

    # Start from a clean model tree.
    initialPath = os.path.join(os.path.sep, 'home', 'xxxx', 'workspace2', 'NNEvolution')
    if os.path.isdir(initialPath):
        shutil.rmtree(initialPath)

    idxCount = 1
    for exp in range(0, 50):
        for ger in range(0, 30):
            mse = FitnessFunction([30, 30, 30, 30], 1500, initialPath,
                                  train_set, val_set, test_set, 1, 200,
                                  nnExperiment=10, gaExperiment=exp,
                                  gaGeneration=ger, globalIndCounter=idxCount)
            idxCount = idxCount + 1
            print("MSE: {0:f}".format(mse))