I am trying to use a brute-force grid search to find the optimal number of hidden nodes for a TensorFlow deep learning model. I don't particularly care how long the program takes, but I'm finding that it runs out of memory because it has to hold on to all of the tf.Variables. The code that builds my model is below:
import math

import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_auc_score

def hiddenLayer(input_data, num_nodes, num_inputs, layer_num):
    # Initialize all weights from a truncated normal scaled by the input size
    weights = tf.Variable(tf.truncated_normal([num_inputs, num_nodes],
                          stddev=1.0 / math.sqrt(float(num_inputs))),
                          name='hidden' + str(layer_num) + '_weights')
    # Initialize all biases as zero
    biases = tf.Variable(tf.zeros([num_nodes]), name='hidden' + str(layer_num) + '_biases')
    # Using RELU, return the activated linear combination of inputs, weights and biases
    return tf.nn.relu(tf.matmul(input_data, weights) + biases)

def softmaxOutput(input_data, num_inputs, num_outputs):
    # Initialize all weights from a truncated normal scaled by the input size
    weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs],
                          stddev=1.0 / math.sqrt(float(num_inputs))),
                          name='output_weights')
    # Initialize all biases as zero
    biases = tf.Variable(tf.zeros([num_outputs]), name='output_biases')
    # Squash the linear combination using softmax
    return tf.nn.softmax(tf.matmul(input_data, weights) + biases)

def calculate_loss(logits, labels):
    labels = tf.to_int64(labels)
    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, labels, name='xentropy'))

class SingleDNNClassifier(object):
    def __init__(self, num_nodes, num_inputs, num_outputs, batch_size, layer_num=1, lr=0.01):
        # Placeholders for the features and labels
        self.x = x = tf.placeholder(tf.float32, shape=(batch_size, num_inputs))
        self.y = y = tf.placeholder(tf.int32, shape=(batch_size))
        # Run the input data through the first hidden layer
        x = hiddenLayer(x, num_nodes, num_inputs, layer_num)
        # Run the hidden-layer output through the softmax layer to get the predictions
        self.logits = logits = softmaxOutput(x, num_nodes, num_outputs)
        # Get the predicted labels
        self.predictions = tf.argmax(logits, 1)
        # Calculate the loss
        self.loss = xeloss = calculate_loss(logits, y)
        # Define the training operation for this model
        self.train_op = tf.train.GradientDescentOptimizer(lr).minimize(xeloss)

def train(sess, model, data, batch_size):
    epoch_size = int(data.num_samples / batch_size)
    losses = []
    for step in xrange(epoch_size):
        # The way batches are fetched now shuffles the data; check whether this is correct
        train_x, train_y = data.next_batch(batch_size)
        loss, _ = sess.run([model.loss, model.train_op],
                           {model.x: train_x, model.y: train_y})
        losses.append(loss)
        if step % (epoch_size / 5) == 5:
            tf.logging.info("%.2f: %.3f", step * 1.0 / epoch_size, np.mean(losses))
    return np.mean(losses)

def evaluate(sess, model, data, batch_size):
    predicted_values = []
    actual_values = []
    for i in xrange(int(data.num_samples / batch_size)):
        val_x, val_y = data.next_batch(batch_size)
        predictions = sess.run(model.predictions, {model.x: val_x, model.y: val_y})
        predicted_values.append(predictions)
        actual_values.append(val_y)
    predicted_values = np.concatenate(predicted_values).ravel()
    actual_values = np.concatenate(actual_values).ravel()
    return roc_auc_score(actual_values, predicted_values)
As you can see, my model is very simple: just a single hidden layer with a softmax output. However, I want to find the optimal number of hidden nodes, so I run this code:
sess = tf.InteractiveSession()
hidden_nodes = 100
epochs = 100
increment = 100
max_hidden_nodes = 20000
while (hidden_nodes <= max_hidden_nodes):
    print("HIDDEN NODES: %d\n" % (hidden_nodes))
    output_file = open('AUC_SingleLayer/SingleLayer_' + str(hidden_nodes) + '.txt', 'w')
    model = SingleDNNClassifier(hidden_nodes, 16000, 2, 100)
    tf.initialize_all_variables().run()
    for i in range(epochs):
        print("\tEPOCH: %d\n" % (i + 1))
        train(sess, model, dataset.training, 100)
        valid_auc = DLM.evaluate(sess, model, dataset.testing, 100)
        output_file.write('EPOCH %d: %.5f' % (i + 1, valid_auc))
        if (i < epochs - 1):
            output_file.write('\n')
    hidden_nodes += increment
However, I keep getting an error that my Linux workstation is out of memory, because the weight and bias variables in my hiddenLayer function are re-created on every iteration, and of course the more nodes there are, the more memory each variable takes up.
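My understanding of the problem (I may be wrong about this) is that every pass through the while loop adds a brand-new set of variables to the same default graph, so nothing created for the previous hidden-node counts is ever released. A rough, untested illustration of what I mean, using the SingleDNNClassifier class defined above:

# Rough illustration (untested): each model adds four variables (hidden weights/biases,
# output weights/biases) to the default graph, and the old ones never go away.
for n in (100, 200, 300):
    _ = SingleDNNClassifier(n, 16000, 2, 100)
    print(len(tf.all_variables()))  # prints 4, 8, 12, ... instead of staying at 4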
I am trying to follow this link: https://github.com/tensorflow/tensorflow/issues/2311
It looks like I could feed values into a placeholder with an unspecified size for the dimension I keep changing, but I'm not sure whether that is feasible. I would appreciate any help or direction.
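To be concrete, something like the following is what I had in mind (untested, and I suspect it only helps for placeholders, since the weight and bias tf.Variables still need a fully defined shape when they are created):

# What I had in mind (untested): leave the changing dimension unspecified so the same
# placeholder could be fed hidden-layer outputs of different widths on each iteration.
hidden_out = tf.placeholder(tf.float32, shape=(100, None))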
TL;DR: I need to stop re-initializing the variables because I am running out of memory, but the size of those variables changes on every iteration of my while loop.