Can I change the number of hidden nodes in a deep learning model without rebuilding the whole model?

Asked: 2016-08-11 20:18:20

Tags: python machine-learning out-of-memory tensorflow deep-learning

I'm trying a brute-force grid search to find the optimal number of hidden nodes for a TensorFlow deep learning model. I don't care much how long the program takes, but I've found that it runs out of memory because it has to hold all of the tf.Variables. The code that builds my model is below:

def hiddenLayer(input_data, num_nodes, num_inputs, layer_num):
    #Initialize weights from a truncated normal with stddev 1/sqrt(num_inputs)
    weights = tf.Variable(tf.truncated_normal([num_inputs, num_nodes],
        stddev=1.0 / math.sqrt(float(num_inputs))), name='hidden' + str(layer_num) + '_weights')
    #Initialize all biases as zero
    biases = tf.Variable(tf.zeros([num_nodes]), name='hidden' + str(layer_num) + '_biases')
    #Apply a ReLU to the affine transform of the inputs
    return tf.nn.relu(tf.matmul(input_data, weights) + biases)



def softmaxOutput(input_data, num_inputs, num_outputs):
    #Initialize weights from a truncated normal with stddev 1/sqrt(num_inputs)
    weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], 
        stddev=1.0 / math.sqrt(float(num_inputs))), name='output_weights')
    #Initialize all biases as zero
    biases = tf.Variable(tf.zeros([num_outputs]), name='output_biases')
    #Return the unnormalized LOGITS; sparse_softmax_cross_entropy_with_logits
    #applies the softmax itself, so applying tf.nn.softmax here would do it twice
    return tf.matmul(input_data, weights) + biases

def calculate_loss(logits, labels):
    labels = tf.to_int64(labels)
    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='xentropy'))


class SingleDNNClassifier(object):
    def __init__(self, num_nodes, num_inputs, num_outputs, batch_size, layer_num=1, lr=0.01):
        #Define the feature and label placeholders
        self.x = x = tf.placeholder(tf.float32, shape=(batch_size, num_inputs))
        self.y = y = tf.placeholder(tf.int32, shape=(batch_size))
        #Run the input data through the first hidden layer
        x = hiddenLayer(x, num_nodes, num_inputs, layer_num)
        #Run the hidden-layer output through the output layer to get the logits
        self.logits = logits = softmaxOutput(x, num_nodes, num_outputs)
        #Get the predicted labels
        self.predictions = tf.argmax(logits,1)
        #Calculate the loss
        self.loss = xeloss = calculate_loss(logits, y)
        #Define the training operation for this model
        self.train_op = tf.train.GradientDescentOptimizer(lr).minimize(xeloss)


def train(sess, model, data, batch_size):
    epoch_size = int(data.num_samples/batch_size)
    losses = []

    for step in xrange(epoch_size):
        #The way you get batches now shuffles the data, check to see if this is correct
        train_x, train_y = data.next_batch(batch_size)
        loss, _ = sess.run([model.loss, model.train_op],
            {model.x: train_x, model.y: train_y})
        losses.append(loss)
        if step % (epoch_size/5) == 5:
            tf.logging.info("%.2f: %.3f", step * 1.0 / epoch_size, np.mean(losses))
    return np.mean(losses)


def evaluate(sess, model, data, batch_size):
    predicted_values = []
    actual_values = []

    for i in xrange(int(data.num_samples/batch_size)):
        val_x, val_y = data.next_batch(batch_size)
        predictions = sess.run(model.predictions, {model.x: val_x, model.y: val_y})
        predicted_values.append(predictions)
        actual_values.append(val_y)
    predicted_values = np.concatenate(predicted_values).ravel()
    actual_values = np.concatenate(actual_values).ravel()

    return roc_auc_score(actual_values, predicted_values)

As you can see, my model is very simple: just a single-layer network with a softmax output. However, I want to find the optimal number of hidden nodes, so I run this code:

sess = tf.InteractiveSession()

hidden_nodes = 100
epochs = 100
increment = 100
max_hidden_nodes = 20000

while (hidden_nodes <= max_hidden_nodes):
    print ("HIDDEN NODES: %d\n" %(hidden_nodes))
    output_file = open('AUC_SingleLayer/SingleLayer_' + str(hidden_nodes) + '.txt', 'w')
    model = SingleDNNClassifier(hidden_nodes,16000,2,100)

    tf.initialize_all_variables().run()

    for i in range(epochs):
        print ("\tEPOCH: %d\n" %(i+1))
        train(sess, model, dataset.training, 100)
        valid_auc = DLM.evaluate(sess, model, dataset.testing, 100)
        output_file.write('EPOCH %d: %.5f' % (i+1, valid_auc))
        if (i < epochs-1):
            output_file.write('\n')
    output_file.close()
    hidden_nodes += increment

However, I keep hitting an error that my Linux workstation is out of memory, because each pass through the loop has to re-initialize the weight and bias variables in my hiddenLayer function. And of course, the more nodes there are, the more memory each variable takes up.
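One way to keep the footprint bounded (not from the question; a sketch using the 1.x-style API via tf.compat.v1, which matches the code above) is to build each candidate model inside its own tf.Graph instead of the shared default graph, so the previous grid point's variables can be garbage-collected once its graph goes out of scope:

```python
import tensorflow.compat.v1 as tf  # 1.x-style API, matching the question's code

def variables_in_fresh_graph(num_nodes, num_inputs=16000):
    # Build the layer inside its own Graph rather than the shared
    # default graph, so variables do not pile up across iterations.
    graph = tf.Graph()
    with graph.as_default():
        tf.Variable(tf.zeros([num_inputs, num_nodes]), name='hidden1_weights')
        tf.Variable(tf.zeros([num_nodes]), name='hidden1_biases')
        # Inside this context, only this graph's variables are visible.
        return len(tf.global_variables())

# Each grid point sees exactly its own two variables; once `graph`
# is unreferenced, Python can reclaim the associated memory.
counts = [variables_in_fresh_graph(n) for n in (100, 200, 300)]
```

The Session would then be created per graph (`tf.Session(graph=graph)`) instead of one InteractiveSession shared across the whole while loop.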

I'm trying to follow the approach from this link: https://github.com/tensorflow/tensorflow/issues/2311

The idea would be to feed values into a placeholder with an unspecified size along the dimension I'm changing, but I'm not sure whether that's feasible. I'd appreciate any help or direction.
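For what it's worth, the unspecified-dimension idea from the linked issue can be sketched like this (not the asker's code; tf.compat.v1 stands in for the 2016-era API, and the sizes are small stand-ins for the question's 16000 inputs). Note that weights fed through a placeholder are not trainable by an optimizer, which is part of the doubt above:

```python
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Leave the hidden width as None so one graph accepts any width at feed time.
x = tf.placeholder(tf.float32, shape=(None, 16))   # 16 stands in for 16000 inputs
w = tf.placeholder(tf.float32, shape=(16, None))   # hidden width unspecified
h = tf.nn.relu(tf.matmul(x, w))

with tf.Session() as sess:
    # Feed two different hidden widths through the same graph.
    out_small = sess.run(h, {x: np.ones((4, 16), np.float32),
                             w: np.ones((16, 3), np.float32)})
    out_large = sess.run(h, {x: np.ones((4, 16), np.float32),
                             w: np.ones((16, 7), np.float32)})
```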

TL;DR: I need to stop re-initializing variables because I'm running out of memory, but the size of the variables changes on every iteration of my while loop.
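An alternative that avoids creating new variables altogether (illustrated here with NumPy rather than TensorFlow, and with small stand-in sizes) is to allocate the weight matrix once at the maximum width and take a view of the first num_nodes columns at each grid point:

```python
import numpy as np

NUM_INPUTS = 160    # stand-in for the question's 16000 inputs
MAX_HIDDEN = 200    # stand-in for the 20000-node upper bound

# Allocate the largest weight matrix exactly once.
full_weights = np.zeros((NUM_INPUTS, MAX_HIDDEN), dtype=np.float32)

def weights_for(num_nodes):
    # Slicing returns a view into full_weights, not a copy,
    # so each grid point reuses the same allocation.
    return full_weights[:, :num_nodes]

w = weights_for(100)
```

In TensorFlow terms this would correspond to one maximum-size tf.Variable sliced down per grid point, at the cost of re-initializing that variable between grid points rather than between graphs.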

0 Answers:

No answers yet.