Question

我有一个相当大的网络，GPU内存不够用。这并不是代码中的错误，而是网络本身太大，无法放入显存。我甚至尝试过此处（原文为超链接“here”）给出的GPU配置建议。

例如，我已经尝试了下面的gpu_options ......

# Session configuration that was tried. The option meanings noted below are
# taken from the TensorFlow GPUOptions documentation, not from this file --
# verify them against the installed TF version.
gpu_options = tf.GPUOptions()
config = tf.ConfigProto(gpu_options=gpu_options)
# presumably lets the allocator claim GPU memory on demand instead of
# reserving it all up front -- TODO confirm
config.gpu_options.allow_growth = True
# config.optimizer_options.opt_level = 2
# config.graph_options.enable_recv_scheduling = True
# config.graph_options.build_cost_model = 1
# NOTE(review): this looks like it caps the process at roughly 10% of each
# GPU's memory, which would make out-of-memory errors more likely rather than
# less -- confirm and consider removing or raising it.
config.gpu_options.per_process_gpu_memory_fraction = 0.1

但我的内存仍然不足。GitHub用户@girving在此处（原文为超链接“here”）告诉我，TensorFlow不会处理内存溢出（这在我看来说不通，他们为什么不实现这一点呢）。

不过，他也表示存在变通方法。但我找不到任何曾经不得不实现这种变通方法的人所提供的资料。谁能给我指出正确的方向？我能否通过某种方式实现排队（queuing）来解决这个问题？

供参考，下面是一些代码……程序在执行 sess.run(init) 时耗尽内存。

#Kendall Weihe
#This is a CNN that handles 3D data
#Adjust network parameters below, also adjust data directory

import tensorflow as tf
import pdb
import numpy as np
from numpy import genfromtxt
from PIL import Image
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
from tensorflow.tensorflow.scroll import scroll_data

# Parameters
# learning_rate is handed to AdamOptimizer further down. training_iters,
# batch_size and display_step are not referenced in the visible part of this
# script.
learning_rate = 0.001
training_iters = 1000000
batch_size = 1
display_step = 1

# Network Parameters
# n_images: number of image files read by input_data() and number of
# iterations of the prediction loop.
n_images = 100
n_input_x = 396 # Input image x-dimension
n_input_y = 396 # Input image y-dimension
# n_input_z: number of stacked images per sample (second axis of the input
# placeholder below).
n_input_z = 5
# n_hidden: unit count passed to BasicLSTMCell in conv_net().
n_hidden = 128
n_classes = 2 # Binary classification -- on a surface or not
# n_output: width of each per-step projection applied to the LSTM outputs.
n_output = n_input_x * n_classes

# NOTE(review): this module-level value is not referenced in the visible code;
# conv_net() receives the keep_prob placeholder as its `dropout` argument.
dropout = 0.75 # Dropout, probability to keep units

# tf Graph input
# x: input volume, [batch, n_input_z, n_input_x, n_input_y]; the batch size is
# left open (None).
x = tf.placeholder(tf.float32, [None, n_input_z, n_input_x, n_input_y])
# y: ground truth with one value per class for every input element.
y = tf.placeholder(tf.float32, [None, n_input_z, n_input_x, n_input_y, n_classes], name="ground_truth")
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)

def input_data():
    """Load the input crops into a single float array.

    Reads ``n_images`` TIFF files named ``cropped00<i>.tif`` from the
    hard-coded crop directory and copies the top-left
    ``n_input_x`` x ``n_input_y`` region of each one into the result.

    Returns:
        numpy.ndarray of shape ``(n_images, n_input_x, n_input_y)`` with one
        image per leading index.

    Raises:
        IOError: if an image file cannot be opened.
        ValueError: if an image does not provide an
            ``n_input_x`` x ``n_input_y`` two-dimensional region.
    """
    data = np.empty((n_images, n_input_x, n_input_y))
    for i in range(n_images):
        filename = "/home/volcart/Documents/Data/input_crops/cropped00" + str(i) + ".tif"
        # The original opened the undefined name ``path`` here, which raised
        # NameError before any image was read.
        imarray = np.array(Image.open(filename))
        # One slice assignment per image replaces the per-pixel Python loops.
        data[i] = imarray[:n_input_x, :n_input_y]

    return data

# Create some wrappers for simplicity
def conv3d(x, W, b, strides=1):
    """Apply a 3D convolution, add the bias and pass the result through ReLU.

    Args:
        x: input tensor handed to ``tf.nn.conv3d``.
        W: convolution kernel.
        b: bias added to the convolution output.
        strides: stride used along each of the three middle axes.

    Returns:
        The ReLU-activated, biased convolution output.
    """
    stride_spec = [1] + [strides] * 3 + [1]
    convolved = tf.nn.conv3d(x, W, strides=stride_spec, padding='SAME')
    biased = tf.nn.bias_add(convolved, b)
    return tf.nn.relu(biased)

def maxpool3d(x, k=2):
    """Max-pool ``x`` in 3D with a window and stride of ``k`` on each middle axis.

    Args:
        x: input tensor handed to ``tf.nn.max_pool3d``.
        k: window size and stride along each of the three middle axes.

    Returns:
        The pooled tensor ('SAME' padding).
    """
    window = [1, k, k, k, 1]
    step = [1, k, k, k, 1]
    return tf.nn.max_pool3d(x, ksize=window, strides=step, padding='SAME')

def deconv3d(prev_layer, w, b, output_shape, strides):
    """Apply a transposed 3D convolution, add the bias and apply ReLU.

    Args:
        prev_layer: input tensor.
        w: kernel handed to ``tf.nn.conv3d_transpose``.
        b: bias added to the transposed-convolution output.
        output_shape: requested shape of the result.
        strides: full stride list handed to ``tf.nn.conv3d_transpose``.

    Returns:
        The ReLU-activated, biased output ("VALID" padding).
    """
    upsampled = tf.nn.conv3d_transpose(
        prev_layer, w, output_shape=output_shape, strides=strides, padding="VALID")
    return tf.nn.relu(tf.nn.bias_add(upsampled, b))

# Create model
def conv_net(x, weights, biases, dropout):
    """Build the network: three conv/pool stages, three transposed convs, an LSTM.

    Args:
        x: input tensor; reshaped here to
            ``[-1, n_input_z, n_input_x, n_input_y, 1]``.
        weights: dict holding the kernels ``wc1``-``wc3`` and ``wdc1``-``wdc3``.
        biases: dict holding the biases ``bc1``-``bc3`` and ``bdc1``-``bdc3``.
        dropout: keep probability, applied to the last transposed-convolution
            output and to the LSTM outputs.

    Returns:
        A list with one tensor per LSTM step (``n_input_y * n_input_z`` steps),
        each being the step output multiplied by that step's entry of the
        module-level ``lstm_weights`` plus its entry of ``lstm_biases``.
    """
    # Append a single-channel axis so the input is rank 5 for conv3d.
    x = tf.reshape(x, shape=[-1, n_input_z, n_input_x, n_input_y, 1])

    # Three conv3d stages, each followed by a k=2 max-pool.
    with tf.name_scope("conv1"):
        conv1 = conv3d(x, weights['wc1'], biases['bc1'])
        conv1 = maxpool3d(conv1, k=2)

    with tf.name_scope("conv2"):
        conv2 = conv3d(conv1, weights['wc2'], biases['bc2'])
        conv2 = maxpool3d(conv2, k=2)

    with tf.name_scope("conv3"):
        conv3 = conv3d(conv2, weights['wc3'], biases['bc3'])
        conv3 = maxpool3d(conv3, k=2)

    # The batch size is read from the tensor at run time and reused in every
    # transposed-convolution output shape.
    temp_batch_size = tf.shape(x)[0]

    with tf.name_scope("deconv1"):
        # Floor division keeps the static dimensions integral on Python 3 as
        # well; on Python 2 it equals the original integer ``/``.
        output_shape = [temp_batch_size, 2, n_input_x // 4, n_input_y // 4, 16]
        strides = [1, 2, 2, 2, 1]
        # Built inline because this layer uses "SAME" padding, whereas
        # deconv3d() always uses "VALID".
        conv4 = tf.nn.conv3d_transpose(conv3, weights['wdc1'], output_shape=output_shape, strides=strides, padding="SAME")
        conv4 = tf.nn.bias_add(conv4, biases['bdc1'])
        conv4 = tf.nn.relu(conv4)

    with tf.name_scope("deconv2"):
        output_shape = [temp_batch_size, 3, n_input_x // 2, n_input_y // 2, 8]
        strides = [1, 1, 2, 2, 1]
        conv5 = deconv3d(conv4, weights['wdc2'], biases['bdc2'], output_shape, strides)

    with tf.name_scope("deconv3"):
        output_shape = [temp_batch_size, n_input_z, n_input_x, n_input_y, 1]
        # Output layer of the convolutional part: no ReLU here.
        conv6 = tf.nn.conv3d_transpose(conv5, weights['wdc3'], output_shape=output_shape, strides=[1, 1, 2, 2, 1], padding="VALID")
        conv6 = tf.nn.bias_add(conv6, biases['bdc3'])
        conv6 = tf.nn.dropout(conv6, dropout)

    # Flatten to rows of width n_input_x and cut them into one chunk per LSTM
    # step.
    x = tf.reshape(conv6, [-1, n_input_x])
    x = tf.split(0, n_input_y * n_input_z, x)

    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, activation=tf.nn.relu)
    # Use the caller-supplied keep probability. The original hard-coded 0.75
    # here, so LSTM dropout stayed active even when the caller fed 1.0 for
    # inference.
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=dropout)
    outputs, _ = rnn.rnn(lstm_cell, x, dtype=tf.float32)

    # Every step has its own projection matrix and bias.
    return [tf.matmul(step_output, lstm_weights[step]) + lstm_biases[step]
            for step, step_output in enumerate(outputs)]

# Kernels for the three conv3d layers (wc*) and the three conv3d_transpose
# layers (wdc*) used by conv_net().
# NOTE(review): the tf.Variable objects are created without explicit names, so
# their creation order presumably fixes the auto-generated names the Saver
# looks up in the checkpoint -- avoid reordering; confirm.
# NOTE(review): the shape layout is assumed to be
# [depth, height, width, in_channels, out_channels] for conv3d and
# [depth, height, width, out_channels, in_channels] for conv3d_transpose --
# confirm against the TF docs. The bias sizes below agree with that reading.
weights = {
    # 2x2x2 kernel, 1 input channel, 8 output channels
    'wc1' : tf.Variable(tf.random_normal([2, 2, 2, 1, 8])),
    # 2x2x2 kernel, 8 input channels, 16 output channels
    'wc2' : tf.Variable(tf.random_normal([2, 2, 2, 8, 16])),
    # 2x2x2 kernel, 16 input channels, 32 output channels
    'wc3' : tf.Variable(tf.random_normal([2, 2, 2, 16, 32])),

    # transposed conv: 2x2x2 kernel, 32 input channels, 16 output channels
    'wdc1' : tf.Variable(tf.random_normal([2, 2, 2, 16, 32])),

    # transposed conv: 2x2x2 kernel, 16 input channels, 8 output channels
    'wdc2' : tf.Variable(tf.random_normal([2, 2, 2, 8, 16])),

    # transposed conv: 3x2x2 kernel, 8 input channels, 1 output channel
    'wdc3' : tf.Variable(tf.random_normal([3, 2, 2, 1, 8])),
}

# One bias vector per layer above; each length matches that layer's output
# channel count.
biases = {
    'bc1': tf.Variable(tf.random_normal([8])),
    'bc2': tf.Variable(tf.random_normal([16])),
    'bc3': tf.Variable(tf.random_normal([32])),
    'bdc1': tf.Variable(tf.random_normal([16])),
    'bdc2': tf.Variable(tf.random_normal([8])),
    'bdc3': tf.Variable(tf.random_normal([1])),
}

# Per-step output projections, keyed by LSTM step index and read by conv_net().
lstm_weights = {}
lstm_biases = {}

# NOTE(review): this creates n_input_y * n_input_z = 1980 separate
# [n_hidden, n_output] = [128, 792] matrices, about 2e8 parameters in total,
# before any optimizer state -- likely a major contributor to the memory
# problem; confirm.
for i in xrange(n_input_y * n_input_z):
    lstm_weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
    lstm_biases[i] = tf.Variable(tf.random_normal([n_output]))

# Construct model
# Builds the prediction graph and the flattened [-1, n_classes] views of the
# prediction and the ground truth that the loss and accuracy nodes consume.
with tf.name_scope("net") as scope:
    print "Building network..."
    # conv_net returns a Python list with one tensor per LSTM step.
    pred = conv_net(x, weights, biases, keep_prob)
    print "Network built!"

    # pdb.set_trace()
    # Combine the per-step tensors into one tensor and swap its first two
    # axes -- presumably moving the batch axis to the front; TODO confirm.
    pred = tf.transpose(tf.pack(pred),[1,0,2])
    pred = tf.reshape(pred, [-1, n_input_z, n_input_x, n_input_y, n_classes])
    # Reshape for cost function
    temp_pred = tf.reshape(pred, [-1, n_classes])
    temp_y = tf.reshape(y, [-1, n_classes])

# Loss: sigmoid cross-entropy between the flattened logits and labels.
with tf.name_scope("loss") as scope:
    # cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
    # NOTE(review): cost is left as the unreduced loss tensor (no reduce_mean,
    # unlike the disabled line above) -- confirm that minimizing the unreduced
    # tensor is intended.
    cost = (tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))

# Optimizer: Adam on the loss above, using the module-level learning_rate.
with tf.name_scope("opt") as scope:
    print "Initializing optimizer..."
    # NOTE(review): Adam presumably allocates additional state per trainable
    # variable, which adds to the memory footprint -- confirm.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    print "optimizer initialized!"

# pdb.set_trace()

# Evaluate model
with tf.name_scope("acc") as scope:
    # Accuracy is the fraction of elements whose thresholded prediction equals
    # the ground truth (assumes the labels are exactly 0.0 or 1.0).
    # The original cast (sigmoid - label) to int32 and compared it with 0; any
    # difference strictly between -1 and 1 truncates to 0, so almost every
    # element counted as correct regardless of the prediction.
    predicted_labels = tf.cast(tf.greater(tf.nn.sigmoid(temp_pred), 0.5), tf.float32)
    correct_pred = tf.equal(predicted_labels, temp_y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

with tf.name_scope("prediction-node") as scope:
    # Sigmoid of the flattened logits; fetched by the prediction loop.
    prediction_node = tf.nn.sigmoid(temp_pred)

# Initializing the variables
# Builds the variable initializer, the checkpoint Saver and the session
# configuration consumed by tf.Session below.
with tf.name_scope("initialize-and-config") as scope:
    print "Initializing variables & configuring..."
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()
    gpu_options = tf.GPUOptions()
    config = tf.ConfigProto(gpu_options=gpu_options)
    # presumably lets the allocator claim GPU memory on demand instead of
    # reserving it all up front -- TODO confirm
    config.gpu_options.allow_growth = True
    # config.optimizer_options.opt_level = 2
    # config.graph_options.enable_recv_scheduling = True
    # config.graph_options.build_cost_model = 1
    # NOTE(review): this looks like it caps the process at roughly 10% of each
    # GPU's memory, which would make out-of-memory errors more likely rather
    # than less -- confirm and consider removing or raising it.
    config.gpu_options.per_process_gpu_memory_fraction = 0.1
    print "Variables and configurations initialized!"

# Launch the graph: initialize, restore the checkpoint, then write one CSV of
# predictions per image index.
# print(...) with a single argument behaves the same on Python 2 and 3.
with tf.Session(config=config) as sess:
    print("Initializing session...")
    sess.run(init)
    print("Session initialized!")

    print("Restoring session...")
    saver.restore(sess, "/home/volcart/Documents/3D-CNN-2D-LSTM-reg-model/model.ckpt")
    print("Session restored!")

    # Freeze the graph so that nothing below can add ops to it.
    tf.get_default_graph().finalize()
    # Import data
    print("Importing data...")
    data = input_data()
    print("Data imported!")

    # Run one prediction per image index (this script does not train).
    for image_index in range(n_images):

        print("Prediction image number -- " + str(image_index))

        # NOTE(review): as in the original, the input is always the first
        # n_input_z images regardless of image_index, so every iteration feeds
        # the same volume -- confirm the intended windowing.
        temp = data[:n_input_z].reshape((1, n_input_z, n_input_x, n_input_y))
        prediction = sess.run(prediction_node, feed_dict={x: temp, keep_prob: 1.0})

        # NOTE(review): prediction_node is built from the flattened prediction
        # over all n_input_z slices, so reshaping to a single
        # (n_input_x, n_input_y, n_classes) slice looks like a size mismatch --
        # confirm which slice should be saved.
        prediction = prediction.reshape((n_input_x, n_input_y, n_classes))

        # Keep the first class channel. The original per-pixel loop tested the
        # undefined name ``l`` (NameError) and reused ``i``/``j`` as loop
        # indices, clobbering the image index used in the file name below.
        temp_arr1 = prediction[:, :, 0].astype(np.float64)

        csv_file = "/home/volcart/Documents/3D-CNN-2D-LSTM-pred/3D-CNN-2D-LSTM-step-" + str(image_index) + ".csv"
        np.savetxt(csv_file, temp_arr1, delimiter=",")

Tensorflow网络对于GPU内存而言太大

0 个答案: