我在一块4GB显存的GPU(GTX 980)上使用TensorFlow训练包含卷积层的网络。
在加入卷积层之前,一切正常;但是当我开始使用卷积层时,发生了以下错误:
无法在流上排队卷积:CUDNN_STATUS_NOT_SUPPORTED
我听说这个问题与GPU内存有关
我知道单个tensorflow OP由于protobuf限制而最多可以有2GB,但是我的网络没有任何超过2GB的OP,所以这不是问题。
问题是,使用卷积层之后,我的整个网络大小(权重矩阵大小)反而更小,但是这个错误仍然一直发生。
当我将批量大小更改为非常小的数字时,不会发生错误,但SGD在这种小批量的情况下效果不佳。
可以使用像PyTorch这样的其他框架来解决吗?或者,我仍然可以在TensorFlow中以500000的批量大小运行吗?
还是与小内存(4GB)GPU相关?
请帮助我,我被卡住了。
网络摘要
数据摘要
模型代码
如你所见,这是一个非常小的网络
我也尝试过不含卷积层的更大网络,它运行良好
class MyModel:
    """Small TF1 graph-mode regression network.

    The graph is: a strided 1-D convolution over the flat input, then two
    ReLU fully-connected layers, then a linear output layer. It is trained
    with Adam on a halved mean-squared error.

    Call ``build_graph()`` once after construction to create the
    placeholders, variables, loss and optimizer on the default graph.
    """

    def __init__(self, learning_rate, batch_size, neighbor, weight_decay=0.9,
                 huber_delta=0.3, keep_prob_lst=None):
        """Store hyperparameters and create the global-step variable.

        Args:
            learning_rate: Learning rate passed to the Adam optimizer.
            batch_size: Fixed batch dimension baked into the placeholders
                and reshapes; every fed batch must have exactly this size.
            neighbor: Number of neighbours; the input width is
                ``neighbor * 3``.
            weight_decay: Stored on the instance; not used by the graph
                built in this class.
            huber_delta: Stored on the instance; not used by the graph
                built in this class.
            keep_prob_lst: Optional list stored as ``keep_prob_lst_val``;
                not used by the graph built in this class. Defaults to a
                fresh empty list per instance.
        """
        # A `[]` default would be shared by every instance; use a None
        # sentinel and allocate a new list per instance instead.
        if keep_prob_lst is None:
            keep_prob_lst = []

        self.isConv = True  # set here, never read in this class
        self.batch_size = batch_size
        self.lr = learning_rate
        self.input_size = neighbor * 3
        self.output_size = 1
        self.neighbor = neighbor
        self.weight_decay = weight_decay
        self.conv1_size = 10    # number of conv output channels
        self.layer1_size = 100
        self.layer2_size = 100
        self.huber_delta = huber_delta
        self.keep_prob_lst_val = keep_prob_lst
        # Incremented by the optimizer on every minimize() step.
        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                                       name='global_step')

    def _create_placeholders(self):
        """Define the fixed-batch-size placeholders for input and target."""
        with tf.name_scope("data"):
            self.input = tf.placeholder(
                tf.float32,
                shape=[self.batch_size, self.input_size],
                name='input')
            self.output = tf.placeholder(
                tf.float32,
                shape=[self.batch_size, self.output_size],
                name='output')

    def _create_weights(self):
        """Define the trainable weights and biases of all layers."""
        # No explicit tf.device() is set here, so variable placement is
        # left to TensorFlow's default device selection.
        with tf.name_scope("weights"):
            # conv1d filter: [filter_width=3, in_channels=1, out_channels]
            self.conv_W_1 = tf.Variable(
                tf.random_normal([3, 1, self.conv1_size],
                                 stddev=0.01, mean=0.0, seed=0),
                name='conv_layer1_weight')
            # Bias is applied to the flattened conv output, hence one entry
            # per (position, channel) pair rather than one per channel.
            self.conv_b_1 = tf.Variable(
                tf.zeros([1, self.conv1_size * self.neighbor]),
                name='conv_layer1_bias')

            self.W_1 = tf.Variable(
                tf.random_normal(
                    [self.conv1_size * self.neighbor, self.layer1_size],
                    stddev=0.01, mean=0.0, seed=0),
                name='layer1_weight')
            self.b_1 = tf.Variable(tf.zeros([1, self.layer1_size]),
                                   name='layer1_bias')

            self.W_2 = tf.Variable(
                tf.random_normal([self.layer1_size, self.layer2_size],
                                 stddev=0.01, mean=0.0, seed=0),
                name='layer2_weight')
            self.b_2 = tf.Variable(tf.zeros([1, self.layer2_size]),
                                   name='layer2_bias')

            self.W_out = tf.Variable(
                tf.random_normal([self.layer2_size, self.output_size],
                                 stddev=0.01, mean=0.0, seed=0),
                name='layer_out_weight')
            self.b_out = tf.Variable(tf.zeros([1, self.output_size]),
                                     name='layer_out_bias')

    def _create_loss(self):
        """Define the forward pass and the loss (mean of 0.5 * squared error)."""
        with tf.name_scope("loss"):
            # Add a trailing channel axis: [batch, neighbor * 3, 1].
            self.conv1_input = tf.reshape(
                self.input, [self.batch_size, self.neighbor * 3, 1])
            # Width-3 filter with stride 3 and VALID padding: each group of
            # three consecutive inputs yields one output position, giving
            # [batch, neighbor, conv1_size].
            self.conv1_output = tf.nn.conv1d(
                self.conv1_input, self.conv_W_1, 3, 'VALID')
            # Flatten to [batch, neighbor * conv1_size] and add the bias.
            # No non-linearity is applied to the conv output.
            self.conv1_output_reshape = (
                tf.reshape(self.conv1_output, [self.batch_size, -1])
                + self.conv_b_1)

            self.layer1_output = tf.nn.relu(
                tf.matmul(self.conv1_output_reshape, self.W_1) + self.b_1)
            self.layer2_output = tf.nn.relu(
                tf.matmul(self.layer1_output, self.W_2) + self.b_2)
            self.layer_out_output = (
                tf.matmul(self.layer2_output, self.W_out) + self.b_out)

            self.se = 0.5 * tf.square(
                self.layer_out_output - self.output, name='square')
            self.loss = tf.reduce_mean(self.se)

    def _create_optimizer(self):
        """Define the Adam training op that minimizes ``self.loss``."""
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.lr).minimize(self.loss,
                                            global_step=self.global_step)

    def build_graph(self):
        """Build the full graph: placeholders, weights, loss, optimizer."""
        self._create_placeholders()
        self._create_weights()
        self._create_loss()
        self._create_optimizer()
        # self._create_summaries()