Every time I run the code below on the GPU, the kernel dies. The problem does not seem to be related to CUDA or cuDNN, because I can run other code on the GPU without any issue.
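For example, a small standalone GPU test along these lines runs fine (this is only an illustrative sketch of the kind of check I mean, not the exact script I used):

import tensorflow as tf

# Minimal GPU sanity check: a single convolution on random input, pinned to the GPU.
tf.reset_default_graph()
with tf.device('/gpu:0'):
    x = tf.random_normal([1, 84, 84, 4])
    w = tf.random_normal([8, 8, 4, 32])
    y = tf.nn.conv2d(x, w, strides=[1, 4, 4, 1], padding='VALID')

with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    print(sess.run(tf.reduce_mean(y)))

The helper functions and weight shapes for the network are: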
import tensorflow as tf

# define shapes
weight_shapes = [(8, 8, 4, 32), (4, 4, 32, 64), (3, 3, 64, 64), (3136, 512),
                 (512, action_size), (32,), (64,), (64,), (512,), (action_size,)]

def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    #x = tf.layers.batch_normalization(x, training=True, trainable=True, epsilon=1e-5, name='batch_norm1')
    return tf.nn.relu(x)

def dense(x, W, b):
    return tf.add(tf.matmul(x, W), b)

def policy(inputs_, W):
    with tf.name_scope("conv1"):
        # Input is 84x84x4
        conv1 = conv2d(inputs_, W[0], W[5], strides=4)
    with tf.name_scope("conv2"):
        conv2 = conv2d(conv1, W[1], W[6], strides=2)
    with tf.name_scope("conv3"):
        conv3 = conv2d(conv2, W[2], W[7], strides=1)
        ## --> [7, 7, 64]
    with tf.name_scope("flatten"):
        flatten = tf.contrib.layers.flatten(conv3)
        ## --> [3136]
    with tf.name_scope("fc1"):
        fc1 = tf.nn.relu(dense(flatten, W[3], W[8]))
    with tf.name_scope("logits"):
        logits = dense(fc1, W[4], W[9])
    return logits
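For reference, the shape comments in policy() follow from the usual VALID-padding size arithmetic, out = (in - filter) // stride + 1 (just a worked check of the numbers, nothing new):

# VALID padding: out = (in - filter) // stride + 1
conv1_out = (84 - 8) // 4 + 1    # 20 -> conv1 output is 20x20x32
conv2_out = (20 - 4) // 2 + 1    # 9  -> conv2 output is 9x9x64
conv3_out = (9 - 3) // 1 + 1     # 7  -> conv3 output is 7x7x64
flat_size = 7 * 7 * 64           # 3136, matching weight_shapes[3] = (3136, 512)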
The main part of the code is:
################################
### build the network policy####
################################
class PGNetwork:
    def __init__(self, state_size, action_size, learning_rate):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.weights = [
            tf.get_variable('wc1', shape=weight_shapes[0], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('wc2', shape=weight_shapes[1], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('wc3', shape=weight_shapes[2], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('wd1', shape=weight_shapes[3], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('wd2', shape=weight_shapes[4], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bc1', shape=weight_shapes[5], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bc2', shape=weight_shapes[6], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bc3', shape=weight_shapes[7], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bd1', shape=weight_shapes[8], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bd2', shape=weight_shapes[9], initializer=tf.contrib.layers.xavier_initializer())
        ]

        with tf.name_scope("inputs"):
            # We create the placeholders
            # *state_size means that we take each element of state_size in the tuple, hence it is as if we wrote
            # [None, 84, 84, 4]  # the first argument is related to batch size
            self.inputs_ = tf.placeholder(tf.float32, [None, *state_size], name="inputs_")
            self.actions = tf.placeholder(tf.int32, [None, action_size], name="actions")
            self.discounted_episode_rewards_ = tf.placeholder(tf.float32, [None, ], name="discounted_episode_rewards_")
            self.flat_multiplier_tensor = tf.placeholder(tf.float32, shape=[None])

            # Add this placeholder for having this variable in tensorboard
            self.mean_reward_ = tf.placeholder(tf.float32, name="mean_reward")

        with tf.variable_scope('PGNetwork'):
            self.logits = policy(self.inputs_, self.weights)

        with tf.name_scope("softmax"):
            self.action_distribution = tf.nn.softmax(self.logits)

        with tf.name_scope("sample_gradient"):
            self.split_inputs = tf.unstack(self.inputs_, num=batch_size_zero_padded, axis=0)
            self.split_actions = tf.unstack(self.actions, num=batch_size_zero_padded, axis=0)
            self.intermediate = [tf.expand_dims(self.split_inputs[i], 0) for i in range(batch_size_zero_padded)]
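(To explain what that last block is for: sample_gradient splits the zero-padded batch back into per-example tensors. On a toy tensor, tf.unstack and tf.expand_dims behave as sketched below; this snippet is only an illustration and is not part of the network code.)

import tensorflow as tf
import numpy as np

# Toy illustration of tf.unstack along the batch axis.
toy = tf.constant(np.arange(6).reshape(3, 2), dtype=tf.float32)  # batch of 3 examples, each of size 2
parts = tf.unstack(toy, num=3, axis=0)                           # 3 tensors of shape (2,)
expanded = [tf.expand_dims(p, 0) for p in parts]                 # each back to shape (1, 2)

with tf.Session() as sess:
    print(sess.run(expanded))  # -> three (1, 2) arrays: [[0., 1.]], [[2., 3.]], [[4., 5.]]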
Then, when I try to run the following code, the kernel dies with the message "The kernel appears to have died. It will restart automatically.":
# Reset the graph
tf.reset_default_graph()
# Instantiate the PGNetwork
PGNetwork = PGNetwork(state_size, action_size, learning_rate)
# Initialize Session
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
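In case it is a memory or device-placement problem, I can also create the session with the GPU options made explicit (a minimal sketch; allow_growth keeps TensorFlow from grabbing all GPU memory up front, and log_device_placement logs where each op is placed):

# Sketch: create the session with explicit GPU options to help narrow things down.
config = tf.ConfigProto(log_device_placement=True)  # print where each op runs
config.gpu_options.allow_growth = True              # do not reserve all GPU memory up front
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())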
I would greatly appreciate any ideas about what might be going wrong! I am using Python 3.6 with tensorflow-gpu 1.9.0.