我正在使用卷积网络来实现深度Q网络。在我的实现中,状态是3D矩阵,但我也使用一系列状态(4个连续状态)来让网络了解环境中的运动,因此网络的实际输入是由4个连续的3D矩阵堆叠而成的张量。加上批次维度后,输入形状为:
[batch_size, height_of_state, width_of_state, number_of_channels_of_state, 4]
我的代码有错误:
conv1层的输入0与该层不兼容:预期ndim = 4,找到的ndim = 5。收到完整形状:[32、9、9、3、4]
难道不能在卷积层上使用5维输入吗?
class DQNNetwork_for_matrixes:
    """Deep Q-Network whose state is a 9x9x3 matrix stacked over 4 time steps.

    The public placeholder keeps the caller-facing 5-D layout
    [batch, height, width, channels, stack]; internally the stack axis is
    folded into the channel axis, because ``tf.layers.conv2d`` only accepts
    rank-4 input (this is the cause of the reported
    "expected ndim=4, found ndim=5" error).
    """

    def __init__(self, stacked_state_size, action_space, learning_rate, name='DQNetwork'):
        # stacked_state_size: state shape including the frame stack, e.g. [9, 9, 3, 4]
        # action_space:       number of discrete actions (width of the Q output)
        # learning_rate:      RMSProp step size
        self.stacked_state_size = stacked_state_size
        self.action_size = action_space
        self.learning_rate = learning_rate
        with tf.variable_scope(name):
            # Placeholders.  Input is [None, 9, 9, 3, 4]: a batch of 9x9x3
            # states stacked 4 deep along the last axis.
            self.inputs_ = tf.placeholder(tf.float32, [None, 9, 9, 3, 4],
                                          name="inputs")
            # One-hot (or masking) vector selecting the action taken.
            self.actions_ = tf.placeholder(tf.float32, [None, action_space],
                                           name="actions_")
            # target_Q is R(s, a) + gamma * max_a' Qhat(s', a')
            self.target_Q = tf.placeholder(tf.float32, [None], name="target")

            # BUG FIX: conv2d expects ndim=4 but the placeholder is ndim=5.
            # Fold the 4 stacked frames into the channel axis:
            # [None, 9, 9, 3, 4] -> [None, 9, 9, 12].  This is the standard
            # DQN frame-stacking layout and preserves all input values.
            self.inputs_flat = tf.reshape(self.inputs_, [-1, 9, 9, 3 * 4])

            # First convnet: CNN + ELU
            self.conv1 = tf.layers.conv2d(
                inputs=self.inputs_flat,
                filters=32,
                kernel_size=[1, 1],
                padding="VALID",
                # Consistent with conv2/conv3 (was None in the original).
                kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                name="conv1")
            self.conv1_out = tf.nn.elu(self.conv1, name="conv1_out")

            # Second convnet: CNN + ELU
            self.conv2 = tf.layers.conv2d(
                inputs=self.conv1_out,
                filters=64,
                kernel_size=[3, 3],
                padding="VALID",
                kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                name="conv2")
            self.conv2_out = tf.nn.elu(self.conv2, name="conv2_out")

            # Third convnet: CNN + ELU
            self.conv3 = tf.layers.conv2d(
                inputs=self.conv2_out,
                filters=64,
                kernel_size=[3, 3],
                padding="VALID",
                kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                name="conv3")
            self.conv3_out = tf.nn.elu(self.conv3, name="conv3_out")

            self.flatten = tf.contrib.layers.flatten(self.conv3_out)
            self.fc = tf.layers.dense(
                inputs=self.flatten,
                units=512,
                activation=tf.nn.elu,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                name="fc1")
            # One raw Q-value per action; no activation on the output layer.
            self.output = tf.layers.dense(
                inputs=self.fc,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                units=self.action_size,
                activation=None)

            # Q is our predicted Q value for the chosen action.
            # BUG FIX: sum over the action axis only (axis=1) so Q has shape
            # [batch]; the original summed over the entire tensor, collapsing
            # Q to a scalar and corrupting the TD error against target_Q.
            self.Q = tf.reduce_sum(tf.multiply(self.output, self.actions_), axis=1)

            # Loss: mean squared TD error, mean over batch of (Qtarget - Q)^2.
            self.loss = tf.reduce_mean(tf.square(self.target_Q - self.Q))
            self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
# Reset the default graph before building the network so repeated runs
# (e.g. re-executing a notebook cell) do not accumulate duplicate scopes.
tf.reset_default_graph()

# BUG FIX: the original call passed 32 (a batch size) as stacked_state_size
# and the state shape [9, 9, 3, 4] as action_space, shifting every later
# argument by one (action_space landed in learning_rate, learning_rate in
# name).  The signature is (stacked_state_size, action_space, learning_rate,
# name) — the batch size is never a constructor argument; it is the leading
# None dimension of the input placeholder.
DQNetwork_matrix = DQNNetwork_for_matrixes([9, 9, 3, 4], action_space, learning_rate)