TensorFlow:运行一段时间后出现形状不匹配(shape mismatch)错误

时间:2018-06-26 15:57:30

标签: python tensorflow tensorflow-datasets

大家下午好

我目前在使用 TensorFlow 时遇到了一些麻烦:由于某种原因,程序在运行大约三个半小时后出现了形状(shape)错误。文件通过 TensorFlow 输入管道加载,并创建了两个可重新初始化的数据集,分别用于训练和测试。我确信数据的形状是正确的,因为我用硬编码的方式把数据 reshape 成了期望的形状,而这一步从未报错。问题在于,网络运行到某个时刻时,展平(flatten)操作收到了一个样本数量不正确的批次。程序随即崩溃,除了"张量中的元素个数不能被 10(我的批大小)整除"之外,没有给出任何其他解释。老实说,这让我完全无法理解,因为这批数据与其他没有出问题的批次经过的是完全相同的管道。

我可以根据需要提供代码,但我认为这更多是无法从框架中理解某些概念。

预先感谢所有帮助。

编辑:代码如下。命名约定:前缀 t 对应处理时域数据(X)的层,f 对应处理频域数据(FREQ)的层,q 对应处理倒谱数据(QUEF)的层,tf 对应处理二维数据(即 X 的频谱图 SPECG)的层,Y 为标签。除标签为 tf.int64 外,所有数据均为 tf.float32。

编辑2:产生问题的操作是在qsubnet_out上展平

编辑3:可能也是最重要的一点:似乎某些层的输出收敛到了 NaN。

训练循环:

# Build the prediction op ONCE, before the loop. Calling tf.argmax() inside the
# loop adds a new node to the graph on every logging step, so the graph (and
# its memory footprint) keeps growing for the whole run.
predicted_class = tf.argmax(logit, 1)

for i in range(FLAGS.max_steps):
    # Step start timestamp; its consumer is not visible in this excerpt.
    start = time.time()
    # One optimisation step on a training batch.
    sess.run([train], feed_dict={handle: train_handle})
    if i % 10 == 0:
        # Every 10th step: fetch summaries and metrics through the training
        # handle. NOTE(review): this is a separate sess.run() call, so it
        # presumably pulls another batch from the iterator - confirm that
        # this is intended.
        summary_op, entropy, acc, expected, output = sess.run(
            [merged, loss, accuracy, Y, predicted_class],
            feed_dict={handle: train_handle})
        # Same summaries through the test handle. This rebinds `summary_op`,
        # so the training summary must be consumed before this line if it is
        # needed.
        summary_op, _, _ = sess.run(
            [merged, loss, accuracy],
            feed_dict={handle: test_handle})

训练操作(training op)的定义:

# Layer hyper-parameters consumed by net.run().
#   * '<prefix>c<k>' entries are [filters, kernel_size] for the k-th
#     convolution of a branch (t = time, f = frequency, q = cepstrum,
#     tf = spectrogram / 2-D).
#   * 'dense<k>' entries are the unit counts of the fully connected layers.
#   * 'out' is the width of the final layer (one unit per class).
W = {
    # Time-domain branch (1-D convolutions).
    'tc1': [64, 3],
    'tc2': [128, 3],
    'tc3': [256, 5],
    'tc4': [128, 2],
    # Frequency-domain branch (1-D convolutions).
    'fc1': [64, 3],
    'fc2': [128, 3],
    'fc3': [256, 5],
    'fc4': [128, 2],
    # Cepstrum branch (1-D convolutions).
    'qc1': [64, 3],
    'qc2': [128, 3],
    'qc3': [256, 5],
    'qc4': [128, 2],
    # Spectrogram branch (2-D convolutions, kernel given as (height, width)).
    'tfc1': [64, (3, 3)],
    'tfc2': [128, (3, 3)],
    'tfc3': [256, (5, 5)],
    'tfc4': [128, (2, 2)],
    # Fully connected head.
    'dense1': 1000,
    'dense2': 100,
    'dense3': 200,
    'dense4': 300,
    'dense5': 200,
    'out': NUM_CLASSES,
}
# Feedable iterator: the dataset actually read is selected at run time by
# feeding a string handle into `handle`.
# NOTE: `iter` shadows the Python builtin; the name is kept because code
# outside this excerpt may refer to it.
iter = tf.data.Iterator.from_string_handle(
    handle, train_dataset.output_types, train_dataset.output_shapes)
X, FREQ, QUEF, SPECG, Y = iter.get_next()

# Pin only the per-example dimensions and leave the batch dimension unknown.
# Hard-coding FLAGS.batch_size here promises a static batch size that the
# pipeline does not guarantee: when the dataset length is not a multiple of
# the batch size, the final batch is smaller and downstream reshape/flatten
# ops fail at run time with a shape mismatch. (Alternatively, build the
# dataset with batch(..., drop_remainder=True) so every batch is full.)
X.set_shape([None, 768, 14])
FREQ.set_shape([None, 384, 14])
QUEF.set_shape([None, 384, 14])
SPECG.set_shape([None, 65, 18, 14])

# Forward pass and mean sparse softmax cross-entropy over the batch.
logit = net.run(X, FREQ, QUEF, SPECG, W)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y, logits=logit))

文件net.py:

def _conv_subnet(inputs, W, prefix, conv, pool, pool_size, pool_strides,
                 regularizer, name_relus=True):
    """Build one convolutional branch of the network.

    Layout: conv1 -> relu -> pool -> conv2 -> conv3 -> relu -> pool -> conv4
    -> relu6. Every convolution uses stride 1, 'SAME' padding and a
    random-normal kernel initializer.

    Args:
        inputs: Input tensor of the branch.
        W: Hyper-parameter dict; W[prefix + 'c<k>'] is [filters, kernel_size].
        prefix: Branch prefix ('t', 'f', 'q' or 'tf'); used to build the
            variable-scope names ('tc1', ...) and the op names.
        conv: Convolution layer function (tf.layers.conv1d / conv2d).
        pool: Pooling layer function (tf.layers.max_pooling1d / 2d).
        pool_size: Pool window passed to `pool`.
        pool_strides: Pool strides passed to `pool`.
        regularizer: Kernel regularizer applied to every convolution.
        name_relus: If True the intermediate ReLU ops are named
            '<prefix>relu<k>'; if False they get TensorFlow's default name.

    Returns:
        The relu6-activated output tensor of the branch.
    """
    def conv_layer(x, index):
        key = '%sc%d' % (prefix, index)
        filters, kernel_size = W[key]
        return conv(inputs=x, filters=filters, kernel_size=kernel_size,
                    strides=1, padding='SAME',
                    kernel_initializer=tf.initializers.random_normal,
                    kernel_regularizer=regularizer, name=key)

    def relu_name(index):
        return '%srelu%d' % (prefix, index) if name_relus else None

    net = conv_layer(inputs, 1)
    net = tf.nn.relu(net, name=relu_name(1))
    net = pool(net, pool_size=pool_size, strides=pool_strides)

    net = conv_layer(net, 2)
    net = conv_layer(net, 3)
    net = tf.nn.relu(net, name=relu_name(2))
    net = pool(net, pool_size=pool_size, strides=pool_strides)

    net = conv_layer(net, 4)
    return tf.nn.relu6(net, '%srelu61' % prefix)


def run(X, FREQ, QUEF, SPECG, W, training=True):
    """Build the four-branch classifier graph and return its logits.

    Each input is batch-normalised over its last axis and fed through its own
    convolutional branch; the flattened branch outputs are concatenated and
    passed through five ReLU dense layers and a final linear layer.

    Args:
        X: Time-domain input for the 1-D 't' branch.
        FREQ: Frequency-domain input for the 1-D 'f' branch.
        QUEF: Cepstrum input for the 1-D 'q' branch.
        SPECG: Spectrogram input for the 2-D 'tf' branch.
        W: Hyper-parameter dict ('<prefix>c<k>' -> [filters, kernel_size],
            'dense<k>' -> units, 'out' -> number of output units).
        training: Forwarded to batch normalisation. Defaults to True, which
            is what this function previously hard-coded; pass False (or a
            boolean tensor) to use the moving statistics at evaluation time.

    Returns:
        Tensor of unscaled logits with W['out'] units per example.
    """
    # Local is called `time_in` so that it does not shadow the `time` module.
    time_in = tf.layers.batch_normalization(X, axis=-1, training=training, trainable=True)
    freq = tf.layers.batch_normalization(FREQ, axis=-1, training=training, trainable=True)
    quef = tf.layers.batch_normalization(QUEF, axis=-1, training=training, trainable=True)
    time_freq = tf.layers.batch_normalization(SPECG, axis=-1, training=training, trainable=True)
    # NOTE(review): per the TensorFlow docs the batch-norm moving-average
    # updates are placed in tf.GraphKeys.UPDATE_OPS and must be run by the
    # training op; that wiring is not visible here - confirm in the caller.

    regularizer = tf.contrib.layers.l2_regularizer(0.1)
    # NOTE(review): kernel_regularizer only registers the penalty in the
    # REGULARIZATION_LOSSES collection; verify the caller adds it to the loss.
    # NOTE(review): tf.initializers.random_normal defaults to stddev=1.0,
    # which is large for layers this wide and is a plausible source of NaNs;
    # left unchanged here because it is a modelling decision.

    # Time, cepstrum and frequency branches are placed on the second GPU
    # (creation order time -> cepstrum -> frequency is kept from the original).
    with tf.device('/GPU:1'):
        tsubnet_out = _conv_subnet(time_in, W, 't', tf.layers.conv1d,
                                   tf.layers.max_pooling1d, 2, 1, regularizer)
        qsubnet_out = _conv_subnet(quef, W, 'q', tf.layers.conv1d,
                                   tf.layers.max_pooling1d, 2, 1, regularizer)
        # The first ReLU of this branch used to be named 'trelu1' (copy-paste
        # from the time branch); it is now 'frelu1' like its siblings.
        fsubnet_out = _conv_subnet(freq, W, 'f', tf.layers.conv1d,
                                   tf.layers.max_pooling1d, 2, 1, regularizer)

    # Spectrogram branch and the dense head are placed on the first GPU.
    with tf.device('/GPU:0'):
        tfsubnet_out = _conv_subnet(time_freq, W, 'tf', tf.layers.conv2d,
                                    tf.layers.max_pooling2d, [2, 2], [1, 1],
                                    regularizer, name_relus=False)

        # Flatten every branch output to (batch, features).
        tsubnet_out = tf.layers.flatten(tsubnet_out)
        fsubnet_out = tf.layers.flatten(fsubnet_out)
        tfsubnet_out = tf.layers.flatten(tfsubnet_out)
        qsubnet_out = tf.layers.flatten(qsubnet_out)

        # Dense head on the concatenated features (order: t, f, q, tf).
        hidden = tf.concat((tsubnet_out, fsubnet_out, qsubnet_out, tfsubnet_out), 1)
        for layer_name in ('dense1', 'dense2', 'dense3', 'dense4', 'dense5'):
            hidden = tf.layers.dense(hidden, W[layer_name], tf.nn.relu,
                                     kernel_initializer=tf.initializers.random_normal,
                                     name=layer_name)

        # Linear output layer. The original applied ReLU here, but these
        # values are fed to sparse_softmax_cross_entropy_with_logits, which
        # expects unscaled logits; clamping them at zero discards negative
        # scores and blocks their gradients.
        out = tf.layers.dense(hidden, W['out'], activation=None, name='out')
    return out

1 个答案:

答案 0 :(得分:0)

几天之后,我终于找到了问题所在。它与我贴出的代码无关,而与 TensorFlow 数据集的创建方式有关:在做批处理(batch)时,如果数据集的长度不能被批大小整除,最后一个批次的样本数就会少于批大小,从而触发上述形状错误。解决办法是在批处理时将 drop_remainder 设为 True,丢弃这个不完整的批次。

我不会删除这个问题,因为我相信这是将来会有更多人遇到的问题,而且来源不易确定。