I am using AlexNet and an LSTM for action recognition. The LSTM's input is the output of AlexNet's last conv layer, but from the start of training the predictions within a batch are all identical, and in the end every prediction is 0. I can't find where the problem is. In the log below, the constant array in each pair is the network's predictions for the batch; a shape-only sketch of the data flow I expect follows the log.
train step 66: 5.6914
[92 58 83 55 95 80 46 45 2 79 67 99 68 4 12 7 61 44 83 12]
[47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47]
train step 67: 5.99625
[95 56 94 9 34 90 58 79 68 25 27 0 19 76 15 60 52 15 24 81]
[47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47]
train step 68: 6.31583
[76 70 59 57 35 6 37 93 17 31 17 38 59 35 34 59 52 9 91 46]
[56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56]
train step 69: 7.09382
[39 47 20 12 96 64 93 36 0 27 76 54 11 15 25 13 62 71 23 49]
[56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56]
train step:69 0
train step 70: 6.44368
[49 12 58 24 1 24 43 41 50 9 5 5 81 85 94 1 50 96 14 28]
[12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12]
train step 71: 5.61211
[ 37 50 14 99 100 47 62 96 76 30 61 26 23 42 3 95 56 44 67 87]
[12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12]
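For reference, this is the data flow I am aiming for (a minimal shape-only sketch in plain NumPy, not my actual code; batch_size = 20 and n_steps = 15 match the constants below, and 9216 = 6 * 6 * 256 is the flattened pool5 size for a 224x224 input):

    import numpy as np

    batch_size, n_steps = 20, 15
    # 20 videos * 15 frames are stacked and pushed through AlexNet in one step
    frames = np.zeros((batch_size * n_steps, 224, 224, 3))
    # flattened pool5 features, one row per frame
    features = np.zeros((frames.shape[0], 6 * 6 * 256))
    # regrouped into per-video sequences for the LSTM,
    # as tf.reshape(alexnet_output, [-1, n_steps, dim]) does below
    sequences = features.reshape(batch_size, n_steps, -1)
    print(sequences.shape)  # (20, 15, 9216)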
What I did:
import tensorflow as tf
import numpy as np

def variable_with_weight_loss(shape, stddev, wl):
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if wl is not None:
        weight_loss = tf.multiply(tf.nn.l2_loss(var), wl, name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var

def print_activations(t):
    print(t.op.name, '', t.get_shape().as_list())

def dropout(x, keep_prob, name=None):
    return tf.nn.dropout(x, keep_prob, name)

def LRN_layer(x, R, alpha, beta, name, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=R, alpha=alpha, beta=beta, name=name, bias=bias)
def alexnet(images):
    parameters = []
    with tf.name_scope('conv1') as scope:
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope)
        print_activations(conv1)
        parameters += [kernel, biases]
    lrn1 = tf.nn.lrn(conv1, 2, bias=1.0, alpha=0.001 / 9, beta=0.75, name='lrn1')
    pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool1')
    print_activations(pool1)
    with tf.name_scope('conv2') as scope:
        kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[192], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(bias, name=scope)
        parameters += [kernel, biases]
        print_activations(conv2)
    lrn2 = tf.nn.lrn(conv2, 2, bias=1.0, alpha=0.001 / 9, beta=0.75, name='lrn2')
    pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool2')
    print_activations(pool2)
    with tf.name_scope('conv3') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name=scope)
        parameters += [kernel, biases]
        print_activations(conv3)
    with tf.name_scope('conv4') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv4 = tf.nn.relu(bias, name=scope)
        parameters += [kernel, biases]
        print_activations(conv4)
    with tf.name_scope('conv5') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv5 = tf.nn.relu(bias, name=scope)
        parameters += [kernel, biases]
        print_activations(conv5)
    pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool5')
    print_activations(pool5)
    ## FC layers (currently commented out, so alexnet returns the flattened pool5)
    with tf.name_scope('FC1') as scope:
        reshape = tf.layers.flatten(pool5)
        '''
        dim = reshape.get_shape()[1].value
        weight = variable_with_weight_loss(shape=[dim, 4096], stddev=0.01, wl=0.004)
        biases = tf.Variable(tf.constant(0.0, shape=[4096]), dtype=tf.float32, trainable=True)
        FC1 = tf.nn.relu(tf.matmul(reshape, weight) + biases)
        FC1 = tf.nn.dropout(FC1, 0.8)
        print_activations(FC1)
        with tf.name_scope('FC2') as scope:
            weight = variable_with_weight_loss(shape=[4096, 2048], stddev=0.001, wl=0.004)
            biases = tf.Variable(tf.constant(0.0, shape=[2048]), dtype=tf.float32, trainable=True)
            FC2 = tf.nn.relu(tf.matmul(FC1, weight) + biases)
            FC2 = tf.nn.dropout(FC2, 0.8)
            print_activations(FC2)
        with tf.name_scope('FC3') as scope:
            weight = variable_with_weight_loss(shape=[4096, 2048], stddev=0.001, wl=0.004)
            biases = tf.Variable(tf.constant(0.0, shape=[2048]), dtype=tf.float32, trainable=True)
            FC3 = tf.nn.relu(tf.matmul(FC2, weight) + biases)
            print_activations(FC3)
        '''
    return reshape, parameters
lr = 0.01
class_num = 101
batch_size = 20
num_layers = 3
n_hidden_units = 1024  # LSTM hidden units
n_steps = 15  # frames per video
n_inputs = 2048
X = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='input')
Y = tf.placeholder(tf.float32, shape=[None, class_num], name='label')
alexnet_output, _ = alexnet(X)
print(alexnet_output.shape)
dim = alexnet_output.get_shape()[1].value
output = tf.reshape(alexnet_output, [-1, n_steps, dim])
print(output.shape)
def unit_lstm():
    # define LSTM cell
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden_units, forget_bias=2.0, state_is_tuple=True)
    # dropout layer
    lstm_cell = tf.contrib.rnn.DropoutWrapper(cell=lstm_cell, input_keep_prob=1.0, output_keep_prob=1.0)
    return lstm_cell

# MultiRNNCell
mlstm_cell = tf.contrib.rnn.MultiRNNCell([unit_lstm() for i in range(num_layers)], state_is_tuple=True)
init_state = mlstm_cell.zero_state(batch_size, dtype=tf.float32)
outputs, state = tf.nn.dynamic_rnn(mlstm_cell, inputs=output, initial_state=init_state, time_major=False)
h_state = outputs[:, -1, :]  # output at the last time step
print(h_state.shape)
W1 = tf.get_variable("W1", shape=[n_hidden_units, 101], initializer=tf.contrib.layers.xavier_initializer())
bias1 = tf.get_variable("bias1", shape=[101], initializer=tf.contrib.layers.xavier_initializer())
y_pre = tf.matmul(h_state, W1) + bias1
print(y_pre.shape)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=y_pre))
train_op = tf.train.AdamOptimizer(lr).minimize(cross_entropy)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y_pre, 1), tf.argmax(Y, 1)), dtype=tf.float32))
[video_names, video_labels] = get_train_video(batch_size)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(100):
        imgs_value = []
        [names, labels] = sess.run([video_names, video_labels])
        for video in names:
            video = video.decode()
            image_value = get_img(video)
            imgs_value.append(image_value)
        videos_values = np.array(sum(imgs_value, []))
        '''
        for m in range(len(videos_values)):
            plt.imshow(videos_values[m])
            plt.show()
        print(names)
        '''
        loss, op, train_accuracy = sess.run([cross_entropy, train_op, accuracy], feed_dict={X: videos_values, Y: labels})
        print("train step %d: %g" % (i, loss))
        if (i + 1) % 10 == 0:
            print("train step:%d %g" % (i, train_accuracy))
The data-loading code (in a separate file):

import tensorflow as tf
import os
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from skimage import io

# get all videos and labels
def get_train_video(batch_size):
    train_video = []
    train_label = []
    train_filepath = 'F:/data/res_lstm/ucfTrainTestlist'
    imgs_path = r'F:/data/res_lstm/img'
    for i in range(3):
        with open(os.path.join(train_filepath, 'trainlist0' + str(i + 1) + '.txt')) as f:
            files = f.readlines()
            for line in files:
                video_name = line.split(' ')[0]
                video_label = int(line.split(' ')[1]) - 1
                train_video.append(imgs_path + '/' + video_name)
                train_label.append(video_label)
    train_label = tf.one_hot(np.array(train_label), 101)
    [data, label] = tf.train.slice_input_producer([train_video, train_label], num_epochs=None, shuffle=True)
    [video_names, video_labels] = tf.train.batch([data, label], batch_size=batch_size, num_threads=1,
                                                 capacity=64, allow_smaller_final_batch=False)
    return video_names, video_labels
# loads all frames of one video as a list of images, shape (15, 224, 224, 3)
def get_img(img_path):
    img_value = []
    imgs = os.listdir(img_path)
    imgs.sort(key=lambda x: int(x[:-4]))  # sort frames numerically by filename
    for img in imgs:
        picture = io.imread(os.path.join(img_path, img))
        normal_value = picture / 255.0
        '''
        plt.imshow(normal_value)
        plt.show()
        '''
        img_value.append(normal_value)
    return img_value