I am using AlexNet and an LSTM for action recognition. The LSTM's input is the output of AlexNet's last conv layer, but from the start of training the predictions within a batch are all identical, and in the end every prediction is 0. I can't find where the problem is. In the log below, the constant array in each pair is the network's predictions for the batch; a shape-only sketch of the data flow I expect follows the log.
train step 66: 5.6914
[92 58 83 55 95 80 46 45 2 79 67 99 68 4 12 7 61 44 83 12]
[47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47]
train step 67: 5.99625
[95 56 94 9 34 90 58 79 68 25 27 0 19 76 15 60 52 15 24 81]
[47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47]
train step 68: 6.31583
[76 70 59 57 35 6 37 93 17 31 17 38 59 35 34 59 52 9 91 46]
[56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56]
train step 69: 7.09382
[39 47 20 12 96 64 93 36 0 27 76 54 11 15 25 13 62 71 23 49]
[56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56]
train step:69 0
train step 70: 6.44368
[49 12 58 24 1 24 43 41 50 9 5 5 81 85 94 1 50 96 14 28]
[12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12]
train step 71: 5.61211
[ 37 50 14 99 100 47 62 96 76 30 61 26 23 42 3 95 56 44 67 87]
[12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12]
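For reference, this is the data flow I am aiming for (a minimal shape-only sketch in plain NumPy, not my actual code; batch_size = 20 and n_steps = 15 match the constants below, and 9216 = 6 * 6 * 256 is the flattened pool5 size for a 224x224 input):

    import numpy as np

    batch_size, n_steps = 20, 15
    # 20 videos * 15 frames are stacked and pushed through AlexNet in one step
    frames = np.zeros((batch_size * n_steps, 224, 224, 3))
    # flattened pool5 features, one row per frame
    features = np.zeros((frames.shape[0], 6 * 6 * 256))
    # regrouped into per-video sequences for the LSTM,
    # as tf.reshape(alexnet_output, [-1, n_steps, dim]) does below
    sequences = features.reshape(batch_size, n_steps, -1)
    print(sequences.shape)  # (20, 15, 9216)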
What I did:
import tensorflow as tf
import numpy as np

def variable_with_weight_loss(shape, stddev, wl):
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if wl is not None:
        weight_loss = tf.multiply(tf.nn.l2_loss(var), wl, name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var

def print_activations(t):
    print(t.op.name, '', t.get_shape().as_list())

def dropout(x, keep_prob, name=None):
    return tf.nn.dropout(x, keep_prob, name)

def LRN_layer(x, R, alpha, beta, name, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=R, alpha=alpha, beta=beta, name=name, bias=bias)
def alexnet(images):
    parameters = []
    with tf.name_scope('conv1') as scope:
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope)
        print_activations(conv1)
        parameters += [kernel, biases]
    lrn1 = tf.nn.lrn(conv1, 2, bias=1.0, alpha=0.001 / 9, beta=0.75, name='lrn1')
    pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool1')
    print_activations(pool1)
    with tf.name_scope('conv2') as scope:
        kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[192], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(bias, name=scope)
        parameters += [kernel, biases]
        print_activations(conv2)
    lrn2 = tf.nn.lrn(conv2, 2, bias=1.0, alpha=0.001 / 9, beta=0.75, name='lrn2')
    pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool2')
    print_activations(pool2)
    with tf.name_scope('conv3') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name=scope)
        parameters += [kernel, biases]
        print_activations(conv3)
    with tf.name_scope('conv4') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv4 = tf.nn.relu(bias, name=scope)
        parameters += [kernel, biases]
        print_activations(conv4)
    with tf.name_scope('conv5') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv5 = tf.nn.relu(bias, name=scope)
        parameters += [kernel, biases]
        print_activations(conv5)
    pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool5')
    print_activations(pool5)
    ## FC layers (currently commented out, so alexnet returns the flattened pool5)
    with tf.name_scope('FC1') as scope:
        reshape = tf.layers.flatten(pool5)
        '''
        dim = reshape.get_shape()[1].value
        weight = variable_with_weight_loss(shape=[dim, 4096], stddev=0.01, wl=0.004)
        biases = tf.Variable(tf.constant(0.0, shape=[4096]), dtype=tf.float32, trainable=True)
        FC1 = tf.nn.relu(tf.matmul(reshape, weight) + biases)
        FC1 = tf.nn.dropout(FC1, 0.8)
        print_activations(FC1)
        with tf.name_scope('FC2') as scope:
            weight = variable_with_weight_loss(shape=[4096, 2048], stddev=0.001, wl=0.004)
            biases = tf.Variable(tf.constant(0.0, shape=[2048]), dtype=tf.float32, trainable=True)
            FC2 = tf.nn.relu(tf.matmul(FC1, weight) + biases)
            FC2 = tf.nn.dropout(FC2, 0.8)
            print_activations(FC2)
        with tf.name_scope('FC3') as scope:
            weight = variable_with_weight_loss(shape=[4096, 2048], stddev=0.001, wl=0.004)
            biases = tf.Variable(tf.constant(0.0, shape=[2048]), dtype=tf.float32, trainable=True)
            FC3 = tf.nn.relu(tf.matmul(FC2, weight) + biases)
            print_activations(FC3)
        '''
    return reshape, parameters
lr = 0.01
class_num = 101
batch_size = 20
num_layers = 3
n_hidden_units = 1024  # LSTM hidden units
n_steps = 15  # frames per video
n_inputs = 2048
X = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='input')
Y = tf.placeholder(tf.float32, shape=[None, class_num], name='label')
alexnet_output, _ = alexnet(X)
print(alexnet_output.shape)
dim = alexnet_output.get_shape()[1].value
output = tf.reshape(alexnet_output, [-1, n_steps, dim])
print(output.shape)
def unit_lstm():
    # define LSTM cell
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden_units, forget_bias=2.0, state_is_tuple=True)
    # dropout layer
    lstm_cell = tf.contrib.rnn.DropoutWrapper(cell=lstm_cell, input_keep_prob=1.0, output_keep_prob=1.0)
    return lstm_cell

# MultiRNNCell
mlstm_cell = tf.contrib.rnn.MultiRNNCell([unit_lstm() for i in range(num_layers)], state_is_tuple=True)
init_state = mlstm_cell.zero_state(batch_size, dtype=tf.float32)
outputs, state = tf.nn.dynamic_rnn(mlstm_cell, inputs=output, initial_state=init_state, time_major=False)
h_state = outputs[:, -1, :]  # output at the last time step
print(h_state.shape)
W1 = tf.get_variable("W1", shape=[n_hidden_units, 101], initializer=tf.contrib.layers.xavier_initializer())
bias1 = tf.get_variable("bias1", shape=[101], initializer=tf.contrib.layers.xavier_initializer())
y_pre = tf.matmul(h_state, W1) + bias1
print(y_pre.shape)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=y_pre))
train_op = tf.train.AdamOptimizer(lr).minimize(cross_entropy)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y_pre, 1), tf.argmax(Y, 1)), dtype=tf.float32))
[video_names, video_labels] = get_train_video(batch_size)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(100):
        imgs_value = []
        [names, labels] = sess.run([video_names, video_labels])
        for video in names:
            video = video.decode()
            image_value = get_img(video)
            imgs_value.append(image_value)
        videos_values = np.array(sum(imgs_value, []))
        '''
        for m in range(len(videos_values)):
            plt.imshow(videos_values[m])
            plt.show()
        print(names)
        '''
        loss, op, train_accuracy = sess.run([cross_entropy, train_op, accuracy], feed_dict={X: videos_values, Y: labels})
        print("train step %d: %g" % (i, loss))
        if (i + 1) % 10 == 0:
            print("train step:%d %g" % (i, train_accuracy))
The data-loading code (in a separate file):

import tensorflow as tf
import os
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from skimage import io

# get all videos and labels
def get_train_video(batch_size):
    train_video = []
    train_label = []
    train_filepath = 'F:/data/res_lstm/ucfTrainTestlist'
    imgs_path = r'F:/data/res_lstm/img'
    for i in range(3):
        with open(os.path.join(train_filepath, 'trainlist0' + str(i + 1) + '.txt')) as f:
            files = f.readlines()
            for line in files:
                video_name = line.split(' ')[0]
                video_label = int(line.split(' ')[1]) - 1
                train_video.append(imgs_path + '/' + video_name)
                train_label.append(video_label)
    train_label = tf.one_hot(np.array(train_label), 101)
    [data, label] = tf.train.slice_input_producer([train_video, train_label], num_epochs=None, shuffle=True)
    [video_names, video_labels] = tf.train.batch([data, label], batch_size=batch_size, num_threads=1,
                                                 capacity=64, allow_smaller_final_batch=False)
    return video_names, video_labels
# loads all frames of one video as a list of images, shape (15, 224, 224, 3)
def get_img(img_path):
    img_value = []
    imgs = os.listdir(img_path)
    imgs.sort(key=lambda x: int(x[:-4]))  # sort frames numerically by filename
    for img in imgs:
        picture = io.imread(os.path.join(img_path, img))
        normal_value = picture / 255.0
        '''
        plt.imshow(normal_value)
        plt.show()
        '''
        img_value.append(normal_value)
    return img_value