My TensorFlow loss does not change during training. Here is my code.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import math
import os
import nltk
import random
import tflearn
batch_size = 100
start = 0
end = batch_size
learning_rate = 0.01
num_classes = 8
path1 = "/home/indy/Downloads/aclImdb/train/pos"
path2 = "/home/indy/Downloads/aclImdb/train/neg"
path3 = "/home/indy/Downloads/aclImdb/test/pos"
path4 = "/home/indy/Downloads/aclImdb/test/neg"
time_steps = 300
embedding = 50
step = 1
def get_embedding():
    gfile_path = os.path.join("/home/indy/Downloads/glove.6B", "glove.6B.50d.txt")
    f = open(gfile_path,'r')
    embeddings = {}
    for line in f:
        sp_value = line.split()
        word = sp_value[0]
        embedding = [float(value) for value in sp_value[1:]]
        assert len(embedding) == 50
        embeddings[word] = embedding
    return embeddings
ebd = get_embedding()
def get_y(file_path):
    y_value = file_path.split('_')
    y_value = y_value[1].split('.')
    if y_value[0] == '1':
        return 0
    elif y_value[0] == '2':
        return 1
    elif y_value[0] == '3':
        return 2
    elif y_value[0] == '4':
        return 3
    elif y_value[0] == '7':
        return 4
    elif y_value[0] == '8':
        return 5
    elif y_value[0] == '9':
        return 6
    elif y_value[0] == '10':
        return 7
def get_x(file_path):
    x_value = open(file_path,'r')
    for line in x_value:
        x_value = line.replace("<br /><br />","")
    x_value = x_value.lower()
    x_value = nltk.word_tokenize(x_value.decode('utf-8'))
    padding = 300 - len(x_value)
    if padding > 0:
        p_value = ['pad' for i in range(padding)]
        x_value = np.concatenate((x_value,p_value))
    if padding < 0:
        x_value = x_value[:300]
    for i in x_value:
        if ebd.get(i) == None:
            ebd[i] = [float(np.random.normal(0.0,1.0)) for j in range(50)]
    x_value = [ebd[value] for value in x_value]
    assert len(x_value) == 300
    return x_value
def get_total_files(path1,path2,path3,path4):
    directory1 = os.listdir(path1)
    file_path1 = [os.path.join(path1,file) for file in directory1]
    directory2 = os.listdir(path2)
    file_path2 = [os.path.join(path2,file) for file in directory2]
    directory3 = os.listdir(path3)
    file_path3 = [os.path.join(path3,file) for file in directory3]
    directory4 = os.listdir(path4)
    file_path4 = [os.path.join(path4,file) for file in directory4]
    total_files_train = np.concatenate((file_path1,file_path2))
    total_files_test = np.concatenate((file_path3,file_path4))
    random.shuffle(total_files_train)
    random.shuffle(total_files_test)
    x1 = [get_x(file) for file in total_files_train]
    y1 = [get_y(file) for file in total_files_train]
    x2 = [get_x(file) for file in total_files_test]
    y2 = [get_y(file) for file in total_files_test]
    return x1 , y1 , x2 , y2
total_files_train_x, total_files_train_y, total_files_test_x, total_files_test_y = get_total_files(path1,path2,path3,path4)
train_set_x = total_files_train_x[:10000]
validate_set_x = total_files_train_x[10000:15000]
test_set_x = total_files_test_x[0:5000]
train_set_y = total_files_train_y[:10000]
validate_set_y = total_files_train_y[10000:15000]
test_set_y = total_files_test_y[0:5000]
X = tf.placeholder(tf.float32, [None,time_steps,embedding])
Y = tf.placeholder(tf.int32, [None])
def build_nlp_model(x, _units,num_classes,num_of_filters):
    x = tf.expand_dims(x,3)
    with tf.variable_scope("one"):
        filter_shape = [1, embedding, 1, num_of_filters]
        conv_weights = tf.get_variable("conv_weights" , filter_shape, tf.float32, tf.truncated_normal_initializer(mean=0.0, stddev=1.0))
        conv_biases = tf.Variable(tf.constant(0.1, shape=[num_of_filters]))
        conv = tf.nn.conv2d(x, conv_weights, strides=[1,1,1,1], padding = "VALID")
        normalize = conv + conv_biases
        tf_normalize = tflearn.layers.normalization.batch_normalization(normalize)
        relu = tf.nn.elu(tf_normalize)
        pooling = tf.reduce_max(relu, reduction_indices = 3, keep_dims = True)
        outputs_fed_lstm = pooling
    with tf.variable_scope("two"):
        filter_shape = [1, 1, 1, 1000]
        conv_weights = tf.get_variable("conv_weights" , filter_shape, tf.float32, tf.truncated_normal_initializer(mean=0.0, stddev=1.0))
        conv_biases = tf.Variable(tf.constant(0.1, shape=[1000]))
        conv = tf.nn.conv2d(outputs_fed_lstm, conv_weights, strides=[1,1,1,1], padding = "VALID")
        normalize = conv + conv_biases
        tf_normalize = tflearn.layers.normalization.batch_normalization(normalize)
        relu = tf.nn.elu(tf_normalize)
        pooling = tf.reduce_max(relu, reduction_indices = 3, keep_dims = True)
        outputs_fed_lstm = pooling
    with tf.variable_scope("three"):
        filter_shape = [1, 1, 1, 1000]
        conv_weights = tf.get_variable("conv_weights" , filter_shape, tf.float32, tf.truncated_normal_initializer(mean=0.0, stddev=1.0))
        conv_biases = tf.Variable(tf.constant(0.1, shape=[1000]))
        conv = tf.nn.conv2d(outputs_fed_lstm, conv_weights, strides=[1,1,1,1], padding = "VALID")
        normalize = conv + conv_biases
        tf_normalize = tflearn.layers.normalization.batch_normalization(normalize)
        relu = tf.nn.elu(tf_normalize)
        pooling = tf.reduce_max(relu, reduction_indices = 3, keep_dims = True)
        outputs_fed_lstm = pooling
    with tf.variable_scope("four"):
        filter_shape = [1, 1, 1, num_of_filters]
        conv_weights = tf.get_variable("conv_weights" , filter_shape, tf.float32, tf.truncated_normal_initializer(mean=0.0, stddev=1.0))
        conv_biases = tf.Variable(tf.constant(0.1, shape=[num_of_filters]))
        conv = tf.nn.conv2d(outputs_fed_lstm, conv_weights, strides=[1,1,1,1], padding = "VALID")
        normalize = conv + conv_biases
        tf_normalize = tflearn.layers.normalization.batch_normalization(normalize)
        relu = tf.nn.elu(tf_normalize)
        pooling = tf.reduce_max(relu, reduction_indices = 3, keep_dims = True)
        outputs_fed_lstm = pooling
    with tf.variable_scope("five"):
        filter_shape = [1, 1, 1, num_of_filters]
        conv_weights = tf.get_variable("conv_weights" , filter_shape, tf.float32, tf.truncated_normal_initializer(mean=0.0, stddev=1.0))
        conv_biases = tf.Variable(tf.constant(0.1, shape=[num_of_filters]))
        conv = tf.nn.conv2d(outputs_fed_lstm, conv_weights, strides=[1,1,1,1], padding = "VALID")
        normalize = conv + conv_biases
        tf_normalize = tflearn.layers.normalization.batch_normalization(normalize)
        relu = tf.nn.elu(tf_normalize)
        pooling = tf.reduce_max(relu, reduction_indices = 3, keep_dims = True)
        outputs_fed_lstm = pooling
    x = tf.squeeze(outputs_fed_lstm, [2])
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, 1])
    x = tf.split(0, time_steps, x)
    lstm = tf.nn.rnn_cell.LSTMCell(num_units = _units)
    # multi_lstm = tf.nn.rnn_cell.MultiRNNCell([lstm] * lstm_layers, state_is_tuple = True)
    outputs , state = tf.nn.rnn(lstm, x, dtype = tf.float32)
    weights = tf.Variable(tf.random_normal([_units,num_classes]))
    biases = tf.Variable(tf.random_normal([num_classes]))
    logits = tf.matmul(outputs[-1], weights) + biases
    return logits
logits = build_nlp_model(X,500,num_classes,1000)
c_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,Y)
loss = tf.reduce_mean(c_loss)
global_step = tf.Variable(0, name="global_step", trainable=False)
# decayed_learning_rate = tf.train.exponential_decay(learning_rate,0,10000,0.9)
optimizer= tf.train.AdamOptimizer(learning_rate)
minimize_loss = optimizer.minimize(loss, global_step=global_step)
with tf.variable_scope("four", reuse = True):
    weights = tf.get_variable("conv_weights")
    grads_and_vars = optimizer.compute_gradients(loss,[weights])
correct_predict = tf.nn.in_top_k(logits, Y, 1)
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    for i in range(10):
        for j in range(100):
            x = train_set_x[start:end]
            y = train_set_y[start:end]
            start = end
            end += batch_size
            if start >= 10000:
                start = 0
                end = batch_size
            sess.run(minimize_loss,feed_dict={X : x, Y : y})
            step += 1
        gr_print = sess.run([grad for grad, _ in grads_and_vars], feed_dict={X : x, Y : y})
        print (gr_print)
        print ("One Epoch Finished")
        cost = sess.run(loss,feed_dict = {X: x,Y: y})
        accu = sess.run(accuracy,feed_dict = {X: x, Y: y})
        print ("Loss after one Epoch(Training) = " + "{:.6f}".format(cost) + ", Training Accuracy= " + "{:.5f}".format(accu))
        q = validate_set_x[:100]
        w = validate_set_y[:100]
        cost = sess.run(loss,feed_dict = {X: q,Y: w})
        accu = sess.run(accuracy,feed_dict = {X: q, Y: w})
After many epochs my loss stays the same, so I suspected a vanishing-gradient problem and applied batch normalization, but that made no difference in the results. I also tried to overfit the model, and I still got the same result. I am using optimizer.compute_gradients to compute the gradients. Below are the gradients of the loss with respect to the weights of the different conv layers and what they look like, specifically for the first and the fourth conv layers.
Code for the gradients with respect to the first conv layer:
with tf.variable_scope("one", reuse = True):
    weights = tf.get_variable("conv_weights")
    grads_and_vars = optimizer.compute_gradients(loss,[weights])
gr_print = sess.run([grad for grad, _ in grads_and_vars], feed_dict={X : x, Y : y})
print (gr_print)
This is what I got after one iteration:
[array([[[[ 2.38197345e-06, -1.04135906e-04, 2.60035231e-05, ...,
-1.01550373e-04, 0.00000000e+00, 1.01060732e-06]],
[[ -1.98007251e-06, 8.13827137e-05, -8.14055747e-05, ...,
-6.40711369e-05, 0.00000000e+00, 1.05516607e-04]],
[[ 4.51127789e-06, 2.21654373e-05, -4.99439229e-05, ...,
9.87191743e-05, 0.00000000e+00, 1.70595697e-04]],
...,
[[ -4.70160239e-06, -8.67914496e-05, 2.50699850e-05, ...,
1.18909593e-04, 0.00000000e+00, 2.43308150e-05]],
[[ -1.18101923e-06, -7.71943451e-05, -3.41630148e-05, ...,
-3.28040805e-05, 0.00000000e+00, -6.01144784e-05]],
[[ -1.98778321e-06, -3.23160748e-05, -5.44797731e-05, ...,
2.23019324e-05, 0.00000000e+00, -3.29296927e-05]]]], dtype=float32)]
Code for the gradients with respect to the fourth conv layer:
with tf.variable_scope("four", reuse = True):
    weights = tf.get_variable("conv_weights")
    grads_and_vars = optimizer.compute_gradients(loss,[weights])
gr_print = sess.run([grad for grad, _ in grads_and_vars], feed_dict={X : x, Y : y})
print (gr_print)
This is what I got after one iteration:
[array([[[[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
            0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
            ...,
            0.        ,  0.        , -6.21198082,  0.        ,  0.        ,
            ...,
            0.        ,  0.        ,  0.        ,  0.        ,  0.        ]]]], dtype=float32)]
(output truncated: every entry in this gradient array is exactly 0.0 except the single value -6.21198082)
The gradients with respect to the 2nd, 3rd, 4th, and 5th conv layers all look like the output above. For every conv layer after the first one, the gradients have one thing in common: the entire gradient array is zero except for a single non-zero number, as shown in the output. I also applied batch normalization and still got the results above.
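To show this more compactly than the raw arrays above, a per-layer summary along the lines of the sketch below prints the gradient norm and the number of non-zero entries for each conv layer. It is only a sketch: it reuses the scope names from build_nlp_model, assumes the names optimizer, loss, X, Y, x, y from the code above, and has to run inside the training session after at least one batch.

# Sketch: summarize the gradient of the loss w.r.t. each conv layer's weights.
for scope in ["one", "two", "three", "four", "five"]:
    with tf.variable_scope(scope, reuse=True):
        w = tf.get_variable("conv_weights")
    grads_and_vars = optimizer.compute_gradients(loss, [w])
    g = sess.run(grads_and_vars[0][0], feed_dict={X: x, Y: y})
    print(scope, "norm =", np.linalg.norm(g),
          "non-zero =", np.count_nonzero(g), "/", g.size)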
I am confused and do not know where the problem is.
I also have another question: if I want to access tensors such as pooling and outputs_fed_lstm, how do I access them?
with tf.variable_scope("one", reuse = True):
    weights = tf.get_variable("conv_weights")
    grads_and_vars = optimizer.compute_gradients(loss,[weights])
I know I can access variables such as conv_weights as shown above.
with tf.variable_scope("one"):
    filter_shape = [1, embedding, 1, num_of_filters]
    conv_weights = tf.get_variable("conv_weights" , filter_shape, tf.float32, tf.truncated_normal_initializer(mean=0.0, stddev=1.0))
    conv_biases = tf.Variable(tf.constant(0.1, shape=[num_of_filters]))
    conv = tf.nn.conv2d(x, conv_weights, strides=[1,1,1,1], padding = "VALID")
    normalize = conv + conv_biases
    tf_normalize = tflearn.layers.normalization.batch_normalization(normalize)
    relu = tf.nn.elu(tf_normalize)
    pooling = tf.reduce_max(relu, reduction_indices = 3, keep_dims = True)
    outputs_fed_lstm = pooling
But how do I access tensors such as pooling and outputs_fed_lstm, which are also created inside the "one" scope?
Answer 0 (score: 0)
You can use tf.all_variables() to get all of the variables in the current graph. This gives you a list of Variable objects, and you can use each variable's .name attribute to identify the one you are looking for. You should also give names to all of the ops you are interested in, which makes this easier. For example, to name the pooling op:
pooling = tf.reduce_max(relu, reduction_indices = 3, keep_dims = True, name='pooling')
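A minimal sketch of both ideas follows. It assumes the default scoping of the code in the question, so the named pooling op created inside variable_scope("one") would end up with the tensor name "one/pooling:0"; the exact name is an assumption, so verify it by printing the graph's operations first. It also assumes sess, X, Y, x, y from the question's training loop.

# List every variable in the graph and print its name to find the one you want.
for v in tf.all_variables():
    print(v.name)   # e.g. "one/conv_weights:0", "global_step:0", ...

# pooling / outputs_fed_lstm are tensors (op outputs), not variables, so they do
# not appear in tf.all_variables(); fetch them from the graph by name instead:
graph = tf.get_default_graph()
pooling_tensor = graph.get_tensor_by_name("one/pooling:0")
pooling_value = sess.run(pooling_tensor, feed_dict={X: x, Y: y})

Another option is simply to return those intermediate tensors from build_nlp_model alongside logits, so you can sess.run them directly without looking them up by name.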
As for your code, my first guess is that your learning rate is too high and training is unstable because of dead neurons. Try lowering the learning rate and see whether that helps.
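For example (1e-4 is only a starting point to try, not a tuned value):

learning_rate = 1e-4   # was 0.01; try a few orders of magnitude lower
optimizer = tf.train.AdamOptimizer(learning_rate)
minimize_loss = optimizer.minimize(loss, global_step=global_step)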