I am trying to learn the identity function on the numbers 0 to 200 in TensorFlow, with a simple fully connected network consisting of one hidden layer of 512 neurons and an output layer of 201 neurons (overkill, but it is only for testing). Using Adam and softmax cross-entropy as the loss, the loss drops quickly from about 16 with freshly initialized weights over the first roughly 500 steps (batches of 200 examples each), and then gets stuck there or only decreases very slowly.
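(For reference, predicting the uniform distribution over all 201 classes would correspond to a cross-entropy of ln(201) ≈ 5.3.)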
Training progress: [loss curve plot omitted]
Shouldn't such an oversized model normally just fit (even overfit) the data? I did not normalize the inputs, but I would not expect that to make a big difference here, would it? Is there an underlying reason for this behavior, or is there a bug in my implementation (TensorFlow 1.9)?
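To be concrete, by normalization I mean something like scaling the single input feature into [0, 1] before feeding it to the network; a minimal sketch of that (hypothetical, not applied in my code below):

# hypothetical preprocessing, NOT part of the code below:
# scale the raw values 0..199 into [0, 1]
feat = [[i / 200.0] for i in range(200)]

Here are the model and the input function: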
import tensorflow as tf
import numpy as np
from random import randint
def model(features, labels, mode, params):
    # one hidden layer, then one output unit per class (0..200 -> 201 classes)
    hidden = tf.layers.dense(features, 512, activation=tf.nn.relu)
    logits = tf.layers.dense(hidden, 201, activation=tf.nn.relu)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions={'logits': logits})
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    mean_loss = tf.reduce_mean(loss)
    learning_rate = params['learning_rate']
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        mean_loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=mean_loss, train_op=train_op)
def train_input_fn(features, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    if batch_size is None:
        # prediction path: a single batch containing every example
        dataset = dataset.batch(len(labels))
    else:
        dataset = dataset.shuffle(1000).repeat().batch(batch_size)
    return dataset
I start training and prediction like this:
if __name__ == '__main__':
    feat = [[float(i)] for i in range(200)]
    lab = [int(i) for sublist in feat for i in sublist]
    learning_rate = 0.0001
    classifier = tf.estimator.Estimator(
        model_fn=model,
        model_dir='/home/test_dir/{}'.format(learning_rate),
        params={'learning_rate': learning_rate}
    )
    classifier.train(input_fn=lambda: train_input_fn(feat, lab, 200), max_steps=800000)

    # the rest is just testing the prediction process
    input = [[float(randint(0, 200))] for i in range(10)]
    input_labels = np.array([int(i) for sublist in input for i in sublist])
    predictions = classifier.predict(input_fn=lambda: train_input_fn(input, input_labels, None))
    logits = []
    for p in predictions:
        logits.append(p['logits'])
    tensor_logits = tf.convert_to_tensor(np.array(logits))
    tensor_label_ids = tf.convert_to_tensor(input_labels)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=tensor_logits, labels=tensor_label_ids)
    mean_loss = tf.reduce_mean(loss)
    preds = tf.argmax(tensor_logits, axis=-1)
    with tf.Session() as sess:
        print(input_labels)
        print(sess.run(preds))
        print(sess.run(loss))
        print(sess.run(mean_loss))
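For what it's worth, the final check does not actually need a second TF session; a minimal NumPy equivalent (reusing the logits list and input_labels collected above) would be:

import numpy as np

def np_sparse_softmax_xent(logits, labels):
    # numerically stable log-softmax: subtract the row max before exponentiating
    z = logits - logits.max(axis=-1, keepdims=True)
    log_probs = z - np.log(np.exp(z).sum(axis=-1, keepdims=True))
    # pick the log-probability of the true class for each example
    return -log_probs[np.arange(len(labels)), labels]

logits_arr = np.array(logits)        # shape (10, 201)
print(input_labels)
print(logits_arr.argmax(axis=-1))    # predicted classes
losses = np_sparse_softmax_xent(logits_arr, input_labels)
print(losses)
print(losses.mean())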