I am currently porting code from Keras to TensorFlow so that I can use the new quantization-aware training features in TensorFlow 1.10.0. However, I found that when using the Adam optimizer, the training process differs significantly between Keras and TensorFlow.
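(For context only: below is a rough sketch of how I understand the quantization-aware training rewrite in tf.contrib.quantize is applied to a plain TensorFlow graph, which is why I am moving away from pure Keras. The tiny placeholder model and the quant_delay value are made up for illustration and are not my real code.)

import tensorflow as tf

# Placeholder float model built in the default graph (illustration only).
x = tf.placeholder(tf.float32, [None, 1])
y = tf.layers.dense(x, 1)
loss = tf.losses.mean_squared_error(tf.sin(10.0 * x), y)

# Rewrite the default graph in place, inserting fake-quantization ops.
# quant_delay (number of float-training steps before quantization kicks in)
# is an arbitrary placeholder value here.
tf.contrib.quantize.create_training_graph(
    input_graph=tf.get_default_graph(), quant_delay=1000)

# The optimizer is added after the rewrite.
train_op = tf.train.AdamOptimizer().minimize(loss)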
Here is the experiment code. Both branches do the same thing, namely fit the function sin(10x), once the Keras way and once the TensorFlow way.
from keras.layers import Input, Dense, BatchNormalization
from keras.models import Model
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import keras.backend as K
KERAS = 'keras'
TENSORFLOW = 'tensorflow'
def create_model():
    # Simple MLP: 1 -> 1000 -> 1000 -> 1, with BatchNormalization
    # after each hidden layer.
    ipt = Input([1])
    m = Dense(1000, activation='relu')(ipt)
    m = BatchNormalization()(m)
    m = Dense(1000, activation='relu')(m)
    m = BatchNormalization()(m)
    m = Dense(1)(m)
    return Model(ipt, m)
valX = np.expand_dims(np.linspace(-1, 1, 10000), 1)
valY = np.sin(valX * 10)
valY_ = {}
for phase in (KERAS, TENSORFLOW):
    sess = tf.Session()
    sess.as_default()
    K.set_session(sess)

    model = create_model()
    if phase == KERAS:
        # Keras builds the loss and the Adam update internally.
        model.compile('adam', 'mean_squared_error')
    else:
        # Build the same loss and Adam update by hand on top of the Keras model.
        tensor_y_gt = tf.placeholder(dtype=tf.float32,
                                     shape=model.output.get_shape().as_list())
        mse = tf.losses.mean_squared_error(model.output, tensor_y_gt)
        training_steps = tf.train.AdamOptimizer().minimize(mse)

    sess.run(tf.global_variables_initializer())

    for step in range(2000):
        X = np.random.uniform(-1, 1, [256, 1])
        Y = np.sin(X * 10)
        if phase == KERAS:
            loss = model.train_on_batch(X, Y)
        else:
            loss, _ = sess.run([mse, training_steps],
                               feed_dict={model.input: X, tensor_y_gt: Y})
        if step % 100 == 0:
            print('%s, step#%d, loss=%.5f' % (phase, step, loss))

    valY_[phase] = model.predict(valX)[:, 0]
    sess.close()
valX = valX[:, 0]
valY = valY[:, 0]
plt.plot(valX, valY, 'r--', label='sin(10x)')
plt.plot(valX, valY_[KERAS], 'g-', label=KERAS)
plt.plot(valX, valY_[TENSORFLOW], 'b-', label=TENSORFLOW)
plt.legend(loc='best', ncol=1)
plt.show()
You can see the difference between the two in the resulting plot: [plot of sin(10x) together with the Keras and TensorFlow predictions]
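One thing I am unsure about is whether the raw-TensorFlow branch treats the Keras BatchNormalization layers the same way train_on_batch does (update ops for the moving statistics, learning phase). For reference, a variant of the raw-TF training step that also runs model.updates and feeds K.learning_phase() would look roughly like this (reusing the names from the script above; I have not verified that this is the right way to mix Keras layers with a raw session):

# Hypothetical variant of the raw-TF step from the script above:
# also run the Keras update ops (BatchNormalization moving mean/variance)
# and feed the learning phase so the layers behave as in training.
fetches = [mse, training_steps] + model.updates
results = sess.run(fetches,
                   feed_dict={model.input: X,
                              tensor_y_gt: Y,
                              K.learning_phase(): 1})
loss = results[0]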
Environment: TensorFlow 1.10.0, Keras running on the TensorFlow backend.
Does anyone have an idea what is going on?