I am implementing a CNN for MNIST in both TensorFlow and Keras, i.e. the same network on both platforms, purely for comparison. However, I am getting a very strange result: the Keras code runs noticeably faster than the TF code. Since Keras is just a wrapper around TensorFlow, I can't really explain this.
Here is my Keras code:
import time
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# td/tl (training images/labels) and testd (test images) are loaded elsewhere.
# Normalize to [0, 1] and add the channel dimension.
td = td / 255.0
td = td.reshape(td.shape[0], 28, 28, 1)
testd = testd / 255.0
testd = testd.reshape(testd.shape[0], 28, 28, 1)
print(testd.shape)
# Define the model
model = tf.keras.models.Sequential()
model.add(layers.Convolution2D(32, kernel_size=(5,5), strides=(1, 1), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D(4, 4))
model.add(layers.Convolution2D(32, (5, 5), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='Adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
checkpointer = ModelCheckpoint(filepath='./checkpoints/weights.hdf5', verbose=1, save_best_only=True)
# Stop training when there are no changes or improvements in the loss function
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto', baseline=None)
# Train the model (TrainValTensorBoard is a custom TensorBoard callback defined elsewhere)
start = time.time()
history = model.fit(td, tl, batch_size=32, epochs=30, validation_split=0.2,
                    callbacks=[TrainValTensorBoard(write_graph=False), checkpointer, early])
stop = time.time()
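For the comparison I just look at the elapsed wall-clock time, and I also print the model summary to double-check the architecture. This is only a small sketch using the model, start and stop variables defined above:

model.summary()  # layer-by-layer overview, to compare against the TF graph below
print('Keras training time: {:.1f} s'.format(stop - start))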
And here is my TensorFlow equivalent:
import time
import numpy as np
import tensorflow as tf

# Batch_X/Batch_Y (training data), X_Test/Y_Test and logs_path are defined elsewhere
x = tf.placeholder(tf.float32, [None, 28, 28, 1], name='Input')
y_ = tf.placeholder(tf.float32, [None, 10], name='GroundTruth')
conv1 = tf.layers.conv2d(x, 32, 5, activation=tf.nn.relu,padding='same',name='Conv1')
max1 = tf.layers.max_pooling2d(conv1,4,4,name='Max1')
conv2 = tf.layers.conv2d(max1,32,5,activation=tf.nn.relu,padding='same',name='Conv2')
max2 = tf.layers.max_pooling2d(conv2,2,2,name='Max2')
dim2flatten = int(max2.shape[1] * max2.shape[2] * max2.shape[3])
flatten = tf.reshape(max2, [-1, dim2flatten], name='Flat')
fcon = tf.layers.dense(flatten,64,activation=tf.nn.relu,name='Fully')
y = tf.layers.dense(fcon,10,activation=tf.nn.softmax,name='Soft')
# print(max1.shape)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), axis=[1]))
RATE = 0.1
STEPS = 100
#training = tf.train.GradientDescentOptimizer(RATE).minimize(cross_entropy)
training = tf.train.AdamOptimizer(0.01).minimize(cross_entropy)
init = tf.global_variables_initializer()
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()
print(Batch_X.shape)
# Sanity check that shuffling via an index array works as expected
s = np.arange(Batch_X.shape[0])
np.random.shuffle(s)
g = Batch_X[s]
print(g.shape)
print(s)
# Index array used to reshuffle the training data at every epoch
s = np.arange(Batch_X.shape[0])
batch_size = 32
total_batch = int(len(Batch_Y) / batch_size)
with tf.Session() as sess:
    start = time.time()
    sess.run(init)
    print(y.graph == tf.get_default_graph())
    tf.summary.scalar("loss", cross_entropy)
    tf.summary.scalar("accuracy", accuracy)
    merged_summary_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(logs_path + '/train', graph=tf.get_default_graph())
    test_writer = tf.summary.FileWriter(logs_path + '/test', graph=tf.get_default_graph())
    # Run STEPS times
    for i in range(STEPS + 1):
        np.random.shuffle(s)
        for j in range(total_batch):
            # Re-index the whole training set with the shuffled indices,
            # then slice out the current mini-batch
            bx = Batch_X[s]
            by = Batch_Y[s]
            bx = bx[32 * j:32 * (j + 1), :, :, :]
            by = by[32 * j:32 * (j + 1), :]
            stop = time.time()
            print(stop - start)
            summary, train = sess.run([merged_summary_op, training], feed_dict={x: bx, y_: by})
        if (i % 10) == 0:
            print('Training Step:' + str(i) + ' Accuracy = ' + str(
                sess.run(accuracy, feed_dict={x: bx, y_: by})) + ' Loss = ' + str(
                sess.run(cross_entropy, {x: bx, y_: by})))
        train_writer.add_summary(summary, i)
        summary, val_acc = sess.run([merged_summary_op, accuracy], feed_dict={x: X_Test, y_: Y_Test})
        print('Val accuracy = ' + str(val_acc))
        test_writer.add_summary(summary, i)
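After the session finishes I report the total elapsed time the same way as in the Keras version; this is just a minimal sketch, where start is the timestamp taken right after the session was opened:

print('TF training time: {:.1f} s'.format(time.time() - start))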
Does anyone know what I'm doing wrong in TF that makes it so slow?