I implemented a CNN in TensorFlow following the cs231n course. I want to reproduce exactly the same results on every run, so I have set seeds everywhere I thought they were needed, but my results are still not deterministic. I don't know whether this is caused by the optimizer I am using. The seeding I rely on is summarized right below, and the full code follows after it; please take a look, any help would be greatly appreciated.
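This is the gist of the seeding (a condensed excerpt of the full listing further down, TensorFlow 1.x API; every weight and bias initializer in the model is also given seed=0 explicitly):

import random
import numpy as np
import tensorflow as tf

tf.reset_default_graph()
random.seed(0)         # Python's built-in RNG
np.random.seed(0)      # NumPy RNG
tf.set_random_seed(0)  # graph-level TensorFlow seed

with tf.Graph().as_default():
    tf.set_random_seed(0)  # re-seed the freshly created graph as well
    # ... build the model; every initializer gets seed=0 ...

And here is the full code: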
import math
import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
def train_network(lr, ksz, kst, plsz, plst, bs):
    ksz = np.int(ksz)
    kst = np.int(kst)
    plsz = np.int(plsz)
    plst = np.int(plst)
    tf.reset_default_graph()
    random.seed(0)
    tf.set_random_seed(0)
    np.random.seed(0)
    # tf.set_random_seed(1234)
    with tf.Graph().as_default():
        tf.set_random_seed(0)
        with tf.Session() as sess:
            X = tf.placeholder(tf.float32, [None, 32, 32, 3])
            y = tf.placeholder(tf.int64, [None])
            is_training = tf.placeholder(tf.bool)
            # y_out = my_model(X,y,is_training,ksz=np.int(ksz),kst=np.int(kst),plsz=np.int(plsz),plst=np.int(plst))
            # Conv-Relu-BN
            conv1act = tf.contrib.layers.conv2d(
                inputs=X, num_outputs=32, padding='same', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            bn1act = tf.contrib.layers.batch_norm(inputs=conv1act, is_training=is_training)
            # Conv-Relu-BN
            conv2act = tf.contrib.layers.conv2d(
                inputs=bn1act, num_outputs=64, padding='same', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            bn2act = tf.contrib.layers.batch_norm(inputs=conv2act, is_training=is_training)
            # Conv-Relu-BN
            c3 = tf.contrib.layers.conv2d(
                inputs=bn2act, num_outputs=128, padding='same', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            b3 = tf.contrib.layers.batch_norm(inputs=c3, is_training=is_training)
            # Conv-Relu-BN
            c4 = tf.contrib.layers.conv2d(
                inputs=b3, num_outputs=256, padding='same', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            b4 = tf.contrib.layers.batch_norm(inputs=c4, is_training=is_training)
            # Maxpool
            maxpool1act = tf.contrib.layers.max_pool2d(inputs=b4, stride=plst, kernel_size=plsz)
            # Flatten
            Rsize = maxpool1act.get_shape().as_list()
            # print(Rsize)
            Rsize1 = Rsize[1] * Rsize[2] * Rsize[3]
            flatten1 = tf.reshape(maxpool1act, shape=[-1, Rsize1])
            # FC-Relu-BN
            fc1 = tf.contrib.layers.fully_connected(
                inputs=flatten1, num_outputs=1024, activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            bn3act = tf.contrib.layers.batch_norm(inputs=fc1, is_training=is_training)
            # Output FC
            y_out = tf.contrib.layers.fully_connected(
                inputs=bn3act, num_outputs=10, activation_fn=None,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            mean_loss = tf.losses.softmax_cross_entropy(logits=y_out, onehot_labels=tf.one_hot(y, 10))
            # optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
            optimizer = tf.train.RMSPropOptimizer(learning_rate=lr)
            # batch normalization in tensorflow requires this extra dependency
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(extra_update_ops):
                train_step = optimizer.minimize(mean_loss)
            with tf.device("/cpu:0"):  # "/cpu:0" or "/gpu:0"
                sess.run(tf.global_variables_initializer())
            print('Training')
            run_model(X, y, mean_loss, is_training, sess, y_out, mean_loss, X_train, y_train, 10, np.int(bs), 100,
                      train_step, True)
            print('Validation')
            loss, val = run_model(X, y, mean_loss, is_training, sess, y_out, mean_loss, X_val, y_val, 1, np.int(bs))
            return 1 - val
The run_model function:
def run_model(X, y, mean_loss, is_training, session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    # have tensorflow compute accuracy
    # print(predict)
    random.seed(0)
    np.random.seed(0)
    correct_prediction = tf.equal(tf.argmax(predict, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # shuffle indices
    train_indicies = np.arange(Xd.shape[0])
    # np.random.shuffle(train_indicies)
    # print(train_indicies)
    training_now = training is not None
    # setting up variables we want to compute (and optimizing)
    # if we have a training function, add that to things we compute
    variables = [mean_loss, correct_prediction, accuracy]
    if training_now:
        variables[-1] = training
    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(Xd.shape[0] / batch_size))):
            # print()
            # generate indices for the batch
            start_idx = (i * batch_size) % Xd.shape[0]
            idx = train_indicies[start_idx:start_idx + batch_size]
            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx, :],
                         y: yd[idx],
                         is_training: training_now}
            # get batch size
            actual_batch_size = yd[idx].shape[0]
            # print(actual_batch_size)
            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables, feed_dict=feed_dict)
            # print(np.sum(corr))
            # aggregate performance stats
            losses.append(loss * actual_batch_size)
            correct += np.sum(corr)
            # print(np.sum(corr))
            # print(actual_batch_size)
            # print(np.sum(corr)/float(actual_batch_size))
            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"
                      .format(iter_cnt, loss, float(np.sum(corr)) / actual_batch_size))
            iter_cnt += 1
        total_correct = float(correct) / Xd.shape[0]
        total_loss = np.sum(losses) / Xd.shape[0]
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"
              .format(total_loss, total_correct, e + 1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e + 1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss, total_correct
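For completeness, this is roughly how I invoke it; the hyperparameter values below are only illustrative (not the exact ones from my runs), and X_train/y_train/X_val/y_val are assumed to be loaded elsewhere in the notebook:

# Two back-to-back calls with identical arguments; I expected identical printouts,
# but the reported losses and accuracies start to diverge after a few iterations.
err_first = train_network(lr=1e-3, ksz=3, kst=1, plsz=2, plst=2, bs=64)
err_second = train_network(lr=1e-3, ksz=3, kst=1, plsz=2, plst=2, bs=64)
print(err_first, err_second)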