我导入了Mnist数据集并对其进行了整形,但是当训练模型时,损失会先增加然后再降低……我想知道出什么问题了,这与“重塑或激活”功能有关?
我将激活功能从Relu更改为Sigmoid,但仍然相同 我希望损失首先应该是最高的,然后应该减少
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import utils
# Load data.
mnist = tf.keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data(r'C:\Users\Ati\Downloads\mnist.npz')
print("Original X shape", X_train.shape)
print("Original Y shape", Y_train.shape)
# Reshape data.
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255 # Original data is uint8 (0-255). Scale it to range [0,1].
X_test /= 255
print("Training X matrix shape", X_train.shape)
print("Testing X matrix shape", X_test.shape)
print (Y_train[0])
# Represent the targets as one-hot vectors: e.g. 2 -> [0, 0, 1, 0, 0, 0, 0, 0, 0].
nb_classes = 10
Y_train = utils.to_categorical(Y_train, nb_classes)
Y_test = utils.to_categorical(Y_test, nb_classes)
print("Training Y matrix shape", Y_train.shape)
print("Testing Y matrix shape", Y_test.shape)
class NeuralNetwork:
def add_layer(inputs,in_size,out_size,activation_function=None):
Weights= tf.Variable(tf.random_normal([in_size,out_size]))
biases= tf.Variable(tf.zeros([out_size])+0.1)
Wx_plus_b = tf.matmul(inputs,Weights)+biases
if activation_function is None:
outputs = Wx_plus_b
else:
outputs = activation_function(Wx_plus_b)
return outputs
xs = tf.placeholder(dtype=tf.float32, shape=[None,784])
ys = tf.placeholder(dtype=tf.float32, shape=[None,10])
l1=NeuralNetwork.add_layer(xs ,784,10,activation_function=tf.nn.sigmoid)
prediction= NeuralNetwork.add_layer(l1,10,1,activation_function=None)
loss=tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction),
reduction_indices=[1]))
train_step=tf.train.GradientDescentOptimizer(0.1).minimize(loss)
init=tf.global_variables_initializer()
sess=tf.Session()
sess.run(init)
for i in range(1000):
sess.run(train_step, feed_dict={xs:X_train, ys:Y_train})
sess.run(train_step, feed_dict={xs:X_test, ys:Y_test})
if i % 50==0: #print loss every 50 step
print("loss after training =",sess.run(loss,feed_dict={xs:X_train,ys:Y_train}))
############################
this is the output:
loss after training = 1833.7517
loss after training = 5131.908
loss after training = 5125.8164
loss after training = 5080.7856
loss after training = 4986.3594
loss after training = 4790.469
loss after training = 4498.9233
loss after training = 4173.5728
loss after training = 3850.2773
loss after training = 3391.6511
loss after training = 2610.5771
loss after training = 1402.5613
loss after training = 210.35393
loss after training = 36.65735
loss after training = 20.012854
loss after training = 14.49133
loss after training = 11.960718
loss after training = 10.629963
loss after training = 9.785735
loss after training = 9.206442