I am trying to do MNIST classification with TensorFlow 2.0.
The architecture of my neural network is as follows:
input layer with 784 neurons (28 * 28)
hidden layer with 512 neurons
output layer with 10 neurons
The hidden layer uses the ReLU activation function and the output layer uses softmax.
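For reference, a rough Keras equivalent of this architecture (my own sketch, only to pin down the intended layer shapes; the exercise below implements it by hand) would be:

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])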
The code I have for this is as follows:
# Load and prepare the MNIST dataset-
mnist = tf.keras.datasets.mnist
# type(mnist)
# module
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# type(X_train), type(y_train), type(X_test), type(y_test)
# (numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)
# Normalize and convert samples from integers to floating-point numbers-
X_train, X_test = X_train / 255.0, X_test / 255.0
X_train = tf.cast(X_train, dtype=tf.float32)
X_test = tf.cast(X_test, dtype=tf.float32)
y_train = tf.cast(y_train, dtype=tf.float32)
y_test = tf.cast(y_test, dtype=tf.float32)
print("\nShapes of training and testing sets are:")
print("X_train.shape = {0}, y_train.shape = {1}, X_test.shape = {2} & y_test.shape = {3}\n".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
# Shapes of training and testing sets are:
# X_train.shape = (60000, 28, 28), y_train.shape = (60000,), X_test.shape = (10000, 28, 28) & y_test.shape = (10000,)
# Reshape training and testing sets-
X_train = tf.reshape(X_train, shape=(X_train.shape[0], 784))
X_test = tf.reshape(X_test, shape=(X_test.shape[0], 784))
print("\nDimensions of training and testing sets AFTER reshaping are:")
print("X_train.shape = {0} and X_test.shape = {1}\n".format(X_train.shape, X_test.shape))
# Dimensions of training and testing sets AFTER reshaping are:
# X_train.shape = (60000, 784) and X_test.shape = (10000, 784)
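# (Aside, an equivalent reshape without hard-coding the batch size; '-1' lets
# TensorFlow infer that dimension-)
# X_train = tf.reshape(X_train, shape=(-1, 784))
# X_test = tf.reshape(X_test, shape=(-1, 784))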
def relu(x):
    '''
    Function to calculate ReLU for
    given 'x'
    '''
    # return np.maximum(x, 0)
    return tf.cast(tf.math.maximum(x, 0), dtype=tf.float32)

def relu_derivative(x):
    '''
    Function to calculate derivative
    of ReLU
    '''
    # return np.where(x <= 0, 0, 1)
    # return tf.where(x <= 0, 0, 1)
    return tf.cast(tf.where(x <= 0, 0, 1), dtype=tf.float32)
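# Quick sanity check of the two helpers above (illustrative, not part of the
# original run)-
# relu(tf.constant([-2.0, 0.0, 3.0]))             # -> [0., 0., 3.]
# relu_derivative(tf.constant([-2.0, 0.0, 3.0]))  # -> [0., 0., 1.]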
def softmax_stable(z):
    '''
    Function to compute softmax activation function.
    Numerically stable
    '''
    # First cast 'z' to floating type-
    z = tf.cast(z, dtype=tf.float32)
    # Get largest element in 'z'-
    largest = tf.math.reduce_max(z)
    # Raise each value to exp('z - largest')-
    z_exp = tf.math.exp(z - largest)
    # Compute softmax activation values-
    s = z_exp / tf.math.reduce_sum(z_exp)
    return s
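# Sanity check (illustrative): subtracting the max keeps exp() from overflowing
# float32, which a naive softmax would not survive-
# tf.math.exp(tf.constant([1000.0, 1000.0]))     # -> [inf, inf]
# softmax_stable(tf.constant([1000.0, 1000.0]))  # -> [0.5, 0.5]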
def initialize_parameters():
    W1 = tf.Variable(tf.random.uniform(shape=(784, 512), minval=0, maxval=1))
    b1 = tf.Variable(tf.random.uniform(shape=(1, 512), minval=0, maxval=1))
    W2 = tf.Variable(tf.random.uniform(shape=(512, 10), minval=0, maxval=1))
    b2 = tf.Variable(tf.random.uniform(shape=(1, 10), minval=0, maxval=1))
    return {'W1': W1, 'W2': W2,
            'b1': b1, 'b2': b2}
def forward_propagation(parameters, X, Y):
    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']
    Z1 = tf.matmul(X, W1) + b1   # (60000, 512); use the 'X' argument, not the global X_train
    A1 = relu(Z1)                # (60000, 512)
    Z2 = tf.matmul(A1, W2) + b2  # (60000, 10)
    # A2 = softmax_stable(Z2)    # (60000, 10)
    # OR-
    A2 = tf.nn.softmax(Z2)       # (60000, 10)
    return A2
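# Illustrative check (not part of the original run): every output row should be
# a probability distribution over the 10 classes-
# probs = forward_propagation(initialize_parameters(), X_train, y_train)
# probs.shape              # TensorShape([60000, 10])
# tf.reduce_sum(probs[0])  # ~1.0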
def cost(parameters, X, Y):
    y_pred_temp = forward_propagation(parameters, X, Y)
    # forward_propagation() already applies softmax, so 'y_pred' holds
    # probabilities, not logits; hence from_logits=False-
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    return loss_fn(y_true=Y, y_pred=y_pred_temp)
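# Alternative pattern (my suggestion, not the original code): return the raw
# logits Z2 from forward_propagation() and keep from_logits=True, letting the
# loss apply softmax internally; that is the numerically safer setup-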
def train_model(parameters, X, Y, learning_rate):
    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']
    with tf.GradientTape(persistent=True) as t:
        current_loss = cost(parameters, X, Y)  # use the 'X'/'Y' arguments, not the globals
    dW2, dW1, db2, db1 = t.gradient(current_loss, [W2, W1, b2, b1])
    W2 = W2 - (learning_rate * dW2)
    W1 = W1 - (learning_rate * dW1)
    b2 = b2 - (learning_rate * db2)
    b1 = b1 - (learning_rate * db1)
    updated_params = {'W1': W1, 'W2': W2,
                      'b1': b1, 'b2': b2}
    # return the loss as well, so the caller can unpack both values-
    return updated_params, current_loss
params = initialize_parameters()
updated_params, cost_val = train_model(params, X_train, y_train, 0.01)
Now, if I use train_model() in a loop, updating its parameters as follows:
for epoch in range(100):
    updated_params, cost_val = train_model(updated_params, X_train, y_train, 0.01)
the subsequent calls to train_model() return 'dW2', 'dW1', 'db2' and 'db1' as NoneType.
What is going wrong?
Thanks!
Answer 0 (score: 2)
The problem is in your initialize_parameters() function. You are not creating tf.Variables, you are creating tf.Tensors. If you need derivatives w.r.t. the parameters, they need to be tf.Variables.
def initialize_parameters():
    W1 = tf.Variable(tf.random.uniform(shape=(784, 512), minval=0, maxval=1))
    b1 = tf.Variable(tf.random.uniform(shape=(1, 512), minval=0, maxval=1))
    W2 = tf.Variable(tf.random.uniform(shape=(512, 10), minval=0, maxval=1))
    b2 = tf.Variable(tf.random.uniform(shape=(1, 10), minval=0, maxval=1))
    return {'W1': W1, 'W2': W2,
            'b1': b1, 'b2': b2}
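A minimal sketch of the difference (assuming TF 2.x), which is also why only the subsequent calls return None: tf.GradientTape watches tf.Variables automatically, but not plain tf.Tensors:

import tensorflow as tf

t = tf.random.uniform(shape=(2, 2))               # plain tf.Tensor
v = tf.Variable(tf.random.uniform(shape=(2, 2)))  # tf.Variable

with tf.GradientTape() as tape:
    loss = tf.reduce_sum(t ** 2) + tf.reduce_sum(v ** 2)

dt, dv = tape.gradient(loss, [t, v])
print(dt)  # None, the tape does not track plain Tensors by default
print(dv)  # a (2, 2) gradient tensor

Note that the same trap is hiding in your train_model(): W2 - (learning_rate * dW2) produces a plain tf.Tensor, so after the first call the dictionary you feed back in no longer holds Variables. One way to keep them as Variables is to update in place:

W2.assign_sub(learning_rate * dW2)
W1.assign_sub(learning_rate * dW1)
b2.assign_sub(learning_rate * db2)
b1.assign_sub(learning_rate * db1)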