我在Python3上使用tensorflow(2.0版)实现了神经网络模型
我不知道代码能否正常工作,因为损失值几乎没有变化。
代码错误 要么 模型参数太多(这意味着代码正确)?
请告诉我代码是否正常工作。
以下是代码。
import tensorflow as tf
import numpy as np
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
class Model(object):
def __init__(self):
self.var_list = []
self.w_layer1 = tf.Variable(tf.random.normal(shape=[28*28, 1000], stddev=0.3,dtype=tf.float64))
self.b_layer1 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
self.w_layer2 = tf.Variable(tf.random.normal(shape=[1000, 100], stddev=0.3,dtype=tf.float64))
self.b_layer2 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
self.w_layer3 = tf.Variable(tf.random.normal(shape=[100, 100], stddev=0.3,dtype=tf.float64))
self.b_layer3 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
self.w_layer4 = tf.Variable(tf.random.normal(shape=[100, 10], stddev=0.3,dtype=tf.float64))
self.b_layer4 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
self.var_list.append(self.w_layer1)
self.var_list.append(self.b_layer1)
self.var_list.append(self.w_layer2)
self.var_list.append(self.b_layer2)
self.var_list.append(self.w_layer3)
self.var_list.append(self.b_layer3)
self.var_list.append(self.w_layer4)
self.var_list.append(self.b_layer4)
def __call__(self, x):
return self.w*x+self.b
def dense_layer(self, inputs, w, b):
z = tf.matmul(inputs, w) + b
return tf.nn.relu(z)
def output_layer(self, inputs, w, b):
return tf.matmul(inputs, w) + b
def flattend(self, inputs):
inputs = tf.cast(inputs, tf.float64)
return tf.reshape(inputs, [-1, 28*28])
def loss(self, outputs, targets):
predicted_y = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = outputs, labels = targets))
return predicted_y
def grad(self, x, target_y):
with tf.GradientTape() as tape:
tape.watch(self.var_list)
loss_value = self.loss(self.run(x), target_y)
return tape.gradient(loss_value, self.var_list)
def run(self, inputs):
inputs = self.flattend(inputs)
layer1 = self.dense_layer(inputs, self.w_layer1, self.b_layer1)
layer2 = self.dense_layer(layer1, self.w_layer2, self.b_layer2)
layer3 = self.dense_layer(layer2, self.w_layer3, self.b_layer3)
layer4 = self.output_layer(layer3, self.w_layer4, self.b_layer4)
return layer4
def optimizer(self):
opt = tf.keras.optimizers.SGD(learning_rate=0.01)
return opt
def make_onehot_labels(labels):
depth = 10
one_hot_labels = tf.one_hot(labels, depth)
return one_hot_labels
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
train_images = train_images/255.0
test_images = test_images/255.0
train_labels = make_onehot_labels(train_labels)
test_labels = make_onehot_labels(test_labels)
ds_train_x = tf.data.Dataset.from_tensor_slices(train_images)
ds_train_y = tf.data.Dataset.from_tensor_slices(train_labels)
train_dataset = tf.data.Dataset.zip((ds_train_x, ds_train_y)).shuffle(1000).repeat().batch(300)
train_images = tf.convert_to_tensor(train_images)
train_labels = tf.convert_to_tensor(train_labels)
test_images = tf.convert_to_tensor(test_images)
test_labels = tf.convert_to_tensor(test_labels)
count = 1
model = Model()
opt = model.optimizer()
print(model.loss(model.run(train_images), train_labels))
for epoch in range(10):
for data in train_dataset:
if count%200==0:
print(model.loss(model.run(train_images), train_labels))
#print(grads)
break
grads = model.grad(data[0], data[1])
opt.apply_gradients(zip(grads, model.var_list))
count = count+1
以下是上述代码执行的结果
tf.Tensor(184.81706096058622, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
答案 0 :(得分:1)
问题在以下部分
for epoch in range(10):
for data in train_dataset:
if count%200==0:
print(model.loss(model.run(train_images), train_labels))
#print(grads)
break
grads = model.grad(data[0], data[1])
opt.apply_gradients(zip(grads, model.var_list))
count = count+1
在if条件下,您有一个break
,这意味着您打count%200==0
时会中断训练循环(并重新开始新的纪元)。删除中断,您将看到错误率下降。
要详细说明这个问题,一旦达到count == 200,就会中断循环,并且计数器不再增加,因此,如果在200次迭代后满足条件(基本上包括您的渐变应用程序。)