损失值不变(神经网络)

时间:2019-12-02 04:06:09

标签: tensorflow

我在Python3上使用tensorflow(2.0版)实现了神经网络模型

我不知道代码能否正常工作,因为损失值几乎没有变化。

代码错误  要么 模型参数太多(这意味着代码正确)?

请告诉我代码是否正常工作。

以下是代码。

import tensorflow as tf
import numpy as np

fashion_mnist = tf.keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()



class Model(object):
 def __init__(self):
        self.var_list   = []
        self.w_layer1 = tf.Variable(tf.random.normal(shape=[28*28, 1000], stddev=0.3,dtype=tf.float64))
        self.b_layer1 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
        self.w_layer2 = tf.Variable(tf.random.normal(shape=[1000, 100], stddev=0.3,dtype=tf.float64))
        self.b_layer2 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
        self.w_layer3 = tf.Variable(tf.random.normal(shape=[100, 100], stddev=0.3,dtype=tf.float64))
        self.b_layer3 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
        self.w_layer4 = tf.Variable(tf.random.normal(shape=[100, 10], stddev=0.3,dtype=tf.float64))
        self.b_layer4 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))

        self.var_list.append(self.w_layer1)
        self.var_list.append(self.b_layer1)
        self.var_list.append(self.w_layer2)
        self.var_list.append(self.b_layer2)
        self.var_list.append(self.w_layer3)
        self.var_list.append(self.b_layer3)
        self.var_list.append(self.w_layer4)
        self.var_list.append(self.b_layer4)


 def __call__(self, x):
        return self.w*x+self.b

 def dense_layer(self, inputs, w, b):
        z = tf.matmul(inputs, w) + b
        return tf.nn.relu(z)

 def output_layer(self, inputs, w, b):
        return tf.matmul(inputs, w) + b

 def flattend(self, inputs):
        inputs = tf.cast(inputs, tf.float64)
        return tf.reshape(inputs, [-1, 28*28])

 def loss(self, outputs, targets):
        predicted_y = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = outputs, labels = targets))
        return predicted_y

 def grad(self, x, target_y):
        with tf.GradientTape() as tape:
            tape.watch(self.var_list)
            loss_value = self.loss(self.run(x), target_y)
            return tape.gradient(loss_value, self.var_list)

 def run(self, inputs):
        inputs  = self.flattend(inputs)
        layer1  = self.dense_layer(inputs, self.w_layer1, self.b_layer1)
        layer2  = self.dense_layer(layer1, self.w_layer2, self.b_layer2)
        layer3  = self.dense_layer(layer2, self.w_layer3, self.b_layer3)
        layer4  = self.output_layer(layer3, self.w_layer4, self.b_layer4)

        return layer4

 def optimizer(self):
    opt = tf.keras.optimizers.SGD(learning_rate=0.01)
    return opt

def make_onehot_labels(labels):
    depth = 10
    one_hot_labels = tf.one_hot(labels, depth)
    return one_hot_labels



fashion_mnist = tf.keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
train_images = train_images/255.0
test_images  = test_images/255.0
train_labels = make_onehot_labels(train_labels)
test_labels  = make_onehot_labels(test_labels)

ds_train_x = tf.data.Dataset.from_tensor_slices(train_images)
ds_train_y = tf.data.Dataset.from_tensor_slices(train_labels)
train_dataset = tf.data.Dataset.zip((ds_train_x, ds_train_y)).shuffle(1000).repeat().batch(300)


train_images = tf.convert_to_tensor(train_images)
train_labels   = tf.convert_to_tensor(train_labels)


test_images = tf.convert_to_tensor(test_images)
test_labels   = tf.convert_to_tensor(test_labels)


count = 1
model = Model()
opt = model.optimizer()
print(model.loss(model.run(train_images), train_labels))
for epoch in range(10):

    for data in train_dataset:

        if count%200==0:

            print(model.loss(model.run(train_images), train_labels))
            #print(grads)
            break
        grads = model.grad(data[0], data[1])  
        opt.apply_gradients(zip(grads, model.var_list))
        count = count+1


以下是上述代码执行的结果

tf.Tensor(184.81706096058622, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)

1 个答案:

答案 0 :(得分:1)

问题在以下部分

for epoch in range(10):

    for data in train_dataset:

        if count%200==0:

            print(model.loss(model.run(train_images), train_labels))
            #print(grads)
            break
        grads = model.grad(data[0], data[1])  
        opt.apply_gradients(zip(grads, model.var_list))
        count = count+1

在if条件下,您有一个break,这意味着您打count%200==0时会中断训练循环(并重新开始新的纪元)。删除中断,您将看到错误率下降。

要详细说明这个问题,一旦达到count == 200,就会中断循环,并且计数器不再增加,因此,如果在200次迭代后满足条件(基本上包括您的渐变应用程序。)