我正在尝试在 TensorFlow 中编写一个简单的神经网络,以了解其具体细节。 旨在简单地拟合非线性函数,而无需明确使用模型、层或优化器。
但我觉得训练太早饱和了,有人可以建议修改以使其表现更好吗?
import tensorflow as tf
import numpy as np
class SimpleDenseLayer(tf.keras.layers.Layer):
def __init__(self, in_length=0, in_dim=0, out_dim=0):
super(SimpleDenseLayer,self).__init__()
print(in_length,in_dim,out_dim)
self.kernel = tf.Variable(shape=(in_dim,out_dim),trainable=True,
initial_value=tf.random.uniform((in_dim,out_dim)))
self.bias = tf.Variable(shape=(in_length,out_dim),trainable=True,
initial_value=tf.random.uniform((in_length,out_dim)))
def call(self,x):
return tf.nn.relu(tf.matmul(x,self.kernel) + self.bias)
x = tf.random.uniform((100,2))
y = x[0]**3 - 3*x[1]
layer1 = SimpleDenseLayer(100,2,80)
layer2 = SimpleDenseLayer(100,80,1)
loss = lambda y1, y2: tf.reduce_sum(tf.math.abs(y1-y2))
for i in range(50):
with tf.GradientTape() as gt:
nn = layer2(layer1(x))
los = loss(nn,y)
grad1,grad2 = gt.gradient(los,[layer1.variables, layer2.variables])
print(i,los)
for grad,var in zip(grad1,layer1.variables):
var.assign_sub(grad*0.0001)
for grad,var in zip(grad2,layer2.variables):
var.assign_sub(grad*0.0001)