我正在使用一个简单的MLP来解决以下形式的线性方程组
y = Ax
此处 A 是将 x 映射到 y 的矩阵。我通过固定 A 并随机生成 x 来生成合成数据。我认为,MLP 应该能够学习这种简单的映射,但是我的网络无法正常工作。
这是我生成数据的方式:
import numpy as np
import scipy.sparse as sps
import tensorflow as tf
import matplotlib as mp
import matplotlib.pyplot as plt
# Problem sizes: x has N_x entries and y = A x has N_y entries.
N_x = 100
N_y = 50
d = 0.1        # density handed to sps.random for every sparse x column
m_tr = 10000   # number of training samples
m_val = 10000  # not referenced anywhere else in the visible code
m_te = 120     # number of test samples

# Random Gaussian matrix whose columns are rescaled to unit Euclidean norm.
A = np.random.randn(N_y, N_x)
A = A / np.linalg.norm(A, 2, axis=0)


def _scaled_sparse_columns(n_samples):
    """Return an (N_x, n_samples) array of sparse columns with random norms.

    Each column is drawn with ``sps.random`` at density ``d``, normalised to
    unit Euclidean norm and then multiplied by its own uniform [0, 1) factor.
    """
    columns = [
        sps.random(N_x, 1, density=d, format='coo', dtype=None,
                   random_state=None)
        for _ in range(n_samples)
    ]
    dense = sps.hstack(columns).toarray()
    # The amplitudes are drawn only after all columns exist, and the product
    # is formed before the division, matching the order of the one-line form.
    amplitudes = np.random.rand(1, n_samples)
    return amplitudes * dense / np.linalg.norm(dense, 2, axis=0)


# Generate x_train, then x_test (this order fixes the random stream).
x_train = _scaled_sparse_columns(m_tr)
x_test = _scaled_sparse_columns(m_te)

# Observations y = A x for both splits.
y_train = np.matmul(A, x_train)
y_test = np.matmul(A, x_test)

# Samples along axis 0; each tuple is (network input y, regression target x).
train_data = (y_train.T, x_train.T)
test_data = (y_test.T, x_test.T)
这是我的MLP
# Parameters
Batch_Size = 100        # samples per training mini-batch
training_epochs = 20    # full passes over the training set
learning_rate = 0.001   # step size passed to the optimizer

# Whole mini-batches in one pass over the m_tr training samples.
n_batches = m_tr // Batch_Size

# Batch size is a placeholder so it can be fed a different value each time
# the dataset iterator is initialised.
batch_size = tf.placeholder(tf.int64)
def fc_layer(input_, channels_in, channels_out, name="fc"):
    """Build a fully connected (affine) layer with no activation applied.

    Args:
        input_: Tensor that is right-multiplied by the weight matrix.
        channels_in: Number of rows of the weight matrix.
        channels_out: Number of columns of the weight matrix and length of
            the bias vector.
        name: Name scope under which the layer's variables are created.

    Returns:
        The tensor ``input_ @ weights + biases``.
    """
    with tf.name_scope(name):
        # Weights start as N(0, 0.1^2) samples; every bias starts at 0.1.
        weights = tf.Variable(
            tf.random_normal([channels_in, channels_out], stddev=0.1),
            name="weights",
        )
        biases = tf.Variable(
            tf.constant(0.1, shape=[channels_out]),
            name="biases",
        )
        return tf.matmul(input_, weights) + biases
# Setup placeholders, and reshape the data
y = tf.placeholder(tf.float32, shape=[None, N_y], name='y')
x = tf.placeholder(tf.float32, shape=[None, N_x], name='x')

# The dataset yields (y, x) pairs: y is the network input, x the target.
# repeat() lets the same iterator be read for several epochs.
dataset = tf.data.Dataset.from_tensor_slices((y, x)).batch(batch_size).repeat()
iterator = dataset.make_initializable_iterator()
input_features, output_features = iterator.get_next()

# Three-layer MLP: N_y -> 256 -> 512 -> N_x, ReLU on the hidden layers.
fc_1_linear = fc_layer(input_features, N_y, 256, name="fc1")
fc_1 = tf.nn.relu(fc_1_linear)
fc_2_linear = fc_layer(fc_1, 256, 512, name="fc2")
fc_2 = tf.nn.relu(fc_2_linear)
out_layer = fc_layer(fc_2, 512, N_x, name="fc3")

with tf.name_scope('loss'):
    # Mean over the batch of the per-sample Euclidean reconstruction error.
    loss_op = tf.reduce_mean(
        tf.sqrt(tf.reduce_sum(
            tf.squared_difference(out_layer, output_features), 1)))
    tf.summary.scalar('loss', loss_op)
with tf.name_scope('train'):
    train_op = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(loss_op)

# The training loop used merged_op and writer without ever creating them,
# which raised NameError as soon as step exceeded 10.
merged_op = tf.summary.merge_all()

sess = tf.Session()
writer = tf.summary.FileWriter('./logs', sess.graph)
sess.run(tf.global_variables_initializer())

# initialise iterator with train data
sess.run(iterator.initializer,
         feed_dict={y: train_data[0], x: train_data[1],
                    batch_size: Batch_Size})
print('Training...')
end_loss = 0
for i in range(training_epochs):
    tot_loss = 0
    for batch_idx in range(n_batches):
        step = i * n_batches + batch_idx
        if step > 10:
            # Fetch the summary in the same run call as the training step.
            # A separate sess.run(merged_op) would pull another batch from
            # the iterator and log the loss of data that was never trained on.
            _, loss_value, summary = sess.run([train_op, loss_op, merged_op])
            writer.add_summary(summary, step)
        else:
            _, loss_value = sess.run([train_op, loss_op])
        tot_loss += loss_value
    end_loss = tot_loss / n_batches
    print("Iter: {}, Loss: {:.4f}".format(i, end_loss))
writer.close()

# initialise iterator with test data
sess.run(iterator.initializer,
         feed_dict={y: test_data[0], x: test_data[1],
                    batch_size: test_data[0].shape[0]})
# The whole test set is one batch, so a single evaluation is enough; the
# value is reused for the generalization error instead of being recomputed.
test_loss = sess.run(loss_op)
# The format string was previously passed to print() as a separate argument
# and so was never applied to the value.
print('Test Loss: {:.4f}'.format(test_loss))
print('Generalization Error: {:.4f}'.format(test_loss - end_loss))
我不确定问题出在哪里。损失似乎随着每个 epoch 逐渐减小,但是在绘制实际向量和重构向量时,两者之间存在很大差异。