Question

我想对数据进行分类：如果输入值小于200，输出应为（0，1）；如果输入值大于200，输出应为（1，0）。

输入值是连续的整数，网络共有5层。

隐藏层使用sigmoid激活函数，最后一层（输出层）使用softmax函数。

损失函数是用reduce_mean计算的交叉熵均值，并使用梯度下降法（gradient descent）进行训练。

import numpy as np
import tensorflow as tf

def set_x_data():
    """Return the training inputs as an integer column vector of shape (34, 1).

    The values run from 50 to 390 in steps of 10, except that 100 is not
    part of the set (the sequence goes 90, 110, ...).
    """
    below_gap = np.arange(50, 100, 10)    # 50, 60, 70, 80, 90
    above_gap = np.arange(110, 400, 10)   # 110, 120, ..., 390
    return np.concatenate((below_gap, above_gap)).reshape(-1, 1)

def set_y_data(x):
    """Return the fixed one-hot label table of shape (34, 2).

    The first 16 rows are [0, 1] and the remaining 18 rows are [1, 0].
    The argument ``x`` is accepted but never read.

    NOTE(review): the rows are matched to the inputs purely by position.
    Paired with the 34 values from set_x_data in this file, rows 14 and 15
    (x = 200 and x = 210) receive the [0, 1] label -- confirm whether that
    is intended.
    """
    low_rows = [[0, 1]] * 16
    high_rows = [[1, 0]] * 18
    return np.array(low_rows + high_rows)

def set_bias(efficiency):
    """Wrap ``efficiency`` in a one-element NumPy array and return it."""
    return np.array([efficiency])

# Weight matrices of the 1 -> 5 -> 5 -> 5 -> 5 -> 2 network. Each one is
# initialised from tf.random_normal and named 'weight1' ... 'weight5',
# created in that order.
_WEIGHT_SHAPES = ([1, 5], [5, 5], [5, 5], [5, 5], [5, 2])
W1, W2, W3, W4, W5 = [
    tf.Variable(tf.random_normal(shape), name='weight%d' % index)
    for index, shape in enumerate(_WEIGHT_SHAPES, start=1)
]

def inference(input, b):
    """Build the forward pass and return the softmax output tensor.

    ``input`` goes through four sigmoid layers (weights W1..W4) and a final
    softmax layer (weight W5). The same bias ``b`` is added in every layer.
    """
    activation = input
    for weight in (W1, W2, W3, W4):
        activation = tf.sigmoid(tf.matmul(activation, weight) + b)

    return tf.nn.softmax(tf.matmul(activation, W5) + b)

def loss(hypothesis, y):
    """Return the mean over rows of ``-sum(y * log(hypothesis))``.

    The sum runs over axis 1 of the element-wise product, and the mean is
    taken over the resulting per-row values.

    NOTE(review): tf.log is applied to ``hypothesis`` directly, so an exact
    zero in it would produce -inf/NaN -- consider clipping if that can occur.
    """
    log_probabilities = tf.log(hypothesis)
    per_row = -tf.reduce_sum(y * log_probabilities, reduction_indices=[1])

    return tf.reduce_mean(per_row)

def train(loss):
    """Return an op that minimises ``loss`` by gradient descent (rate 0.1)."""
    return tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss)

# Training data and the constant bias value fed to every layer.
# set_x_data takes no parameters; calling it with an argument raised a
# TypeError before any training could run.
x_data = set_x_data()
y_data = set_y_data(0)  # the argument is not read by set_y_data
b_data = set_bias(0.8)

# Graph inputs: one feature per sample, two-column one-hot labels, and the
# bias, which is fed as data and therefore never updated by the optimizer.
x = tf.placeholder(tf.float32, shape=[None, 1])
y = tf.placeholder(tf.float32, shape=[None, 2])
b = tf.placeholder(tf.float32, shape=[None])

hypothesis = inference(x, b)
# From here on the names `loss` and `train` refer to the graph nodes, not
# to the builder functions they were created with.
loss = loss(hypothesis, y)
train = train(loss)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# First-layer weights before training, for comparison with the print below.
print(sess.run(W1))

# Full-batch gradient descent: every step sees the whole training set.
train_feed = {x: x_data, y: y_data, b: b_data}
for step in range(2000):
    sess.run(train, feed_dict=train_feed)

print(sess.run(W1))
# Probe the trained network with an input far outside the training range.
print(sess.run(hypothesis, feed_dict={x: np.array([[1000]]), b: b_data}))

我在训练前后分别打印了W1，发现它的值几乎没有变化；用输入1000进行测试时，结果也不符合我的预期。我原以为输出会非常接近（1，0），但实际结果却接近（0.5，0.5）。

我猜问题出在损失函数上，因为它是我从各处东拼西凑复制来的，但我不确定。

上面的代码只是我的代码的简化，但是我认为我必须展示我的真实代码

代码太长，所以我创建了新帖子

classifying data by tensorflow but accuracy value didn't change

Answer 1

上述网络的训练存在一些问题，但只需做一些修改，就能得到一个实现该决策函数（this decision function，见原文链接中的图）的网络。

（链接中的图显示的是第2类的得分，即 x > 200 这一类的得分。）

此网络中有待改进的问题列表：

训练数据非常少（仅34个点！）。这通常太少了，尤其是对于您所使用的5层网络而言。通常，输入样本的数量应当远多于网络中的参数数量。请尝试增加输入样本并减少层数（如下面的代码所示——我用浮点数代替整数以获得更多的点，但我认为这仍然符合您的需求）。
输入的取值范围通常需要缩放（下面我通过除以一个常数实现了最简单的缩放）。这是因为通常应避免取值范围过大的变量（尤其是当数据要经过许多带有soft-max非线性的层时，这会破坏极高或极低的值中包含的信息）。在更复杂的情况下，您可能需要使用最小-最大（min-max）缩放或z分数（z-score）标准化。
尝试训练更多轮（epoch），并绘制损失函数值的变化过程。在原来的轮数下，损失函数的优化尚未收敛。下面我把轮数增加到原来的10倍。请看修改后的代码如何在此图（this plot）中接近收敛（同时可以看出2000轮是不够的）：
打乱（x，y）数据的顺序会有帮助。尽管在本例中这并不关键，但它能加快收敛（参见Le Cun的论文“Efficient Backprop”）。在更复杂的问题中，通常必须这样做。
重要的是，我认为您希望b作为参数，而不是常量，不是吗？网络的偏差通常还会与乘法权重一起优化。（而且，对所有隐藏层使用单个共享偏差也不常见。）

下面是代码。请注意，可能还有进一步改进的空间，但这些小调整最终能得到所需的决策函数。

我添加了一些内联注释以指示相对于原始版本的更改。希望您能从中找到建议。

代码：

import numpy as np
import tensorflow as tf

# I've modified the functions set_x_data and set_y_data
# so as to generate a larger set of numbers. 

# Generate a range of numbers from 50 to 390
def set_x_data():
    """Return np.arange(50, 390, 0.1) as a float column vector (shape (n, 1))."""
    return np.arange(50, 390, 0.1).reshape(-1, 1)

# Assign labels depending on x_data
def set_y_data(x_data):
    """Return boolean labels for ``x_data``, stacked horizontally.

    The first block is ``x_data >= 200`` and the second is ``x_data < 200``,
    so a column-vector input of shape (n, 1) yields an (n, 2) result.
    """
    threshold = 200
    label_columns = (x_data >= threshold, x_data < threshold)
    return np.hstack(label_columns)

def set_bias(efficiency):
    """Wrap ``efficiency`` in a one-element NumPy array and return it."""
    return np.array([efficiency])

# Let's keep W1 and W5 (one hidden layer only)
# BTW, in this problem you could do with 0 hidden layers. But keeping
# 1 to show it works
# Layer sizes: one input feature, five hidden units, two output classes.
N_INPUTS, N_HIDDEN, N_CLASSES = 1, 5, 2

W1 = tf.Variable(tf.random_normal([N_INPUTS, N_HIDDEN]), name='weight1')
W5 = tf.Variable(tf.random_normal([N_HIDDEN, N_CLASSES]), name='weight5')

# The bias is a trainable scalar variable starting at 0.0, so the optimizer
# updates it together with the weights.
b = tf.Variable(tf.constant(0.0))

# Just keeping 1 hidden layer
def inference(input):
    """Build the forward pass and return the softmax output tensor.

    ``input`` goes through one sigmoid hidden layer (weight W1) and a
    softmax output layer (weight W5). The same variable ``b`` is added as
    the bias in both layers.
    """
    hidden_activation = tf.sigmoid(tf.matmul(input, W1) + b)
    logits = tf.matmul(hidden_activation, W5) + b

    return tf.nn.softmax(logits)

# This is unchanged
def loss(hypothesis, y):
    """Return the mean over rows of ``-sum(y * log(hypothesis))``.

    The sum runs over axis 1 of the element-wise product, and the mean is
    taken over the resulting per-row values.

    NOTE(review): tf.log is applied to ``hypothesis`` directly, so an exact
    zero in it would produce -inf/NaN -- consider clipping if that can occur.
    """
    log_probabilities = tf.log(hypothesis)
    per_row = -tf.reduce_sum(y * log_probabilities, reduction_indices=[1])

    return tf.reduce_mean(per_row)

# This is unchanged
def train(loss):
    """Return an op that minimises ``loss`` by gradient descent.

    Args:
        loss: scalar tensor to minimise.

    Returns:
        The op produced by ``GradientDescentOptimizer(0.1).minimize(loss)``;
        running it performs one update step.
    """
    # The assignment must stay on one line: splitting it after `=` is a
    # SyntaxError.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss)

    return train_op

# Using SCALE to normalize the input variables (range of inputs too big)
# This is a simple normalization in this case. Other examples are 
# Min-Max normalization or z-scores. 

# Divisor that brings the raw inputs (50..390) down to a small range.
SCALE = 1000

# Labels are derived from the raw values, so they are computed before the
# inputs are scaled in place.
x_data = set_x_data()
y_data = set_y_data(x_data)
x_data /= SCALE

# Only x and y are fed; b is a trainable variable.
x = tf.placeholder(tf.float32, shape=[None, 1])
y = tf.placeholder(tf.float32, shape=[None, 2])

hypothesis = inference(x)
# From here on `loss` and `train` name the graph nodes, not the builders.
loss = loss(hypothesis, y)
train = train(loss)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# First-layer weights before training.
print(sess.run(W1))

# Ten times the original 2000 steps; fewer did not converge.
epochs = 20000
losses = np.zeros(epochs)
for step in range(epochs):
    # Reshuffle inputs and labels with the same permutation each step.
    order = np.random.permutation(x_data.shape[0])
    x_data = x_data[order]
    y_data = y_data[order, :]
    # Run one update and record the loss value returned alongside it.
    _, step_loss = sess.run([train, loss], feed_dict={x: x_data, y: y_data})
    losses[step] = step_loss

# Trained first-layer weights and bias.
print(sess.run(W1))
print(sess.run(b))

用于绘制上述决策函数的代码：

%matplotlib inline
import matplotlib.pyplot as plt
ystar = np.arange(50, 400, 10)[:,None]
plt.plot(ystar, sess.run(hypothesis, feed_dict={x:ystar/SCALE})[:,0])

通过tensorflow分类整数数据

1 个答案: