TensorFlow always converges to the same output for every item after training

Time: 2016-11-06 16:48:22

Tags: tensorflow batching

Here is the code snippet I am working with:

import tensorflow as tf
import numpy as np
from PIL import Image
from os import listdir

nodes_l1 = 500
nodes_l2 = 100
nodes_l3 = 500
num_batches = 20
num_epochs = 50

# Array of file dirs
human_file_array = listdir('human/')
human_file_array = [['human/'+human_file_array[i],[1,0]] for i in range(len(human_file_array))]
cucumber_file_array = listdir('cucumber/')
cucumber_file_array = [['cucumber/'+cucumber_file_array[i],[0,1]] for i in range(len(cucumber_file_array))]
file_array_shuffled = human_file_array + cucumber_file_array
np.random.shuffle(file_array_shuffled)

htest_file_array = listdir('human_test/')
htest_file_array = [['human_test/'+htest_file_array[i],[1,0]] for i in range(len(htest_file_array))]
ctest_file_array = listdir('cucumber_test/')
ctest_file_array = [['cucumber_test/'+ctest_file_array[i],[0,1]] for i in range(len(ctest_file_array))]
test_file_array = ctest_file_array + htest_file_array
np.random.shuffle(test_file_array)

input_data = tf.placeholder('float', [None, 250*250*3])
output_data = tf.placeholder('float')

hl1_vars = {
    'weight': tf.Variable(tf.random_normal([250*250*3, nodes_l1])),
    'bias': tf.Variable(tf.random_normal([nodes_l1]))
}

hl2_vars = {
    'weight': tf.Variable(tf.random_normal([nodes_l1, nodes_l2])),
    'bias': tf.Variable(tf.random_normal([nodes_l2]))
}

hl3_vars = {
    'weight': tf.Variable(tf.random_normal([nodes_l2, nodes_l3])),
    'bias': tf.Variable(tf.random_normal([nodes_l3]))
}

output_layer_vars = {
    'weight': tf.Variable(tf.random_normal([nodes_l3, 2])),
    'bias': tf.Variable(tf.random_normal([2]))
}

layer1 = tf.add(tf.matmul(input_data, hl1_vars['weight']),hl1_vars['bias'])
layer1 = tf.nn.softmax(layer1)

layer2 = tf.add(tf.matmul(layer1, hl2_vars['weight']), hl2_vars['bias'])
layer2 = tf.nn.softmax(layer2)

layer3 = tf.add(tf.matmul(layer2, hl3_vars['weight']), hl3_vars['bias'])
layer3 = tf.nn.softmax(layer3)

output = tf.add(tf.matmul(layer3, output_layer_vars['weight']), output_layer_vars['bias'])
output = tf.nn.softmax(output)

def convert_image(path):
    with Image.open(path) as img:
        img = img.resize((250,250))
        img = img.convert('RGB')
        return img

def train_network():
    #prediction = output
    cost = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(output, output_data)) # output is the prediction, output_data is key
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver()

        for epoch in range(num_epochs):
            epoch_error = 0
            batch_size = int((len(file_array_shuffled)/num_batches))
            for i in range(num_batches):
                path_var = []
                key_var = []
                img_var = []
                #Still Filename Batch!!
                batch_file_array = file_array_shuffled[batch_size*i:(batch_size*i)+batch_size] #batch1['file&val array']['val']
                for batch_val in batch_file_array:
                    path_var.append(batch_val[0])
                    key_var.append(batch_val[1])
                #FROM HERE ON path_var AND key_var HAVE MATCHING INDEXES DO NOT RANDOMIZE!!!

                #This section here is complicated!
                for path in path_var:
                    img = convert_image(path)
                    img_var.append(np.reshape(np.array(img), 250*250*3))
                #print np.shape(img_var),np.shape(key_var) #img_var is array of size (batch#, 250*250*3) key_var is the key [human, cucumber]

                #End of complicated image conversion
                _,c = sess.run([optimizer, cost], feed_dict={input_data:img_var, output_data:key_var})
                epoch_error += c
                #print "Batch",i+1,"done out of",num_batches
            print "Epoch",epoch+1,"completed out of",num_epochs,"\tError",epoch_error
            save_path = saver.save(sess, "model.ckpt")

train_network()


def use_network():
    #prediction = output
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        saver = tf.train.Saver()
        saver.restore(sess, "model.ckpt")

        for test_file in test_file_array:
            #print test_file
            img = np.reshape(np.array(convert_image(test_file[0])), 250*250*3)
            result = output.eval(feed_dict={input_data:[img]})
            print result,tf.argmax(result,1).eval(),test_file[1]

use_network()

http://pastebin.com/Gp6SVYJR

Since I'm still new to working with TensorFlow, I thought it would be a good idea to try to make a program that can tell the difference between humans and cucumbers. I pulled images from Image-Net and put the human pictures into human/ and the cucumber photos into cucumber/.

Here is a list of the steps I believe the program is taking:

  1. Build an array of file paths and keys, then shuffle it.

  2. Create batches of file paths.

  3. The file paths in a batch are converted to images, resized to 250x250, and added to an image batch array. (At this point the keys and images are still aligned.)

  4. The image batch and key batch are fed into the network.

  5. At the end of all the epochs, it tests the network against 10 of each kind of image.

  6. When I run use_network(), I get this output in the console:

    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.53653401  0.46346596]] [0] [1, 0]
    [[ 0.53653401  0.46346596]] [0] [1, 0]
    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.53653401  0.46346596]] [0] [1, 0]
    [[ 0.53653401  0.46346596]] [0] [1, 0]
    [[ 0.53653401  0.46346596]] [0] [1, 0]
    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.53653401  0.46346596]] [0] [1, 0]
    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.61422414  0.38577583]] [0] [1, 0]
    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.53653401  0.46346596]] [0] [1, 0]
    [[ 0.53653401  0.46346596]] [0] [1, 0]
    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.53653401  0.46346596]] [0] [0, 1]
    [[ 0.53653401  0.46346596]] [0] [1, 0]
    

    The first array is the output nodes, the second array is the tf.argmax() of the output, and the third array is the expected result.

    The actual learning also seems to be very small; here are the results of training:

    Epoch 1 completed out of 50     Error 3762.83390808
    Epoch 2 completed out of 50     Error 3758.51748657
    Epoch 3 completed out of 50     Error 3753.70425415
    Epoch 4 completed out of 50     Error 3748.32539368
    Epoch 5 completed out of 50     Error 3742.45524597
    Epoch 6 completed out of 50     Error 3736.21272278
    Epoch 7 completed out of 50     Error 3729.56756592
    ...
    Epoch 45 completed out of 50    Error 3677.34605408
    Epoch 46 completed out of 50    Error 3677.34388733
    Epoch 47 completed out of 50    Error 3677.34150696
    Epoch 48 completed out of 50    Error 3677.3391571
    Epoch 49 completed out of 50    Error 3677.33673096
    Epoch 50 completed out of 50    Error 3677.33418274
    

    I have tried the following to change things up:

    1. Making the images smaller, e.g. 32x32, and/or black and white, to see whether smaller images would cause the predictions to change.

    2. Changing the cost equation between reduce_sum and reduce_mean, and the inner equation between sigmoid_cross_entropy_with_logits and softmax_cross_entropy_with_logits (see the sketch after this list).

    3. I have some ideas about why it isn't working; they are:

      1. Bad code.

      2. The input data is too large and there aren't enough nodes/layers to deal with it.

      3. The images and their associated keys get scrambled somewhere.
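    For reference, the cost variants from attempt 2 looked roughly like the sketch below, built against the output and output_data tensors defined above (in this TF 0.x API the logits argument comes first):

        # Sketch of the cost variants from attempt 2 (TF 0.x positional API:
        # (logits, labels)); only one of these was used at a time.
        cost_sum  = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(output, output_data))
        cost_mean = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, output_data))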

1 Answer:

Answer 0: (score: 3)

I see a few potential problems. First, you are using densely connected layers to process large Image-Net images. You should be using a convolutional network for images; I think that is your biggest problem. Only after a pyramid of convolution/pooling layers has reduced the spatial dimensions down to "features" should you add a dense layer.

https://www.tensorflow.org/versions/r0.11/tutorials/deep_cnn/index.html
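As a minimal sketch, such a pyramid could look like the following in the TF 0.x API (all filter sizes, channel counts, and variable names here are illustrative assumptions, not code from the question):

    # Sketch: two conv/pool stages, then one dense layer on the reduced features.
    x = tf.reshape(input_data, [-1, 250, 250, 3])  # flat pixels back to NHWC images

    w_conv1 = tf.Variable(tf.truncated_normal([5, 5, 3, 32], stddev=0.1))
    b_conv1 = tf.Variable(tf.zeros([32]))
    conv1 = tf.nn.relu(tf.nn.conv2d(x, w_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1)
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')  # 250 -> 125

    w_conv2 = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1))
    b_conv2 = tf.Variable(tf.zeros([64]))
    conv2 = tf.nn.relu(tf.nn.conv2d(pool1, w_conv2, strides=[1, 1, 1, 1], padding='SAME') + b_conv2)
    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')  # 125 -> 63

    # Only after the spatial dimensions are reduced do we add a dense layer.
    flat = tf.reshape(pool2, [-1, 63 * 63 * 64])
    w_fc = tf.Variable(tf.truncated_normal([63 * 63 * 64, 2], stddev=0.1))
    b_fc = tf.Variable(tf.zeros([2]))
    logits = tf.matmul(flat, w_fc) + b_fc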

Second, even if you are going to use dense layers, you should not apply the softmax function as the activation between hidden layers (there are some exceptions, such as attention models, but that is a more advanced concept). Softmax forces the activations in a layer to sum to one, which is probably not what you want. I would change the activations between the hidden layers to relu, or at least tanh.
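Concretely, that would mean changing the hidden layers in the question like the sketch below, leaving the output nonlinearity to the cost function:

    # Sketch of the suggested change: relu between hidden layers; output stays
    # as raw logits so the *_with_logits cost can apply the nonlinearity itself.
    layer1 = tf.nn.relu(tf.add(tf.matmul(input_data, hl1_vars['weight']), hl1_vars['bias']))
    layer2 = tf.nn.relu(tf.add(tf.matmul(layer1, hl2_vars['weight']), hl2_vars['bias']))
    layer3 = tf.nn.relu(tf.add(tf.matmul(layer2, hl3_vars['weight']), hl3_vars['bias']))
    output = tf.add(tf.matmul(layer3, output_layer_vars['weight']), output_layer_vars['bias'])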

Finally, I have found that lowering the learning rate can help when a network converges to a constant value. I don't think that is your problem here, though; my first two comments are the ones you should focus on.
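If you do want to experiment with that, the rate is an explicit argument to the optimizer (AdamOptimizer defaults to 0.001):

    # Sketch: lowering Adam's learning rate from its 0.001 default.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)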