Tensorflow CNN无法学习(图像输入 - 图像输出)

时间:2018-03-09 08:43:44

标签: python tensorflow

我被困在大学项目的Tensorflow卷积神经网络上,我希望有人可以帮助我。
它应该为图片输入输出图片。左边是输入,右边是输出。两者都是.jpeg格式。

input and output

重量看起来像这样。左图显示学习前的权重,右图是在几个时期之后,并且在进一步训练时根本没有变化 网似乎没有学到任何有用的东西,我有一种感觉,我忘记了一些基本的东西。 学习时准确度大约为5%

weights

here is what it looks when i save the input image x
我不知道我是否在加载或保存图像时出错

And this is what the output y of the net looks like

我基于tensorflow mnist教程的代码。 这是我缩短的代码,使其更具可读性:

import tensorflow as tf
from PIL import Image
import numpy as np

def weight_variable(dim,stddev=0.35):
    init = tf.random_normal(dim, stddev=stddev)
    return tf.Variable(init)

def bias_variable(dim,val=0.1):
    init = tf.constant(val, shape=dim)
    return tf.Variable(init)

def conv2d(x,W):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding = 'SAME')

def max_pool2x2(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding = 'SAME')

def output_pics(pic): # for weights
    #1 color (dimension) array cast to uint8 and output as jpeg to file

def output_pics_color(pic):
    #3 colors (dimensions) array cast to uint8 and output as jpeg to file

def show_pic(pic):
    #3 colors (dimensions) array cast to uint8 and shown in window



filesX = [...] # filenames of inputs for training
filesY = [...] # filenames of outputsfor training
test_filesX = [...]# filenames of inputs for testing
test_filesY = [...]# filenames of outputs for testing
px_size = 128 # size of images 128x128 (resized)


filename_queueX = tf.train.string_input_producer(filesX)
filename_queueY = tf.train.string_input_producer(filesY)
filename_testX = tf.train.string_input_producer(test_filesY)
filename_testY = tf.train.string_input_producer(test_filesY)

image_reader = tf.WholeFileReader()
img_name, img_dataX = image_reader.read(filename_queueX)
imageX = tf.image.decode_jpeg(img_dataX)
imageX = tf.image.resize_images(imageX, [px_size,px_size])
imageX.set_shape((px_size,px_size,3))
imageX=tf.cast(imageX, tf.float32)

...
same for imageY, test_imageX, test_imageY

trainX = []
trainY = []
testX = []
testY = []
j=1


with tf.name_scope('model'):
    x=tf.placeholder(tf.float32, [None, px_size,px_size,3])
    prob = tf.placeholder(tf.float32)

    init_op = tf.global_variables_initializer()

    # load images into lists
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for i in range(1,65):
            trainX.append(imageX.eval())
            trainY.append(imageY.eval())
        for i in range(1, 10):
            testX.append(test_imageX.eval())
            testY.append(test_imageY.eval())
        coord.request_stop()
        coord.join(threads)    

    # layer 1 
    x_img = tf.reshape(x,[-1,px_size,px_size, 3])    
    W1 = weight_variable([20,20,3,3])
    b1 = bias_variable([3])                       
    y1 = tf.nn.softmax(conv2d(x_img,W1)+b1)

    # layer 2
    W2 = weight_variable([30,30,3,3])
    b2 = bias_variable([3])
    y2=tf.nn.softmax(conv2d(y1, W2)+b2)

    # layer 3
    W3 = weight_variable([40,40,3,3])
    b3 = bias_variable([3])
    y3=tf.nn.softmax(conv2d(y2, W3)+b3)


    y = y3

    with tf.name_scope('train'):
        y_ =tf.placeholder(tf.float32, [None, px_size,px_size,3])
        cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y))
        opt = tf.train.MomentumOptimizer(learning_rate=0.5, momentum=0.1).minimize(cross_entropy)

    with tf.name_scope('eval'):
        correct = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        nEpochs = 1000
        batchSize = 10
        res = 0
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            trAccs = []
            for i in range(nEpochs):
                if i%100 == 0 :
                    train_accuracy = sess.run(accuracy, feed_dict={x:trainX, y_:trainY, prob: 1.0})
                    print(train_accuracy)
                    output_pics(W1)#output weights of layer 1 to file
                    output_pics_color(x)#save input image
                    output_pics_color(y)#save net output
                    sess.run(opt, feed_dict={x:trainX, y_:trainY, prob: 0.5})

1 个答案:

答案 0 :(得分:0)

  • 这是图像生成问题
  • 您选择的模型是图像生成任务的一个非常糟糕的模型
  • 正常CNN用于图像识别和对象检测任务
  • 关于MNIST的教程是图像分类问题,而不是图像生成问题
  • 为特定问题选择合适的模型类型非常重要
  • 很明显,这个模型没有机会实现你提到的输出
  • 我不知道你是如何计算准确度的,因为这是无监督学习问题
  • 你在每一层之后都使用过softmax这真是一个坏主意.Tensorflow mnist教程甚至没有这段代码
  • Softmax仅用于最后一层
  • 在隐藏层中应该使用泄漏的relu或简单的relu
  • 我建议您寻找更合适的深度学习模型
  • 变分自动编码器生成对抗网络或简单生成对抗网络的特定组合