Question

我被困在大学项目的Tensorflow卷积神经网络上，我希望有人可以帮助我。
它应该为图片输入输出图片。左边是输入，右边是输出。两者都是.jpeg格式。

重量看起来像这样。左图显示学习前的权重，右图是在几个时期之后，并且在进一步训练时根本没有变化网似乎没有学到任何有用的东西，我有一种感觉，我忘记了一些基本的东西。学习时准确度大约为5％

here is what it looks when i save the input image x
我不知道我是否在加载或保存图像时出错

And this is what the output y of the net looks like

我基于tensorflow mnist教程的代码。这是我缩短的代码，使其更具可读性：

import tensorflow as tf
from PIL import Image
import numpy as np

def weight_variable(dim,stddev=0.35):
    init = tf.random_normal(dim, stddev=stddev)
    return tf.Variable(init)

def bias_variable(dim,val=0.1):
    init = tf.constant(val, shape=dim)
    return tf.Variable(init)

def conv2d(x,W):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding = 'SAME')

def max_pool2x2(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding = 'SAME')

def output_pics(pic): # for weights
    #1 color (dimension) array cast to uint8 and output as jpeg to file

def output_pics_color(pic):
    #3 colors (dimensions) array cast to uint8 and output as jpeg to file

def show_pic(pic):
    #3 colors (dimensions) array cast to uint8 and shown in window



filesX = [...] # filenames of inputs for training
filesY = [...] # filenames of outputsfor training
test_filesX = [...]# filenames of inputs for testing
test_filesY = [...]# filenames of outputs for testing
px_size = 128 # size of images 128x128 (resized)


filename_queueX = tf.train.string_input_producer(filesX)
filename_queueY = tf.train.string_input_producer(filesY)
filename_testX = tf.train.string_input_producer(test_filesY)
filename_testY = tf.train.string_input_producer(test_filesY)

image_reader = tf.WholeFileReader()
img_name, img_dataX = image_reader.read(filename_queueX)
imageX = tf.image.decode_jpeg(img_dataX)
imageX = tf.image.resize_images(imageX, [px_size,px_size])
imageX.set_shape((px_size,px_size,3))
imageX=tf.cast(imageX, tf.float32)

...
same for imageY, test_imageX, test_imageY

trainX = []
trainY = []
testX = []
testY = []
j=1


with tf.name_scope('model'):
    x=tf.placeholder(tf.float32, [None, px_size,px_size,3])
    prob = tf.placeholder(tf.float32)

    init_op = tf.global_variables_initializer()

    # load images into lists
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for i in range(1,65):
            trainX.append(imageX.eval())
            trainY.append(imageY.eval())
        for i in range(1, 10):
            testX.append(test_imageX.eval())
            testY.append(test_imageY.eval())
        coord.request_stop()
        coord.join(threads)    

    # layer 1 
    x_img = tf.reshape(x,[-1,px_size,px_size, 3])    
    W1 = weight_variable([20,20,3,3])
    b1 = bias_variable([3])                       
    y1 = tf.nn.softmax(conv2d(x_img,W1)+b1)

    # layer 2
    W2 = weight_variable([30,30,3,3])
    b2 = bias_variable([3])
    y2=tf.nn.softmax(conv2d(y1, W2)+b2)

    # layer 3
    W3 = weight_variable([40,40,3,3])
    b3 = bias_variable([3])
    y3=tf.nn.softmax(conv2d(y2, W3)+b3)


    y = y3

    with tf.name_scope('train'):
        y_ =tf.placeholder(tf.float32, [None, px_size,px_size,3])
        cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y))
        opt = tf.train.MomentumOptimizer(learning_rate=0.5, momentum=0.1).minimize(cross_entropy)

    with tf.name_scope('eval'):
        correct = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        nEpochs = 1000
        batchSize = 10
        res = 0
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            trAccs = []
            for i in range(nEpochs):
                if i%100 == 0 :
                    train_accuracy = sess.run(accuracy, feed_dict={x:trainX, y_:trainY, prob: 1.0})
                    print(train_accuracy)
                    output_pics(W1)#output weights of layer 1 to file
                    output_pics_color(x)#save input image
                    output_pics_color(y)#save net output
                    sess.run(opt, feed_dict={x:trainX, y_:trainY, prob: 0.5})

Answer 1

这是图像生成问题
您选择的模型是图像生成任务的一个非常糟糕的模型
正常CNN用于图像识别和对象检测任务
关于MNIST的教程是图像分类问题，而不是图像生成问题
为特定问题选择合适的模型类型非常重要
很明显，这个模型没有机会实现你提到的输出
我不知道你是如何计算准确度的，因为这是无监督学习问题
你在每一层之后都使用过softmax这真是一个坏主意.Tensorflow mnist教程甚至没有这段代码
Softmax仅用于最后一层
在隐藏层中应该使用泄漏的relu或简单的relu
我建议您寻找更合适的深度学习模型
变分自动编码器生成对抗网络或简单生成对抗网络的特定组合

Tensorflow CNN无法学习（图像输入 - 图像输出）

1 个答案: