TensorFlow not training (only the biases change)

Time: 2017-04-18 12:00:59

标签: python tensorflow neural-network deep-learning conv-neural-network

I want to train a convolutional network to output a number between 0 and 100. But very quickly the model stops updating the weights and only the biases in the fully connected layers keep changing. I cannot understand why.

Weights image: [TensorBoard weight histograms]

I have played around with different numbers of layers and so on, but I always run into the same problem: only the FC biases change.

Here is the current code I am testing. I have stripped out things like dropout; overfitting is not a concern at this point. In fact, I want to try to overfit the data so I can see what my model is learning.

from __future__ import print_function

import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import matplotlib.image as mpimg


###################################################################################
############################# Read Data ###########################################

with tf.name_scope("READ_DATA"):

  def read_my_file_format(filename_queue):
    reader = tf.WholeFileReader()
    key, record_string = reader.read(filename_queue)
    split_res = tf.string_split([key],'_')
    key = split_res.values[5]
    example = tf.image.decode_png(record_string)
    example = tf.image.rgb_to_grayscale(example, name=None)

    processed_example = resize_img(example)
    processed_example = reshape_img(processed_example)
    return processed_example, key


  def resize_img(imgg):
    return tf.image.resize_images(imgg,[102,525])

  def reshape_img(imgg):
    return tf.reshape(imgg,shape=[102,525,1])


  def input_pipeline( bsize=30, num_epochs=None):
    filename_queue = tf.train.string_input_producer(
        tf.train.match_filenames_once("./png_imgs/*.png"), num_epochs=num_epochs, shuffle=True)
    example, label = read_my_file_format(filename_queue)

    min_after_dequeue = bsize
    capacity = min_after_dequeue + 3 * 8

    example_batch, label_batch = tf.train.shuffle_batch(
        [example, label], batch_size=bsize, capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    return  example_batch, label_batch

  imb_batch1,label_batch1 = input_pipeline()

  single_img, single_lbl = input_pipeline(bsize=1)

############################# Read Data ###########################################
###################################################################################



# Parameters
#learning_rate = 0.0001
training_iters = 200000
batch_size = 30

# Network Parameters
n_input = 600*300*3 
n_classes = 1 # single scalar output for regression
dropout = 0.75 # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, 102,525,1])
y = tf.placeholder(tf.float32, [None, 1])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
learning_rate = tf.placeholder(tf.float32) 


# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)


def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')


# Create model
def conv_net(x, dropout):

  # Convolution Layer
    with tf.variable_scope('conv1') as scope:
      w = tf.get_variable('weights',[5,5,1,32], initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases',[32],initializer=tf.random_normal_initializer())
      conv1 = conv2d(x,w,b)
      tf.summary.histogram('weights',w)
      tf.summary.histogram('biases',b)

    with tf.variable_scope('conv2') as scope:
      w = tf.get_variable('weights',[5,5,32,32], initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases',[32],initializer=tf.random_normal_initializer())
      conv2 = conv2d(conv1,w,b)
      tf.summary.histogram('weights',w)
      tf.summary.histogram('biases',b)

    with tf.name_scope("Maxpool"):
      conv2 = maxpool2d(conv2,k=2)

    with tf.variable_scope('FC1') as scope:
      w = tf.get_variable('weights',[32*263*51,64], initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases',[64],initializer=tf.random_normal_initializer())
      FC1 = tf.reshape(conv2,[-1,w.get_shape().as_list()[0]])
      FC1 = tf.add(tf.matmul(FC1,w),b)
      tf.summary.histogram('weights',w)
      tf.summary.histogram('biases',b)


    with tf.variable_scope('FC2') as scope:
      w = tf.get_variable('weights',[64,1], initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases',[1],initializer=tf.random_normal_initializer())
      FC2 = tf.add(tf.matmul(FC1,w),b)
      tf.summary.histogram('weights',w)
      tf.summary.histogram('biases',b)

    return FC2


# Construct model
pred = conv_net(x, keep_prob)

def cost():
  with tf.name_scope("Cost"):

    diff = tf.abs(tf.subtract(y,pred))
    cost=tf.reduce_mean(diff)
    print(cost)
  tf.summary.histogram('Label',y)
  tf.summary.histogram('predicted',pred)
  tf.summary.scalar('cost',cost)
  return cost

with tf.name_scope("Optimizer"):
  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost())
 # optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost())


# Initializing the variables
saver = tf.train.Saver()
init = tf.global_variables_initializer()
merged = tf.summary.merge_all()


# Launch the graph
with tf.Session() as sess:

    sess.run(init)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    writer = tf.summary.FileWriter("/tmp/tensorboard/log01")
    writer.add_graph(sess.graph)
    step = 1
    l_rate= 0.1

    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        print("step: ",step)
        batch_x, batch_y = sess.run([imb_batch1,label_batch1])

        batch_y = batch_y.reshape(-1,1) 
        if step % 100 == 0 :
          l_rate = l_rate/5

        if l_rate < 0.000001 :
          l_rate= 0.000001

        if step > 20:
          _,sumry = sess.run([optimizer,merged], feed_dict={x: batch_x, y: batch_y,
                                         keep_prob: dropout, learning_rate: l_rate})
          writer.add_summary(sumry,step)
        else :
          sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
                                         keep_prob: dropout, learning_rate: l_rate})


        step += 1

    print("Training Done!")



    coord.request_stop()
    coord.join(threads)

Is there a silly mistake somewhere in the code?

1 Answer:

Answer 0 (score: 0):

There is no non-linearity after the first fully connected layer, so it adds nothing over having just a single fully connected layer.
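
For illustration only, here is a minimal sketch of what that fix could look like, reusing the FC1 block from the question's conv_net; the tf.nn.relu call is the only change and is my assumption about the intended correction:

    with tf.variable_scope('FC1') as scope:
      w = tf.get_variable('weights', [32*263*51, 64],
                          initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases', [64],
                          initializer=tf.random_normal_initializer())
      FC1 = tf.reshape(conv2, [-1, w.get_shape().as_list()[0]])
      # Apply a non-linearity so FC1 is not just an affine map that FC2 absorbs
      FC1 = tf.nn.relu(tf.add(tf.matmul(FC1, w), b))

Without an activation between them, FC1 and FC2 compose into a single affine transformation, so the two layers together are no more expressive than FC2 alone.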