I want to train a model that predicts a percentage value (0.0-100.0) from an input image. The exact decimal value is not very important; what matters more is that it lands within roughly ±2% of the target value, though the closer the better.
My approach is to take an existing convolutional image classifier and simply change the last layer from 10 classes to 1. I used the following model as the base and made my changes on top of it: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/convolutional_network.py
However, for some reason I can't get the model to learn anything. The predictions I get tend to be in the range of ±1e7, nowhere near the 0-100 I want.
A few things I'd like to know:
Is there a better way to approach this problem (while still using deep learning)?
Is there a way to "help" the model by telling it that the expected output must fall within the 0-100 range? (See the sketch right after these questions.)
Am I making some silly mistake somewhere?
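For the second question, the kind of thing I have in mind (just a sketch, I haven't verified that it actually helps training) is squashing the raw output of the last layer through a scaled sigmoid, so the prediction is forced into (0, 100):

import tensorflow as tf

# 'raw_out' stands in for the last fully-connected layer's output (shape [batch, 1])
raw_out = tf.placeholder(tf.float32, [None, 1])
pred = 100.0 * tf.nn.sigmoid(raw_out)  # sigmoid gives (0, 1); scaling forces (0, 100)

(Equivalently, I suppose I could divide the labels by 100 and train against a plain sigmoid output.)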
I've noticed that my weights and biases barely change during training, but I can't figure out why.
Any help would be greatly appreciated. I've been working on this for weeks now with very little progress.
Here is the code I'm using:
from __future__ import print_function
import tensorflow as tf
def read_my_file_format(filename_queue):
    reader = tf.WholeFileReader()
    key, record_string = reader.read(filename_queue)
    # the label is embedded in the filename (5th '_'-separated piece)
    split_res = tf.string_split([key], '_')
    key2 = split_res.values[4]
    example = tf.image.decode_jpeg(record_string)
    processed_example = resize_img(example)
    processed_example2 = reshape_img(processed_example)
    return processed_example2, key2
def resize_img(imgg):
    return tf.image.resize_images(imgg, [128, 256])

def reshape_img(imgg):
    return tf.reshape(imgg, shape=[128, 256, 3])
def input_pipeline(num_epochs=None):
    filename_queue = tf.train.string_input_producer(
        tf.train.match_filenames_once("./max100/*.jpg"),
        num_epochs=num_epochs, shuffle=True)
    example, label = read_my_file_format(filename_queue)
    min_after_dequeue = 10
    capacity = min_after_dequeue + 3 * 3
    example_batch, label_batch = tf.train.shuffle_batch(
        [example, label], batch_size=20, capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    return example_batch, label_batch
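# Note: label_batch is still a tf.string tensor parsed from the filename; it only
# becomes a float through numpy's implicit conversion when fed via feed_dict below.
# An in-graph conversion would presumably look something like this (untested):
# label_float = tf.string_to_number(label_batch, out_type=tf.float32)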
imb_batch1,label_batch1 = input_pipeline()
# Network Parameters
n_input = 172  # (currently unused)
n_classes = 1  # single regression output instead of 10 classes
dropout = 0.75  # Dropout, probability to keep units
learning_rate = 0.005
x = tf.placeholder(tf.float32, [None,128,256,3])
y = tf.placeholder(tf.float32, [None,1])
keep_prob = tf.placeholder(tf.float32)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1, channels=1):
    # Conv2D wrapper, with bias and relu activation
    # (the 4th stride entry is the stride over channels, normally 1)
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, channels], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
    with tf.name_scope("Conv_net"):
        # Convolution Layer
        conv1 = conv2d(x, weights['wc1'], biases['bc1'], 1, 1)
        # Max Pooling (down-sampling)
        conv1 = maxpool2d(conv1, k=2)
        # Convolution Layer
        conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
        # Max Pooling (down-sampling)
        conv2 = maxpool2d(conv2, k=2)
        # Fully connected layer: flatten the conv output
        fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
        fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
        fc1 = tf.nn.relu(fc1)
        # Apply Dropout
        fc1 = tf.nn.dropout(fc1, dropout)
        # Output, single regression value
        out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
        return out
# Store layers weight & bias
weights = {
    # 5x5 conv, 3 input channels, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 32]), name="w1"),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64]), name="wc2"),
    # 'wc3': tf.Variable(tf.random_normal([5, 5, 64, 64]), name="wc3"),
    # fully connected, 32*64*64 inputs (128x256 image after two 2x2 pools), 1024 outputs
    'wd1': tf.Variable(tf.random_normal([32*64*64, 1024]), name="wd1"),
    # 1024 inputs, 1 output (the regression value)
    'out': tf.Variable(tf.random_normal([1024, n_classes]), name="w_out")
}
tf.summary.histogram('weights1',weights['wc1'])
tf.summary.histogram('wc2',weights['wc2'])
#tf.summary.histogram('wc3',weights['wc3'])
tf.summary.histogram('wd1',weights['wd1'])
tf.summary.histogram('wout',weights['out'])
biases = {
    'bc1': tf.Variable(tf.random_normal([32]), name="bc1"),
    'bc2': tf.Variable(tf.random_normal([64]), name="bc2"),
    # 'bc3': tf.Variable(tf.random_normal([64]), name="bc3"),
    'bd1': tf.Variable(tf.random_normal([1024]), name="bd1"),
    'out': tf.Variable(tf.random_normal([n_classes]), name="b_out")
}
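# (Aside: tf.random_normal defaults to stddev=1.0 for all the weights and biases
#  above. I have seen much smaller initial scales recommended for conv nets,
#  e.g. tf.truncated_normal(shape, stddev=0.1), but I haven't verified whether
#  the initialization scale is actually my problem here.)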
tf.summary.histogram('bc1',biases['bc1'])
tf.summary.histogram('bc2',biases['bc2'])
#tf.summary.histogram('bc3',biases['bc3'])
tf.summary.histogram('bd1',biases['bd1'])
tf.summary.histogram('bout',biases['out'])
# Construct model
with tf.name_scope("Predictions"):
    pred = conv_net(x, weights, biases, keep_prob)
    tf.summary.histogram('predic', pred)
def tmp():
    with tf.name_scope("Cost"):
        t_pred = pred
        tf.summary.histogram('predictions', t_pred)
        tf.summary.histogram('labels', y)
        # tf.nn.l2_loss expects a single tensor; stacking [t_pred, y] sums the
        # squares of both rather than penalizing their difference (logged only)
        my_loss = tf.nn.l2_loss([t_pred, y])
        tf.summary.scalar('myloss', my_loss)
        # with a single logit the softmax is identically 1, so this cross entropy
        # is degenerate; it is logged but not optimized
        softmax = tf.nn.softmax_cross_entropy_with_logits(logits=t_pred, labels=y)
        tf.summary.histogram('softmax', softmax)
        sumloss = tf.contrib.slim.losses.mean_squared_error(t_pred, y)
        tf.summary.scalar('sumloss', sumloss)
        costt = tf.reduce_mean(softmax)
        return sumloss  # costt
# Define loss and optimizer
cost = tmp()  # was: tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
tf.summary.scalar('cost', cost)
with tf.name_scope("optimizer"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model: count a prediction as "correct" if it is within 10 of the label
correct_pred = tf.less_equal(tf.abs(tf.subtract(pred, y)), 10)  # was: tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
tf.summary.scalar('accuracyyy', accuracy)
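# (Debugging idea, untested: log gradient histograms to check whether the
#  gradients are vanishing, since the weights barely move during training.)
# grads = tf.gradients(cost, list(weights.values()) + list(biases.values()))
# for g, name in zip(grads, list(weights.keys()) + list(biases.keys())):
#     tf.summary.histogram('grad_' + name, g)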
#init_op = tf.initialize_all_variables()
init_op = tf.global_variables_initializer()
merged = tf.summary.merge_all()
with tf.Session() as sess:
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    writer = tf.summary.FileWriter("/tmp/tensorboard/log8")
    writer.add_graph(sess.graph)
    for epoch in range(1, 200):
        print("start epoch:", epoch)
        imb_batch, label_batch = sess.run([imb_batch1, label_batch1])
        label_batch = label_batch.reshape(-1, 1)
        # write summaries every 3rd step
        if epoch % 3 == 0:
            suma, _ = sess.run([merged, optimizer],
                               feed_dict={x: imb_batch, y: label_batch, keep_prob: dropout})
            writer.add_summary(suma, epoch)
        else:
            sess.run(optimizer,
                     feed_dict={x: imb_batch, y: label_batch, keep_prob: dropout})
    coord.request_stop()
    coord.join(threads)
Edit:
A typical image looks like the one below. I then want the model to tell me what percentage of the image (counting only the bars) is made up of blue bars. So I give it an input image like this along with the correct value (about 40% in this case).
Later on I'd like to extend the model to tell me things such as whether the first row contains more blue than the second row, and so on. I'd also like the model to cope with a varying number of "rows".