I want to train a model that predicts a percentage value (0.0-100.0) from an input image. The exact decimal value is not very important; what matters more is that it lands within roughly ±2% of the target value, though the closer the better.
My approach is to take an existing convolutional image classifier and simply change the last layer from 10 classes to 1. I used the following model as the base and made my changes on top of it: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/convolutional_network.py
However, for some reason I can't get the model to learn anything. The predictions I get tend to be in the range of ±1e7, nowhere near the 0-100 I want.
A few things I'd like to know:
Is there a better way to approach this problem (while still using deep learning)?
Is there a way to "help" the model by telling it that the expected output must fall within the 0-100 range? (See the sketch right after these questions.)
Am I making some silly mistake somewhere?
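For the second question, the kind of thing I have in mind (just a sketch, I haven't verified that it actually helps training) is squashing the raw output of the last layer through a scaled sigmoid, so the prediction is forced into (0, 100):

import tensorflow as tf

# 'raw_out' stands in for the last fully-connected layer's output (shape [batch, 1])
raw_out = tf.placeholder(tf.float32, [None, 1])
pred = 100.0 * tf.nn.sigmoid(raw_out)  # sigmoid gives (0, 1); scaling forces (0, 100)

(Equivalently, I suppose I could divide the labels by 100 and train against a plain sigmoid output.)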
I've noticed that my weights and biases barely change during training, but I can't figure out why.
Any help would be greatly appreciated. I've been working on this for weeks now with very little progress.
Here is the code I'm using:
from __future__ import print_function
import tensorflow as tf
def read_my_file_format(filename_queue):
    reader = tf.WholeFileReader()
    key, record_string = reader.read(filename_queue)
    # the label is embedded in the filename (5th '_'-separated piece)
    split_res = tf.string_split([key], '_')
    key2 = split_res.values[4]
    example = tf.image.decode_jpeg(record_string)
    processed_example = resize_img(example)
    processed_example2 = reshape_img(processed_example)
    return processed_example2, key2
def resize_img(imgg):
    return tf.image.resize_images(imgg, [128, 256])

def reshape_img(imgg):
    return tf.reshape(imgg, shape=[128, 256, 3])
def input_pipeline(num_epochs=None):
    filename_queue = tf.train.string_input_producer(
        tf.train.match_filenames_once("./max100/*.jpg"),
        num_epochs=num_epochs, shuffle=True)
    example, label = read_my_file_format(filename_queue)
    min_after_dequeue = 10
    capacity = min_after_dequeue + 3 * 3
    example_batch, label_batch = tf.train.shuffle_batch(
        [example, label], batch_size=20, capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    return example_batch, label_batch
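# Note: label_batch is still a tf.string tensor parsed from the filename; it only
# becomes a float through numpy's implicit conversion when fed via feed_dict below.
# An in-graph conversion would presumably look something like this (untested):
# label_float = tf.string_to_number(label_batch, out_type=tf.float32)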
imb_batch1,label_batch1 = input_pipeline()
# Network Parameters
n_input = 172  # (currently unused)
n_classes = 1  # single regression output instead of 10 classes
dropout = 0.75  # Dropout, probability to keep units
learning_rate = 0.005
x = tf.placeholder(tf.float32, [None,128,256,3])
y = tf.placeholder(tf.float32, [None,1])
keep_prob = tf.placeholder(tf.float32)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1, channels=1):
    # Conv2D wrapper, with bias and relu activation
    # (the 4th stride entry is the stride over channels, normally 1)
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, channels], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
    with tf.name_scope("Conv_net"):
        # Convolution Layer
        conv1 = conv2d(x, weights['wc1'], biases['bc1'], 1, 1)
        # Max Pooling (down-sampling)
        conv1 = maxpool2d(conv1, k=2)
        # Convolution Layer
        conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
        # Max Pooling (down-sampling)
        conv2 = maxpool2d(conv2, k=2)
        # Fully connected layer: flatten the conv output
        fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
        fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
        fc1 = tf.nn.relu(fc1)
        # Apply Dropout
        fc1 = tf.nn.dropout(fc1, dropout)
        # Output, single regression value
        out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
        return out
# Store layers weight & bias
weights = {
    # 5x5 conv, 3 input channels, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 32]), name="w1"),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64]), name="wc2"),
    # 'wc3': tf.Variable(tf.random_normal([5, 5, 64, 64]), name="wc3"),
    # fully connected, 32*64*64 inputs (128x256 image after two 2x2 pools), 1024 outputs
    'wd1': tf.Variable(tf.random_normal([32*64*64, 1024]), name="wd1"),
    # 1024 inputs, 1 output (the regression value)
    'out': tf.Variable(tf.random_normal([1024, n_classes]), name="w_out")
}
tf.summary.histogram('weights1',weights['wc1'])
tf.summary.histogram('wc2',weights['wc2'])
#tf.summary.histogram('wc3',weights['wc3'])
tf.summary.histogram('wd1',weights['wd1'])
tf.summary.histogram('wout',weights['out'])
biases = {
    'bc1': tf.Variable(tf.random_normal([32]), name="bc1"),
    'bc2': tf.Variable(tf.random_normal([64]), name="bc2"),
    # 'bc3': tf.Variable(tf.random_normal([64]), name="bc3"),
    'bd1': tf.Variable(tf.random_normal([1024]), name="bd1"),
    'out': tf.Variable(tf.random_normal([n_classes]), name="b_out")
}
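# (Aside: tf.random_normal defaults to stddev=1.0 for all the weights and biases
#  above. I have seen much smaller initial scales recommended for conv nets,
#  e.g. tf.truncated_normal(shape, stddev=0.1), but I haven't verified whether
#  the initialization scale is actually my problem here.)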
tf.summary.histogram('bc1',biases['bc1'])
tf.summary.histogram('bc2',biases['bc2'])
#tf.summary.histogram('bc3',biases['bc3'])
tf.summary.histogram('bd1',biases['bd1'])
tf.summary.histogram('bout',biases['out'])
# Construct model
with tf.name_scope("Predictions"):
    pred = conv_net(x, weights, biases, keep_prob)
    tf.summary.histogram('predic', pred)
def tmp():
    with tf.name_scope("Cost"):
        t_pred = pred
        tf.summary.histogram('predictions', t_pred)
        tf.summary.histogram('labels', y)
        # tf.nn.l2_loss expects a single tensor; stacking [t_pred, y] sums the
        # squares of both rather than penalizing their difference (logged only)
        my_loss = tf.nn.l2_loss([t_pred, y])
        tf.summary.scalar('myloss', my_loss)
        # with a single logit the softmax is identically 1, so this cross entropy
        # is degenerate; it is logged but not optimized
        softmax = tf.nn.softmax_cross_entropy_with_logits(logits=t_pred, labels=y)
        tf.summary.histogram('softmax', softmax)
        sumloss = tf.contrib.slim.losses.mean_squared_error(t_pred, y)
        tf.summary.scalar('sumloss', sumloss)
        costt = tf.reduce_mean(softmax)
        return sumloss  # costt
# Define loss and optimizer
cost = tmp()  # was: tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
tf.summary.scalar('cost', cost)
with tf.name_scope("optimizer"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model: count a prediction as "correct" if it is within 10 of the label
correct_pred = tf.less_equal(tf.abs(tf.subtract(pred, y)), 10)  # was: tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
tf.summary.scalar('accuracyyy', accuracy)
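# (Debugging idea, untested: log gradient histograms to check whether the
#  gradients are vanishing, since the weights barely move during training.)
# grads = tf.gradients(cost, list(weights.values()) + list(biases.values()))
# for g, name in zip(grads, list(weights.keys()) + list(biases.keys())):
#     tf.summary.histogram('grad_' + name, g)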
#init_op = tf.initialize_all_variables()
init_op = tf.global_variables_initializer()
merged = tf.summary.merge_all()
with tf.Session() as sess:
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    writer = tf.summary.FileWriter("/tmp/tensorboard/log8")
    writer.add_graph(sess.graph)
    for epoch in range(1, 200):
        print("start epoch:", epoch)
        imb_batch, label_batch = sess.run([imb_batch1, label_batch1])
        label_batch = label_batch.reshape(-1, 1)
        # write summaries every 3rd step
        if epoch % 3 == 0:
            suma, _ = sess.run([merged, optimizer],
                               feed_dict={x: imb_batch, y: label_batch, keep_prob: dropout})
            writer.add_summary(suma, epoch)
        else:
            sess.run(optimizer,
                     feed_dict={x: imb_batch, y: label_batch, keep_prob: dropout})
    coord.request_stop()
    coord.join(threads)
Edit:
A typical image looks like the one below. I then want the model to tell me what percentage of the image (counting only the bars) is made up of blue bars. So I give it an input image like this along with the correct value (about 40% in this case).
Later on I'd like to extend the model to tell me things such as whether the first row contains more blue than the second row, and so on. I'd also like the model to cope with a varying number of "rows".