我是tensorflow的新手,我试图偏离mnist数据集并尝试一些不同的东西。我正在使用情感数据集CK+,似乎无法修改我的代码以成功运行此数据集。对于那些想要复制我的作品的人,我找到了经处理的图像和标签here。您将在ck + scaled文件夹中找到图像,并在已处理文件夹中找到标签。
我们正在使用大小为[256 x 256]的265张图像。
所以,这是我的代码:
import os
import tensorflow as tf
import sys
import urllib
import numpy as np
from PIL import Image
import glob
train = []
for filename in glob.glob('/Users/madhavthaker/Documents/CSCI63/Final Project/face-emoticon-master/data/ck+_scaled/*.png'): #assuming gif
img=np.asarray(Image.open(filename))
img_flat = img.reshape(img.size)
train.append(img_flat)
### MNIST EMBEDDINGS ###
ckp_labels = [5, 0, 3, 5, 4, 0, 1, 3, 5, 4, 0, 3, 5, 0, 1, 5, 4, 0, 0, 0, 2, 1, 3, 5, 0, 3, 5, 1, 3, 5, 0, 3, 5, 4, 0, 3, 5, 3, 1, 1, 0, 4, 5, 2, 1, 5, 3, 5, 1, 5, 3, 1, 5, 1, 5, 0, 1, 5, 3, 5, 1, 3, 0, 1, 5, 2, 3, 1, 5, 3, 1, 3, 1, 5, 3, 2, 5, 3, 1, 5, 3, 4, 0, 5, 0, 3, 1, 3, 2, 5, 1, 3, 5, 1, 5, 4, 0, 3, 1, 5, 1, 2, 5, 1, 3, 5, 3, 5, 1, 3, 5, 5, 3, 1, 1, 3, 4, 1, 5, 4, 1, 5, 0, 1, 3, 5, 2, 3, 5, 5, 3, 5, 1, 0, 1, 5, 3, 0, 5, 1, 0, 3, 5, 0, 3, 5, 3, 1, 4, 5, 1, 3, 5, 1, 3, 1, 3, 5, 1, 5, 0, 3, 5, 1, 1, 4, 1, 5, 1, 4, 1, 0, 1, 3, 5, 5, 0, 1, 0, 5, 4, 0, 5, 3, 5, 3, 5, 1, 3, 5, 2, 0, 5, 2, 0, 5, 2, 3, 4, 3, 2, 5, 1, 5, 0, 3, 0, 1, 3, 5, 0, 1, 3, 5, 0, 4, 3, 3, 1, 4, 2, 1, 3, 5, 5, 3, 0, 3, 1, 5, 5, 0, 3, 5, 3, 2, 5, 3, 4, 7, 7, 7, 7, 7, 7, 7, 7, 0, 2, 4, 0, 7, 2, 0, 7, 0, 7, 2, 4, 4, 0, 2, 4, 7, 2]
if sys.version_info[0] >= 3:
from urllib.request import urlretrieve
else:
from urllib import urlretrieve
LOGDIR = 'log3/'
GITHUB_URL ='https://raw.githubusercontent.com/mamcgrath/TensorBoard-TF-Dev-Summit-Tutorial/master/'
### MNIST EMBEDDINGS ###
mnist = tf.contrib.learn.datasets.mnist.read_data_sets(train_dir=LOGDIR + 'data', one_hot=True)
### Get a sprite and labels file for the embedding projector ###
urlretrieve(GITHUB_URL + 'labels_1024.tsv', LOGDIR + 'labels_1024.tsv')
urlretrieve(GITHUB_URL + 'sprite_1024.png', LOGDIR + 'sprite_1024.png')
# Add convolution layer
def conv_layer(input, size_in, size_out, name="conv"):
with tf.name_scope(name):
#w = tf.Variable(tf.zeros([5, 5, size_in, size_out]), name="W")
#b = tf.Variable(tf.zeros([size_out]), name="B")
w = tf.Variable(tf.truncated_normal([4, 4, size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
act = tf.nn.relu(conv + b)
tf.summary.histogram("weights", w)
tf.summary.histogram("biases", b)
tf.summary.histogram("activations", act)
return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
# Add fully connected layer
def fc_layer(input, size_in, size_out, name="fc"):
with tf.name_scope(name):
w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
act = tf.nn.relu(tf.matmul(input, w) + b)
tf.summary.histogram("weights", w)
tf.summary.histogram("biases", b)
tf.summary.histogram("activations", act)
return act
def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam):
tf.reset_default_graph()
tf.set_random_seed(1)
sess = tf.Session()
# Setup placeholders, and reshape the data
x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
x_image = tf.reshape(x, [-1, 256, 256, 1])
tf.summary.image('input', x_image, 3)
y = tf.placeholder(tf.float32, shape=[None, ], name="labels")
if use_two_conv:
conv1 = conv_layer(x_image, 1, 32, "conv1")
conv_out = conv_layer(conv1, 32, 64, "conv2")
else:
conv1 = conv_layer(x_image, 1, 64, "conv")
conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
flattened = tf.reshape(conv_out, [-1, 16 * 16 * 16])
if use_two_fc:
fc1 = fc_layer(flattened, 16 * 16 * 16, 40, "fc1")
embedding_input = fc1
embedding_size = 40
logits = fc_layer(fc1, 40, 1, "fc2")
else:
embedding_input = flattened
embedding_size = 7*7*64
logits = fc_layer(flattened, 7*7*64, 10, "fc")
with tf.name_scope("xent"):
xent = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(
logits=logits, labels=y), name="xent")
tf.summary.scalar("xent", xent)
with tf.name_scope("train"):
train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(y, -1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("accuracy", accuracy)
summ = tf.summary.merge_all()
embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
assignment = embedding.assign(embedding_input)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter(LOGDIR + hparam)
writer.add_graph(sess.graph)
config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
embedding_config = config.embeddings.add()
embedding_config.tensor_name = embedding.name
embedding_config.sprite.image_path = LOGDIR + 'sprite_1024.png'
embedding_config.metadata_path = LOGDIR + 'labels_1024.tsv'
# Specify the width and height of a single thumbnail.
embedding_config.sprite.single_image_dim.extend([256, 256])
tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)
for i in range(300):
if i % 5 == 0:
[train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: train, y: ckp_labels})
writer.add_summary(s, i)
print ("train accuracy:", train_accuracy)
sess.run(train_step, feed_dict={x: train, y: ckp_labels})
def make_hparam_string(learning_rate, use_two_fc, use_two_conv):
conv_param = "conv2" if use_two_conv else "conv1"
fc_param = "fc2" if use_two_fc else "fc1"
return "lr_%.0E%s%s" % (learning_rate, conv_param, fc_param)
def main():
# You can try adding some more learning rates
#for learning_rate in [1E-3, 1E-4, 1E-5]:
for learning_rate in [1E-4]:
# Include "False" as a value to try different model architectures
#for use_two_fc in [True, False]:
for use_two_fc in [True]:
#for use_two_conv in [True, False]:
for use_two_conv in [True]:
# Construct a hyperparameter string for each one (example: "lr_1E-3fc2conv2")
hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
print('Starting run for %s' % hparam)
sys.stdout.flush() # this forces print-ed lines to show up.
# Actually run with the new settings
mnist_model(learning_rate, use_two_fc, use_two_conv, hparam)
if __name__ == '__main__':
main()
以下是我收到的错误:
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[16960,1] labels_size=[1,265]
[[Node: xent/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](xent/Reshape, xent/Reshape_1)]]
让我感到困惑的是为什么我的logits形状是[16960,1]。任何帮助将不胜感激。
答案 0 :(得分:1)
首先,y应该是shape(batch_size):(所以(265)在你的情况下;也许它也可以用于(265,1))
y = tf.placeholder(tf.float32, shape=[None], name="labels")
如果y不是单热编码,则必须使用tf.nn.sparse_softmax_cross_entropy_with_logits
代替softmax_cross_entropy_with_logits
。
然后:在你的第二个转换层(和最大池)结束时,图像大小为(256x256)/ 2/2 =(64,64)。使用64深度,每个样本可获得64 * 64 * 64个值。但你做flattened = tf.reshape(conv_out, [-1, 16 * 16 * 16])
,它会给你一个形状张量[265*2^6, 16*16*16]
(265 * 2 ^ 6 = 16960,这就是它的来源)。将其替换为flattened = tf.reshape(conv_out, [-1, 64*64*64])
。
更进一步,logits = fc_layer(fc1, 40, 1, "fc2")
也是一个错误,您应该logits = fc_layer(fc1, 40, num_classes, "fc2")
,而您似乎有num_classes = 8
。
这些更改应该为您提供形状的修改(265,num_classes),这是tf.nn.sparse_softmax_cross_entropy_with_logits
所需的。
对于use_two_fc或use_two_conv为false的情况,您还有其他更改,我会让您弄清楚。你应该在每个步骤中更加小心所有张量的形状,必要时打印它们以检查它们是否真的是你想要的。也许使用更多变量,如num_classes_ batch_size等,以确保事物连贯,并且更具可读性。