I am training a convolutional network in TensorFlow. My code is a SqueezeNet implementation, and I am trying to train it on the 200-class Tiny ImageNet dataset from the Stanford website.
import tensorflow as tf
import numpy as np
import os
from scipy.misc import imread

NUM_CLASSES = 200

def fire_module(x,inp,sp,e11p,e33p):
    with tf.variable_scope("fire"):
        with tf.variable_scope("squeeze"):
            W = tf.get_variable("weights",shape=[1,1,inp,sp])
            b = tf.get_variable("bias",shape=[sp])
            s = tf.nn.conv2d(x,W,[1,1,1,1],"VALID")+b
            s = tf.nn.relu(s)
        with tf.variable_scope("e11"):
            W = tf.get_variable("weights",shape=[1,1,sp,e11p])
            b = tf.get_variable("bias",shape=[e11p])
            e11 = tf.nn.conv2d(s,W,[1,1,1,1],"VALID")+b
            e11 = tf.nn.relu(e11)
        with tf.variable_scope("e33"):
            W = tf.get_variable("weights",shape=[3,3,sp,e33p])
            b = tf.get_variable("bias",shape=[e33p])
            e33 = tf.nn.conv2d(s,W,[1,1,1,1],"SAME")+b
            e33 = tf.nn.relu(e33)
        return tf.concat([e11,e33],3)

class SqueezeNet(object):
    def extract_features(self, input=None, reuse=True):
        if input is None:
            input = self.image
        x = input
        layers = []
        with tf.variable_scope('features', reuse=reuse):
            with tf.variable_scope('layer0'):
                W = tf.get_variable("weights",shape=[3,3,3,64])
                b = tf.get_variable("bias",shape=[64])
                x = tf.nn.conv2d(x,W,[1,2,2,1],"VALID")
                x = tf.nn.bias_add(x,b)
                layers.append(x)
            with tf.variable_scope('layer1'):
                x = tf.nn.relu(x)
                layers.append(x)
            with tf.variable_scope('layer2'):
                x = tf.nn.max_pool(x,[1,3,3,1],strides=[1,2,2,1],padding='VALID')
                layers.append(x)
            with tf.variable_scope('layer3'):
                x = fire_module(x,64,16,64,64)
                layers.append(x)
            with tf.variable_scope('layer4'):
                x = fire_module(x,128,16,64,64)
                layers.append(x)
            with tf.variable_scope('layer5'):
                x = tf.nn.max_pool(x,[1,3,3,1],strides=[1,2,2,1],padding='VALID')
                layers.append(x)
            with tf.variable_scope('layer6'):
                x = fire_module(x,128,32,128,128)
                layers.append(x)
            with tf.variable_scope('layer7'):
                x = fire_module(x,256,32,128,128)
                layers.append(x)
            with tf.variable_scope('layer8'):
                x = tf.nn.max_pool(x,[1,3,3,1],strides=[1,2,2,1],padding='VALID')
                layers.append(x)
            with tf.variable_scope('layer9'):
                x = fire_module(x,256,48,192,192)
                layers.append(x)
            with tf.variable_scope('layer10'):
                x = fire_module(x,384,48,192,192)
                layers.append(x)
            with tf.variable_scope('layer11'):
                x = fire_module(x,384,64,256,256)
                layers.append(x)
            with tf.variable_scope('layer12'):
                x = fire_module(x,512,64,256,256)
                layers.append(x)
        return layers

    def __init__(self, save_path=None, sess=None, restore=True, data=None):
        """Create a SqueezeNet model.
        Inputs:
        - save_path: path to TensorFlow checkpoint
        - sess: TensorFlow session
        - input: optional input to the model. If None, will use placeholder for input.
        """
        self.images = tf.placeholder('float',shape=[None,None,None,3],name='input_image')
        self.labels = tf.placeholder('int32', shape=[None], name='labels')
        self.layers = []
        x = self.images
        self.layers = self.extract_features(x, reuse=False)
        self.features = self.layers[-1]
        with tf.variable_scope('classifier'):
            with tf.variable_scope('layer0'):
                x = self.features
                self.layers.append(x)
            with tf.variable_scope('layer1'):
                W = tf.get_variable("weights",shape=[1,1,512,1000])
                b = tf.get_variable("bias",shape=[1000])
                x = tf.nn.conv2d(x,W,[1,1,1,1],"VALID")
                x = tf.nn.bias_add(x,b)
                self.layers.append(x)
            with tf.variable_scope('layer2'):
                x = tf.nn.relu(x)
                self.layers.append(x)
            with tf.variable_scope('layer3'):
                x = tf.nn.avg_pool(x,[1,13,13,1],strides=[1,13,13,1],padding='VALID')
                self.layers.append(x)
        self.classifier = tf.reshape(x,[-1, NUM_CLASSES])
        self.optimizer = tf.train.AdamOptimizer(learning_rate=0.002, beta1=0.85, beta2=0.97)
        if restore:
            saver = tf.train.Saver()
            saver.restore(sess, save_path)
            self.predicted_class = self.class_names[np.argmax(self.classifier, axis=1)]
        else:
            print(self.classifier.shape)
            self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=self.classifier))
            self.training_op = self.optimizer.minimize(self.loss)
            self.train()

    def load_images(self, path):
        # First load wnids
        with open(os.path.join(path, 'wnids.txt'), 'r') as f:
            wnids = [x.strip() for x in f]
        # Map wnids to integer labels
        wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
        print("started loading")
        # Use words.txt to get names for each class
        with open(os.path.join(path, 'words.txt'), 'r') as f:
            wnid_to_words = dict(line.split('\t') for line in f)
            for wnid, words in wnid_to_words.items():
                wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
        print("loading done")
        self.class_names = {}
        for i in range(200):
            self.class_names[i] = [wnid_to_words[wnid] for wnid in wnids]
        X_train = []
        y_train = []
        for i, wnid in enumerate(wnids):
            if (i + 1) % 2 == 0:
                print('loading training data for synset %d / %d'
                      % (i + 1, len(wnids)))
            # To figure out the filenames we need to open the boxes file
            boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
            with open(boxes_file, 'r') as f:
                filenames = [x.split('\t')[0] for x in f]
            num_images = len(filenames)
            X_train_block = np.zeros((num_images, 3, 64, 64), dtype=np.float32)
            y_train_block = wnid_to_label[wnid] * \
                np.ones(num_images, dtype=np.int64)
            for j, img_file in enumerate(filenames):
                img_file = os.path.join(path, 'train', wnid, 'images', img_file)
                img = imread(img_file)
                if img.ndim == 2:
                    ## grayscale file
                    img.shape = (64, 64, 1)
                #X_train_block[j] = img.transpose(2, 0, 1)
            X_train.append(X_train_block)
            y_train.append(y_train_block)
        # We need to concatenate all training data
        X_train = np.concatenate(X_train, axis=0)
        y_train = np.concatenate(y_train, axis=0)
        X_train = np.transpose(X_train, (0, 2, 3, 1))
        #print(X_train.shape, y_train.shape)
        return (X_train, y_train)

    def train(self):
        self.training_images = self.load_images("/home/jatin/codes/projects/objectdetect/object/imagenet/tiny-imagenet-200")
        #print(self.training_images[0].shape)
        self.num_epochs = 100
        saver = tf.train.Saver()
        #print("initiated")
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for epoch in range(1):
                sess.run([self.training_op],
                         feed_dict={self.images: self.training_images[0], self.labels: self.training_images[1]})
                print("Doing training")
                if epoch % 10 == 0:
                    save_path = saver.save(sess, "./sqnet/squeezenet.cpkt")

snet = SqueezeNet(restore=False)
But running this gives me the following error:
InvalidArgumentError (see above for traceback): logits and labels must have the same first dimension, got logits shape [0,200] and labels shape [500]
[[Node: SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits = SparseSoftmaxCrossEntropyWithLogits[T=DT_FLOAT, Tlabels=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, _arg_labels_0_1)]]
The error basically corresponds to this line:
self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=self.classifier))
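As a sanity check, the op itself does work when given the shapes its documentation describes, logits of shape [batch, num_classes] and labels of shape [batch] (a minimal standalone example, using the 500 and 200 from the error message):

import numpy as np
import tensorflow as tf

# Standalone check of the expected contract: logits [500, 200] together
# with labels [500] run without any shape error.
logits = tf.constant(np.random.randn(500, 200).astype(np.float32))
labels = tf.constant(np.random.randint(0, 200, size=500).astype(np.int32))
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
with tf.Session() as sess:
    print(sess.run(loss))  # prints a scalar; no shape error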
But X_train and y_train do have the same first dimension, so I don't see where the logits shape [0, 200] comes from. It would be a great help if someone could tell me what is going wrong.
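For debugging, the runtime shape of every layer can be dumped like this (a sketch, not part of my training script: it assumes the self.train() call at the end of __init__ is temporarily commented out so only the graph is built, and that it runs in a fresh interpreter so the variable scopes are not already taken):

import numpy as np
import tensorflow as tf

# Feed a single dummy 64x64 image and print each layer's runtime shape
# to see where the first dimension of the logits collapses.
net = SqueezeNet(restore=False)
dummy = np.zeros((1, 64, 64, 3), dtype=np.float32)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    outputs = sess.run(net.layers + [net.classifier],
                       feed_dict={net.images: dummy})
    for i, out in enumerate(outputs):
        print(i, out.shape)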