这只是一个简单的转发网。我之前只使用过tf.Variable()
,然后我发现需要一个共享变量来查看测试结果。所以我使用tf.get_variable()
代替tf.Variable()
,但我发现在培训过程中无法减少损失。我想我的代码可能有问题,但我无法修复它。似乎参数在每个时代初始化,如何在每个时代修复它?
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import tensorflow.python.platform
import tensorflow as tf
import global_define as gd
# The MNIST dataset has 10 classes, representing the digits 0 through 9.
# NUM_CLASSES = 16
# # The MNIST images are always 28x28 pixels.
# IMAGE_SIZE = 28
# IMAGE_PIXELS = 784
def weight_variable(name,shape):
return tf.get_variable(name=name,shape=shape,initializer=tf.random_normal_initializer())
def bias_variable(name,shape):
return tf.get_variable(name=name,shape=shape,initializer=tf.constant_initializer(0.1))
def conv2d_same(x,W):
return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')
def conv2d_valid(x,W):
return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='VALID')
def max_pool_2x2(x):
return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
def max_pool_3x3(x):
return tf.nn.max_pool(x,ksize=[1,3,3,1],strides=[1,2,2,1],padding='SAME')
def inference(image_input,images_input_test):
with tf.variable_scope("what_the_fuck") as scope:
images=tf.reshape(image_input,[-1,32,32,1])
W_conv1=weight_variable("weight1",[5,5,1,9])
b_conv1=bias_variable("bias1",[9])
h_conv1=tf.nn.relu(conv2d_valid(images,W_conv1)+b_conv1)
h_pool1=max_pool_3x3(h_conv1)
norm1=tf.nn.lrn(h_pool1,4,bias=1.0,alpha=0.001/9.0,beta=0.75,name='norm1')
W_conv2=weight_variable("weight2",[5,5,9,16])
b_conv2=bias_variable("bias2",[16])
h_conv2=tf.nn.relu(conv2d_same(norm1,W_conv2)+b_conv2)
h_pool2=max_pool_3x3(h_conv2)
norm2=tf.nn.lrn(h_pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
name='norm2')
W_fc1=weight_variable("weight3",[7*7*16,1024])
b_fc1=bias_variable("bias3",[1024])
h_pool2_flat=tf.reshape(norm2,[-1,7*7*16])
h_fc1=tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1)+b_fc1)
#keep_prob=tf.placeholder("float")
h_fc1_drop=tf.nn.dropout(h_fc1,0.8)
W_fc2=weight_variable("weight4",[1024,384])
b_fc2=bias_variable("bias4",[384])
h_fc2=tf.nn.relu(tf.matmul(h_fc1_drop,W_fc2)+b_fc2)
h_fc2_drop=tf.nn.dropout(h_fc2,0.8)
W_fc3=weight_variable("weight5",[384,gd.NUM_CLASSES])
b_fc3=bias_variable("bias5",[gd.NUM_CLASSES])
h_fc3=tf.maximum(tf.nn.softmax(tf.matmul(h_fc2_drop,W_fc3)+b_fc3),1e-30)
return h_fc3
def loss(y_conv,labels):
batch_size=tf.size(labels)
labels=tf.expand_dims(labels,1)
indices=tf.expand_dims(tf.range(0,batch_size),1)
concated=tf.concat(1,[indices,labels])
onehot_labels=tf.sparse_to_dense(concated,tf.pack([batch_size,10]),1.0,0.0)
cross_entropy=tf.nn.softmax_cross_entropy_with_logits(y_conv,onehot_labels,name='xentropy')
loss=tf.reduce_mean(cross_entropy,name='xentropy_mean')
#tf.summary.scalar('xentropy_mean',loss)
return loss
def training(loss,learning_rate):
tf.scalar_summary(loss.op.name,loss)
optimizer=tf.train.GradientDescentOptimizer(learning_rate)
global_step=tf.Variable(0,name='global_step',trainable=False)
train_op=optimizer.minimize(loss,global_step=global_step)
return train_op
def evaluation(y_conv,labels):
correct = tf.nn.in_top_k(y_conv, labels, 1)
return tf.reduce_sum(tf.cast(correct, tf.int32))
这是代码的另一部分
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os.path
import sys
import time
import numpy as np
import re
import datetime
import utils
import tensorflow as tf
import PIL
from PIL import Image
import global_define as gd
import lenet5
from utils import tile_raster_images
TRAIN_FILE = 'train.tfrecords'
VALIDATION_FILE = 'validation.tfrecords'
TEST_FILE='test.tfrecords'
flags = tf.app.flags
FLAGS = flags.FLAGS
#FLAGS = None
flags.DEFINE_string('tfrecord_dir',
'/home/scw4750/Liuhongkun/tfrecord/kaggle_zooplankton/image2tfrecord/zooplankton_rotate_dataset_10_classes/shuffled_32_32/tfrecord_32_32/', 'Directory to put the training data.')
flags.DEFINE_string('filename', 'train.tfrecords', 'Directory to put the training data.')
flags.DEFINE_integer('batch_size', 100, 'Batch size. '
'Must divide evenly into the dataset sizes.')
flags.DEFINE_integer('num_epochs', None, 'Batch size. '
'Must divide evenly into the dataset sizes.')
flags.DEFINE_integer('learning_rate', 0.01,'balabala')
flags.DEFINE_integer('max_steps', 50000,'balabala')
flags.DEFINE_string('model_dir','Modal/model'+re.sub(r'[^0-7]','',str(datetime.datetime.now()))+'/','balabala')
flags.DEFINE_string('tensorevents_dir','tensorboard_event/event_wth'+re.sub(r'[^0-7]','',str(datetime.datetime.now()))+'/','balabala')
flags.DEFINE_string('log_dir','Log_data/log'+re.sub(r'[^0-7]','',str(datetime.datetime.now()))+'/','balabala')
flags.DEFINE_string('pic_dir','Pic/Pictures_input'+re.sub(r'[^0-7]','',str(datetime.datetime.now()))+'/','balabala')
if not os.path.exists(FLAGS.log_dir):
os.makedirs(FLAGS.log_dir)
if not os.path.exists(FLAGS.tensorevents_dir):
os.makedirs(FLAGS.tensorevents_dir)
if not os.path.exists(FLAGS.model_dir):
os.makedirs(FLAGS.model_dir)
if not os.path.exists(FLAGS.pic_dir):
os.makedirs(FLAGS.pic_dir)
def read_and_decode(filename_queue):
reader=tf.TFRecordReader()
_,serialized_exampe=reader.read(filename_queue)
features=tf.parse_single_example(serialized_exampe,
features={
'image_raw':tf.FixedLenFeature([],tf.string),
'label':tf.FixedLenFeature([],tf.int64)
})
image=tf.decode_raw(features['image_raw'],tf.uint8)
image.set_shape([gd.IMAGE_PIXELS])
image=tf.cast(image,tf.float32)*(1./255)-0.5
label=tf.cast(features['label'],tf.int32)
return image,label
def do_eval(sess,eval_correct,log_name):
true_count=0
for step in xrange(FLAGS.batch_size):
#print(sess.run(eval_correct))
true_count+=sess.run(eval_correct)
precision=float(true_count)/(FLAGS.batch_size*FLAGS.batch_size)
# true_count=sess.run(eval_correct)
# precision=float(true_count)/FLAGS.batch_size
print(' Num examples: %d Num correct: %d Precision @ 1: %0.04f' %
(FLAGS.batch_size*FLAGS.batch_size, true_count, precision))
logfile=open(log_name,'a')
logfile.write(' Num examples: %d Num correct: %d Precision : %0.04f' %
(FLAGS.batch_size, true_count, precision))
logfile.close()
return precision
def inputs(train,batch_size,num_epochs):
if not num_epochs:num_epochs=None
if train=='train':
filename=os.path.join(FLAGS.tfrecord_dir,TRAIN_FILE)
elif train=='validation':
filename=os.path.join(FLAGS.tfrecord_dir,VALIDATION_FILE)
else:
filename=os.path.join(FLAGS.tfrecord_dir,TEST_FILE)
with tf.name_scope('input'):
filename_queue=tf.train.string_input_producer([filename],num_epochs=None)
image,label=read_and_decode(filename_queue)
images, sparse_labels = tf.train.shuffle_batch(
[image, label], batch_size=batch_size,
capacity=1000 + 3 * batch_size,num_threads=1,
min_after_dequeue=1000)
return images, sparse_labels
def run_training(log_name):
with tf.Graph().as_default():
images,labels=inputs(train='train',batch_size=FLAGS.batch_size,
num_epochs=FLAGS.num_epochs)
images_test,labels_test =inputs(train='test', batch_size=FLAGS.batch_size,
num_epochs=FLAGS.num_epochs)
logits=lenet5.inference(images)
scope.reuse_variables()
logits_test=lenet5.inference(images_test)
loss=lenet5.loss(logits,labels)
train_op=lenet5.training(loss,FLAGS.learning_rate)
eval_correct=lenet5.evaluation(logits,labels)
eval_correct_test=lenet5.evaluation(logits_test,labels_test)
init_op=tf.initialize_all_variables()
#tf.scalar_summary('Train_loss',loss)
summary_op=tf.merge_all_summaries()
saver=tf.train.Saver()
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
#config=tf.ConfigProto(log_device_placement=True)
with tf.Session(config=config) as sess:
sess.run(init_op)
summary_writer=tf.train.SummaryWriter(FLAGS.tensorevents_dir,sess.graph)
coord=tf.train.Coordinator()
threads=tf.train.start_queue_runners(sess=sess,coord=coord)
try:
step=0
while not coord.should_stop():
start_time=time.time()
_,loss_value,images_out=sess.run([train_op,loss,images])
duration=time.time()-start_time
if step % 100 == 0:
print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
duration))
logfile=open(log_name,'a')
logfile.write('Step %d: loss = %.2f (%.3f sec)\n' % (step, loss_value, duration))
logfile.close()
summary_str=sess.run(summary_op)
summary_writer.add_summary(summary_str,step)
if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
checkpoint_file = os.path.join(FLAGS.model_dir, 'model.ckpt')
saver.save(sess, checkpoint_file, global_step=step)
concat_img=Image.fromarray(
tile_raster_images(
X=images_out,
img_shape=(32, 32),
tile_shape=(10, 10)
))
concat_img.save(FLAGS.pic_dir+str(step)+'_train'+'.jpg')
print('Train:')
do_eval(sess,eval_correct,log_name)
print('Test:')
do_eval(sess,eval_correct_test,log_name)
step+=1
except tf.errors.OutOfRangeError:
print('Done training for %d epochs, %d steps.' % (1001, step))
finally:
coord.request_stop()
coord.join(threads)
#sess.close()
if __name__=="__main__":
log_name=str(FLAGS.log_dir)+'lenet_zooplankton_'+'learningrate_'+str(FLAGS.learning_rate)+'_'+re.sub(r'[^0-7]','',str(datetime.datetime.now()))+'.txt'
f=open(log_name,'w')
f.close()
run_training(log_name)
答案 0 :(得分:0)
请改为尝试:
logits=lenet5.inference(images)
tf.get_variable_scope().reuse_variables()
logits_test=lenet5.inference(images_test)
然后删除推理函数中scope.reuse_variables下面的所有内容。