First, a few points:
This is a binary classification problem. My final layer is a fully connected layer with a single output and no sigmoid activation, and I use tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits()) as the loss (a toy sketch of this setup follows these points).
I have checked the logits: I print them at every step and they do change. I have also listed all trainable variables just in case, and every parameter that needs to be trainable is trainable. Only the loss stays constant.
The code is essentially taken from the Stanford CS20SI MNIST example on its GitHub page (worth checking out if you want a thorough introduction to TensorFlow, or to keep up with the recent changes). Their example works perfectly, and even when I compare the TF graphs they look the same. The only differences between my code and theirs are the Dataset API (I get my input by reading image files from a directory), the number of classes, and the loss.
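For concreteness, here is that setup reduced to a self-contained toy sketch. The random inputs and tf.layers.dense are only stand-ins for my real pipeline and final layer, and the per-variable gradient print is just an illustration of the kind of check I mean, not my actual debug code:

import numpy as np
import tensorflow as tf

# Toy stand-ins for the real pipeline: a random batch and binary labels.
x = tf.constant(np.random.randn(4, 8), dtype=tf.float32)
y = tf.constant([0, 1, 1, 0], dtype=tf.int32)

# Single-output fully connected layer, no sigmoid activation.
logits = tf.layers.dense(x, 1, activation=None)
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    labels=tf.cast(y, tf.float32), logits=tf.squeeze(logits, axis=1)))

# Inspect the gradient of the loss w.r.t. every trainable variable.
grads = tf.gradients(loss, tf.trainable_variables())
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for var, g in zip(tf.trainable_variables(), sess.run(grads)):
        print(var.name, 'grad norm:', np.linalg.norm(g))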
That said, here is the code:
import os
import numpy as np
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import time
import tensorflow as tf
from data_pipeline2 import ImageDataGenerator
from model.ResNetBest import network as forward_pass
from model.ResNetBest import loss as lossfn
import utils
EPOCHS = 20
class ConvNet(object):
    def __init__(self):
        self.lr = 0.001
        self.batch_size = 2
        self.keep_prob = tf.constant(0.75)
        self.gstep = tf.Variable(0, dtype=tf.int32,
                                 trainable=False, name='global_step')
        self.n_classes = 1
        self.skip_step = 20
        # self.n_test = 10000
        self.training = True
    def get_data(self):
        with tf.name_scope('data'):
            train_data = ImageDataGenerator(directory="C:\\Users\\A123\\Desktop\\TRAIN",
                horizontal_flip=True, vertical_flip=True, rescale=True, normalize=True,
                color_jitter=True, epochs=1, batch_size=self.batch_size, num_cpus=20).dataset_pipeline()
            test_data = ImageDataGenerator(directory="C:\\Users\\A123\\Desktop\\TRAIN",
                horizontal_flip=False, vertical_flip=False, rescale=False, normalize=False,
                color_jitter=True, epochs=1, batch_size=self.batch_size, num_cpus=20).dataset_pipeline()
            iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                       train_data.output_shapes)
            self.img, self.label = iterator.get_next()
            self.img = tf.reshape(self.img, [-1, 389, 389, 3])
            self.train_init = iterator.make_initializer(train_data)  # initializer for train_data
            self.test_init = iterator.make_initializer(test_data)  # initializer for test_data
    def inference(self):
        self.logits = forward_pass.network(self.img)
    def loss(self):
        '''
        Define the loss function:
        mean sigmoid cross-entropy with logits (the sigmoid is applied internally)
        '''
        with tf.name_scope('loss'):
            # entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.label, logits=self.logits)
            # self.loss = tf.reduce_mean(entropy, name='loss')
            self.loss = lossfn.lossfn(self.logits, None, self.label)
    def optimize(self):
        '''
        Define the training op:
        use the Adam optimizer to minimize the loss
        '''
        self.opt = tf.train.AdamOptimizer(self.lr).minimize(self.loss,
                                                            global_step=self.gstep)
    def summary(self):
        '''
        Create summaries to write to TensorBoard
        '''
        with tf.name_scope('summaries'):
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('accuracy', self.accuracy)
            tf.summary.histogram('histogram loss', self.loss)
        with tf.name_scope('debug'):
            tf.summary.scalar('logits', tf.reduce_mean(self.logits))
        self.summary_op = tf.summary.merge_all()
    def eval(self):
        '''
        Count the number of right predictions in a batch
        '''
        with tf.name_scope('predict'):
            # preds = tf.nn.sigmoid(self.logits)
            preds = self.logits
            # correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(self.label, 1))
            correct_preds = tf.equal(tf.cast(tf.round(preds), dtype=tf.int32), self.label)
            self.accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
    def build(self):
        '''
        Build the computation graph
        '''
        self.get_data()
        self.inference()
        self.loss()
        self.optimize()
        self.eval()
        self.summary()
    def train_one_epoch(self, sess, saver, init, writer, epoch, step):
        start_time = time.time()
        sess.run(init)
        self.training = True
        total_loss = 0
        n_batches = 0
        try:
            while True:
                _, l, summaries, logits = sess.run([self.opt, self.loss, self.summary_op, self.logits])
                writer.add_summary(summaries, global_step=step)
                if (step + 1) % self.skip_step == 0:
                    print('Loss at step {0}: {1} and logits: {2}'.format(step, l, np.sum(logits)))
                    print(logits)
                step += 1
                total_loss += l
                n_batches += 1
        except tf.errors.OutOfRangeError:
            pass
        saver.save(sess, 'checkpoints/ResNet18/ResNet', step)
        print('Average loss at epoch {0}: {1}'.format(epoch, total_loss / n_batches))
        print('Took: {0} seconds'.format(time.time() - start_time))
        ############################## debug code ###########################################
        variables_names = [v.name for v in tf.trainable_variables()]
        values = sess.run(variables_names)
        for k, v in zip(variables_names, values):
            print("Variable: ", k)
            # print("Shape: ", v.shape)
            # print(v)
        return step
    def eval_once(self, sess, init, writer, epoch, step):
        start_time = time.time()
        sess.run(init)
        self.training = False
        total_correct_preds = 0
        n_step = 0
        try:
            while True:
                accuracy_batch, summaries = sess.run([self.accuracy, self.summary_op])
                writer.add_summary(summaries, global_step=step)
                total_correct_preds += accuracy_batch
                n_step += 1
        except tf.errors.OutOfRangeError:
            pass
        print('Accuracy at epoch {0}: {1} '.format(epoch, total_correct_preds / n_step))
        print('Took: {0} seconds'.format(time.time() - start_time))
    def train(self, n_epochs):
        '''
        The train function alternates between training one epoch and evaluating
        '''
        utils.safe_mkdir('checkpoints')
        utils.safe_mkdir('checkpoints/ResNet18')
        writer = tf.summary.FileWriter('./graphs/ResNet18', tf.get_default_graph())
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/ResNet18/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            step = self.gstep.eval()
            for epoch in range(n_epochs):
                step = self.train_one_epoch(sess, saver, self.train_init, writer, epoch, step)
                self.eval_once(sess, self.test_init, writer, epoch, step)
        writer.close()
if __name__ == '__main__':
    model = ConvNet()
    model.build()
    model.train(n_epochs=EPOCHS)
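As an extra sanity check, a before/after comparison of a variable around a single optimizer step is how one can confirm that the training op actually updates the weights. Shown here on a self-contained toy graph rather than my real one:

import numpy as np
import tensorflow as tf

# Toy graph: one trainable variable, a simple loss, one Adam step.
w = tf.get_variable('w', shape=[3], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))
opt = tf.train.AdamOptimizer(0.001).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    before = sess.run(w)
    sess.run(opt)                      # one training step
    after = sess.run(w)
    print('max abs weight change:', np.abs(after - before).max())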
Here is my network.py file (it is not a ResNet; I tried a ResNet before and the same thing happened, so I created this simple network instead):
import tensorflow as tf
def conv_relu(inputs, filters, k_size, stride, padding, scope_name):
    '''
    A method that does convolution + relu on inputs
    '''
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        in_channels = inputs.shape[-1]
        kernel = tf.get_variable('kernel',
                                 [k_size, k_size, in_channels, filters],
                                 initializer=tf.truncated_normal_initializer())
        biases = tf.get_variable('biases',
                                 [filters],
                                 initializer=tf.random_normal_initializer())
        conv = tf.nn.conv2d(inputs, kernel, strides=[1, stride, stride, 1], padding=padding)
        return tf.nn.relu(conv + biases, name=scope.name)
def maxpool(inputs, ksize, stride, padding='VALID', scope_name='pool'):
    '''A method that does max pooling on inputs'''
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        pool = tf.nn.max_pool(inputs,
                              ksize=[1, ksize, ksize, 1],
                              strides=[1, stride, stride, 1],
                              padding=padding)
        return pool
def fully_connected(inputs, out_dim, scope_name='fc'):
    '''
    A fully connected linear layer on inputs
    '''
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        in_dim = inputs.shape[-1]
        w = tf.get_variable('weights', [in_dim, out_dim],
                            initializer=tf.truncated_normal_initializer())
        b = tf.get_variable('biases', [out_dim],
                            initializer=tf.constant_initializer(0.0))
        out = tf.matmul(inputs, w) + b
        return out
def network(data, labels_one_hot=None):
    # jsonFile = './model/ResNetBest/resnetV4_2Best.json'
    # with open(jsonFile, "r") as file:
    #     jsonDef = file.read()
    # model = tf.keras.models.model_from_json(jsonDef)
    conv1 = conv_relu(inputs=data,
                      filters=32,
                      k_size=5,
                      stride=1,
                      padding='SAME',
                      scope_name='conv1')
    pool1 = maxpool(conv1, 2, 2, 'VALID', 'pool1')
    conv2 = conv_relu(inputs=pool1,
                      filters=64,
                      k_size=5,
                      stride=1,
                      padding='SAME',
                      scope_name='conv2')
    pool2 = maxpool(conv2, 2, 2, 'VALID', 'pool2')
    feature_dim = pool2.shape[1] * pool2.shape[2] * pool2.shape[3]
    pool2 = tf.reshape(pool2, [-1, feature_dim])
    fc = fully_connected(pool2, 10, 'fc')
    dropout = tf.nn.dropout(tf.nn.relu(fc), 0.75, name='relu_dropout')
    logits = fully_connected(dropout, 1, 'logits')
    # return model(data)
    return logits
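In case the initializer scale matters here: the conv and fc kernels above use tf.truncated_normal_initializer() with its default stddev of 1.0. This is the quick sketch I would use to check what scale the untrained logits come out at (the random batch is just a stand-in for real images; the 389x389x3 shape matches the reshape in get_data):

import numpy as np
import tensorflow as tf

# Sketch: push one random batch through network() and look at the raw logit
# scale before any training has happened.
data = tf.constant(np.random.rand(2, 389, 389, 3), dtype=tf.float32)
logits = network(data)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(logits)
    print('logits:', out.ravel(), 'mean abs:', np.abs(out).mean())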
And this is the file with the loss function:
import tensorflow as tf
def binary_crossentropy(y, y_):
    cross_entropy = tf.reduce_sum(- y * tf.log(y_) - (1 - y) * tf.log(1 - y_), 1)
    loss = tf.reduce_mean(cross_entropy)
    return loss
def lossfn(net_out, data, labels):
    with tf.name_scope('cross_entropy'):
        # return tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=net_out)
        # return tf.keras.losses.binary_crossentropy(tf.cast(labels, tf.float32), tf.squeeze(net_out))
        cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(labels, tf.float32), logits=tf.squeeze(net_out))
        # return binary_crossentropy(tf.squeeze(tf.cast(labels, tf.float32)), net_out)
        # return tf.losses.sigmoid_cross_entropy(tf.squeeze(labels), net_out)
        return tf.reduce_mean(cross_entropy, name='loss')
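Finally, here is a sanity check I would expect this loss function to pass, as a self-contained sketch with hand-made logits: logits that agree with the labels should give a loss near zero, and logits that disagree should give a large one:

import tensorflow as tf

# Sketch: lossfn on hand-made [batch, 1] logits against integer labels.
labels = tf.constant([1, 0, 1, 0], dtype=tf.int32)
good_logits = tf.constant([[10.0], [-10.0], [10.0], [-10.0]])
bad_logits = tf.constant([[-10.0], [10.0], [-10.0], [10.0]])

with tf.Session() as sess:
    print('matching logits ->', sess.run(lossfn(good_logits, None, labels)))   # ~0.0
    print('mismatched logits ->', sess.run(lossfn(bad_logits, None, labels)))  # ~10.0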
Please help; this makes no sense to me at all.