我正在训练一个模型,该模型可以对62个字母数字字符进行分类,但是损失值在前几批中首先急剧下降,然后跌落并永不下降,我不知道哪里出了问题或如何调试模型
这是训练日志的快照:
以下是转换数据的示例:
我先使用4个conv层,然后再使用1个fc层,并使用adam优化器来最大程度地减少日志丢失。我再次检查了图像标签是否正确。因此我不知道哪里还有问题。
这是代码:
import numpy as np
import tensorflow as tf
import os
from PIL import Image
import shutil
import time
input = temp= tf.placeholder(dtype='float32', shape=(None,32,32,1), name='input')#(None,62)
label = tf.placeholder(dtype='float32',shape=(None,62))#(None,62)
temp = tf.layers.conv2d(inputs=temp,filters=32,kernel_size=(3,3),padding="SAME",activation=tf.nn.relu,kernel_initializer=tf.keras.initializers.he_normal())#(None,32,32,32)
#temp = tf.layers.dropout(inputs=temp,rate=0.5)
temp = tf.layers.max_pooling2d(temp,pool_size=[2, 2], strides=2)#(None,16,16,32)
temp = tf.layers.conv2d(inputs=temp,filters=64,kernel_size=(3,3),padding="SAME",activation=tf.nn.relu,kernel_initializer=tf.keras.initializers.he_normal())#(None,16,16,64)
#temp = tf.layers.dropout(inputs=temp,rate=0.2)
temp = tf.layers.max_pooling2d(temp,pool_size=[2, 2], strides=2)#(None,8,8,64)
temp = tf.layers.conv2d(inputs=temp,filters=128,kernel_size=(3,3),padding="SAME",activation=tf.nn.relu,kernel_initializer=tf.keras.initializers.he_normal())#(None,8,8,128)
temp = tf.layers.dropout(inputs=temp,rate=0.2)
temp = tf.layers.max_pooling2d(temp,pool_size=[2, 2], strides=2)#(None,4,4,128)
temp = tf.layers.conv2d(inputs=temp,filters=256,kernel_size=(3,3),padding="SAME",activation=tf.nn.relu,kernel_initializer=tf.keras.initializers.he_normal())#(None,4,4,256)
temp = tf.layers.dropout(inputs=temp,rate=0.2)
temp = tf.layers.max_pooling2d(temp,pool_size=[2, 2], strides=2)#(None,2,2,256)
temp = tf.layers.conv2d(inputs=temp,filters=62,kernel_size=(2,2),padding="VALID",kernel_initializer=tf.keras.initializers.he_normal())#fc (None,1,1,62)
output = temp = tf.layers.flatten(temp)#(None,62)
output = tf.nn.softmax(output)
#loss
output_clip = tf.clip_by_value(output,1e-7,1-1e-7)
loss = tf.reduce_mean(tf.reduce_sum(-label*tf.log(output_clip)-(1-label)*tf.log(1-output_clip),axis=-1))#scaler
optimizer = tf.train.AdamOptimizer().minimize(loss)
#accuracy
indexoutput = tf.argmax(output,axis=-1)#(None,)
labelindex = tf.argmax(label,axis=-1)#(None,)
equals = tf.equal(indexoutput,labelindex)#(None,)
equals = tf.reduce_sum(tf.cast(equals,dtype='int8'),axis=-1)#scaler
acc = tf.cast(equals,dtype='float32')/tf.cast(tf.shape(output)[0],dtype='float32')#scaler
def train(epochs):
saver = tf.train.Saver()
lossrec=[]
accrec = []
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
#saver.restore(sess, os.path.join(os.getcwd(),'model_logloss','captchabreak.ckpt'))
valimg,vallabel = next(validategenerator(os.path.join(os.getcwd(),'tests')))
for i in range(epochs):
for j, (trainimg, trainlabel) in enumerate(traingenerator(os.path.join(os.getcwd(),'pics'),32)):
_, trainacc, trainloss = sess.run([optimizer, acc, loss],feed_dict={input: trainimg, label: trainlabel});
valacc, valloss = sess.run([acc, loss], feed_dict={input: np.array(valimg), label: np.array(vallabel)})
print("epoch:{} batch:{} trainloss:{:.4f} validateloss:{:.4f} trainacc:{:.2f} validateacc:{:.2f}"
.format(i, j, trainloss, valloss, trainacc, valacc))
#keep some logs
lossrec.append(valloss)
accrec.append(valacc)
print(lossrec);
print(accrec)
if len(lossrec) >= 3 and valloss >= lossrec[-2] and valloss >= lossrec[-3]: break;
shutil.rmtree("model_logloss")
saver.save(sess, "model_logloss/captchabreak.ckpt")
def traingenerator(path,batch_size):
fs = os.listdir(path);
fs=np.random.permutation(fs)
if batch_size == 0:
batch_size = len(fs)
offset=0
while offset<len(fs):
yield fetch(fs[offset:offset+batch_size],path)
offset+=batch_size
def validategenerator(path):
fs = os.listdir(path);
yield fetch(fs,path)
def fetch(fs,path):
imgs = []
labels = []
for i, fname in enumerate(fs):
fp = os.path.join(path, fname)
imp = Image.open(fp).resize((32, 32));
imp = imp.convert('L')
imp = imp.point(lambda p: p > 210 and 255)
im = np.array(imp)
im = np.expand_dims(im, axis=-1)
imp.close()
c = fname[0]
lb = np.zeros((62))
if ord(c) >= 48 and ord(c) <= 57:
lb[ord(c) - 48] = 1
if ord(c) >= 65 and ord(c) <= 90:
lb[ord(c) - 65 + 10] = 1
if ord(c) >= 97 and ord(c) <= 122:
lb[ord(c) - 97 + 36] = 1
imgs.append(im)
labels.append(lb)
return np.array(imgs), np.array(labels)
if __name__ == "__main__":
train(30)
答案 0 :(得分:1)
已解决。我忘了将像素值除以255。