我正在尝试使用Theano实现CNN,并尝试使用我的更大数据集的一小部分样本集来测试我的代码。我正在尝试将一组8280张图片(250 * 250大小)分类为115个类,我的样本集是前两个类的32张图片集(每张图片16张)。我遇到的问题是,从第一个时代开始,NaN中的训练损失和它在更进一步的时期不会改变。
from __future__ import print_function
import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import re
import cv2
from lasagne.layers import Conv2DLayer, MaxPool2DLayer , DropoutLayer
from lasagne.layers import InputLayer, DenseLayer, batch_norm
def split_list(a_list):
half = len(a_list)/2
return a_list[:half], a_list[half:]
def load_dataset(path=''):
cat_list = []
filelist = sorted(os.listdir(path))
trainlist = []
testlist = []
tmptrain = []
tmptest = []
max_id = 0
for f in filelist:
match = re.match(r'C(\d+)([F|G])(\d+)\.PNG', f)
id = int(match.group(1)) - 1
max_id = max(max_id,id)
fg_class = match.group(2)
fg_id = int(match.group(3))
if id not in [p[0] for p in cat_list]:
cat_list.append([id, [], []])
if fg_class == 'G':
cat_list[-1][1].append(f)
else:
cat_list[-1][2].append(f)
for f in cat_list:
id = f[0]
trainG, testG = split_list(f[1])
trainF, testF = split_list(f[2])
tmptrain = tmptrain + [(id, 1, F) for F in trainF] + [(id, 0, G) for G in trainG] # (Class_id,Forgery,Img)
tmptest = tmptest + [(id, 1, F) for F in testF] + [(id, 0, F) for F in testG]
X_train = np.array([cv2.imread(path+f[2],0) for f in tmptrain]).astype(np.int32)
y_train = np.array([f[0] for f in tmptrain]).astype(np.int32)
X_test = np.array([cv2.imread(path+f[2],0) for f in tmptest]).astype(np.int32)
y_test = np.array([f[0] for f in tmptest]).astype(np.int32)
fg_train = np.array([f[1] for f in tmptrain]).astype(np.int32)
fg_test = np.array([f[1] for f in tmptest]).astype(np.int32)
X_train = np.expand_dims(X_train,axis=1).astype(np.int32)
X_test = np.expand_dims(X_test, axis=1).astype(np.int32)
return X_train, y_train, X_test, y_test, fg_train , fg_test
def ExplicitNegativeCorrelation(net,layer='fc2',lr=0.00001):
for param in lasagne.layers.get_all_params(net[layer]):
if param.name.startswith('W'):
W = param
mean = T.mean(W,0) * lr
W = W - mean#T.mean(T.mean(W,0))
def ImplicitNegativeCorrelation(MSE,Cross,Hinge):
mean = T.mean((MSE+Cross+Hinge),axis=0)
return ((MSE-mean)**2+(Cross-mean)**2+(Hinge-mean)**2)/3
def build_cnn(inputvar,input_shape, trained_weights=None):
net = {}
net['input'] = InputLayer(input_shape,input_var=inputvar)
net['drop_input'] = DropoutLayer(net['input'],p=0.2)
net['conv1'] = batch_norm(Conv2DLayer(net['input'], num_filters=96, filter_size=11, stride=4, flip_filters=False))#,W=lasagne.init.HeNormal()))
net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=3, stride=2)
net['conv2'] = batch_norm(Conv2DLayer(net['pool1'], num_filters=256, filter_size=5, pad=2, flip_filters=False))#, W=lasagne.init.HeNormal()))
net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=3, stride=2)
net['conv3'] = batch_norm(Conv2DLayer(net['pool2'], num_filters=384, filter_size=3, pad=1, flip_filters=False))#, W=lasagne.init.HeNormal()))
net['conv4'] = batch_norm(Conv2DLayer(net['conv3'], num_filters=384, filter_size=3, pad=1, flip_filters=False))#, W=lasagne.init.HeNormal()))
net['conv5'] = batch_norm(Conv2DLayer(net['conv4'], num_filters=256, filter_size=3, pad=1, flip_filters=False))#, W=lasagne.init.HeNormal()))
net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=3, stride=2)
net['fc1'] = batch_norm(DenseLayer(net['pool5'], num_units=2048))
net['drop_fc1'] = DropoutLayer(net['fc1'])
net['fc2'] = batch_norm(DenseLayer(net['drop_fc1'], num_units=2048))
net['fc_class'] = batch_norm(DenseLayer(net['fc2'],num_units=115))
return net
def iterate_minibatches(inputs, targets_class,targets_verif, batchsize, shuffle=False):
assert len(inputs) == len(targets_class)
assert len(inputs) == len(targets_verif)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt], targets_class[excerpt], targets_verif[excerpt]
def main(num_epochs=500):
print("Loading data...")
X_train, y_train, X_test, y_test, fg_train, fg_test = load_dataset('./signatures/tmp4/')
X_val, y_val, fg_val = X_train, y_train, fg_train
print(y_train.shape)
input_var = T.tensor4('inputs')
target_var_class = T.ivector('targets')
network = build_cnn(input_var, (None, 1, 250, 250))
class_prediction = lasagne.layers.get_output(network['fc_class']) # ,inputs={network['input']:input_var})
loss_class = lasagne.objectives.categorical_crossentropy(class_prediction, target_var_class)
loss = loss_class.mean()
params = lasagne.layers.get_all_params([network['fc_class']], trainable=True)
lr = 0.01
updates = lasagne.updates.nesterov_momentum(
loss, params, learning_rate=lr, momentum=0.9)
test_prediction_class = lasagne.layers.get_output(network['fc_class'], deterministic=True)
test_loss_class = lasagne.objectives.categorical_crossentropy(test_prediction_class,
target_var_class)
test_loss_class = test_loss_class.mean()
test_acc_class = T.mean(T.eq(T.argmax(test_prediction_class, axis=1), target_var_class),
dtype=theano.config.floatX)
predict_class = theano.function([input_var], T.argmax(test_prediction_class,axis=1))
train_fn = theano.function([input_var, target_var_class], loss, updates=updates)
val_fn_class = theano.function([input_var, target_var_class], [test_loss_class, test_acc_class])
print("Starting training...")
BatchSize = 2
for epoch in range(num_epochs):
train_err = 0
train_batches = 0
start_time = time.time()
for batch in iterate_minibatches(X_train, y_train,fg_train, BatchSize, shuffle=True):
inputs, targets_class, targets_verif = batch
train_err += train_fn(inputs, targets_class)
#ExplicitNegativeCorrelation(network, layer='fc2',lr=lr/10)
print(targets_class,predict_class(inputs))
train_batches += 1
val_err_class = 0
val_acc_class = 0
val_batches = 0
for batch in iterate_minibatches(X_val, y_val, fg_val, BatchSize, shuffle=False):
inputs, targets_class, targets_verif = batch
err_class, acc_class = val_fn_class(inputs, targets_class)
val_err_class += err_class
val_acc_class += acc_class
val_batches += 1
print("Epoch {} of {} took {:.3f}s".format(
epoch + 1, num_epochs, time.time() - start_time))
print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
print(" Classification loss:\t\t{:.6f}".format(val_err_class / val_batches))
print(" Classification accuracy:\t\t{:.2f} %".format(
val_acc_class / val_batches * 100))
test_err_class = 0
test_acc_class = 0
test_err_verif = 0
test_acc_verif = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, fg_test, BatchSize, shuffle=False):
inputs, targets_class, targets_verif = batch
err_class, acc_class = val_fn_class(inputs, targets_class)
test_err_class += err_class
test_acc_class += acc_class
test_batches += 1
print("Final results:")
print(" test loss (Classification):\t\t\t{:.6f}".format(test_err_class / test_batches))
print(" test accuracy (Classification):\t\t{:.2f} %".format(
test_acc_class / test_batches * 100))
if __name__ == '__main__':
main()
我试图将lasagne.nonlinearities.softmax
放入DenseLayers,但确实解决了NaN问题,但训练模型的准确性不会有任何好处,它会在0到25%之间波动。历元!)。
我已经实现了load_dataset
函数,我认为它正常工作(我已多次测试该函数),并且我将每个图片的类ID作为损失函数中的目标。所以我的输入和目标将是这样的:
Input Shape: (BatchSize, 1, 250, 250)
Target Shape: (BatchSize, 1) : vector of class ids
我已在this link上传了我的样本集。
答案 0 :(得分:0)
根据数据看起来我们有4个类,所以我改变了加载代码来反映它:
y_train = np.array([f[0] * 2 + f[1] for f in tmptrain]).astype(np.int32)
y_test = np.array([f[0] * 2 + f[1] for f in tmptest]).astype(np.int32)
输出层中的单位数应该等于类的数量,所以我添加了一个带SoftMax的输出层:
net['fo_class'] = DenseLayer(net['fc_class'],num_units=4,
nonlinearity=lasagne.nonlinearities.softmax)
我建议在输入后立即删除dropout图层 - 你可以用它来比较结果,没有它来确保那个
批量大小= 2太小而且学习率太高
以下是包含这些更改的代码示例:
from __future__ import print_function
import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import re
import cv2
from lasagne.layers import Conv2DLayer, MaxPool2DLayer , DropoutLayer
from lasagne.layers import InputLayer, DenseLayer
def split_list(a_list):
half = len(a_list)/2
return a_list[:half], a_list[half:]
def load_dataset(path=''):
cat_list = []
filelist = sorted(os.listdir(path))
tmptrain = []
tmptest = []
max_id = 0
for f in filelist:
match = re.match(r'C(\d+)([F|G])(\d+)\.PNG', f)
id = int(match.group(1)) - 1
max_id = max(max_id,id)
fg_class = match.group(2)
if id not in [p[0] for p in cat_list]:
cat_list.append([id, [], []])
if fg_class == 'G':
cat_list[-1][1].append(f)
else:
cat_list[-1][2].append(f)
for f in cat_list:
id = f[0]
trainG, testG = split_list(f[1])
trainF, testF = split_list(f[2])
tmptrain = tmptrain + [(id, 1, F) for F in trainF] + [(id, 0, G) for G in trainG]
tmptest = tmptest + [(id, 1, F) for F in testF] + [(id, 0, F) for F in testG]
X_train = np.array([cv2.imread(path+f[2],0) for f in tmptrain]).astype(np.float32)
y_train = np.array([f[0] * 2 + f[1] for f in tmptrain]).astype(np.int32)
X_test = np.array([cv2.imread(path+f[2],0) for f in tmptest]).astype(np.float32)
y_test = np.array([f[0] * 2 + f[1] for f in tmptest]).astype(np.int32)
fg_train = np.array([f[1] for f in tmptrain]).astype(np.float32)
fg_test = np.array([f[1] for f in tmptest]).astype(np.float32)
X_train = np.expand_dims(X_train,axis=1).astype(np.float32)
X_test = np.expand_dims(X_test, axis=1).astype(np.float32)
return X_train, y_train, X_test, y_test, fg_train , fg_test
def ExplicitNegativeCorrelation(net,layer='fc2',lr=0.00001):
for param in lasagne.layers.get_all_params(net[layer]):
if param.name.startswith('W'):
W = param
mean = T.mean(W,0) * lr
W = W - mean
def ImplicitNegativeCorrelation(MSE,Cross,Hinge):
mean = T.mean((MSE+Cross+Hinge),axis=0)
return ((MSE-mean)**2+(Cross-mean)**2+(Hinge-mean)**2)/3
def build_cnn(inputvar,input_shape, trained_weights=None):
net = {}
net['input'] = InputLayer(input_shape,input_var=inputvar)
net['conv1'] = Conv2DLayer(net['input'], num_filters=96, filter_size=11, stride=4)
net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=3, stride=2)
net['conv2'] = Conv2DLayer(net['pool1'], num_filters=256, filter_size=5, pad=2)
net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=3, stride=2)
net['conv3'] = Conv2DLayer(net['pool2'], num_filters=384, filter_size=3, pad=1)
net['conv4'] = Conv2DLayer(net['conv3'], num_filters=384, filter_size=3, pad=1)
net['conv5'] = Conv2DLayer(net['conv4'], num_filters=256, filter_size=3, pad=1)
net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=3, stride=2)
net['fc1'] = DenseLayer(net['pool5'], num_units=2048)
net['drop_fc1'] = DropoutLayer(net['fc1'])
net['fc2'] = DenseLayer(net['drop_fc1'], num_units=2048)
net['fc_class'] = DenseLayer(net['fc2'],num_units=115)
net['fo_class'] = DenseLayer(net['fc_class'],num_units=4,
nonlinearity=lasagne.nonlinearities.softmax)
return net
def iterate_minibatches(inputs, targets_class,targets_verif, batchsize, shuffle=False):
assert len(inputs) == len(targets_class)
assert len(inputs) == len(targets_verif)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt], targets_class[excerpt], targets_verif[excerpt]
def main(num_epochs=500):
print("Loading data...")
X_train, y_train, X_test, y_test, fg_train, fg_test = load_dataset('./signatures/tmp4/')
X_train /= 255
X_val, y_val, fg_val = X_train, y_train, fg_train
print(y_train.shape)
check = X_train[0][0]
print(check)
input_var = T.tensor4('inputs')
target_var_class = T.ivector('targets')
network = build_cnn(input_var, (None, 1, 250, 250))
class_prediction = lasagne.layers.get_output(network['fo_class'])
loss_class = lasagne.objectives.categorical_crossentropy(class_prediction, target_var_class)
loss = loss_class.mean()
params = lasagne.layers.get_all_params([network['fo_class']], trainable=True)
lr = 0.0007
updates = lasagne.updates.nesterov_momentum(
loss, params, learning_rate=lr, momentum=0.9)
test_prediction_class = lasagne.layers.get_output(network['fo_class'], deterministic=True)
test_loss_class = lasagne.objectives.categorical_crossentropy(test_prediction_class,
target_var_class)
test_loss_class = test_loss_class.mean()
test_acc_class = T.mean(T.eq(T.argmax(test_prediction_class, axis=1), target_var_class),
dtype=theano.config.floatX)
predict_class = theano.function([input_var], T.argmax(test_prediction_class,axis=1))
train_fn = theano.function([input_var, target_var_class], loss, updates=updates)
val_fn_class = theano.function([input_var, target_var_class], [test_loss_class, test_acc_class])
print("Starting training...")
BatchSize = 16
for epoch in range(num_epochs):
train_err = 0
train_batches = 0
start_time = time.time()
for batch in iterate_minibatches(X_train, y_train,fg_train, BatchSize, shuffle=True):
inputs, targets_class, targets_verif = batch
train_err += train_fn(inputs, targets_class)
print(targets_class,predict_class(inputs))
train_batches += 1
val_err_class = 0
val_acc_class = 0
val_batches = 0
for batch in iterate_minibatches(X_val, y_val, fg_val, BatchSize, shuffle=False):
inputs, targets_class, targets_verif = batch
err_class, acc_class = val_fn_class(inputs, targets_class)
val_err_class += err_class
val_acc_class += acc_class
val_batches += 1
print("Epoch {} of {} took {:.3f}s".format(
epoch + 1, num_epochs, time.time() - start_time))
print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
print(" Classification loss:\t\t{:.6f}".format(val_err_class / val_batches))
print(" Classification accuracy:\t\t{:.2f} %".format(
val_acc_class / val_batches * 100))
test_err_class = 0
test_acc_class = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, fg_test, BatchSize, shuffle=False):
inputs, targets_class, targets_verif = batch
err_class, acc_class = val_fn_class(inputs, targets_class)
test_err_class += err_class
test_acc_class += acc_class
test_batches += 1
print("Final results:")
print(" test loss (Classification):\t\t\t{:.6f}".format(test_err_class / test_batches))
print(" test accuracy (Classification):\t\t{:.2f} %".format(
test_acc_class / test_batches * 100))
if __name__ == '__main__':
main()