I'm working on a project that needs to detect facial features in images. I've posed this as a regression problem and want to start with a simple conv net, which I've defined below.
I noticed that the predicted output is always the same. After some more debugging, I saw that the weights and gradients of the score layer do not change across iterations. I'm using a fixed learning rate of ~5e-2 to generate the examples below. The training loss seems to decrease as the iterations progress, but I'm unable to understand why. I also logged the other layers: 'conv1', 'conv2', and 'fc1', and saw the same behavior of staying constant across iterations. Since the loss does seem to decrease, something must be changing, so my guess is that the way I'm logging things below may be incorrect.
The modified LeNet:
# Modified LeNet: added relu1, relu2, and dropout.
# The loss function is a Euclidean distance.
import caffe
from caffe import layers as L, params as P

def lenet(hdf5_list, batch_size=64, dropout_ratio=0.5, train=True):
    # Our version of LeNet: a series of linear and simple nonlinear transformations
    n = caffe.NetSpec()
    n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5_list, ntop=2)
    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant', value=0.1))
    n.relu1 = L.ReLU(n.conv1, in_place=False, relu_param=dict(negative_slope=0.1))
    n.pool1 = L.Pooling(n.relu1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant', value=0.1))
    n.relu2 = L.ReLU(n.conv2, in_place=False, relu_param=dict(negative_slope=0.1))
    n.pool2 = L.Pooling(n.relu2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    if train:
        # Dropout is only present in the training net
        n.drop3 = fc1_input = L.Dropout(n.pool2, in_place=True,
                                        dropout_param=dict(dropout_ratio=dropout_ratio))
    else:
        fc1_input = n.pool2
    n.fc1 = L.InnerProduct(fc1_input, num_output=500,
                           weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant', value=0.1))
    n.relu3 = L.ReLU(n.fc1, in_place=True, relu_param=dict(negative_slope=0.1))
    n.score = L.InnerProduct(n.relu3, num_output=30, weight_filler=dict(type='xavier'))
    n.loss = L.EuclideanLoss(n.score, n.label)
    return n.to_proto()
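For completeness, here is a minimal sketch of how this definition might be turned into prototxt files and a solver. The file names and the solver prototxt are assumptions for illustration, not part of the question:

import caffe

# Hypothetical file names -- adjust to your setup.
with open('lenet_auto_train.prototxt', 'w') as f:
    f.write(str(lenet('train_hdf5_list.txt', train=True)))
with open('lenet_auto_test.prototxt', 'w') as f:
    f.write(str(lenet('test_hdf5_list.txt', train=False)))

caffe.set_mode_cpu()
# The solver prototxt (with the fixed lr of ~5e-2 described above) must
# point at the train/test net files written out above.
solver = caffe.SGDSolver('lenet_auto_solver.prototxt')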
The solver loop:
# Custom solver loop
for it in range(niter):
    solver.step(1)
    train_loss[it] = solver.net.blobs['loss'].data
    score_weights.append(solver.net.params['score'][0].data)
    score_biases.append(solver.net.params['score'][1].data)
    score_weights_diff.append(solver.net.params['score'][0].diff)
    score_biases_diff.append(solver.net.params['score'][1].diff)
    if (it % val_interval) == 0 or (it == niter - 1):
        val_error_this = 0
        for test_it in range(niter_val_error):
            solver.test_nets[0].forward()
            val_error_this += euclidean_loss(solver.test_nets[0].blobs['score'].data,
                                             solver.test_nets[0].blobs['label'].data) / niter_val_error
        val_error[it // val_interval] = val_error_this
Printing the logged diffs:
print score_weights_diff[0].shape
for i in range(10):
    score_weights_i = score_weights_diff[i]
    print score_weights_i[0:30:10, 0]

print score_biases_diff[0].shape
for i in range(5):
    score_biases_i = score_biases_diff[i]
    print score_biases_i[0:30:6]
Output:
(30, 500)
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
[ -3.71852257e-05 7.34565838e-05 2.61445384e-04]
(30,)
[ 3.22921231e-04 5.66378840e-05 -5.15143370e-07 -1.51118627e-04
2.30352176e-04]
[ 3.22921231e-04 5.66378840e-05 -5.15143370e-07 -1.51118627e-04
2.30352176e-04]
[ 3.22921231e-04 5.66378840e-05 -5.15143370e-07 -1.51118627e-04
2.30352176e-04]
[ 3.22921231e-04 5.66378840e-05 -5.15143370e-07 -1.51118627e-04
2.30352176e-04]
[ 3.22921231e-04 5.66378840e-05 -5.15143370e-07 -1.51118627e-04
2.30352176e-04]
Answer (score: 1):
It's a little hard to tell from your code alone, but score_weights_diff, score_biases_diff, and the other logging lists are probably storing references to solver.net.params['score'][0].diff and the other parameter blobs. Consequently, all the entries in each list are actually the same array, and they all change together at every iteration.
Try saving a copy instead:
score_weights_diff.append(solver.net.params['score'][0].diff[...].copy())
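The same fix applies to the score_weights, score_biases, and score_biases_diff appends. To see the aliasing effect in isolation, here is a minimal numpy sketch, independent of Caffe:

import numpy as np

buf = np.zeros(3)                 # stands in for the blob Caffe reuses
refs, snapshots = [], []
for it in range(3):
    buf[:] = it                   # solver.step(1) overwrites the buffer in place
    refs.append(buf)              # every entry aliases the same array
    snapshots.append(buf.copy())  # a real snapshot of this iteration's values

print refs       # three identical arrays: [2., 2., 2.]
print snapshots  # [0., 0., 0.], [1., 1., 1.], [2., 2., 2.]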