我正在尝试构建一个5层神经网络来对3个类,178个实例和13个要素数据集进行分类。基本上我遵循了here给出的指导原则。我在Matlab中写下了我自己的代码,它可以成功运行。但是,训练结果非常糟糕。模型保持与输出预测相同的类。我找不到我的代码哪里出错,或者模型不适合数据?有人能帮我找到问题所在吗?非常感谢你。
我的Matlab培训代码如下所示:
%% Initialization
numclass = 3; % num of class
c = 13; % num of feature
% for each layer, initialize each parameter w
and each b to a small random value near zero
w1 = normrnd(0,0.01,[c,10]); % Input layer -> layer 2 (10 nodes)
b1 = normrnd(0,0.01,[1,10]);
w2 = normrnd(0,0.01,[10,6]); % layer 2 -> layer 3 (6 nodes)
b2 = normrnd(0,0.01,[1,6]);
w3 = normrnd(0,0.01,[6,4]); % layer 3 -> layer 4 (4 nodes)
b3 = normrnd(0,0.01,[1,4]);
w4 = normrnd(0,0.01,[4,numclass]); % layer 4 -> Output layer (3 nodes/class label)
b4 = normrnd(0,0.01,[1,numclass]);
Iter = 0;
lambda = 0.5; % regularization coefficient
%% Batch Training
while Iter < 200
Iter = Iter+1
d_w1 = 0; d_w2 = 0; d_w3 = 0; d_b1 = 0; d_b2 = 0; d_b3 = 0;
d_w4 = 0; d_b4 = 0;
for i = 1:r
% Forward propagation
a1 = X0(i,:); % X0 is training data, each row represents a instance with 13 features
% Input layer -> Layer 2
z2 = a1*w1+b1;
a2 = sigmoid(z2);
% Layer 2 -> Layer 3
z3 = a2*w2+b2;
a3 = sigmoid(z3);
% Layer 3 -> Layer 4
z4 = a3*w3+b3;
a4 = sigmoid(z4);
% Layer 4 -> Output Layer
z5 = a4*w4+b4;
a5 = sigmoid(z5);
% Backward propagation
y = zeros(1,numclass);
y(Y0(i)) = 1; % Y0 is the training label ({1,2,3} in this case), each element indicates which class the instance belongs to
% Output Layer -> Layer 4
delta5 = (-(y-a5).*d_sigmoid(z5))';
% Output Layer -> Layer 3
delta4 = (w4*delta5).*d_sigmoid(z4');
% Layer 3 -> Layer 2
delta3 = (w3*delta4).*d_sigmoid(z3');
% Layer 2 -> Layer I
delta2 = (w2*delta3).*d_sigmoid(z2');
% Compute the desired partial derivatives
d_w1 = d_w1 + (delta2*a1)';
d_b1 = d_b1 + delta2';
d_w2 = d_w2 + (delta3*a2)';
d_b2 = d_b2 + delta3';
d_w3 = d_w3 + (delta4*a3)';
d_b3 = d_b3 + delta4';
d_w4 = d_w4 + (delta5*a4)';
d_b4 = d_b4 + delta5';
end
eta = 0.8; % leraning rate
% weights and bias updating
w1 = w1 - eta*((1/r*d_w1)+ lambda*w1);
b1 = b1 - eta*1/r*d_b1;
w2 = w2 - eta*((1/r*d_w2)+ lambda*w2);
b2 = b2 - eta*1/r*d_b2;
w3 = w3 - eta*((1/r*d_w3)+ lambda*w3);
b3 = b3 - eta*1/r*d_b3;
w4 = w4 - eta*((1/r*d_w4)+ lambda*w4);
b4 = b4 - eta*1/r*d_b4;
end
sigmoid和d_sigmoid函数如下所示:
function y = sigmoid(x);
L=1;
k=10;
x0=0;
y = L./(1+exp(-k*(x-x0)));
end
function y = d_sigmoid(x)
tmp = sigmoid(x);
y = tmp.*(1-tmp);
end
预测代码如下所示:
%% Prediction: X1 is testing data, and Y1 is a vector of testing label
[r,c] = size(X1);
for i = 1:r
A1 = X1(i,:);
% Input layer -> Layer 2
Z2 = A1*w1+b1;
A2 = sigmoid(Z2);
% Layer 2 -> Layer 3
Z3 = A2*w2+b2;
A3 = sigmoid(Z3);
% Layer 3 -> Layer 4
Z4 = A3*w3+b3;
A4 = sigmoid(Z4);
% Layer 4 -> Output Layer
Z5 = A4*w4+b4;
A5 = sigmoid(Z5);
pred(i) = find(A5==max(A5))
end
error = length(find((pred'-Y1)~=0))