I obtained the learning curve below, which plots the error cost on the training and cross-validation sets against the number of training examples (in 100s in the figure). Can someone tell me whether such a learning curve is even possible? I was under the impression that the cross-validation error decreases as the number of training examples grows.
(Figure: Learning Curve. Note that the x axis denotes the number of training examples in 100s.)
Edit:
Here is the code I use to compute the nine values plotted on the learning curve.
X is the 2D matrix of training set examples, with dimensions m x (n+1). y has dimensions m x 1, and each of its elements is either 1 or 0.
for j = 1:9
    disp(j)
    % Train on the first j*100 training examples
    [theta, J] = trainClassifier(X(1:(j*100),:), y(1:(j*100)), lambda);
    % Training error of that model on the same j*100 examples
    [error_train(j), grad] = costprediciton_train(theta, X(1:(j*100),:), y(1:(j*100)));
    % Cross-validation error on the first j*100 CV examples
    [error_cv(j), grad] = costfunction_test2(theta, Xcv(1:(j*100),:), ycv(1:(j*100)));
end
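One aside on the loop above: learning curves are conventionally plotted with the CV error evaluated on the entire cross-validation set at every step, and only the training set is truncated. A minimal sketch of that convention, reusing the functions above and assuming Xcv/ycv hold the full CV set:

for j = 1:9
    [theta, J] = trainClassifier(X(1:(j*100),:), y(1:(j*100)), lambda);
    [error_train(j), grad] = costprediciton_train(theta, X(1:(j*100),:), y(1:(j*100)));
    % evaluate on ALL CV examples, not just the first j*100
    [error_cv(j), grad] = costfunction_test2(theta, Xcv, ycv);
end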
The code I use to find the optimal values of theta from the training set:
% Train the classifier. Return theta.
function [optTheta, J] = trainClassifier(X, y, lambda)
    [m, n] = size(X);
    initialTheta = zeros(n, 1);
    % 'GradObj','on' tells fminunc that the cost function also returns its gradient
    options = optimset('GradObj', 'on', 'MaxIter', 100);
    [optTheta, J, Exit_flag] = fminunc(@(t)(regularizedCostFunction(t, X, y, lambda)), initialTheta, options);
end
% Regularized logistic regression cost and gradient
function [J, grad] = regularizedCostFunction(theta, X, y, lambda)
    [m, n] = size(X);
    h = sigmoid(X * theta);
    temp1 = -1 * (y .* log(h));
    temp2 = (1 - y) .* log(1 - h);
    thetaT = theta;
    thetaT(1) = 0;   % do not regularize the bias term
    correction = sum(thetaT .^ 2) * (lambda / (2 * m));
    J = sum(temp1 - temp2) / m + correction;
    grad = (X' * (h - y)) * (1/m) + thetaT * (lambda / m);
end
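As a quick sanity check of this function, the cost on a tiny hand-computable example (hypothetical values, with lambda = 0 so the regularization term vanishes) comes out as expected:

% Toy check: two examples, one feature plus intercept column (hypothetical values)
theta = [0; 1];
X = [1 0; 1 2];
y = [0; 1];
[J, grad] = regularizedCostFunction(theta, X, y, 0);
% h = sigmoid([0; 2]) = [0.5; 0.8808]
% J = -(log(1 - 0.5) + log(0.8808)) / 2, approximately 0.4100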
The code I use to compute the error cost of the predictions on the training set (the error-cost code for the CV set is similar). Theta has dimensions (n+1) x 1 and holds the coefficients of the features in the hypothesis function.
function [J, grad] = costprediciton_train(theta, X, y)
    [m, n] = size(X);
    h = sigmoid(X * theta);
    temp1 = y .* log(h);
    temp2 = (1 - y) .* log(1 - h);
    % unregularized cost, as is usual when plotting learning curves
    J = -sum(temp1 + temp2) / m;
    t = h - y;
    grad = (X' * t) * (1/m);
end
function [J, grad] = costfunction_test2(theta, X, y)
    m = length(y);
    h = sigmoid(X * theta);
    temp1 = y .* log(h);
    temp2 = (1 - y) .* log(1 - h);
    J = -sum(temp1 + temp2) / m;
    grad = (X' * (h - y)) * (1/m);
end
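Since costprediciton_train and costfunction_test2 compute exactly the same unregularized cost and gradient, they could be collapsed into a single helper (a sketch; unregularizedCost is a hypothetical name):

% Hypothetical shared helper replacing both functions above
function [J, grad] = unregularizedCost(theta, X, y)
    m = length(y);
    h = sigmoid(X * theta);
    J = -sum(y .* log(h) + (1 - y) .* log(1 - h)) / m;
    grad = (X' * (h - y)) * (1/m);
end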
The sigmoid function:
function g = sigmoid(z)
    g = 1 ./ (1 + exp(-z));   % elementwise logistic function
end
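One caveat worth flagging: if the optimizer pushes an activation so close to 0 or 1 that it rounds to exactly 0 or 1 in double precision, log(h) or log(1 - h) becomes -Inf and the plotted cost turns into Inf or NaN. A minimal safeguard, assuming it is applied inside the cost functions right after computing h:

h = min(max(h, eps), 1 - eps);   % clamp so log(h) and log(1 - h) stay finite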