我在实现批量和随机梯度下降方面迈出了第一步。
这是我的实施:
package ch.learning;
import java.util.*;
import org.jzy3d.analysis.AbstractAnalysis;
import org.jzy3d.analysis.AnalysisLauncher;
import org.jzy3d.chart.factories.AWTChartComponentFactory;
import org.jzy3d.colors.Color;
import org.jzy3d.colors.ColorMapper;
import org.jzy3d.colors.colormaps.ColorMapRainbow;
import org.jzy3d.maths.Coord3d;
import org.jzy3d.maths.Range;
import org.jzy3d.plot3d.builder.*;
import org.jzy3d.plot3d.builder.concrete.*;
import org.jzy3d.plot3d.primitives.Scatter;
import org.jzy3d.plot3d.primitives.Shape;
import org.jzy3d.plot3d.rendering.canvas.Quality;
import org.apache.commons.math3.analysis.function.Sigmoid;
public class LogisticReg_GradientDescent {
private List<double[]> trainingExamples = new LinkedList<double[]>();
private static final int sizeTrainingset = 1000;
private volatile double[] theta = {10, 10, 10, 10 };
// Configurable compoenent of step size during theata update
private final double alpha = 0.01;
// Amount of iteration in Batch Gradient Descent
private static final int iterations = 10000;
private static final int printsAtStartAndEnd = 5;
private void buildTrainingExample(int amount) {
// Area of the house
double areaMin = 80;
double areaMax = 1000;
double areaRange = areaMax - areaMin;
// Distance to center
double distanceMin = 10;
double distanceMax = 10000;
double distanceRange = distanceMax - distanceMin;
// Generate training examples with prices
for (int i = 0; i < amount; i++) {
double[] example = new double[5];
example[0] = 1.0;
example[1] = areaMin + Math.random() * areaRange;
example[2] = distanceMin + Math.random() * distanceRange;
// Price is a feature as well in this logistic regression example
double price = 0;
price += _priceComponent(example[1], areaRange);
price += _priceComponent(example[2], distanceRange);
// price += _priceComponent(example[3], yocRange);
example[3] = price;
example[4] = (price>200000)?0:1;
trainingExamples.add(example);
}
}
// Random price according with some range constraints
private double _priceComponent(double value, double range) {
if (value <= range / 3)
return 50000 + 50000 * Math.random() * 0.1;
if (value <= (range / 3 * 2))
return 100000 + 100000 * Math.random() * 0.1;
return 150000 + 150000 * Math.random() * 0.1;
}
private double classificationByHypothesis(double[] features) {
// Scaling
double scalingF0 = features[0];
double scalingF1 = (features[1] - 80) / (920);
double scalingF2 = (features[2] - 10) / (9990);
double scalingF3 = (features[3] - 50000) / (400000);
double z = this.theta[0] * scalingF0 + this.theta[1] * scalingF1 + this.theta[2] * scalingF2
+ this.theta[3] * scalingF3;
double ret = 1 / (1 + Math.pow(Math.E, -z));
return ret;
}
// Costfunction: Mean squared error function
private double gradientBatch_costs() {
double costs = this.trainingExamples.stream().mapToDouble(l -> {
double costsint;
if (l[4] == 0) {
costsint = -Math.log(1 - classificationByHypothesis(l));
} else {
costsint = -Math.log(classificationByHypothesis(l));
}
return costsint;
}).sum();
return costs / this.trainingExamples.size();
}
// Theta Update with Batch Gradient Descent
private void gradientBatch_thetaUpdate(int amount) {
for (int i = 0; i < amount; i++) {
double partialDerivative0 = this.trainingExamples.stream()
.mapToDouble(l -> (classificationByHypothesis(l) - l[4]) * l[0]).sum();
double tmpTheta0 = this.theta[0] - (this.alpha * partialDerivative0 / this.trainingExamples.size());
double partialDerivative1 = this.trainingExamples.stream()
.mapToDouble(l -> (classificationByHypothesis(l) - l[4]) * l[1]).sum();
double tmpTheta1 = this.theta[1] - (this.alpha * partialDerivative1 / this.trainingExamples.size());
double partialDerivative2 = this.trainingExamples.stream()
.mapToDouble(l -> (classificationByHypothesis(l) - l[4]) * l[2]).sum();
double tmpTheta2 = this.theta[2] - (this.alpha * partialDerivative2 / this.trainingExamples.size());
double partialDerivative3 = this.trainingExamples.stream()
.mapToDouble(l -> (classificationByHypothesis(l) - l[4]) * l[3]).sum();
double tmpTheta3 = this.theta[3] - (this.alpha * partialDerivative3 / this.trainingExamples.size());
this.theta = new double[] { tmpTheta0, tmpTheta1, tmpTheta2, tmpTheta3 };
}
}
// Theta update with Stochastic Gradient Descent
private void gradientStochastic_thetaUpdate(double[] feature) {
double tmpTheta0 = this.theta[0] - this.alpha * (classificationByHypothesis(feature) - feature[4]) * feature[0];
double tmpTheta1 = this.theta[1] - this.alpha * (classificationByHypothesis(feature) - feature[4]) * feature[1];
double tmpTheta2 = this.theta[2] - this.alpha * (classificationByHypothesis(feature) - feature[4]) * feature[2];
double tmpTheta3 = this.theta[3] - this.alpha * (classificationByHypothesis(feature) - feature[4]) * feature[3];
this.theta = new double[] { tmpTheta0, tmpTheta1, tmpTheta2, tmpTheta3 };
}
private void resetTheta() {
this.theta = new double[] {0.00001, 0.00001, 0.00001, 0.00001};
}
private void printSummary(int iteration) {
System.out.println(String.format("%s \t\t Theta: %f \t %f \t %f \t %f \t Costs: %f", iteration, this.theta[0],
this.theta[1], this.theta[2], this.theta[3], this.gradientBatch_costs()));
}
public static void main(String[] args) {
LogisticReg_GradientDescent d = new LogisticReg_GradientDescent();
// Batch and Stochastic Gradient Descent use the same training example
d.buildTrainingExample(sizeTrainingset);
System.out.println("Batch Gradient Descent");
d.printSummary(0);
System.out.println(String.format("First %s iterations", printsAtStartAndEnd));
for (int i = 1; i <= iterations; i++) {
d.gradientBatch_thetaUpdate(1);
d.printSummary(i);
}
System.out.println("Some examples are:");
System.out.println(String.format("The 1:%s, Area:%s, Distance:%s, Price:%s, Classification:%s", d.trainingExamples.get(0)[0],d.trainingExamples.get(0)[1],d.trainingExamples.get(0)[2],d.trainingExamples.get(0)[3],d.trainingExamples.get(0)[4]));
System.out.println(String.format("The 1:%s, Area:%s, Distance:%s, Price:%s, Classification:%s", d.trainingExamples.get(500)[0],d.trainingExamples.get(500)[1],d.trainingExamples.get(500)[2],d.trainingExamples.get(500)[3],d.trainingExamples.get(500)[4]));
try {
AnalysisLauncher.open(d.new SurfaceDemo());
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
class SurfaceDemo extends AbstractAnalysis{
@Override
public void init(){
double x;
double y;
double z;
float a;
Coord3d[] points = new Coord3d[trainingExamples.size()];
Color[] colors = new Color[trainingExamples.size()];
for(int i=0; i<trainingExamples.size(); i++){
x = trainingExamples.get(i)[1]; // Area
y = trainingExamples.get(i)[2]; // Distance to center
z = trainingExamples.get(i)[3]; // price
points[i] = new Coord3d(x, y, z);
a = 1f;
if(trainingExamples.get(i)[4]==1){
colors[i] =new Color(0,0,0,a);
}else{
colors[i]= new Color(250,0,0,a);
}
}
Scatter scatter = new Scatter(points, colors);
scatter.setWidth(4);
Mapper mapper = new Mapper() {
@Override
public double f(double x, double y) {
return (-theta[0]-theta[1]*x-theta[2]*y)/theta[3];
}
};
// Create the object to represent the function over the given range.
Range rangeX = new Range(0, 1000);
Range rangeY = new Range(0, 10000);
int steps = 10;
final Shape surface = Builder.buildOrthonormal(new OrthonormalGrid(rangeX, steps, rangeY, steps), mapper);
surface.setColorMapper(new ColorMapper(new ColorMapRainbow(), surface.getBounds().getZmin(), surface.getBounds().getZmax(), new Color(1, 1, 1, .5f)));
surface.setFaceDisplayed(true);
surface.setWireframeDisplayed(false);
chart = AWTChartComponentFactory.chart(Quality.Advanced, getCanvasType());
chart.getScene().add(scatter);
chart.getScene().add(surface);
}
}
}
图形表示看起来像
所以我用org.jzy3d.plot3d绘制生成的训练实例。 我们看到x(房子的面积),y(到市中心的距离)和z(价格)。 分类使红色(负类 - &gt;未售出)和黑色(正类 - &gt;销售)。
在生成的培训实例中,分类仅取决于价格,您可以在此处看到:
example[4] = (price>200000)?0:1;
问题,我不明白的是
我想绘制分类器的决策边界。 决定边界取决于Theta的优化组件。 (使用批量梯度下降)。 所以我尝试用这段代码绘制决策边界平面:
Mapper mapper = new Mapper() {
@Override
public double f(double x, double y) {
return (-theta[0]-theta[1]*x-theta[2]*y)/theta[3];
}
};
由于
theta [0] * 1 + theta [1] * x + theta [2] * y + theta [3] * z = 0
所以
z = - (theta [0] * 1 + theta [1] * x + theta [2] * y)/ theta [3]
我希望我的决定在红色和黑色区域之间的边界平面。 相反,它会被z = 0挂起。
我不知道,要么我不能以正确的方式绘制这个决策边界平面,要么我的优化参数是糟糕的。 此外,我不知道如何选择一个好的初始θ矢量。 现在我用
private volatile double[] theta = {1, 1, 1, 1 };
我将alpha设置为0.0001
private final double alpha = 0.0001;
这是最大可能的Alpha,我的成本函数不会跳转,而且sigmoid实现不会返回无穷大。 我已经在
进行了功能扩展private double classificationByHypothesis(double[] features) {
// Scaling
double scalingF0 = features[0];
double scalingF1 = (features[1] - 80) / (920);
double scalingF2 = (features[2] - 10) / (9990);
double scalingF3 = (features[3] - 50000) / (400000);
double z = this.theta[0] * scalingF0 + this.theta[1] * scalingF1 + this.theta[2] * scalingF2
+ this.theta[3] * scalingF3;
double ret = 1 / (1 + Math.pow(Math.E, -z));
return ret;
}
给定初始theta和alpha等于0.0001的最后五次迭代是
9996,Theta:1.057554,-6.340981,-6.242139,8.145087,费用:0.359108
9997,Theta:1.057560,-6.341234,-6.242345,8.145576,费用:0.359109
9998,Theta:1.057565,-6.341487,-6.242552,8.146065,费用:0.359110
9999,Theta:1.057571,-6.341740,-6.242758,8.146553,费用:0.359112
10000,Theta:1.057576,-6.341993,-6.242965,8.147042,费用:0.359113
生成的培训实例的一些示例是
面积:431.50139030510206,距离:8591.341686012887, 价钱:255049.1280388437,分类:0.0
面积:727.4042972310916,距离:4364.710136408952, 价钱:258385.59452489938,分类:0.0
感谢任何提示!