Question

我在实现批量和随机梯度下降方面迈出了第一步。

这是我的实施：

package ch.learning;

import java.util.*;

import org.jzy3d.analysis.AbstractAnalysis;
import org.jzy3d.analysis.AnalysisLauncher;
import org.jzy3d.chart.factories.AWTChartComponentFactory;
import org.jzy3d.colors.Color;
import org.jzy3d.colors.ColorMapper;
import org.jzy3d.colors.colormaps.ColorMapRainbow;
import org.jzy3d.maths.Coord3d;
import org.jzy3d.maths.Range;
import org.jzy3d.plot3d.builder.*;
import org.jzy3d.plot3d.builder.concrete.*;
import org.jzy3d.plot3d.primitives.Scatter;
import org.jzy3d.plot3d.primitives.Shape;
import org.jzy3d.plot3d.rendering.canvas.Quality;


import org.apache.commons.math3.analysis.function.Sigmoid;


public class LogisticReg_GradientDescent {

    private List<double[]> trainingExamples = new LinkedList<double[]>();
    private static final int sizeTrainingset = 1000;
    private volatile  double[] theta = {10, 10, 10, 10 };
    // Configurable compoenent of step size during theata update
    private final double alpha = 0.01;
    // Amount of iteration in Batch Gradient Descent
    private static final int iterations = 10000;
    private static final int printsAtStartAndEnd = 5;

    private void buildTrainingExample(int amount) {

        // Area of the house
        double areaMin = 80;
        double areaMax = 1000;
        double areaRange = areaMax - areaMin;

        // Distance to center
        double distanceMin = 10;
        double distanceMax = 10000;
        double distanceRange = distanceMax - distanceMin;

        // Generate training examples with prices
        for (int i = 0; i < amount; i++) {
            double[] example = new double[5];
            example[0] = 1.0;
            example[1] = areaMin + Math.random() * areaRange;
            example[2] = distanceMin + Math.random() * distanceRange;
            // Price is a feature as well in this logistic regression example
            double price = 0;
            price += _priceComponent(example[1], areaRange);
            price += _priceComponent(example[2], distanceRange);
            // price += _priceComponent(example[3], yocRange);
            example[3] = price;
            example[4] = (price>200000)?0:1;
            trainingExamples.add(example);
        }
    }

    // Random price according with some range constraints
    private double _priceComponent(double value, double range) {
        if (value <= range / 3)
            return 50000 + 50000 * Math.random() * 0.1;
        if (value <= (range / 3 * 2))
            return 100000 + 100000 * Math.random() * 0.1;
        return 150000 + 150000 * Math.random() * 0.1;
    }

    private double classificationByHypothesis(double[] features) {
        // Scaling
        double scalingF0 = features[0];
        double scalingF1 = (features[1] - 80) / (920);
        double scalingF2 = (features[2] - 10) / (9990);
        double scalingF3 = (features[3] - 50000) / (400000);

        double z = this.theta[0] * scalingF0 + this.theta[1] * scalingF1 + this.theta[2] * scalingF2
                + this.theta[3] * scalingF3;

        double ret = 1 / (1 + Math.pow(Math.E, -z));
        return ret;
    }

    // Costfunction: Mean squared error function
    private double gradientBatch_costs() {

        double costs = this.trainingExamples.stream().mapToDouble(l -> {
            double costsint;
            if (l[4] == 0) {

                costsint = -Math.log(1 - classificationByHypothesis(l));
            } else {

                costsint = -Math.log(classificationByHypothesis(l));
            }
            return costsint;
        }).sum();

        return costs / this.trainingExamples.size();
    }

    // Theta Update with Batch Gradient Descent
    private void gradientBatch_thetaUpdate(int amount) {
        for (int i = 0; i < amount; i++) {

            double partialDerivative0 = this.trainingExamples.stream()
                    .mapToDouble(l -> (classificationByHypothesis(l) - l[4]) * l[0]).sum();
            double tmpTheta0 = this.theta[0] - (this.alpha * partialDerivative0 / this.trainingExamples.size());

            double partialDerivative1 = this.trainingExamples.stream()
                    .mapToDouble(l -> (classificationByHypothesis(l) - l[4]) * l[1]).sum();
            double tmpTheta1 = this.theta[1] - (this.alpha * partialDerivative1 / this.trainingExamples.size());

            double partialDerivative2 = this.trainingExamples.stream()
                    .mapToDouble(l -> (classificationByHypothesis(l) - l[4]) * l[2]).sum();
            double tmpTheta2 = this.theta[2] - (this.alpha * partialDerivative2 / this.trainingExamples.size());

            double partialDerivative3 = this.trainingExamples.stream()
                    .mapToDouble(l -> (classificationByHypothesis(l) - l[4]) * l[3]).sum();
            double tmpTheta3 = this.theta[3] - (this.alpha * partialDerivative3 / this.trainingExamples.size());

            this.theta = new double[] { tmpTheta0, tmpTheta1, tmpTheta2, tmpTheta3 };
        }
    }

    // Theta update with Stochastic Gradient Descent
    private void gradientStochastic_thetaUpdate(double[] feature) {
        double tmpTheta0 = this.theta[0] - this.alpha * (classificationByHypothesis(feature) - feature[4]) * feature[0];
        double tmpTheta1 = this.theta[1] - this.alpha * (classificationByHypothesis(feature) - feature[4]) * feature[1];
        double tmpTheta2 = this.theta[2] - this.alpha * (classificationByHypothesis(feature) - feature[4]) * feature[2];
        double tmpTheta3 = this.theta[3] - this.alpha * (classificationByHypothesis(feature) - feature[4]) * feature[3];
        this.theta = new double[] { tmpTheta0, tmpTheta1, tmpTheta2, tmpTheta3 };
    }

    private void resetTheta() {
        this.theta = new double[] {0.00001, 0.00001, 0.00001, 0.00001};
    }

    private void printSummary(int iteration) {
        System.out.println(String.format("%s \t\t Theta: %f \t %f \t %f \t %f \t Costs: %f", iteration, this.theta[0],
                this.theta[1], this.theta[2], this.theta[3], this.gradientBatch_costs()));
    }

    public static void main(String[] args) {
        LogisticReg_GradientDescent d = new LogisticReg_GradientDescent();

        // Batch and Stochastic Gradient Descent use the same training example
        d.buildTrainingExample(sizeTrainingset);




        System.out.println("Batch Gradient Descent");
        d.printSummary(0);

        System.out.println(String.format("First %s iterations", printsAtStartAndEnd));
        for (int i = 1; i <= iterations; i++) {
            d.gradientBatch_thetaUpdate(1);
            d.printSummary(i);
        }

        System.out.println("Some examples are:");
        System.out.println(String.format("The 1:%s, Area:%s, Distance:%s, Price:%s, Classification:%s", d.trainingExamples.get(0)[0],d.trainingExamples.get(0)[1],d.trainingExamples.get(0)[2],d.trainingExamples.get(0)[3],d.trainingExamples.get(0)[4]));
        System.out.println(String.format("The 1:%s, Area:%s, Distance:%s, Price:%s, Classification:%s", d.trainingExamples.get(500)[0],d.trainingExamples.get(500)[1],d.trainingExamples.get(500)[2],d.trainingExamples.get(500)[3],d.trainingExamples.get(500)[4]));

         try {
                AnalysisLauncher.open(d.new SurfaceDemo());
            } catch (Exception e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
        class SurfaceDemo extends AbstractAnalysis{

            @Override
            public void init(){
                double x;
                double y;
                double z;
                float a;
                Coord3d[] points = new Coord3d[trainingExamples.size()];
                Color[]   colors = new Color[trainingExamples.size()];

                for(int i=0; i<trainingExamples.size(); i++){
                    x = trainingExamples.get(i)[1]; // Area
                    y = trainingExamples.get(i)[2]; // Distance to center
                    z = trainingExamples.get(i)[3]; // price
                    points[i] = new Coord3d(x, y, z);

                    a = 1f;
                    if(trainingExamples.get(i)[4]==1){
                        colors[i] =new Color(0,0,0,a);
                    }else{
                        colors[i]= new Color(250,0,0,a);
                    }
                }

                Scatter scatter = new Scatter(points, colors);
                scatter.setWidth(4);

                Mapper mapper = new Mapper() {
                    @Override
                    public double f(double x, double y) {

                       return (-theta[0]-theta[1]*x-theta[2]*y)/theta[3];
                    }
                };

                // Create the object to represent the function over the given range.
                Range rangeX = new Range(0, 1000);
                Range rangeY = new Range(0, 10000);
                int steps = 10;
                final Shape surface = Builder.buildOrthonormal(new OrthonormalGrid(rangeX, steps, rangeY, steps), mapper);
                surface.setColorMapper(new ColorMapper(new ColorMapRainbow(), surface.getBounds().getZmin(), surface.getBounds().getZmax(), new Color(1, 1, 1, .5f)));
                surface.setFaceDisplayed(true);
                surface.setWireframeDisplayed(false);


                chart = AWTChartComponentFactory.chart(Quality.Advanced, getCanvasType());
                chart.getScene().add(scatter);
                chart.getScene().add(surface);

            }
    }
}

图形表示看起来像

所以我用org.jzy3d.plot3d绘制生成的训练实例。我们看到x（房子的面积），y（到市中心的距离）和z（价格）。分类使红色（负类 - ＆gt;未售出）和黑色（正类 - ＆gt;销售）。

在生成的培训实例中，分类仅取决于价格，您可以在此处看到：

 example[4] = (price>200000)?0:1;

问题，我不明白的是

我想绘制分类器的决策边界。决定边界取决于Theta的优化组件。（使用批量梯度下降）。所以我尝试用这段代码绘制决策边界平面：

Mapper mapper = new Mapper() {
                    @Override
                    public double f(double x, double y) {

                       return (-theta[0]-theta[1]*x-theta[2]*y)/theta[3];
                    }
                };

由于

theta [0] * 1 + theta [1] * x + theta [2] * y + theta [3] * z = 0

所以

z = - （theta [0] * 1 + theta [1] * x + theta [2] * y）/ theta [3]

我希望我的决定在红色和黑色区域之间的边界平面。相反，它会被z = 0挂起。

我不知道，要么我不能以正确的方式绘制这个决策边界平面，要么我的优化参数是糟糕的。此外，我不知道如何选择一个好的初始θ矢量。现在我用

private volatile  double[] theta = {1, 1, 1, 1 };

我将alpha设置为0.0001

private final double alpha = 0.0001;

这是最大可能的Alpha，我的成本函数不会跳转，而且sigmoid实现不会返回无穷大。我已经在

进行了功能扩展

private double classificationByHypothesis(double[] features) {

    // Scaling
    double scalingF0 = features[0];
    double scalingF1 = (features[1] - 80) / (920);
    double scalingF2 = (features[2] - 10) / (9990);
    double scalingF3 = (features[3] - 50000) / (400000);

    double z = this.theta[0] * scalingF0 + this.theta[1] * scalingF1 + this.theta[2] * scalingF2
            + this.theta[3] * scalingF3;

    double ret = 1 / (1 + Math.pow(Math.E, -z));
    return ret;
}

给定初始theta和alpha等于0.0001的最后五次迭代是

9996，Theta：1.057554，-6.340981，-6.242139,8.145087，费用：0.359108

9997，Theta：1.057560，-6.341234，-6.242345,8.145576，费用：0.359109

9998，Theta：1.057565，-6.341487，-6.242552,8.146065，费用：0.359110

9999，Theta：1.057571，-6.341740，-6.242758,8.146553，费用：0.359112

10000，Theta：1.057576，-6.341993，-6.242965,8.147042，费用：0.359113

生成的培训实例的一些示例是

面积：431.50139030510206，距离：8591.341686012887，   价钱：255049.1280388437，分类：0.0

面积：727.4042972310916，距离：4364.710136408952，   价钱：258385.59452489938，分类：0.0

感谢任何提示！

批量梯度下降没有收敛

0 个答案: