线性回归的梯度下降无法完美收敛

时间:2015-12-22 10:27:40

标签: c++ machine-learning gradient-descent

我正在尝试在均匀分布的训练集上实现基本的梯度下降算法。由于数据是均匀分布的,我认为拟合出的分界线应该是对角线,但实际得到的直线如下图所示。图中的圆圈是我的数据点,直线表示假设函数 h(x)。(图 1)

  

我只用 OpenCV 来显示输出结果,没有其他用途。我使用的是下面的更新公式:

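(公式图片缺失;与下方代码一致的更新规则为)
$$\theta_0 := \theta_0 - \frac{\alpha}{m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr),\qquad \theta_1 := \theta_1 - \frac{\alpha}{m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)\,x^{(i)}$$
其中 $h_\theta(x) = \theta_0 + \theta_1 x$,$m$ 为训练样本数。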

#include <unistd.h>

#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <cv.h>
#include <highgui.h>

#define WIN_WIDTH 500
#define WIN_HEIGHT 500
#define MAX_POINTS 500

using namespace std;
using namespace cv;

/**
 * Appends a regular lattice of training points to randPoints.
 *
 * Despite the name, nothing here is random: one point is produced for every
 * (x, y) pair with x in {20, 40, ...} below WIN_WIDTH and y in {20, 40, ...}
 * below WIN_HEIGHT. Existing contents of randPoints are kept.
 *
 * Because every x is paired with every y, x and y are uncorrelated over the
 * generated set, so a least-squares line fitted to it is horizontal at the
 * mean y rather than diagonal.
 *
 * @param randPoints  vector the lattice points are appended to
 * @param size        currently unused; kept so existing callers still compile
 */
void getPoints(vector<Point> &randPoints, int size)
{
  (void)size;  // the lattice extent comes from WIN_WIDTH / WIN_HEIGHT only

  const int step = 20;

  // x is bounded by the window width and y by its height. With a square
  // window this yields the same points, in the same order, as bounding them
  // the other way round, but it stays correct if the two sizes ever differ.
  for (int x = step; x < WIN_WIDTH; x += step)
  {
    for (int y = step; y < WIN_HEIGHT; y += step)
    {
      randPoints.push_back(Point(x, y));
    }
  }
}

/**
 * Performs one batch gradient-descent step for the line h(x) = th1 + th2 * x
 * under the mean-squared-error cost.
 *
 * Both sums are accumulated with the parameter values from before the step,
 * so th1 and th2 are updated simultaneously:
 *
 *   th1 -= alpha / m * sum(h(x_i) - y_i)
 *   th2 -= alpha / m * sum((h(x_i) - y_i) * x_i)
 *
 * @param th1       intercept; updated in place
 * @param th2       slope; updated in place
 * @param alpha     learning rate; only read
 * @param pointVec  training samples (x, y); only read
 *
 * If pointVec is empty the parameters are left untouched.
 */
void gradientDescent( double &th1,  double &th2,  double &alpha, vector<Point> &pointVec)
{
    // Without samples, alpha / m divides by zero and inf * 0 would turn both
    // parameters into NaN; there is nothing to learn from, so do nothing.
    if (pointVec.empty())
        return;

    const size_t sampleCount = pointVec.size();

    double sum1 = 0.0, sum2 = 0.0;

    for (size_t i = 0; i < sampleCount; ++i)
    {
        // Prediction error for this sample, shared by both partial derivatives.
        const double residual = (th1 + th2 * pointVec[i].x) - pointVec[i].y;

        sum1 += residual;
        sum2 += residual * pointVec[i].x;
    }

    const double scale = alpha / static_cast<double>(sampleCount);

    th1 = th1 - (scale * sum1);
    th2 = th2 - (scale * sum2);
}

/**
 * Runs batch gradient descent on the lattice produced by getPoints() and
 * animates the current fitted line in an OpenCV window.
 *
 * Usage: <program> [theta0 theta1]
 *   The two initial parameters must be supplied together; otherwise the
 *   defaults 0 and 0.25 are used. The learning rate is fixed at 1e-7.
 *
 * Press Esc to stop iterating; the last frame then stays on screen until any
 * key is pressed.
 */
int main(int argc, char**argv)
{
    // cv::Mat takes (rows, cols): rows are the image height, cols its width.
    Mat img(WIN_HEIGHT, WIN_WIDTH, CV_8UC3);
    img = Scalar(255, 255, 255);

    vector<Point> randPoints;
    getPoints(randPoints, MAX_POINTS);

    cout << "Training size = " << randPoints.size() << endl;

    // Draw the training points once; each frame starts from a copy of this image.
    for (size_t i = 0; i < randPoints.size(); ++i)
        circle(img, randPoints[i], 4, Scalar(255, 0, 0), 1, 8);

    double theta1 = 0, theta2 = 0.25, alpha = 0.0000001;

    if (argc > 2)
    {
        theta1 = atof(argv[1]);
        theta2 = atof(argv[2]);
    }

    cout << "Theta0 = " << theta1 << " Theta1 = " << theta2 << endl;
    cout << "Learning rate = " << alpha << endl;

    Mat tmpImg(WIN_HEIGHT, WIN_WIDTH, CV_8UC3);

    // Loop-invariant x positions: the far end of the drawn line (well past the
    // right edge, so the line always crosses the whole window) and the x of
    // the labelled probe point on the line.
    const int lineEndX = WIN_WIDTH + WIN_HEIGHT;
    const int probeX = WIN_WIDTH - 200;
    const int escKey = 27;

    while (true)
    {
        gradientDescent(theta1, theta2, alpha, randPoints);

        // Pixel coordinates are integral; truncate the model output explicitly.
        const int lineStartY = static_cast<int>(theta1);
        const int lineEndY = static_cast<int>(theta1 + (theta2 * lineEndX));
        const int probeY = static_cast<int>(theta1 + theta2 * probeX);

        img.copyTo(tmpImg);

        circle(tmpImg, Point(probeX, probeY), 4, Scalar(0, 0, 255), -1, 8);

        char text[64];
        snprintf(text, sizeof(text), "(%d, %d)", probeX, probeY);

        putText(tmpImg, text, Point(probeX+3, probeY+3), FONT_HERSHEY_SCRIPT_SIMPLEX, 0.4, Scalar(0, 255, 0), 1, 8);

        line(tmpImg, Point(0, lineStartY), Point(lineEndX, lineEndY), Scalar(0, 0, 255));

        imshow("Gradient Descent", tmpImg);

        // waitKey paces the animation and lets HighGUI process window events.
        // It returns a negative value when no key was pressed; some builds set
        // high bits on the key code, so compare only the low byte against Esc.
        const int key = waitKey(33);
        if (key >= 0 && (key & 0xFF) == escKey)
            break;
    }

    // Keep the final frame visible until the user presses a key.
    imshow("Gradient Descent", tmpImg);
    waitKey(0);

    return 0;
}

0 个答案:

没有答案