Question

我试图使用Simplex算法实现优化器。在线原始代码在每次迭代中创建一个具有0个初始值的新向量。我试图在循环外创建一个公共，然后使用std::fill重置每次迭代中的值。我很惊讶第一个比第二个快。以我的观点来看，声明无论如何都要求内存并初始化值，不能更快。

有人可以帮忙解释一下吗？如果这是真的，第一种方法有什么不利之处吗？或者我们可以进一步改进吗？

这是代码。

新人：

    void Simplex(std::vector<double>& result, std::function<double(std::vector<double>)> func,
        std::vector<double> init, std::vector<std::vector<double>> x = std::vector<std::vector<double>>(),
        double EPS = 1E8 * std::numeric_limits<double>::epsilon(), int MAXIT = 1000000)
    {
        int N = init.size();                                //  Space Dimension
        //  Coefficients for the new points.
        const double a = 1.0;       //  a: Reflection
        const double b = 1.0;       //  b: Expansion
        const double g = 0.5;       //  g: Contraction
        const double h = 0.5;       //  h: Multi-Contraction
        std::vector<double> xcentroid_old(N, 0);    //  Old Simplex Centroid * (N + 1)
        std::vector<double> xcentroid_new(N, 0);    //  New Simplex Centroid * (N + 1)
        std::vector<double> vf(N + 1, 0);           //  Values at Simplex Vertexes       
        int x1 = 0;                 //  Index of smallest vertex.
        int xn = 0;                 //  Index of second greatest vertex.
        int xnp1 = 0;               //  Index of greatest vertex.
        int countIT = 0;                //  Iteration Count

        //  If no initial simplex is specified, construct the trial simplex.
        if (x.size() == 0)
        {
            std::vector<double> del(init);
            //  del = init / 20
            std::transform(del.begin(), del.end(), del.begin(),
                std::bind2nd(std::divides<double>(), 20));
            for (int i = 0; i < N; i++)
            {
                std::vector<double> tmp(init);
                tmp[i] += del[i];
                x.push_back(tmp);
            }
            x.push_back(init);

            // Calculate the xcentriod.
            std::transform(init.begin(), init.end(), xcentroid_old.begin(),
                std::bind2nd(std::multiplies<double>(), N + 1));
        }

        std::vector<double> xg(N);
        std::vector<double> xr(N);
        std::vector<double> xe(N);
        std::vector<double> xc(N);
        //  Optimization starts.
        for (countIT = 0; countIT < MAXIT; countIT++)
        {
            for (int i = 0; i < N + 1; i++)
                vf[i] = func(x[i]);

            // Find index of max, second max, min of vf.
            x1 = 0; xn = 0; xnp1 = 0;
            for (int i = 0; i < vf.size(); i++)
            {
                if (vf[i] < vf[x1])
                    x1 = i;
                if (vf[i] > vf[xnp1])
                    xnp1 = i;
            }
            xn = x1;
            for (int i = 0; i < vf.size(); i++)
            {
                if (vf[i] < vf[xnp1] && vf[i] > vf[xn])
                    xn = i;
            }

            //  xg: Centroid of the N best vertexes.
            std::fill(xg.begin(), xg.end(), 0);
            for (int i = 0; i < x.size(); i++)
            {
                if (i != xnp1)
                    std::transform(xg.begin(), xg.end(), x[i].begin(), xg.begin(), std::plus<double>());
            }
            std::transform(xg.begin(), xg.end(),
                x[xnp1].begin(), xcentroid_new.begin(), std::plus<double>());
            std::transform(xg.begin(), xg.end(), xg.begin(),
                std::bind2nd(std::divides<double>(), N));

            //  Termination condition: change (sum of absolute differences on all dimensions)
            //  of simplex centroid is less than EPS.
            double diff = 0;
            for (int i = 0; i < N; i++)
                diff += fabs(xcentroid_old[i] - xcentroid_new[i]);
            if (diff / N < EPS)
                break;
            else
                xcentroid_old.swap(xcentroid_new);

            //  Reflection
            std::fill(xr.begin(), xr.end(), 0);
            for (int i = 0; i < N; i++)
                xr[i] = xg[i] + a * (xg[i] - x[xnp1][i]);
            double fxr = func(xr);
            if (vf[x1] <= fxr && fxr <= vf[xn])
                //  If f(x1) <= f(xr) <= f(xn), update xnp1 to xr.
                std::copy(xr.begin(), xr.end(), x[xnp1].begin());
            else if (fxr < vf[x1])
            {
                //  If f(xr) < f(x1), expansion.
                std::fill(xe.begin(), xe.end(), 0);
                for (int i = 0; i<N; i++)
                    xe[i] = xr[i] + b * (xr[i] - xg[i]);
                //  Update xnp1 to the better one of xr or xe.
                if (func(xe) < fxr)
                    std::copy(xe.begin(), xe.end(), x[xnp1].begin());
                else
                    std::copy(xr.begin(), xr.end(), x[xnp1].begin());
            }
            else if (fxr > vf[xn])
            {
                //  If f(xr) > f(xn), contraction.
                std::fill(xc.begin(), xc.end(), 0);
                for (int i = 0; i < N; i++)
                    xc[i] = xg[i] + g * (x[xnp1][i] - xg[i]);
                if (func(xc) < vf[xnp1])
                    //  If f(xc) < f(xnp1), update xnp1 to xc.
                    std::copy(xc.begin(), xc.end(), x[xnp1].begin());
                else
                {
                    //  If f(xc) >= f(xnp1), multi-contraction.
                    for (int i = 0; i < x.size(); i++)
                    {
                        if (i != x1)
                        {
                            for (int j = 0; j < N; j++)
                                x[i][j] = x[x1][j] + h * (x[i][j] - x[x1][j]);
                        }
                    }
                }
            }
        }

        if (countIT == MAXIT)
            throw std::invalid_argument("Iteration limit achieves, result may not be optimal.");

        result = x[x1];
    }

原文：

    void Simplex_Original(std::vector<double>& result, std::function<double(std::vector<double>)> func,
        std::vector<double> init, std::vector<std::vector<double>> x = std::vector<std::vector<double>>(),
        double EPS = 1E8 * std::numeric_limits<double>::epsilon(), int MAXIT = 1000000)
    {
        int N = init.size();                                //  Space Dimension
        //  Coefficients for the new points.
        const double a = 1.0;       //  a: Reflection
        const double b = 1.0;       //  b: Expansion
        const double g = 0.5;       //  g: Contraction
        const double h = 0.5;       //  h: Multi-Contraction
        std::vector<double> xcentroid_old(N, 0);    //  Old Simplex Centroid * (N + 1)
        std::vector<double> xcentroid_new(N, 0);    //  New Simplex Centroid * (N + 1)
        std::vector<double> vf(N + 1, 0);           //  Values at Simplex Vertexes       
        int x1 = 0;                 //  Index of smallest vertex.
        int xn = 0;                 //  Index of second greatest vertex.
        int xnp1 = 0;               //  Index of greatest vertex.
        int countIT = 0;                //  Iteration Count

        //  If no initial simplex is specified, construct the trial simplex.
        if (x.size() == 0)
        {
            std::vector<double> del(init);
            //  del = init / 20
            std::transform(del.begin(), del.end(), del.begin(),
                std::bind2nd(std::divides<double>(), 20));
            for (int i = 0; i < N; i++)
            {
                std::vector<double> tmp(init);
                tmp[i] += del[i];
                x.push_back(tmp);
            }
            x.push_back(init);

            // Calculate the xcentriod.
            std::transform(init.begin(), init.end(), xcentroid_old.begin(),
                std::bind2nd(std::multiplies<double>(), N + 1));
        }

        //  Optimization starts.
        for (countIT = 0; countIT < MAXIT; countIT++)
        {
            for (int i = 0; i < N + 1; i++)
                vf[i] = func(x[i]);

            // Find index of max, second max, min of vf.
            x1 = 0; xn = 0; xnp1 = 0;
            for (int i = 0; i < vf.size(); i++)
            {
                if (vf[i] < vf[x1])
                    x1 = i;
                if (vf[i] > vf[xnp1])
                    xnp1 = i;
            }
            xn = x1;
            for (int i = 0; i < vf.size(); i++)
            {
                if (vf[i] < vf[xnp1] && vf[i] > vf[xn])
                    xn = i;
            }

            //  xg: Centroid of the N best vertexes.
            std::vector<double> xg(N, 0);
            for (int i = 0; i < x.size(); i++)
            {
                if (i != xnp1)
                    std::transform(xg.begin(), xg.end(), x[i].begin(), xg.begin(), std::plus<double>());
            }
            std::transform(xg.begin(), xg.end(),
                x[xnp1].begin(), xcentroid_new.begin(), std::plus<double>());
            std::transform(xg.begin(), xg.end(), xg.begin(),
                std::bind2nd(std::divides<double>(), N));

            //  Termination condition: change (sum of absolute differences on all dimensions)
            //  of simplex centroid is less than EPS.
            double diff = 0;
            for (int i = 0; i < N; i++)
                diff += fabs(xcentroid_old[i] - xcentroid_new[i]);
            if (diff / N < EPS)
                break;
            else
                xcentroid_old.swap(xcentroid_new);

            //  Reflection
            std::vector<double> xr(N, 0);
            for (int i = 0; i < N; i++)
                xr[i] = xg[i] + a * (xg[i] - x[xnp1][i]);
            double fxr = func(xr);
            if (vf[x1] <= fxr && fxr <= vf[xn])
                //  If f(x1) <= f(xr) <= f(xn), update xnp1 to xr.
                std::copy(xr.begin(), xr.end(), x[xnp1].begin());
            else if (fxr < vf[x1])
            {
                //  If f(xr) < f(x1), expansion.
                std::vector<double> xe(N, 0);
                for (int i = 0; i<N; i++)
                    xe[i] = xr[i] + b * (xr[i] - xg[i]);
                //  Update xnp1 to the better one of xr or xe.
                if (func(xe) < fxr)
                    std::copy(xe.begin(), xe.end(), x[xnp1].begin());
                else
                    std::copy(xr.begin(), xr.end(), x[xnp1].begin());
            }
            else if (fxr > vf[xn])
            {
                //  If f(xr) > f(xn), contraction.
                std::vector<double> xc(N, 0);
                for (int i = 0; i < N; i++)
                    xc[i] = xg[i] + g * (x[xnp1][i] - xg[i]);
                if (func(xc) < vf[xnp1])
                    //  If f(xc) < f(xnp1), update xnp1 to xc.
                    std::copy(xc.begin(), xc.end(), x[xnp1].begin());
                else
                {
                    //  If f(xc) >= f(xnp1), multi-contraction.
                    for (int i = 0; i < x.size(); i++)
                    {
                        if (i != x1)
                        {
                            for (int j = 0; j < N; j++)
                                x[i][j] = x[x1][j] + h * (x[i][j] - x[x1][j]);
                        }
                    }
                }
            }
        }

        if (countIT == MAXIT)
            throw std::invalid_argument("Iteration limit achieves, result may not be optimal.");

        result = x[x1];
    }

测试功能：

double func(vector<double> x)
{
    return (x[0] * x[0] + x[1] * x[1]) * (x[0] * x[0] + x[1] * x[1]) - (x[0] - 3 * x[1]) * (x[0] - 3 * x[1]);
}

void main()
{
    int m = 1000, n = 10;
    double dz = 0.1 / m / n;
    vector<double> init(2), result(2);

    init[0] = 3;    init[1] = 3;
    clock_t t1;
    t1 = clock();
    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
        {
            init[0] += dz;
            Optimizer::Simplex_Original(result, func, init);
        }
    }
    cout << "Old:" << '\t' << float(clock() - t1) / CLOCKS_PER_SEC << endl;
    cout << result[0] << '\t' << result[1] << endl;

    init[0] = 3;    init[1] = 3;
    t1 = clock();
    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
        {
            init[0] += dz;
            Optimizer::Simplex(result, func, init);
        }
    }
    cout << "New:" << '\t' << float(clock() - t1) / CLOCKS_PER_SEC << endl;
    cout << result[0] << '\t' << result[1] << endl;
}

我在发布模式下使用VS 2013，O2已开启。

对于原始的，10000次重复花费大约9s，但新花费为13s。

Answer 1

我认为向量构造函数知道内存是连续的，将能够更好地优化设置其内容。它可能只是memset整个区域，或类似的东西，而std :: fill不知道它正在访问什么类型的容器，所以必须通过递增每个元素的迭代器并单独写每个元素来迭代所有元素。

Answer 2

对大型复杂功能的微小更改可以在优化的代码中触发各种令人惊讶的性能变化（好的或坏的）。处理器很复杂，很难预测什么会更快或更慢。

也就是说，分配零填充内存可能比将已分配的块设置为零更快。

考虑：

std::vector<double> xg(N, 0);

如果0.0表示平台上的所有位为零（可能），则归结为分配一个充满零的内存块。事实证明，大多数虚拟内存操作系统可以轻松分配已经清零的内存。实际上，操作系统可能在另一个核心上运行一个线程，其唯一目的是将未使用的内存块清零，以便它们可用。分配内存时，它已经是一堆零，所以在分配器中没有额外的工作要做。

比较：

std::fill(xg.begin(), xg.end(), 0);

那是在你的核心上使用你的线程来消除内存块。你已经有效地失去了让操作系统在你自己的线程上提前为你工作的并发性。

虽然分配会产生性能成本，但如果你做了很多事情，通常应该把它视为一个潜在的瓶颈，直到你衡量之后你才能真正知道。

矢量重新填充比创建新的更慢？

2 个答案: