Question

我正在尝试解决以下数值优化问题：找到向量 x，使成本函数 0.5 * norm(Bx - v, 2)^2 最小，其中 B 是矩阵，v 是向量。我已经实现了两种梯度下降算法：在第一种中我手动调整步长，在第二种中我使用 ftp://lsec.cc.ac.cn/pub/yyx/papers/p0504.pdf 中的方程 (2.5) 自动计算步长。成本函数的梯度为 B^T (B*x - v)。

另外，我将我的实现与 numpy.linalg 中的 solve(A, b) 函数进行了比较，因为该优化问题的解就是线性系统 A*x = b 的解，其中 A = B^T * B，b = B^T * v。到目前为止，我得到的结果很差：误差很大，运行时间也很长。我不知道是我的实现有错误，还是这些算法在我设置的计算实验中本来就是这样的表现。

在计算实验中，我生成随机“解”向量 x 和矩阵 B。然后相应地计算 A 和 b。

感谢任何反馈。

这是我的代码：

import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import time

def residue(x, B, v):
    """Return the squared relative residual of ``B x`` against ``v``.

    The value is ``LA.norm(B x - v, 2)**2 / LA.norm(v, 2)**2``, i.e. the
    squared misfit normalised by the squared norm of the target ``v``.
    No guard is applied for ``v`` having zero norm.
    """
    misfit = np.dot(B, x) - v
    misfit_sq = LA.norm(misfit, 2) ** 2
    scale_sq = LA.norm(v, 2) ** 2
    return misfit_sq / scale_sq

def gradGD(x, B, v):
    """Return the gradient ``B^T (B x - v)`` of ``0.5 * ||B x - v||^2``."""
    residual = np.dot(B, x) - v
    gradient = np.dot(B.T, residual)
    return gradient

def gradientDescent(B, v, alpha, tol, x0, max_iter=None):
    """Minimise ``0.5 * ||B x - v||^2`` by fixed-step gradient descent.

    Args:
        B: Matrix of the least-squares problem.
        v: Target vector.
        alpha: Constant step size applied to every gradient step.
        tol: Stop as soon as ``residue(x, B, v) < tol``.
        x0: Initial iterate.
        max_iter: Optional cap on the number of gradient steps. ``None``
            (the default) keeps the original behaviour of iterating until
            the tolerance is met, which never terminates if the iteration
            diverges (a NaN residue never compares below ``tol``).

    Returns:
        The last iterate computed.
    """
    # The normal-equation matrices B^T B and B^T v are not needed here: the
    # gradient is evaluated directly through gradGD, so forming them would
    # only add a wasted O(N^3) matrix product per call.
    x = x0
    steps_taken = 0
    while True:
        res = residue(x, B, v)
        print('Residue ', res)

        if res < tol:
            break
        if max_iter is not None and steps_taken >= max_iter:
            break

        x = x - alpha * gradGD(x, B, v)
        steps_taken += 1

    return x

# Gradient descent with auto step-size
def gradientDescentBB(B, v, tol, x0, max_iter=None):
    """Minimise ``0.5 * ||B x - v||^2`` with a Barzilai-Borwein step size.

    The first step uses a fixed step of ``1e-06`` because no previous
    iterate exists yet. Every later step uses
    ``alpha = s^T y / ||y||^2`` with ``s = x - x_prev`` and
    ``y = grad - grad_prev``.

    Args:
        B: Matrix of the least-squares problem.
        v: Target vector.
        tol: Stop as soon as ``residue(x, B, v) < tol``.
        x0: Initial iterate.
        max_iter: Optional cap on the number of gradient steps. ``None``
            (the default) iterates until the tolerance is met.

    Returns:
        The last iterate computed.
    """
    x = x0
    xpre = None   # previous iterate; set right before each step
    grad = None   # gradient at the current iterate
    steps_taken = 0

    while True:
        res = residue(x, B, v)
        #print('Residue ', res)

        if res < tol:
            break
        if max_iter is not None and steps_taken >= max_iter:
            break

        gradpre = grad
        grad = gradGD(x, B, v)

        if gradpre is None:
            # Bootstrap step: no (s, y) pair is available yet.
            alpha = 1e-06
        else:
            y = grad - gradpre
            # xpre is the real previous iterate (x0 on the first BB step),
            # not a zero vector sized by a module-level global, so s is
            # correct for any starting point and any problem size.
            s = x - xpre
            y_norm_sq = pow(LA.norm(y, 2), 2)
            if y_norm_sq == 0:
                # The gradient did not change, so the BB quotient is 0/0;
                # stop instead of propagating NaN through the iterates.
                break
            alpha = np.dot(s.T, y) / y_norm_sq

        xpre = x
        x = x - alpha * grad
        steps_taken += 1

    return x

# Solves the optimization problem via the normal equations A*x = b
def solver(B, v):
    """Solve the least-squares problem through its normal equations.

    Forms ``A = B^T B`` and ``b = B^T v`` and returns
    ``np.linalg.solve(A, b)``.
    """
    B_transposed = B.T
    gram = B_transposed.dot(B)
    rhs = B_transposed.dot(v)
    return np.linalg.solve(gram, rhs)

# Main routine
#
# Repeats a randomised experiment: build a random matrix B and a known
# solution x0, form v = B x0, then recover x0 both by gradient descent and
# by solving the normal equations, recording run time and error for each.
N = 1000
epsilon = 1.0e-6
a = 1/N - epsilon
n_trials = 20  # number of independent random experiments

# Wall-clock time per trial: gradient descent / direct solve
mytime_iter = []
time2_iter = []

# Gradient-descent errors per trial (absolute, relative, percent)
myeabs_iter = []
myerel_iter = []
myepercent_iter = []

# Direct-solve errors per trial (absolute, relative, percent)
cgseabs_iter = []
cgserel_iter = []
cgsepercent_iter = []

# Running the experiment many times
for i in range(n_trials):
    print('Iteration: ', i)
    B = a * np.random.randn(N, N) + np.ones((N, N))
    x0 = np.random.randn(N, 1)  # Real solution of the optimization problem
    v = np.dot(B, x0)

    mystart = time.time()
    # x = gradientDescent(B, v, alpha=1999100e-09, tol=1e-05, x0=np.zeros((N, 1))) # Gradient Descent: Method 1
    x = gradientDescentBB(B, v, tol=1e-05, x0=np.zeros((N, 1))) # Gradient Descent: Method 2
    myend = time.time()
    mytime = myend - mystart

    start2 = time.time()
    xalt = solver(B, v) # Solution of the optimization problem by solving A*x = b
    end2 = time.time()
    # Elapsed time is end minus start; the reversed order gave negative times.
    time2 = end2 - start2

    myeabs = LA.norm(x - x0, 2)
    myerel = myeabs / LA.norm(x0, 2)
    myepercent = myerel * 100

    cgseabs = LA.norm(xalt - x0, 2)
    cgserel = cgseabs / LA.norm(x0, 2)
    cgsepercent = cgserel * 100

    mytime_iter.append(mytime)
    time2_iter.append(time2)
    myeabs_iter.append(myeabs)
    myerel_iter.append(myerel)
    myepercent_iter.append(myepercent)

    cgseabs_iter.append(cgseabs)
    cgserel_iter.append(cgserel)
    cgsepercent_iter.append(cgsepercent)

# Figure 1: run time per trial
plt.figure(1)
plt.plot(mytime_iter, 'bo', label="GD")
plt.plot(time2_iter, 'ro', label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Time (s)")

# Figure 2: absolute error per trial
plt.figure(2)
plt.plot(myeabs_iter, "-b", label="GD")
plt.plot(cgseabs_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Absolute error")

# Figure 3: relative error per trial
plt.figure(3)
plt.plot(myerel_iter, "-b", label="GD")
plt.plot(cgserel_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Relative error")

# Figure 4: relative error per trial, in percent
plt.figure(4)
plt.plot(myepercent_iter, "-b", label="GD")
plt.plot(cgsepercent_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Relative error (%)")

plt.show()

在 Python 中使用梯度下降进行数值优化

0 个答案: