我正在尝试求解以下数值优化问题:找到向量 x,使成本函数 0.5 * norm(Bx - v, 2)^2 最小,其中 B 是矩阵,v 是向量。我实现了两种梯度下降算法:在第一种中我手动调整步长;在第二种中我使用 ftp://lsec.cc.ac.cn/pub/yyx/papers/p0504.pdf 中的公式 (2.5) 自动计算步长。成本函数的梯度为 B^T(B*x - v)。
另外,我将自己的实现与 numpy.linalg 中的 solve(A, b) 函数进行了比较,因为该优化问题的解就是线性方程组 A*x = b 的解,其中 A = B^T * B,b = B^T * v。到目前为止,我得到的结果很差:误差很大,运行时间也很长。我不知道是我的实现有错误,还是这些算法在我设置的计算实验中本来就是这样的表现。
在计算实验中,我随机生成“解”向量 x 和矩阵 B,然后据此计算 v = B*x 以及相应的 A 和 b。
感谢任何反馈。
这是我的代码:
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import time
def residue(x, B, v):
    """Return the squared relative residual of ``B x = v`` at ``x``.

    The value is ``||B x - v||_2^2 / ||v||_2^2``: it is 0 when ``x`` solves
    the system exactly and 1 when ``x`` is the zero vector.
    """
    mismatch = np.dot(B, x) - v
    numerator = LA.norm(mismatch, 2) ** 2
    denominator = LA.norm(v, 2) ** 2
    return numerator / denominator
def gradGD(x, B, v):
    """Return ``B^T (B x - v)``, the gradient of ``0.5 * ||B x - v||_2^2`` at ``x``."""
    return np.dot(B.T, np.dot(B, x) - v)
def gradientDescent(B, v, alpha, tol, x0, max_iter=None, verbose=True):
    """Minimise ``0.5 * ||B x - v||_2^2`` by gradient descent with a fixed step.

    Each iteration performs ``x <- x - alpha * B^T (B x - v)`` and stops as
    soon as the squared relative residual ``||B x - v||_2^2 / ||v||_2^2``
    drops below ``tol``.

    Args:
        B: Matrix of the least-squares problem.
        v: Right-hand-side vector.
        alpha: Constant step size.
        tol: Threshold on the squared relative residual.
        x0: Starting point; it is not modified.
        max_iter: Optional cap on the number of update steps. ``None``
            (the default) keeps the original "iterate until converged"
            behaviour.
        verbose: When true (the default, matching the original behaviour)
            the residual is printed at every iteration.

    Returns:
        The last iterate. If the residual becomes inf/NaN (the step size is
        too large and the iteration diverges), a ``RuntimeWarning`` is
        issued and the current iterate is returned instead of looping on.
    """
    import warnings

    x = x0
    # ||v||^2 does not change between iterations, so compute it once.
    v_norm_sq = LA.norm(v, 2) ** 2
    n_steps = 0
    while True:
        # The residual vector feeds both the stopping test and the gradient,
        # so the product B x is evaluated only once per iteration.
        r = np.dot(B, x) - v
        res = LA.norm(r, 2) ** 2 / v_norm_sq
        if verbose:
            print('Residue ', res)
        if res < tol:
            break
        if not np.isfinite(res):
            # inf/NaN can never satisfy `res < tol`; stop instead of spinning.
            warnings.warn(
                "gradientDescent: residual is not finite; the iteration "
                "diverged (alpha is probably too large)",
                RuntimeWarning,
            )
            break
        if max_iter is not None and n_steps >= max_iter:
            break
        x = x - alpha * np.dot(B.T, r)
        n_steps += 1
    return x
# Gradient descent with auto step-size
def gradientDescentBB(B, v, tol, x0, alpha0=1e-06, max_iter=None):
    """Minimise ``0.5 * ||B x - v||_2^2`` with an automatically chosen step.

    The first update uses the fixed step ``alpha0``. Every later update uses
    ``alpha = s^T y / y^T y`` where ``s`` is the difference between the
    current and the previous iterate and ``y`` the difference between the
    current and the previous gradient. Iteration stops as soon as the
    squared relative residual ``||B x - v||_2^2 / ||v||_2^2`` drops below
    ``tol``.

    Args:
        B: Matrix of the least-squares problem.
        v: Right-hand-side vector.
        tol: Threshold on the squared relative residual.
        x0: Starting point (any shape compatible with ``B``); not modified.
        alpha0: Step size of the very first update, for which no previous
            iterate exists yet.
        max_iter: Optional cap on the number of update steps. ``None``
            (the default) iterates until one of the stopping tests fires.

    Returns:
        The last iterate. A ``RuntimeWarning`` is issued when the iteration
        stops because the residual became inf/NaN.
    """
    import warnings

    x = x0
    # ||v||^2 does not change between iterations, so compute it once.
    v_norm_sq = LA.norm(v, 2) ** 2
    # The previous iterate and gradient are tracked explicitly, so the step
    # formula is correct for any starting point and needs no global size.
    x_prev = None
    grad_prev = None
    n_steps = 0
    while True:
        # One product B x serves both the stopping test and the gradient.
        r = np.dot(B, x) - v
        res = LA.norm(r, 2) ** 2 / v_norm_sq
        if res < tol:
            break
        if not np.isfinite(res):
            # inf/NaN can never satisfy `res < tol`; stop instead of spinning.
            warnings.warn(
                "gradientDescentBB: residual is not finite; the iteration diverged",
                RuntimeWarning,
            )
            break
        if max_iter is not None and n_steps >= max_iter:
            break
        grad = np.dot(B.T, r)
        if grad_prev is None:
            # No history yet: bootstrap with the fixed initial step.
            alpha = alpha0
        else:
            s = x - x_prev
            y = grad - grad_prev
            y_norm_sq = np.vdot(y, y)
            if y_norm_sq == 0:
                # The gradient did not change, so the step is undefined (0/0).
                break
            alpha = np.vdot(s, y) / y_norm_sq
        x_prev, grad_prev = x, grad
        x = x - alpha * grad
        n_steps += 1
    return x
# Solves the optimization problem min_x 0.5 * ||B x - v||_2^2 directly
def solver(B, v):
    """Return the least-squares solution of ``B x = v``.

    The minimiser of ``0.5 * ||B x - v||_2^2`` also satisfies the normal
    equations ``(B^T B) x = B^T v``, but forming ``B^T B`` squares the
    condition number of the problem and can destroy the accuracy of the
    result when ``B`` is ill-conditioned. ``np.linalg.lstsq`` works on ``B``
    itself and therefore solves the same problem more accurately.

    Args:
        B: Matrix of the least-squares problem.
        v: Right-hand-side vector (or column vector).

    Returns:
        The solution, with the same shape a solve of the normal equations
        would return. When ``B`` is rank deficient the minimum-norm
        solution is returned.
    """
    x, _residuals, _rank, _singular_values = np.linalg.lstsq(B, v, rcond=None)
    return x
# Main routine: compare gradient descent with the direct solver on random
# least-squares problems, then plot the run time and accuracy of every run.
N = 1000  # problem size: B is N x N, x0 and v are N x 1
epsilon = 1.0e-6
a = 1 / N - epsilon  # scale of the random perturbation added to the all-ones matrix
n_runs = 20  # number of experiments (was `iter`, which shadowed the built-in)

# Per-run measurements: "my*" belongs to gradient descent, "time2"/"cgs*" to
# the direct solver. "eabs" = absolute error, "erel" = relative error,
# "epercent" = relative error in percent, all measured against the known x0.
mytime_iter = []
time2_iter = []
myeabs_iter = []
myerel_iter = []
myepercent_iter = []
cgseabs_iter = []
cgserel_iter = []
cgsepercent_iter = []


def _plot_comparison(fig_num, gd_values, direct_values, gd_style, direct_style, ylabel):
    """Draw the per-run gradient-descent and direct-solver values on one figure."""
    plt.figure(fig_num)
    plt.plot(gd_values, gd_style, label="GD")
    plt.plot(direct_values, direct_style, label="solve()")
    plt.legend(loc="upper right")
    plt.xlabel("# Iteration")
    plt.ylabel(ylabel)


if __name__ == "__main__":
    # Running the experiment many times
    for i in range(n_runs):
        print('Iteration: ', i)
        # NOTE(review): B is the all-ones matrix plus noise of scale `a`
        # (about 1e-3), so it is close to rank one. That presumably makes the
        # problem very ill-conditioned, which would explain slow convergence
        # and large errors -- confirm with np.linalg.cond(B).
        B = a * np.random.randn(N, N) + np.ones((N, N))
        x0 = np.random.randn(N, 1)  # Real solution of the optimization problem
        v = np.dot(B, x0)
        x0_norm = LA.norm(x0, 2)  # shared by both relative errors below

        mystart = time.perf_counter()
        # Gradient Descent, Method 1 (fixed step) can be swapped in here:
        # x = gradientDescent(B, v, alpha=1999100e-09, tol=1e-05, x0=np.zeros((N, 1)))
        x = gradientDescentBB(B, v, tol=1e-05, x0=np.zeros((N, 1)))  # Gradient Descent: Method 2
        mytime = time.perf_counter() - mystart

        start2 = time.perf_counter()
        xalt = solver(B, v)  # Direct solution of the optimization problem
        # Elapsed time is end minus start; the original subtracted the other
        # way round and recorded negative durations.
        time2 = time.perf_counter() - start2

        myeabs = LA.norm(x - x0, 2)
        myerel = myeabs / x0_norm
        myepercent = myerel * 100
        cgseabs = LA.norm(xalt - x0, 2)
        cgserel = cgseabs / x0_norm
        cgsepercent = cgserel * 100

        mytime_iter.append(mytime)
        time2_iter.append(time2)
        myeabs_iter.append(myeabs)
        myerel_iter.append(myerel)
        myepercent_iter.append(myepercent)
        cgseabs_iter.append(cgseabs)
        cgserel_iter.append(cgserel)
        cgsepercent_iter.append(cgsepercent)

    _plot_comparison(1, mytime_iter, time2_iter, 'bo', 'ro', "Time (s)")
    _plot_comparison(2, myeabs_iter, cgseabs_iter, "-b", "-r", "Absolute error")
    _plot_comparison(3, myerel_iter, cgserel_iter, "-b", "-r", "Relative error")
    _plot_comparison(4, myepercent_iter, cgsepercent_iter, "-b", "-r", "Relative error (%)")
    plt.show()