I have a research task that is similar to logistic regression:
import torch
import math
import numpy as np
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame
# numerically stable sigmoid: equivalent to 1/(1+exp(-x)) but avoids overflow for large |x|
def sigmoid(x):
    e1 = x.abs().sub(x).div(-2.).exp()
    e2 = e1
    e3 = x.abs().add(x).div(-2.).exp()
    return e1.div(e2.add(e3))
# 6 parameters, viewed inside f() as a 2x3 matrix
w = torch.tensor([-4., -0.8783411383628845, 1.549607276916504, -0.7097892165184021, 6.121659278869629, -0.45039281249046326], requires_grad=True)
def median(y):
    # median built from torch.median() calls so that gradients can flow through it
    yt = y.float()                     # keep y in the autograd graph (torch.tensor(y) would detach it)
    ymax = yt.max().unsqueeze(0)       # same reason: avoid wrapping in a new, detached tensor
    return (torch.cat((yt, ymax)).median() + yt.median()) / 2.
def f(x, y, w):
    # score of the point (x, y): W is w reshaped to 2x3, giving two affine outputs combined by median()
    v = torch.tensor([1, x, y])
    # print(v)
    W = w.view(2, -1)
    # print("W=", W)
    r = torch.mv(W, v)
    return median(r)
def fl(x, y, w):
    r = f(x, y, w)
    return sigmoid(r)
def log_fl_1(x, y, w):
    r = f(x, y, w)
    ex = torch.exp(r.mul(torch.tensor(-1.)))
    # print("r=", r)
    den = torch.log(torch.add(torch.tensor(1.), ex))
    # print("den=", den)
    return torch.div(r, den)
def loss(X, y, w):
    s = torch.tensor(0.)
    for i in range(len(y)):
        xx = X[i][0]
        yy = X[i][1]
        if y[i] == 0:
            th = fl(xx, yy, w)
            s = s.add(th.log())
        else:
            lt1 = log_fl_1(xx, yy, w)
            s = s.add(lt1)
    return s
X, z = make_moons(n_samples=60, noise=0.1,random_state=8)
h = .000000000008
l0 = None
def grad(w):
    # hand-assembled gradient that I would like to use instead of s.backward()
    sum_grad = torch.zeros(w.shape)
    for i in range(len(z)):
        xx = X[i][0]
        yy = X[i][1]
        th = f(xx, yy, w)
        th.backward()
        z_i = torch.tensor(z[i] * 1.)
        sum_grad = w.grad.mul(th.sub(z_i)).add(sum_grad)
    return sum_grad
opt = torch.optim.LBFGS([w], lr=.001)
for i in range(50):
    def closure():
        # LBFGS re-evaluates the loss and gradient through this closure on every step
        opt.zero_grad()
        s = loss(X, z, w)
        print("s=", s.tolist(), "w=", w.tolist())
        s.backward()
        return s
    opt.step(closure)
It converges, but close to the optimum the gradient turns into NaN. I can compute the gradient more efficiently with the function grad(w); its result agrees with PyTorch's autograd, except that it does not blow up to NaN near the optimum. How can I hand it to torch.optim.LBFGS?
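A minimal sketch of one way this could work, assuming grad(w) returns a tensor with the same shape and dtype as w: LBFGS uses the loss value returned by the closure together with whatever is stored in w.grad when the closure returns, so the closure can assign the hand-computed gradient to w.grad instead of calling s.backward():

opt = torch.optim.LBFGS([w], lr=.001)
for i in range(50):
    def closure():
        opt.zero_grad()                 # clear anything left in w.grad
        s = loss(X, z, w)               # loss value LBFGS needs for its updates
        w.grad = grad(w).detach()       # overwrite w.grad with the hand-computed gradient
        return s                        # note: no s.backward() here
    opt.step(closure)

LBFGS assumes the tensor sitting in w.grad is the gradient of the scalar the closure returns, so the two should be kept consistent for the updates to make sense.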