How to provide a custom gradient to torch.optim.LBFGS?

Asked: 2019-04-30 14:35:14

Tags: python optimization pytorch gradient-descent

I have a research task similar to logistic regression:

import torch
import math
import numpy as np
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame

def sigmoid(x):
    # Numerically stable sigmoid: for x >= 0 this evaluates 1 / (1 + exp(-x)),
    # and for x < 0 it evaluates exp(x) / (exp(x) + 1), so no large positive
    # argument is ever exponentiated.
    e1 = x.abs().sub(x).div(-2.).exp()   # exp(-(|x| - x) / 2)
    e3 = x.abs().add(x).div(-2.).exp()   # exp(-(|x| + x) / 2)
    return e1.div(e1.add(e3))
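# Quick illustrative check (an added sketch, not part of the fit itself):
# the stable formula should agree with torch.sigmoid to float precision.
xs = torch.tensor([-50., -1., 0., 1., 50.])
assert torch.allclose(sigmoid(xs), torch.sigmoid(xs))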

# Six weights, viewed as a 2x3 matrix inside f() below.
w = torch.tensor([-4., -0.8783411383628845, 1.549607276916504, -0.7097892165184021, 6.121659278869629, -0.45039281249046326], requires_grad=True)

def median(y):
    # Average of the lower and upper medians: torch.median alone returns the
    # lower middle value, and appending the max makes the length odd so that
    # its median is the upper middle value. Keep y as a tensor here;
    # rebuilding it with torch.tensor() would detach it from the autograd
    # graph and break backward().
    ymax = y.max().reshape(1)
    return (torch.cat((y, ymax)).median() + y.median()) / 2.
def f(x, y, w):
    v = torch.tensor([1., x, y], dtype=torch.float32)   # features (1, x, y); x, y are numpy scalars
    W = w.view(2, -1)                                   # two rows of three weights
    r = torch.mv(W, v)                                  # two linear scores
    return median(r)                                    # median of two values = their mean
def fl(x, y, w):
    r = f(x, y, w)
    return sigmoid(r)
def log_fl_1(x, y, w):
    r = f(x, y, w)
    ex = torch.exp(-r)
    den = torch.log(1. + ex)
    # Caution: for large positive r, ex underflows to 0, den becomes exactly
    # 0., and r / den overflows; its gradient then turns into NaN.
    return torch.div(r, den)

def loss(X, y, w):
    # Sum of per-sample log-terms: log(sigmoid(f)) for label 0,
    # log_fl_1 for label 1.
    s = torch.tensor(0.)
    for i in range(len(y)):
        xx = X[i][0]
        yy = X[i][1]
        if y[i] == 0:
            s = s.add(fl(xx, yy, w).log())
        else:
            s = s.add(log_fl_1(xx, yy, w))
    return s
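# Caution: torch.optim.LBFGS *minimizes* the value the closure returns; if s
# is meant to be a log-likelihood (a sum of log-probabilities, hence <= 0),
# one would usually return -s from the closure instead of s.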

X, z = make_moons(n_samples=60, noise=0.1, random_state=8)
h = .000000000008   # unused in the code below
l0 = None           # unused in the code below

def grad(w):
    # Hand-rolled gradient: the sum over samples of (f_i - z_i) * df_i/dw,
    # using autograd only for df_i/dw.
    sum_grad = torch.zeros(w.shape)
    for i in range(len(z)):
        xx = X[i][0]
        yy = X[i][1]
        if w.grad is not None:
            w.grad.zero_()    # don't let per-sample gradients accumulate
        th = f(xx, yy, w)
        th.backward()
        z_i = torch.tensor(z[i] * 1.)
        sum_grad = w.grad.mul(th.detach().sub(z_i)).add(sum_grad)
    return sum_grad
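# Optional illustrative check: grad(w) should agree with autograd's gradient
# of loss() away from the optimum.
g_manual = grad(w)
if w.grad is not None:
    w.grad.zero_()
loss(X, z, w).backward()
print("gradients match:", torch.allclose(g_manual, w.grad, atol=1e-4))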

opt = torch.optim.LBFGS([w], lr=.001)
for i in range(50):
    def closure():
        # LBFGS may call the closure several times per step, so it must
        # recompute both the loss and the gradients on every call.
        opt.zero_grad()
        s = loss(X, z, w)
        print("s=", s.tolist(), "w=", w.tolist())
        s.backward()
        return s
    opt.step(closure)

This converges, but close to the optimum the gradient becomes NaN. I can compute the gradient more efficiently with the function grad(w); its result is the same as PyTorch's autograd derivatives, except that it does not blow up to NaN near the optimum. How can I feed it to torch.optim.LBFGS?
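A minimal sketch of one common pattern (untested here, and assuming grad(w) really does return the gradient of the loss with respect to w): skip s.backward() inside the closure and write the hand-computed gradient into w.grad directly. LBFGS only reads each parameter's .grad field after calling the closure, so it does not care how the gradient was produced:

def closure():
    g = grad(w)               # hand-computed gradient (uses autograd on f internally)
    opt.zero_grad()           # clear the leftovers grad() wrote into w.grad
    with torch.no_grad():
        s = loss(X, z, w)     # loss value only; no graph needed
    w.grad = g                # hand the custom gradient to LBFGS
    return s
opt.step(closure)

Note that opt.zero_grad() is called after grad(w), because grad() itself writes into w.grad while summing the per-sample terms.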

0 Answers:

No answers yet