Question

我正在尝试进行广义最小二乘拟合，以通过一些（x,y）数据点找到最佳拟合线。我能够通过scipy做到这一点，但我在使用权重时遇到了麻烦。我想从原始拟合的残差中获得权重，并尝试使用权重通过最小二乘法进行重新设定。权重应该是残差的倒数，但是因为-1 < residuals < 1这只是一个例子，我可以将残差用作权重。

（x,y）数据点在别处计算，目标是找到最佳拟合线y = x/alpha + intercept的alpha（1 /斜率）和截距。

我的代码如下：

import numpy as np
from scipy.optimize import least_squares

ydata = [9.7372923, 10.0587245, 10.3838510, 10.6931371, 10.9616260, 11.1833220, 11.3806770, 11.5248917, 11.7353000]
xdata = np.array([j+5 for j in range(len(ydata))])

def get_weights(resid):
    """
    This function calculates the weights per (x,y) by using the inverse of the squared residuals divided by the total sum of the inverse of the squared residuals. 
    This might be incorrect but should work for the sake of example. 
    """
    total = sum([abs(resid[i]) for i in range(len(resid))])
    fract = np.array([resid[i]/total for i in range(len(resid))])
    return fract

def get_least_squares_fit(xs, ys):
    """
    This function finds alpha (1/slope) and the y-intercept in the equation
    of a line y = x / alpha + intercept = mx + b
    """
    ## SPECIFY INITIAL GUESS OF OPTIMIZED PARAMETERS
    params_guess = [1/3, 9] ## ps = [alpha, intercept]
    ## DEFINE FITTING FUNCTIONS
    fit_func = lambda ps,x : x/ps[0] + ps[1]
    err_func = lambda ps,x,y : fit_func(ps,x) - y
    ## GET OPTIMIZED PARAMETERS, RESIDUALS & WEIGHTS
    res = least_squares(err_func, params_guess, args=(xs, ys))
    alpha, intercept, residuals = res.x[0], res.x[1], res.fun
    weights = get_weights(residuals)
    return alpha, intercept, residuals, weights

alpha, intercept, residuals, weights = get_least_squares_fit(xdata, ydata)

print(alpha, intercept)
>> 4.03378447722 8.6198247828

print(residuals)
>>  [ 0.12206326  0.04853721 -0.02868313 -0.09006308 -0.11064582 -0.08443567
-0.03388451  0.06980694  0.1073048 ]

我使用scipy curve_fit获得了相同的结果，https://gstreamer.freedesktop.org/src/gst-python/gst-python-1.12.1.tar.xz有sigma和absolute_sigma。我猜这是最好的方法。我仍然试图解决这个问题。

Answer 1

我相信这是正常的。这个想法是每个残差应该乘以相应的权重。最小化的功能是这些产品的总和。我没有使用外部模块进行最小二乘拟合，而是使用了良好的'scipy.optimize.minimize，它给出了未加权最小二乘拟合（get_gls_fit(..., weights=None, ...)）和外部模块结果的相同结果。 / p>

import numpy as np
from scipy import optimize
## same xdata and ydata as stated in question

def guess_initial_parameters(xs, ys):
    """
    xs          :   type<list> or type<array>
    ys          :   type<list> or type<array>
    """
    ## GUESS SLOPE
    slope = (ys[-1]-ys[0])/(xs[-1]-xs[0])
    alpha = 1/slope
    ## GUESS INTERCEPT
    intercept = np.mean([ys[-1] - xs[-1]/alpha, ys[0] - xs[0]/alpha])
    return [alpha, intercept]

def update_weights(residuals, power=1):
    """
    residuals   :   type<list> or type<array>
    power       :   type<float>
    """
    ## INVERT RESIDUALS
    invs = [1/residuals[idr] for idr in range(len(residuals))]
    ## NORMALIZE RESIDUALS
    invs = [abs(inv)**power for inv in invs]
    total = sum(invs)
    return [invs[idv]/total for idv in range(len(invs))]

def fit_func(ps, xs):
    """
    ps          :   [alpha, intercept]
    xs          :   type<list> or type<array>
    """
    ## FIT TO EQUATION OF LINE
    return [xs[idx]/ps[0] + ps[1] for idx in range(len(xs))] ## alpha = 1/slope

def get_residuals(ps, xs, ys):
    """
    ps          :   [alpha, intercept]
    xs          :   type<list> or type<array>
    ys          :   type<list> or type<array>
    """
    ## GET LINEAR FIT
    ys_trial = fit_func(ps, xs)
    ## GET RESIDUALS
    residuals = [(ys[idx] -  ys_trial[idx])**2 for idx in range(len(ys))]
    return residuals

def err_func(ps, xs, ys, wts):
    """
    ps          :   [alpha, intercept]
    xs          :   type<list> or type<array>
    ys          :   type<list> or type<array>
    wts         :   type<list> or type<array>
    """
    ## GET RESIDUALS
    residuals = get_residuals(ps, xs, ys)
    ## SUM WEIGHTED RESIDUALS
    return sum([wts[idr] * residuals[idr] for idr in range(len(residuals))])

def get_gls_fit(xs, ys, ps_init, weights=None, power=2, routine='Nelder-Mead'):
    """
    xs          :   type<list> or type<array>
    ys          :   type<list> or type<array>
    ps_init     :   [alpha, intercept]
    weights     :   None or type<list> or type<array>
    power       :   type<float>
    routine     :   'Nelder-Mead'
    """
    ## GET INITIAL PARAMETER GUESS
    if type(ps_init) == (list or np.array):
        pass
    elif ps_init == 'estimate':
        ps_init = guess_initial_parameters(xs, ys)
    else:
        raise ValueError("ps_init = type<list> or type<array> or 'estimate'")
    ## GET WEIGHTS
    if weights is None:
        wts = np.ones(len(xs))
        print(">>>>>>>>>>>\nORDINARY LEAST SQUARES (OLS) FIT:")
    else:
        wts = weights[:]
        print(">>>>>>>>>>>\nGENERALIZED LEAST SQUARES (GLS) FIT:")
    ## MINIMIZE SUM OF WEIGHTED RESIDUALS
    ans = optimize.minimize(err_func, x0=ps_init, args=(xs, ys, wts,), method=routine)
    ## GET OPTIMIZED PARAMETERS
    alpha, intercept = ans.x[0], ans.x[1]
    ## GET RESIDUALS
    residuals = np.array(get_residuals([alpha, intercept], xs, ys))
    ## GET UPDATED WEIGHTS FOR REFITTING
    wts_upd = np.array(update_weights(residuals, power))
    ## PRINT & RETURN RESULTS
    print("\n   ALPHA = %.3f, INTERCEPT = %.3f" %(alpha, intercept))
    print("\n   RESIDUALS:  \n", residuals)
    print("\n   WEIGHTS (used): \n", wts)
    print("\n   WEIGHTS (updated):  \n", wts_upd, "\n\n")
    return [alpha, intercept], residuals, wts_upd

输出：

[alpha_og, intercept_og], residuals_og, wts_og = get_gls_fit(xdata, ydata, ps_init='estimate')
[alpha_up, intercept_up], residuals_up, wts_up = get_gls_fit(xdata, ydata, ps_init=[alpha_og, intercept_og], weights=wts_og)

>>>>>>>>>>>
ORDINARY LEAST SQUARES (OLS) FIT:

   ALPHA = 4.034, INTERCEPT = 8.620

   RESIDUALS:  
 [ 0.01489916  0.00235566  0.00082289  0.00811204  0.01224353  0.00713032
  0.0011486   0.00487199  0.01151256]

   WEIGHTS (used): 
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.]

   WEIGHTS (updated):  
 [ 0.00179424  0.07177589  0.58819594  0.00605264  0.002657    0.00783406
  0.3019051   0.01678001  0.00300511] 


>>>>>>>>>>>
GENERALIZED LEAST SQUARES (GLS) FIT:

   ALPHA = 3.991, INTERCEPT = 8.621

   RESIDUALS:  
 [  1.86965273e-02   4.34039033e-03   7.51041961e-05   4.53922546e-03
   7.27337443e-03   3.18112777e-03   1.00990091e-05   1.06473505e-02
   2.05510268e-02]

   WEIGHTS (used): 
 [ 0.00179424  0.07177589  0.58819594  0.00605264  0.002657    0.00783406
  0.3019051   0.01678001  0.00300511]

   WEIGHTS (updated):  
 [  2.86578120e-07   5.31749819e-06   1.77597366e-02   4.86184846e-06
   1.89362088e-06   9.89925930e-06   9.82216884e-01   8.83653134e-07
   2.37190819e-07]

如何在这个scipy最小二乘优化例程中应用权重？

1 个答案: