我正在使用SCAD罚函数实现阈值回归,SCAD具有自己定义的导数,因此我也实现了SCAD导数的功能。我将导数传递给“优化”的jac参数。 我首先使用没有SCAD惩罚功能的目标功能来进行初步猜测,并且它可以正常工作。 但是,当使用带有SCAD的惩罚函数来解决此问题时,迭代器无法给出正确的结果,并返回“由于精度损失而不一定实现的期望误差”。
我该怎么办?
import pandas as pd
import scipy as sc
from scipy import stats as st
import numpy as np
from scipy import optimize as opt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
def loss_SCAD(beta_matrix):
M_MSE = np.dot(x, beta_matrix[:tmp_lt])[:, None]
M_c = np.array(beta_matrix[-M:], ndmin=1)
scad = 0
for idx, c in enumerate(M_c):
nor_distri_x = (x[:, 1][:, None] - c) / tmp_h
nor_distri_CDF = st.norm.cdf(nor_distri_x) # 光滑示性函数
M_MSE += np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_CDF
beta_param = np.sum(np.abs(beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)]))
if beta_param > a * tmp_lam:
scad += 0.5 * ((a + 1) * tmp_lam ** 2)
elif beta_param > tmp_lam:
scad += ((a ** 2 - 1) * tmp_lam ** 2 - (beta_param - a * tmp_lam) ** 2) / (2 * (a - 1))
else:
scad += tmp_lam * beta_param
MSE = 0.5 * np.average((y - M_MSE) ** 2) + scad
return MSE
def loss_derivative(beta_matrix):
deriv_list = []
other_M_MSE, nor_distri_CDF = 0, 0
first_M_MSE = y - np.dot(x, beta_matrix[:tmp_lt])[:, None]
M_c = np.array(beta_matrix[-M:], ndmin=1)
for idx, c in enumerate(M_c):
nor_distri_x = (x[:, 1][:, None] - c) / tmp_h
nor_distri_CDF = st.norm.cdf(nor_distri_x) # 光滑示性函数
other_M_MSE += np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_CDF
square_part = first_M_MSE - other_M_MSE
for idx, beta_param in enumerate(beta_matrix[:tmp_lt]): # 更新梯度:第一段
deriv_list.append(np.average(square_part * -x[:, idx][:, None]))
for idx, c in enumerate(M_c):
beta_param = np.sum(np.abs(beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)]))
if beta_param == 0: # 更新梯度:SCAD函数
scad = 0
elif beta_param > tmp_lam:
scad = np.max(((a * tmp_lam - beta_param) / (a - 1), 0))
else:
scad = tmp_lam
for i in range(tmp_lt): # 更新梯度:分段
deriv_list.append(np.average(square_part * -x[:, i][:, None] * nor_distri_CDF) + scad)
for idx, c in enumerate(M_c): # 更新梯度:门槛参数
nor_distri_PDF = st.norm.pdf((tmp_x - c) / tmp_h)
deriv_list.append(np.average(square_part * np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_PDF / tmp_h))
return np.array(deriv_list)
def loss(beta_matrix):
M_MSE = np.dot(x, beta_matrix[:tmp_lt])[:, None]
M_c = np.array(beta_matrix[-M:], ndmin=1)
for idx, c in enumerate(M_c):
nor_distri_x = (x[:, 1][:, None] - c) / tmp_h
nor_distri_CDF = st.norm.cdf(nor_distri_x) # 光滑示性函数
M_MSE += np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_CDF
MSE = 0.5 * np.average((y - M_MSE) ** 2)
return MSE
np.random.seed(1171359)
a = 3.7
tmp_x = np.random.normal(loc=0, scale=2, size=(400, 1))
tmp_beta = [2, -2, 5, 2, 9, 7]
tmp_c = [-np.sqrt(2), np.sqrt(2)]
M = np.shape(tmp_c)[0]
x1, x2, x3 = tmp_x[tmp_x < tmp_c[0]], tmp_x[(tmp_x >= tmp_c[0]) & (tmp_x < tmp_c[1])], tmp_x[tmp_x >= tmp_c[1]]
y1, y2, y3 = tmp_beta[0] + tmp_beta[1] * x1, tmp_beta[2] + tmp_beta[3] * x2, tmp_beta[4] + tmp_beta[5] * x3
x = np.reshape(np.concatenate((x1, x2, x3)), (-1, 1))
y = np.reshape(np.concatenate((y1, y2, y3)) + np.random.randn(400), (-1, 1))
x = np.concatenate((np.ones_like(x), x), axis=1)
tmp_lt = x.shape[1]
tmp_h = np.log(400) / 400 * tmp_x.std(ddof=1)
tmp_lam = 0.5
tmp_res = opt.minimize(fun=loss, x0=np.append(np.ones(7), 2))
tmp_result = opt.minimize(fun=loss_SCAD, x0=tmp_res.x, jac=loss_derivative)
# print(tmp_result.x, tmp_res.x, sep='\n')
k = tmp_result.x[:-2].reshape((2, -1), order='F')
y_hat_1 = np.dot(x[x[:, 1] < tmp_result.x[-2]], k[:, 0].reshape((-1, 1)))
y_hat_2 = np.dot(x[(x[:, 1] >= tmp_result.x[-2]) & (x[:, 1] < tmp_result.x[-1])], k[:, 0].reshape((-1, 1)) + k[:, 1].reshape((-1, 1)))
y_hat_3 = np.dot(x[x[:, 1] >= tmp_result.x[-1]], k[:, 0].reshape((-1, 1)) + k[:, 1].reshape((-1, 1)) + k[:, 2].reshape((-1, 1)))
print(k, tmp_result.fun, tmp_result, sep='\n')
fig = plt.figure(111, figsize=(6, 6))
plt.plot(x[:, 1][x[:, 1] < tmp_result.x[-2]], y_hat_1, c='orange', alpha=0.6, linewidth=4)
plt.plot(x[:, 1][(x[:, 1] >= tmp_result.x[-2]) & (x[:, 1] < tmp_result.x[-1])], y_hat_2, c='blue', alpha=0.6, linewidth=4)
plt.plot(x[:, 1][x[:, 1] >= tmp_result.x[-1]], y_hat_3, c='green', alpha=0.6, linewidth=4)
plt.plot(x1, y1, c='black', alpha=0.6, linewidth=4)
plt.plot(x2, y2, c='black', alpha=0.6, linewidth=4)
plt.plot(x3, y3, c='black', alpha=0.6, linewidth=4)
plt.scatter(x[:, 1], y, s=12, alpha=0.5, c='gray')
plt.axis([tmp_x.min() * 1.05, tmp_x.max() * 1.05, y.min() * 1.05, y.max() * 1.05])
plt.show()
fun: 1.6869804934437402
hess_inv: array([[1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1]])
jac: array([ 1.98384534e-07, -2.81814249e-07, -1.51727486e-08, -2.17460279e-08,
-1.51727486e-08, -2.17460279e-08, -1.15503569e+00, -6.99892939e-01])
message: 'Desired error not necessarily achieved due to precision loss.'
nfev: 84
nit: 0
njev: 72
status: 2
success: False
x: array([ 1.93751362, -2.04506983, 3.09129046, 4.13404934, 4.32073797,
4.87646363, -1.391239 , 1.4099789 ])