我正在使用SCAD惩罚函数实现阈值回归。我已经编写了目标函数和目标函数的渐变,但是无法成功优化并退出优化。请帮我看看 曾经有人告诉我使用“纳德米德”优化方法,但是这种方法虽然可以成功优化,但并未考虑渐变,但是图像完全不正确
import pandas as pd
import scipy as sc
from scipy import stats as st
import numpy as np
from scipy import optimize as opt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
def loss_SCAD(beta_matrix):
M_MSE = np.dot(x, beta_matrix[:tmp_lt])[:, None]
M_c = np.array(beta_matrix[-M:], ndmin=1)
scad = 0
for idx, c in enumerate(M_c):
nor_distri_x = (x[:, 1][:, None] - c) / tmp_h
nor_distri_CDF = st.norm.cdf(nor_distri_x) # 光滑示性函数
M_MSE += np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_CDF
beta_param = np.sum(np.abs(beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)]))
if beta_param > a * tmp_lam:
scad += 0.5 * ((a + 1) * tmp_lam ** 2)
elif beta_param > tmp_lam:
scad += ((a ** 2 - 1) * tmp_lam ** 2 - (beta_param - a * tmp_lam) ** 2) / (2 * (a - 1))
else:
scad += tmp_lam * beta_param
MSE = 0.5 * np.average((y - M_MSE) ** 2) + scad
return MSE
def loss_derivative(beta_matrix):
deriv_list = []
other_M_MSE, nor_distri_CDF = 0, 0
first_M_MSE = y - np.dot(x, beta_matrix[:tmp_lt])[:, None]
M_c = np.array(beta_matrix[-M:], ndmin=1)
for idx, c in enumerate(M_c):
nor_distri_CDF = st.norm.cdf((x[:, 1][:, None] - c) / tmp_h) # 光滑示性函数
other_M_MSE += np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_CDF
square_part = first_M_MSE - other_M_MSE
for idx, beta_param in enumerate(beta_matrix[:tmp_lt]): # 更新梯度:第一段
deriv_list.append(np.average(square_part * -x[:, idx][:, None]))
for idx, c in enumerate(M_c):
beta_param = np.sum(np.abs(beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)]))
if beta_param == 0: # 更新梯度:SCAD函数
scad = 0
elif beta_param > tmp_lam:
scad = np.max(((a * tmp_lam - beta_param) / (a - 1), 0))
else:
scad = tmp_lam
for i in range(tmp_lt): # 更新梯度:分段
deriv_list.append(np.average(square_part * -x[:, i][:, None] * nor_distri_CDF) + scad)
for idx, c in enumerate(M_c): # 更新梯度:门槛参数
nor_distri_PDF = st.norm.pdf((x[:, 1][:, None] - c) / tmp_h)
deriv_list.append(np.average(square_part * np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_PDF / tmp_h))
return np.array(deriv_list)
def loss(beta_matrix):
M_MSE = np.dot(x, beta_matrix[:tmp_lt])[:, None]
M_c = np.array(beta_matrix[-M:], ndmin=1)
for idx, c in enumerate(M_c):
nor_distri_x = (x[:, 1][:, None] - c) / tmp_h
nor_distri_CDF = st.norm.cdf(nor_distri_x) # 光滑示性函数
M_MSE += np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_CDF
MSE = 0.5 * np.average((y - M_MSE) ** 2)
return MSE
np.random.seed(1171359)
a = 3.7
tmp_x = np.random.normal(loc=0, scale=2, size=(400, 1))
tmp_beta = [2, -2, 5, 2, 9, 7]
tmp_c = [-np.sqrt(2), np.sqrt(2)]
M = np.shape(tmp_c)[0]
x1, x2, x3 = tmp_x[tmp_x < tmp_c[0]], tmp_x[(tmp_x >= tmp_c[0]) & (tmp_x < tmp_c[1])], tmp_x[tmp_x >= tmp_c[1]]
y1, y2, y3 = tmp_beta[0] + tmp_beta[1] * x1, tmp_beta[2] + tmp_beta[3] * x2, tmp_beta[4] + tmp_beta[5] * x3
x = np.reshape(np.concatenate((x1, x2, x3)), (-1, 1))
y = np.reshape(np.concatenate((y1, y2, y3)) + np.random.randn(400), (-1, 1))
x = np.concatenate((np.ones_like(x), x), axis=1)
tmp_lt = x.shape[1]
tmp_h = np.log(400) / 400 * tmp_x.std(ddof=1)
tmp_lam = 0.5
x0 = np.append(np.ones(7), 2)
# tmp_res = opt.minimize(fun=loss, x0=x0)
tmp_result = opt.minimize(fun=loss_SCAD, x0=x0, jac=loss_derivative, )
# kk = tmp_res.x[:-2].reshape((2, -1), order='F')
k = tmp_result.x[:-2].reshape((2, -1), order='F')
y_hat_1 = np.dot(x[x[:, 1] < tmp_result.x[-2]], k[:, 0].reshape((-1, 1)))
y_hat_2 = np.dot(x[(x[:, 1] >= tmp_result.x[-2]) & (x[:, 1] < tmp_result.x[-1])], k[:, 0].reshape((-1, 1)) + k[:, 1].reshape((-1, 1)))
y_hat_3 = np.dot(x[x[:, 1] >= tmp_result.x[-1]], k[:, 0].reshape((-1, 1)) + k[:, 1].reshape((-1, 1)) + k[:, 2].reshape((-1, 1)))
print(tmp_result, sep='\n')
fig = plt.figure(111, figsize=(6, 6))
plt.plot(x[:, 1][x[:, 1] < tmp_result.x[-2]], y_hat_1, c='orange', alpha=0.6, linewidth=4)
plt.plot(x[:, 1][(x[:, 1] >= tmp_result.x[-2]) & (x[:, 1] < tmp_result.x[-1])], y_hat_2, c='blue', alpha=0.6, linewidth=4)
plt.plot(x[:, 1][x[:, 1] >= tmp_result.x[-1]], y_hat_3, c='green', alpha=0.6, linewidth=4)
plt.plot(x1, y1, c='black', alpha=0.6, linewidth=4)
plt.plot(x2, y2, c='black', alpha=0.6, linewidth=4)
plt.plot(x3, y3, c='black', alpha=0.6, linewidth=4)
plt.scatter(x[:, 1], y, s=12, alpha=0.5, c='gray')
plt.axis([tmp_x.min() * 1.05, tmp_x.max() * 1.05, y.min() * 1.05, y.max() * 1.05])
plt.show()