Question

我试图让自定义实现得到与 sklearn 几乎相同的结果，但效果并不理想。我的自定义实现与 sklearn 实现的截距（intercept）相差约 0.05，因此我想尽可能缩小这一差距。

我的sklearn代码如下：

from sklearn import linear_model
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Generate a synthetic two-class dataset and hold out 25% of it for testing.
X, y = make_classification(n_samples=50000, n_features=15, n_informative=10, n_redundant=5,
                           n_classes=2, weights=[0.7], class_sep=0.7, random_state=15)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=15)

# Reference model: SGD with log loss, an L2 penalty and a constant learning rate.
clf = linear_model.SGDClassifier(eta0=0.0001, alpha=0.0001, loss='log', random_state=15, penalty='l2', tol=1e-3, verbose=2, learning_rate='constant')

clf.fit(X=X_train, y=y_train) # fitting our model

# Learned weights, their shape, and the intercept to compare against.
print(clf.coef_, clf.coef_.shape, clf.intercept_)

输出结果为：

(array([[-0.42336692,  0.18547565, -0.14859036,  0.34144407, -0.2081867 ,
          0.56016579, -0.45242483, -0.09408813,  0.2092732 ,  0.18084126,
          0.19705191,  0.00421916, -0.0796037 ,  0.33852802,  0.02266721]]),
 (1, 15),
 array([-0.8531383]))

我的自定义实现

def initialize_weights(dim):
    '''Build the starting parameters: an all-zero weight array and a zero bias.

    dim serves as a template: the weights come from np.zeros_like(dim), so
    they take the shape and dtype of dim. Returns the tuple (w, b).
    '''
    bias = 0
    weights = np.zeros_like(dim)
    return weights, bias

def sigmoid(z):
    '''Logistic function 1 / (1 + e^(-z)); element-wise when z is an array.'''
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def logloss(y_true,y_pred):
    '''Average binary cross-entropy of y_pred against y_true.

    Logarithms are base 10 (np.log10). The two inputs are paired with zip();
    when there are no pairs the result is 0.
    '''
    pairs = list(zip(y_true, y_pred))
    n_pairs = len(pairs)
    total = 0
    for label, prob in pairs:
        # Each pair contributes its share of the mean as it is accumulated.
        contribution = label*np.log10(prob) + (1-label) * np.log10(1-prob)
        total += (-1/n_pairs)*contribution
    return total

def gradient_dw(x,y,w,b,alpha,N):
    '''Per-sample update direction for the weight vector w.

    Combines the data term x*(y - sigmoid(w.x + b)) with a penalty on w
    that is scaled by (1/alpha)*(1/N).
    '''
    residual = y - sigmoid(np.dot(w, x) + b)
    penalty = (1/alpha)*(1/N) * w
    return x*residual - penalty

def gradient_db(x,y,w,b):
    '''Per-sample update direction for the bias b.

    Returns y - sigmoid(w.x + b), the difference between the label and the
    predicted probability for the sample x.
    '''
    z = np.dot(w, x) + b
    db = y - sigmoid(z)

    # The original returned the undefined name `DB`, which raises NameError.
    return db

def train(X_train,y_train,X_test,y_test,epochs,alpha,eta0, tol=1e-3):
    '''Fit logistic regression with per-sample updates and record the losses.

    eta0 is the constant learning rate; alpha is forwarded to gradient_dw().
    tol is accepted but not used: every one of the `epochs` passes is run.
    Returns (w, b, train_loss, test_loss), where each list holds one
    logloss() value per epoch.
    '''
    w, b = initialize_weights(X_train[0])
    n_samples = len(X_train)
    train_loss, test_loss = [], []
    for epoch in range(epochs):
        # One pass over the training data, updating after every sample.
        for x, y in zip(X_train, y_train):
            # Both directions are evaluated at the current (w, b) before either changes.
            dw = gradient_dw(x, y, w, b, alpha, n_samples)
            db = gradient_db(x, y, w, b)
            w = w + eta0 * dw
            b = b + eta0 * db

        # Score both splits with the end-of-epoch weights.
        # Note: these predictions use np.dot(w, x) only; b is not added.
        y_pred = [sigmoid(np.dot(w, x)) for x in X_train]
        y_pred_test = [sigmoid(np.dot(w, x)) for x in X_test]
        print(f"EPOCH: {epoch} Train Loss: {logloss(y_train, y_pred)} Test Loss: {logloss(y_test, y_pred_test)}")
        train_loss.append(logloss(y_train, y_pred))
        test_loss.append(logloss(y_test, y_pred_test))

    return w,b, train_loss, test_loss



# Same eta0 and alpha values as the SGDClassifier call, run for 50 epochs.
epochs=50
eta0=0.0001
alpha=0.0001
N=len(X_train)
w,b, train_loss, test_loss=train(X_train,y_train,X_test,y_test,epochs=epochs,alpha=alpha,eta0=eta0)

得到的 w、b 如下：

(array([-0.22281323,  0.10570237, -0.02506523,  0.16630429, -0.07033019,
         0.27985805, -0.27348925, -0.04622113,  0.13212066,  0.05330409,
         0.09926212, -0.00791336, -0.02920803,  0.1828124 ,  0.03442375]),
 -0.8019981458384148)

请帮助。

Answer 1

在您的函数 gradient_dw() 中，alpha（即正则化系数）应位于分子中。

def gradient_dw(x,y,w,b,alpha,N):
    '''Per-sample update direction for the weight vector w.

    Combines the data term x*(y - sigmoid(w.x + b)) with a penalty on w
    that is scaled by alpha*(1/N).
    '''
    residual = y - sigmoid(np.dot(w, x) + b)
    penalty = (alpha)*(1/N) * w
    return x*residual - penalty

这是因为带 L2 正则化的逻辑回归的成本函数为

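（原文此处为 Imgur 图片）

$$J(w,b) = -\frac{1}{N}\sum_{i=1}^{N}\left[y_i\log\sigma(w^\top x_i+b) + (1-y_i)\log\left(1-\sigma(w^\top x_i+b)\right)\right] + \frac{\alpha}{2N}\lVert w\rVert^2$$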

对该成本函数关于权重求导后，梯度下降算法的更新式如下。

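（原文此处为 Imgur 图片）

$$w \leftarrow w + \eta_0\left[x_i\left(y_i-\sigma(w^\top x_i+b)\right) - \frac{\alpha}{N}\,w\right],\qquad b \leftarrow b + \eta_0\left(y_i-\sigma(w^\top x_i+b)\right)$$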

对您代码的另一处小修正：计算预测值数组的以下两行需要加上截距 b

# Add the intercept b to the linear score before applying the sigmoid.
y_pred = [sigmoid(np.dot(w, x) + b) for x in X_train]
y_pred_test = [sigmoid(np.dot(w, x) + b) for x in X_test]

因此，完整代码的最终形式如下所示，其所有权重与 Scikit-learn 实现的差异都在 0.001 的数量级。

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
import matplotlib.pyplot as plt

# Synthetic two-class dataset; 25% of the samples are held out for testing.
X, y = make_classification(
    n_samples=50000, n_features=15, n_informative=10, n_redundant=5,
    n_classes=2, weights=[0.7], class_sep=0.7, random_state=15,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=15,
)

# Reference model: SGD with log loss, an L2 penalty and a constant learning rate.
# NOTE(review): recent scikit-learn releases name this loss 'log_loss' - confirm
# against the installed version.
clf = linear_model.SGDClassifier(
    loss='log', penalty='l2', alpha=0.0001,
    learning_rate='constant', eta0=0.0001,
    tol=1e-3, random_state=15, verbose=2,
)
clf.fit(X=X_train, y=y_train)

# Learned weights, their shape, and the intercept to compare against.
print(clf.coef_, clf.coef_.shape, clf.intercept_)

def initialize_weights(dim):
    '''Build the starting parameters: an all-zero weight array and a zero bias.

    dim serves as a template: the weights come from np.zeros_like(dim), so
    they take the shape and dtype of dim. Returns the tuple (w, b).
    '''
    bias = 0
    weights = np.zeros_like(dim)
    return weights, bias

def sigmoid(z):
    '''Logistic function 1 / (1 + e^(-z)); element-wise when z is an array.'''
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def logloss(y_true,y_pred):
    '''Average binary cross-entropy of y_pred against y_true.

    Logarithms are base 10 (np.log10). The two inputs are paired with zip();
    when there are no pairs the result is 0.
    '''
    pairs = list(zip(y_true, y_pred))
    n_pairs = len(pairs)
    total = 0
    for label, prob in pairs:
        # Each pair contributes its share of the mean as it is accumulated.
        contribution = label*np.log10(prob) + (1-label) * np.log10(1-prob)
        total += (-1/n_pairs)*contribution
    return total

def gradient_dw(x,y,w,b,alpha,N):
    '''Per-sample update direction for the weight vector w.

    Combines the data term x*(y - sigmoid(w.x + b)) with a penalty on w
    that is scaled by alpha*(1/N).
    '''
    residual = y - sigmoid(np.dot(w, x) + b)
    penalty = (alpha)*(1/N) * w
    return x*residual - penalty

def gradient_db(x,y,w,b):
    '''Per-sample update direction for the bias b: y - sigmoid(w.x + b).'''
    predicted = sigmoid(np.dot(w, x) + b)
    return y - predicted

def train(X_train,y_train,X_test,y_test,epochs,alpha,eta0, tol=1e-3):
    '''Fit logistic regression with per-sample updates and record the losses.

    Parameters:
        X_train, y_train: training samples and labels, iterated in order.
        X_test, y_test: held-out samples and labels, used only for the loss.
        epochs: number of full passes over the training data.
        alpha: regularization strength forwarded to gradient_dw().
        eta0: constant learning rate.
        tol: accepted but not used; every one of the `epochs` passes is run.

    Returns (w, b, train_loss, test_loss), where each list holds one
    logloss() value per epoch.
    '''
    w, b = initialize_weights(X_train[0])
    n_samples = len(X_train)
    train_loss = []
    test_loss = []
    for epoch in range(epochs):
        # One pass over the training data, updating after every sample.
        for x, y in zip(X_train, y_train):
            # Both directions are evaluated at the current (w, b) before either changes.
            dw = gradient_dw(x, y, w, b, alpha, n_samples)
            db = gradient_db(x, y, w, b)
            w = w + eta0 * dw
            b = b + eta0 * db

        # Predictions must include the intercept b; without it the reported
        # losses describe a different model from the one being trained.
        y_pred = [sigmoid(np.dot(w, x) + b) for x in X_train]
        y_pred_test = [sigmoid(np.dot(w, x) + b) for x in X_test]

        # Compute each loss once and reuse it for both the log line and the history.
        epoch_train_loss = logloss(y_train, y_pred)
        epoch_test_loss = logloss(y_test, y_pred_test)
        print(f"EPOCH: {epoch} Train Loss: {epoch_train_loss} Test Loss: {epoch_test_loss}")
        train_loss.append(epoch_train_loss)
        test_loss.append(epoch_test_loss)

    return w,b, train_loss, test_loss

# Same eta0 and alpha values as the SGDClassifier call, run for 50 epochs.
epochs=50
eta0=0.0001
alpha=0.0001
N=len(X_train)
w,b, train_loss, test_loss=train(X_train,y_train,X_test,y_test,epochs=epochs,alpha=alpha,eta0=eta0)

# Gap between the custom parameters and the scikit-learn reference model.
weight_gap = w - clf.coef_
intercept_gap = b - clf.intercept_
print("Difference between custom w and Scikit-learn's clf.coef_ ", weight_gap)
print("Difference between custom intercept b and Scikit-learn's clf.intercept_ ", intercept_gap)

输出如下

Difference between custom w and Scikit-learn's clf.coef_  [[-0.00642552  0.00755955  0.00012041 -0.00335043 -0.01309563  0.00978314
   0.00724319  0.00418409  0.0125563  -0.00701162  0.00169655 -0.00480346
  -0.00173041  0.00056208  0.00032075]]
Difference between custom intercept b and Scikit-learn's clf.intercept_  [-0.03911387]

我的自定义逻辑回归实现有什么问题？

1 个答案:

输出如下