Question

我用Python写了一个非常简单的逻辑回归实现，但是由于某些原因，损失函数并未减少。这是我的实现：

from __future__ import print_function

import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import roc_auc_score

class LogisticRegression:
    def __init__(self, input_size, alpha, std, learning_rate, epsilon, num_epochs):
        self.W = std * np.random.randn(input_size + 1)
        self.alpha = alpha
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.num_epochs = num_epochs
    
    def fit(self, X, y):
        X = np.append(X, np.ones((X.shape[0], 1)), axis=1) # bias term
        N, D = X.shape

        self.loss_hist_ = []
        for epoch in range(self.num_epochs):
            loss, loss_grads = self.loss(X, y)
            self.loss_hist_.append(loss)
            print('Epoch', epoch+1, 'loss', loss)
            if loss < self.epsilon:
                break
            self.W -= self.learning_rate * loss_grads

        return self

    def predict(self, X):
        X = np.append(X, np.ones((X.shape[0], 1)), axis=1) # bias term
        probs = self.sigmoid_Wx(X)
        y_pred = (probs >= 0.5)
        return probs, y_pred

    def sigmoid_Wx(self, X):
        scores = np.matmul(X, self.W) # (N, D) x (D,) = (N,), one score per datapoint
        return 1 / (1 + np.exp(-scores))

    def loss(self, X, y):
        N, D = X.shape

        probs = self.sigmoid_Wx(X)
        data_loss = (-1 / N) * sum(y * np.log(probs) + (1 - y) * np.log(1. - probs))
        reg_loss = self.alpha * self.W.dot(self.W)
        loss = data_loss + reg_loss

        loss_grads = (-1 / N) * np.matmul(X.T, probs - y) # (D, N) x (N,) = (D,)
        loss_grads += 2*self.alpha*self.W

        return loss, loss_grads

DATA_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', '..', 'Data'))
FIGURES_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', 'Figures'))

def main():
    X_train, y_train, X_val, y_val = map(lambda fn: pd.read_csv(os.path.join(DATA_DIR, fn)), ['X_train.csv', 'Y_train.csv', 'X_val.csv', 'Y_val.csv'])

    X_train = X_train['Review Text'] # ignore all feats besides review text
    X_val = X_val['Review Text']
    y_train = (y_train['Sentiment'] == 'Positive').values.astype(int) # binarize labels
    y_val = (y_val['Sentiment'] == 'Positive').values.astype(int)

    print('Transforming into bow representation')
    vocab = sorted(CountVectorizer().fit(X_train).vocabulary_.keys())
    # todo: don't use dense arrays here
    X_train = CountVectorizer(vocabulary=vocab).fit_transform(X_train).toarray()
    X_val = CountVectorizer(vocabulary=vocab).fit_transform(X_val).toarray()

    lr = LogisticRegression(input_size=len(vocab), alpha=0., std=1e-4, learning_rate=1e-5, epsilon=1e-8, num_epochs=10)
    print('Fitting training set')
    lr.fit(X_train, y_train)
    print('Making predictions for validation set')
    y_val_probs, y_val_pred = lr.predict(X_val)

    print(roc_auc_score(y_val, y_val_probs))

if __name__ == '__main__':
    main()

这是我得到的输出：

$ py -3 LR_Template.py
Transforming into bow representation
Fitting training set
Epoch 1 loss 0.6935215358862573
Epoch 2 loss 0.6935650560886688
Epoch 3 loss 0.6936085850502569
Epoch 4 loss 0.69365212277282
Epoch 5 loss 0.6936956692581194
Epoch 6 loss 0.6937392245079684
Epoch 7 loss 0.6937827885241183
Epoch 8 loss 0.6938263613083845
Epoch 9 loss 0.6938699428625258
Epoch 10 loss 0.6939135331883654
Making predictions for validation set
0.5702245722219217

我不确定我在哪里错了？据我所知，所有的数学运算都是正确的。

逻辑回归实现的损失函数没有减少

0 个答案: