Multi-layer perceptron weights not changing

Asked: 2017-11-30 13:37:49

Tags: python machine-learning neural-network

I am new to machine learning and started with Machine Learning: An Algorithmic Perspective. I am trying to build a logistic classifier that identifies malicious programs among benign ones, by adapting the code given on the book's website. However, even after 100000 epochs, the weights associated with the hidden layer and the output layer do not change.

I have tried running the algorithm on the full dataset as well as on a partial version of it, but still no luck.

Here is my MLP class:

import numpy as np


class mlp:

    def __init__(self, inputs, targets, nhidden, beta=1, momentum=0.9, outtype='logistic'):
        """ Constructor """
        # Set up network size
        self.nin = np.shape(inputs)[1]
        self.nout = np.shape(targets)[1]
        self.ndata = np.shape(inputs)[0]
        self.nhidden = nhidden

        self.beta = beta
        self.momentum = momentum
        self.outtype = outtype

        # Initialise network
        self.weights1 = (np.zeros((self.nin + 1, self.nhidden), dtype=float) - 0.5) * 2 / np.sqrt(self.nin)
        self.weights2 = (np.zeros((self.nhidden + 1, self.nout), dtype=float) - 0.5) * 2 / np.sqrt(self.nhidden)

    def earlystopping(self, inputs, targets, valid, validtargets, eta, niterations=100):

        valid = np.concatenate((valid, -np.ones((np.shape(valid)[0], 1))), axis=1)

        old_val_error1 = 100002
        old_val_error2 = 100001
        new_val_error = 100000

        count = 0
        while (((old_val_error1 - new_val_error) > 0.001) or ((old_val_error2 - old_val_error1) > 0.001)):
            count += 1
            print(count)
            self.mlptrain(inputs, targets, eta, niterations)
            old_val_error2 = old_val_error1
            old_val_error1 = new_val_error
            validout = self.mlpfwd(valid)
            new_val_error = 0.5 * np.sum((validtargets - validout) ** 2)

        print("Stopped", new_val_error, old_val_error1, old_val_error2)
        return new_val_error

    def mlptrain(self, inputs, targets, eta, niterations):
        """ Train the thing """
        # Add the inputs that match the bias node
        inputs = np.concatenate((inputs, -np.ones((self.ndata, 1))), axis=1)
        change = range(self.ndata)

        print(self.weights2)
        updatew1 = np.zeros((np.shape(self.weights1)))
        updatew2 = np.zeros((np.shape(self.weights2)))

        for n in range(niterations):

            self.outputs = self.mlpfwd(inputs)

            # error = 0.5 * np.sum((self.outputs - targets) ** 2)
            if (np.mod(n, 100) == 0):
                print("Iteration: ", n, " Weight2: ", self.weights2)

            # Different types of output neurons
            if self.outtype == 'linear':
                deltao = (self.outputs - targets) / self.ndata
            elif self.outtype == 'logistic':
                deltao = self.beta * (self.outputs - targets) * self.outputs * (1.0 - self.outputs)
            elif self.outtype == 'softmax':
                deltao = (self.outputs - targets) * (self.outputs * (-self.outputs) + self.outputs) / self.ndata
            else:
                print("error")

            deltah = self.hidden * self.beta * (1.0 - self.hidden) * (np.dot(deltao, np.transpose(self.weights2)))

            updatew1 = eta * (np.dot(np.transpose(inputs), deltah[:, :-1])) + self.momentum * updatew1
            updatew2 = eta * (np.dot(np.transpose(self.hidden), deltao)) + self.momentum * updatew2
            self.weights1 -= updatew1
            self.weights2 -= updatew2

            # Randomise order of inputs (not necessary for matrix-based calculation)
            # np.random.shuffle(change)
            # inputs = inputs[change,:]
            # targets = targets[change,:]

        print(self.weights2)

    def mlpfwd(self, inputs):
        """ Run the network forward """

        self.hidden = np.dot(inputs, self.weights1)
        self.hidden = 1.0 / (1.0 + np.exp(-self.beta * self.hidden))
        self.hidden = np.concatenate((self.hidden, -np.ones((np.shape(inputs)[0], 1))), axis=1)

        outputs = np.dot(self.hidden, self.weights2)

        # Different types of output neurons
        if self.outtype == 'linear':
            return outputs
        elif self.outtype == 'logistic':
            return 1.0 / (1.0 + np.exp(-self.beta * outputs))
        elif self.outtype == 'softmax':
            normalisers = np.sum(np.exp(outputs), axis=1) * np.ones((1, np.shape(outputs)[0]))
            return np.transpose(np.transpose(np.exp(outputs)) / normalisers)
        else:
            print("error")

    def confmat(self, inputs, targets):
        """Confusion matrix"""

        # Add the inputs that match the bias node
        inputs = np.concatenate((inputs, -np.ones((np.shape(inputs)[0], 1))), axis=1)
        outputs = self.mlpfwd(inputs)

        nclasses = np.shape(targets)[1]

        if nclasses == 1:
            nclasses = 2
            outputs = np.where(outputs > 0.5, 1, 0)
        else:
            # 1-of-N encoding
            outputs = np.argmax(outputs, 1)
            targets = np.argmax(targets, 1)

        cm = np.zeros((nclasses, nclasses))
        for i in range(nclasses):
            for j in range(nclasses):
                cm[i, j] = np.sum(np.where(outputs == j, 1, 0) * np.where(targets == i, 1, 0))

        print(outputs)
        print(targets)

        print("Confusion matrix is:")
        print(cm)
        print("Percentage Correct: ", np.trace(cm) / np.sum(cm) * 100)

Here is the calling code where I feed in the data:

import mlp
import numpy as np

apk_train_data = np.array([
    [4, 1, 6, 29, 0, 3711, 1423906, 0],
    [20, 1, 5, 24, 0, 4082, 501440, 0],
    [3, 0, 1, 6, 0, 5961, 2426358, 0],
    [0, 0, 2, 27, 0, 6074, 28762, 0],
    [12, 1, 3, 17, 0, 4066, 505, 0],
    [1, 0, 2, 5, 0, 1284, 38504, 0],
    [2, 0, 2, 10, 0, 2421, 5827165, 0],
    [5, 0, 17, 97, 0, 25095, 7429, 0],
    [1, 1, 3, 22, 6, 4539, 9100705, 0],
    [2, 0, 4, 15, 0, 2054, 264563, 0],
    [3, 1, 6, 19, 0, 3562, 978171, 0],
    [8, 0, 5, 12, 3, 1741, 1351990, 0],
    [9, 0, 5, 12, 2, 1660, 2022743, 0],
    [9, 0, 5, 12, 2, 1664, 2022743, 0],
    [10, 4, 11, 70, 8, 43944, 51488321, 1],
    [6, 0, 3, 18, 0, 8511, 19984102, 1],
    [11, 2, 6, 44, 0, 61398, 32139, 1],
    [0, 0, 0, 0, 0, 1008, 23872, 1],
    [7, 1, 1, 16, 3, 46792, 94818, 1],
    [3, 2, 1, 13, 2, 8263, 208820, 1],
    [0, 0, 0, 2, 0, 2749, 3926, 1],
    [10, 0, 1, 9, 0, 5220, 2275848, 1],
    [1, 1, 3, 34, 6, 50030, 814322, 1],
    [2, 2, 4, 48, 7, 86406, 12895, 1],
    [0, 1, 5, 45, 2, 63060, 803121, 1],
    [1, 0, 2, 11, 7, 7602, 1557, 1],
    [3, 0, 1, 15, 3, 20813, 218352, 1]
])
apk_test_data = np.array([
    [0, 0, 1, 9, 0, 4317, 118082, 0],
    [8, 0, 5, 12, 3, 1742, 1351990, 0],
    [8, 0, 5, 12, 3, 1744, 1351990, 0],
    [0, 0, 1, 11, 2, 17630, 326164, 1],
    [10, 2, 6, 45, 7, 22668, 30257520, 1],
    [1, 0, 1, 8, 0, 9317, 33000349, 1],
    [3, 0, 1, 15, 3, 20813, 218352, 1]
])

p = mlp.mlp(apk_train_data[:, 0:7], apk_train_data[:, 7:], 9)
p.mlptrain(apk_train_data[:, 0:7], apk_train_data[:, 7:], 0.25, 100000)
p.confmat(apk_test_data[:, 0:7], apk_test_data[:, 7:])

Each vector has 7 dimensions, and the last entry is the target.

Here is the full text file containing the dataset: https://drive.google.com/open?id=1q_aGNgHxTBh_mmVAzVXKBa27NTJKeKV8

Please tell me what I am doing wrong. If there is an easy-to-use library that does the same thing, please suggest it.

1 Answer:

Answer 0 (score: 0):

As mentioned in the comments, the network weights should be initialised randomly for the network to train. With the np.zeros initialisation above, all weights start at the same value, so every hidden unit computes the same activation and receives the same gradient update; the network never breaks this symmetry and effectively does not learn.

    # Initialise network
    self.weights1 = (np.random.rand(self.nin + 1, self.nhidden) - 0.5) * 2 / np.sqrt(self.nin)
    self.weights2 = (np.random.rand(self.nhidden + 1, self.nout) - 0.5) * 2 / np.sqrt(self.nhidden)
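
As a side note, if you want runs to be reproducible while debugging, you can seed NumPy's random number generator before constructing the network (the seed value here is arbitrary):

import numpy as np

np.random.seed(42)  # arbitrary seed; makes the random weight initialisation repeatable
p = mlp.mlp(apk_train_data[:, 0:7], apk_train_data[:, 7:], 9)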

Next, I observe from your data that the attributes are on completely different scales. This means your network's gradient updates will be dominated by a single feature. One way to fix this is to standardise your data:

from sklearn.preprocessing import StandardScaler

# Cast to float first: the integer arrays above would silently truncate the scaled values
apk_train_data = apk_train_data.astype(float)
apk_test_data = apk_test_data.astype(float)
for i in range(apk_train_data.shape[1] - 1):
    scaler = StandardScaler().fit(apk_train_data[:, i].reshape(-1, 1))  # scaler expects 2-D input
    apk_train_data[:, i] = scaler.transform(apk_train_data[:, i].reshape(-1, 1)).ravel()
    apk_test_data[:, i] = scaler.transform(apk_test_data[:, i].reshape(-1, 1)).ravel()
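
Since StandardScaler standardises each column independently, the per-column loop can also be collapsed into a single fit/transform over the whole feature block (an equivalent variant, assuming the arrays have already been cast to float as above):

scaler = StandardScaler().fit(apk_train_data[:, :7])  # learn mean/std per feature column
apk_train_data[:, :7] = scaler.transform(apk_train_data[:, :7])
apk_test_data[:, :7] = scaler.transform(apk_test_data[:, :7])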

Last but not least, an eta of 0.25 is too large. I will illustrate by using the opposite extreme:

p.mlptrain(apk_train_data[:, 0:7], apk_train_data[:, 7:], 0.0001, 100000)
p.confmat(apk_test_data[:, 0:7], apk_test_data[:, 7:])
# >> Percentage Correct:  71.4285714286
p.confmat(apk_train_data[:,0:7], apk_train_data[:,7:])
# >> Percentage Correct: 88.8888888889
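
Finally, since you asked for an easy-to-use library: scikit-learn (already used for the scaling above) provides MLPClassifier, which implements the same kind of network. A minimal sketch on your standardised data, with illustrative rather than tuned hyperparameters:

from sklearn.neural_network import MLPClassifier

# hidden_layer_sizes=(9,) mirrors the 9 hidden nodes used above; other settings are illustrative
clf = MLPClassifier(hidden_layer_sizes=(9,), activation='logistic',
                    learning_rate_init=0.0001, max_iter=10000)
clf.fit(apk_train_data[:, 0:7], apk_train_data[:, 7])
print("Test accuracy:", clf.score(apk_test_data[:, 0:7], apk_test_data[:, 7]))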