I am new to machine learning and started with Machine Learning: An Algorithmic Perspective. I am trying to build a logistic classifier that identifies malicious programs among benign ones, by adapting the code given on the book's website. However, even after 100,000 epochs, the weights associated with the hidden and output layers do not change.
I have tried running the algorithm on the full dataset as well as on a partial version of it, but still no luck.
Here is my MLP class:
import numpy as np
class mlp:
    def __init__(self, inputs, targets, nhidden, beta=1, momentum=0.9, outtype='logistic'):
        """ Constructor """
        # Set up network size
        self.nin = np.shape(inputs)[1]
        self.nout = np.shape(targets)[1]
        self.ndata = np.shape(inputs)[0]
        self.nhidden = nhidden

        self.beta = beta
        self.momentum = momentum
        self.outtype = outtype

        # Initialise network
        self.weights1 = (np.zeros((self.nin + 1, self.nhidden), dtype=float) - 0.5) * 2 / np.sqrt(self.nin)
        self.weights2 = (np.zeros((self.nhidden + 1, self.nout), dtype=float) - 0.5) * 2 / np.sqrt(self.nhidden)
    def earlystopping(self, inputs, targets, valid, validtargets, eta, niterations=100):
        valid = np.concatenate((valid, -np.ones((np.shape(valid)[0], 1))), axis=1)

        old_val_error1 = 100002
        old_val_error2 = 100001
        new_val_error = 100000

        count = 0
        while ((old_val_error1 - new_val_error) > 0.001) or ((old_val_error2 - old_val_error1) > 0.001):
            count += 1
            print(count)
            self.mlptrain(inputs, targets, eta, niterations)
            old_val_error2 = old_val_error1
            old_val_error1 = new_val_error
            validout = self.mlpfwd(valid)
            new_val_error = 0.5 * np.sum((validtargets - validout) ** 2)

        print("Stopped", new_val_error, old_val_error1, old_val_error2)
        return new_val_error
    def mlptrain(self, inputs, targets, eta, niterations):
        """ Train the thing """
        # Add the inputs that match the bias node
        inputs = np.concatenate((inputs, -np.ones((self.ndata, 1))), axis=1)
        change = range(self.ndata)
        print(self.weights2)

        updatew1 = np.zeros(np.shape(self.weights1))
        updatew2 = np.zeros(np.shape(self.weights2))

        for n in range(niterations):
            self.outputs = self.mlpfwd(inputs)
            # error = 0.5 * np.sum((self.outputs - targets) ** 2)
            if np.mod(n, 100) == 0:
                print("Iteration: ", n, " Weight2: ", self.weights2)

            # Different types of output neurons
            if self.outtype == 'linear':
                deltao = (self.outputs - targets) / self.ndata
            elif self.outtype == 'logistic':
                deltao = self.beta * (self.outputs - targets) * self.outputs * (1.0 - self.outputs)
            elif self.outtype == 'softmax':
                deltao = (self.outputs - targets) * (self.outputs * (-self.outputs) + self.outputs) / self.ndata
            else:
                print("error")

            deltah = self.hidden * self.beta * (1.0 - self.hidden) * np.dot(deltao, np.transpose(self.weights2))

            updatew1 = eta * np.dot(np.transpose(inputs), deltah[:, :-1]) + self.momentum * updatew1
            updatew2 = eta * np.dot(np.transpose(self.hidden), deltao) + self.momentum * updatew2
            self.weights1 -= updatew1
            self.weights2 -= updatew2

            # Randomise order of inputs (not necessary for matrix-based calculation)
            # np.random.shuffle(change)
            # inputs = inputs[change, :]
            # targets = targets[change, :]

        print(self.weights2)
    def mlpfwd(self, inputs):
        """ Run the network forward """
        self.hidden = np.dot(inputs, self.weights1)
        self.hidden = 1.0 / (1.0 + np.exp(-self.beta * self.hidden))
        self.hidden = np.concatenate((self.hidden, -np.ones((np.shape(inputs)[0], 1))), axis=1)

        outputs = np.dot(self.hidden, self.weights2)

        # Different types of output neurons
        if self.outtype == 'linear':
            return outputs
        elif self.outtype == 'logistic':
            return 1.0 / (1.0 + np.exp(-self.beta * outputs))
        elif self.outtype == 'softmax':
            normalisers = np.sum(np.exp(outputs), axis=1) * np.ones((1, np.shape(outputs)[0]))
            return np.transpose(np.transpose(np.exp(outputs)) / normalisers)
        else:
            print("error")
    def confmat(self, inputs, targets):
        """Confusion matrix"""
        # Add the inputs that match the bias node
        inputs = np.concatenate((inputs, -np.ones((np.shape(inputs)[0], 1))), axis=1)
        outputs = self.mlpfwd(inputs)

        nclasses = np.shape(targets)[1]
        if nclasses == 1:
            nclasses = 2
            outputs = np.where(outputs > 0.5, 1, 0)
        else:
            # 1-of-N encoding
            outputs = np.argmax(outputs, 1)
            targets = np.argmax(targets, 1)

        cm = np.zeros((nclasses, nclasses))
        for i in range(nclasses):
            for j in range(nclasses):
                cm[i, j] = np.sum(np.where(outputs == j, 1, 0) * np.where(targets == i, 1, 0))

        print(outputs)
        print(targets)
        print("Confusion matrix is:")
        print(cm)
        print("Percentage Correct: ", np.trace(cm) / np.sum(cm) * 100)
Here is the calling code where I feed in the data:
import mlp
import numpy as np
apk_train_data = np.array([
[4, 1, 6, 29, 0, 3711, 1423906, 0],
[20, 1, 5, 24, 0, 4082, 501440, 0],
[3, 0, 1, 6, 0, 5961, 2426358, 0],
[0, 0, 2, 27, 0, 6074, 28762, 0],
[12, 1, 3, 17, 0, 4066, 505, 0],
[1, 0, 2, 5, 0, 1284, 38504, 0],
[2, 0, 2, 10, 0, 2421, 5827165, 0],
[5, 0, 17, 97, 0, 25095, 7429, 0],
[1, 1, 3, 22, 6, 4539, 9100705, 0],
[2, 0, 4, 15, 0, 2054, 264563, 0],
[3, 1, 6, 19, 0, 3562, 978171, 0],
[8, 0, 5, 12, 3, 1741, 1351990, 0],
[9, 0, 5, 12, 2, 1660, 2022743, 0],
[9, 0, 5, 12, 2, 1664, 2022743, 0],
[10, 4, 11, 70, 8, 43944, 51488321, 1],
[6, 0, 3, 18, 0, 8511, 19984102, 1],
[11, 2, 6, 44, 0, 61398, 32139, 1],
[0, 0, 0, 0, 0, 1008, 23872, 1],
[7, 1, 1, 16, 3, 46792, 94818, 1],
[3, 2, 1, 13, 2, 8263, 208820, 1],
[0, 0, 0, 2, 0, 2749, 3926, 1],
[10, 0, 1, 9, 0, 5220, 2275848, 1],
[1, 1, 3, 34, 6, 50030, 814322, 1],
[2, 2, 4, 48, 7, 86406, 12895, 1],
[0, 1, 5, 45, 2, 63060, 803121, 1],
[1, 0, 2, 11, 7, 7602, 1557, 1],
[3, 0, 1, 15, 3, 20813, 218352, 1]
])
apk_test_data = np.array([
[0, 0, 1, 9, 0, 4317, 118082, 0],
[8, 0, 5, 12, 3, 1742, 1351990, 0],
[8, 0, 5, 12, 3, 1744, 1351990, 0],
[0, 0, 1, 11, 2, 17630, 326164, 1],
[10, 2, 6, 45, 7, 22668, 30257520, 1],
[1, 0, 1, 8, 0, 9317, 33000349, 1],
[3, 0, 1, 15, 3, 20813, 218352, 1]
])
p = mlp.mlp(apk_train_data[:, 0:7], apk_train_data[:, 7:], 9)
p.mlptrain(apk_train_data[:, 0:7], apk_train_data[:, 7:], 0.25, 100000)
p.confmat(apk_test_data[:, 0:7], apk_test_data[:, 7:])
Each vector has 7 dimensions, and the last entry is the target.
Here is the text file containing the full dataset: https://drive.google.com/open?id=1q_aGNgHxTBh_mmVAzVXKBa27NTJKeKV8
Please tell me what I am doing wrong. If there is an easy-to-use library that does the same thing, please suggest one.
Answer 0 (score: 0):
As mentioned in the comments, the network weights need to be randomly initialized for the network to train. With your initialization, every weight starts at the same value, so every hidden unit computes exactly the same activation and receives exactly the same gradient update; that symmetry is never broken, and the units can never learn different features.
# Initialise network
self.weights1 = (np.random.rand(self.nin + 1, self.nhidden) - 0.5) * 2 / np.sqrt(self.nin)
self.weights2 = (np.random.rand(self.nhidden + 1, self.nout) - 0.5) * 2 / np.sqrt(self.nhidden)
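A quick standalone check (using the same shapes as your network) makes the problem visible: with the zeros-based initialization, every weight in the matrix shares one single value.

import numpy as np

nin, nhidden = 7, 9
w = (np.zeros((nin + 1, nhidden)) - 0.5) * 2 / np.sqrt(nin)
print(np.unique(w))  # prints one value for all (nin + 1) * nhidden weights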
Next, looking at your data, the features are on completely different scales. This means the gradient updates to your network will be dominated by a single feature. One way to address this is to standardize the data. (Note two pitfalls here: scikit-learn's StandardScaler expects 2-D input, and your arrays are integer arrays, so cast them to float before writing the scaled values back.)
from sklearn.preprocessing import StandardScaler

apk_train_data = apk_train_data.astype(float)  # avoid silently truncating scaled values to ints
apk_test_data = apk_test_data.astype(float)
for i in range(apk_train_data.shape[1] - 1):  # skip the target column
    scaler = StandardScaler().fit(apk_train_data[:, i:i + 1])  # fit on training data only; 2-D slice
    apk_train_data[:, i:i + 1] = scaler.transform(apk_train_data[:, i:i + 1])
    apk_test_data[:, i:i + 1] = scaler.transform(apk_test_data[:, i:i + 1])
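Equivalently, since StandardScaler standardizes each column independently, all seven feature columns can be fitted in one call:

scaler = StandardScaler().fit(apk_train_data[:, :-1])
apk_train_data[:, :-1] = scaler.transform(apk_train_data[:, :-1])
apk_test_data[:, :-1] = scaler.transform(apk_test_data[:, :-1])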
Last but not least, an eta of 0.25 is too large. I will illustrate by going to the opposite extreme:
p.mlptrain(apk_train_data[:, 0:7], apk_train_data[:, 7:], 0.0001, 100000)
p.confmat(apk_test_data[:, 0:7], apk_test_data[:, 7:])
# >> Percentage Correct: 71.4285714286
p.confmat(apk_train_data[:,0:7], apk_train_data[:,7:])
# >> Percentage Correct: 88.8888888889
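Finally, regarding the request for an easy-to-use library: scikit-learn ships a ready-made multi-layer perceptron, MLPClassifier. A minimal sketch under the same setup, starting from the raw, unscaled arrays (one hidden layer of 9 logistic units; the hyperparameters below are illustrative, not tuned):

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

# Features are the first seven columns; the label is the last column.
X_train, y_train = apk_train_data[:, :7], apk_train_data[:, 7]
X_test, y_test = apk_test_data[:, :7], apk_test_data[:, 7]

# Fit the scaler on the training set only, then reuse it for the test set.
scaler = StandardScaler().fit(X_train)
clf = MLPClassifier(hidden_layer_sizes=(9,), activation='logistic', max_iter=10000)
clf.fit(scaler.transform(X_train), y_train)

pred = clf.predict(scaler.transform(X_test))
print(confusion_matrix(y_test, pred))
print("Percentage Correct:", 100 * clf.score(scaler.transform(X_test), y_test))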