I am writing an SVM (support vector machine) model from scratch, without using any off-the-shelf packages/libraries, purely from my own understanding of how an SVM works at the mathematical level. The model is meant to solve a classification problem.
The problem I am facing is that the hyperplane is not approximated correctly. The idea behind my code is that, if a prediction is incorrect according to the hinge cost function, the weights are updated using both the derivative of the hinge cost function and the derivative of the regularizer; otherwise, the weights are updated using only the derivative of the regularizer. This should in turn approximate the separating hyperplane correctly, so that when it is drawn on the graph it runs at an equal margin from the support vectors of both classes. In my case, however, the plotted hyperplane is completely off. I would be grateful if someone could point me in the right direction.
Many thanks.
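For reference, here is the update rule my code is meant to implement: the subgradient of the regularized hinge loss, with the regularization strength \lambda played by the 1/epoch factor in the code. If I have derived it correctly, for a sample (x_i, y_i) it is:

J(w) = \lambda \lVert w \rVert^2 + \max\left(0,\ 1 - y_i \, (w \cdot x_i)\right)

w \leftarrow w + \eta \left( y_i x_i - 2 \lambda w \right) \quad \text{if } y_i (w \cdot x_i) < 1
w \leftarrow w + \eta \left( -2 \lambda w \right) \quad \text{otherwise}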
#To help us perform math operations
import numpy as np
#to plot our data and model visually
from matplotlib import pyplot as plt
#Step 1 - Define our data
#Input data - Of the form [X value, Y value, Bias term]
X = np.array([
    [-2, 4, -1],
    [4, 1, -1],
    [1, 6, -1],
    [2, 4, -1],
    [6, 2, -1],
])
#Associated output labels - First 2 examples are labeled '-1' and last 3 are labeled '+1'
y = np.array([-1,-1,1,1,1])
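(To spell out what the trailing -1 in every row does: it folds the bias into the weight vector, so a prediction is just the sign of a dot product. A tiny helper along these lines -- hypothetical, not used anywhere below -- would be:)
#hypothetical helper: classify a raw 2D point with a learned 3-component weight vector w;
#appending -1 mirrors the bias term baked into the training rows above
def predict(point, w):
    return np.sign(np.dot(np.append(point, -1), w))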
#let's plot these examples on a 2D graph!
#for each example
for d, sample in enumerate(X):
    # Plot the negative samples (the first 2)
    if d < 2:
        plt.scatter(sample[0], sample[1], s=120, marker='_', linewidths=2)
    # Plot the positive samples (the last 3)
    else:
        plt.scatter(sample[0], sample[1], s=120, marker='+', linewidths=2)
# Plot a possible hyperplane that separates the two classes.
#we'll plot two points and draw the line between them (naive guess)
plt.plot([-2, 6], [6, 0.5])
#let's perform stochastic gradient descent to learn the separating hyperplane between both classes
def svm_sgd_plot(X, Y):
    #Initialize our SVM's weight vector with random values (3 entries: x weight, y weight, bias)
    w = np.random.rand(len(X[0]))
    #The learning rate
    eta = 1
    #how many iterations to train for
    #(note: range(1, epochs) below is empty when epochs == 1, so nothing would be learned)
    epochs = 100000
    #store misclassifications so we can plot how they change over time
    errors = []
    #training part, gradient descent part
    for epoch in range(1, epochs):
        error = 0
        for i, x in enumerate(X):
            #misclassification - calculating error (prediction vs target) using the Hinge cost function
            if (Y[i] * np.dot(X[i], w)) < 1:
                #misclassified: update the weights with respect to both the derivative of the
                #Hinge cost function and the derivative of the regularizer
                w = w + eta * ((X[i] * Y[i]) + (-2 * (1/epoch) * w))
                error = 1
            else:
                #correct classification: update the weights with respect to only the
                #derivative of the regularizer
                w = w + eta * (-2 * (1/epoch) * w)
        errors.append(error)
    #let's plot the rate of classification errors during training for our SVM
    plt.plot(errors, '*')
    plt.ylim(0.5, 1.5)
    plt.gca().set_yticklabels([])
    plt.xlabel('Epoch')
    plt.ylabel('Misclassified')
    plt.show()

    return w
w = svm_sgd_plot(X,y)
#they decrease over time! Our SVM is learning the optimal hyperplane
for d, sample in enumerate(X):
    # Plot the negative samples
    if d < 2:
        plt.scatter(sample[0], sample[1], s=120, marker='_', linewidths=2)
    # Plot the positive samples
    else:
        plt.scatter(sample[0], sample[1], s=120, marker='+', linewidths=2)
# Add our test samples
plt.scatter(2, 2, s=120, marker='_', linewidths=2, color='red')
plt.scatter(4, 3, s=120, marker='+', linewidths=2, color='green')
# Plot the hyperplane calculated by svm_sgd_plot()
x2 = [w[0], w[1], -w[1], w[0]]
x3 = [w[0], w[1], w[1], -w[0]]
x2x3 = np.array([x2, x3])
x_cor, y_cor, U, V = zip(*x2x3)
ax = plt.gca()
ax.quiver(x_cor, y_cor, U, V, scale=1, color='blue')
plt.show()
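As a sanity check on whatever w comes back, one could print the functional margin y_i * (w . x_i) for every training sample; once training has converged, each value should be at least 1. A minimal sketch, assuming the X, y, and w defined above:
#sanity check: every training sample should satisfy y_i * (w . x_i) >= 1 after convergence
for i in range(len(X)):
    print("sample %d: functional margin = %.3f" % (i, y[i] * np.dot(X[i], w)))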