Here is the code I wrote to implement an SVM.
from matplotlib import style
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
style.use('ggplot')
class SVM:
    def __init__(self, visualization=True):
        self.color = {1: 'r', -1: 'b'}
        if visualization:
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(1, 1, 1)

    # train
    def fit(self, data):
        self.data = data
        opt_dict = {}
        transforms = [[1, 1],
                      [-1, 1],
                      [-1, -1],
                      [1, -1]]
        all_data = []
        for yi in self.data:
            for featureset in self.data[yi]:
                for feature in featureset:
                    all_data.append(feature)
        self.max_feature_value = max(all_data)
        self.min_feature_value = min(all_data)
        all_data = None
        step_sizes = [self.max_feature_value * 0.1,
                      self.max_feature_value * 0.01,
                      self.max_feature_value * 0.001]
        b_range_multiple = 5
        b_multiple = 5
        latest_optimum = self.max_feature_value * 10
        for step in step_sizes:
            w = np.array([latest_optimum, latest_optimum])
            # we can do this because it is convex
            optimized = False
            while not optimized:
                for b in np.arange(-1 * (self.max_feature_value * b_range_multiple),
                                   self.max_feature_value * b_range_multiple,
                                   step * b_multiple):
                    for transformation in transforms:
                        w_t = w * transformation
                        # weak link in the SVM fundamentally
                        # SMO tries to fix this a bit
                        # yi(xi.w+b)
                        for i in self.data:
                            for xi in self.data[i]:
                                yi = i
                                if not yi * (np.dot(w_t, xi) + b) >= 1:
                                    found_option = False
                        if found_option:
                            opt_dict[np.linalg.norm(w_t)] = [w_t, b]
                if w[0] < 0:
                    optimized = True
                    print('Optimized a step')
                else:
                    w = w - step
            norms = sorted([n for n in opt_dict])
            opt_choice = opt_dict[norms[0]]
            self.w = opt_choice[0]
            self.w = opt_choice[1]
            latest_optimum = opt_choice[0][0] + step * 2

    def predict(self, features):
        classification = np.sign(np.dot(np.array(features), self.w) + self.b)
        if classification != 0 and self.visualization:
            self.ax.scatter(features[0], features[1], s=200, marker='*',
                            c=self.colors[classification])
        return classification

    def visualize(self):
        [[self.ax.scatter(x[0], x[1], s=100, color=self.colors[i])
          for x in data_dict[i]] for i in data_dict]

        # hyperplane: v = x.w + b
        # psv = 1
        # nsv = -1
        # dec = 0
        def hyperplane(x, w, b, v):
            return (-w[0] * x + b - v) / w[1]

        datarange = (self.min_feature_value * 0.9, self.min_feature_value * 1.1)
        hyp_x_min = datarange[0]
        hyp_x_max = datarange[1]

        # (w.x + b) = 1
        # positive support vector hyperplanes
        psv1 = hyperplane(hyp_x_min, self.w, self.b, 1)
        psv2 = hyperplane(hyp_x_max, self.w, self.b, 1)
        self.ax.plot([hyp_x_min, hyp_x_max, [psv1, psv2]])

        # (w.x + b) = -1
        # negative support vector hyperplanes
        nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1)
        nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1)
        self.ax.plot([hyp_x_min, hyp_x_max, [nsv1, nsv2]])

        # (w.x + b) = 0
        # decision boundary
        db1 = hyperplane(hyp_x_min, self.w, self.b, 0)
        db2 = hyperplane(hyp_x_max, self.w, self.b, 0)
        self.ax.plot([hyp_x_min, hyp_x_max, [db1, db2]])

        plt.show()


data_dict = {-1: np.array([[1, 7], [2, 8], [5, 8]]),
              1: np.array([[5, 1], [6, -1], [7, 3]])}

svm = SVM()
svm.fit(data=data_dict)
svm.visualize()
The error is reported at line 72 of my file:

opt_choice = opt_dict[norms[0]]
IndexError: list index out of range

The relevant arguments are passed near the very end, at line 120: svm.fit(data=data_dict)
I wrote this code by following a video tutorial; it runs fine in the video, but for me it throws this error.
Answer 0 (score: 0):
OK, I think I've got it. The problem is that a new element is only added to opt_dict when found_option is True. But you never set found_option to True, so I think opt_dict is always empty :)

found_option is just a local variable coming out of the for loop.
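For comparison, here is a minimal sketch of how the constraint check inside fit can initialize and use found_option so that opt_dict actually gets populated. This mirrors the structure of your snippet rather than the exact tutorial code, and the break statements are my own addition for an early exit; they are not required for the fix:

for transformation in transforms:
    w_t = w * transformation
    found_option = True  # assume this candidate (w_t, b) satisfies all constraints ...
    for i in self.data:
        for xi in self.data[i]:
            yi = i
            # ... until any sample violates yi*(xi.w + b) >= 1
            if not yi * (np.dot(w_t, xi) + b) >= 1:
                found_option = False
                break
        if not found_option:
            break
    if found_option:
        opt_dict[np.linalg.norm(w_t)] = [w_t, b]

With found_option reset to True for every candidate w_t, opt_dict should no longer be empty when the search finishes, so norms[0] exists and the IndexError at opt_choice = opt_dict[norms[0]] should go away.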