Question

这是我为计算SVM而编写的代码。

from matplotlib import style
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
style.use('ggplot')

class SVM:
    def __init__ (self, visualization=True):
        self.color = { 1:'r', -1:'b'}
        if visualization:
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(1,1,1)

    #train

    def fit(self, data):
        self.data = data
        opt_dict = {}
        transforms = [ [1,1],
                     [-1, 1],
                     [-1, -1],
                     [1,-1]]

        all_data=[]
        for yi in self.data:
            for featureset in self.data[yi]:
                for feature in featureset:
                    all_data.append(feature)

        self.max_feature_value= max(all_data)
        self.min_feature_value= min(all_data)
        all_data = None

        step_sizes = [self.max_feature_value*0.1,
                    self.max_feature_value*0.01,
                    self.max_feature_value*0.001,]

        b_range_multiple = 5
        b_multiple = 5
        latest_optimum = self.max_feature_value*10

        for step in step_sizes:
            w= np.array([latest_optimum, latest_optimum])
            #we can do this because it is convex
            optimized = False
            while not optimized:
                for b in np.arange(-1*(self.max_feature_value*b_range_multiple),
                                    self.max_feature_value*b_range_multiple,
                                    step*b_multiple):
                    for transformation in transforms:
                        w_t = w*transformation
                        #weak link in the SVM fundamentally
                        # SMO tries to fix this a bit
                        # yi(xi.w+b)
                        for i in self.data:
                            for xi in self.data[i]:
                                yi=i
                                if not yi*(np.dot(w_t, xi) + b) >= 1:
                                    found_option = False

                        if found_option:
                            opt_dict[np.linalg.norm(w_t)] = [w_t, b]

                if w[0]<0:
                    optimized = True;
                    print ('Optimized a step')
                else:
                    w=w-step

                norms = sorted([n for n in opt_dict])

                opt_choice = opt_dict[norms[0]]
                self.w = opt_choice[0]
                self.w = opt_choice[1]
                latest_optimum = opt_choice[0][0]+ step*2

            def predict(self, features):
                classification = np.sign(np.dot(np.array(features), self.w)+ self.b)
                if classification !=0 and self.visualization:
                    self.ax.scatter(features[0], features[1], s=200, marker='*', c=self.colors[classification])
                return classification

        def visualize(self):
            [[self.ax.scatter(x[0], x[1], s=100, color=self.colors[i]) for x in data_dict[i]] for i in data_dict]

            #hyperplane
            # v = x.w + b
            #psv = 1
            #nsv = -1
            # dec = 0
            def hyperplane(x, w, b, v):
                return (-w[0]*x + b -v)/w[1]
            datarange = (self.min_feature_value*0.9, self.min_feature_value*1.1)
            hyp_x_min = datarange[0]
            hyp_x_max = datarange[1]


            # (w.x + b) = 1
            # positive support vector hyperplanes
            psv1 = hyperplane(hyp_x_min, self.w, self.b, 1)
            psv2 = hyperplane(hyp_x_max, self.w, self.b, 1)
            self.ax.plot([hyp_x_min, hyp_x_max, [psv1, psv2]])

            # (w.x + b) = 0
            # negative support vector hyperplanes
            nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1)
            nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1)
            self.ax.plot([hyp_x_min, hyp_x_max, [nsv1, nsv2]])

            # (w.x + b) = 0
            db1 = hyperplane(hyp_x_min, self.w, self.b, 0)
            db2 = hyperplane(hyp_x_max, self.w, self.b, 0)
            self.ax.plot([hyp_x_min, hyp_x_max, [db1, db2]])

            plt.show()

data_dict = { -1: np.array([[1,7], [2,8], [5,8]]), 1: np.array([[5,1], [6, -1], [7,3]])}

svm = SVM()
svm.fit(data=data_dict)
svm.visualize()

行号显示此错误 - 72。

opt_choice = opt_dict[norms[0]]
IndexError: list index out of range
相关参数几乎在终点线120处传递。 svm.fit（数据= data_dict）

这段代码我是根据视频教程编写的，但是它在视频中运行正常但却出错了。

Answer 1

好的，我想我明白了。问题是，只有当opt_dict为True时，才会在found_option中添加新元素。但是，你从未将found_option设置为True，所以我认为opt_dict总是空的:)

found_option是来自for循环的局部变量。

IndexError：列表索引超出Python中字典的范围，用于计算SVM

1 个答案: