sklearn SVM, Python 2 vs Python 3

Asked: 2017-06-19 18:15:15

Tags: python python-2.7 python-3.x scikit-learn svm

I have the following code, in which I run 4-fold cross-validation on a dataset of 129 data points with feature vectors of size 11156.

The problem is that the same code gives different results when it is run with Python 2 than when it is run with Python 3.

Under Python 2 it reports accuracy values in the 90s, whereas under Python 3 it reports accuracy values in the 70s and 80s.

from __future__ import division
import scipy.io as sio
import numpy as np
from sklearn import svm
import random
from sklearn.metrics import confusion_matrix as cm
from sklearn.metrics import accuracy_score

# Loading Data
data = sio.loadmat('data.mat')

feat_highcurve_u = np.array(data['HiCurve'])[0]
feat_lowcurve_u = np.array(data['LoCurve'])[0]

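# Crop each curve to a window of 2789 samples on either side of its midpoint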
feat_highcurve = np.array([np.array(x[0]
                [int(len(x[0])/2) - 2789:
                 int(len(x[0])/2) + 2789]) 
                for x in feat_highcurve_u])
feat_lowcurve = np.array([np.array(x[0]
                [int(len(x[0])/2) - 2789:
                 int(len(x[0])/2) + 2789])
                for x in feat_lowcurve_u])

X_data = [np.concatenate((a,b), axis = 0) 
          for a,b in zip(feat_highcurve, 
                         feat_lowcurve)]

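# Mean-centre the features: transpose so each row holds one feature across
# all samples, take the per-feature average, and subtract it from each sample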
X = np.array(X_data)
X = np.transpose(X,(1,0))
avg_X = np.array([sum(x)/len(x) 
                  for x in X])

X_data = [x-avg_X for x in X_data]

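# Convert the class labels from 1-based to 0-based integers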
y_labels = data['ClassLabels']
y_labels = np.array([(x[0]-1) 
                     for x in y_labels])


def calculate_ber(c_mat):
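    """Balanced error rate: the per-class misclassification rates from a confusion matrix, averaged over classes."""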
    val = 0
    for index, row in enumerate(c_mat):
        val += (np.sum(row) - row[index])/ np.sum(row)

    return val / len(c_mat)


def apply_svm(nu=0.1, kernel='rbf', degree=3):
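    """Run ten rounds of shuffled 4-fold cross-validation with a NuSVC and return the average accuracy and BER."""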
    clf = svm.NuSVC(random_state=0, nu=nu, kernel=kernel, degree=degree)

    avg_accuracy = 0
    avg_ber = 0

    for n in range(10):
        # Randomizing the data
        combined = list(zip(X_data, y_labels))
        random.shuffle(combined)
        X_data[:], y_labels[:] = zip(*combined)

        # Splitting into 4 folds
        X_folds = [X_data[i:i+int(len(X_data)/4)] for i in range(0, len(X_data), int(len(X_data)/4))]
        y_folds = [y_labels[i:i+int(len(y_labels)/4)] for i in range(0, len(y_labels), int(len(y_labels)/4))]

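        # 129 samples do not split evenly into 4 chunks, so merge the leftover fifth chunk into the fourth fold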
        if(len(X_folds) == 5):
            X_folds[3] = np.concatenate((X_folds[3], X_folds[4]), axis = 0)
            X_folds.pop()

            y_folds[3] = np.concatenate((y_folds[3], y_folds[4]), axis = 0)
            y_folds.pop()

        accuracy = 0
        ber = 0

        # Iterating over folds
        for i in range(4):
            # Selecting test fold
            X_test = X_folds[i]
            y_test = y_folds[i]

            # Concatenating the rest of the folds
            o = [i for i in range(4)]
            o.remove(i)

            X_train = np.concatenate((X_folds[o[0]], X_folds[o[1]], X_folds[o[2]]), axis = 0)
            y_train = np.concatenate((y_folds[o[0]], y_folds[o[1]], y_folds[o[2]]), axis = 0)

            # Training SVM to fit the data
            clf.fit(X_train, y_train)

            # Testing the SVM
            preds = clf.predict(X_test)
            accuracy += (len([i for i in range(len(preds)) if preds[i] == y_test[i]])/len(preds))
            c_mat = cm(y_test, preds)
            ber += calculate_ber(c_mat)

        #print("Four fold cross-validation accuracy: Step("+str(n+1)+"): ",accuracy/4.0)
        avg_accuracy += (accuracy/4)
        avg_ber += (ber/4)

    print("After ten steps Average Accuracy: ", avg_accuracy/10) 
    print("After ten steps Average BER: ", avg_ber/10) 
    return ((avg_accuracy/10), (avg_ber/10))

nu_accuracies = {}
nu_values = [0.05, 0.1, 0.15, 0.20, 0.25, 0.30]

for nu_val in nu_values:
    nu_accuracies[nu_val] = apply_svm(nu=nu_val)

print("Final Metrics: ", nu_accuracies)

1 Answer:

Answer 0: (score: 0)

Late to the thread, but for anyone else looking for the difference between the two: sklearn changed the default solver for logistic regression, which can make a difference in some cases. Some of the SVM implementations also changed other default parameters.
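One practical way to rule out default-parameter drift (a sketch, assuming the two environments also differ in scikit-learn version rather than only in Python version) is to compare the library versions and pin every hyperparameter explicitly instead of relying on defaults; for instance, the default gamma of SVC/NuSVC moved from 'auto' to 'scale' in later scikit-learn releases.

import sklearn
from sklearn import svm

# First confirm whether the two environments actually run the same release.
print(sklearn.__version__)

# Pin the hyperparameters that have version-dependent defaults so both
# environments train identical models; gamma is the usual suspect for
# SVC/NuSVC ('auto' was the old default, 'scale' the newer one).
clf = svm.NuSVC(nu=0.1, kernel='rbf', degree=3,
                gamma='auto',      # pick one value explicitly
                random_state=0)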