我有以下代码,我在数据集上进行了4次交叉验证,特征向量大小 11156 和 129 数据点。
但问题是,相同的代码在使用 Python2 解释器运行时与使用 Python3 解释器运行时给出了不同的结果。
在 Python2 的情况下,它给出 90 多的准确率;而在 Python3 的情况下,它给出 70 多到 80 多的准确率。
from __future__ import division
import scipy.io as sio
import numpy as np
from sklearn import svm
import random
from sklearn.metrics import confusion_matrix as cm
from sklearn.metrics import accuracy_score
# Load the MATLAB data file: 'HiCurve'/'LoCurve' hold one curve per sample,
# 'ClassLabels' the 1-based class label of each sample.
data = sio.loadmat('data.mat')
feat_highcurve_u = np.array(data['HiCurve'])[0]
feat_lowcurve_u = np.array(data['LoCurve'])[0]
# Take a centered window of 2*2789 = 5578 points from each curve so every
# sample ends up with the same feature length (raw curves may differ in length).
feat_highcurve = np.array([np.array(x[0]
[int(len(x[0])/2) - 2789:
int(len(x[0])/2) + 2789])
for x in feat_highcurve_u])
feat_lowcurve = np.array([np.array(x[0]
[int(len(x[0])/2) - 2789:
int(len(x[0])/2) + 2789])
for x in feat_lowcurve_u])
# Concatenate high + low curve windows into one 11156-dim vector per sample.
X_data = [np.concatenate((a,b), axis = 0)
for a,b in zip(feat_highcurve,
feat_lowcurve)]
X = np.array(X_data)
# Transpose to (features, samples) so each row of X is one feature across samples.
X = np.transpose(X,(1,0))
# Per-feature mean over all samples (length = number of features).
avg_X = np.array([sum(x)/len(x)
for x in X])
# Center every sample by subtracting the per-feature mean.
X_data = [x-avg_X for x in X_data]
y_labels = data['ClassLabels']
# Convert 1-based MATLAB class labels to 0-based integer labels.
y_labels = np.array([(x[0]-1)
for x in y_labels])
def calculate_ber(c_mat):
    """Balanced Error Rate: mean over classes of (misclassified / total) per class.

    Rows of ``c_mat`` are true classes, columns are predicted classes
    (sklearn ``confusion_matrix`` convention).

    Rows whose total is zero — a label that appears in the predictions but
    never in the true targets — are skipped instead of dividing by zero and
    propagating NaN into the averaged BER (the original code did exactly that).
    Returns 0 if no row has any support.
    """
    per_class_err = []
    for index, row in enumerate(c_mat):
        total = np.sum(row)
        if total > 0:
            # Off-diagonal mass of this row = misclassified samples of class `index`.
            per_class_err.append((total - row[index]) / total)
    return sum(per_class_err) / len(per_class_err) if per_class_err else 0
def apply_svm(nu=0.1, kernel='rbf', degree=3):
    """Run 10 repetitions of 4-fold cross-validation with a NuSVC classifier.

    Parameters
    ----------
    nu : float
        NuSVC ``nu`` parameter (upper bound on the fraction of margin errors).
    kernel : str
        Kernel passed to NuSVC.
    degree : int
        Polynomial degree (only used when ``kernel='poly'``).

    Returns
    -------
    tuple
        ``(avg_accuracy, avg_ber)`` averaged over the 10 shuffled repetitions.

    NOTE(review): shuffles the module-level ``X_data`` / ``y_labels`` in place
    and is not seeded, so successive calls see different splits.
    """
    clf = svm.NuSVC(random_state=0, nu=nu, kernel=kernel, degree=degree)
    avg_accuracy = 0
    avg_ber = 0
    for n in range(10):
        # Shuffle samples and labels together so pairs stay aligned.
        combined = list(zip(X_data, y_labels))
        random.shuffle(combined)
        X_data[:], y_labels[:] = zip(*combined)
        # Split into 4 folds; integer division can leave a 5th stub fold
        # (129 samples -> 32,32,32,32,1), which is merged into the 4th.
        fold_size = int(len(X_data) / 4)
        X_folds = [X_data[i:i + fold_size] for i in range(0, len(X_data), fold_size)]
        y_folds = [y_labels[i:i + fold_size] for i in range(0, len(y_labels), fold_size)]
        if len(X_folds) == 5:
            X_folds[3] = np.concatenate((X_folds[3], X_folds[4]), axis=0)
            X_folds.pop()
            y_folds[3] = np.concatenate((y_folds[3], y_folds[4]), axis=0)
            y_folds.pop()
        accuracy = 0
        ber = 0
        # Iterate over folds, holding each one out as the test set.
        for i in range(4):
            X_test = X_folds[i]
            y_test = y_folds[i]
            # BUG FIX: the original used `o = [i for i in range(4)]`, which under
            # Python 2 leaks the comprehension variable and clobbers the outer
            # `i` (it becomes 3), so `o.remove(i)` always removed fold 3 and the
            # model trained on its own test fold for i in {0,1,2} — the source of
            # the inflated Python 2 accuracies.  Using a distinct name makes both
            # interpreters behave identically (and correctly).
            train_idx = [j for j in range(4) if j != i]
            X_train = np.concatenate([X_folds[j] for j in train_idx], axis=0)
            y_train = np.concatenate([y_folds[j] for j in train_idx], axis=0)
            # Train on the remaining three folds, evaluate on the held-out one.
            clf.fit(X_train, y_train)
            preds = clf.predict(X_test)
            accuracy += (len([k for k in range(len(preds)) if preds[k] == y_test[k]]) / len(preds))
            c_mat = cm(y_test, preds)
            ber += calculate_ber(c_mat)
        avg_accuracy += (accuracy / 4)
        avg_ber += (ber / 4)
    print("After ten steps Average Accuracy: ", avg_accuracy/10)
    print("After ten steps Average BER: ", avg_ber/10)
    return ((avg_accuracy/10), (avg_ber/10))
# Sweep the NuSVC `nu` hyper-parameter and collect (accuracy, BER) per value.
nu_values = [0.05, 0.1, 0.15, 0.20, 0.25, 0.30]
nu_accuracies = {nu_val: apply_svm(nu=nu_val) for nu_val in nu_values}
print("Final Metrics: ", nu_accuracies)
答案 0(得分:0)
回复较晚,但供后来查找两者差异的人参考——sklearn 更改了 logistic regression 的默认求解器,这在某些情况下会导致结果不同;一些 SVM 实现的默认参数也有改动。