我正在尝试在 scikit-learn 中使用 `class_weight` 参数来训练一个二分类的 svm.SVC 分类器。我的目的是通过调整类别权重来改变类别 1 的精确率(precision)。不幸的是,经过数周的尝试,我仍然无法实现这个目标,这让我怀疑 sklearn 中可能仍存在不一致之处…… 下面是我的代码的最小示例:
import os

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn import svm
from sklearn.metrics import confusion_matrix

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn releases only ship cross_validation
    from sklearn.cross_validation import train_test_split
# Load the raw table. The file is ';'-separated, its first row is the header,
# and quoting=3 (csv.QUOTE_NONE) makes the parser treat quote characters as
# ordinary text.
data = pd.read_csv(
    "...",
    header=0,
    delimiter=";",
    quoting=3,
    low_memory=False,
)

# Module-level feature standardiser.
scaler = preprocessing.StandardScaler()
def Train_Test_Split(test_size, dataframe, name_y, name_X):
    """Split a data frame into stratified training and test sets.

    Args:
        test_size: Forwarded unchanged to ``train_test_split`` as its
            ``test_size`` argument.
        dataframe: pandas DataFrame holding both the target column and the
            feature columns.
        name_y: Label of the target column; its values are cast to ``int``.
        name_X: Label of the first feature column. Every column from this
            label through the last column of ``dataframe`` is used as a
            feature.

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test)``. Note that this
        order differs from the order ``train_test_split`` itself returns.
    """
    # ``DataFrame.ix`` no longer exists in current pandas; ``.loc`` performs
    # the same label-based column slice.
    # NOTE(review): if the target column sits at or after ``name_X`` it ends
    # up inside X as well -- confirm the column order of the input file.
    X = dataframe.loc[:, name_X:]
    y = np.asarray(dataframe[name_y], dtype=int)

    # Stratify on y so both splits keep the class proportions of the data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y
    )

    # y is already an integer array, so the split halves need no re-casting.
    return (X_train, y_train, X_test, y_test)
def _precision_ratio(correct, wrong):
    """Return correct / (correct + wrong) as a float, or NaN if both are 0."""
    total = int(correct) + int(wrong)
    if total == 0:
        # No sample was predicted as this class, so precision is undefined.
        return float("nan")
    # Explicit float conversion keeps this a true division on every
    # interpreter version.
    return float(correct) / total


def Score(y_test, y_pred):
    """Compute the precision of class 1 and of class 0.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, aligned with ``y_test``.

    Returns:
        The tuple ``(Precision_stables, Precision_instables)``: the precision
        for label 1 followed by the precision for label 0. An entry is NaN
        when no sample was predicted as that label.
    """
    # Rows are true labels and columns are predicted labels, both in the
    # order [1, 0] requested through ``labels``.
    matrix = confusion_matrix(y_test, y_pred, labels=[1, 0])

    # Column 0: everything predicted as 1; column 1: everything predicted as 0.
    Precision_stables = _precision_ratio(matrix[0][0], matrix[1][0])
    Precision_instables = _precision_ratio(matrix[1][1], matrix[0][1])
    return (Precision_stables, Precision_instables)
def Eval_svm(class_ponder, testsize, dataframe, name_y, name_X):
    """Train a class-weighted linear SVC and report per-class precision.

    Args:
        class_ponder: Value handed to ``svm.SVC`` as ``class_weight``.
        testsize: Forwarded to ``Train_Test_Split`` as the test-set size.
        dataframe: pandas DataFrame with the target and feature columns.
        name_y: Label of the target column.
        name_X: Label of the first feature column.

    Returns:
        The tuple ``(PRS_svm, PRI_svm)`` produced by ``Score`` on the test
        split: precision for label 1, then precision for label 0.
    """
    X_train, y_train, X_test, y_test = Train_Test_Split(
        testsize, dataframe, name_y, name_X
    )

    # SVMs are not scale invariant, so the features are standardised first.
    # The scaler is fitted on the training split only, so no statistics of
    # the test split influence the model.
    feature_scaler = preprocessing.StandardScaler().fit(X_train)
    X_train_scaled = feature_scaler.transform(X_train)
    X_test_scaled = feature_scaler.transform(X_test)

    clf_svm = svm.SVC(kernel='linear', class_weight=class_ponder, probability=True)
    clf_svm_optimal = clf_svm.fit(X_train_scaled, y_train)
    y_pred_svm = clf_svm_optimal.predict(X_test_scaled)

    PRS_svm, PRI_svm = Score(y_test, y_pred_svm)
    return (PRS_svm, PRI_svm)
# Column labels: the target, and the first of the explanatory columns.
name_y = "...variableofinterest..."
name_x = "...explanatoryvariables..."

# Weight class 0 a hundred times more than class 1, hold out 30% of the rows
# for testing, then print the two precision values.
a, b = Eval_svm(
    {0: 100, 1: 1},
    0.3,
    data,
    name_y,
    name_x,
)
print(a, b)
无论我选择什么样的权重,类别 1 乃至类别 0 的精确率都完全不会改变。
有人可以帮助我吗?这有点令人生气...
非常感谢你!
祝好,F.