我有3个类别:
A类= 190个样本
B类= 233个样本
C类= 133个样本
我正在使用sklearn的多类SVM(svm.SVC)来解决这个分类问题,但模型把测试集中的所有样本都归为B类。我遗漏了什么?我尝试过调整gamma和C(惩罚参数C),但没有帮助。
import sklearn.metrics as sm
import numpy as np
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn import svm
# Train a linear multi-class SVM on AllData.csv and report hold-out accuracy.
# The last CSV column is used as the class label; all other columns are
# features.
data = '/Input/AllData.csv'
dataframe = read_csv(data, names=None)
# Shuffle the rows (unseeded, so the order differs from run to run).
dataframe = dataframe.sample(frac=1).reset_index(drop=True)
dataset = dataframe.values
X = dataset[:, 0:-1]
Y = dataset[:, -1]

# Per-class sample counts. Any label that is neither 1 nor 2 is counted as
# class 3, exactly as the original if/elif/else tally did.
Count1 = int(np.count_nonzero(Y == 1))
Count2 = int(np.count_nonzero(Y == 2))
Count3 = int(Y.shape[0]) - Count1 - Count2

# Sanity checks: NaN presence in features/labels, then the class counts.
# str.format keeps the output identical on Python 2 and Python 3.
print("{0} {1}".format(np.isnan(X).any(), np.isnan(Y).any()))
print("{0} {1} {2}".format(Count1, Count2, Count3))

# Hold out 15% of the samples for testing; the split itself is seeded.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.15, random_state=1)

# NOTE(review): per the scikit-learn SVC documentation, gamma, degree and
# coef0 are only used by the non-linear kernels, so tuning gamma should have
# no effect while kernel='linear' -- confirm against the installed version.
# decision_function_shape only affects the shape returned by
# decision_function(); 'ovr' replaces the deprecated None, which newer
# scikit-learn releases reject.
model = svm.SVC(
    C=0.01,
    kernel='linear',
    class_weight='balanced',
    gamma='auto',
    degree=3,
    coef0=0.0,
    decision_function_shape='ovr',
    probability=False,
    shrinking=True,
    tol=0.001,
    max_iter=-1,
    cache_size=700,
    random_state=None,
    verbose=False,
)
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

ACC = sm.accuracy_score(Y_test, Y_pred)
print(ACC)

# NOTE(review): classification_report pairs target_names with the sorted
# label values, so this presumably maps 1 -> A, 2 -> B, 3 -> C -- verify
# against the label encoding used in the CSV.
target_names = ['A', 'B', 'C']
print(sm.classification_report(Y_test, Y_pred, target_names=target_names))
您可以在这里找到数据。
答案 0 :(得分:1)
我运行了下面这段代码,Y_pred 并没有把所有样本都归为B类。
您可以查看您的sklearn版本吗?我使用的是0.18。
**代码**
import sklearn.metrics as sm
import numpy as np
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn import svm
# Reproduction of the question's pipeline: train a linear multi-class SVM on
# AllData.csv and print the predictions, accuracy and per-class report.
# The last CSV column is used as the class label; all other columns are
# features.
data = 'C:/Users/seral_000/AllData.csv'
dataframe = read_csv(data, names=None)
# Shuffle the rows (unseeded, so the order differs from run to run).
dataframe = dataframe.sample(frac=1).reset_index(drop=True)
dataset = dataframe.values
X = dataset[:, 0:-1]
Y = dataset[:, -1]

# Hold out 15% of the samples for testing; the split itself is seeded.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.15, random_state=0)

# NOTE(review): per the scikit-learn SVC documentation, gamma, degree and
# coef0 are only used by the non-linear kernels, so they should be inert
# while kernel='linear' -- confirm against the installed version.
# decision_function_shape only affects the shape returned by
# decision_function(); 'ovr' replaces the deprecated None, which newer
# scikit-learn releases reject.
model = svm.SVC(
    C=0.01,
    gamma='auto',
    kernel='linear',
    degree=3,
    class_weight='balanced',
    coef0=0.0,
    decision_function_shape='ovr',
    probability=False,
    max_iter=-1,
    tol=0.001,
    cache_size=700,
    random_state=None,
    shrinking=True,
    verbose=False,
)
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
# Show the raw predictions to check that more than one class is predicted.
print(Y_pred)

ACC = sm.accuracy_score(Y_test, Y_pred)
print(ACC)

# NOTE(review): classification_report pairs target_names with the sorted
# label values, so this presumably maps 1 -> A, 2 -> B, 3 -> C -- verify
# against the label encoding used in the CSV.
target_names = ['A', 'B', 'C']
print(sm.classification_report(Y_test, Y_pred, target_names=target_names))
控制台中的结果:
[ 1. 3. 1. 2. 3. 3. 2. 1. 1. 1. 1. 1. 2. 2. 3. 2. 3. 3.
2. 1. 2. 1. 1. 3. 2. 2. 2. 2. 1. 2. 2. 2. 2. 3. 3. 1.
3. 2. 2. 1. 2. 3. 1. 1. 2. 2. 3. 2. 1. 3. 1. 2. 1. 1.
1. 1. 1. 2. 1. 2. 1. 3. 3. 2. 3. 1. 1. 2. 2. 2. 2. 2.
2. 1. 3. 1. 1. 3. 3. 3. 1. 2. 2. 1.]
0.52380952381
precision recall f1-score support
A 0.71 0.56 0.63 39
B 0.39 0.45 0.42 29
C 0.45 0.56 0.50 16
avg / total 0.55 0.52 0.53 84