我设计了一个简单的 SVM 预测算法,但代码出现了错误,如下所示:
模型得分(score)可以正常计算,但在向 model.predict() 函数
传入要预测的值时会报错。我不清楚问题出在哪里,
尝试查找过资料,但没有找到任何相关信息。
import pandas as pd
import pylab as pl
import numpy as np
import scipy.optimize as opt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
%matplotlib inline
import matplotlib.pyplot as plt
data = pd.read_csv(r'C:\Users\Imad\Desktop\New folder\cars.csv')
from sklearn.preprocessing import LabelEncoder
data.columns
Index(['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety',
'car'], dtype='object')
# One LabelEncoder instance per column; each is kept in its own variable so
# the fitted encoder for that column stays accessible after the transform.
buying_1=LabelEncoder()
maint_1=LabelEncoder()
doors_1=LabelEncoder()
persons_1=LabelEncoder()
lug_boot_1=LabelEncoder()
safety_1=LabelEncoder()
car_1=LabelEncoder()
# Fit each encoder on its source column and store the encoded labels in a new
# "<name>_n" column. The original columns are not removed here.
data['buying_n'] = buying_1.fit_transform(data['buying'])
data['maint_n'] = maint_1.fit_transform(data['maint'])
# NOTE: the encoded 'doors' column is named 'door_n' (singular), unlike the
# other "<name>_n" columns.
data['door_n'] = doors_1.fit_transform(data['doors'])
data['persons_n'] = persons_1.fit_transform(data['persons'])
data['lug_boot_n'] = lug_boot_1.fit_transform(data['lug_boot'])
data['safety_n'] = safety_1.fit_transform(data['safety'])
data['car_n'] = car_1.fit_transform(data['car'])
inputs = data.drop(['buying', 'maint', 'doors', 'persons', 'lug_boot', '
safety', 'car'], axis = 'columns')
target = data['buying_n']
X = np.asarray(inputs)
y = np.asarray(target)
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2,
random_state= 20)
print ('Train set:', X_train.shape, y_train.shape)
print ('Test set:', X_test.shape, y_test.shape)
Train set: (1382, 7) (1382,)
Test set: (346, 7) (346,)
from sklearn.svm import SVC
model = SVC(C = 2, gamma=3, random_state=5)
model.fit(X_train, y_train)
model.score(X_test,y_test)
0.9884393063583815
model.predict([[3,3,2,2,1,2]])
ValueError Traceback (most recent call
last)
<ipython-input-122-6773f55c74b9> in <module>
----> 1 model.predict([[3,3,2,2,1,2]])
~\Anaconda3\lib\site-packages\sklearn\svm\base.py in predict(self, X)
565 Class labels for samples in X.
566 """
--> 567 y = super(BaseSVC, self).predict(X)
568 return self.classes_.take(np.asarray(y, dtype=np.intp))
569
~\Anaconda3\lib\site-packages\sklearn\svm\base.py in predict(self, X)
323 y_pred : array, shape (n_samples,)
324 """
--> 325 X = self._validate_for_predict(X)
326 predict = self._sparse_predict if self._sparse else
self._dense_predict
327 return predict(X)
~\Anaconda3\lib\site-packages\sklearn\svm\base.py in
_validate_for_predict(self, X)
476 raise ValueError("X.shape[1] = %d should be equal to %d,
"
477 "the number of features at training
time" %
--> 478 (n_features, self.shape_fit_[1]))
479 return X
480
ValueError: X.shape[1] = 6 should be equal to 7, the number of features
at training time
答案 0 :(得分:0)
问题在于您把目标变量也包含在输入特征中一起传给了模型。
请按如下方式修改这几行:
# Select the target column, then build the feature frame by dropping the raw
# string columns AND the target's encoded column ('buying_n'), so the target
# is no longer part of the inputs (6 feature columns instead of 7).
target = data['buying_n']
inputs = data.drop(['buying', 'maint', 'doors', 'persons', 'lug_boot',
                    'safety', 'car', 'buying_n'], axis='columns')
此后,输入中的特征数量将变为 6。
注意:请勿更改这两行的顺序。