我有一个 JSON 文件,其中的数据已经过预处理,并且已转换为向量。如何使用 SVM 分类方法对这些数据进行训练?
“Vector”是其中一列的名称,另一列是值;该值表示 Vector 列中对应向量的类型。
import pickle
from nltk.corpus import stopwords
import string
from nltk.stem import SnowballStemmer
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn import metrics
# Text-preprocessing helpers: English stop words, punctuation characters and
# an English Snowball stemmer.
# NOTE: the next line rebinds `stopwords` from the imported NLTK corpus module
# to a plain set, so the module is no longer reachable under that name.
stopwords = set(stopwords.words("english"))
exclude = set(string.punctuation)
snow = SnowballStemmer("english")

# Load the pickled object stored next to the data set (presumably the fitted
# TF-IDF vectorizer -- confirm against the code that wrote it).
# SECURITY: unpickling can execute arbitrary code; only load trusted files.
# The context manager guarantees the file handle is closed after loading.
with open("dataPackage/tfidf.pickle", "rb") as tfidf_file:
    tvec = pickle.load(tfidf_file)

# The JSON file was written with orient="split" (separate index / columns /
# data entries), so it has to be read back with the same orientation.
data = pd.read_json("dataPackage/finalData.json", orient="split")
# Length of the first row's feature vector.
inputLen = len(data["Vector"].iloc[0])

# Features: one vector per row of the "Vector" column.
# Labels: everything that is left once "Vector" is dropped (one row-array
# per sample).
X = list(data["Vector"])
y = list(data.drop(columns=["Vector"]).values)

# Build the arrays once and reuse them for the split.
features = np.array(X)
labels = np.array(y)

# With a single label column `labels` has shape (n_samples, 1). SVC.fit
# expects a 1-D target and emits a DataConversionWarning for column vectors,
# so flatten it. Anything else is passed through unchanged.
if labels.ndim == 2 and labels.shape[1] == 1:
    labels = labels.ravel()

# 70 % train / 30 % test; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=109,
)
# Train a linear-kernel support vector classifier on the training split.
# `fit` returns the estimator itself, so construction and fitting can be
# chained while `model` still refers to the fitted classifier.
model = svm.SVC(kernel="linear").fit(X_train, y_train)

# Predict the held-out split and report the fraction of correct labels.
y_pred = model.predict(X_test)
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)