我想将此代码使用 k折叠,因为数据不平衡
我无法在此代码中插入 k折叠
def featureExtraction(data):
vectorizer = TfidfVectorizer(min_df=10, max_df=0.75, ngram_range=(1,3))
tfidf_data = vectorizer.fit_transform(data)
return tfidf_data
def learning(clf, x, y):
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size=.2, random_state=43)
classifer = clf()
classifer.fit(x_train, y_train)
predict = sklearn.model_selection.cross_val_predict(classifer, x_test, y_test, cv=10)
scores = sklearn.model_selection.cross_val_score(classifer, x_test, y_test, cv=10)
print (scores)
print ("Accurecy of %s: %0.2f (+/- %0.2f)" % (classifer, scores.mean(), scores.std() *2))
print (classification_report(y_test, predict))
def main(clf):
reviews = getData("SELECT * FROM `REVIEWS` LIMIT 1000")
preprocessed_reviews = prepareDataSets(reviews)
data, target = preprocessed_reviews['text'], preprocessed_reviews['label']
tfidf_data = featureExtraction(data)
learning(clf,tfidf_data, target)
请帮助我如何在此代码中插入 k折。