我想对逻辑回归做交叉验证，并把 load_data 函数返回的 arr 作为输入数据。下面是我的代码大纲。脚本可以正常运行，但没有任何输出。
import pandas as pd
import numpy as np
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import cross_val_score
from sklearn import cross_validation
def load_data(filename):
    """Read a CSV file and return its rows as a NumPy array.

    The file is parsed with ``pandas.read_csv`` using its defaults, so the
    first line is treated as the header and is not part of the result.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The ``DataFrame.values`` array of the parsed file (one row per
        data line, one column per CSV column).
    """
    df = pd.read_csv(filename)
    arr = df.values
    # Preview of the first rows; the single-argument print(...) form
    # behaves the same on Python 2 and Python 3.
    print(arr[:3])
    return arr
# load_data("data.csv")
def fit_logistic_cv(arr, cv=5):
    """Cross-validate a logistic regression on ``arr`` and print the scores.

    The last column of ``arr`` is used as the target and every preceding
    column as a feature. Precision, recall and F1 are each evaluated on
    the same ``cv``-fold split; for every metric the mean and the per-fold
    scores are printed.

    Args:
        arr: 2-D array; columns ``[:-1]`` are the features and column
            ``-1`` is the label.
        cv: Number of folds passed to ``KFold`` (default 5).

    Returns:
        None. All results are written to standard output.
    """
    X = arr[:, :-1]
    y = arr[:, -1]
    print(y)

    # KFold is used with its defaults (no shuffling).
    # NOTE(review): if the rows are ordered by label, consider
    # shuffle=True with a fixed random_state so each fold sees both classes.
    kf_total = cross_validation.KFold(len(X), n_folds=cv)

    # The file imports the LogisticRegression class directly; the name
    # `linear_model` is never imported, so `linear_model.LogisticRegression()`
    # would raise NameError.
    lr = LogisticRegression()

    # No explicit lr.fit(X, y): cross_val_score fits its own clone of the
    # estimator on every training fold, so a prior full fit is wasted work.
    metrics = (
        ('Precision', 'precision'),
        ('Recalls', 'recall'),
        ('F1', 'f1'),
    )
    for label, scoring in metrics:
        scores = cross_validation.cross_val_score(
            lr, X, y, cv=kf_total, scoring=scoring)
        # One formatted string prints identically on Python 2 and 3.
        print('%s %s %s' % (label, np.mean(scores), scores))
def test_logistic_cv():  # testing above function
    """Load "data.csv" and run the cross-validated logistic regression on it."""
    data_filename = "data.csv"
    fit_logistic_cv(load_data(data_filename))


# Defining test_logistic_cv does not execute it. Without this call the
# script finishes without printing anything.
if __name__ == "__main__":
    test_logistic_cv()