逻辑回归中的交叉验证

时间:2015-02-24 15:54:07

标签: python numpy pandas scikit-learn cross-validation

我想把 load_data 函数返回的 arr 作为输入，对逻辑回归模型执行交叉验证。下面是我的代码框架。函数可以运行，但没有给出任何输出。

import pandas as pd
import numpy as np
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import cross_val_score
from sklearn import cross_validation

def load_data(filename):
    """Read a CSV file and return its contents as a NumPy array.

    The file is parsed with ``pandas.read_csv`` using default options, so
    the first line is treated as the header row and is not part of the
    returned data.

    Args:
        filename: Path (or any source accepted by ``pandas.read_csv``) of
            the CSV file to load.

    Returns:
        The ``DataFrame.values`` array holding every parsed row and column.
    """
    df = pd.read_csv(filename)
    arr = df.values
    # Parenthesised single-argument print behaves identically on Python 2
    # and Python 3; the bare `print arr[:3]` statement is a SyntaxError on 3.
    print(arr[:3])  # preview of the first three rows
    return arr
# load_data("data.csv")

def fit_logistic_cv(arr, cv=5):
    """Cross-validate a logistic regression and print precision, recall and F1.

    The last column of ``arr`` is taken as the target vector and all other
    columns as the feature matrix. The same ``KFold`` split is reused for
    every metric so the per-fold scores are directly comparable.

    Args:
        arr: 2-D array whose last column holds the labels.
        cv: Number of folds passed to ``KFold`` as ``n_folds``.

    Returns:
        None. The target vector and, for each metric, the mean score
        followed by the per-fold scores are printed to stdout.
    """
    X = arr[:, :-1]
    y = arr[:, -1]
    print(y)

    # Folds are contiguous and unshuffled; KFold also accepts
    # shuffle=True / random_state=... if a randomised split is wanted.
    kf_total = cross_validation.KFold(len(X), n_folds=cv)

    # Use the class imported at the top of the file. The module name
    # `linear_model` is never imported, so the previous
    # `linear_model.LogisticRegression()` raised NameError.
    lr = LogisticRegression()

    # No explicit lr.fit(X, y) here: cross_val_score clones the estimator
    # and fits it on each training fold, so a full-data fit is wasted work.
    # NOTE(review): the 'precision'/'recall'/'f1' scorers presumably expect
    # binary labels in y -- confirm against the data set.
    metrics = (('Precision', 'precision'), ('Recalls', 'recall'), ('F1', 'f1'))
    for label, scoring in metrics:
        scores = cross_validation.cross_val_score(
            lr, X, y, cv=kf_total, scoring=scoring)
        # %-formatting yields the same space-separated text as the original
        # Python 2 `print a, b, c` while remaining valid on Python 3.
        print('%s %s %s' % (label, np.mean(scores), scores))


def test_logistic_cv():
    """Exercise fit_logistic_cv end to end on the data loaded from data.csv."""
    dataset = load_data("data.csv")
    fit_logistic_cv(dataset)

0 个答案:

没有答案