Question

我正在使用sklearn，并尝试用以下代码评估KNN回归模型：

def cross_validate(X, y, n_neighbors, test_size=0.20):
    """Fit a KNN regressor on a random training split and score both splits.

    The row positions are shuffled; the first ``test_size`` fraction becomes
    the held-out test set and the remainder the training set.  The model is
    fitted on the training set only and is then used to predict both sets.

    Args:
        X: Feature array supporting ``X.shape[0]`` and indexing by an array
            of row positions (presumably a NumPy array -- confirm against
            the caller).
        y: Targets aligned with ``X`` and indexable the same way.
        n_neighbors: Number of neighbours passed to ``KNeighborsRegressor``.
        test_size: Fraction of rows held out for testing.

    Returns:
        A ``(training_mse, test_mse)`` tuple, each computed by ``mse`` from
        the residuals ``observed - predicted``.

    Note:
        With ``weights="distance"`` a query point that coincides with a
        training point is predicted from that point alone, so the training
        MSE is expected to be zero unless identical feature rows carry
        different targets.  Only the test MSE is an out-of-sample measure.
    """
    n = X.shape[0]
    test_n = int(np.round(test_size * n, 0))

    indices = np.arange(n)
    random.shuffle(indices)

    test_indices = indices[:test_n]
    training_indices = indices[test_n:]

    X_test, y_test = X[test_indices], y[test_indices]
    X_train, y_train = X[training_indices], y[training_indices]

    knn = neighbors.KNeighborsRegressor(
        n_neighbors=n_neighbors, weights="distance", algorithm="brute"
    )
    model = knn.fit(X_train, y_train)

    training_mse = mse(y_train - model.predict(X_train))

    # Score the held-out rows with the model fitted on the training set.
    # Refitting on the test set here would turn this into a second
    # in-sample score instead of an estimate of generalisation error.
    test_mse = mse(y_test - model.predict(X_test))

    return training_mse, test_mse

我之前对线性回归做过类似的评估。区别在于，对KNN回归运行这段代码时，training_mse和test_mse都为0。如果我用在训练集上拟合的模型去预测测试数据，则会得到非零的mse值。但我不相信训练集和测试集的拟合值会与观测值完全相同。我究竟做错了什么？我试图仿照的函数如下，它给出的mse值不为零：

def cross_validate(formula, data, test_size=0.20):
    """Fit a linear regression on a random training split and score both splits.

    The index labels of ``data`` are shuffled; the first ``test_size``
    fraction selects the test rows and the remainder the training rows.
    The model is fitted on the training rows only and is then used to
    predict both sets.

    Args:
        formula: Formula string handed to ``patsy.dmatrices`` to build the
            response and design matrices.
        data: Table supporting ``.shape``, ``.index`` and label-based
            ``.loc`` selection (presumably a pandas DataFrame -- confirm
            against the caller).
        test_size: Fraction of rows held out for testing.

    Returns:
        A ``(training_mse, test_mse)`` tuple, each computed by ``mse`` from
        the residuals ``observed - predicted``.
    """
    n = data.shape[0]
    test_n = int(np.round(test_size * n, 0))

    # Shuffle a copy of the index labels so ``data.index`` itself is untouched.
    indices = deepcopy(data.index).values
    random.shuffle(indices)

    test_indices = indices[:test_n]
    training_indices = indices[test_n:]

    # The shuffled values are index labels, so select by label with ``.loc``
    # (``.ix`` was removed in pandas 1.0).
    test_set = data.loc[test_indices]
    training_set = data.loc[training_indices]

    # NOTE(review): fit_intercept=False presumably relies on the formula's
    # design matrix already carrying an intercept column -- confirm.
    y, X = patsy.dmatrices(formula, training_set, return_type="matrix")
    model = linear.LinearRegression(fit_intercept=False).fit(X, y)
    training_mse = mse(y - model.predict(X))

    # Reuse the model fitted on the training rows for the held-out rows.
    y, X = patsy.dmatrices(formula, test_set, return_type="matrix")
    test_mse = mse(y - model.predict(X))

    return training_mse, test_mse

KNN回归在训练集上的MSE为零（sklearn）

0 个答案: