sklearn LinearModel,Ridge和Lasso产生不兼容的输出?

时间:2019-07-31 13:01:31

标签: python scikit-learn lasso-regression linearmodels

我在波士顿房价数据集上使用不同的回归模型。我发现,如果使用普通线性回归或岭回归,预测值的形状为(102,1);而如果对Lasso使用相同的代码,输出的形状则为(102,)。这是为什么?这样一来,代码就会在调用pearsonr的那一行崩溃,并报错:ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

关于如何使以下代码平稳运行的任何想法?

from sklearn.datasets import load_boston
import numpy as np
import pandas as pd
import sys


def evalOneModel(model, name, X, y, nRuns):
    """Fit and score ``model`` on ``nRuns`` random 80/20 train/test splits.

    Each run draws a fresh split (``random_state=None``), refits the model,
    and computes the MSE, R^2 and Pearson correlation between the held-out
    targets and the predictions. The per-run Pearson r and the fitted
    coefficients are printed, followed by a summary line with the mean
    Pearson r over all runs.

    Args:
        model: Estimator exposing ``fit``, ``predict`` and ``coef_``.
        name: Label used in the summary line.
        X: Feature table handed to ``train_test_split``.
        y: Target values; either a single-column 2-D table or a 1-D vector.
        nRuns: Number of random splits to evaluate.

    Returns:
        None. All results are written to stdout.
    """
    # MSE and R^2 are collected per run but only the Pearson r is reported.
    allMse = []
    allR2 = []
    all_rho_P = []
    for i in range(nRuns):
        x_train, x_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=None)

        model = model.fit(x_train, y_train)
        predictions = model.predict(x_test)

        mse = mean_squared_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)

        allMse.append(mse)
        allR2.append(r2)

        print(type(y_test))
        print(y_test.shape)
        print(type(predictions))
        print(predictions.shape)

        # Estimators disagree on the prediction shape when y is a
        # single-column 2-D table: some return (n, 1), others (n,).
        # Flatten both sides so pearsonr always receives two 1-D vectors
        # and always returns a scalar coefficient.
        y_true_flat = np.asarray(y_test).ravel()
        y_pred_flat = np.asarray(predictions).ravel()
        rhoP, _ = pearsonr(y_true_flat, y_pred_flat)

        all_rho_P.append(rhoP)

        print("run{}={:0.3f}; ".format(i, rhoP), end="")
        print(model.coef_)

    myTitle = "{} mean={:0.3f}".format(name, np.mean(all_rho_P))
    print("")
    print(myTitle)

    print("")
    sys.stdout.flush()

####### MAIN #####


pd.set_option('expand_frame_repr', False)

# NOTE(review): load_boston is reportedly removed in newer scikit-learn
# releases -- confirm against the installed version.
bosten_data = load_boston()

# Feature table, with the target appended as the "MEDV" column.
df = pd.DataFrame(bosten_data.data, columns=bosten_data.feature_names)
df['MEDV'] = bosten_data.target

# The target on its own, kept as a single-column frame.
target = pd.DataFrame(bosten_data.target, columns=["MEDV"])

# Standardise every column: subtract the column mean, divide by its std.
norm_df, norm_target = [
    (frame - frame.mean()) / frame.std() for frame in (df, target)
]

# Predictors used by the models; y stays a one-column (2-D) frame.
feature_columns = ["RM", "AGE", "PTRATIO", "LSTAT"]
X = norm_df[feature_columns]
y = norm_target


from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import spearmanr, pearsonr


print("\n\nstarting runs ...\n")

from sklearn import linear_model
from sklearn.linear_model import Ridge  # L2
from sklearn.linear_model import Lasso  # L1

# (label, estimator class, constructor kwargs) for each model to evaluate.
# Each estimator is built right before its run, one at a time.
model_specs = [
    ("OLS", linear_model.LinearRegression, {}),
    ("Ridge (alpha=1)", linear_model.Ridge, {"alpha": 1.0}),
    ("Lasso (alpha=1)", linear_model.Lasso, {"alpha": 1.0}),
]

for label, estimator_cls, params in model_specs:
    model = estimator_cls(**params)
    evalOneModel(model, label, X, y, 1)

0 个答案:

没有答案