尝试使用Logistic回归时出现值错误

时间:2019-05-07 18:00:56

标签: python input scikit-learn regression

def read_data():
    print ("Main Function Started")

    # Read Excel file, limit to temp columns
    print("Reading in sensor data")

    read_fulldata = pd.read_csv("tempData.txt", delimiter=('\t'))
    fulldata_col = ["TempLR", "TempK", "TempNW", "TempSW", "TempW", "TempTV", 
    "TempO"]
    print(read_fulldata[fulldata_col].head())

    read_fulldata2 = pd.read_csv("tempData2.txt", delimiter=('\t'))
    print(read_fulldata[fulldata_col].head())


    predictThisTemp = "TempK"
    print(predictThisTemp + " Chosen for prediction")

    X = np.array(read_fulldata2[fulldata_col].drop([predictThisTemp], 1))
    y = np.array(read_fulldata2[predictThisTemp])
    X.reshape(1, -1)
    #y.reshape(1, -1)
    print("Arrays created for prediction")

    x_train, x_test, y_train, y_test = 
    sklearn.model_selection.train_test_split(X, y, test_size=0.3, 
    shuffle=False)
    return(x_train, x_test, y_train, y_test)



def fit_data(x_train, x_test, y_train, y_test):
    print("Data for test and training split up")

    lab_enc = preprocessing.LabelEncoder()
    encoded_ytrain =lab_enc.fit_transform(y_train)
    encoded_ytest = lab_enc.fit_transform(y_test)


    logisticR = LogisticRegression(max_iter=12000, solver='saga', 
    multi_class='multinomial')
    logisticR.fit(x_train, encoded_ytrain)

    acc = logisticR.score(x_test, encoded_ytest)
    acc2 = logisticR.score(x_train, encoded_ytrain)
    print(acc)
    print(acc2)

    #y_pred = logisticR.predict(encoded_ytrain)
    #print(y_pred)


if __name__ == "__main__":
    x_train, x_test, y_train, y_test = read_data()
    fit_data(x_train, x_test, y_train, y_test)
Traceback (most recent call last):
  File "C:/Users/Kabla/PycharmProjects/test/test.py", line 106, in <module>
    fit_data(x_train, x_test, y_train, y_test)
  File "C:/Users/Kabla/PycharmProjects/test/test.py", line 93, in fit_data
    logisticR.fit(x_train, encoded_ytrain)
  File "C:\Users\Kabla\Anaconda3\envs\condaEnv\Lib\site-packages\sklearn\linear_model\logistic.py", line 1288, in fit
    accept_large_sparse=solver != 'liblinear')
  File "C:\Users\Kabla\Anaconda3\envs\condaEnv\Lib\site-packages\sklearn\utils\validation.py", line 756, in check_X_y
    estimator=estimator)
  File "C:\Users\Kabla\Anaconda3\envs\condaEnv\Lib\site-packages\sklearn\utils\validation.py", line 573, in check_array
    allow_nan=force_all_finite == 'allow-nan')
  File "C:\Users\Kabla\Anaconda3\envs\condaEnv\Lib\site-packages\sklearn\utils\validation.py", line 56, in _assert_all_finite
    raise ValueError(msg_err.format(type_err, X.dtype))
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

这是错误的回溯。数据以CSV格式保存在txt文件中,并且可以正确读取。所有值均已考虑在内,没有NaN或无穷大值,对于'float64'也没有太大的值。

我所说的逻辑回归有什么问题吗?还是我还缺少其他东西?谢谢

0 个答案:

没有答案