import numpy as np
import pandas as pd
import sklearn.model_selection
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression


def read_data():
    print("Main Function Started")
    # Read the tab-delimited sensor files, limited to the temperature columns
    print("Reading in sensor data")
    read_fulldata = pd.read_csv("tempData.txt", delimiter='\t')
    fulldata_col = ["TempLR", "TempK", "TempNW", "TempSW", "TempW", "TempTV",
                    "TempO"]
    print(read_fulldata[fulldata_col].head())
    read_fulldata2 = pd.read_csv("tempData2.txt", delimiter='\t')
    print(read_fulldata2[fulldata_col].head())
    predictThisTemp = "TempK"
    print(predictThisTemp + " chosen for prediction")
    # Features: all temperature columns except the target; target: the chosen column
    X = np.array(read_fulldata2[fulldata_col].drop(columns=[predictThisTemp]))
    y = np.array(read_fulldata2[predictThisTemp])
    # Note: reshape returns a new array, so this call leaves X unchanged
    X.reshape(1, -1)
    # y.reshape(1, -1)
    print("Arrays created for prediction")
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, test_size=0.3, shuffle=False)
    return x_train, x_test, y_train, y_test

def fit_data(x_train, x_test, y_train, y_test):
    print("Data for test and training split up")
    # Encode the continuous temperature targets as discrete class labels
    lab_enc = preprocessing.LabelEncoder()
    encoded_ytrain = lab_enc.fit_transform(y_train)
    encoded_ytest = lab_enc.fit_transform(y_test)
    logisticR = LogisticRegression(max_iter=12000, solver='saga',
                                   multi_class='multinomial')
    logisticR.fit(x_train, encoded_ytrain)
    acc = logisticR.score(x_test, encoded_ytest)
    acc2 = logisticR.score(x_train, encoded_ytrain)
    print(acc)
    print(acc2)
    # y_pred = logisticR.predict(x_test)
    # print(y_pred)

if __name__ == "__main__":
    x_train, x_test, y_train, y_test = read_data()
    fit_data(x_train, x_test, y_train, y_test)
Traceback (most recent call last):
File "C:/Users/Kabla/PycharmProjects/test/test.py", line 106, in <module>
fit_data(x_train, x_test, y_train, y_test)
File "C:/Users/Kabla/PycharmProjects/test/test.py", line 93, in fit_data
logisticR.fit(x_train, encoded_ytrain)
File "C:\Users\Kabla\Anaconda3\envs\condaEnv\Lib\site-packages\sklearn\linear_model\logistic.py", line 1288, in fit
accept_large_sparse=solver != 'liblinear')
File "C:\Users\Kabla\Anaconda3\envs\condaEnv\Lib\site-packages\sklearn\utils\validation.py", line 756, in check_X_y
estimator=estimator)
File "C:\Users\Kabla\Anaconda3\envs\condaEnv\Lib\site-packages\sklearn\utils\validation.py", line 573, in check_array
allow_nan=force_all_finite == 'allow-nan')
File "C:\Users\Kabla\Anaconda3\envs\condaEnv\Lib\site-packages\sklearn\utils\validation.py", line 56, in _assert_all_finite
raise ValueError(msg_err.format(type_err, X.dtype))
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
This is the error traceback. The data is stored in CSV form inside the txt files and is read in correctly. All values are accounted for: there are no NaN or infinite values, and nothing too large for 'float64'.
Is there something wrong with the logistic regression as I have written it, or am I missing something else? Thanks.
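
As a side note, one way to double-check that claim is to scan the data right before fitting. The following is a minimal diagnostic sketch, not part of the original script; it assumes the same tempData2.txt file and temperature column names used above.

import numpy as np
import pandas as pd

# Re-read the training file and restrict it to the temperature columns
df = pd.read_csv("tempData2.txt", delimiter='\t')
cols = ["TempLR", "TempK", "TempNW", "TempSW", "TempW", "TempTV", "TempO"]

# Per-column count of missing values (blank fields are read as NaN)
print(df[cols].isna().sum())

# Cast to float and count non-finite entries; this raises if any field is non-numeric
arr = np.asarray(df[cols], dtype=float)
print("NaN entries:", np.isnan(arr).sum())
print("Inf entries:", np.isinf(arr).sum())

# Show any rows that contain a non-finite value in a temperature column
print(df[cols][~np.isfinite(arr).all(axis=1)])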