我正在使用Logistic回归准备模型,但似乎无法解决此错误。
我尝试过使用标签编码器,但是效果不佳。
import pandas as pd
import numpy as np
import scipy.stats as st
import statsmodels.api as sm
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import utils
import matplotlib.mlab as mlab
import scipy.optimize as opt
df=pd.read_csv('C:/Users/Sehej Bakshi/Desktop/New folder/lung_cancer.csv')
print(df.head())
import sklearn
new_features=df[['cigsPerDay', 'Age', 'BMI', 'Glucose', 'Age_of_initialization', 'Probability']]
x=new_features.iloc[:, :-1]
y=new_features.iloc[:, -1]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
from sklearn.linear_model import LogisticRegression
LR=LogisticRegression(C=0.01, solver='liblinear').fit(x_train, y_train)
ypred=LR.predict(x_test)
print('Accuracy:', sklearn.metrics.accuracy_score(y_test, y_pred))
错误:
Traceback (most recent call last):
File "C:/Python36/Hack/lg.py", line 28, in <module>
LR=LogisticRegression(C=0.01, solver='liblinear').fit(x_train, y_train)
File "C:\Python36\lib\site-packages\sklearn\linear_model\logistic.py", line 1533, in fit
check_classification_targets(y)
File "C:\Python36\lib\site-packages\sklearn\utils\multiclass.py", line 169, in check_classification_targets
raise ValueError("Unknown label type: %r" % y_type)
ValueError: Unknown label type: 'continuous'