我正在使用 sklearn 的 LogisticRegression 训练逻辑回归模型,但在对测试集进行预测时遇到了 TypeError。
代码:
# Pull the model's input columns out of the test frame as a NumPy array.
selected_columns = ["Sex", "Age", "Pclass", "Fare", "Embarked"]
test_features = test[selected_columns].values
# Predict on the test rows; `myfit2` is the model object defined earlier
# (not shown in this snippet).
myprediction = myfit2.predict(test_features)
错误:
TypeError: float() argument must be a string or a number(float() 的参数必须是字符串或数字)
我已经反复检查过语法。这可能与我使用的是 Python 3.5 有关,因为同样的代码在 Python 2.7 上似乎可以正常运行。非常感谢任何能帮助解决此错误的建议。
答案 0 :(得分:0)
问题在于数据中包含 NaN(缺失值),需要先把缺失值填充好,再进行训练和预测:
代码:
import pandas as pd
from numpy import nanmean
import numpy as np
from sklearn.linear_model import LogisticRegression
# Load the training and test sets from CSV files in the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Columns fed to the model, in the same order for fitting and predicting.
feature_columns = ["Sex", "Age", "Pclass", "Fare", "Embarked"]

# Integer codes for the string-valued categorical columns.
category_codes = {
    "Sex": {"male": 0, "female": 1},
    "Embarked": {"S": 0, "C": 1, "Q": 2},
}

# Encode each categorical column by replacing the whole column at once.
# The chained form `frame[col][mask] = value` may write to a temporary copy
# (SettingWithCopyWarning) and has no effect when pandas copy-on-write is
# enabled.  Labels that are missing or not listed in the mapping become NaN
# and are handled by the imputation step below.
for frame in (test, train):
    for column, codes in category_codes.items():
        frame[column] = frame[column].map(codes)

# Impute missing feature values with the mean age of the same frame.  "Age"
# is selected by name rather than by position so the code does not depend on
# the column order of the CSV files, and only the model's feature columns
# are filled so unrelated columns are left untouched.
# NOTE(review): any NaN in the other feature columns (e.g. "Fare",
# "Embarked") is also filled with the mean age; this is kept so the feature
# matrix matches the original approach -- consider per-column imputation.
nan_mean_age = nanmean(test["Age"])
test[feature_columns] = test[feature_columns].fillna(value=nan_mean_age)
nan_mean_age2 = nanmean(train["Age"])
train[feature_columns] = train[feature_columns].fillna(value=nan_mean_age2)

# Build purely numeric feature matrices; dtype=float makes any value that is
# still non-numeric fail here instead of inside the estimator.
train_features = np.asarray(train[feature_columns].values, dtype=float)
test_features = np.asarray(test[feature_columns].values, dtype=float)

lg = LogisticRegression()
# Define your target variable y and then fit.
# NOTE(review): the target is taken by position (second column of
# train.csv), presumably the survival label -- verify against the file.
y_train = train.iloc[:, 1]
lg.fit(train_features, y_train)
lg.predict(test_features)
结果:
array([0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1,
1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0,
0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1,
1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0,
1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1,
0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1,
1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1,
1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
1, 0, 0, 0], dtype=int64)
按照上面的方式处理之后,代码应该就可以正常运行了。