我是python和机器学习的新手
我的代码是
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import cross_validation, svm,cross_validation,neighbors
df=pd.read_csv('train_loan_prediction.csv')
df.drop(['Loan_ID'],1,inplace=True)
df.replace('Graduate',1,inplace=True)
df.replace('Not Graduate',0,inplace=True)
df.replace('Urban',2,inplace=True)
df.replace('Semiurban',1,inplace=True)
df.replace('Rural',1,inplace=True)
df.replace('Yes',1,inplace=True)
df.replace('No',0,inplace=True)
df.replace('Male',1,inplace=True)
df.replace('Female',0,inplace=True)
df.replace('Y',1,inplace=True)
df.replace('N',0,inplace=True)
df.replace('3+',4,inplace=True)
x=np.array(df.drop(['Loan_Status'],1))
y=np.array(df['Loan_Status'])
df.fillna(0,inplace=True)
df.head(20)
x_train,x_test,y_train,y_test=cross_validation.train_test_split(x,y,test_size=0.2
)
clf=neighbors.KNeighborsClassifier()
clf.fit(x_train, y_train)
答案 0 :(得分:0)
在设置x
和y
之前,您需要替换中的数据框中的NaN:
# replace NaNs first
df.fillna(0,inplace=True)
# then set features and labels
x = np.array(df.drop(['Loan_Status'],1))
y = np.array(df['Loan_Status'])