我想根据客户的年龄、余额以及是否已有贷款对客户进行分类。
我的数据集(dataset)如下所示:
cid age job marital education default balance housing loan
0 C00004 47 blue-collar married NaN no 1506 yes 0
1 C00005 33 # single NaN no 1 no 0
2 C00009 58 retired married 1.0 no 121 yes 0
3 C00021 28 blue-collar married 2.0 no 723 yes 1
4 C00025 40 retired married 1.0 no 0 yes 1
这是我的代码:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors
from sklearn.neighbors import KNeighborsClassifier
# Number of neighbours for the KNN classifier built further down.
n_neighbors = 15

# Load the customer table; only 'age', 'balance' and 'loan' are used below.
df = pd.read_csv('/Users/Soul.C/PycharmProjects/prac01/bank_customer.csv')
# NOTE(review): outside a notebook this expression has no visible effect.
df.head()

# Series.replace returns a NEW Series, so the result must be assigned back;
# the original calls discarded it and left the column untouched.
# Map to integers (not the strings '0'/'1') so the label stays numeric and
# can be used directly for colouring/plotting. If the column already holds
# 0/1 this is a no-op.
df['loan'] = df['loan'].replace({'no': 0, 'yes': 1})

# Features are (age, balance) pairs; the target is the loan flag.
X = np.array(df[['age', 'balance']])
y = df['loan']
# Split into train/test sets, holding out 33% of the rows for testing.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33)

# Create the nearest-neighbours classifier and fit it on the training split.
# Pass n_neighbors explicitly: the original constructed the classifier with
# no arguments, so the n_neighbors setting defined earlier was silently
# ignored in favour of the library default.
knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
knn.fit(X_train, y_train)
# Build the grid on which the decision boundary is evaluated: every point
# in the rectangle [x_min, x_max] x [y_min, y_max], padded by 0.5 per side.
x_min, x_max = X_train[:, 0].min() - .5, X_train[:, 0].max() + .5
y_min, y_max = X_train[:, 1].min() - .5, X_train[:, 1].max() + .5

# Use a fixed number of samples per axis instead of a fixed step size.
# The two features live on very different scales (age vs. balance), so a
# single small step such as 0.02 makes the number of mesh cells grow with
# the balance range and can exhaust memory. A fixed resolution keeps the
# mesh at grid_points x grid_points regardless of the data range.
# (Larger grid_points gives finer class boundaries at higher cost.)
grid_points = 300
xx, yy = np.meshgrid(np.linspace(x_min, x_max, grid_points),
                     np.linspace(y_min, y_max, grid_points))

# Flatten the grid into (n_points, 2) coordinates and predict a class for each.
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
# Reshape the predictions back to the grid's shape for plotting.
Z = Z.reshape(xx.shape)
# Create figure 1 with a 4x3 inch canvas.
plt.figure(1, figsize=(4, 3))

# Colormaps for the background regions (light) and the data points (bold).
# NOTE(review): three colours are listed, but the loan label looks binary in
# the sample data — confirm whether the third colour is needed.
light_colors = ListedColormap(['blue', 'c', 'g'])  # 'c' = cyan, 'g' = green
bold_colors = ListedColormap(['r', 'k', 'yellow'])  # 'r' = red, 'k' = black

# Predicted class for every mesh cell.
plt.pcolormesh(xx, yy, Z, cmap=light_colors)

# Overlay the training points. Colours must come from y_train: it has one
# label per row of X_train, whereas the full y also contains the test rows
# and its length does not match the plotted points.
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=bold_colors)

# Column 0 of X is age (x axis) and column 1 is balance (y axis); the loan
# label is shown by colour, not by an axis.
plt.xlabel('age')
plt.ylabel('balance')
plt.show()