如何确定我的决策边界线?(sklearn 和 Python)

时间:2019-06-20 12:18:37

标签: python plot scikit-learn regression

我目前正在使用 sklearn 和 Python 做一个小项目,我想知道为什么我画出的决策边界线是错的。我的数据集主要包含应变仪的测量值、对应的温度,以及该测量值是否来自“故障”传感器。

Associated scatter

# coding=utf-8
# Libraries
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier as KN
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sn
from Model import LogisticRegressionUsingGD
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 

url = "measurementsStrainGauge3.csv"

# Column names are supplied explicitly via `names=`: strain-gauge
# deformation, temperature, and the 0/1 fault flag.
columnsHead = ['µm', 'tmp','fault']
dataset = pd.read_csv(url, names=columnsHead)
dataset.head()

# X = feature values, all the columns except the last column
X = dataset.iloc[:, :-1]

# y = target values, last column of the data frame
y = dataset.iloc[:, -1]

# Split the rows by class so each class gets its own scatter series below.
faultyData = dataset.loc[y == 1]
notFaultyData = dataset.loc[y == 0]


# Model building
# Prepend a constant column; the design matrix columns are now
# [1, deformation, temperature].
X = np.c_[np.ones((X.shape[0], 1)), X]
# Go through the underlying NumPy array: indexing a pandas Series with
# [:, np.newaxis] is no longer supported by recent pandas versions.
y = y.values[:, np.newaxis]

model = LogisticRegression()
model.fit(X, y.ravel())
predicted_classes = model.predict(X)
accuracy = accuracy_score(y.flatten(), predicted_classes)
parameters = model.coef_
params = parameters.flatten()

print(params)
print("Precision : %", accuracy)

# Decision boundary: the set of points where the linear score is zero,
#   intercept_ + params[0]*1 + params[1]*deformation + params[2]*temperature = 0
# LogisticRegression fits its own intercept in addition to the weight it
# learns for the constant column, so both terms make up the total offset.
bias = model.intercept_[0] + params[0]

# Both ends of the line must come from the deformation column (index 1),
# which is what the x axis of the plot shows. Using the temperature column
# for the upper bound squeezes the line into the wrong x-range.
x_values = np.array([np.min(X[:, 1]) - 5, np.max(X[:, 1]) + 5])
y_values = -(bias + params[1] * x_values) / params[2]

# Plots
plt.plot(x_values, y_values, label=u'Ligne de décision')
plt.scatter(faultyData.iloc[:, 0], faultyData.iloc[:, 1],
            s=10, label='Faute')
plt.scatter(notFaultyData.iloc[:, 0], notFaultyData.iloc[:, 1],
            s=10, label='Non faute')

plt.xlabel(u'Déformation (µ/m)')
plt.ylabel(u'Température (C°)')
plt.legend()
plt.show()

编辑:这是我使用的数据( 1 是“故障”, 0 是“无故障”):

6973,15.02,0
3017,41.75,0
5900,61.35,1
8610,63.57,1
5405,44.42,0
3965,-5.13,0
3079,12.64,0
4562,13.09,0
4185,46.78,0
6734,34.73,0
5711,-7.34,0
5006,25.04,0
7614,51.4,1
3265,27.81,0
7218,60.65,1
5852,35.75,0
7880,46.89,0
7819,11.53,0
4775,2.16,0
5128,-14.42,0
6385,-7.32,0
3511,17.18,0
6303,28.88,0
3476,29.81,0
6285,61.21,1
3437,-2.2,0
8914,66.67,1
6306,67.7,1
3327,36.57,0
7842,-16.59,0
7336,67.02,1
4949,57,1
4036,66.4,1
3644,-0.57,0
6082,13.8,0
8044,65.51,1
7659,52.96,1
3319,40.44,0
7928,8.28,0
6812,35.83,0
7080,70.66,1
6876,79.59,1
7826,27.75,0
4514,69,1
5885,-18.39,0
4063,77.65,1
6827,-7.36,0
5085,50.1,1
7353,71.37,1
8878,11.08,0
4385,48.06,0
4204,27.01,0
6614,15.66,0
3379,-12.1,0
8312,-13.57,0
5565,21.29,0
3670,-18.79,0
4152,31.22,0
5448,-17.83,0
3081,32.11,0
8674,32.2,0
4224,21.73,0
7701,63.21,1
8984,18.09,0
6266,5.5,0
8223,32.91,0
3709,76.47,0
4888,-5.16,0
4824,-1.02,0
8579,4.81,0
8588,48.98,0
7805,73.59,1
3859,-1.31,0
4666,43.92,0
3473,-7.51,0
4301,-12.26,0
6421,65.2,1
8345,35.49,0
5840,45.75,0
4702,-1.85,0
6538,7.98,0
3217,44.56,0
6450,70.51,1
3444,12.54,0
5220,-13.33,0
8724,-16.96,0
6043,73.71,1
3187,23.54,0
6696,6.83,0
7928,34.15,0
3013,36.46,0
7376,76.77,1
7752,22.78,0
7328,-14.24,0
6690,71.65,1
6253,-1.57,0
4238,60.1,1
6569,33.7,0
6213,13.37,0
4075,48.68,0
7964,16.1,0
7810,65.45,1
6350,25.03,0
6275,61.15,1
6883,56.02,1
3622,2.82,0
4570,0.04,0
6514,37.81,0
3999,-19.13,0
5082,-6.88,0
6987,25.56,0
5706,42.42,0
5474,28.61,0
5932,4.84,0
4110,-2.27,0
7662,0.89,0
8851,-5.14,0
4370,58.47,1
5541,40.52,0
5408,11.39,0
7986,76.91,1
7124,79.9,1
3654,22.37,0
8165,2.77,0
8452,32.72,0
8849,49.46,0
8517,3.56,0
6027,2.8,0
8405,26.44,0
8313,76.85,1
3545,59.98,0
4033,77.04,1
3083,61.34,0
3041,47.35,0
4901,5.1,0
8225,0.49,0
8525,36.75,0
8402,-4.46,0
6794,36.73,0
6317,79.12,1
4961,18.47,0
5790,11.45,0
6661,-16.26,0
6211,45.59,0
4277,43.98,0
3116,-19.83,0
3971,34.46,0
5417,39.99,0
8881,73.96,1
7119,-12.92,0
7011,48.87,0
6932,31.42,0
4118,32.2,0
4412,70.49,1
5908,20.69,0
5367,3.74,0
7461,24.85,0
5154,26.32,0
6019,46.53,0
4566,-19.92,0
5633,48.09,0
6558,50.27,1
7257,-10.97,0
3896,74.1,0
8084,-5.84,0
3163,40.61,0
3983,45.91,0
4684,23.51,0
5147,75.9,1
6120,72.83,1
8039,63.16,1
6498,-1.05,0
3332,54.26,0
7504,52.7,1
3477,79.28,0
5549,13.41,0
6377,75.99,1
5114,19.59,0
8631,-3.75,0
4806,12.49,0
4923,6.8,0
8470,14.24,0
8032,-12.38,0
5387,-11.47,0
3330,21.95,0
3716,16.77,0
8085,39.17,0
3869,5.53,0
6466,71.76,1
6988,31.83,0
4922,10.24,0
8340,-9.13,0
4136,62.2,1
3747,45.66,0
5042,32.84,0
8492,14.71,0
6282,37.44,0
8732,36.03,0
7694,62.94,1
6814,67.12,1
6757,-2.81,0
5299,8.04,0
5733,71.57,1
3282,61.78,0
7036,53.86,1
3740,47.41,0
4021,53.49,1
5853,-2.98,0
7212,50.47,1
7237,21.88,0
5048,76.42,1
5289,-18.42,0
6370,40.66,0
5922,-0.84,0
4287,40.22,0
3039,50.98,0
7127,68.39,1
7718,45.12,0
5731,75.06,1
7578,76.26,1
7934,18.88,0
3404,72.66,0
8704,-3.06,0
8933,77.09,1
3789,6.55,0
4859,12.35,0
5283,32.99,0
4998,-4.25,0
6613,-1.29,0
5432,23.25,0
7086,17.65,0
4057,-2.48,0
4436,-4.3,0
8527,31.34,0
6375,63.06,1
7101,-13.35,0
5043,30.15,0
7747,29.09,0
4056,30.35,0
8823,21.67,0
4860,48.11,0
3699,69.05,0
4808,69.35,1
6619,25.9,0
4098,3.9,0
8463,73.25,1
5328,41.71,0
5073,68.73,1
4063,49.4,0
3353,29.46,0
6205,21.64,0
7663,5.2,0
6336,28.68,0
6559,64.37,1
5606,29.07,0
4768,5.83,0
5040,8.76,0
7409,36.27,0
7438,56.12,1
8719,42.81,0
3859,5.62,0
5280,-10.07,0
7795,-7.19,0
3874,-17.21,0
3356,6.77,0
3642,19.1,0
3619,65.96,0
5938,5.05,0
7545,65.69,1
5440,36.21,0
7870,30.08,0
3159,20.17,0
8689,44.11,0
5367,76.86,1
8470,-5.38,0
3394,76.58,0
8644,58.69,1
6883,0.8,0
8900,34.32,0
6060,-11.32,0
6081,45.06,0
5936,-8.27,0
3523,47.16,0
6247,77.33,1
4984,31.52,0
4176,21.07,0
3317,36.41,0
8621,10.17,0
6562,1.93,0
5837,8.01,0
5336,64.17,1
6620,44.64,0
5312,59.82,1
6323,11.16,0
7213,55.46,1
6894,30.54,0
7062,40.89,0
6575,36.44,0
3679,77.68,0
6566,29.49,0
7351,-6.37,0
5227,14.63,0
5461,0.9,0
7577,-18.63,0
4630,18.04,0
5132,37.62,0
8925,-17.93,0
8626,62.48,1
6980,21.47,0
8169,72.86,1
5566,63.81,1
7655,37.05,0
7134,-18.12,0
5795,26.67,0
6392,64.86,1
3324,-0.46,0
4810,22.8,0
8712,67.22,1
3803,62.02,0
4065,23.9,0
4695,59.94,1
7620,57.72,1
6799,67.89,1
5147,30.54,0
4629,-14.92,0
3560,-17.5,0
8586,54.64,1
3822,45.33,0
5930,-14.71,0
7754,41.33,0
3547,23.34,0
4163,32.52,0
8550,63.04,1
7552,-1.77,0
7803,-0.39,0
3628,45.4,0
6413,-17.97,0
6258,-14.1,0
7000,-16.14,0
8570,-2.87,0
3395,16.93,0
4259,41.77,0
8980,63.7,1
7635,58.79,1
3271,-5.45,0
3743,-4.47,0
3847,20.11,0
8649,26.46,0
4804,22.25,0
8054,68.84,1
5955,50.28,1
4421,13.44,0
8391,22.63,0
6611,27.72,0
4832,37.76,0
4960,9.2,0
6035,-8.52,0
6136,75.5,1
8702,52.76,1
4351,49.14,0
4085,5.4,0
7357,-11.35,0
5080,25.12,0
5243,79.92,1
6144,36.6,0
4686,27.78,0
4740,77.34,1
8634,22.09,0
3611,38.18,0
5529,13.2,0
3044,2.07,0
5618,1.39,0
3534,5.96,0
3281,21.92,0
6296,-4.04,0
6422,53.66,1
4770,36.74,0
5285,38.3,0
3466,-0.31,0
8347,78.31,1
4789,44.55,0
8260,-4.02,0
8314,8.51,0
4146,2.78,0
8530,-14.13,0
4529,71.55,1
7826,21.49,0
5980,72.18,1
7218,-1.31,0
5861,19.5,0
5662,50.07,1
6087,56.6,1
8219,66.81,1
7180,1.24,0
6594,54.13,1
8408,70.9,1
3766,-0.97,0
3113,35.67,0
7871,71.23,1
4898,-8.25,0

1 个答案:

答案 0 :(得分:1)

我在您的代码中发现了几个问题。

  • 我不明白您为什么要做 # Filtering data 这一步,也不清楚您在 # Model building 部分到底想实现什么。这些操作最终改动了您的数据。

  • 在绘图时,您是在两个 x 坐标之间画一条线,并用模型来生成对应的 y 坐标。这样做行不通,因为模型预测的是某个点所属的类别,而两个特征(Temperature 和 Deformation,即温度和形变)已经各自占用了一个坐标轴。

这是利用数据和模型的决策面为分类任务绘图的正确方法。

我已经修改了您的代码以生成该图形,下面是完整方法的一个基本实现。

# Load the measurements; column names are supplied explicitly via `names=`.
columnsHead = ['µm', 'tmp','fault']
dataset = pd.read_csv(url, names=columnsHead)
print(dataset.head())

# X = feature values, all the columns except the last column
X = dataset.iloc[:, :-1].values

# y = target values, last column of the data frame (as a NumPy array so it
# can be used directly to build boolean masks over the rows of X)
y = dataset.iloc[:, -1].values

model = LogisticRegression()
model.fit(X, y)

# Creating mesh: a grid covering the data range (padded by 1 on each side).
# The step is 100 along the first feature and 1 along the second.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 100),
                     np.arange(y_min, y_max, 1))

# Plotting decision boundary: classify every grid point, then reshape the
# predictions back to the grid so contourf can colour the two regions.
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.figure()
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
plt.title("Decision surface of LogisticRegression")
plt.axis('tight')

# Overlay the samples, one scatter call per class with a fixed colour.
colors = "br"
for i, color in zip(model.classes_, colors):
    # A boolean mask selects plain 1-D columns. No cmap is passed because a
    # colormap has no effect when `c` is a single colour string.
    mask = y == i
    plt.scatter(X[mask, 0], X[mask, 1], c=color,
                edgecolor='black', s=20)

plt.show()

结果图

Sample output