Plotting the decision boundary of logistic regression

Time: 2018-08-22 02:22:53

Tags: python matplotlib

I am trying to implement logistic regression. I have mapped the features to polynomial terms of the form x1^2 * x2^0 + x1^1 * x2^1 + ... and now I want to plot the decision boundary for it. Following this answer, I wrote the code below using the contour function:
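As a point of reference (not from the original post): with degree 2 this mapping turns each point (x1, x2) into the terms 1, x1, x2, x1^2, x1*x2, x2^2. A minimal stand-alone sketch of that expansion, using a hypothetical helper poly_terms, would be:

import numpy as np

def poly_terms(x1, x2, degree=2):
    # returns [1, x1, x2, x1^2, x1*x2, x2^2, ...] for a single sample
    terms = []
    for i in range(degree + 1):
        for j in range(i + 1):
            terms.append((x1 ** (i - j)) * (x2 ** j))
    return np.array(terms)

print(poly_terms(2.0, 3.0))  # [1. 2. 3. 4. 6. 9.]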

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def map_features(x, degree):
    x_old = x.copy()
    x = pd.DataFrame({"intercept" : [1]*x.shape[0]})
    column_index = 1
    for i in range(1, degree+1):
        for j in range(0, i+1):
            # column name like "exam1^1exam2^1"; value is x1^(i-j) * x2^j
            col_name = str(x_old.columns[1]) + "^" + str(i-j) + str(x_old.columns[2]) + "^" + str(j)
            x.insert(column_index, col_name, np.multiply(x_old.iloc[:,1]**(i-j), x_old.iloc[:,2]**(j)))
            column_index+=1
    return x

def normalize_features(x):
    for column_name in x.columns[1:]:
        mean = x[column_name].mean()
        std = x[column_name].std()
        x[column_name] = (x[column_name] - mean) / std
    return x

def normalize_features2(x):
    for column_name in x.columns[1:-1]:
        mean = x[column_name].mean()
        std = x[column_name].std()
        x[column_name] = (x[column_name] - mean) / std
    return x

def sigmoid(z):
    # print(z)
    return 1/(1+np.exp(-z))

def predict(x):
    global theta
    probability = np.asscalar(sigmoid(np.dot(x,theta)))
    if(probability >= 0.5):
        return 1
    else:
        return 0

def predict2(x):
    global theta
    probability = np.asscalar(sigmoid(np.dot(x.T,theta)))
    if(probability >= 0.5):
        return 1
    else:
        return 0

def cost(x, y, theta):
    m = x.shape[0]
    h_theta = pd.DataFrame(sigmoid(np.dot(x,theta)))
    cost = 1/m * ((-np.multiply(y,h_theta.apply(np.log)) - np.multiply(1-y, (1-h_theta).apply(np.log))).sum())
    return cost

def gradient_descent(x, y, theta):
    global cost_values
    m = x.shape[0]
    iterations = 1000
    alpha = 0.03
    cost_values = pd.DataFrame({'iteration' : [0], 'cost' : [cost(x,y,theta)]})

    for iteration in range(0,iterations):
        theta_old = theta.copy()
        theta.iloc[0,0] = theta.iloc[0,0] - (alpha/m) * np.asscalar((sigmoid(np.dot(x,theta_old)) - y).sum())
        for i in range(1,theta.shape[0]):
            theta.iloc[i,0] = theta.iloc[i,0] - (alpha/m) * np.asscalar(np.multiply((sigmoid(np.dot(x,theta_old)) - y), pd.DataFrame(x.iloc[:,i])).sum())
        c = cost(x,y,theta)
        cost_values = cost_values.append({"iteration" : iteration, "cost" : c}, ignore_index=True)


### Read train data
train_data = pd.read_csv("ex2data1.csv", names = ["exam1", "exam2", "admit"])

### Add intercept column
train_data.insert(0, "intercept", 1)

### Create input data
x = train_data.loc[:,"intercept":"exam2"]
# print(x.head())
x = map_features(x, 2) #map polynomial features
# print(x.head())
x = normalize_features(x) #normalize features
# print(x.head())
y = pd.DataFrame(train_data.loc[:,"admit"])
theta = pd.DataFrame({"theta" : [0] * len(x.columns)})

### Test cost of initial theta
# print(x.shape)
# print(theta.shape)
# print(np.dot(x,theta))
# print(cost(x,y,theta))

### Perform Gradient Descent
gradient_descent(x, y, theta)
# print(theta)
# print(cost(x,y,theta))

### Plot iteration vs Cost
plt.scatter(cost_values["iteration"], cost_values["cost"])
plt.show()

### Calculate Accuracy
acc = 0
for i in range(0,x.shape[0]):
    p = predict(x.iloc[i,:])
    actual = y.iloc[i,0]
    if(p == actual):
        acc+=1
print((acc/x.shape[0]) * 100)

x_grid, y_grid = np.meshgrid(np.arange(-3, 3, 0.1), np.arange(-3, 3, 0.1))
xx = pd.DataFrame(x_grid.ravel(), columns=["exam1"])
yy = pd.DataFrame(y_grid.ravel(), columns=["exam2"])
z = pd.DataFrame({"intercept" : [1]*xx.shape[0]})
z["exam1"] = xx
z["exam2"] = yy
z = map_features(z,2)
z = normalize_features(z)
p = z.apply(lambda row: predict2(pd.DataFrame(row)), axis=1)
p = np.array(p.values)
p = p.reshape(x_grid.shape)

fig, ax = plt.subplots()
train_data = normalize_features2(train_data)
ax.scatter(train_data[train_data["admit"] == 0]["exam1"], train_data[train_data["admit"] == 0]["exam2"],marker="o")
ax.scatter(train_data[train_data["admit"] == 1]["exam1"], train_data[train_data["admit"] == 1]["exam2"],marker="x")
ax.contour(x_grid, y_grid, p, levels=[0])
ax.axis('off')
plt.show()

Below is the figure I get:

[image: resulting plot]

I'm not sure whether I'm interpreting this correctly, but the line should be more of a curve separating the two classes. The dataset is here: ex2data1.csv
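(Editorial note on why a curve is expected: with degree-2 polynomial features the decision boundary is the set of points where the linear score is zero, which is a conic section rather than a straight line. Using the column order produced by map_features, that boundary is

theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 + theta4*x1*x2 + theta5*x2^2 = 0.)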

1 Answer:

Answer 0 (score: 2):

So I was able to fix this. The problems were:

  • The space I was plotting the data in. My earlier grid ranged from -3 to 3 in increments of 0.1. I changed it to use the limits of my training data instead (x_min, x_max, y_min, y_max).
  • When plotting the training data alongside my decision boundary, I was also normalizing the training data. I removed that and plotted the raw data points. (Not sure whether this was actually causing a problem.)
  • The z values I was using were the class labels. I changed them to the probability values obtained from the sigmoid function, so that I could use the levels parameter (a minimal sketch of this idea follows this list).
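The key change for the plot is the last point above: contour the predicted probabilities on the grid and draw the single level 0.5. A minimal sketch of just that idea, with hypothetical weights theta and a generic predict_proba (not the exact code below), looks like this:

import numpy as np
import matplotlib.pyplot as plt

# hypothetical trained weights for the features [1, x1, x2, x1^2, x1*x2, x2^2]
theta = np.array([-1.0, 1.2, 1.0, -0.8, 0.3, -0.5])

def predict_proba(x1, x2):
    # sigmoid of the linear score over the degree-2 polynomial features
    features = np.stack([np.ones_like(x1), x1, x2, x1**2, x1 * x2, x2**2], axis=-1)
    return 1 / (1 + np.exp(-features @ theta))

# grid spanning an assumed data range
x_grid, y_grid = np.meshgrid(np.linspace(-2, 2, 200), np.linspace(-2, 2, 200))
p = predict_proba(x_grid, y_grid)             # probabilities, not 0/1 labels
plt.contour(x_grid, y_grid, p, levels=[0.5])  # the 0.5 contour is the decision boundary
plt.show()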

Below is the working code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="white")

def map_features(x, degree):
    x_old = x.copy()
    x = pd.DataFrame({"intercept" : [1]*x.shape[0]})
    column_index = 1
    for i in range(1, degree+1):
        for j in range(0, i+1):
            # column name like "exam1^1exam2^1"; value is x1^(i-j) * x2^j
            col_name = str(x_old.columns[1]) + "^" + str(i-j) + str(x_old.columns[2]) + "^" + str(j)
            x.insert(column_index, col_name, np.multiply(x_old.iloc[:,1]**(i-j), x_old.iloc[:,2]**(j)))
            column_index+=1
    return x

def normalize_features(x):
    global mean_values
    global std_values
    for column_name in x.columns[1:]:
        mean = x[column_name].mean()
        std = x[column_name].std()
        x[column_name] = (x[column_name] - mean) / std
        mean_values[column_name] = mean
        std_values[column_name] = std
    return x

def sigmoid(z):
    return 1/(1+np.exp(-z))

def cost(x, y, theta):
    m = x.shape[0]
    h_theta = pd.DataFrame(sigmoid(np.dot(x,theta)))
    cost = 1/m * ((-np.multiply(y,h_theta.apply(np.log)) - np.multiply(1-y, (1-h_theta).apply(np.log))).sum())
    return np.asscalar(cost)

def gradient_descent(x, y, theta):
    global cost_values
    m = x.shape[0]
    iterations = 1000
    alpha = 0.03
    cost_values = pd.DataFrame({'iteration' : [0], 'cost' : [cost(x,y,theta)]})

    for iteration in range(0,iterations):
        theta_old = theta.copy()
        theta.iloc[0,0] = theta.iloc[0,0] - (alpha/m) * np.asscalar((sigmoid(np.dot(x,theta_old)) - y).sum())
        for i in range(1,theta.shape[0]):
            theta.iloc[i,0] = theta.iloc[i,0] - (alpha/m) * np.asscalar(np.multiply((sigmoid(np.dot(x,theta_old)) - y), pd.DataFrame(x.iloc[:,i])).sum())
        c = cost(x,y,theta)
        cost_values = cost_values.append({"iteration" : iteration, "cost" : c}, ignore_index=True)

def predict(x):
    global theta
    # return the raw probability; callers threshold at 0.5 where a class label is needed
    probability = np.asscalar(sigmoid(np.dot(x.T,theta)))
    return probability

### Read train data
train_data = pd.read_csv("ex2data1.csv", names = ["exam1", "exam2", "admit"])

### Create input data
x = train_data.loc[:,"exam1":"exam2"]
### Add intercept column
x.insert(0, "intercept", 1)
mean_values = {}
std_values = {}
mapping_degree = 2
x = normalize_features(x) #normalize features
x = map_features(x, mapping_degree) #map polynomial features
y = pd.DataFrame(train_data.loc[:,"admit"])
theta = pd.DataFrame({"theta" : [0] * len(x.columns)})

### Test cost of initial theta
# print(cost(x,y,theta))

### Perform Gradient Descent
gradient_descent(x, y, theta)
# print(theta)
# print("Cost: " + str(cost(x,y,theta)))

### Plot iteration vs Cost
plt.scatter(cost_values["iteration"], cost_values["cost"])
plt.show()

### Predict an example
student = pd.DataFrame({"exam1": [52], "exam2":[63]})
student.insert(0, "intercept", 1)
#normalizing
for column_name in student.columns[1:]:
    student[column_name] = (student[column_name] - mean_values[column_name]) / std_values[column_name]
student = map_features(student, mapping_degree)
print("probability of admission: " + str(predict(student.T)))

### Calculate Accuracy
acc = 0
for i in range(0,x.shape[0]):
    p = predict(pd.DataFrame(x.iloc[i,:]))
    actual = y.iloc[i,0]
    if(p >= 0.5):
        p = 1
    else:
        p = 0
    if(p == actual):
        acc+=1
print("Accuracy : " + str((acc/x.shape[0]) * 100))

### Plot decision boundary
x_min = train_data["exam1"].min()
x_max = train_data["exam1"].max()
y_min = train_data["exam2"].min()
y_max = train_data["exam2"].max()
x_grid, y_grid = np.meshgrid(np.arange(x_min, x_max, 1), np.arange(y_min, y_max, 1))
xx = pd.DataFrame(x_grid.ravel(), columns=["exam1"])
yy = pd.DataFrame(y_grid.ravel(), columns=["exam2"])
z = pd.DataFrame({"intercept" : [1]*xx.shape[0]})
z["exam1"] = xx
z["exam2"] = yy
z = normalize_features(z)
z = map_features(z,mapping_degree)
p = z.apply(lambda row: predict(pd.DataFrame(row)), axis=1)
p = np.array(p.values)
p = p.reshape(x_grid.shape)
plt.scatter(train_data[train_data["admit"] == 0]["exam1"], train_data[train_data["admit"] == 0]["exam2"],marker="o")
plt.scatter(train_data[train_data["admit"] == 1]["exam1"], train_data[train_data["admit"] == 1]["exam2"],marker="x")
plt.contour(x_grid, y_grid, p, levels = [0.5]) #displays only decision boundary
# plt.contour(x_grid, y_grid, p, 50, cmap="RdBu") #display a colored contour
# plt.colorbar()
plt.show()

Below is the boundary I obtained, corresponding to my results:

[image: decision boundary]

This blog explaining the contour function may also be helpful.