我正在尝试自己实现一个简单的神经网络来对点进行分类。我听说过我感兴趣的特定类型的激活函数,高斯。我不只是想使用relus或sigmoids,我正在尝试构建一个网络,其中输入大约300 x和y值,然后在第一层计算这些值上的高斯函数,大约有50个神经元,每个神经元都有一个单独的x和y值作为它们的平均值(我将保持sigma不变)。在数学上我预计这看起来像
exp(- [(x-Mx)^2 + (y-My)^2] / (2 * sigma^2) ) / (sqrt(2*pi*sigma))
然后我将对第一层中的所有神经元执行这些术语的加权和,添加偏差,并将其传递给sigmoid以获得我的预测。我将为每个训练示例执行此步骤并获取预测列表。我认为我会进行前向传播,但我会包含代码,以防有人在我的实现中发现明显的错误。然后我执行反向传播。我已经测试了我对权重和偏见的更新,我相信它们不是问题所在。我认为我对平均值的实现存在一些问题,因为它们总是聚集到单个点,这显然不会使成本函数最大化。我已经尝试过使用几个不同的数据集,并改变一些超级参数,但都无济于事。谁能弄明白问题是什么? 这是我的代码。
# libraries
import matplotlib.patches as patches
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pdb
# functions
def gaussian(sq_error, sigma):
return ((1/np.sqrt(2*np.pi*sigma**2))) * np.exp(-(sq_error)/(2*sigma**2))
def calc_X1(X0, Mx, My, m, sigma):
X1 = [] # shape will be (10, m)
for ex in range(0, m):
sq_error = (X0[0][ex] - Mx) **2 + (X0[1][ex] - My) **2
X1.append(gaussian(sq_error, sigma))
X1 = np.array(X1)
return X1.T
def sigmoid(Z):
return 1 / (1 + np.exp(-Z))
def calc_X2(W2, X1, b2):
return sigmoid(np.dot(W2, X1) + b2)
def cost(X2, Y, m):
return -1/m * ( np.dot(Y, np.log(X2.T)) + np.dot(1-Y, np.log(1-X2.T))) [0]
def calc_dZ2(X2, Y):
return X2 - Y
def calc_dM(dZ2, W2, X1, sigma, M, m, xOrY, X0):
cur_dM = np.zeros(M.shape)
for i in range(0, m):
# pdb.set_trace()
cur_dM += dZ2[0][i] * float(np.dot(W2, X1.T[i])) * 1/sigma**2 * (X0[xOrY][i] - M)
return cur_dM / m
def train_correct(X2, Y, m):
ct = 0
for i in range(0, m):
if np.round(X2[0][i]) == Y[i]:
ct += 1
return ct / m
# graphing functions
def plot_train_data(X, Y, m, ax):
for ex in range(0, m):
xCur = X[0][ex]
yCur = X[1][ex]
if Y[ex] == 1:
color=(1, 0, 0)
else:
color=(0,0,1)
ax.scatter(xCur, yCur, c=color)
def probability_hash(pr):
return (float(pr), float(np.round(pr)), float(1-pr))
def probability_hash_1d(pr):
return float(pr)
def plot_boundary(Mx, My, sigma, W2, b2, ax):
boundsx = [-5, 5]
boundsy = [-5, 5]
samples = [10, 10]
width = (boundsx[1] - boundsx[0]) / samples[0]
height = (boundsy[1] - boundsy[0]) / samples[1]
pt = np.zeros((2,1))
for x in np.linspace(boundsx[0], boundsx[1], samples[0]):
for y in np.linspace(boundsy[0], boundsy[1], samples[1]):
pt[0][0] = x
pt[1][0] = y
X1_cur = calc_X1(pt, Mx, My, 1, sigma)
X2_cur = calc_X2(W2, X1_cur, b2)
# ax.add_patch(patches.Rectangle((x, y), width, height, facecolor=probability_hash(X2_cur)))
ax.scatter(x, y, c=probability_hash(X2_cur))
def cool_plot_boundary(Mx, My, sigma, W2, b2, ax):
boundsx = [-2, 2]
boundsy = [-2, 2]
samples = [50, 50]
width = (boundsx[1] - boundsx[0]) / samples[0]
height = (boundsy[1] - boundsy[0]) / samples[1]
pt = np.zeros((2,1))
heats = []
xs = np.linspace(boundsx[0], boundsx[1], samples[0])
ys = np.linspace(boundsy[0], boundsy[1], samples[1])
for x in xs:
heats.append([])
for y in ys:
pt[0][0] = x
pt[1][0] = y
X1_cur = calc_X1(pt, Mx, My, 1, sigma)
X2_cur = calc_X2(W2, X1_cur, b2)
heats[-1].append(probability_hash_1d(X2_cur))
# xticks = []
# yticks = []
# for i in range(0, len(xs)):
# if i % 3 == 0:
# xticks.append(round(xs[i], 2))
# for i in range(0, len(ys)):
# if i % 3 == 0:
# yticks.append(round(ys[i], 2))
xticks = []
yticks = []
sns.heatmap(heats, ax=ax, cbar=True, xticklabels=xticks, yticklabels=yticks)
def plot_m(Mx, My, n1, ax):
for i in range(0, n1):
ax.scatter(Mx[i], My[i], c="k")
# initialize parameters
file = "data/disk2.csv"
df = pd.read_csv(file)
sigma = 2
itterations = 10000
learning_rate = 0.9
n0 = 2 # DO NOT CHANGE, formality
X0 = np.row_stack((df["0"], df["1"])) # shape is (2, m)
Y = np.array(df["2"])
m = len(Y)
n1 = 50
Mx = np.random.randn(n1)
My = np.random.randn(n1)
X1 = calc_X1(X0, Mx, My, m, sigma)
n2 = 1 # DO NOT CHANGE, formality
small_number = 0.01
W2 = np.random.randn(1, n1) * small_number
b2 = 0
X2 = calc_X2(W2, X1, b2)
J = cost(X2, Y, m)
Js = []
itters = []
fig = plt.figure()
plotGap = 200
for i in range(0, itterations):
# forward propogation
X1 = calc_X1(X0, Mx, My, m, sigma)
X2 = calc_X2(W2, X1, b2)
J = cost(X2, Y, m)
if i % plotGap == 0:
fig.clear()
costAx = fig.add_subplot(311)
plotAx = fig.add_subplot(312)
pointsAx = fig.add_subplot(313)
cool_plot_boundary(Mx, My, sigma, W2, b2, plotAx)
# plot_boundary(Mx, My, sigma, W2, b2, plotAx)
plot_train_data(X0, Y, m, pointsAx)
Js.append(J)
itters.append(i)
costAx.plot(itters, Js, c="k")
print("cost = " + str(J) + "\ttraining correct = " + str(train_correct(X2, Y, m)))
plot_m(Mx, My, n1, pointsAx)
plt.pause(0.1)
# back propogation
dZ2 = calc_dZ2(X2, Y)
dW2 = np.dot(dZ2, X1.T) / m
db2 = np.sum(dZ2) / m
dMx = calc_dM(dZ2, W2, X1, sigma, Mx, m, 0, X0)
dMy = calc_dM(dZ2, W2, X1, sigma, My, m, 1, X0)
b2 -= learning_rate * db2
W2 -= learning_rate * dW2
Mx -= learning_rate * dMx
My -= learning_rate * dMy
对于数据,我有一个带有一堆点位置和标签的csv。您可以使用此代码生成类似的csv。 (确保在运行此文件夹的文件夹中有一个名为data的文件夹。)
# makes data in R2 to learn
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
n = 2
# number of exaples
m = 300
X = []
Y = []
# hyperparamers for data
rApprox = 1
error = 0.4
noise = 0.1
name = "data/disk2"
plt.cla()
for ex in range(0, m):
xCur = np.random.randn(2)
X.append(xCur)
if abs(np.linalg.norm(xCur) + np.random.randn()*noise - rApprox) < error:
Y.append(1)
color="r"
else:
Y.append(0)
color="b"
plt.scatter(xCur[0], xCur[1], c=color)
if abs(np.random.randn()) < 0.01:
plt.pause(0.1)
plt.pause(1)
plt.savefig(name + ".png")
X = np.array(X)
Y = np.array(Y)
df = pd.DataFrame(X)
df[2] = Y
df.to_csv(name + ".csv", index=False)
感谢您的帮助。
答案 0 :(得分:0)
将此函数替换为calculate dm函数。在乘法时必须小心,维度不仅仅是足够的。
def calculuate_dMs(X0, X1, X2, Mx, My, W2, dZ2, sigma, m, n1):
# pdb.set_trace()
X0x_big = np.dot(np.ones((n1, 1)), X0[0].reshape(1, m))
X0y_big = np.dot(np.ones((n1, 1)), X0[1].reshape(1, m))
Mx_big = np.dot(Mx.reshape(n1, 1), np.ones((1, m)))
My_big = np.dot(My.reshape(n1, 1), np.ones((1, m)))
W2_big = np.dot(W2.reshape(n1, 1), np.ones((1, m)))
dZ2_big = np.dot(np.ones((n1, 1)), dZ2.reshape(1, m))
dxTemp = np.multiply(np.multiply(np.multiply((X0x_big - Mx_big), X1), W2_big), dZ2_big)
dyTemp = np.multiply(np.multiply(np.multiply((X0y_big - My_big), X1), W2_big), dZ2_big)
return (np.sum(dxTemp, axis=1)/m, np.sum(dyTemp, axis=1)/m)