I have learned two ways of implementing a neural network with a bias neuron, but I have found little literature explaining the difference between them. Suppose we have X = [x1, x2]. One way is to append a constant 1 to the input, so the bias is absorbed into the weight matrix:
Xdata = np.array([[x1,x2,1]]).T
Z1 = np.dot(W1, A0)
A1 = relu(Z1)
Z2 = np.dot(W2, A1)
Here the bias is not updated as a separate parameter.
The detailed code is as follows (this first full listing is the variant with explicit bias vectors b1 and b2; the appended-1 variant appears further below):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author yaoyaoyao
@date Sun Oct 20 19:03:21 2019
@version V1.0
"""
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(1)
def f(x1,x2):
    return x1**2+5*x2+1
X1 = np.linspace(-10,10,1000)
X2 = np.linspace(-10,10,1000)
Ydata = f(X1,X2)
Xdata = np.hstack((X1[:,np.newaxis],X2[:,np.newaxis]))
Ydata = Ydata[:, np.newaxis]
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def sigmoid_backward(x):
    y = sigmoid(x)
    return y * (1 - y)
def relu(x):
    return np.maximum(0, x)
def tanh(x):
    return np.tanh(x)
def tanh_backward(x):
    return 1 - np.tanh(x) ** 2
def relu_backward(x):
    xcp = np.ones(x.shape)
    xcp[x <= 0] = 0
    return xcp
data_size = Xdata.shape[0]
all_index = np.arange(data_size)
train = np.random.choice(all_index, int(data_size * 0.9), replace=False)  # sample without replacement; np.int is deprecated
test = np.setdiff1d(all_index, train)
X = np.array(Xdata)
y = np.array(Ydata)
Xtrain = X[train]
Xtest = X[test]
ytrain = y[train]
ytest = y[test]
eps = 0.001
h = 20
W1 = np.random.randn(h, 2) * eps
b1 = np.random.randn(h, 1) * eps
W2 = np.random.randn(1, h) * eps
b2 = np.random.randn(1, 1) * eps
m = Xtrain.shape[0]
beta1 = 0.9
beta2 = 0.999
epsilon = 0.00000001
adam_optimizer = {
    'm_W1': np.zeros(W1.shape),
    'v_W1': np.zeros(W1.shape),
    'm_b1': np.zeros(b1.shape),
    'v_b1': np.zeros(b1.shape),
    'm_W2': np.zeros(W2.shape),
    'v_W2': np.zeros(W2.shape),
    'm_b2': np.zeros(b2.shape),
    'v_b2': np.zeros(b2.shape)}
learning_rate = 0.01
reg_lambda = 10
for j in range(10000):
    # forward pass
    A0 = Xtrain.T
    Z1 = np.dot(W1, A0) + b1
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = Z2
    ytraint = ytrain.reshape(A2.shape)
    if j % 100 == 0:
        # RMSE over the training batch (the squaring belongs inside the sum)
        print('loss:', np.sqrt(np.sum((ytraint - A2) ** 2) / m))
    # backward pass
    error = - (ytraint - A2)
    dZ2 = error
    dW2 = np.dot(dZ2, A1.T) / m + W2 * reg_lambda / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * relu_backward(Z1)
    dW1 = np.dot(dZ1, A0.T) / m + W1 * reg_lambda / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m
    # Adam update for W1 and b1
    t = j + 1
    adam_optimizer['m_W1'] = beta1 * adam_optimizer['m_W1'] + (1 - beta1) * dW1
    adam_optimizer['v_W1'] = beta2 * adam_optimizer['v_W1'] + (1 - beta2) * dW1 ** 2
    m_w_corrected = adam_optimizer['m_W1'] / (1 - beta1 ** t)
    v_w_corrected = adam_optimizer['v_W1'] / (1 - beta2 ** t)
    # for b
    adam_optimizer['m_b1'] = beta1 * adam_optimizer['m_b1'] + (1 - beta1) * db1
    adam_optimizer['v_b1'] = beta2 * adam_optimizer['v_b1'] + (1 - beta2) * db1 ** 2
    m_b_corrected = adam_optimizer['m_b1'] / (1 - beta1 ** t)
    v_b_corrected = adam_optimizer['v_b1'] / (1 - beta2 ** t)
    W1 -= learning_rate * m_w_corrected / (np.sqrt(v_w_corrected) + epsilon)
    b1 -= learning_rate * m_b_corrected / (np.sqrt(v_b_corrected) + epsilon)
    # Adam update for W2 and b2
    adam_optimizer['m_W2'] = beta1 * adam_optimizer['m_W2'] + (1 - beta1) * dW2
    adam_optimizer['v_W2'] = beta2 * adam_optimizer['v_W2'] + (1 - beta2) * dW2 ** 2
    m_w_corrected = adam_optimizer['m_W2'] / (1 - beta1 ** t)
    v_w_corrected = adam_optimizer['v_W2'] / (1 - beta2 ** t)
    # for b
    adam_optimizer['m_b2'] = beta1 * adam_optimizer['m_b2'] + (1 - beta1) * db2
    adam_optimizer['v_b2'] = beta2 * adam_optimizer['v_b2'] + (1 - beta2) * db2 ** 2
    m_b_corrected = adam_optimizer['m_b2'] / (1 - beta1 ** t)
    v_b_corrected = adam_optimizer['v_b2'] / (1 - beta2 ** t)
    W2 -= learning_rate * m_w_corrected / (np.sqrt(v_w_corrected) + epsilon)
    b2 -= learning_rate * m_b_corrected / (np.sqrt(v_b_corrected) + epsilon)
def nnforward(X):
    A0 = X.T
    Z1 = np.dot(W1, A0) + b1
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = Z2
    return A2
A2 = nnforward(Xtest)
ytestt = ytest.reshape(A2.shape)
print('test loss:', np.mean((ytestt - A2) ** 2))  # mean squared error on the held-out test points
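For completeness, a small usage sketch (not part of the original post) showing how the trained nnforward is queried for one point; the input is a 1x2 row with no trailing 1, and the prediction quality depends on how well training converged:

x_query = np.array([[2.0, 3.0]])   # one sample, shape (1, 2)
print(nnforward(x_query))           # network estimate, shape (1, 1)
print(f(2.0, 3.0))                  # true value of the target function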
The other way keeps b1 and b2 as separate parameters that are updated during backpropagation:
# forward propagation
# in general, b1 and b2 are constant bias vectors
Xdata = np.array([[x1,x2]]).T
Z1 = np.dot(W1, A0) + b1
A1 = relu(Z1)
Z2 = np.dot(W2, A1) + b2
# back propagation
db2 = ...
db1 = ...
b2+=db2
b1+=db1
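The "..." lines above are left as the author wrote them. As a minimal, self-contained sketch of what such a bias update could look like with plain gradient descent (the full listing above uses Adam instead; shapes and the sign convention error = -(y - A2) follow that listing, all other names here are illustrative):

import numpy as np
np.random.seed(0)
h, m, lr = 4, 5, 0.01                          # hidden size, batch size, learning rate
W1, b1 = np.random.randn(h, 2), np.zeros((h, 1))
W2, b2 = np.random.randn(1, h), np.zeros((1, 1))
A0 = np.random.randn(2, m)                     # inputs, one column per sample
ytrue = np.random.randn(1, m)
Z1 = np.dot(W1, A0) + b1                       # forward pass
A1 = np.maximum(0, Z1)
A2 = np.dot(W2, A1) + b2
dZ2 = -(ytrue - A2)                            # backward pass
db2 = np.sum(dZ2, axis=1, keepdims=True) / m
dZ1 = np.dot(W2.T, dZ2) * (Z1 > 0)
db1 = np.sum(dZ1, axis=1, keepdims=True) / m
b2 -= lr * db2                                 # explicit bias update
b1 -= lr * db1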
Details (this second listing is the appended-1 variant, where the bias is folded into W1):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author yaoyaoyao
@date Sun Oct 20 19:03:21 2019
@version V1.0
"""
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(1)
def f(x1,x2):
    return x1**2+5*x2+1
K=10
X1 = np.linspace(-1,1,1000)*K
X2 = np.linspace(-1,1,1000)*K
Ydata = f(X1,X2)
Xdata = np.hstack((X1[:,np.newaxis],X2[:,np.newaxis],np.ones((Ydata.shape[0],1))))
Ydata = Ydata[:, np.newaxis]
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def sigmoid_backward(x):
    y = sigmoid(x)
    return y * (1 - y)
def relu(x):
    return np.maximum(0, x)
def tanh(x):
    return np.tanh(x)
def tanh_backward(x):
    return 1 - np.tanh(x) ** 2
def relu_backward(x):
    xcp = np.ones(x.shape)
    xcp[x <= 0] = 0.
    return xcp
data_size = Xdata.shape[0]
all_index = np.arange(data_size)
train = np.random.choice(all_index, int(data_size * 0.9), replace=False)  # sample without replacement; np.int is deprecated
test = np.setdiff1d(all_index, train)
X = np.array(Xdata)
y = np.array(Ydata)
Xtrain = X[train]
Xtest = X[test]
ytrain = y[train]
ytest = y[test]
eps = 0.001
h = 30
W1 = np.random.randn(h, 3) * eps
W2 = np.random.randn(1, h) * eps
m = Xtrain.shape[0]
beta1 = 0.9
beta2 = 0.999
epsilon = 0.00000001
adam_optimizer = {
    'm_W1': np.zeros(W1.shape),
    'v_W1': np.zeros(W1.shape),
    'm_W2': np.zeros(W2.shape),
    'v_W2': np.zeros(W2.shape),
}
learning_rate = 0.01
reg_lambda = 10
for j in range(10000):
    # forward pass (the bias is carried by the constant-1 column of the input)
    A0 = Xtrain.T
    Z1 = np.dot(W1, A0)
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1)
    A2 = Z2
    ytraint = ytrain.reshape(A2.shape)
    if j % 100 == 0:
        # RMSE over the training batch (the squaring belongs inside the sum)
        print('loss:', np.sqrt(np.sum((ytraint - A2) ** 2) / m))
    # backward pass
    error = - (ytraint - A2)
    dZ2 = error
    dW2 = np.dot(dZ2, A1.T) / m + W2 * reg_lambda / m
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * relu_backward(Z1)
    dW1 = np.dot(dZ1, A0.T) / m + W1 * reg_lambda / m
    # Adam update for W2
    t = j + 1
    adam_optimizer['m_W2'] = beta1 * adam_optimizer['m_W2'] + (1 - beta1) * dW2
    adam_optimizer['v_W2'] = beta2 * adam_optimizer['v_W2'] + (1 - beta2) * dW2 ** 2
    m_w_corrected = adam_optimizer['m_W2'] / (1 - beta1 ** t)
    v_w_corrected = adam_optimizer['v_W2'] / (1 - beta2 ** t)
    W2 -= learning_rate * m_w_corrected / (np.sqrt(v_w_corrected) + epsilon)
    # Adam update for W1
    adam_optimizer['m_W1'] = beta1 * adam_optimizer['m_W1'] + (1 - beta1) * dW1
    adam_optimizer['v_W1'] = beta2 * adam_optimizer['v_W1'] + (1 - beta2) * dW1 ** 2
    m_w_corrected = adam_optimizer['m_W1'] / (1 - beta1 ** t)
    v_w_corrected = adam_optimizer['v_W1'] / (1 - beta2 ** t)
    W1 -= learning_rate * m_w_corrected / (np.sqrt(v_w_corrected) + epsilon)
def nnforward(X):
    A0 = X.T
    Z1 = np.dot(W1, A0)
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1)
    A2 = Z2
    return A2
A2 = nnforward(Xtest)
ytestt = ytest.reshape(A2.shape)
print('test loss:', np.mean((ytestt - A2) ** 2))  # mean squared error on the held-out test points
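Again as a usage sketch (not in the original post): with this variant the query point has to carry the constant 1 that stands in for the bias, so the input is a 1x3 row:

x_query = np.array([[2.0, 3.0, 1.0]])   # one sample plus the bias column, shape (1, 3)
print(nnforward(x_query))                # network estimate, shape (1, 1)
print(f(2.0, 3.0))                       # true value of the target function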
So what is the difference between them? I have implemented both, but I found that if I change K in the second demo (the appended-1 variant), the loss no longer converges to the minimum; the largest K for which it still converges is 6.
Answer 0 (score: 0):
They are actually the same. Let's look at an example.
Let x = [x1; x2], W = [w11, w12; w21, w22] and b = [b1; b2]. Then, starting from the appended-1 form with the 2x3 matrix W' = [w11, w12, b1; w21, w22, b2] and the augmented input [x1; x2; 1], we get W'·[x1; x2; 1] = W·x + b, which is exactly the explicit-bias form.
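To see the equivalence numerically, here is a small check (the names W_aug and x_aug are illustrative, not from the post):

import numpy as np
np.random.seed(1)
W = np.random.randn(2, 2)          # [w11, w12; w21, w22]
b = np.random.randn(2, 1)          # [b1; b2]
x = np.random.randn(2, 1)          # [x1; x2]
W_aug = np.hstack((W, b))          # the 2x3 matrix [w11, w12, b1; w21, w22, b2]
x_aug = np.vstack((x, [[1.0]]))    # the augmented input [x1; x2; 1]
print(np.allclose(np.dot(W_aug, x_aug), np.dot(W, x) + b))   # prints True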