我正在制作一个基本的前馈神经网络来解决XOR门问题。
标准设置:输入层+隐藏层+输出层,恒定学习率为0.01,历元数为500。
Sigmoid激活一直。随机/梯度下降用于反向传播。
隐藏层具有2个神经元。输入和输出数据:
input = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
output = [[0.0], [1.0], [1.0], [0.0]]
现在是问题所在:我知道偏差是一个(列)向量,并且您对示例数据完成了一个循环(正向+向后)。训练后的预测如下:
( 0.4954120458511844 )
( 0.5081637529087711 )
( 0.5153967874989785 )
( 0.5653967874989785 )
与当我将偏差设置为矩阵(行数为input.rows
)并在每个周期训练完整的样本数据相比,预测是:
⎛ 0.18379659987542804 ⎞
⎜ 0.8220424701617579 ⎥
⎜ 0.8217815808742437 ⎥
⎝ 0.18653256456589742 ⎠
是正确的。
我可以在此处发布完整的代码,但是我确定问题出在偏见上,我只是不知道为什么?
编辑 正如我在评论中所说,原因可能来自反向传播部分(随机梯度下降) 这是完整的代码(在Swift中是,不要问为什么),我正在使用Surge矩阵库
它的长处:
import Surge
// XOR TABLE DATA
let inputDataAsArray: [[Double]] = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
let outputDataAsArray: [[Double]] = [[0.0], [1.0], [1.0], [0.0]]
let inputData: Matrix<Double> = Matrix<Double>(inputDataAsArray)
let outputData: Matrix<Double> = Matrix<Double>(outputDataAsArray)
var inputData_samples : Array<Matrix<Double>> = Array()
var outputData_samples : Array<Matrix<Double>> = Array()
for i in 0..<inputDataAsArray.count{
inputData_samples.append(Matrix<Double>([inputDataAsArray[i]]))
outputData_samples.append(Matrix<Double>([outputDataAsArray[i]]))
}
let size = inputData.rows
let neurons = 2 // NUMBER OF NEURONS IN HIDDEN LAYER
var weights0 : Matrix<Double> = random(rows: inputData.columns, columns: neurons)
var biases0 : Matrix<Double> = Matrix<Double>(rows: 1, columns: neurons, repeatedValue: 0.0)
var weights1 : Matrix<Double> = random(rows: neurons, columns: outputData.columns)
var biases1 : Matrix<Double> = Matrix<Double>(rows: 1, columns: outputData.columns, repeatedValue: 0.0)
print("Running...")
let alpha = 0.01
let loops = size * 500
var sampleIndex = 0
for i in 0..<loops{
// FORWARD PROPAGATION
// LAYER 1
sampleIndex = i % size
let j : Int = .random(in: ClosedRange<Int>(uncheckedBounds: (lower: 0, upper: size - 1)))
let a0 = inputData_samples[j]
let output = outputData_samples[j]
let z1: Matrix<Double> = a0 * weights0 + biases0
let a1: Matrix<Double> = sigmoidMatrix(x: z1)
// LAYER 2
let z2 : Matrix<Double> = a1 * weights1 + biases1
let a2 : Matrix<Double> = sigmoidMatrix(x: z2)
// let cost = cross_entropy(size: size, a: a2, y: output)
// BACKPROPAGATION
// LAYER 2
var dz2 : Matrix<Double> = subtractMatrix(x: a2, y: output)
let dw2 : Matrix<Double> = divideMatrix(x: transpose(a1) * dz2 , y: size)
let db2 : Matrix<Double> = divideMatrix(x: dz2, y: size)
// LAYER 1
dz2 = dz2 * transpose(weights1)
let dz1 : Matrix<Double> = sub(y: 1.0, x: a0)
* transpose(a0) * dz2 // multiply(x: part1, y: sub(y: 1.0, x: part2))
let dw1 : Matrix<Double> = divideMatrix(x: transpose(a0) * dz1 , y: size)
let db1 : Matrix<Double> = divideMatrix(x: dz1, y: size)
weights0 = subtractMatrix(x: weights0, y: mul(alpha, x: dw1))
biases0 = subtractMatrix(x: biases0, y: mul(alpha, x: db1))
weights1 = subtractMatrix(x: weights1, y: mul(alpha, x: dw2))
biases1 = subtractMatrix(x: biases1, y: mul(alpha, x: db2))
}
for sample in inputData_samples{
let z1: Matrix<Double> = sample * weights0 + biases0
let a1: Matrix<Double> = sigmoidMatrix(x: z1)
let z2 : Matrix<Double> = a1 * weights1 + biases1
let a2 : Matrix<Double> = sigmoidMatrix(x: z2)
print(a2.description)
}