Question

我正在制作一个基本的前馈神经网络来解决XOR门问题。

标准设置：输入层+隐藏层+输出层，恒定学习率为0.01，历元数为500。

Sigmoid激活一直。随机/梯度下降用于反向传播。

隐藏层具有2个神经元。输入和输出数据：

input = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
output = [[0.0], [1.0], [1.0], [0.0]]

现在是问题所在：我知道偏差是一个（列）向量，并且您对示例数据完成了一个循环（正向+向后）。训练后的预测如下：

(   0.4954120458511844  )
(   0.5081637529087711  )
(   0.5153967874989785  )
(   0.5653967874989785  )

与当我将偏差设置为矩阵（行数为input.rows）并在每个周期训练完整的样本数据相比，预测是：

⎛   0.18379659987542804 ⎞
⎜   0.8220424701617579  ⎥
⎜   0.8217815808742437  ⎥
⎝   0.18653256456589742 ⎠

是正确的。

我可以在此处发布完整的代码，但是我确定问题出在偏见上，我只是不知道为什么？

编辑正如我在评论中所说，原因可能来自反向传播部分（随机梯度下降）这是完整的代码（在Swift中是，不要问为什么），我正在使用Surge矩阵库

它的长处：

import Surge
// XOR TABLE DATA
let inputDataAsArray: [[Double]] = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
let outputDataAsArray: [[Double]] = [[0.0], [1.0], [1.0], [0.0]]

let inputData: Matrix<Double> = Matrix<Double>(inputDataAsArray)
let outputData: Matrix<Double> = Matrix<Double>(outputDataAsArray)

var inputData_samples : Array<Matrix<Double>> = Array()
var outputData_samples : Array<Matrix<Double>> = Array()

for i in 0..<inputDataAsArray.count{
    inputData_samples.append(Matrix<Double>([inputDataAsArray[i]]))
    outputData_samples.append(Matrix<Double>([outputDataAsArray[i]]))
}

let size = inputData.rows
let neurons = 2 // NUMBER OF NEURONS IN HIDDEN LAYER

var weights0 : Matrix<Double> = random(rows: inputData.columns, columns: neurons)
var biases0 : Matrix<Double> = Matrix<Double>(rows: 1, columns: neurons, repeatedValue: 0.0)

var weights1 : Matrix<Double> = random(rows: neurons, columns: outputData.columns)
var biases1 : Matrix<Double> = Matrix<Double>(rows: 1, columns: outputData.columns, repeatedValue: 0.0)

print("Running...")

let alpha = 0.01
let loops = size * 500

var sampleIndex = 0

for i in 0..<loops{

    // FORWARD PROPAGATION
    // LAYER 1

    sampleIndex = i % size

    let j : Int = .random(in: ClosedRange<Int>(uncheckedBounds: (lower: 0, upper: size - 1)))

    let a0 = inputData_samples[j]
    let output = outputData_samples[j]

    let z1: Matrix<Double> = a0 * weights0 + biases0
    let a1: Matrix<Double> = sigmoidMatrix(x: z1)

    // LAYER 2
    let z2 : Matrix<Double> = a1 * weights1 + biases1
    let a2 : Matrix<Double> = sigmoidMatrix(x: z2)

//    let cost = cross_entropy(size: size, a: a2, y: output)

    // BACKPROPAGATION
    // LAYER 2
    var dz2 : Matrix<Double> = subtractMatrix(x: a2, y: output)
    let dw2 : Matrix<Double> = divideMatrix(x: transpose(a1) * dz2 , y: size)
    let db2 : Matrix<Double> = divideMatrix(x: dz2, y: size)

    // LAYER 1

    dz2 = dz2 * transpose(weights1)

    let dz1 : Matrix<Double> = sub(y: 1.0, x: a0)
        * transpose(a0) *  dz2 // multiply(x: part1, y: sub(y: 1.0, x: part2))

    let dw1 : Matrix<Double> = divideMatrix(x: transpose(a0) * dz1 , y: size)
    let db1 : Matrix<Double> = divideMatrix(x: dz1, y: size)

    weights0 = subtractMatrix(x: weights0, y: mul(alpha, x: dw1))
    biases0 = subtractMatrix(x: biases0, y: mul(alpha, x: db1))

    weights1 = subtractMatrix(x: weights1, y: mul(alpha, x: dw2))
    biases1 = subtractMatrix(x: biases1, y: mul(alpha, x: db2))

}
for sample in inputData_samples{
    let z1: Matrix<Double> = sample * weights0 + biases0
    let a1: Matrix<Double> = sigmoidMatrix(x: z1)

    let z2 : Matrix<Double> = a1 * weights1 + biases1
    let a2 : Matrix<Double> = sigmoidMatrix(x: z2)

    print(a2.description)
}

由于偏差的维度，前馈神经网络无法分类

0 个答案: