Question

原始代码由 Phil Brierley 用 C++ 编写（见此处）。这是最简单的 MLP 网络，我在这里省略了所有不相关的内容，因此它已经是最精简的网络了，但我已经折腾了一个半星期，仍然不明白它为什么不起作用。以下是我目前的代码：

/// Minimal multilayer perceptron: one hidden layer of tanh units feeding a
/// single linear output node, trained by plain gradient descent.
class Core {
    var _hidden: Int = 1        // Number of hidden nodes.
    var _epochs: Int = 500      // Weight updates performed by one `train` call.
    var _lrIH: Double = 0.7     // Learning rate, input to hidden weights.
    var _lrHO: Double = 0.07    // Learning rate, hidden to output weights.
    var _hiddenNO: [Double]     // Hidden node outputs.
    var _weightsIH: [[Double]]  // Input to Hidden weights, indexed [input][hidden].
    var _weightsHO: [Double]    // Hidden to Output weights.

    /// Creates a network for `inputs` input values and fills every weight with
    /// a small random starting value.
    ///
    /// - Parameter inputs: Number of values in each input pattern.
    init(inputs: Int) {
        let hidden = _hidden
        _hiddenNO = [Double](repeating: 0.0, count: hidden)
        _weightsHO = [Double](repeating: 0.0, count: hidden)
        _weightsIH = [[Double]](repeating: [Double](repeating: 0.0, count: hidden), count: inputs)

        for j in 0..<hidden {
            // Output weights start in [-0.25, 0.25], input weights in [-0.1, 0.1].
            _weightsHO[j] = (getRand() - 0.5) / 2
            for i in 0..<inputs {
                _weightsIH[i][j] = (getRand() - 0.5) / 5
            }
        }
    }

    /// Runs `_epochs` gradient-descent updates on a single pattern.
    ///
    /// - Parameters:
    ///   - data: The input pattern.
    ///   - output: The target value for that pattern.
    /// - Returns: The absolute error measured before the final weight update
    ///   (0 when `_epochs` is 0).
    @discardableResult
    func train(data: [Double], output: Double) -> Double {
        var error = 0.0
        for _ in 0..<_epochs {
            let err = calc(data: data) - output
            // The output weights are adjusted first; the input-to-hidden update
            // then uses the already-updated output weights.
            weightChangesHO(error: err)
            weightChangesIH(data: data, error: err)
            error = abs(err)
        }
        return error
    }

    /// Performs a forward pass and returns the network output for `data`.
    ///
    /// The hidden node outputs are stored in `_hiddenNO` because the weight
    /// update methods read them afterwards.
    func calc(data: [Double]) -> Double {
        for i in 0..<_hidden {
            // Every pass must start from zero. Accumulating onto the value left
            // in `_hiddenNO[i]` by the previous call made the result depend on
            // the call history instead of only on `data` and the weights.
            var sum = 0.0
            for j in 0..<data.count {
                sum += data[j] * _weightsIH[j][i]
            }
            _hiddenNO[i] = tanh(sum)
        }

        // The output neuron is linear: a weighted sum of the hidden outputs.
        var out = 0.0
        for i in 0..<_hidden {
            out += _hiddenNO[i] * _weightsHO[i]
        }
        return out
    }

    /// Adjusts the hidden-to-output weights for the given output error.
    private func weightChangesHO(error: Double) {
        for i in 0..<_hidden {
            let updated = _weightsHO[i] - _lrHO * error * _hiddenNO[i]
            // Regularization of the output weights: keep them within [-5, 5].
            _weightsHO[i] = min(max(updated, -5), 5)
        }
    }

    /// Adjusts the input-to-hidden weights for the given output error.
    private func weightChangesIH(data: [Double], error: Double) {
        for i in 0..<_hidden {
            // 1 - h^2 is the derivative of tanh expressed through its output h.
            // This factor does not depend on the input index, so compute it once.
            let factor = (1 - _hiddenNO[i] * _hiddenNO[i]) * _weightsHO[i] * error * _lrIH
            for k in 0..<data.count {
                _weightsIH[k][i] -= factor * data[k]
            }
        }
    }

    /// Returns a random value in 0...1.
    private func getRand() -> Double {
        // Divide in Double: a Float cannot represent every 32-bit integer.
        return Double(arc4random()) / Double(UInt32.max)
    }
}

下面是一个简单的训练示例：

// Training patterns: each sample repeats one value eight times.
let inputs: [[Double]] = [
    [4, 4, 4, 4, 4, 4, 4, 4],
    [5, 5, 5, 5, 5, 5, 5, 5],
    [1, 1, 1, 1, 1, 1, 1, 1],
    [2, 2, 2, 2, 2, 2, 2, 2]
]
// Evaluation patterns: the training samples with a single element altered.
let inputsX: [[Double]] = [
    [4, 4, 1, 4, 4, 4, 4, 4],
    [5, 5, 5, 5, 5, 1, 5, 5],
    [1, 1, 2, 1, 1, 1, 1, 1],
    [2, 2, 2, 2, 1, 2, 2, 2]
]
// Target value for each training pattern, in the same order as `inputs`.
let outputs: [Double] = [1.0, 1.0, -1.0, -1.0]
let core = Core(inputs: 8)

    print("Training")
    // NOTE(review): every call to `train` runs all of its epochs on one pattern
    // before the next pattern is shown, so later patterns may overwrite what was
    // learned from earlier ones — confirm this is the intended schedule.
    for (sample, target) in zip(self.inputs, self.outputs) {
        print(String(format: "In: %@, Out: %.f", formatArray(array: sample), target))
        _ = core.train(data: sample, output: target)
    }

    print("Calculating")
    // Run the trained network on each altered pattern and print the result.
    for array in self.inputsX {
        let result = core.calc(data: array)
        print(String(format: "Input: %@, Output: %.f", formatArray(array: array), result))
    }

这是输出：

训练

输入：：4 :: 4 :: 4 :: 4 :: 4 :: 4 :: 4 :: 4 :，输出：1

输入：：5 :: 5 :: 5 :: 5 :: 5 :: 5 :: 5 :: 5 :，输出：1

输入：：1 :: 1 :: 1 :: 1 :: 1 :: 1 :: 1 :: 1 :，输出：-1

输入：：2 :: 2 :: 2 :: 2 :: 2 :: 2 :: 2 :: 2 :，输出：-1

计算

输入：：4 :: 4 :: 1 :: 4 :: 4 :: 4 :: 4 :: 4 :，输出：-1

输入：：5 :: 5 :: 5 :: 5 :: 5 :: 1 :: 5 :: 5 :，输出：-1

输入：：1 :: 1 :: 2 :: 1 :: 1 :: 1 :: 1 :: 1 :，输出：-1

输入：：2 :: 2 :: 2 :: 2 :: 1 :: 2 :: 2 :: 2 :，输出：-1

如果有人能就此为我指明正确的方向，我将不胜感激。

Answer 1

我将相同的程序转换为Swift 4.1。我的代码如下：

/// A minimal multilayer perceptron with one hidden layer of tanh units and a
/// single linear output neuron, trained by per-pattern gradient descent.
struct MLP {
    // User-definable settings.
    private let numEpochs = 500 // number of training cycles
    private let numInputs: Int  // number of inputs - this includes the input bias
    private let numHidden = 4   // number of hidden units
    private let LR_IH = 0.7     // learning rate, input to hidden weights
    private let LR_HO = 0.07    // learning rate, hidden to output weights

    // Process variables.
    private var hiddenVal: [Double]    // hidden neuron outputs of the last forward pass
    private var weightsIH: [[Double]]  // input to hidden weights, indexed [input][hidden]
    private var weightsHO: [Double]    // hidden to output weights

    /// Creates a network for `inputs` input values with randomly initialised weights.
    ///
    /// - Parameter inputs: Number of values in each input pattern.
    init(inputs: Int) {
        numInputs = inputs
        hiddenVal = [Double](repeating: 0, count: numHidden)
        weightsIH = [[Double]](repeating: [Double](repeating: 0, count: numHidden), count: numInputs)
        weightsHO = [Double](repeating: 0, count: numHidden)

        initWeights()
    }

    /// Trains the network for `numEpochs` passes over all patterns and prints
    /// the RMS error of every pass.
    ///
    /// - Parameters:
    ///   - data: The input patterns. Nothing happens when this is empty.
    ///   - output: The target value for each pattern, in the same order.
    mutating func train(data: [[Double]], output: [Double]) {
        // Without patterns the RMS error would be 0/0, i.e. NaN, on every epoch.
        guard !data.isEmpty else { return }
        precondition(output.count >= data.count,
                     "train(data:output:) needs one target value per input pattern")

        for _ in 0..<numEpochs {
            var squaredErrorSum = 0.0

            for (pattern, target) in zip(data, output) {
                // Current network output and error for this pattern.
                let error = calcNet(data: pattern) - target

                // The output weights are adjusted first; the input-to-hidden
                // update then uses the already-updated output weights.
                weightChangeHO(error: error)
                weightChangesIH(data: pattern, error: error)

                squaredErrorSum += error * error
            }

            let rmsError = sqrt(squaredErrorSum / Double(data.count))
            print("RMS Error: \(rmsError)")
        }
    }

    /// Performs a forward pass and returns the network output for `data`.
    ///
    /// Marked `mutating` because the hidden neuron outputs are kept in
    /// `hiddenVal` for the weight update methods.
    mutating func calcNet(data: [Double]) -> Double {
        // Hidden neurons: tanh of the weighted input sum.
        for i in 0..<numHidden {
            var sum = 0.0
            for j in 0..<data.count {
                sum += data[j] * weightsIH[j][i]
            }
            hiddenVal[i] = tanh(sum)
        }

        // Output neuron: linear combination of the hidden outputs.
        var output = 0.0
        for i in 0..<numHidden {
            output += hiddenVal[i] * weightsHO[i]
        }
        return output
    }

    /// Adjusts the hidden-to-output weights for the given output error.
    mutating func weightChangeHO(error: Double) {
        for k in 0..<numHidden {
            let updated = weightsHO[k] - LR_HO * error * hiddenVal[k]
            // Regularisation on the output weights: keep them within [-5, 5].
            weightsHO[k] = min(max(updated, -5), 5)
        }
    }

    /// Adjusts the input-to-hidden weights for the given output error.
    mutating func weightChangesIH(data: [Double], error: Double) {
        for i in 0..<numHidden {
            // 1 - h^2 is the derivative of tanh expressed through its output h.
            // This factor does not depend on the input index, so compute it once.
            let factor = (1 - hiddenVal[i] * hiddenVal[i]) * weightsHO[i] * error * LR_IH
            for k in 0..<data.count {
                weightsIH[k][i] -= factor * data[k]
            }
        }
    }

    /// Returns a random value in 0...1.
    func random() -> Double {
        // Use the full 32-bit range instead of ten discrete steps so the
        // initial weights are spread continuously.
        return Double(arc4random()) / Double(UInt32.max)
    }

    /// Fills all weights with small random starting values.
    mutating func initWeights() {
        for j in 0..<numHidden {
            // Output weights start in [-0.25, 0.25], input weights in [-0.1, 0.1].
            weightsHO[j] = (random() - 0.5) / 2
            for i in 0..<numInputs {
                weightsIH[i][j] = (random() - 0.5) / 5
            }
        }
    }
}

这里是训练和预测：

// Training data: each sample repeats one value eight times.
var trainInputs: [[Double]] = ([4, 5, 1, 2] as [Double]).map { value in
    [Double](repeating: value, count: 8)
}

// Evaluation data: the training samples with a single element altered.
var evalInputs: [[Double]] = [
    [4, 4, 1, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 1, 5, 5],
    [1, 1, 2, 1, 1, 1, 1, 1], [2, 2, 2, 2, 1, 2, 2, 2]
]

// Target value for each training sample, in the same order as `trainInputs`.
var trainOutput: [Double] = [1, 1, -1, -1]

// The network is a struct with mutating methods, so it has to be a `var`.
var mlp = MLP(inputs: 8)
mlp.train(data: trainInputs, output: trainOutput)

// Print the prediction for every evaluation sample.
for i in evalInputs.indices {
    let output = mlp.calcNet(data: evalInputs[i])
    print("\(evalInputs[i]): \(output)")
}

我的输出是：

[4.0, 4.0, 1.0, 4.0, 4.0, 4.0, 4.0, 4.0]: -0.304727897387028
[5.0, 5.0, 5.0, 5.0, 5.0, 1.0, 5.0, 5.0]: -0.304727510449247
[1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0]: -0.305048245167421
[2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0]: -0.304792744713887

当我运行您的链接中给出的Java代码时，输入相同，它给出以下输出：

pat = 1 actual = 1.0 neural model = -0.3994365844031852
pat = 2 actual = 1.0 neural model = -0.39943658440228524
pat = 3 actual = -1.0 neural model = -0.3994075082237779
pat = 4 actual = -1.0 neural model = -0.3994365634276437

尝试将多层感知器神经网络移植到 Swift

1 个答案: