原始代码由 Phil Brierley 用 C++ 编写(见此处)。这是最简单的 MLP 网络,我在这里略去了所有不相关的内容——可以说已经是最精简的网络了,但我已经折腾了一个半星期,仍然不明白它为什么不起作用。下面是我目前的代码:
/// Minimal multilayer perceptron: one tanh hidden layer feeding a single
/// linear output neuron, trained one pattern at a time by gradient descent.
///
/// The network has no built-in bias term: `calc(data:)` only combines the
/// values it is handed.
class Core {
    var _hidden: Int = 1 // Number of hidden nodes.
    var _epochs: Int = 500 // Weight-update iterations performed by each `train` call.
    var _lrIH: Double = 0.7 // Learning rate, input to hidden weights.
    var _lrHO: Double = 0.07 // Learning rate, hidden to output weights.
    var _hiddenNO: [Double] // Hidden node outputs from the most recent `calc`.
    var _weightsIH: [[Double]] // Input to Hidden weights, indexed [input][hidden].
    var _weightsHO: [Double] // Hidden to Output weights.

    /// Creates a network and fills its weights with small random values.
    ///
    /// The arrays are sized from the value `_hidden` has at this point;
    /// changing `_hidden` afterwards does not resize them.
    ///
    /// - Parameter inputs: Number of values in every input pattern.
    init(inputs: Int) {
        let hiddenCount = _hidden
        _hiddenNO = [Double](repeating: 0.0, count: hiddenCount)
        _weightsHO = [Double](repeating: 0.0, count: hiddenCount)
        _weightsIH = [[Double]](
            repeating: [Double](repeating: 0.0, count: hiddenCount),
            count: inputs
        )
        for j in 0..<hiddenCount {
            _weightsHO[j] = (getRand() - 0.5) / 2
            for i in 0..<inputs {
                _weightsIH[i][j] = (getRand() - 0.5) / 5
            }
        }
    }

    /// Repeatedly fits the network to a single pattern for `_epochs` iterations.
    ///
    /// - Parameters:
    ///   - data: The input pattern.
    ///   - output: The target value for `data`.
    /// - Returns: The absolute error of the last iteration, measured before
    ///   that iteration's weight update (0 when `_epochs` is 0).
    @discardableResult
    func train(data: [Double], output: Double) -> Double {
        var error = 0.0
        for _ in 0..<_epochs {
            let err = calc(data: data) - output
            // Output weights are adjusted first; the hidden-layer update then
            // reads the already-updated output weights.
            weightChangesHO(error: err)
            weightChangesIH(data: data, error: err)
            // abs avoids the overflow/underflow of squaring and taking a root.
            error = abs(err)
        }
        return error
    }

    /// Runs a forward pass and stores the hidden activations in `_hiddenNO`.
    ///
    /// - Parameter data: The input pattern; it must not contain more values
    ///   than the network was created with.
    /// - Returns: The weighted sum of the hidden activations (linear output).
    func calc(data: [Double]) -> Double {
        for i in 0..<_hidden {
            // Start every pass from zero. Accumulating onto the previous
            // activation would make the result depend on earlier calls.
            var sum = 0.0
            for j in 0..<data.count {
                sum += data[j] * _weightsIH[j][i]
            }
            _hiddenNO[i] = tanh(sum)
        }
        var out = 0.0
        for i in 0..<_hidden {
            out += _hiddenNO[i] * _weightsHO[i]
        }
        return out
    }

    /// Updates the hidden-to-output weights from the current output error.
    private func weightChangesHO(error: Double) {
        for i in 0..<_hidden {
            let weightChange = _lrHO * error * _hiddenNO[i]
            _weightsHO[i] = _weightsHO[i] - weightChange
            // Regularization of the output weights: keep them within [-5, 5].
            if _weightsHO[i] < -5 {
                _weightsHO[i] = -5
            } else if _weightsHO[i] > 5 {
                _weightsHO[i] = 5
            }
        }
    }

    /// Updates the input-to-hidden weights from the current output error.
    private func weightChangesIH(data: [Double], error: Double) {
        for i in 0..<_hidden {
            // (1 - h^2) is the derivative of tanh at the stored activation.
            // This factor does not depend on the input index, so it is
            // computed once per hidden node.
            let scale = (1 - _hiddenNO[i] * _hiddenNO[i]) * _weightsHO[i] * error * _lrIH
            for k in 0..<data.count {
                _weightsIH[k][i] = _weightsIH[k][i] - scale * data[k]
            }
        }
    }

    /// Returns a uniformly distributed random value between 0 and 1.
    private func getRand() -> Double {
        #if swift(>=4.2)
        return Double.random(in: 0...1)
        #else
        // Divide in Double so the 32-bit value is not rounded through Float.
        return Double(arc4random()) / Double(UInt32.max)
        #endif
    }
}
下面是一段简单的训练代码:
// Training patterns: four vectors of eight identical values each.
let inputs: [[Double]] = [
    [4, 4, 4, 4, 4, 4, 4, 4],
    [5, 5, 5, 5, 5, 5, 5, 5],
    [1, 1, 1, 1, 1, 1, 1, 1],
    [2, 2, 2, 2, 2, 2, 2, 2]
]
// Evaluation patterns: the training vectors with one value altered.
let inputsX: [[Double]] = [
    [4, 4, 1, 4, 4, 4, 4, 4],
    [5, 5, 5, 5, 5, 1, 5, 5],
    [1, 1, 2, 1, 1, 1, 1, 1],
    [2, 2, 2, 2, 1, 2, 2, 2]
]
// Target output for each training pattern, in the same order as `inputs`.
let outputs: [Double] = [1.0, 1.0, -1.0, -1.0]
let core = Core(inputs: 8)

print("Training")
// NOTE(review): every `train` call runs all of its epochs on one pattern
// before the next pattern is shown, so later patterns may overwrite what was
// learned from earlier ones -- confirm this is intended.
for (pattern, target) in zip(self.inputs, self.outputs) {
    // `formatArray` is defined outside this snippet.
    print(String(format: "In: %@, Out: %.f", formatArray(array: pattern), target))
    _ = core.train(data: pattern, output: target)
}

print("Calculating")
for sample in self.inputsX {
    let result = core.calc(data: sample)
    print(String(format: "Input: %@, Output: %.f", formatArray(array: sample), result))
}
这是输出:
Training
In: :4 :: 4 :: 4 :: 4 :: 4 :: 4 :: 4 :: 4 :, Out: 1
In: :5 :: 5 :: 5 :: 5 :: 5 :: 5 :: 5 :: 5 :, Out: 1
In: :1 :: 1 :: 1 :: 1 :: 1 :: 1 :: 1 :: 1 :, Out: -1
In: :2 :: 2 :: 2 :: 2 :: 2 :: 2 :: 2 :: 2 :, Out: -1
Calculating
Input: :4 :: 4 :: 1 :: 4 :: 4 :: 4 :: 4 :: 4 :, Output: -1
Input: :5 :: 5 :: 5 :: 5 :: 5 :: 1 :: 5 :: 5 :, Output: -1
Input: :1 :: 1 :: 2 :: 1 :: 1 :: 1 :: 1 :: 1 :, Output: -1
Input: :2 :: 2 :: 2 :: 2 :: 1 :: 2 :: 2 :: 2 :, Output: -1
如果有人能就此为我指明正确的方向,我将不胜感激。
答案 0 :(得分:0)
我将相同的程序转换为Swift 4.1。我的代码如下:
/// A minimal multilayer perceptron: one tanh hidden layer feeding a single
/// linear output neuron, trained by per-pattern gradient descent.
///
/// The network adds no bias term of its own; `calcNet(data:)` only combines
/// the values it is given, so a bias must be supplied by the caller as a
/// constant input if one is wanted.
struct MLP {
    // User-definable settings.
    private let numEpochs: Int          // number of training cycles
    private let numInputs: Int          // number of values per input pattern
    private let numHidden: Int          // number of hidden units
    private let learningRateIH: Double  // learning rate, input-to-hidden weights
    private let learningRateHO: Double  // learning rate, hidden-to-output weights

    // Process state.
    private var hiddenVal: [Double]     // hidden activations of the last forward pass
    private var weightsIH: [[Double]]   // indexed [input][hidden]
    private var weightsHO: [Double]     // indexed [hidden]

    /// Creates a network with randomly initialised weights.
    ///
    /// - Parameters:
    ///   - inputs: Number of values in every input pattern.
    ///   - hidden: Number of hidden units.
    ///   - epochs: Number of passes `train` makes over the training set.
    ///   - learningRateIH: Step size for the input-to-hidden weights.
    ///   - learningRateHO: Step size for the hidden-to-output weights.
    init(inputs: Int,
         hidden: Int = 4,
         epochs: Int = 500,
         learningRateIH: Double = 0.7,
         learningRateHO: Double = 0.07) {
        numInputs = inputs
        numHidden = hidden
        numEpochs = epochs
        self.learningRateIH = learningRateIH
        self.learningRateHO = learningRateHO
        hiddenVal = [Double](repeating: 0, count: hidden)
        weightsIH = [[Double]](repeating: [Double](repeating: 0, count: hidden), count: inputs)
        weightsHO = [Double](repeating: 0, count: hidden)
        initWeights()
    }

    /// Trains on every pattern once per epoch and prints the RMS error of
    /// each epoch.
    ///
    /// - Parameters:
    ///   - data: The training patterns.
    ///   - output: The target value for each pattern; it must hold at least
    ///     one entry per pattern.
    mutating func train(data: [[Double]], output: [Double]) {
        precondition(output.count >= data.count, "every training pattern needs a target value")
        // With no patterns the RMS error would be 0 / 0; there is nothing to learn.
        guard !data.isEmpty else { return }

        for _ in 0..<numEpochs {
            var squaredErrorSum = 0.0
            for (pattern, target) in zip(data, output) {
                // Current network output and error for this pattern.
                let error = calcNet(data: pattern) - target
                // The output weights change first; the hidden-layer update
                // then reads the already-updated output weights.
                weightChangeHO(error: error)
                weightChangesIH(data: pattern, error: error)
                squaredErrorSum += error * error
            }
            let rmsError = (squaredErrorSum / Double(data.count)).squareRoot()
            print("RMS Error: \(rmsError)")
        }
    }

    /// Runs a forward pass and stores the hidden activations.
    ///
    /// - Parameter data: The input pattern; it must not contain more values
    ///   than the network was created with.
    /// - Returns: The weighted sum of the tanh hidden activations.
    mutating func calcNet(data: [Double]) -> Double {
        // The hidden neurons are tanh.
        for i in 0..<numHidden {
            var sum = 0.0
            for j in 0..<data.count {
                sum += data[j] * weightsIH[j][i]
            }
            hiddenVal[i] = tanh(sum)
        }
        // The output neuron is linear.
        var output = 0.0
        for i in 0..<numHidden {
            output += hiddenVal[i] * weightsHO[i]
        }
        return output
    }

    /// Adjusts the hidden-to-output weights for the given output error.
    mutating func weightChangeHO(error: Double) {
        for k in 0..<numHidden {
            let weightChange = learningRateHO * error * hiddenVal[k]
            var updated = weightsHO[k] - weightChange
            // Regularisation on the output weights: keep them within [-5, 5].
            if updated < -5 {
                updated = -5
            } else if updated > 5 {
                updated = 5
            }
            weightsHO[k] = updated
        }
    }

    /// Adjusts the input-to-hidden weights for the given output error.
    mutating func weightChangesIH(data: [Double], error: Double) {
        for i in 0..<numHidden {
            // (1 - h^2) is the derivative of tanh at the stored activation.
            // This factor does not depend on the input index, so it is
            // computed once per hidden unit.
            let scale = (1 - hiddenVal[i] * hiddenVal[i]) * weightsHO[i] * error * learningRateIH
            for k in 0..<data.count {
                weightsIH[k][i] = weightsIH[k][i] - scale * data[k]
            }
        }
    }

    /// Returns a uniformly distributed random value in `0..<1`.
    ///
    /// The draw is continuous, so initial weights are not limited to a
    /// handful of discrete values.
    func random() -> Double {
        #if swift(>=4.2)
        return Double.random(in: 0..<1)
        #else
        return Double(arc4random()) / (Double(UInt32.max) + 1)
        #endif
    }

    /// Fills both weight layers with small random values centred on zero.
    mutating func initWeights() {
        for j in 0..<numHidden {
            weightsHO[j] = (random() - 0.5) / 2
            for i in 0..<numInputs {
                weightsIH[i][j] = (random() - 0.5) / 5
            }
        }
    }
}
这里是训练和预测:
// Training data: four patterns of eight identical values each.
// NOTE(review): the patterns carry no constant bias column, while MLP
// describes its input count as including an input bias -- confirm whether
// one should be appended.
let trainInputs: [[Double]] = [
    [4, 4, 4, 4, 4, 4, 4, 4],
    [5, 5, 5, 5, 5, 5, 5, 5],
    [1, 1, 1, 1, 1, 1, 1, 1],
    [2, 2, 2, 2, 2, 2, 2, 2]
]

// Evaluation data: the training patterns with one value altered.
let evalInputs: [[Double]] = [
    [4, 4, 1, 4, 4, 4, 4, 4],
    [5, 5, 5, 5, 5, 1, 5, 5],
    [1, 1, 2, 1, 1, 1, 1, 1],
    [2, 2, 2, 2, 1, 2, 2, 2]
]

// Target output for each training pattern, in the same order as trainInputs.
let trainOutput: [Double] = [1, 1, -1, -1]

// The network stays `var` because train and calcNet are mutating.
var mlp = MLP(inputs: 8)
mlp.train(data: trainInputs, output: trainOutput)

for sample in evalInputs {
    let output = mlp.calcNet(data: sample)
    print("\(sample): \(output)")
}
我的输出是:
[4.0, 4.0, 1.0, 4.0, 4.0, 4.0, 4.0, 4.0]: -0.304727897387028
[5.0, 5.0, 5.0, 5.0, 5.0, 1.0, 5.0, 5.0]: -0.304727510449247
[1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0]: -0.305048245167421
[2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0]: -0.304792744713887
当我用相同的输入运行您链接中给出的 Java 代码时,得到以下输出:
pat = 1 actual = 1.0 neural model = -0.3994365844031852
pat = 2 actual = 1.0 neural model = -0.39943658440228524
pat = 3 actual = -1.0 neural model = -0.3994075082237779
pat = 4 actual = -1.0 neural model = -0.3994365634276437