我想输入3个数字(可能是1.0或0.0),然后输入神经网络以根据我的数组预测输出。我找不到问题所在,我尝试了很多尝试,但没有任何效果。我更改了学习率和某些功能,但情况变得更糟。这是错误最少的代码。感谢您的提前帮助。
#include <iostream>
#include <math.h>
#include <time.h>
这里是功能。
double sigmoid(double x) {
return 1 / (1 + exp(x));
}
double randfrom(double min, double max)
{
double range = (max - min);
double div = RAND_MAX / range;
return min + (rand() / div);
}
int randfrom(int min, int max)
{
int range = (max - min);
int div = RAND_MAX / range;
return min + (rand() / div);
}
int main() {
这里是变量。
int x=0;
double a, m, c,k;
double w1;
double w2;
double w3;
double w4;
double w5;
double w6;
double w7;
double w8;
double b1;
double b2;
double b3;
double target;
double z1;
double z2;
double ze;
double pred1;
double pred2;
double prede;
double cost1;
double cost2;
double coste;
double dcost_dpred1, dcost_dpred2, dcost_dprede;
double dpred_dz1, dpred_dz2,dpred_dze;
double dz_dw1, dz_dw2, dz_dw3, dz_dw4, dz_dw5, dz_dw6, dz_dw7, dz_dw8;
double dz_db1,dz_db2, dz_db3;
double dcost_dw1, dcost_dw2, dcost_dw3, dcost_dw4, dcost_dw5, dcost_dw6,
dcost_dw7, dcost_dw8;
double dcost_db1, dcost_db2,dcost_db3;
double learning_rate = 0.1;
double a1[8][4] = { 0.0, 0.0, 0.0, 1.0,
0.0, 0.0, 1.0, 0.0,
0.0, 1.0, 0.0, 1.0,
0.0, 1.0, 1.0, 0.0,
1.0, 0.0, 0.0, 0.0,
1.0, 0.0, 1.0, 1.0,
1.0, 1.0, 0.0, 0.0,
1.0, 1.0, 1.0, 1.0 };//The first 3 numbers in each row are the inputs and the target is the fourth.
权衡并偏重初始化。
srand(time(NULL));
w1= randfrom(0.1, 0.9);
w2 = randfrom(0.1, 0.9);
w3 = randfrom(0.1, 0.9);
w4 = randfrom(0.1, 0.9);
w5 = randfrom(0.1, 0.9);
w6 = randfrom(0.1, 0.9);
w7 = randfrom(0.1, 0.9);
w8 = randfrom(0.1, 0.9);
b1 = randfrom(0.1, 0.9);
b2 = randfrom(0.1, 0.9);
b3 = randfrom(0.1, 0.9);
这是训练循环。
for (int i = 0; i < 500000; i++) {
target = a1[x][3];
z1 = w1 * a1[x][0] + w3 * a1[x][1] + w5 * a1[x][2] + b1;
z2 = w2 * a1[x][0] + w4 * a1[x][1] + w6 * a1[x][2] + b2;
ze = w7 * z1 + w8 * z2 + b3;
pred1 = sigmoid(z1);
pred2 = sigmoid(z2);
prede = sigmoid(ze);
cost1 = (pred1 - target)*(pred1-target);
cost2 = (pred2 - target)*(pred2 - target);
coste = (prede - target)*(prede - target);
dcost_dpred1 = 2.0 * (pred1 - target);
dcost_dpred2 = 2.0 * (pred2 - target);
dcost_dprede = 2.0 * (prede - target);
dpred_dz1 = sigmoid(z1)*(1 - sigmoid(z1));
dpred_dz2 = sigmoid(z2)*(1 - sigmoid(z2));
dpred_dze = sigmoid(ze)*(1 - sigmoid(ze));
dz_dw1 = a1[x][0];
dz_dw2 = a1[x][0];
dz_dw3 = a1[x][1];
dz_dw4 = a1[x][1];
dz_dw5 = a1[x][2];
dz_dw6 = a1[x][2];
dz_dw7 = z1;
dz_dw8 = z2;
dz_db1 = 1.0;
dz_db2 = 1.0;
dz_db3 = 1.0;
dcost_dw1 = dcost_dpred1 * dpred_dz1 * dz_dw1;
dcost_dw2 = dcost_dpred2 * dpred_dz2 * dz_dw2;
dcost_dw3 = dcost_dpred1 * dpred_dz1 * dz_dw3;
dcost_dw4 = dcost_dpred2 * dpred_dz2 * dz_dw4;
dcost_dw5 = dcost_dpred1 * dpred_dz1 * dz_dw5;
dcost_dw6 = dcost_dpred2 * dpred_dz2 * dz_dw6;
dcost_dw7 = dcost_dprede * dpred_dze * dz_dw7;
dcost_dw8 = dcost_dprede * dpred_dze * dz_dw8;
dcost_db1 = dcost_dpred1 * dpred_dz1 * dz_db1;
dcost_db2 = dcost_dpred2 * dpred_dz2 * dz_db2;
dcost_db3 = dcost_dprede * dpred_dze * dz_db3;
w1 += learning_rate * dcost_dw1;
w2 += learning_rate * dcost_dw2;
w3 += learning_rate * dcost_dw3;
w4 += learning_rate * dcost_dw4;
w5 += learning_rate * dcost_dw5;
w6 += learning_rate * dcost_dw6;
w7 += learning_rate * dcost_dw7;
w8 += learning_rate * dcost_dw8;
b1 += learning_rate * dcost_db1;
b2 += learning_rate * dcost_db2;
b3 += learning_rate * dcost_db3;
if (x < 7)
{
x++;
}
else if (x == 7)
{
x = 0;
}
}
我在此处添加输入以查看程序是否正常工作。
double l,j,o;
for (int i = 0; i < 9; i++) {
std::cout << "Enter first number:";
std::cin >> a;
std::cout << std::endl;
std::cout << "Enter second number:";
std::cin >> m;
std::cout << std::endl;
std::cout << "Enter third number:";
std::cin >> c;
std::cout << std::endl;
l = w1 * a + w3 * m + w5 * c + b1;
j = w2 * a + w4 * m + w6 * c + b2;
o = w7 * l + w8 * j + b3;
std::cout << "The prediction is:" << sigmoid(o)<<std::endl;
}
std::cin >> k;
}
答案 0 :(得分:1)
通常,S形等于1 /(1 + exp(-x))而不是1 /(1 + exp(x))。 我在您的程序中尝试了此修改,同时更改了权重更新的符号。但是,我得到的结果与初始程序大致相同。
此外,您要使用网络建模的公式是“ 1 xor a [0] xor a [2]”。我对神经网络和异或建模进行了快速研究,发现为此目的需要一个隐藏层。
您是否尝试过使用隐藏层和“经典” S型对它进行建模?
很抱歉没有提出真正的解决方案。我猜应该是一条评论。
但是,我不允许提供评论。希望能帮助到你。
编辑:我在您的代码中发现了另外两个问题:
在训练循环中,ze的计算应使用pred1(sigmoid(z1))和pred2代替z1和z2
以同样的方式,在main()的末尾,为了测试NN输出,计算o应该使用sigmoid(l)和sigmoid(j)代替l和j
< / li>但是,该程序仍然无法运行。该程序中仍然缺少隐藏层。