我正在编写一个基于图的反向传播神经网络,作为个人项目,目前还停留在前向传播(forward prop)这一步。代码可以编译,但大约一半的情况下能成功运行,另一半的情况下会在最后一步崩溃,看起来是死在某个内存释放(清理)步骤上。我刚接触虚函数和 static_cast,所以想知道问题是否出在这些部分。GDB 的输出是:"Program received signal SIGABRT, Aborted. 0x00000000100404740 in __gnu_cxx::new_allocator::deallocate(double*, unsigned long) ()"
代码前半部分的那些函数应该没有问题,因为它们在我之前更简单的(不基于图的)神经网络版本中工作正常。我猜问题出在后面某个 struct 里。
更新:如果我用 123 作为随机数生成器的种子,而不是基于时间的种子,程序每次都能正常运行;而 seed = 124 时每次都失败。去掉随机性、改用常量权重后,程序也每次都能运行。我很困惑!
#include <bits/stdc++.h>
using namespace std;

// Debug helper: prints `<expression text> = <value>` followed by a newline.
// The argument and the whole expansion are parenthesized so that expressions
// containing operators with lower precedence than << (e.g. `a < b`) expand correctly.
#define p(x) (cout << #x << " = " << (x) << endl)

// Function-like minimum of two expressions. Both arguments and the whole
// expansion are parenthesized so the macro composes with surrounding operators
// (the unparenthesized form turned `2 * min(3, 4)` into `2 * 3 < 4 ? 3 : 4`).
// Caveats: the selected argument is evaluated twice, and from this line on any
// call spelled `min(a, b)` -- including `std::min(a, b)` -- is macro-expanded.
#define min(a,b) ((a) < (b) ? (a) : (b))

// Shorthand aliases: rank-1, rank-2 and rank-3 containers of doubles, and a vector of ints.
typedef vector<double> d1;
typedef vector<d1> d2;
typedef vector<d2> d3;
typedef vector<int> i1;
int argmax(d1 x){
p(x.size());
int maxIndex=0;
double maxValue=x[0];
for (int i=1; i<x.size(); i++){
if (x[i] > maxValue){
maxValue = x[i];
maxIndex = i;
}
}
return maxIndex;
}
// Builds a rank-1 container holding `n` doubles, all equal to zero.
d1 zeros(int n){
    d1 blank(n, 0.0);
    return blank;
}
// Builds a `rows` x `cols` matrix of zeros.
d2 zeros(int rows, int cols){
    const d1 blankRow(cols, 0.0);
    d2 grid(rows, blankRow);
    return grid;
}
// Builds `x` stacked matrices, each `rows` x `cols`, with every entry zero.
d3 zeros(int x, int rows, int cols){
    const d1 blankRow(cols, 0.0);
    const d2 blankSlice(rows, blankRow);
    d3 volume(x, blankSlice);
    return volume;
}
void print(d1 x){
for (double d: x)
cout << d << endl;
cout << endl;
}
void print(d2 x){
for (auto row: x){
for (double d: row){
cout << d << " ";
}
cout << endl;
}
cout << endl;
}
void print(d3 x){
for (d2 X: x)
print(X);
}
void toRank2(d1&x, int rows, d2& y){
for (int i=0; i<x.size()/rows; i++){
y.emplace_back();
for (int row=0; row<rows; row++){
y[i].push_back(x[i*rows+row]);
}
}
}
void toRank3(d1& x, int rows, int cols, d3& y){
for (int i=0; i<x.size()/rows/cols; i++){
y.emplace_back();
for (int row=0; row<rows; row++){
y[i].emplace_back();
for (int col=0; col<cols; col++){
y[i][row].push_back(x[i*rows*cols+row*cols+col]);
}
}
}
}
// Returns `size` samples drawn from a normal distribution with the given mean
// and standard deviation.
// The random engine is shared by all calls and is seeded from the wall clock
// the first time this function runs. The distribution is built per call: a
// function-local static distribution would keep the mean/deviation of the
// very first call and silently ignore the arguments of every later call.
d1 getRandomDoubles(int size, double mean=0, double standard_deviation=1){
    static default_random_engine generator(static_cast<int>(time(NULL)));
    normal_distribution<double> distribution(mean, standard_deviation);
    d1 data(size);
    generate(data.begin(), data.end(), [&distribution]() { return distribution(generator); });
    return data;
}
// Produces a `rows` x `cols` matrix of random doubles: draws rows*cols values
// with the rank-1 overload, then cuts them into rows of `cols` elements.
d2 getRandomDoubles(int rows, int cols, double mean=0, double standard_deviation=1){
    d1 flat = getRandomDoubles(rows*cols, mean, standard_deviation);
    d2 shaped;
    toRank2(flat, cols, shaped);
    return shaped;
}
// Produces `depth` random matrices of shape `rows` x `cols`: draws
// depth*rows*cols values with the rank-1 overload, then reshapes them.
d3 getRandomDoubles(int depth, int rows, int cols, double mean=0, double standard_deviation=1){
    d1 flat = getRandomDoubles(depth*rows*cols, mean, standard_deviation);
    d3 shaped;
    toRank3(flat, rows, cols, shaped);
    return shaped;
}
// Base class of every vertex in the computation graph.
// A node stores non-owning pointers to its parents and children; the nodes
// themselves must outlive every node that links to them.
struct Node{
    std::vector<Node*> parents, children;
    bool ready=false;  // not read by any code in this struct

    // Node is a polymorphic base (update_state is virtual), so it gets a
    // virtual destructor: destroying a derived node through a Node* is then
    // well defined.
    virtual ~Node() = default;

    // Links `n` below this node: `n` is appended to this node's children and
    // this node is appended to `n`'s parents. The order of add_child calls
    // therefore fixes the positions in `n.parents`.
    void add_child(Node& n){
        children.push_back(&n);
        n.parents.push_back(this);
    }

    // Calls update_state() on every child, in insertion order.
    // NOTE: there is no readiness tracking; a child is updated as soon as this
    // parent propagates, whether or not its other parents were updated.
    void forward_propagate(){
        std::cout << "starting r2 forward" << std::endl;
        for (Node* n: children){
            std::cout << "loop" << std::endl;
            n->update_state();
        }
        std::cout << "exiting r2 forward" << std::endl;
    }

    // Recomputes this node's value and continues the forward pass. The base
    // version has no value to compute and only propagates.
    virtual void update_state(){
        forward_propagate();
    }
};
struct r1:Node{
vector<double> state;
int r;
r1(){}
r1(int R){
r=R;
state = vector<double>(r);
}
};
struct r2:Node{
vector<vector<double>> state;
int r,c;
r2(){}
r2(int R, int C){
r=R;
c=C;
state = zeros(r, c);
}
};
struct r3:Node{
d3 state;
int r, c, d;
r3(){}
r3(int R, int C, int D){
r=R;
c=C;
d=D;
state = zeros(R,C,D);
}
};
struct MatrixProduct1_1: r1{
MatrixProduct1_1(int n):r1(n){}
void update_state() override{
cout << "mat11" << endl;
d2& W = static_cast<r2*>(parents[0])->state;
d1& x = static_cast<r1*>(parents[1])->state;
state = zeros(r);
for (int i=0; i<W.size(); i++)
for (int j=0; j<W[0].size(); j++)
state[i] += W[i][j]*x[j];
forward_propagate();
}
};
struct MatrixProduct2_1: r1{
MatrixProduct2_1(int n):r1(n){}
void update_state() override{
cout << "matt21" << endl;
d3& W = static_cast<r3*>(parents[0])->state;
d2& x = static_cast<r2*>(parents[1])->state;
state = zeros(r);
for (int i=0; i<W.size(); i++)
for (int j=0; j<W[0].size(); j++)
for (int k=0; k<W[0][0].size(); k++)
state[k] += W[i][j][k]*x[i][j];
forward_propagate();
}
};
struct Convolution: r2{
Convolution(int r, int c): r2(r, c){}
void update_state() override{
cout << "convolving" << endl;
state = zeros(r, c);
d2& W = static_cast<r2*>(parents[0])->state;
d2& x = static_cast<r2*>(parents[1])->state;
int wCenterX = W[0].size() / 2;
int wCenterY = W.size() / 2;
int rows = x.size(), cols = x[0].size();
int wRows = W.size(), wCols = W[0].size();
//#pragma omp parallel for
for(int i=0; i < rows; i++)
for(int j=0; j < cols; j++)
for(int m=0; m < W.size(); m++){
int mm = W.size() - 1 - m;
for(int n=0; n < wCols; n++){
int nn = wCols - 1 - n;
int ii = i + (m - wCenterY);
int jj = j + (n - wCenterX);
if (ii >= 0 && ii < rows && jj >= 0 && jj < cols)
state[i][j] += x[ii][jj] * W[mm][nn];
}
}
forward_propagate();
}
};
struct RELU: r2{
RELU(int r, int c):r2(r, c){}
void update_state() override{
cout << "relu2" << endl;
state = zeros(r,c);
d2& x = static_cast<r2*>(parents[0])->state;
for (int i=0; i<state.size(); i++)
for (int j=0; j<state[0].size(); j++)
if (x[i][j] > 0)
state[i][j] = x[i][j];
forward_propagate();
}
};
struct Softmax: r1{
Softmax(int r):r1(r){}
void update_state() override{
cout << "softmax" << endl;
state = zeros(r);
p(parents.size());
d1& x = static_cast<r1*>(parents[0])->state;
cout << "got state" << endl;
//p(x.size());
//print(x);
p(x.size());
cout << "argmax " << argmax(x) << endl;
double largest = x[argmax(x)];
double lndenom = largest;
double expsum = 0;
cout << "starting expsum" << endl;
for (int i=0; i<x.size(); i++)
//expsum += exp(x[i]-largest);
expsum += x[i] - largest;
cout << "next loop " << endl;
for (int i=0; i<x.size(); i++)
// state[i] = exp(x[i]-largest) / expsum;
state[i] = x[i]-largest;
cout << "forward proping" << endl;
cout << "weird" << endl;
// forward_propagate();
cout << "done with softmax" <<endl;
}
};
struct Add1: r1{
Add1(int r):r1(r){}
void update_state() override{
cout << "add1ing" << endl;
d1& x = static_cast<r1*>(parents[0])->state;
d1& y = static_cast<r1*>(parents[1])->state;
for (int i=0; i<r; i++)
state[i] = x[i]+y[i];
forward_propagate();
}
};
struct Add2: r2{
Add2(int r, int c): r2(r, c){}
void update_state() override{
d2& x = static_cast<r2*>(parents[0])->state;
d2& y = static_cast<r2*>(parents[1])->state;
for (int i=0; i<x.size(); i++)
for (int j=0; j<x[0].size(); j++)
state[i][j] = x[i][j] + y[i][j];
forward_propagate();
}
};
struct MaxPool: r2{
MaxPool(int r, int c): r2(r, c){}
void update_state() override{
d2& x = static_cast<r2*>(parents[0])->state;
for (int i=0; i<x.size(); i+=2)
for (int j=0; j<x[0].size(); j+=2)
state[i/2][j/2] = max(max(x[i][j], x[i+1][j]), max(x[i+1][j], x[i+1][j+1]));
forward_propagate();
}
};
// Builds a small network
//   convolution -> bias add -> RELU -> 2x2 max pool -> fully connected -> bias add -> softmax
// with random inputs and weights, then runs one forward pass from the input node.
// The order of the add_child calls matters: the operation nodes read their
// parents by position (parents[0], parents[1]).
int main(){
    Node root;

    // 28x28 input and 10x10 convolution kernel.
    r2 x;
    x.state = getRandomDoubles(28,28);
    r2 wConv;
    wConv.state = getRandomDoubles(10, 10);
    root.add_child(x);
    root.add_child(wConv);

    // Convolution: parent 0 is the kernel, parent 1 the input.
    Convolution c(28,28);
    wConv.add_child(c);
    x.add_child(c);

    // Bias add on the convolution result.
    Add2 a(28,28);
    r2 bConv(28,28);
    bConv.state = getRandomDoubles(28,28);
    c.add_child(a);
    bConv.add_child(a);

    RELU r(28,28);
    a.add_child(r);

    // Pooling halves each dimension: 28x28 -> 14x14.
    MaxPool max(14, 14);
    r.add_child(max);

    // Fully connected weights: one 14x14 slice per output element. The slice
    // shape has to match the 14x14 pooled matrix; 28x28 slices make the dense
    // layer index past the end of the pooled data.
    r3 wFull(10,14,14);
    wFull.state = getRandomDoubles(10,14,14);
    MatrixProduct2_1 full(10);
    wFull.add_child(full);
    max.add_child(full);

    // Bias add on the dense result.
    r1 bFull(10);
    bFull.state = getRandomDoubles(10);
    Add1 aFull(10);
    aFull.state[0] = 123;  // debugging sentinel; overwritten when aFull is updated
    full.add_child(aFull);
    bFull.add_child(aFull);

    Softmax s(10);
    aFull.add_child(s);

    // Start the forward pass at the input node; the weight and bias nodes
    // already hold their values and need no update.
    x.forward_propagate();
    cout << "returning main";
}
答案 0 :(得分:1)
static_cast 往往是代码存在设计问题的信号,这里也不例外。你的节点确实应该知道其相邻节点的类型。
我无法立即指出具体问题,但我熟悉神经网络。像 struct MatrixProduct1_1: r1 这样的代码几乎就是一个危险信号:为什么它是一个 struct,又为什么要继承自 r1?在神经网络理论中,矩阵乘积是用来表达两层节点之间全连接的方式,而节点本身通常只具有标量激活值。
激活函数可以通过继承来实现,但那样的话应该直接继承自 Node。这意味着你不会再有 r1 .. r3 这些类型——不过我本来也不理解它们的用途。
TLDR:这些类型的设计有问题,你用 static_cast 把问题掩盖了起来;但这只是让代码能够编译,并不能让它变得正确。
答案 1 :(得分:0)
解决了!该错误是由 MatrixProduct2_1 中使用了错误的索引引起的。我的排查方法是:从神经网络末端逐个删除节点,确定是哪个节点引入了 bug,然后对传给 vector::operator[] 的下标添加断言,从而捕获了它。原来的代码发生了越界访问,导致未定义行为。我不清楚为什么某些种子下程序能够运行(结果可能并不正确)。
我还修改了 main 中创建 wFull 时的形状,以及供 MatrixProduct2_1 使用的 getRandomDoubles 调用的参数。完整的新版本如下:
#include <bits/stdc++.h>
using namespace std;

// Debug helper: prints `<expression text> = <value>` followed by a newline.
// The argument and the whole expansion are parenthesized so that expressions
// containing operators with lower precedence than << (e.g. `a < b`) expand correctly.
#define p(x) (cout << #x << " = " << (x) << endl)

// A function-like `min(a,b)` macro used to be defined here; it stays disabled.
//#define min(a,b) a<b ? a : b

// Shorthand aliases: rank-1, rank-2 and rank-3 containers of doubles, and a vector of ints.
typedef vector<double> d1;
typedef vector<d1> d2;
typedef vector<d2> d3;
typedef vector<int> i1;

// Seed for the random engine. main() stores the first command-line argument
// here; otherwise getRandomDoubles fills it from the clock.
int seed = 0;
// When true, the seed is taken from the current time instead of `seed`'s preset value.
bool time_seed = true;
int argmax(d1 x){
p(x.size());
int maxIndex=0;
double maxValue=x[0];
for (int i=1; i<x.size(); i++){
if (x[i] > maxValue){
maxValue = x[i];
maxIndex = i;
}
}
return maxIndex;
}
// Builds a rank-1 container holding `n` doubles, all equal to zero.
d1 zeros(int n){
    d1 blank(n, 0.0);
    return blank;
}
// Builds a `rows` x `cols` matrix of zeros.
d2 zeros(int rows, int cols){
    const d1 blankRow(cols, 0.0);
    d2 grid(rows, blankRow);
    return grid;
}
// Builds `x` stacked matrices, each `rows` x `cols`, with every entry zero.
d3 zeros(int x, int rows, int cols){
    const d1 blankRow(cols, 0.0);
    const d2 blankSlice(rows, blankRow);
    d3 volume(x, blankSlice);
    return volume;
}
void print(d1 x){
for (double d: x)
cout << d << endl;
cout << endl;
}
void print(d2 x){
for (auto row: x){
for (double d: row){
cout << d << " ";
}
cout << endl;
}
cout << endl;
}
void print(d3 x){
for (d2 X: x)
print(X);
}
void toRank2(d1&x, int rows, d2& y){
for (int i=0; i<x.size()/rows; i++){
y.emplace_back();
for (int row=0; row<rows; row++){
y[i].push_back(x[i*rows+row]);
}
}
}
void toRank3(d1& x, int rows, int cols, d3& y){
for (int i=0; i<x.size()/rows/cols; i++){
y.emplace_back();
for (int row=0; row<rows; row++){
y[i].emplace_back();
for (int col=0; col<cols; col++){
y[i][row].push_back(x[i*rows*cols+row*cols+col]);
}
}
}
}
// Returns `size` samples drawn from a normal distribution with the given mean
// and standard deviation.
// The random engine is shared by all calls and is seeded exactly once, on the
// first call: from the clock when `time_seed` is true (the chosen value is
// stored in the global `seed`), otherwise from the current value of `seed`.
// The distribution is built per call: a function-local static distribution
// would keep the mean/deviation of the very first call and silently ignore
// the arguments of every later call.
// NOTE(review): the default mean here is 1 while the rank-2/rank-3 overloads
// default to 0 -- confirm that this difference is intended.
d1 getRandomDoubles(int size, double mean=1, double standard_deviation=1){
    static default_random_engine generator([]{
        if (time_seed)
            seed = static_cast<int>(time(NULL));
        return seed;
    }());
    normal_distribution<double> distribution(mean, standard_deviation);
    d1 data(size);
    generate(data.begin(), data.end(), [&distribution]() { return distribution(generator); });
    return data;
}
// Produces a `rows` x `cols` matrix of random doubles: draws rows*cols values
// with the rank-1 overload, then cuts them into rows of `cols` elements.
d2 getRandomDoubles(int rows, int cols, double mean=0, double standard_deviation=1){
    d1 flat = getRandomDoubles(rows*cols, mean, standard_deviation);
    d2 shaped;
    toRank2(flat, cols, shaped);
    return shaped;
}
// Produces `depth` random matrices of shape `rows` x `cols`: draws
// depth*rows*cols values with the rank-1 overload, then reshapes them.
d3 getRandomDoubles(int depth, int rows, int cols, double mean=0, double standard_deviation=1){
    d1 flat = getRandomDoubles(depth*rows*cols, mean, standard_deviation);
    d3 shaped;
    toRank3(flat, rows, cols, shaped);
    return shaped;
}
// Base class of every vertex in the computation graph.
// A node stores non-owning pointers to its parents and children; the nodes
// themselves must outlive every node that links to them.
struct Node{
    std::vector<Node*> parents, children;
    bool ready=false;  // not read by any code in this struct

    // Node is a polymorphic base (update_state is virtual), so it gets a
    // virtual destructor: destroying a derived node through a Node* is then
    // well defined.
    virtual ~Node() = default;

    // Links `n` below this node: `n` is appended to this node's children and
    // this node is appended to `n`'s parents. The order of add_child calls
    // therefore fixes the positions in `n.parents`.
    void add_child(Node& n){
        children.push_back(&n);
        n.parents.push_back(this);
    }

    // Calls update_state() on every child, in insertion order.
    // NOTE: there is no readiness tracking; a child is updated as soon as this
    // parent propagates, whether or not its other parents were updated.
    void forward_propagate(){
        std::cout << "starting r2 forward" << std::endl;
        for (Node* n: children){
            std::cout << "loop" << std::endl;
            n->update_state();
        }
        std::cout << "exiting r2 forward" << std::endl;
    }

    // Recomputes this node's value and continues the forward pass. The base
    // version has no value to compute and only propagates.
    virtual void update_state(){
        forward_propagate();
    }
};
struct r1:Node{
vector<double> state;
int r;
r1(){}
r1(int R){
r=R;
state = vector<double>(r);
}
};
struct r2:Node{
vector<vector<double>> state;
int r,c;
r2(){}
r2(int R, int C){
r=R;
c=C;
state = zeros(r, c);
}
};
struct r3:Node{
d3 state;
int r, c, d;
r3(){}
r3(int R, int C, int D){
r=R;
c=C;
d=D;
state = zeros(R,C,D);
}
};
struct MatrixProduct1_1: r1{
MatrixProduct1_1(int n):r1(n){}
void update_state() override{
cout << "mat11" << endl;
d2& W = static_cast<r2*>(parents[0])->state;
d1& x = static_cast<r1*>(parents[1])->state;
state = zeros(r);
for (int i=0; i<W.size(); i++)
for (int j=0; j<W[0].size(); j++)
state[i] += W[i][j]*x[j];
forward_propagate();
}
};
// Fully connected layer from a matrix to a vector:
//   state[i] = sum over j,k of W[i][j][k] * x[j][k].
// Parent 0 is treated as the r3 holding the weights W, parent 1 as the r2
// holding the input x.
struct MatrixProduct2_1: r1{
    MatrixProduct2_1(int n):r1(n){}

    // Prints the operand shapes, checks that there is one weight slice per
    // output element, recomputes the product and propagates to the children.
    void update_state() override{
        cout << "matt21" << endl;
        d3& W = static_cast<r3*>(parents[0])->state;
        d2& x = static_cast<r2*>(parents[1])->state;
        // Shape trace for debugging; the printed labels are the expression texts.
        p(x.size());
        p(W.size());
        p(x[0].size());
        p(W[0].size());
        p(W[0][0].size());
        p(state.size());
        // The output index is the first index of W.
        assert (state.size() == W.size());
        state = zeros(r);
        for (std::size_t i = 0, depth = W.size(); i != depth; ++i){
            // Bounds are taken from the first slice / first row, for every slice.
            const std::size_t height = W[0].size();
            for (std::size_t j = 0; j != height; ++j){
                const std::size_t width = W[0][0].size();
                for (std::size_t k = 0; k != width; ++k)
                    state[i] += W[i][j][k]*x[j][k];
            }
        }
        forward_propagate();
    }
};
struct Convolution: r2{
Convolution(int r, int c): r2(r, c){}
void update_state() override{
cout << "convolving" << endl;
state = zeros(r, c);
d2& W = static_cast<r2*>(parents[0])->state;
d2& x = static_cast<r2*>(parents[1])->state;
int wCenterX = W[0].size() / 2;
int wCenterY = W.size() / 2;
int rows = x.size(), cols = x[0].size();
int wRows = W.size(), wCols = W[0].size();
//#pragma omp parallel for
for(int i=0; i < rows; i++)
for(int j=0; j < cols; j++)
for(int m=0; m < W.size(); m++){
int mm = W.size() - 1 - m;
for(int n=0; n < wCols; n++){
int nn = wCols - 1 - n;
int ii = i + (m - wCenterY);
int jj = j + (n - wCenterX);
if (ii >= 0 && ii < rows && jj >= 0 && jj < cols)
state[i][j] += x[ii][jj] * W[mm][nn];
}
}
forward_propagate();
}
};
struct RELU: r2{
RELU(int r, int c):r2(r, c){}
void update_state() override{
cout << "relu2" << endl;
state = zeros(r,c);
d2& x = static_cast<r2*>(parents[0])->state;
for (int i=0; i<state.size(); i++)
for (int j=0; j<state[0].size(); j++)
if (x[i][j] > 0)
state[i][j] = x[i][j];
forward_propagate();
}
};
struct Softmax: r1{
Softmax(int r):r1(r){}
void update_state() override{
cout << "softmax" << endl;
state = zeros(r);
p(parents.size());
d1& x = static_cast<r1*>(parents[0])->state;
cout << "got state" << endl;
//p(x.size());
//print(x);
p(x.size());
cout << "argmax " << argmax(x) << endl;
double largest = x[argmax(x)];
double lndenom = largest;
double expsum = 0;
cout << "starting expsum" << endl;
for (int i=0; i<x.size(); i++)
expsum += exp(x[i]-largest);
//expsum += x[i] - largest;
cout << "next loop " << endl;
for (int i=0; i<x.size(); i++)
state[i] = exp(x[i]-largest) / expsum;
//state[i] = x[i]-largest;
// state[i] = 3;
cout << "forward proping" << endl;
cout << "weird" << endl;
forward_propagate();
cout << "done with softmax" <<endl;
}
};
struct Add1: r1{
Add1(int r):r1(r){}
void update_state() override{
cout << "add1ing" << endl;
d1& x = static_cast<r1*>(parents[0])->state;
d1& y = static_cast<r1*>(parents[1])->state;
for (int i=0; i<r; i++)
state[i] = x[i]+y[i];
forward_propagate();
}
};
struct Add2: r2{
Add2(int r, int c): r2(r, c){}
void update_state() override{
d2& x = static_cast<r2*>(parents[0])->state;
d2& y = static_cast<r2*>(parents[1])->state;
for (int i=0; i<x.size(); i++)
for (int j=0; j<x[0].size(); j++)
state[i][j] = x[i][j] + y[i][j];
forward_propagate();
}
};
struct MaxPool: r2{
MaxPool(int r, int c): r2(r, c){}
void update_state() override{
d2& x = static_cast<r2*>(parents[0])->state;
for (int i=0; i<x.size(); i+=2)
for (int j=0; j<x[0].size(); j+=2)
state[i/2][j/2] = max(max(x[i][j], x[i+1][j]), max(x[i+1][j], x[i+1][j+1]));
forward_propagate();
}
};
// Builds a small network
//   convolution -> bias add -> RELU -> max pool -> fully connected -> bias add -> softmax
// with random inputs and weights, runs one forward pass from the input node
// and prints the softmax output.
// An optional first command-line argument is parsed with atoi and used as the
// random seed; without it the seed is taken from the clock.
// Statement order is significant here: it fixes both the order in which random
// values are drawn and the position of each parent in a node's `parents` list.
int main( int argc, char *argv[] ){
// Fixed seed from the command line; this also switches off time-based seeding.
if (argc>1){
seed = atoi(argv[1]);
time_seed = false;
}
Node root;
// 28x28 random input.
r2 x;
x.state = getRandomDoubles(28,28);
//x.state[0][0]-=1000;
// 10x10 random convolution kernel.
r2 wConv;
wConv.state = getRandomDoubles(10, 10);
root.add_child(x);
root.add_child(wConv);
// Convolution reads parents[0] as the kernel and parents[1] as the input,
// so the kernel node is linked first.
Convolution c(28,28);
wConv.add_child(c);
x.add_child(c);
// Bias add: convolution result first, bias matrix second.
Add2 a(28,28);
r2 bConv(28,28);
bConv.state = getRandomDoubles(28,28);
c.add_child(a);
bConv.add_child(a);
RELU r(28,28);
a.add_child(r);
// Pooling output is 14x14 for the 28x28 input.
MaxPool max(14, 14);
r.add_child(max);
// print(max.state);
// Dense weights: 10 slices of 14x14, matching the pooled matrix.
r3 wFull(10,14,14);
wFull.state = getRandomDoubles(10,14,14);
//print(wFull.state);
// return 0;
// MatrixProduct2_1 reads parents[0] as the weights and parents[1] as the input.
MatrixProduct2_1 full(10);
wFull.add_child(full);
max.add_child(full);
//print(full.state); //suspiciously zero
r1 bFull(10);
bFull.state = getRandomDoubles(10);
Add1 aFull(10);
// Marker value written before the pass; Add1::update_state assigns every element again.
aFull.state[0] = 123;
full.add_child(aFull);
bFull.add_child(aFull);
Softmax s(10);
aFull.add_child(s);
// d1& x = static_cast<r1*>(parents[0])->state;
// d1& asdf = static_cast<r1*>(s.parents[0])->state;
// print(asdf);
//root.forward_propagate();
// The pass starts at the input node, not at root.
x.forward_propagate();
//print(aFull.state);
print(s.state);
cout << "returning main";
}