Question

我试图编写一个简单的神经网络，同时提高我的OOP技能。

main.cpp

#include <stdio.h>
#include <math.h>
#include <vector>
#include <unistd.h>

using namespace std;

#include "Neuron.h"
#include "fileio.h"
#include "helpers.h"

// Dimensions of one input image; one input neuron is created per value of it.
#define IMAGE_HEIGHT 28
#define IMAGE_WIDTH 28
// Number of neurons in the input, hidden and output layers.
// L0SIZE is parenthesized so that the product stays a single operand wherever the
// macro is expanded (without the parentheses `x / L0SIZE` means `x / 28 * 28`).
#define L0SIZE (IMAGE_HEIGHT*IMAGE_WIDTH)
#define L1SIZE 30
#define L2SIZE 10

// Entry point: creates the input, hidden and output layers through the helper
// functions and prints the value computed by every output neuron.
int main(){

    // sizeof yields size_t, which has to be printed with %zu; passing it to %d is
    // undefined behaviour wherever size_t is wider than int.
    printf("ok %zu, %zu, %zu, %zu\n", sizeof(Neuron), sizeof(InputNeuron), sizeof(HiddenNeuron), sizeof(OutputNeuron));
    // The label is not used yet. The call is kept because getNextLabel() is defined
    // elsewhere and presumably advances the reader -- TODO confirm.
    int labelVal = getNextLabel();
    (void)labelVal;

    vector<InputNeuron> inputLayer;
    createInputLayer(L0SIZE, &inputLayer);

    vector<HiddenNeuron> hiddenLayerOne;
    createHiddenLayer(L1SIZE, inputLayer, &hiddenLayerOne);

    vector<OutputNeuron> outputLayer;
    createOutputLayer(L2SIZE, hiddenLayerOne, &outputLayer);

    printf("added all neurons\n");
    //do the recursive backwards sweep through the NN to find the outputs
    // size_t index: matches the type of size() and avoids a signed/unsigned comparison.
    for(size_t i = 0; i < outputLayer.size(); i++){
        printf("output %zu, value %f\n", i, outputLayer[i].computeOutput());
    }

    return 0;
}

Neuron.h

#ifndef NEURON_H
#define NEURON_H
#include <vector>

// Base class of every neuron. Other neurons refer to it through Neuron* and obtain
// its value through the virtual computeOutput().
class Neuron{
    public: 
        Neuron();
        // Virtual destructor: makes destroying a derived neuron through a Neuron*
        // well defined.
        virtual ~Neuron() = default;
        // Returns the neuron's output value; derived classes override this.
        virtual float computeOutput();
        // Position of the neuron inside its layer, and the number of that layer.
        // Both default to 0 so that they are never read while indeterminate.
        int _index = 0;
        int _layer = 0;
};

// Neuron of the input layer: stores a single value and reports it as its output.
class InputNeuron: public Neuron{
    public:
        // `value` is the raw integer sample. The parameter is named like in the
        // out-of-class definition instead of reusing the member name `_value`.
        InputNeuron(int layer, int index, int value);
        float computeOutput() override;
    private:
        float _value = 0.0f;
};

// Neuron that is connected to neurons of the previous layer through weighted links.
class HiddenNeuron: public Neuron{
    public:
        HiddenNeuron() {}
        HiddenNeuron(int layer, int index);

        // Output derived from the bias and the weighted outputs of the linked neurons.
        float computeOutput() override;

        // Appends a link to previousNeuron with the given weight. Only the pointer is
        // stored, so the pointed-to neuron has to stay alive while this one is used.
        void addSynapse(Neuron* previousNeuron, float weight);

        void setBias(float b);
        float getBias();

        float getWeight(int index);
        void setWeight(int index, float w);

    protected:
        // weights[i] is the weight of the link to previousNeurons[i].
        std::vector<float> weights;
        float bias = 0.0f;
        // Incremented once per addSynapse call.
        int previousNeuronCount = 0;
        // Linked neurons of the previous layer; the pointers are not owned.
        std::vector<Neuron*> previousNeurons;
};

// Neuron of the output layer. It declares nothing beyond its own constructor and
// otherwise behaves like a HiddenNeuron.
class OutputNeuron : public HiddenNeuron {
public:
    OutputNeuron(int layer, int index);
};
#endif

Neuron.cpp

#include "Neuron.h"
#include <stdio.h>
#include <math.h>
#include <vector>

// Logistic function 1 / (1 + e^-x).
float sigmoid(float x){
    const double denominator = 1.0 + exp(-x);
    return 1.0 / denominator;
}

//constructors

// Base constructor. Sets both identifiers to 0 so that a neuron created without an
// explicit layer/index (e.g. through HiddenNeuron's default constructor) never holds
// indeterminate values that a later printf would read.
Neuron::Neuron() : _index(0), _layer(0) {
}

//constructor of the HiddenNeuron class: stores the neuron's layer and index and
//starts with a synapse count of zero
HiddenNeuron::HiddenNeuron(int layer, int index){
    previousNeuronCount = 0;
    _layer = layer;
    _index = index;
}

//constructor of the InputNeuron class: stores the neuron's layer and index and
//keeps value divided by 255.0 as the neuron's output
InputNeuron::InputNeuron(int layer, int index, int value)
    : _value(value / 255.0) {
    _layer = layer;
    _index = index;
}

//constructor of the OutputNeuron class: the setup is the same as for a HiddenNeuron,
//so it is forwarded to that constructor
OutputNeuron::OutputNeuron(int layer, int index)
    : HiddenNeuron(layer, index) {
}

//fallback computeOutput of the base class (virtual, but not pure virtual): derived
//classes are expected to override it, so this body only prints a warning and yields 0
float Neuron::computeOutput(){
    static const char warning[] = "Mega fucking warning, this is the virtual function, needs to be overloaded\n";
    fputs(warning, stdout);
    return 0.0f;
}

//InputNeuron override: an input neuron reports the value stored by its constructor
float InputNeuron::computeOutput(){
    const float output = _value;
    return output;
}

//HiddenNeuron override: returns sigmoid(bias + sum of weight * output of the linked
//neuron, taken over every synapse). Each linked neuron is evaluated through its own
//computeOutput(), so the call recurses into the previous layers.
float HiddenNeuron::computeOutput(){
    float sum = bias;
    printf("evaluating %d, %d\n", _layer, _index);
    //start at synapse 0: addSynapse appends from index 0 on, so a loop starting at 1
    //left the first connection out of the sum
    for(size_t i = 0; i < weights.size(); i++){
        sum  += weights[i]*(previousNeurons[i]->computeOutput());
    }
    return(sigmoid(sum));
}

//registers one incoming connection: the weight and the source neuron are appended at
//the same position of their vectors and the synapse counter is advanced
void HiddenNeuron::addSynapse(Neuron* previousNeuron, float weight){
    weights.push_back(weight);
    previousNeurons.push_back(previousNeuron);
    ++previousNeuronCount;
}

void HiddenNeuron::setBias(float b){
    bias = b;
}

helpers.h

#ifndef HELPERS_H
#define HELPERS_H

#include "Neuron.h"

// Returns rand() scaled by RAND_MAX and shifted by -0.5, i.e. a value in [-0.5, 0.5].
float randFloat();

// NOTE(review): no definition is visible in the helpers.cpp shown here, so the exact
// behaviour is unknown -- TODO confirm.
void intToUnary(int, float*);

// Mean of the squared element-wise differences of two arrays of listSize floats.
float MSE(float* a, float* b, int listSize);

// Appends `size` InputNeurons to *thisLayer.
void createInputLayer(int size, std::vector<InputNeuron>* thisLayer);

//for when you're attaching to an input layer
// Appends `size` HiddenNeurons to *thisLayer, each linked to every element of prevLayer.
// NOTE(review): prevLayer is received by value, i.e. as a copy of the caller's vector.
void createHiddenLayer(int size, std::vector<InputNeuron> prevLayer, std::vector<HiddenNeuron>* thisLayer);

// Appends `size` OutputNeurons to *thisLayer, each linked to every element of prevLayer.
// NOTE(review): prevLayer is received by value here as well.
void createOutputLayer(int size, std::vector<HiddenNeuron> prevLayer, std::vector<OutputNeuron>* thisLayer);

#endif

helpers.cpp

#include <math.h>
#include "helpers.h"
#include "fileio.h"


// Pseudo-random value in [-0.5, 0.5]: rand() scaled to [0, 1], then shifted by -0.5.
float randFloat(){
    const float unit = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    return unit - 0.5;
}

//both lists must be of order given by listSize
//Returns the mean of the squared element-wise differences between listA and listB.
//A listSize of zero (or less) yields 0 instead of dividing by zero.
float MSE(float* listA, float* listB, int listSize){
    if(listSize <= 0){
        return 0.0f;
    }
    double squareError = 0.0;
    for(int i = 0; i < listSize; i++){
        //squaring by multiplication avoids a call to the general-purpose pow()
        const double diff = static_cast<double>(listA[i]) - listB[i];
        squareError += diff * diff;
    }
    return static_cast<float>(squareError / listSize);
}

//wow vectors so cool B)
void createInputLayer(int size, std::vector<InputNeuron>* thisLayer){
    
    for(int i = 0; i < size; i++){
        thisLayer->push_back(InputNeuron(0, i, getNextPixel()));
    }
}

void createHiddenLayer(int size, std::vector<InputNeuron> prevLayer, std::vector<HiddenNeuron>* thisLayer){
    for(int i = 0; i < size; i++){
        HiddenNeuron h(1, i);
        h.setBias(randFloat());
        for (int j = 0; j < prevLayer.size(); j++){
            h.addSynapse(&prevLayer[j], randFloat());
        }
        thisLayer->push_back(h);
    }
}

void createOutputLayer(int size, std::vector<HiddenNeuron> prevLayer, std::vector<OutputNeuron>* thisLayer){
    for(int i = 0; i < size; i++){
        OutputNeuron h(2, i);
        h.setBias(randFloat());
        for (int j = 0; j < prevLayer.size(); j++){
            h.addSynapse(&prevLayer[j], randFloat());
        }
        thisLayer->push_back(h);
    }
}

为冗长的代码表示歉意，但由于不确定导致问题的原因，我不想删除任何内容！

基本上，我有一个基类 Neuron，InputNeuron、HiddenNeuron 和 OutputNeuron 类都从它继承并加以扩展。在 main 中，我用辅助函数为网络的每一层创建并填充向量，然后调用 computeOutput 函数逐层遍历网络，最终得到输出。程序能够正确找到输出向量中的第一个元素，并顺着指针找到隐藏层中的 HiddenNeuron，接着尝试顺着指针回到输入层。问题似乎在于指向输入层的指针无效，随后就发生了堆栈溢出（Neuron.cpp 第 52 行）。我最初的猜测是：当被指向的对象属于派生类、因而大小不同时，用基类指针来传递可能行不通。我的思路对吗？先谢谢了！

编辑：把向量的构造和填充放在 main 函数里完成时，这套代码可以正常工作；只有把它们移到单独文件中的函数里之后，才会出现堆栈溢出。

已解决：我需要把这些函数改为按引用接收参数，因为我是从按值传递的向量中取元素指针的，而那个向量实际上只是一份副本。谢谢大家：）

Answer 1

要把代码里的所有内容都看一遍太费劲了，不过您在使用多态的 C++ 代码时犯了一些严重的错误。vector 存放的是对象本身，这意味着当您对输入或输出神经元调用 push_back 时，实际上会发生下面几件事：

1) Create a correct neuron
2) Run a copy constructor into a base class
3) Add that Base class

把基类中本不该有实现的 computeOutput() 函数声明为纯虚函数，使基类成为抽象类（如果您不清楚我在说什么，请查阅相关资料）。这会让您的代码暂时无法编译，但会迫使您重新组织代码结构，从而消除许多错误。

但是，我不确定100％是否可以解决您遇到的特定错误。但这将使您走上正确的道路来解决许多问题。

P.S. 您可以考虑使用智能指针，并把它们放进 vector 中。只是一个建议。

Answer 2

调用createHiddenLayer和createOutputLayer时，将按值传递第二个参数prevLayer。

这意味着您创建了向量的副本。真正糟糕的是，您在下面这一行中取的是该临时副本中元素的地址：

h.addSynapse(&prevLayer[j], randFloat());

而 addSynapse 又把这些指向临时副本的地址存进了 previousNeurons 中；函数返回后临时副本被销毁，之后再使用这些指针就会导致未定义行为。

在实践中，几乎永远不应该按值传递 vector（或任何其他容器）。即使确实需要一份副本，通常也应该直接复制到最终的目标位置。

因此，您应该具有：

// Corrected prototype: prevLayer refers to the caller's vector instead of a copy, so
// element addresses taken inside the function stay valid after it returns.
void createHiddenLayer(int size, 
    std::vector<InputNeuron> &prevLayer,    // Pass by reference here!
    std::vector<HiddenNeuron>* thisLayer);

和

// Corrected prototype: prevLayer refers to the caller's vector instead of a copy, so
// element addresses taken inside the function stay valid after it returns.
void createOutputLayer(int size, 
    std::vector<HiddenNeuron> &prevLayer,   // Pass by reference here!
    std::vector<OutputNeuron>* thisLayer);

请注意，当您保存了 vector 中元素的地址时，必须确保之后不会再向该 vector 添加或从中删除元素。确切的规则在文档中有定义（请参见 https://en.cppreference.com/w/cpp/container/vector 中的“迭代器失效”一节）。

每个容器都有自己的规则。每次想要在修改容器时保留指针或迭代器时，都可以学习它们，也可以查阅文档。

如另一个答案所述，您也可以使用存放智能指针的 vector；如果您需要以某种方式修改源向量，这会很有用。

（C ++）为什么我会出现堆栈溢出？

2 个答案: