我正在学习人工神经网络以及 C++ 的 std::vector。
以下是我用 C++ 编写的代码(根据一份 Python 2.7 的实现改写而来):
我使用的是 MSYS2 中的 MinGW 7.2.0(按 C++11 标准编译)。
backPropagation 方法中有一些打印 "teste" 的调试输出,我猜问题就出在这个方法里。我还重载了运算符 +、- 和 *,以便简化向量和矩阵的运算。
// Declarations for nn::neuralnetwork: a fully connected feed-forward network whose
// parameters are stored in nested std::vector containers.
// NOTE(review): the braces and access specifiers of this namespace/class are not
// visible in this excerpt; main() assigns `weights` and `biases` directly, so those
// members are presumably public — confirm against the real header.
namespace nn
class neuralnetwork
// Total number of weights over all layers (accumulated in the constructor).
int numWeights;
// Total number of biases, i.e. the neuron count of every layer except the input layer.
int numBiases;
// Total number of layers: 1 for input, n hidden layers and 1 for output.
int numLayers;
// Number of neurons in each layer: index 0 is the input layer, the last index is the output layer.
std::vector<int> sizes;
// Biases per layer: biases[l][n] belongs to neuron n of layer l.
// Index 0 is an empty placeholder because the input layer has no biases.
std::vector<std::vector<double>> biases;
// Weights per layer: weights[l] has sizes[l] rows of sizes[l - 1] values each, so
// weights[l][n][p] connects neuron p of layer l - 1 to neuron n of layer l.
// Index 0 is an empty placeholder because the input layer has no incoming weights.
std::vector<std::vector<std::vector<double>>> weights;
// Output of each neuron of each layer; the constructor pre-sizes one vector per layer.
std::vector<std::vector<double>> layersOutput;
// NOTE(review): presumably gradient storage matching `weights` / `biases`. In the visible
// code only reserve() is called on them, and backPropagation takes out-parameters with the
// same names that shadow these members — confirm the intended use.
std::vector<std::vector<std::vector<double>>> derivativeWeights;
std::vector<std::vector<double>> derivativeBiases;
// Random engine and distribution used by randomNormalNumber(); the constructor sets the
// distribution to mean 0.0 and standard deviation 1.0.
std::default_random_engine generator;
std::normal_distribution<double> distribution;
// Draws one sample from `distribution` using `generator`.
double randomNormalNumber(void);
// Derivative of the cost with respect to the network output (scalar and element-wise vector forms).
double costDerivatives(const double&, const double&);
std::vector<double> costDerivatives(const std::vector<double> &, const std::vector<double> &);
// Backpropagation for one sample; the gradients are returned through the two out-parameters.
void backPropagation(const std::vector<double>& neuralNetworkInputs, const std::vector<double>& expectedOutputs, // inputs
std::vector<std::vector<std::vector<double>>>& derivativeWeights, std::vector<std::vector<double>>& derivativeBiases); // outputs
// Takes a mini batch of (input, expected output) pairs and a rate `eta`.
// NOTE(review): no definition is visible in this excerpt.
void update_mini_batch( const std::vector<std::pair<std::vector<double>,std::vector<double>>> & mini_batch, double eta);
// Builds a network with sizes[i] neurons in layer i and randomly initialised weights and biases.
neuralnetwork(const std::vector<int>& sizes);
// NOTE(review): no definition is visible in this excerpt; presumably maps an input vector to the network output.
std::vector<double> feedforward(const std::vector<double>&);
// NOTE(review): sigmoid / sigmoid_prime are declared here, but the definitions visible in the
// .cpp are not qualified with neuralnetwork:: — confirm whether members or free functions are intended.
std::vector<double> sigmoid(const std::vector<double> &);
double sigmoid(double);
std::vector<double> sigmoid_prime(const std::vector<double> &);
//double sigmoid_prime(double);
#include "neuralnetwork.h"

#include <assert.h>

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>
namespace nn
// Debug trace counter: incremented by every "teste" print in backPropagation.
int counter = 0;
// Builds a fully connected network with sizes[i] neurons in layer i.
//
// sizes: neurons per layer; index 0 is the input layer, the last index the output layer.
//
// Weights and biases are drawn from a normal distribution (mean 0, stddev 1). For every
// layer l >= 1, weights[l] holds sizes[l] rows of sizes[l - 1] values and biases[l] holds
// sizes[l] values. Index 0 of weights / biases is an empty placeholder so that all
// containers can be indexed by layer number.
neuralnetwork::neuralnetwork(const std::vector<int> &sizes)
    : numWeights( 0 ),
      numBiases( 0 ),
      numLayers( static_cast<int>( sizes.size() ) ),
      sizes( sizes ),
      distribution( 0.0 , 1.0 )
{
    std::cout << "debugging!\n";

    // An empty topology has no input layer to allocate; leave every container empty
    // instead of reading sizes[ 0 ] out of bounds.
    if ( numLayers == 0 )
    {
        return;
    }

    for ( int layer = 1 ; layer < numLayers ; layer++ )
    {
        numWeights += sizes[ layer ] * sizes[ layer - 1 ];
        numBiases += sizes[ layer ];
    }

    // The outer containers hold one entry per layer, so that is all they need to reserve
    // (not one slot per individual weight or bias).
    weights.reserve( numLayers );
    biases.reserve( numLayers );
    derivativeWeights.reserve( numLayers );
    derivativeBiases.reserve( numLayers );
    layersOutput.reserve( numLayers );

    // Placeholders for the input layer: it has outputs but neither weights nor biases.
    layersOutput.push_back( std::vector<double>( sizes[ 0 ] ) );
    weights.push_back( std::vector<std::vector<double>>() );
    biases.push_back( std::vector<double>() );

    for ( int layer = 1 ; layer < numLayers ; layer++ )
    {
        // preallocate memory for the output of this layer.
        layersOutput.push_back( std::vector<double>( sizes[ layer ] ) );

        // A fresh matrix per layer: one row per neuron of this layer, one column per neuron
        // of the previous layer. (Reusing a single matrix across layers would carry the
        // rows of earlier layers into later ones.)
        std::vector<std::vector<double>> layerWeights;
        layerWeights.reserve( sizes[ layer ] );
        for ( int neuron = 0 ; neuron < sizes[ layer ] ; neuron++ )
        {
            std::vector<double> row( sizes[ layer - 1 ] );
            for ( double &weight : row )
            {
                weight = randomNormalNumber();
            }
            layerWeights.push_back( row );
        }
        weights.push_back( layerWeights );

        std::vector<double> layerBiases( sizes[ layer ] );
        for ( double &bias : layerBiases )
        {
            bias = randomNormalNumber();
        }
        biases.push_back( layerBiases );
    }

#ifdef _DEBUG
    // Dump every weight, grouped by layer and neuron.
    for ( std::size_t i = 0 ; i < weights.size() ; i++ )
    {
        std::cout << "layer " << i << "\n";
        for ( std::size_t j = 0 ; j < weights[ i ].size() ; j++ )
        {
            std::cout << "neuron" << j << std::endl;
            for ( const auto k : weights[ i ][ j ] )
            {
                std::cout << '\t' << k << ' ';
            }
            std::cout << std::endl;
        }
    }
#endif
}
// Returns the index of the element that is `tail` positions from the end of `vector`
// (tail == 1 is the last element). The result is negative when `tail` exceeds the size,
// so callers must not index with it in that case.
// The vector is taken by const reference so that a call does not copy the container.
template <class T>
inline int lastIndex(const std::vector<T>& vector , int tail)
{
    return static_cast<int>( vector.size() ) - tail;
}
double neuralnetwork::randomNormalNumber(void)
return this->distribution( this->generator );
// Logistic sigmoid of a single value: 1 / (1 + e^(-z)).
double sigmoid(double z)
{
    return 1.0 / ( 1.0 + std::exp( -z ) );
}
// Logistic sigmoid applied to every element of z; the result has the same size as z.
std::vector<double> sigmoid(const std::vector<double> & z)
{
    // Grow the result with push_back: indexing into a default-constructed (empty) vector
    // would write out of bounds.
    std::vector<double> output;
    output.reserve( z.size() );
    for ( const double value : z )
    {
        output.push_back( 1.0 / ( 1.0 + std::exp( -value ) ) );
    }
    return output;
}
// The scalar overload was disabled in the original source; it is kept here for reference.
/*double sigmoid_prime(double z)
{
    return sigmoid( z ) * ( 1 - sigmoid( z ) );
}*/

// Derivative of the logistic sigmoid applied to every element of z:
// s(z) * (1 - s(z)) with s(z) = 1 / (1 + e^(-z)). The result has the same size as z.
std::vector<double> sigmoid_prime(const std::vector<double>& z)
{
    std::vector<double> output;
    output.reserve( z.size() );
    for ( const double value : z )
    {
        // Evaluate the sigmoid once per element and reuse it.
        const double s = 1.0 / ( 1.0 + std::exp( -value ) );
        output.push_back( s * ( 1 - s ) );
    }
    return output;
}
//scalar times vector: every element of b multiplied by a
std::vector<double> operator* (double a , const std::vector<double> & b)
{
    std::vector<double> result;
    result.reserve( b.size() );
    for ( const double value : b )
    {
        result.push_back( a * value );
    }
    return result;
}
// element-wise (Hadamard) product: result[i] = a[i] * b[i].
// Note that this is not the inner product; the result is a vector of the same size as
// the operands, which must have equal sizes.
std::vector<double> operator* (const std::vector<double> & a , const std::vector<double> & b)
{
#ifdef _DEBUG
    assert(a.size() == b.size());
#endif
    const std::size_t size = a.size(); // or b.size(). they should have the same size.
    std::vector<double> result;
    result.reserve( size );
    for ( std::size_t i = 0 ; i < size ; i++ )
    {
        result.push_back( a[ i ] * b[ i ] );
    }
    return result;
}
//matrix times column vector: result[i] = sum over j of a[i][j] * b[j]
// Every row of a must have b.size() columns. An empty matrix yields an empty vector.
std::vector<double> operator* (const std::vector<std::vector<double>> & a , const std::vector<double> & b)
{
#ifdef _DEBUG
    for ( const std::vector<double> & row : a )
    {
        assert(row.size() == b.size());
    }
#endif
    // One accumulated sum per row, appended with push_back: indexing into a
    // default-constructed (empty) result would write out of bounds.
    std::vector<double> result;
    result.reserve( a.size() );
    for ( const std::vector<double> & row : a )
    {
        double sum = 0.0;
        for ( std::size_t j = 0 ; j < row.size() ; j++ )
        {
            sum += row[ j ] * b[ j ];
        }
        result.push_back( sum );
    }
    return result;
}
//scalar times matrix: every element of b multiplied by a
// The result has the same shape as b. An empty matrix yields an empty matrix.
std::vector<std::vector<double>> operator* (double a , const std::vector<std::vector<double>> & b)
{
#ifdef _DEBUG
    // All rows must have the same length. Starting at 1 avoids the unsigned
    // underflow of `b.size() - 1` on an empty matrix.
    for ( std::size_t i = 1 ; i < b.size() ; i++ )
    {
        assert(b[i - 1].size() == b[i].size());
    }
#endif
    std::vector<std::vector<double>> result;
    result.reserve( b.size() );
    // Scale each row in turn (not the first row repeatedly).
    for ( const std::vector<double> & row : b )
    {
        std::vector<double> scaled;
        scaled.reserve( row.size() );
        for ( const double value : row )
        {
            scaled.push_back( a * value );
        }
        result.push_back( scaled );
    }
    return result;
}
std::vector<double> operator+(const std::vector<double>& a, const std::vector<double>& b)
assert(a.size() == b.size());
int size = a.size();
std::vector<double> result;
for(int i = 0 ; i < size ; i++)
result[i] = a[i] + b[i];
return result;
//sum of matrices: result[i][j] = a[i][j] + b[i][j]. Both operands must have the same shape.
std::vector<std::vector<double>> operator+(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
#ifdef _DEBUG
    assert(a.size() == b.size());
#endif
    const std::size_t size = a.size();
#ifdef _DEBUG
    for ( std::size_t i = 0 ; i < size ; i++ )
    {
        assert(a[i].size() == b[i].size());
    }
#endif
    std::vector<std::vector<double>> result;
    result.reserve( size );
    for ( std::size_t i = 0 ; i < size ; i++ )
    {
        // Rows are added in place here so this operator does not depend on any other overload.
        std::vector<double> row;
        row.reserve( a[i].size() );
        for ( std::size_t j = 0 ; j < a[i].size() ; j++ )
        {
            row.push_back( a[i][j] + b[i][j] );
        }
        result.push_back( row );
    }
    return result;
}
//subtraction of vectors: result[i] = a[i] - b[i]. Both operands must have the same size.
std::vector<double> operator-(const std::vector<double>& a, const std::vector<double>& b)
{
#ifdef _DEBUG
    assert(a.size() == b.size());
#endif
    const std::size_t size = a.size();
    // Grow the result with push_back: indexing into a default-constructed (empty)
    // vector would write out of bounds.
    std::vector<double> result;
    result.reserve( size );
    for ( std::size_t i = 0 ; i < size ; i++ )
    {
        result.push_back( a[i] - b[i] );
    }
    return result;
}
//subtraction of matrices: result[i][j] = a[i][j] - b[i][j]. Both operands must have the same shape.
std::vector<std::vector<double>> operator-(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
#ifdef _DEBUG
    assert(a.size() == b.size());
#endif
    const std::size_t size = a.size();
#ifdef _DEBUG
    for ( std::size_t i = 0 ; i < size ; i++ )
    {
        assert(a[i].size() == b[i].size());
    }
#endif
    std::vector<std::vector<double>> result;
    result.reserve( size );
    for ( std::size_t i = 0 ; i < size ; i++ )
    {
        // Rows are subtracted in place here so this operator does not depend on any other overload.
        std::vector<double> row;
        row.reserve( a[i].size() );
        for ( std::size_t j = 0 ; j < a[i].size() ; j++ )
        {
            row.push_back( a[i][j] - b[i][j] );
        }
        result.push_back( row );
    }
    return result;
}
//elementwise division
std::vector<double> operator/(const std::vector<double>& a, const std::vector<double>& b)
assert(a.size() == b.size());
int size = a.size();
std::vector<double> result;
for(int i = 0 ; i < size ; i++)
throw std::runtime_error("Can't divide by zero!");
result[i] = a[i] / b[i];
return result;
// Derivative of the cost with respect to a single network output.
// Uses the same sign convention as the vector overload (network output minus expected
// output) so that both overloads agree.
double neuralnetwork::costDerivatives(const double &networkOutput , const double &expectedOutput)
{
    return networkOutput - expectedOutput;
}
std::vector<double> neuralnetwork::costDerivatives(const std::vector<double> &networkOutput , const std::vector<double> &expectedOutput)
assert(expectedOutput.size() == networkOutput.size());
int size = networkOutput.size();
std::vector<double> output;
for(int i = 0 ; i < size ; i++)
output.push_back(networkOutput[i] - expectedOutput[i]);
return output;
// Backpropagation for one training sample.
// Inputs: neuralNetworkInputs seeds the first activation vector; expectedOutputs is
// compared against the network output through costDerivatives.
// Outputs: derivativeWeights / derivativeBiases are out-parameters. They have the same
// names as two class members and therefore shadow them inside this function.
// NOTE(review): in this excerpt the function has no braces, so the extent of each loop
// cannot be determined from the text, and no statement ever appends to `activations`,
// `zs`, `derivativeWeights` or `derivativeBiases`. The push_back / reverse steps that the
// comments below refer to appear to be missing — confirm against the original file
// before relying on this function.
void neuralnetwork::backPropagation(const std::vector<double> &neuralNetworkInputs , const std::vector<double> &expectedOutputs, // inputs
std::vector<std::vector<std::vector<double>>>& derivativeWeights , std::vector<std::vector<double>>& derivativeBiases) // outputs
// Debug trace: prints "teste <n>" and increments the file-level counter.
std::cout << "teste "<< counter++ << std::endl;
// Room for one gradient entry per non-input layer.
derivativeWeights.reserve( sizes.size() - 1 );
derivativeBiases.reserve( sizes.size() - 1 );
//to store one activation layer
std::vector<double> activation = neuralNetworkInputs;
//to store each one of the activation layers
std::vector<std::vector<double>> activations;
activations.reserve(sizes.size()); // numBiases is the same as the number of neurons (except 1st layer)
int maxLayerSize = 0;
std::cout << "teste "<< counter++ << std::endl;
// NOTE(review): the bound is numBiases (the sum of sizes[1..], see the constructor) but the
// index goes into `sizes`, which has numLayers entries; for a {2,4,3} network this reads
// past the end of `sizes`. The bound was probably meant to be numLayers — confirm.
// maxLayerSize is not read again in the visible code.
for ( int i = 1 ; i < numBiases ; i++ )
maxLayerSize = std::max(sizes[i], maxLayerSize);
std::cout << "teste "<< counter++ << std::endl;
// to store one weighted sum
std::vector<double> z;
// to store each one of the weighted sums
std::vector<std::vector<double>> zs;
// layer and neuron counter
int layer, neuron;
// Forward pass: z = weights[layer] * activation + biases[layer], then activation = sigmoid(z).
// NOTE(review): z and activation are presumably meant to be stored in `zs` / `activations`
// here; no such statement is visible.
for ( layer = 1 ; layer < numLayers ; layer++ )
z = (weights[layer] * activation) + biases[layer];
activation = sigmoid(z);
std::cout << "teste "<< counter++ << std::endl;
// Output-layer error: cost derivative multiplied element by element with sigmoid_prime(z)
// (the vector * vector overload multiplies element-wise).
// NOTE(review): `activations` is still empty in the visible code, so lastIndex(activations, 1)
// is -1 and this indexing is out of bounds.
std::vector<double> delta = costDerivatives(activations[ lastIndex( activations , 1 )] , expectedOutputs) * sigmoid_prime(z);
int j;
// Weight gradient of the output layer:
// dummyMatrix[neuron][j] = delta[neuron] * (activation j of the second-to-last stored layer).
std::vector<std::vector<double>> dummyMatrix;
for (neuron = 0; neuron < sizes[ lastIndex( sizes , 1 )]; neuron++)
dummyMatrix.push_back(std::vector<double>(activations[ lastIndex( activations , 2 )].size()));
for (j = 0; j < activations[ lastIndex( activations , 2 )].size(); j++)
dummyMatrix[neuron][j] = delta[neuron] * activations[ lastIndex( activations , 2 )][j];
std::cout << "teste "<< counter++ << std::endl;
std::vector<double> sp;
std::vector<double> dummyVector;
double dummyDouble = 0;
// Backward pass over the remaining layers; `layer` counts from the end of the containers
// (it is always used through lastIndex(x, layer)).
// NOTE(review): `dummyVector` and `dummyMatrix` are never cleared in the visible code, so
// values from earlier steps would remain in them — confirm.
for(layer = 2 ; layer < numLayers ; layer++)
z = zs[ lastIndex( zs , layer )];
sp = sigmoid_prime(z);
// NOTE(review): `sizes` is indexed with lastIndex(weights, layer); this only lines up while
// `weights` and `sizes` have the same length (the constructor adds a placeholder at index 0).
for(j = 0 ; j < sizes[ lastIndex( weights , layer )] ; j++)
// dummyDouble accumulates the sum over `neuron` of W[neuron][j] * delta[neuron]; it is then
// scaled by sp[j] and reset to 0.
for (neuron = 0; neuron < sizes[ lastIndex( sizes , layer - 1 )]; neuron++)
dummyDouble += weights[ lastIndex( weights , layer - 1 )][neuron][j] * delta[neuron];
dummyVector.push_back(dummyDouble * sp[j]);
dummyDouble = 0;
delta = dummyVector;
// Weight gradient of this layer:
// dummyMatrix[neuron][j] = (activation j of the preceding layer) * delta[neuron].
for (neuron = 0; neuron < sizes[ lastIndex( sizes , layer )]; neuron++)
dummyMatrix.push_back(std::vector<double>(sizes[ lastIndex( sizes , layer + 1 )]));
for (j = 0; j < sizes[ lastIndex( sizes , layer + 1 )]; j++)
dummyMatrix[neuron][j] = activations[ lastIndex( activations , layer + 1 )][j] * delta[neuron];
std::cout << "teste "<< counter++ << std::endl;
//both derivativeWeights and derivativeBiases are reversed. so let's reverse it.
// NOTE(review): no reverse statement is visible after this comment.
std::cout << "teste "<< counter++ << std::endl;
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include "neuralnetwork.h"
#include <string>
// Prints one vector on a single line: a tab, `name`, ":" and a tab, followed by every
// value with a trailing tab, then a newline (flushed).
void printAll(const std::vector<double> & v, const std::string & name)
{
    std::cout << "\t" << name << ":\t";
    for ( const double value : v )
    {
        std::cout << value << "\t";
    }
    std::cout << std::endl;
}
// Prints a nested vector: first `name` followed by ":" on its own line, then every element
// through printAll again, labelled with a tab and its index in square brackets.
// The recursion ends at the non-template overload for std::vector<double>.
template<class T>
void printAll(const std::vector<T> & v, const std::string & name)
{
    std::cout << name << ":" << std::endl;
    for ( std::size_t i = 0 ; i < v.size() ; i++ )
    {
        printAll(v[i], "\t" + ("[" + std::to_string(i)) + "]");
    }
}
int main(int argc, char** argv )
nn::neuralnetwork n({2,4,3});
n.weights = {{},{{1,2},{3,4},{5,6},{7,8}} , {{9,8,7,6},{5,4,3,2},{1,2,3,4}}};
n.biases = {{},{1, 4, 6, 8} , {9, 2, 4}};
std::vector<std::vector<std::vector<double>>> derivativeWeights;
std::vector<std::vector<double>> derivativeBiases;
return 0;
另外补充一点:你不必对类的每个成员都写 `this->`。即使省略 `this->`,编译器也会把该名称当作类成员来查找(除非它被同名的局部变量或参数遮蔽,例如构造函数中的参数 sizes)。
