I just don't know what to do anymore...
It runs fine in a debug build, but not in a release build.
I'm trying to learn artificial neural networks and C++ vectors.
This is the code I'm porting to C++ (the original is in Python 2.7):
http://neuralnetworksanddeeplearning.com/chap1.html#exercise_852508
(just scroll up a little to get to it)
I'm using MinGW 7.2.0 from MSYS2 (C++11).
There are some "teste" prints in the backpropagation method, and that's where the problem is (I think). I also overloaded the operators +, - and * to make things easier.
I know there are libraries like Armadillo that would make this easier, but I really want to use this problem to learn.
Here are the files:
neuralnetwork.h
(I made everything public so things are easier to see)
#define MIN_NUMBER_TOLERANCE 1e-8
namespace nn
{
class neuralnetwork
{
//private:
public:
//total number of weights. useful to reserve memory
int numWeights;
//total number of biases. useful to reserve memory
int numBiases;
//total number of layers: 1 for input, n hidden layers and 1 for output
int numLayers;
//a vector to store the number of neurons in each layer: 0 index is about the input layer, last index is about the output layer
std::vector<int> sizes;
//stores all biases: num of neurons of layer 1 + ... + num of neurons of layer (numLayers - 1) (input layer has no bias)
std::vector<std::vector<double>> biases;
//stores all weights: (num of neurons of layer 1) x (num of neurons of layer 0) + ... + ( num of neurons of layer (numLayers - 1) ) x ( num of neurons of layer (numLayers - 2) ) (the input layer has no weights)
std::vector<std::vector<std::vector<double>>> weights;
//stores the output of each neuron of each layer
std::vector<std::vector<double>> layersOutput;
std::vector<std::vector<std::vector<double>>> derivativeWeights;
std::vector<std::vector<double>> derivativeBiases;
std::default_random_engine generator;
std::normal_distribution<double> distribution;
double randomNormalNumber(void);
double costDerivatives(const double&, const double&);
std::vector<double> costDerivatives(const std::vector<double> &, const std::vector<double> &);
void backPropagation(const std::vector<double>& neuralNetworkInputs, const std::vector<double>& expectedOutputs, // inputs
std::vector<std::vector<std::vector<double>>>& derivativeWeights, std::vector<std::vector<double>>& derivativeBiases); // outputs
void update_mini_batch( const std::vector<std::pair<std::vector<double>,std::vector<double>>> & mini_batch, double eta);
//public:
neuralnetwork(const std::vector<int>& sizes);
std::vector<double> feedforward(const std::vector<double>&);
};
std::vector<double> sigmoid(const std::vector<double> &);
double sigmoid(double);
std::vector<double> sigmoid_prime(const std::vector<double> &);
//double sigmoid_prime(double);
}
neuralnetwork.cpp
#include "neuralnetwork.h"
#include <iostream>
#include <assert.h>
#include <algorithm>
namespace nn
{
int counter = 0;
neuralnetwork::neuralnetwork(const std::vector<int> &sizes)
{
this->distribution = std::normal_distribution<double>( 0.0 , 1.0 );
this->numLayers = sizes.size();
this->sizes = sizes;
this->numWeights = 0;
this->numBiases = 0;
for ( int i = 1 ; i < this->numLayers ; i++ )
{
numWeights += this->sizes[ i ] * this->sizes[ i - 1 ];
numBiases += this->sizes[ i ];
}
this->weights.reserve( numWeights );
this->biases.reserve( numBiases );
this->derivativeWeights.reserve( numWeights );
this->derivativeBiases.reserve( numBiases );
this->layersOutput.reserve( this->sizes[ 0 ] + numBiases );
std::vector<double> auxVectorWeights;
std::vector<std::vector<double> > auxMatrixWeights;
std::vector<double> auxVectorBiases;
#ifdef DEBUG_BUILD
std::cout << "debugging!\n";
#endif
//just to accommodate the input layer with null biases and inputs (makes things easier to iterate over and read :D).
this->layersOutput.push_back( std::vector<double>( this->sizes[ 0 ] ) );
std::vector<std::vector<double>> matrixNothing( 0 );
this->weights.push_back( matrixNothing );
this->biases.push_back( std::vector<double>( 0 ) );
//start from the second layer (index 1) because there are no weights (nor biases) for the neurons of the first layer
for ( int layer = 1 ; layer < this->numLayers ; layer++ )
{
//preallocate memory for the output of each layer.
layersOutput.push_back( std::vector<double>( this->sizes[ layer ] ) );
//-----------weights begin--------------
//auxMatrixWeights will store the weights connections between one layer (number of columns) and its subsequent layer (number of rows)
//auxMatrixWeights = new std::vector(this->sizes[layer], std::vector<double>( this->sizes[layer - 1] )); // it is not working...
//size[layer] stores the number of neurons on the layer
for ( int i = 0 ; i < this->sizes[ layer ] ; i++ )
{
//auxVectorWeights holds the weights needed to connect neuron i (of this layer) to each neuron j of the previous layer
auxVectorWeights = std::vector<double>( this->sizes[ layer - 1 ] );
for ( int j = 0 ; j < auxVectorWeights.size() ; j++ )
{
auxVectorWeights[ j ] = this->randomNormalNumber();
}
auxMatrixWeights.push_back( auxVectorWeights );
}
this->weights.push_back( auxMatrixWeights );
auxMatrixWeights.clear();
//-----------weights end----------------
//-----------biases begin---------------
auxVectorBiases = std::vector<double>( this->sizes[ layer ] );
for ( int i = 0 ; i < auxVectorBiases.size() ; i++ )
{
auxVectorBiases[ i ] = this->randomNormalNumber();
}
this->biases.push_back( auxVectorBiases );
//-----------biases end-----------------
}
#ifdef _DEBUG
for ( int i = 0 ; i < this->weights.size() ; i++ )
{
std::cout << "layer " << i << "\n";
for ( int j = 0 ; j < this->weights[ i ].size() ; j++ )
{
std::cout << "neuron" << j << std::endl;
for ( const auto k : this->weights[ i ][ j ] )
{
std::cout << '\t' << k << ' ';
}
std::cout << std::endl;
}
}
#endif
}
template <class T>
inline int lastIndex(std::vector<T> vector , int tail)
{
return (vector.size() - tail);
}
double neuralnetwork::randomNormalNumber(void)
{
return this->distribution( this->generator );
}
double sigmoid(double z)
{
return 1.0 / ( 1.0 + exp( -z ) );
}
std::vector<double> sigmoid(const std::vector<double> & z)
{
int max = z.size();
std::vector<double> output;
output.reserve(max);
for(int i=0;i<max;i++)
{
output.push_back(0);
output[i] = 1.0 / ( 1.0 + exp( -z[i] ) );
}
return output;
}
/*double sigmoid_prime(double z)
{
return sigmoid( z ) * ( 1 - sigmoid( z ) );
}*/
std::vector<double> sigmoid_prime(const std::vector<double>& z)
{
int max = z.size();
std::vector<double> output;
output.reserve(max);
for(int i=0;i<max;i++)
{
output.push_back(sigmoid( z[i] ) * ( 1 - sigmoid( z[i] ) ) );
}
return output;
}
//scalar times vector
std::vector<double> operator* (double a , const std::vector<double> & b)
{
int size = b.size();
std::vector<double> result(size);
for ( int i = 0 ; i < size ; i++ )
{
result[i] = a * b[ i ];
}
return result;
}
// inner product
std::vector<double> operator* (const std::vector<double> & a , const std::vector<double> & b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif
int size = a.size(); // or b.size(). they should have the same size.
std::vector<double> result;
result.reserve(size); // or b.size(). they should have the same size.
for ( int i = 0 ; i < size ; i++ )
{
result.push_back( a[ i ] * b[ i ] );
}
return result;
}
//matrix times columns vector
std::vector<double> operator* (const std::vector<std::vector<double>> & a , const std::vector<double> & b)
{
#ifdef _DEBUG
assert(a[0].size() == b.size());
for(int i = 0 ; i < ( lastIndex( a , 1 )) ; i++)
{
assert(a[i].size() == a[i+1].size());
}
#endif
int lines = a.size();
int columns = a[0].size();
std::vector<double> result;
result.reserve(lines);
int j = 0;
for ( int i = 0 ; i < lines ; i++ )
{
result.push_back(0);
for(j = 0 ; j < columns ; j++)
{
result[i] += a[ i ][ j ] * b[ j ];
}
}
return result;
}
//scalar times matrix (calls scalar times vector)
std::vector<std::vector<double>> operator* (double a , const std::vector<std::vector<double>> & b)
{
#ifdef _DEBUG
for(int i = 0 ; i < b.size()-1 ; i++)
{
assert(b[i].size() == b[i+1].size());
}
#endif
int lines = b.size();
int columns = b[0].size();
std::vector<std::vector<double>> result;
int j = 0;
for ( int i = 0 ; i < lines ; i++ )
{
result.push_back(a * b[ j ]);
}
return result;
}
std::vector<double> operator+(const std::vector<double>& a, const std::vector<double>& b)
{
assert(a.size() == b.size());
int size = a.size();
std::vector<double> result;
result.reserve(size);
for(int i = 0 ; i < size ; i++)
{
result.push_back(0);
result[i] = a[i] + b[i];
}
return result;
}
//sum of matrices
std::vector<std::vector<double>> operator+(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif
int size = a.size();
#ifdef _DEBUG
for(int i = 0 ; i < size ; i++)
{
assert(a[i].size() == b[i].size());
}
#endif
std::vector<std::vector<double>> result;
result.resize(size);
for(int i = 0 ; i < size ; i++)
{
result.push_back(a[i] + b[i]);
}
return result;
}
//subtraction of vectors
std::vector<double> operator-(const std::vector<double>& a, const std::vector<double>& b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif
int size = a.size();
std::vector<double> result;
result.resize(size);
for(int i = 0 ; i < size ; i++)
{
result[i] = a[i] - b[i];
}
return result;
}
//subtraction of matrices (calls subtraction of vectors)
std::vector<std::vector<double>> operator-(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif
int size = a.size();
#ifdef _DEBUG
for(int i = 0 ; i < size ; i++)
{
assert(a[i].size() == b[i].size());
}
#endif
std::vector<std::vector<double>> result;
result.resize(size);
for(int i = 0 ; i < size ; i++)
{
result.push_back(a[i] - b[i]);
}
return result;
}
//elementwise division
std::vector<double> operator/(const std::vector<double>& a, const std::vector<double>& b)
{
assert(a.size() == b.size());
int size = a.size();
std::vector<double> result;
result.reserve(size);
for(int i = 0 ; i < size ; i++)
{
if(b[i] < MIN_NUMBER_TOLERANCE)
{
throw std::runtime_error("Can't divide by zero!");
}
result[i] = a[i] / b[i];
}
return result;
}
double neuralnetwork::costDerivatives(const double &networkOutput , const double &expectedOutput)
{
return expectedOutput - networkOutput;
}
std::vector<double> neuralnetwork::costDerivatives(const std::vector<double> &networkOutput , const std::vector<double> &expectedOutput)
{
assert(expectedOutput.size() == networkOutput.size());
int size = networkOutput.size();
std::vector<double> output;
output.reserve(size);
for(int i = 0 ; i < size ; i++)
{
output.push_back(networkOutput[i] - expectedOutput[i]);
}
return output;
}
void neuralnetwork::backPropagation(const std::vector<double> &neuralNetworkInputs , const std::vector<double> &expectedOutputs, // inputs
std::vector<std::vector<std::vector<double>>>& derivativeWeights , std::vector<std::vector<double>>& derivativeBiases) // outputs
{
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
derivativeWeights.reserve( sizes.size() - 1 );
derivativeBiases.reserve( sizes.size() - 1 );
//to store one activation layer
std::vector<double> activation = neuralNetworkInputs;
//to store each one of the activation layers
std::vector<std::vector<double>> activations;
activations.reserve(sizes.size()); // numBiases is the same as the number of neurons (except 1st layer)
activations.push_back(activation);
int maxLayerSize = 0;
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
for ( int i = 1 ; i < numBiases ; i++ )
{
maxLayerSize = std::max(sizes[i], maxLayerSize);
}
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
// to store one weighted sum
std::vector<double> z;
z.reserve(maxLayerSize);
// to store each one of the weighted sums
std::vector<std::vector<double>> zs;
zs.reserve(sizes.size());
// layer and neuron counter
int layer, neuron;
for ( layer = 1 ; layer < numLayers ; layer++ )
{
z = (weights[layer] * activation) + biases[layer];
zs.push_back(z);
activation = sigmoid(z);
activations.push_back(activation);
}
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
std::vector<double> delta = costDerivatives(activations[ lastIndex( activations , 1 )] , expectedOutputs) * sigmoid_prime(z);
delta.reserve(maxLayerSize);
derivativeBiases.push_back(delta);
int j;
std::vector<std::vector<double>> dummyMatrix;
dummyMatrix.reserve(maxLayerSize);
for (neuron = 0; neuron < sizes[ lastIndex( sizes , 1 )]; neuron++)
{
dummyMatrix.push_back(std::vector<double>(activations[ lastIndex( activations , 2 )].size()));
for (j = 0; j < activations[ lastIndex( activations , 2 )].size(); j++)
{
dummyMatrix[neuron][j] = delta[neuron] * activations[ lastIndex( activations , 2 )][j];
}
}
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
derivativeWeights.push_back(dummyMatrix);
dummyMatrix.clear();
std::vector<double> sp;
sp.reserve(maxLayerSize);
std::vector<double> dummyVector;
dummyVector.reserve(maxLayerSize);
double dummyDouble = 0;
for(layer = 2 ; layer < numLayers ; layer++)
{
z = zs[ lastIndex( zs , layer )];
sp = sigmoid_prime(z);
for(j = 0 ; j < sizes[ lastIndex( weights , layer )] ; j++)
{
for (neuron = 0; neuron < sizes[ lastIndex( sizes , layer - 1 )]; neuron++)
{
dummyDouble += weights[ lastIndex( weights , layer - 1 )][neuron][j] * delta[neuron];
}
dummyVector.push_back(dummyDouble * sp[j]);
dummyDouble = 0;
}
delta = dummyVector;
dummyVector.clear();
derivativeBiases.push_back(delta);
for (neuron = 0; neuron < sizes[ lastIndex( sizes , layer )]; neuron++)
{
dummyMatrix.push_back(std::vector<double>(sizes[ lastIndex( sizes , layer + 1 )]));
for (j = 0; j < sizes[ lastIndex( sizes , layer + 1 )]; j++)
{
dummyMatrix[neuron][j] = activations[ lastIndex( activations , layer + 1 )][j] * delta[neuron];
}
}
derivativeWeights.push_back(dummyMatrix);
dummyMatrix.clear();
}
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
//both derivativeWeights and derivativeBiases were built back-to-front, so reverse them.
std::reverse(derivativeWeights.begin(),derivativeWeights.end());
std::reverse(derivativeBiases.begin(),derivativeBiases.end());
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
}
}
main.cpp
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include "neuralnetwork.h"
#include <string>
void printAll(const std::vector<double> & v, const std::string & name)
{
int size = v.size();
std::cout << "\t" << name << ":\t";
for(int i = 0 ; i < size ; i++)
{
std::cout << v[i] << "\t";
}
std::cout << std::endl;
}
template<class T>
void printAll(const std::vector<T> & v, const std::string & name)
{
int size = v.size();
std::cout << name << ":" << std::endl;
for(int i = 0 ; i < size ; i++)
{
printAll(v[i], "\t" + ("[" + std::to_string(i)) + "]");
}
}
int main(int argc, char** argv )
{
nn::neuralnetwork n({2,4,3});
n.weights = {{},{{1,2},{3,4},{5,6},{7,8}} , {{9,8,7,6},{5,4,3,2},{1,2,3,4}}};
n.biases = {{},{1, 4, 6, 8} , {9, 2, 4}};
printAll(n.weights,"weights");
printAll(n.biases,"biases");
std::vector<std::vector<std::vector<double>>> derivativeWeights;
std::vector<std::vector<double>> derivativeBiases;
n.backPropagation({1,2},{1,2,3},derivativeWeights,derivativeBiases);
printAll(n.derivativeWeights,"derivativeWeights");
printAll(n.derivativeBiases,"derivativeBiases");
system("PAUSE");
return 0;
}
Best answer
It looks like your problem is that you are only reserving memory for the vectors in the constructor, not allocating it.
The reserve method does not resize the vector; it is a performance optimization for cases where you know you will resize the vector in the future, and an optimizing compiler is free to ignore it.
In this particular code it doesn't cause any problem for "weights" and "biases", because you initialize them with vectors of the proper size, which does set them to the correct size. The problem is with derivativeWeights and derivativeBiases, where you reserve memory for the vectors but never actually resize them. That memory is potentially invalid if you try to dereference it. You could use resize instead of reserve, or push back the elements one by one, which also resizes the vector.
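To make the distinction concrete, here is a minimal standalone sketch (not tied to your neuralnetwork class) showing that reserve changes only the capacity, while resize and push_back change the size:
#include <cassert>
#include <vector>
int main()
{
    std::vector<double> v;
    v.reserve(10);      // allocates capacity, but size() is still 0
    assert(v.size() == 0);
    //v[3] = 1.0;       // undefined behavior: element 3 does not exist yet
    std::vector<double> w;
    w.resize(10);       // size() is 10, elements are value-initialized to 0.0
    w[3] = 1.0;         // fine
    std::vector<double> u;
    u.reserve(10);
    u.push_back(1.0);   // also fine: push_back grows the size by itself
    assert(u.size() == 1);
    return 0;
}
Both resize and push_back leave size() equal to the number of real elements, so indexing those elements afterwards is well defined.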
Another comment: you don't have to write this-> for every member of the class; the this-> is implied when you refer to a member without it.
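For example (a hypothetical struct, just to illustrate the point):
struct example
{
    int numLayers = 0;
    void set(int n)
    {
        this->numLayers = n; // explicit
        numLayers = n;       // equivalent: "this->" is implied inside member functions
    }
};
int main()
{
    example e;
    e.set(3);
    return 0;
}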