I tried to implement neural network backpropagation in Java. I have coded it, but the results are unsatisfactory: the error decreases too slowly. Here is a sample of the training results:
epoch:1 current error:0.5051166876846451
epoch:2 current error:0.4982484527652138
epoch:3 current error:0.4965995467118879
epoch:4 current error:0.49585659139683363
epoch:5 current error:0.4953426236386938
epoch:6 current error:0.4948766985413233
epoch:7 current error:0.49441754405152294
epoch:8 current error:0.4939551661406868
epoch:9 current error:0.49348601614718984
epoch:10 current error:0.4930078119902486
epoch:11 current error:0.49251846766886453
Based on this, I started to doubt my code and its algorithm. The activation function used is the sigmoid. Here is the training code:
public void learning(int epoch, double learningRateTemp, double desiredErrorTemp, DataSet ds, double momentum) {
    int processEpoch = 0;
    double sumSquaredError = 0;
    DataSetRow dsr;
    Connector conTemp;
    double sumError = 0;
    double errorInformation = 0;
    double activationValue;
    double partialDerivative;
    do {
        processEpoch++;
        sumSquaredError = 0;
        System.out.println("epoch:" + processEpoch);
        // data training set
        for (int a = 0; a < ds.countRows(); a++) {
            dsr = ds.getSpecificRow(a);
            sumError = 0;
            double[] input = dsr.getInput();
            double[] output = dsr.getdesiredOutput();
            double sumDeltaInput = 0;
            double weightTempValue = 0;
            // forward calculation
            this.forwardCalculation(input);
            // backpropagation of error
            // for the output units
            for (int k = 0; k < NeuralLayers[totalLayer - 1].getTotalNode(); k++) {
                activationValue = NeuralLayers[totalLayer - 1].getNeuron(k).getValue();
                partialDerivative = (activationValue) * (1 - activationValue);
                Neuron Temp = NeuralLayers[totalLayer - 1].getNeuron(k);
                errorInformation = (output[k] - Temp.getValue()) * partialDerivative;
                Temp.SetErrorInformationTerm(errorInformation);
                sumError += Math.pow((output[k] - Temp.getValue()), 2);
                NeuralLayers[totalLayer - 1].setNeuron(k, Temp);
            }
            // end of output units
            // for the hidden units
            for (int l = totalLayer - 2; l > 0; l--) {
                for (int j = 1; j < NeuralLayers[l].getTotalNode(); j++) {
                    sumDeltaInput = 0;
                    for (int k = 0; k < NeuralLayers[l + 1].getTotalNode(); k++) {
                        conTemp = NeuralLayers[l + 1].getConnector(k, j);
                        if (conTemp.getStatusFrom() == false) {
                            weightTempValue = conTemp.getWeight().getValue();
                            sumDeltaInput += (NeuralLayers[l + 1].getNeuron(k).GetErrorInformationTerm() * weightTempValue);
                        }
                    }
                    activationValue = NeuralLayers[l].getNeuron(j).getValue();
                    partialDerivative = (activationValue) * (1 - activationValue);
                    errorInformation = sumDeltaInput * partialDerivative;
                    Neuron neuTemp = NeuralLayers[l].getNeuron(j);
                    neuTemp.SetErrorInformationTerm(errorInformation);
                    NeuralLayers[l].setNeuron(j, neuTemp);
                }
            }
            updateWeight(learningRateTemp, momentum);
            sumSquaredError += sumError;
        }
        sumSquaredError /= (double) (ds.countRows() * NeuralLayers[totalLayer - 1].getTotalNode());
        sumSquaredError = Math.sqrt(sumSquaredError);
        System.out.println("current error:" + sumSquaredError);
    } while (processEpoch < epoch && sumSquaredError > desiredErrorTemp);
}
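For reference, the loops above compute the textbook sigmoid deltas: for an output unit, delta_k = (t_k - o_k) * o_k * (1 - o_k); for a hidden unit, delta_j = (sum over k of delta_k * w_jk) * o_j * (1 - o_j). The printed error is the root-mean-square error, sqrt(sum of (t - o)^2 / (rows * output nodes)). A minimal standalone sketch of those delta formulas, using plain arrays and hypothetical names rather than the Neuron/Connector classes above:

static double outputDelta(double target, double activation) {
    // delta_k = (t_k - o_k) * o_k * (1 - o_k)
    return (target - activation) * activation * (1 - activation);
}

static double hiddenDelta(double[] nextDeltas, double[] outgoingWeights, double activation) {
    // delta_j = (sum_k delta_k * w_jk) * o_j * (1 - o_j)
    double sum = 0;
    for (int k = 0; k < nextDeltas.length; k++) {
        sum += nextDeltas[k] * outgoingWeights[k];
    }
    return sum * activation * (1 - activation);
}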
For the forward calculation:
private void forwardCalculation(double[] inputValue) {
    Connector Contemp;
    double SumNodeWeight = 0;
    int start = 1;
    int count = 0;
    setNodeValue(inputValue, 0);
    do {
        count++;
        if ("output".equals(NeuralLayers[count].statusLayer))
            start = 0;
        else
            start = 1;
        // get the sum of all inputs
        for (int j = start; j < NeuralLayers[count].getTotalNode(); j++) {
            for (int i = 0; i < NeuralLayers[count].sizeConnector(j); i++) {
                Contemp = NeuralLayers[count].getConnector(j, i);
                SumNodeWeight += Contemp.getCombinedweightInput();
            }
            SumNodeWeight = (1 / (1 + Math.exp(-SumNodeWeight)));
            NeuralLayers[count].setNeuronValue(j, SumNodeWeight);
            SumNodeWeight = 0;
        }
    } while (!"output".equals(NeuralLayers[count].statusLayer));
}
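Each neuron value computed above is the sigmoid of the summed weighted input, o_j = 1 / (1 + exp(-net_j)) with net_j = sum over i of w_ij * x_i. A minimal sketch of that step with plain arrays (hypothetical names, not the classes above):

static double activate(double[] inputs, double[] weights, double bias) {
    double net = bias;  // the bias behaves like a weight on a constant input of 1
    for (int i = 0; i < inputs.length; i++) {
        net += weights[i] * inputs[i];
    }
    return 1.0 / (1.0 + Math.exp(-net));  // sigmoid squashing, as in the loop above
}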
And to update the weights:
private void updateWeight(double learningRateTemp, double momentum) {
    double newWeight;
    double errorInformation;
    Connector conTemp;
    for (int LayerPosition = totalLayer - 1; LayerPosition > 0; LayerPosition--) {
        for (int node = 1; node < NeuralLayers[LayerPosition].getTotalNode(); node++) {
            errorInformation = NeuralLayers[LayerPosition].getNeuron(node).GetErrorInformationTerm();
            // for the bias weight
            newWeight = learningRateTemp * errorInformation;
            conTemp = NeuralLayers[LayerPosition].getConnector(node, 0);
            conTemp.updateWeight(newWeight, false, 0);
            NeuralLayers[LayerPosition].updateConnector(conTemp, node, 0);
            // for the other node weights
            for (int From = 1; From < NeuralLayers[LayerPosition].sizeConnector(node); From++) {
                conTemp = NeuralLayers[LayerPosition].getConnector(node, From);
                double weightCorrection = learningRateTemp * errorInformation * NeuralLayers[LayerPosition - 1].getNeuron(From).getValue();
                conTemp.updateWeight(weightCorrection, true, momentum);
                NeuralLayers[LayerPosition].updateConnector(conTemp, node, From);
            }
        }
    }
}
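The rule applied here is the usual gradient step: Δw_ij = learningRate * delta_j * x_i for a normal weight (plus a momentum term), and just learningRate * delta_j for the bias, whose input is the constant 1. A minimal sketch of one such step, assuming the momentum term adds momentum times the previous step (a hypothetical standalone form, not the Connector API):

// w(t+1) = w(t) + eta * delta * input + momentum * previousStep
static double weightStep(double eta, double delta, double input, double momentum, double previousStep) {
    return eta * delta * input + momentum * previousStep;
}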
Am I on the right track? I have been hunting for the bug for several days and still found nothing. Is my formula for computing the error correct? Thank you very much!
Best Answer
I'm not an expert in this field, nor in Java programming, but this may be what's affecting you: the variable sumError is declared as 0 at the start, the errors from the output units are then added to it, and it then shows up again in the sumSquaredError variable within the hidden layers' for cycle. But if it is supposed to accumulate the training error, why is it inside the "hidden layer cycle"?
for (int l = totalLayer - 2; l > 0; l--) {
    for (int j = 1; j < NeuralLayers[l].getTotalNode(); j++) {
        // ...
    }
    updateWeight(learningRateTemp, momentum);
    sumSquaredError += sumError;
}
Shouldn't it be outside?
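That is, roughly this placement (a sketch of the suggestion only, keeping the question's names):

for (int l = totalLayer - 2; l > 0; l--) {
    for (int j = 1; j < NeuralLayers[l].getTotalNode(); j++) {
        // ... compute hidden-layer error terms only ...
    }
}
updateWeight(learningRateTemp, momentum);
sumSquaredError += sumError;  // accumulate the per-row error after the layer loops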
I'd point you to the pseudocode from someone who answered me previously:
link
Hope this helps!
On "java - Error decreases too slowly in neural network backpropagation training", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/20979615/