This article looks at what to do when a neural network's backpropagation does not work. The question and the recommended answer below should be a useful reference for anyone hitting the same problem.

Problem Description

I have coded a neural network in JavaScript and implemented the backpropagation algorithm described here. Here is the code (TypeScript):

/**
 * Net
 */


export class Net {
    private layers: Layer[] = [];
    private inputLayer: Layer;
    private outputLayer: Layer;
    public error: number = Infinity;

    private eta: number = 0.15;
    private alpha: number = 0.5;

    constructor(...topology: number[]) {
        topology.forEach((topologyLayer, iTL) => {
            var nextLayerNeuronNumber = topology[iTL + 1] || 0;
            this.layers.push(new Layer(topologyLayer, nextLayerNeuronNumber));
        });

        this.inputLayer = this.layers[0];
        this.outputLayer = this.layers[this.layers.length - 1];

    }

    public loadWeights(weights) {
        /*
        [
            [Layer
                [Node weights, ..., ...]
            ]
        ]
        */

        for (var iL = 0; iL < weights.length; iL++) {
            var neuronWeights = weights[iL];
            var layer = this.layers[iL];
            for (var iN = 0; iN < neuronWeights.length; iN++) {

                // Neuron

                var connections = neuronWeights[iN];
                for (var iC = 0; iC < connections.length; iC++) {
                    var connection = connections[iC];
                    this.layer(iL).neuron(iN).setWeights(iC, connection);

                }

            }
        }

    }


    public train(data: number[][], iterations = 2000) {

        var inputs = this.inputLayer.neurons.length - 1;

        for (var ite = 0; ite < iterations; ite++) {

            data.forEach(node => {

                var inputData = [];
                var outputData = [];

                for (var i = 0; i < node.length; i++) {
                    if (i < inputs) {
                        inputData.push(node[i])
                    } else {
                        outputData.push(node[i])
                    }
                }

                this.feedForward(...inputData);
                this.backProb(...outputData);


            });


        }


        return this.calcDataError(data);

    }

    private calcDataError(data){
        var overallDataErrorSum = 0;
        var inputs = this.inputLayer.neurons.length - 1;

        data.forEach(node => {
            var outputData = node.splice(inputs);
            var inputData = node;

            this.feedForward(...inputData);
            overallDataErrorSum += this.getNetError(outputData);
        });

        overallDataErrorSum /= data.length;

        return overallDataErrorSum;
    }

    public saveWeights() {
        // Ignore output layer
        var ret = []
        for (var iL = 0; iL < this.layers.length - 1; iL++) {
            var layer = this.layers[iL];
            var layer_ret = [];

            layer.neurons.forEach(neuron => {
                layer_ret.push(neuron.connections.map(c => c.weight));
            });

            ret.push(layer_ret);
        }
        return ret;
    }

    feedForward(...inputs: number[]) {
        if (inputs.length != this.inputLayer.neurons.length - 1) return false;

        this.inputLayer.neurons.forEach((neuron, i) => {
            if (!neuron.isBias) {
                neuron.output(inputs[i]);
            }
        });

        this.layers.forEach((layer, i) => {
            // Skip Input Layer
            if (i > 0) {
                var prevLayer = this.layers[i - 1]
                layer.neurons.forEach(neuron => {
                    neuron.calcOutput(prevLayer);
                });
            }
        });

    }

    public getNetError(targetVals) {
        // Calc delta error of outputs
        var deltas = [];

        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
                deltas.push(neuron.delta);
            }
        });

        deltas = deltas.map(d => Math.pow(d, 2));


        var sum = 0;

        deltas.forEach(d => sum += d);

        return sum / deltas.length;


    }

    backProb(...targetVals: number[]) {



        // Calc delta error of outputs
        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
            }
        });

        // Backprop delta error through hidden layers

        for (var iL = this.layers.length - 2; iL > 0; iL--) {
            var layer = this.layers[iL];
            var nextLayer = this.layers[iL + 1]
            layer.neurons.forEach(neuron => {
                neuron.calcHiddenDelta(nextLayer);
            });

        }

        // Update weights

        for (var iL = 1; iL < this.layers.length; iL++) {
            var layer = this.layers[iL];
            var prevLayer = this.layers[iL - 1];

            layer.neurons.forEach(neuron => {
                if (!neuron.isBias) {
                    neuron.updateWeights(prevLayer, this.eta);
                }
            });
        }

        this.error = this.getNetError(targetVals);

        return this.error;

    }

    getOutputs(...inputs: number[]) {

        var ret = [];
        this.outputLayer.neurons.forEach(neuron => {
            if (!neuron.isBias) {
                ret.push(neuron.output())
            }
        });
        return ret;

    }

    getResults(...inputs: number[]) {
        this.feedForward(...inputs)
        return this.getOutputs();
    }

    layer(i) {
        return this.layers[i];
    }
}

/**
 * Layer
 */
class Layer {
    public neurons: Neuron[] = [];
    constructor(neuronNumber: number, nextLayerNeuronNumber: number) {
        for (var iN = 0; iN < neuronNumber + 1; iN++) {
            // +1 for bias neuron, which is last
            if (iN < neuronNumber) {
                // Create normal neuron
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, false));
            } else {
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, true));
            }
        }
    }

    neuron(i) {
        return this.neurons[i];
    }

    bias() {
        return this.neurons[this.neurons.length - 1];
    }
}

/**
 * Neuron
 */
class Neuron {
    public connections: Connection[] = [];
    private outputVal: number;
    public delta: number;

    constructor(outputsTo: number, private index, public isBias = false) {

        // Creates connections
        for (var c = 0; c < outputsTo; c++) {
            this.connections.push(new Connection());
        }

        this.outputVal = isBias ? 1 : 0;

    }

    calcOutput(prevLayer: Layer) {

        // Only calcOutput when neuron is not a bias neuron

        if (!this.isBias) {
            var sum = 0;

            prevLayer.neurons.forEach(prevLayerNeuron => {
                sum += prevLayerNeuron.output() * prevLayerNeuron.getWeights(this.index).weight;
            });

            this.output(this.activationFunction(sum));
        }

    }

    private activationFunction(x) {

        //return Math.tanh(x);
        return 1 / (1 + Math.exp(-x))
        //return x;
    };

    private activationFunctionDerivative(x) {
        // Small approximation of tanh derivative
        //return 1 - x * x

        // Sigmoid
        var s = this.activationFunction(x);
        return s * (1 - s);

        // With general derivative formula where h = 1e-10
        /*var h = 0.0001;
        var dx = ((this.activationFunction(x + h) - this.activationFunction(x))/h)
        return dx;*/

        //return 1
    };

    // Backprop // Todo // Understand


    public calcOutputDelta(targetVal) {

        // Bias output neurons do not have delta error
        if (!this.isBias) {
            this.delta = targetVal - this.output();
        }
    }

    public calcHiddenDelta(nextLayer: Layer) {
        var sum = 0;

        // Go through all neurons of next layer excluding bias
        nextLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                sum += neuron.delta * this.getWeights(iN).weight;
            }
        });

        this.delta = sum;
    }

    public updateWeights(prevLayer: Layer, eta: number) {

        prevLayer.neurons.forEach((neuron, iN) => {
            var weight = neuron.getWeights(this.index).weight;
            var newWeight =
                weight + // old weight
                eta *   // learning weight
                this.delta * // delta error
                this.activationFunctionDerivative(neuron.output())
            neuron.getWeights(this.index).weight = newWeight;
        });


    }


    // Backprop end

    output(s?) {
        if (s && !this.isBias) {
            this.outputVal = s;
            return this.outputVal;
        } else {
            return this.outputVal;
        }
    }

    getWeights(i) {
        return this.connections[i];
    }

    setWeights(i, s) {
        return this.connections[i].weight = s;
    }
}

/**
 * Connection
 */
class Connection {
    public weight: number;
    public deltaWeight: number;

    constructor() {
        this.weight = Math.random();
        this.deltaWeight = 0;
    }
}

When training it for just one set of data, it works just fine (example from here):

import {Net} from './ml';

var myNet = new Net(2, 2, 2);


var weights = [
    [
        [0.15, 0.25],
        [0.20, 0.30],
        [0.35, 0.35]
    ],
    [
        [0.40, 0.50],
        [0.45, 0.55],
        [0.60, 0.60]
    ]
];

// Just loads the weights given in the example

myNet.loadWeights(weights)

var error = myNet.train([[0.05, 0.10, 0.01, 0.99]]);
console.log('Error: ', error);

console.log(myNet.getResults(0.05, 0.10));

The console prints:

Error:  0.0000020735174706210714
[ 0.011556397089327321, 0.9886867357304885 ]

Basically, that's pretty good, right?

Then, I wanted to teach the network the XOR problem:

import {Net} from './ml';

var myNet = new Net(2, 3, 1);


var trainingData = [
    [0, 0, 0],
    [1, 0, 1],
    [0, 1, 1],
    [1, 1, 0]
]

var error = myNet.train(trainingData)
console.log('Error: ', error);

console.log('Input: 0, 0: ', myNet.getResults(0, 0));
console.log('Input: 1, 0: ', myNet.getResults(1, 0));

Here the network fails:

Error:  0.2500007370167383
Input: 0, 0:  [ 0.5008584967899313 ]
Input: 1, 0:  [ 0.5008584967899313 ]

What am I doing wrong?

Recommended Answer

First, perform gradient checks on the entire batch (meaning on the function that computes the gradients for the batch), if you have not done so already. This will ensure you know what the problem is.
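A minimal sketch of such a check, in TypeScript to match the question: it compares an analytic gradient against a central-difference estimate of the loss. The loss and analyticGrad callbacks here are hypothetical hooks you would wire up to your own net (for example, by flattening the weights the way saveWeights does); they are not part of the code above.

// Numerical gradient check (sketch). `loss` maps a flat weight vector to the
// batch loss; `analyticGrad` returns the backprop gradient for the same
// weights. Both are hypothetical hooks, not part of the Net class above.
function checkGradients(
    weights: number[],
    loss: (w: number[]) => number,
    analyticGrad: (w: number[]) => number[],
    eps: number = 1e-5,
    tol: number = 1e-4
): boolean {
    const grad = analyticGrad(weights);
    for (let i = 0; i < weights.length; i++) {
        const wPlus = weights.slice();
        const wMinus = weights.slice();
        wPlus[i] += eps;
        wMinus[i] -= eps;
        // Central-difference estimate of dLoss/dw_i
        const numeric = (loss(wPlus) - loss(wMinus)) / (2 * eps);
        const denom = Math.max(Math.abs(numeric) + Math.abs(grad[i]), 1e-12);
        if (Math.abs(numeric - grad[i]) / denom > tol) {
            console.log(`weight ${i}: analytic ${grad[i]}, numeric ${numeric}`);
            return false;
        }
    }
    return true;
}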

If the gradients are not correctly computed, then, given that your implementation works on a single data set, you are most likely mixing up some values in the backward pass.
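For comparison, the textbook backward pass for a sigmoid unit multiplies the weighted sum of the next layer's deltas by the activation derivative evaluated at the unit's own output. A minimal sketch as a free function (hypothetical names, not the question's API):

// Hidden-layer delta for a sigmoid unit (sketch). `out` is the unit's own
// activation sigma(net), so sigma'(net) = out * (1 - out). `nextDeltas[i]` is
// the delta of the i-th downstream neuron and `weightsToNext[i]` the weight
// of the connection leading to it.
function hiddenDelta(out: number, nextDeltas: number[], weightsToNext: number[]): number {
    let sum = 0;
    for (let i = 0; i < nextDeltas.length; i++) {
        sum += nextDeltas[i] * weightsToNext[i];
    }
    return sum * out * (1 - out);
}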

If the gradients are correctly computed, there is an error in your update function.
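Likewise for the update: with deltas defined as above (the activation derivative already folded into the delta), the usual stochastic-gradient step for the weight from previous-layer neuron i to current neuron j is just eta * delta_j * out_i. A sketch, again with hypothetical names:

// Textbook weight update (sketch): the weight moves by eta * delta_j * out_i,
// where delta_j already contains the activation derivative of neuron j and
// out_i is the output of the previous-layer neuron feeding the connection.
function updatedWeight(oldWeight: number, eta: number, deltaJ: number, outI: number): number {
    return oldWeight + eta * deltaJ * outI;
}

Comparing a rule like this term by term against updateWeights above is a quick way to spot a missing or misplaced factor.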

A working example can be found here. Here is the code snippet of the trainStep function using backpropagation:

function trainStepBatch(details){
    // compute the forward pass for each training sample in the batch
    // and store the per-layer results in the batch array
    var batch=[];
    var ks=[];
    for(var a=0;a<details.data.in.length;a++){
        var results=[];
        var k=1;
        results[0]={output:details.data.in[a]};
        for(var i=1;i<this.layers.length;i++){
            results[i]=layers[this.layers[i].type].evalForGrad(this.layers[i],results[i-1].output);
            k++;
        }
        batch[a]=results;
        ks[a]=k;
    }
    // compute the backward pass:
    // first the derivative of the cost function given the output
    var grad=[];
    for(i in batch)grad[i]={grad:costs[details.cost].df(batch[i][ks[i]-1].output,details.data.out[i])};
    // for each layer, run the backward pass on the results
    // of all forward passes at that layer
    for(var i=this.layers.length-1;i>0;i--){
        var grads=[];
        var test=true;
        for(a in batch){
            grads[a]=layers[this.layers[i].type].grad(this.layers[i],batch[a][i],batch[a][i-1],grad[a]);
            if(grads[a]==null)test=false;
            else grads[a].layer=i;
        }
        // perform the update
        if(test)stepBatch(this.layers[i].par,grads,details.stepSize);
    }
}

对于stepBatch函数

And for the stepBatch function

function stepBatch(params,grads,stepSize){
    // step each weight against the per-sample gradients
    for(i in params.w){
        for(j in params.w[i]){
            for(a in grads){
                params.w[i][j]-=stepSize*grads[a].dw[i][j];
            }
        }
    }
    // step the bias terms
    for(i in params.b){
        for(a in grads){
            params.b[i]-=stepSize*grads[a].db[i];
        }
    }
}

That concludes this article on neural network backpropagation not working; hopefully the recommended answer above is of help.
