For about a month now I have been teaching myself machine learning, and deep learning in particular, and working hard at it.
After going through all the math concepts, I decided to write a Python program with a single neuron myself, and that neuron works fine (very good accuracy).

Now I have decided to do the same thing with a hidden layer of 2 neurons, 2 inputs and 1 output neuron, but it does not work: the cost does not decrease and the accuracy does not improve, even though the program does run (output below).

import numpy as np
import matplotlib.pyplot as plt


def init_variables():
    """
        Init model variables (weights, biases)
    """
    weights_11 = np.random.normal(size=2)
    weights_12 = np.random.normal(size=2)
    weight_ouput = np.random.normal(size=2)
    bias_11 = 0
    bias_12 = 0
    bias_output = 0
    return weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output

def get_dataset():
    """
        Method used to generate the dataset
    """
    #Number of rows per class
    row_per_class = 100
    #generate rows
    sick_people =  (np.random.randn(row_per_class,2)) + np.array([-2,-2])
    sick_people2 =  (np.random.randn(row_per_class,2)) + np.array([2,2])
    healthy_people = (np.random.randn(row_per_class,2)) + np.array([-2,2])
    healthy_people2 =  (np.random.randn(row_per_class,2)) + np.array([2,-2])

    features = np.vstack([sick_people,sick_people2, healthy_people, healthy_people2])
    targets = np.concatenate((np.zeros(row_per_class*2), np.zeros(row_per_class*2)+1))

    #plt.scatter(features[:,0], features[:,1], c=targets, cmap = plt.cm.Spectral)
    #plt.show()

    return features, targets

def pre_activation(features, weights, bias):
    """
        compute the pre-activation of the neuron
    """
    return np.dot(features, weights) + bias

def activation(z):
    """
        compute the activation (sigmoid)
    """
    return 1 / ( 1 + np.exp(-z) )

def derivative_activation(z):
    """
        compute the derivative of the activation (derivative of the sigmoid)
    """
    return activation(z) * (1 - activation(z))


def cost(predictions, targets):
    """
        compute the mean squared error between the predictions and the targets
    """
    return np.mean((predictions - targets)**2)

def predict_hidden_layer(features, weights_11, weights_12, bias_11, bias_12):
    """
        This function is not generic at all; it aims to show how the input for the output neuron is built
    """
    predictions_11 = activation(pre_activation(features, weights_11, bias_11))
    predictions_12 = activation(pre_activation(features, weights_12, bias_12))
    layer1_result = np.stack((predictions_11, predictions_12), axis=-1)
    return layer1_result

def predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output):
    """
        Determine the prediction of the output
    """
    layer1_result = predict_hidden_layer(features, weights_11, weights_12, bias_11, bias_12)
    output_result = activation(pre_activation(layer1_result, weight_ouput, bias_output))
    return layer1_result, output_result


def train_multiple_neurals(features, targets, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output):
    """
        Train the network: adjust the weights and biases according to the features and targets.
        This function is not generic or optimized; it aims to make the mechanics easier to follow.
    """
    epochs = 100
    learning_rate = 0.1

    #display Accuracy before the training
    layer1, prediction = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
    predictions = np.around(prediction)
    print ("Accuracy", np.mean(predictions == targets))

    for epoch in range(epochs):
        layer1, predictions = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
        if epoch % 10 == 0:
            layer1, predictions = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
            print (cost(predictions, targets))
        """
            There are a lot of things to do here!
            To do the back propagation, we will first train the output neuron
        """
        #Init gradient
        weights_gradient_output = np.zeros(weight_ouput.shape)
        bias_gradient_output = 0
        #Go through each row
        for neural_input, target, prediction in zip(layer1, targets, predictions):
            #compute pre activation
            z = pre_activation(neural_input, weight_ouput, bias_output)
            #Update the gradient
            weights_gradient_output += (prediction - target)* derivative_activation(prediction) * neural_input
            bias_gradient_output += (prediction - target)* derivative_activation(prediction)

        """
            Now we are going to train the hidden layer of neurons
        """
        weights_gradient_11 = np.zeros(weights_11.shape)
        bias_gradient_11 = 0

        weights_gradient_12 = np.zeros(weights_12.shape)
        bias_gradient_12 = 0

        #Go through each row
        for neural_output, feature, target, prediction in zip(layer1, features, targets, predictions):
            #compute pre activation
            z = pre_activation(neural_input, weights_11, bias_11)
            #Update the gradient
            weights_gradient_11 += (prediction - target)* derivative_activation(prediction) * weight_ouput[0]  * derivative_activation(neural_output[0]) * feature
            bias_gradient_11 += (prediction - target)* derivative_activation(prediction) * weight_ouput[0]  * derivative_activation(neural_output[0])

            #print (weights_gradient_11)
            #Update the gradient
            weights_gradient_12 += (prediction - target)* derivative_activation(prediction) * weight_ouput[1]  * derivative_activation(neural_output[1]) * feature
            bias_gradient_12 += (prediction - target)* derivative_activation(prediction) * weight_ouput[1]  * derivative_activation(neural_output[1])

        #Update the weights and bias
        weight_ouput = weight_ouput - (learning_rate * weights_gradient_output)
        bias_output = bias_output - (learning_rate * bias_gradient_output)
        weights_11 =  weights_11 - (learning_rate * weights_gradient_11)
        bias_11 =  bias_11 - (learning_rate * bias_gradient_11)
        weights_12 =  weights_12 - (learning_rate * weights_gradient_12)
        bias_12 =  bias_12 - (learning_rate * bias_gradient_12)

    layer1, prediction = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
    predictions = np.around(prediction)
    print ("Accuracy", np.mean(predictions == targets))


if __name__ == '__main__':
    #dataset
    features, targets  = get_dataset()
    #variables
    weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output = init_variables()
    layer1_result, output_result = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
    train_multiple_neurals(features, targets, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)


The code is not efficient because I wanted to understand every step. I know the problem comes from the training of the hidden layer, but it follows the formula I have seen on the internet (neuron input * (prediction - target) * sigmoid'(prediction) * weightOfTheNextLayer), which is why I really do not understand.
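For reference, with the mean-squared-error cost and sigmoid activations used in the code above, the chain rule for an output weight gives (a standard derivation, up to a constant factor from the mean; here \hat{y}, y, a_j and z_{out} simply denote the prediction, the target, the hidden outputs and the output pre-activation):

\frac{\partial C}{\partial w_j} = (\hat{y} - y)\,\sigma'(z_{out})\,a_j,
\qquad z_{out} = \sum_j w_j a_j + b_{out},
\qquad \sigma'(z) = \sigma(z)\bigl(1 - \sigma(z)\bigr)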

Here is my output (the accuracy at the start and at the end, with the cost values in between); the accuracy does not increase and the cost does not decrease:

Accuracy 0.6025
0.32149563353794364
0.3216454935878719
0.32177853678600526
0.32189583396850424
0.32199849304998307
0.3220876323586574
0.3221644075538757
0.32223008209366144
0.32228608192864866
0.32233396315649065
0.3223752777740352
0.32241140511378036
0.3224434401200392
0.3224721764785219
0.32249815913581226
0.32252176039218206
0.32254324818743063
0.32256283493698107
0.32258070692435065
0.3225970387325917
0.3226119980415239
0.322625745368742
0.3226384319652169
0.32265019765826863
0.3226611692835548
0.32267145957097
0.3226811659211415
0.32269036836411585
0.3226991261062232
0.32270747252405985
0.3227154094426258
0.3227229031837465
0.32272988687106613
0.3227362744197289
0.3227419889521814
0.3227470002539846
0.32275135531703975
0.3227551824643601
0.3227586613182756
0.32276197240283183
0.32276525289471264
0.32276857750543586
0.3227719648351581
0.3227753969249716
0.32277883940346674
0.3227822558361521
0.32278561551026963
0.3227888964074382
0.322792085387534
0.3227951770494241
Accuracy 0.5


It would be great if you could help me!

Best Answer

There is probably a mistake in your derivative function.

def derivative_activation(z):
    """
        compute the derivative of the activation (derivative of sigmoide)
    """
    return activation(z) * (1 - activation(z))


Suppose that at the last output layer you have out_F = sigmoid(in_F), where out_F is your prediction and in_F is the input fed into that last node.
As the name of your function suggests, it is probably meant to take the derivative with respect to that in_F.
So it should be d{out_F}/d{in_F} = out_F * (1 - out_F).
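
For completeness, the usual sigmoid algebra behind that identity:

\sigma(x) = \frac{1}{1+e^{-x}},
\qquad
\sigma'(x) = \frac{e^{-x}}{(1+e^{-x})^{2}} = \sigma(x)\bigl(1-\sigma(x)\bigr),

so if out_F = \sigma(in_F), then d\,out_F/d\,in_F = out_F\,(1 - out_F).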

Try this:

def derivative_activation(z):
    """
        compute the derivative of the activation (derivative of sigmoide)
    """
    return z * (1 - z)
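
A minimal numerical check of the two formulations (just a sketch assuming numpy; the helper names derivative_on_preactivation and derivative_on_output are introduced here only for illustration):

import numpy as np

def activation(z):
    # sigmoid, same as in the question
    return 1 / (1 + np.exp(-z))

def derivative_on_preactivation(z):
    # original formulation: sigma'(z) = sigma(z) * (1 - sigma(z)), expects the pre-activation z
    return activation(z) * (1 - activation(z))

def derivative_on_output(out):
    # suggested formulation: out * (1 - out), expects the activation output out = sigma(z)
    return out * (1 - out)

z = np.linspace(-5, 5, 11)   # some pre-activation values
out = activation(z)          # the corresponding predictions

# Both expressions give the same derivative d(out)/d(z)
# when each one is fed the right quantity
print(np.allclose(derivative_on_preactivation(z), derivative_on_output(out)))  # True

In the training loop above, derivative_activation is called on prediction, which is already an activation output, so the second form is the one consistent with that call site.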
