For about a month now I have been teaching myself machine learning, especially deep learning, and working hard at it.
After working through the math, I wrote a single-neuron Python program on my own, and that one works fine (very good accuracy).
Now I want to do the same thing with a hidden layer of 2 neurons, 1 output neuron and 2 inputs, but it does not work: the cost does not decrease and the accuracy does not improve. The program itself does run, though (output below).
import numpy as np
import matplotlib.pyplot as plt

def init_variables():
    """
    Init model variables (weights, biases)
    """
    weights_11 = np.random.normal(size=2)
    weights_12 = np.random.normal(size=2)
    weight_ouput = np.random.normal(size=2)
    bias_11 = 0
    bias_12 = 0
    bias_output = 0
    return weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output

def get_dataset():
    """
    Method used to generate the dataset
    """
    # Number of rows per class
    row_per_class = 100
    # Generate rows
    sick_people = (np.random.randn(row_per_class, 2)) + np.array([-2, -2])
    sick_people2 = (np.random.randn(row_per_class, 2)) + np.array([2, 2])
    healthy_people = (np.random.randn(row_per_class, 2)) + np.array([-2, 2])
    healthy_people2 = (np.random.randn(row_per_class, 2)) + np.array([2, -2])
    features = np.vstack([sick_people, sick_people2, healthy_people, healthy_people2])
    targets = np.concatenate((np.zeros(row_per_class * 2), np.zeros(row_per_class * 2) + 1))
    #plt.scatter(features[:,0], features[:,1], c=targets, cmap=plt.cm.Spectral)
    #plt.show()
    return features, targets

def pre_activation(features, weights, bias):
    """
    Compute the pre-activation of the neuron
    """
    return np.dot(features, weights) + bias

def activation(z):
    """
    Compute the activation (sigmoid)
    """
    return 1 / (1 + np.exp(-z))

def derivative_activation(z):
    """
    Compute the derivative of the activation (derivative of the sigmoid)
    """
    return activation(z) * (1 - activation(z))

def cost(predictions, targets):
    """
    Measure the difference between the predictions and the targets
    """
    return np.mean((predictions - targets)**2)

def predict_hidden_layer(features, weights_11, weights_12, bias_11, bias_12):
    """
    This function is not generic at all and aims to understand how the input
    of the next (output) neuron is built
    """
    predictions_11 = activation(pre_activation(features, weights_11, bias_11))
    predictions_12 = activation(pre_activation(features, weights_12, bias_12))
    layer1_result = np.stack((predictions_11, predictions_12), axis=-1)
    return layer1_result

def predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output):
    """
    Determine the prediction of the output neuron
    """
    layer1_result = predict_hidden_layer(features, weights_11, weights_12, bias_11, bias_12)
    output_result = activation(pre_activation(layer1_result, weight_ouput, bias_output))
    return layer1_result, output_result

def train_multiple_neurals(features, targets, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output):
    """
    Train the network (adjust the weights and biases given the features and targets).
    This function is not generic or optimized and aims to understand better how it works
    """
    epochs = 100
    learning_rate = 0.1
    # Display the accuracy before the training
    layer1, prediction = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
    predictions = np.around(prediction)
    print("Accuracy", np.mean(predictions == targets))
    for epoch in range(epochs):
        layer1, predictions = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
        if epoch % 10 == 0:
            layer1, predictions = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
            print(cost(predictions, targets))
        """
        There is a lot to do here!
        To do the back propagation, we first train the output neuron
        """
        # Init the gradients
        weights_gradient_output = np.zeros(weight_ouput.shape)
        bias_gradient_output = 0
        # Go through each row
        for neural_input, target, prediction in zip(layer1, targets, predictions):
            # Compute the pre-activation
            z = pre_activation(neural_input, weight_ouput, bias_output)
            # Update the gradient
            weights_gradient_output += (prediction - target) * derivative_activation(prediction) * neural_input
            bias_gradient_output += (prediction - target) * derivative_activation(prediction)
        """
        Now we are going to train the hidden layer of neurons
        """
        weights_gradient_11 = np.zeros(weights_11.shape)
        bias_gradient_11 = 0
        weights_gradient_12 = np.zeros(weights_12.shape)
        bias_gradient_12 = 0
        # Go through each row
        for neural_output, feature, target, prediction in zip(layer1, features, targets, predictions):
            # Compute the pre-activation
            z = pre_activation(neural_input, weights_11, bias_11)
            # Update the gradient of the first hidden neuron
            weights_gradient_11 += (prediction - target) * derivative_activation(prediction) * weight_ouput[0] * derivative_activation(neural_output[0]) * feature
            bias_gradient_11 += (prediction - target) * derivative_activation(prediction) * weight_ouput[0] * derivative_activation(neural_output[0])
            #print(weights_gradient_11)
            # Update the gradient of the second hidden neuron
            weights_gradient_12 += (prediction - target) * derivative_activation(prediction) * weight_ouput[1] * derivative_activation(neural_output[1]) * feature
            bias_gradient_12 += (prediction - target) * derivative_activation(prediction) * weight_ouput[1] * derivative_activation(neural_output[1])
        # Update the weights and biases
        weight_ouput = weight_ouput - (learning_rate * weights_gradient_output)
        bias_output = bias_output - (learning_rate * bias_gradient_output)
        weights_11 = weights_11 - (learning_rate * weights_gradient_11)
        bias_11 = bias_11 - (learning_rate * bias_gradient_11)
        weights_12 = weights_12 - (learning_rate * weights_gradient_12)
        bias_12 = bias_12 - (learning_rate * bias_gradient_12)
    # Display the accuracy after the training
    layer1, prediction = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
    predictions = np.around(prediction)
    print("Accuracy", np.mean(predictions == targets))

if __name__ == '__main__':
    # Dataset
    features, targets = get_dataset()
    # Variables
    weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output = init_variables()
    layer1_result, output_result = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
    train_multiple_neurals(features, targets, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
The code is not efficient, because I want to understand every step. I know the problem is in the training of the hidden layer, but it follows the formula I have seen on the internet (neural input * (prediction - target) * sigmoid'(prediction) * weightOfTheNextLayer), which is why I really don't understand.
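For reference, written out as the chain rule I think I am following (my own notation, which may not map exactly onto the code above, and this may be exactly where I go wrong), the gradient accumulated for one weight of the first hidden neuron would be

$$
\frac{\partial C}{\partial w^{(1)}_{1,i}} \;\propto\; (p - t)\;\sigma'(z_{\text{out}})\;w^{\text{out}}_{1}\;\sigma'(z^{(1)}_{1})\;x_i
$$

where $p$ is the prediction, $t$ the target, $x_i$ the input feature, $z_{\text{out}}$ and $z^{(1)}_{1}$ the pre-activations of the output neuron and of the first hidden neuron, and $\sigma'$ the derivative of the sigmoid.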
Here is my output (the accuracy at the start and at the end, with the costs in between); the accuracy does not increase and the cost does not decrease:
Accuracy 0.6025
0.32149563353794364
0.3216454935878719
0.32177853678600526
0.32189583396850424
0.32199849304998307
0.3220876323586574
0.3221644075538757
0.32223008209366144
0.32228608192864866
0.32233396315649065
0.3223752777740352
0.32241140511378036
0.3224434401200392
0.3224721764785219
0.32249815913581226
0.32252176039218206
0.32254324818743063
0.32256283493698107
0.32258070692435065
0.3225970387325917
0.3226119980415239
0.322625745368742
0.3226384319652169
0.32265019765826863
0.3226611692835548
0.32267145957097
0.3226811659211415
0.32269036836411585
0.3226991261062232
0.32270747252405985
0.3227154094426258
0.3227229031837465
0.32272988687106613
0.3227362744197289
0.3227419889521814
0.3227470002539846
0.32275135531703975
0.3227551824643601
0.3227586613182756
0.32276197240283183
0.32276525289471264
0.32276857750543586
0.3227719648351581
0.3227753969249716
0.32277883940346674
0.3227822558361521
0.32278561551026963
0.3227888964074382
0.322792085387534
0.3227951770494241
Accuracy 0.5
It would be great if you could help me!
Best answer
Your derivative function probably has a mistake.
def derivative_activation(z):
    """
    Compute the derivative of the activation (derivative of the sigmoid)
    """
    return activation(z) * (1 - activation(z))
Suppose that in your last output layer you have out_F = sigmoid(in_F), where out_F is your prediction and in_F is the input to your last node. As the function name suggests, this function is probably meant to be the derivative with respect to that in_F, so it should be d{out_F}/d{in_F} = out_F * (1 - out_F).
Try this:
def derivative_activation(z):
    """
    Compute the derivative of the activation (derivative of the sigmoid),
    given the already-activated output
    """
    return z * (1 - z)
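As a quick sanity check (a minimal sketch, not from the original code, assuming the training loop keeps passing already-activated values such as prediction and neural_output[...] into derivative_activation): feeding the activation into the fixed formula gives the same number as applying the original formula to the pre-activation.

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

z = 0.7           # an arbitrary pre-activation value, chosen only for illustration
a = sigmoid(z)    # the activated output, i.e. what the training loop actually passes around

# Derivative of the sigmoid with respect to z, computed two equivalent ways:
print(sigmoid(z) * (1 - sigmoid(z)))   # original formula, applied to the pre-activation z
print(a * (1 - a))                     # fixed formula, applied to the activation a

Both lines print the same value (about 0.22), which is why this fix only makes sense as long as the caller passes the activation rather than the pre-activation.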