In multiple linear regression, gradient descent is used to fit the parameter values. Here I implement a batch gradient descent version in Python.
The multiple linear model here is y = A0 + A1*x1 + ... + An*xn.
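For reference, the main loop in the code below follows standard batch gradient descent on the squared-error cost. Writing \theta_j for A_j, m for the number of training samples, and taking x_0 = 1 for the intercept (this notation is mine, not the code's), the hypothesis, cost, and per-iteration update are:

    h_\theta(x) = \theta_0 + \theta_1 x_1 + \dots + \theta_n x_n
    J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2
    \theta_j \leftarrow \theta_j - \frac{\alpha}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}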
Each line of the input file is one training sample, tab-separated, with the target value y in the first column:
y \t x1 \t x2 \t .... xn
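For example, a file for a model with two features (n = 2, so three parameters including the intercept) could contain lines like these; the numbers are made up purely to show the layout:

14	3	1
29	6	4
52	10	9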
The code is as follows:
import sys

theta = []          # model parameters: theta[0] is the intercept A0
training_data = []  # each row is [y, x1, ..., xn]
h_value = []        # predicted value h(x) for each training sample
alpha = 0.0000009   # learning rate

def load(path):
    # Read the tab-separated training file; the first column is y.
    f = open(path, 'r')
    for x in f:
        x = x.strip('\r\n')
        fields = x.split('\t')
        v_list = []
        for v in fields:
            v_list.append(float(v))
        training_data.append(v_list)
    f.close()
    for x in training_data:
        h_value.append(0.0)

def init(path, theta_num):
    # theta_num is n+1: one weight per feature plus the intercept.
    for x in range(theta_num):
        theta.append(1.0)
    load(path)

def gradient():
    i = 0
    loss = 100.0
    theta_num = len(theta)
    data_num = len(training_data)
    # Batch gradient descent: stop after 3000 iterations
    # or once the loss drops below 0.0001.
    while i < 3000 and loss > 0.0001:
        # Compute h(x) = theta[0] + theta[1]*x1 + ... for every sample.
        for index in range(data_num):
            hv = theta[0]
            for k in range(1, theta_num):
                hv += theta[k] * training_data[index][k]
            h_value[index] = hv
        # Update every theta simultaneously; h_value was computed from
        # the old theta, so this is the true batch gradient.
        for index in range(theta_num):
            s = 0.0
            for k in range(data_num):
                if index == 0:
                    # The intercept's "feature" is the constant 1.
                    s += (h_value[k] - training_data[k][0]) * 1
                else:
                    s += (h_value[k] - training_data[k][0]) * training_data[k][index]
            theta[index] = theta[index] - alpha * s / data_num
        # Compute the loss J = sum((h - y)^2) / (2m).
        loss = 0.0
        for index in range(data_num):
            hv = theta[0]
            for k in range(1, theta_num):
                hv += theta[k] * training_data[index][k]
            loss += pow(hv - training_data[index][0], 2) / (2 * data_num)
        print(loss)
        i += 1
    for x in theta:
        print(x, end=' ')
    print()

if __name__ == '__main__':
    path = sys.argv[1]
    init(path, int(sys.argv[2]))
    gradient()
    sys.exit(0)
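Below is a quick smoke test, as a sketch under two assumptions: the script above is saved as gradient_descent.py (my name for it, use whatever you saved it as), and the data is generated from y = 2 + 3*x1 + 5*x2 purely for illustration. With the tiny learning rate alpha = 0.0000009 and the 3000-iteration cap, theta will only creep toward the true coefficients on this data; in practice you would raise alpha or the iteration limit.

# Hypothetical smoke test; gradient_descent.py is an assumed file name.
import random
import subprocess
import sys

# Write 100 illustrative samples drawn from y = 2 + 3*x1 + 5*x2.
with open('train.txt', 'w') as f:
    for _ in range(100):
        x1 = random.randint(0, 10)
        x2 = random.randint(0, 10)
        y = 2 + 3 * x1 + 5 * x2
        f.write('%d\t%d\t%d\n' % (y, x1, x2))

# 3 = number of parameters: the intercept plus two feature weights.
subprocess.call([sys.executable, 'gradient_descent.py', 'train.txt', '3'])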