1. Linear regression with two input features

The test data is ex1data2.txt:

(Each row of ex1data2.txt contains three comma-separated integers: two input features followed by the target price.)
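Both scripts below follow the same recipe: standardize the features, then fit h(x) = theta_1*x_1 + ... + theta_n*x_n + theta_0 by minimizing the cost J(theta) = 1/(2m) * sum((X.theta - y)^2) with batch gradient descent, whose vectorized update is theta := theta - (alpha/m) * X^T.(X.theta - y). Standardizing each feature to zero mean and unit variance puts the features on comparable scales, so a single learning rate alpha works for every parameter.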

The Python code is as follows:

# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt

# Load the data
def load_exdata(filename):
    data = []
    with open(filename, 'r') as f:
        for line in f.readlines():
            line = line.split(',')
            # use int or another type, depending on the input data
            current = [int(item) for item in line]
            data.append(current)
    return data

data = load_exdata('ex1data2.txt')
data = np.array(data, np.int64)

# Feature scaling
def featureNormalize(X):
    mu = np.zeros((1, X.shape[1]))
    sigma = np.zeros((1, X.shape[1]))
    for i in range(X.shape[1]):
        mu[0, i] = np.mean(X[:, i])    # mean of column i
        sigma[0, i] = np.std(X[:, i])  # standard deviation of column i
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma

# Compute the cost
def computeCost(X, y, theta):
    m = y.shape[0]
    # J = (np.sum((X.dot(theta) - y)**2)) / (2*m)
    C = X.dot(theta) - y
    J2 = (C.T.dot(C)) / (2 * m)
    return J2

# Gradient descent
def gradientDescent(X, y, theta, alpha, num_iters):
    m = y.shape[0]
    # store the cost of every iteration
    J_history = np.zeros((num_iters, 1))
    for iter in range(num_iters):
        # gradient step: theta := theta - alpha/m * X^T (X.theta - y); shapes (3,m)x(m,1) = (3,1)
        theta = theta - (alpha / m) * (X.T.dot(X.dot(theta) - y))
        J_history[iter] = computeCost(X, y, theta)
    return J_history, theta

iterations = 1500  # number of iterations (assumed value; the original was lost)
alpha = 0.01       # learning rate

x = data[:, (0, 1)].reshape((-1, 2))  # the two feature columns
y = data[:, 2].reshape((-1, 1))       # the target (price) column
m = y.shape[0]
x, mu, sigma = featureNormalize(x)
X = np.hstack([x, np.ones((x.shape[0], 1))])  # append the intercept column

theta = np.zeros((3, 1))  # 2 features plus the intercept
j = computeCost(X, y, theta)
J_history, theta = gradientDescent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent', theta)

def predict(data):
    testx = np.array(data)
    testx = (testx - mu) / sigma
    testx = np.hstack([testx, np.ones((testx.shape[0], 1))])
    price = testx.dot(theta)
    print('price is %d' % price[0, 0])

predict([1650, 3])  # assumed sample input, e.g. a 1650 sq-ft, 3-bedroom house
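As a quick cross-check on gradient descent, the same parameters can be computed in closed form with the normal equation theta = (X^T X)^(-1) X^T y. A minimal sketch, reusing the X and y built above (np.linalg.pinv is used instead of a plain inverse for numerical safety):

# Closed-form cross-check via the normal equation
theta_closed = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)
print('Theta from the normal equation', theta_closed)
# With enough iterations, the gradient-descent theta should approach these values.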

2. Multivariate linear regression, illustrated with three input features

Input data: testdata.txt. The first column is a running sample index and must not be used as an input feature; the remaining columns appear to be the classic Advertising data set (TV, radio and newspaper budgets, followed by sales).

,230.1,37.8,69.2,22.1
,44.5,39.3,45.1,10.4
,17.2,45.9,69.3,9.3
,151.5,41.3,58.5,18.5
,180.8,10.8,58.4,12.9
,8.7,48.9,,7.2
,57.5,32.8,23.5,11.8
,120.2,19.6,11.6,13.2
,8.6,2.1,,4.8
,199.8,2.6,21.2,10.6
,66.1,5.8,24.2,8.6
,214.7,,,17.4
,23.8,35.1,65.9,9.2
,97.5,7.6,7.2,9.7
,204.1,32.9,,
,195.4,47.7,52.9,22.4
,67.8,36.6,,12.5
,281.4,39.6,55.8,24.4
,69.2,20.5,18.3,11.3
,147.3,23.9,19.1,14.6
,218.4,27.7,53.4,
,237.4,5.1,23.5,12.5
,13.2,15.9,49.6,5.6
,228.3,16.9,26.2,15.5
,62.3,12.6,18.3,9.7
,262.9,3.5,19.5,
,142.9,29.3,12.6,
,240.1,16.7,22.9,15.9
,248.8,27.1,22.9,18.9
,70.6,,40.8,10.5
,292.9,28.3,43.2,21.4
,112.9,17.4,38.6,11.9
,97.2,1.5,,9.6
,265.6,,0.3,17.4
,95.7,1.4,7.4,9.5
,290.7,4.1,8.5,12.8
,266.9,43.8,,25.4
,74.7,49.4,45.7,14.7
,43.1,26.7,35.1,10.1
,,37.7,,21.5
,202.5,22.3,31.6,16.6
,,33.4,38.7,17.1
,293.6,27.7,1.8,20.7
,206.9,8.4,26.4,12.9
,25.1,25.7,43.3,8.5
,175.1,22.5,31.5,14.9
,89.7,9.9,35.7,10.6
,239.9,41.5,18.5,23.2
,227.2,15.8,49.9,14.8
,66.9,11.7,36.8,9.7
,199.8,3.1,34.6,11.4
,100.4,9.6,3.6,10.7
,216.4,41.7,39.6,22.6
,182.6,46.2,58.7,21.2
,262.7,28.8,15.9,20.2
,198.9,49.4,,23.7
,7.3,28.1,41.4,5.5
,136.2,19.2,16.6,13.2
,210.8,49.6,37.7,23.8
,210.7,29.5,9.3,18.4
,53.5,,21.4,8.1
,261.3,42.7,54.7,24.2
,239.3,15.5,27.3,15.7
,102.7,29.6,8.4,
,131.1,42.8,28.9,
,,9.3,0.9,9.3
,31.5,24.6,2.2,9.5
,139.3,14.5,10.2,13.4
,237.4,27.5,,18.9
,216.8,43.9,27.2,22.3
,199.1,30.6,38.7,18.3
,109.8,14.3,31.7,12.4
,26.8,,19.3,8.8
,129.4,5.7,31.3,
,213.4,24.6,13.1,
,16.9,43.7,89.4,8.7
,27.5,1.6,20.7,6.9
,120.5,28.5,14.2,14.2
,5.4,29.9,9.4,5.3
,,7.7,23.1,
,76.4,26.7,22.3,11.8
,239.8,4.1,36.9,12.3
,75.3,20.3,32.5,11.3
,68.4,44.5,35.6,13.6
,213.5,,33.8,21.7
,193.2,18.4,65.7,15.2
,76.3,27.5,,
,110.7,40.6,63.2,
,88.3,25.5,73.4,12.9
,109.8,47.8,51.4,16.7
,134.3,4.9,9.3,11.2
,28.6,1.5,,7.3
,217.7,33.5,,19.4
,250.9,36.5,72.3,22.2
,107.4,,10.9,11.5
,163.3,31.6,52.9,16.9
,197.6,3.5,5.9,11.7
,184.9,,,15.5
,289.7,42.3,51.2,25.4
,135.2,41.7,45.9,17.2
,222.4,4.3,49.8,11.7
,296.4,36.3,100.9,23.8
,280.2,10.1,21.4,14.8
,187.9,17.2,17.9,14.7
,238.2,34.3,5.3,20.7
,137.9,46.4,,19.2
,,,29.7,7.2
,90.4,0.3,23.2,8.7
,13.1,0.4,25.6,5.3
,255.4,26.9,5.5,19.8
,225.8,8.2,56.5,13.4
,241.7,,23.2,21.8
,175.7,15.4,2.4,14.1
,209.6,20.6,10.7,15.9
,78.2,46.8,34.5,14.6
,75.1,,52.7,12.6
,139.2,14.3,25.6,12.2
,76.4,0.8,14.8,9.4
,125.7,36.9,79.2,15.9
,19.4,,22.3,6.6
,141.3,26.8,46.2,15.5
,18.8,21.7,50.4,
,,2.4,15.6,11.6
,123.1,34.6,12.4,15.2
,229.5,32.3,74.2,19.7
,87.2,11.8,25.9,10.6
,7.8,38.9,50.6,6.6
,80.2,,9.2,8.8
,220.3,,3.2,24.7
,59.6,,43.1,9.7
,0.7,39.6,8.7,1.6
,265.2,2.9,,12.7
,8.4,27.2,2.1,5.7
,219.8,33.5,45.1,19.6
,36.9,38.6,65.6,10.8
,48.3,,8.5,11.6
,25.6,,9.3,9.5
,273.7,28.9,59.7,20.8
,,25.9,20.5,9.6
,184.9,43.9,1.7,20.7
,73.4,,12.9,10.9
,193.7,35.4,75.6,19.2
,220.5,33.2,37.9,20.1
,104.6,5.7,34.4,10.4
,96.2,14.8,38.9,11.4
,140.3,1.9,,10.3
,240.1,7.3,8.7,13.2
,243.2,,44.3,25.4
,,40.3,11.9,10.9
,44.7,25.8,20.6,10.1
,280.7,13.9,,16.1
,,8.4,48.7,11.6
,197.6,23.3,14.2,16.6
,171.3,39.7,37.7,
,187.8,21.1,9.5,15.6
,4.1,11.6,5.7,3.2
,93.9,43.5,50.5,15.3
,149.8,1.3,24.3,10.1
,11.7,36.9,45.2,7.3
,131.7,18.4,34.6,12.9
,172.5,18.1,30.7,14.4
,85.7,35.8,49.3,13.3
,188.4,18.1,25.6,14.9
,163.5,36.8,7.4,
,117.2,14.7,5.4,11.9
,234.5,3.4,84.8,11.9
,17.9,37.6,21.6,
,206.8,5.2,19.4,12.2
,215.4,23.6,57.6,17.1
,284.3,10.6,6.4,
,,11.6,18.4,8.4
,164.5,20.9,47.4,14.5
,19.6,20.1,,7.6
,168.4,7.1,12.8,11.7
,222.4,3.4,13.1,11.5
,276.9,48.9,41.8,
,248.4,30.2,20.3,20.2
,170.2,7.8,35.2,11.7
,276.7,2.3,23.7,11.8
,165.6,,17.6,12.6
,156.6,2.6,8.3,10.5
,218.5,5.4,27.4,12.2
,56.2,5.7,29.7,8.7
,287.6,,71.8,26.2
,253.8,21.3,,17.6
,,45.1,19.6,22.6
,139.5,2.1,26.6,10.3
,191.1,28.7,18.2,17.3
,,13.9,3.7,15.9
,18.7,12.1,23.4,6.7
,39.5,41.1,5.8,10.8
,75.5,10.8,,9.9
,17.2,4.1,31.6,5.9
,166.8,,3.6,19.6
,149.7,35.6,,17.3
,38.2,3.7,13.8,7.6
,94.2,4.9,8.1,9.7
,,9.3,6.4,12.8
,283.6,,66.2,25.5
,232.1,8.6,8.7,13.4

Python code:

# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt

# Load the data
def load_exdata(filename):
    data = []
    with open(filename, 'r') as f:
        for line in f.readlines():
            line = line.split(',')
            current = [float(item) for item in line]
            data.append(current)
    return data

data = load_exdata('testdata.txt')
data = np.array(data, np.float64)  # the data is floating point

# Feature scaling
def featureNormalize(X):
    mu = np.zeros((1, X.shape[1]))
    sigma = np.zeros((1, X.shape[1]))
    for i in range(X.shape[1]):
        mu[0, i] = np.mean(X[:, i])    # mean of column i
        sigma[0, i] = np.std(X[:, i])  # standard deviation of column i
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma

# Compute the cost
def computeCost(X, y, theta):
    m = y.shape[0]
    # J = (np.sum((X.dot(theta) - y)**2)) / (2*m)
    C = X.dot(theta) - y
    J2 = (C.T.dot(C)) / (2 * m)
    return J2

# Gradient descent
def gradientDescent(X, y, theta, alpha, num_iters):
    m = y.shape[0]
    # store the cost of every iteration
    J_history = np.zeros((num_iters, 1))
    for iter in range(num_iters):
        # gradient step: theta := theta - alpha/m * X^T (X.theta - y); shapes (4,m)x(m,1) = (4,1)
        theta = theta - (alpha / m) * (X.T.dot(X.dot(theta) - y))
        J_history[iter] = computeCost(X, y, theta)
    return J_history, theta

iterations = 1500  # number of iterations (assumed value; the original was lost)
alpha = 0.01       # learning rate

# input features: columns 1, 2 and 3 of each row (column 0 is the row index)
x = data[:, (1, 2, 3)].reshape((-1, 3))
# target: column 4 of each row
y = data[:, 4].reshape((-1, 1))
m = y.shape[0]
x, mu, sigma = featureNormalize(x)
X = np.hstack([x, np.ones((x.shape[0], 1))])  # append the intercept column

theta = np.zeros((4, 1))  # 3 features plus the intercept, so theta has 4 components
j = computeCost(X, y, theta)
J_history, theta = gradientDescent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent', theta)

def predict(data):
    testx = np.array(data)
    testx = (testx - mu) / sigma
    testx = np.hstack([testx, np.ones((testx.shape[0], 1))])
    price = testx.dot(theta)
    print('predicted value is %f' % price[0, 0])

predict([151.5, 41.3, 58.5])  # a 3-dimensional input (row 4 of the data)
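Both scripts import matplotlib without using it; a natural final check is to plot the recorded cost history and confirm that J decreases and flattens out, which indicates the learning rate alpha is reasonable. A minimal sketch, assuming the J_history returned by gradientDescent above:

# Plot the cost recorded at each iteration to verify convergence
plt.plot(np.arange(J_history.shape[0]), J_history.ravel())
plt.xlabel('iteration')
plt.ylabel('cost J')
plt.show()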