点击(此处)折叠或打开
# coding: utf-8
# Simple linear regression of GPA on SAT with scikit-learn: load the sample,
# fit the model, print the fit statistics, then predict GPA for new SAT scores.
import numpy as np
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression

sns.set()

data = pd.read_csv('data-analysis/python-jupyter/1.01. Simple linear regression.csv')
y = data['GPA']
X = data['SAT']
print(X.shape)
print(y.shape)

reg = LinearRegression()

# Calling reg.fit(X, y) directly fails with
#     ValueError: Expected 2D array, got 1D array instead
# because X is a 1-D pandas Series, while scikit-learn expects a 2-D
# (n_samples, n_features) input.
#
# Turn the Series into a single-column DataFrame instead of reshaping the raw
# values: the shape is the same (n, 1), but the 'SAT' column name is kept, so
# predicting later on a DataFrame with the same column does not raise the
# "X has feature names, but LinearRegression was fitted without feature names"
# warning (scikit-learn >= 1.0).
X_matrix = X.to_frame()
print(X_matrix.shape)
reg.fit(X_matrix, y)

# reg.score(...)  -> R-squared of the fit
# reg.coef_       -> coefficient(s) / slope
# reg.intercept_  -> intercept
print(reg.score(X_matrix, y))
print(reg.coef_)
print(reg.intercept_)

# Make predictions for ten SAT scores between 1700 and 1800.
gen_data = np.linspace(1700, 1800, num=10, dtype=int)
new_data = pd.DataFrame(data=gen_data, columns=['SAT'])
# Predict once, selecting the feature column explicitly so that the added
# 'Predicted_GPA' column can never be fed back in as a feature.
new_data['Predicted_GPA'] = reg.predict(new_data[['SAT']])
print(new_data)
下面是前两天用 statsmodels.api 写的 predict 部分:
点击(此处)折叠或打开
# Predict GPA for new SAT scores with the statsmodels model.
# NOTE(review): `results` is not defined in this snippet -- presumably the
# fitted results object from the earlier statsmodels example; confirm.
gen_data = np.linspace(1700, 1800, num=10, dtype=int)
new_data = pd.DataFrame({'SAT': gen_data})

# Add the 'const' column before predicting.
new_x = sm.add_constant(new_data)
predicted_y = results.predict(new_x)
new_x['Predicted_GPA'] = predicted_y

# The constant column was only needed for prediction; drop it before printing.
new_x = new_x.drop(columns='const')
print(new_x)