Python 迴歸分析學習筆記 1

# == 1 == #
import pandas as pd
# Read the advertising data set from the working directory and preview
# its first five rows.
data = pd.read_csv(filepath_or_buffer="advertising.csv")
print(data.head(n=5))

在這裏插入圖片描述

# == 2 == #
%matplotlib inline
import matplotlib.pyplot as plt
# One scatter panel per advertising channel, all sharing the sales axis.
fig, axs = plt.subplots(1, 3, sharey=True)
for panel, channel in zip(axs, ('TV', 'radio', 'newspaper')):
    # figsize resizes the figure that owns `panel`; repeating the same size
    # for every panel leaves the figure at 16x8 inches.
    data.plot.scatter(x=channel, y='sales', ax=panel, figsize=(16, 8))
plt.show()

在這裏插入圖片描述

# == 3 == #
# import statsmodels.formula.api as sm
# import statsmodels.formula.api as smf
# lm_1=smf.ols(formula="sales~TV",data=data).fit()
# print(lm_1.summary())


# Alternative to the formula API: build the design matrix by hand.
import statsmodels.api as sm

# sm.OLS does not add an intercept on its own, so store a column of ones
# in the frame and select it together with the regressor.
data['const'] = 1
design_matrix = data[['const', 'TV']]
response = data['sales']
lm_1 = sm.OLS(endog=response, exog=design_matrix, missing='drop').fit()
print(lm_1.summary())

在這裏插入圖片描述

# == 4 == #
# Predict sales for a TV budget of 50. lm_1 was fit on the columns
# ['const', 'TV'], so the new observation must supply both, in that order;
# passing only 'TV' fails with a shape-mismatch error.
lm_1.predict(pd.DataFrame({'const': [1], 'TV': [50]}))

在這裏插入圖片描述

# == 5 == #
# Print the key fit statistics of lm_1, each under its own heading and
# followed by a blank separator.
lm_1_report = (
    ("regression parameters:\n", lm_1.params),
    ("confidence intervals:\n", lm_1.conf_int()),
    ("p-values:\n", lm_1.pvalues),
    ("R-squared value\n", lm_1.rsquared),
)
for heading, statistic in lm_1_report:
    print(heading)
    print(statistic)
    print("\n")

在這裏插入圖片描述

# == 6 == #
from statsmodels.iolib.summary2 import summary_col
import statsmodels.formula.api as smf
# summary_col lines rows up by parameter name. The array-API fit `lm_1`
# calls its intercept 'const', while formula fits call it 'Intercept', so
# using `lm_1` directly would split the intercept across two rows.
# Model 1 is therefore refit through the formula API for the table only;
# the estimates are the same regression of sales on TV.
lm_1_formula = smf.ols(formula="sales ~ TV", data=data).fit()
lm_2 = smf.ols(formula="sales ~ TV + radio", data=data).fit()
lm_3 = smf.ols(formula="sales ~ TV + radio + newspaper", data=data).fit()

# Extra per-model rows appended below the coefficients.
info_dict = {
    'R-squared': lambda x: "{:.2f}".format(x.rsquared),
    'No. observations': lambda x: "{0:d}".format(int(x.nobs)),
}

results_table = summary_col(
    results=[lm_1_formula, lm_2, lm_3],
    float_format='%0.2f',
    stars=True,
    model_names=['Model 1', 'Model 2', 'Model 3'],
    info_dict=info_dict,
    regressor_order=['Intercept', 'TV', 'radio', 'newspaper'],
)
results_table.add_title('Table 1 - OLS Regressions')
print(results_table)

在這裏插入圖片描述

import statsmodels.api as sm
# --- Simple regression: sales on TV ------------------------------------
advertising = pd.read_csv("advertising.csv")
y = advertising.sales
x = advertising.TV
X = sm.add_constant(x)  # prepend the intercept column ('const') to the regressor
model = sm.OLS(y, X).fit()
print(model.summary())

# Fitted values written out by hand: intercept + slope * TV.
# Parameters are looked up by label rather than by position.
y_hat_manual = model.params['const'] + model.params['TV'] * advertising.TV

# Fitted values from the model; predict needs the same columns used for
# fitting, i.e. the matrix with the constant, not the bare TV series.
y_hat = model.predict(X)

plt.scatter(x, y, alpha=0.4)
plt.xlabel('TV')
plt.ylabel('Sales')
plt.plot(x, y_hat, 'r', alpha=0.8)
plt.show()

# --- Multiple regression: sales on TV, radio and newspaper ---------------
x1 = advertising[["TV", "radio", "newspaper"]]
y1 = advertising['sales']
X = sm.add_constant(x1)
model2 = sm.OLS(y1, X).fit()
print(model2.summary())

import statsmodels.formula.api as smf

# Interaction between TV and radio: in a formula, `TV * radio` expands to
# TV + radio + TV:radio.
model3 = smf.ols('sales ~ TV * radio + newspaper', data=advertising).fit()

# Quadratic radio term: inside a formula `**` is a formula operator, not
# arithmetic, so the square must be wrapped in I(...) to be evaluated as
# plain Python.
model4 = smf.ols("sales ~ TV + I(radio**2) + newspaper", data=advertising).fit()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章