import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
# Load the raw data; skipinitialspace drops blanks that follow each delimiter.
cred = pd.read_csv("creditcard_exp.csv", skipinitialspace=True)
cred.head()

# Data cleaning for modelling: split the rows on whether the response
# column 'avg_exp' is present.
has_avg_exp = cred['avg_exp'].notna()
cred2 = cred[has_avg_exp].copy()    # rows with a known avg_exp -> used to fit
cred3 = cred[~has_avg_exp].copy()   # rows whose avg_exp is missing
cred2.head()

# Correlation check: scatter plot of Income against avg_exp.
cred2.plot.scatter(x='Income', y='avg_exp')

# Simple linear regression. The response y must not contain missing values,
# which is why the model is fitted on cred2 rather than on cred.
lm_s = ols(formula='avg_exp ~ Income', data=cred2).fit()
print(lm_s.params)
lm_s.summary()

# Prediction on the original (unfiltered) data: each record gets a fitted
# avg_exp computed from the regression coefficients.
# The original author's note records the fit as
# avg_exp = 258.04 + 97.72 * Income.
pre = lm_s.predict(cred)
cred['pre'] = pre
cred.head()