import os
# Switch to the directory that holds the data so the relative CSV path below resolves.
os.chdir(r"E:\BaiduNetdiskDownload\6inference")
import pandas as pd
# The CSV is decoded with the GBK codec, passed explicitly via `encoding`.
house_price_gr = pd.read_csv(r'house_price_gr.csv', encoding='gbk')
house_price_gr.head()
# Take a first look at the data.
| | dis_name | rate |
---|---|---|
0 | 東城區甘南小區 | 0.169747 |
1 | 東城區察慈小區 | 0.165484 |
2 | 東城區胡家園小區 | 0.141358 |
3 | 東城區臺基廠小區 | 0.063197 |
4 | 東城區青年湖小區 | 0.101528 |
# Summary statistics; with default arguments only the numeric `rate` column is reported.
house_price_gr.describe()
| | rate |
---|---|
count | 150.000000 |
mean | 0.110061 |
std | 0.041333 |
min | 0.029540 |
25% | 0.080027 |
50% | 0.104908 |
75% | 0.140066 |
max | 0.243743 |
# include='all' also summarises the non-numeric column (count / unique / top / freq).
house_price_gr.describe(include='all')
| | dis_name | rate |
---|---|---|
count | 150 | 150.000000 |
unique | 150 | NaN |
top | 朝陽區小關北里24號院 | NaN |
freq | 1 | NaN |
mean | NaN | 0.110061 |
std | NaN | 0.041333 |
min | NaN | 0.029540 |
25% | NaN | 0.080027 |
50% | NaN | 0.104908 |
75% | NaN | 0.140066 |
max | NaN | 0.243743 |
# Render matplotlib figures inline in the notebook. run_line_magic() is the
# supported API; InteractiveShell.magic() is deprecated.
get_ipython().run_line_magic('matplotlib', 'inline')
import seaborn as sns
from scipy import stats
# Histogram with a KDE curve, plus a fitted normal density (fit=stats.norm) for comparison.
# NOTE(review): seaborn marks distplot as deprecated; consider histplot/displot when upgrading.
sns.distplot(house_price_gr.rate, kde=True, fit=stats.norm)
import statsmodels.api as sm
from matplotlib import pyplot as plt
# Q-Q plot against the 45-degree reference line; fit=True lets statsmodels
# estimate the distribution parameters from the sample first.
fig = sm.qqplot(house_price_gr.rate, fit=True, line='45')
fig.show()  # display the figure
house_price_gr.plot(kind='box')  # box plot
>
# Confidence-interval estimation
# Variance: s^2 = [(x1-x)^2 + (x2-x)^2 + ... + (xn-x)^2] / (n), where x is the mean.
# NOTE(review): pandas' Series.std() defaults to ddof=1, i.e. it divides by (n-1), not n — confirm which is intended.
se = house_price_gr.rate.std() / len(house_price_gr) ** 0.5 # standard error = standard deviation / sqrt(sample size)
print(se)
LB = house_price_gr.rate.mean() - 1.96 * se # lower bound: mean minus 1.96 standard errors, giving a 95% confidence interval
UB = house_price_gr.rate.mean() + 1.96 * se
(LB, UB) # 95% confidence interval: we are 95% confident the growth rate lies between LB and UB
0.003374832409178327
(0.10344632517993363, 0.11667566822391268)
#所以可以得出結論,想要以10%增長率買到房的置信度小於2.5%,認爲是不可能事件
# 如果要求任意置信度下的置信區間的話,可以自己編一個函數
def confint(x, alpha=0.05):
    """Return the two-sided t-based confidence interval for the mean of ``x``.

    Args:
        x: Sample exposing ``mean()`` and ``std(ddof=...)`` and supporting
            ``len()``, e.g. a pandas Series or a NumPy array.
        alpha: Significance level; the interval has confidence ``1 - alpha``.

    Returns:
        A dict with the keys ``'Mean'``, ``'Degree of Freedom'``, ``'LB'``
        (lower bound) and ``'UB'`` (upper bound).
    """
    n = len(x)
    xb = x.mean()
    df = n - 1
    # ddof=1 is already the pandas default; passing it explicitly makes a
    # NumPy array (whose default is ddof=0) use the sample standard deviation too.
    std_err = x.std(ddof=1) / n ** 0.5
    # Half-width of the interval: standard error times the upper t quantile.
    tmp = std_err * stats.t.ppf(1 - alpha / 2, df)
    return {'Mean': xb, 'Degree of Freedom': df, 'LB': xb - tmp, 'UB': xb + tmp}
# 95% confidence interval (alpha = 0.05) for the mean growth rate.
confint(house_price_gr.rate, 0.05)
{'Mean': 0.11006099670192315,
'Degree of Freedom': 149,
'LB': 0.10339228338892809,
'UB': 0.11672971001491822}
# Recompute the upper bound of the t-based interval by hand.
x=house_price_gr.rate
n = len(x)
df = n-1
xb = x.mean()
# t.ppf(0.025, df) is the lower-tail (negative) quantile, so subtracting the
# product adds the margin to the mean, which yields the upper bound.
xb-(x.std() / n ** 0.5) * stats.t.ppf(0.025, df)
0.11672971001491822
# Alternatively, let statsmodels' DescrStatsW compute the t-based confidence interval of the mean.
d1 = sm.stats.DescrStatsW(house_price_gr.rate)
d1.tconfint_mean(0.05)  # alpha = 0.05 -> 95% two-sided interval
(0.10339228338892814, 0.11672971001491828)