Python 圖形統計
x 爲連續變量、y 爲分類變量時的 logit 圖(手工分箱)
from woe import WoE

# Fit a WoE transformer on the binned age groups against the Loss target and
# draw its plot.
# NOTE(review): v_type='d' presumably marks the feature as discrete -- confirm
# against the woe package documentation.
woe = WoE(v_type='d')
woe.fit(auto.age_group, auto.Loss)
fig = woe.plot([8, 5])  # presumably the figure size -- TODO confirm
# pyplot.show() does not take a figure; it displays all open figures. Passing
# `fig` positionally is rejected (or misread as `block`) by matplotlib.
plt.show()
地圖統計
from pyecharts import Map

# Filter the 2014 rows once; the same subset feeds the map values and both
# bounds of the colour scale.
gdp_2014 = gdp[gdp.Year == 2014]
gdp_data = list(zip(gdp_2014.loc[:, 'Prov'], gdp_2014.loc[:, 'GDP']))
attr, value = Map.cast(gdp_data)
GDP_max = gdp_2014.GDP.max()
GDP_min = gdp_2014.GDP.min()
# NOTE(review): `map` shadows the builtin map(); the name is kept here because
# code outside this snippet may still refer to it.
map = Map('各省GDP', width=1200, height=600)
map.add('', attr, value, maptype='china', is_visualmap=True,
        visual_range=[GDP_min, GDP_max],
        visual_text_color='#000', is_label_show=True)
map.render()
from pyecharts import Map

# Mean price per district, computed once and reused for the plotted values
# and for the colour-scale bounds so that both describe the same grouping.
# (The bounds were previously grouped by `snd.dist`, a different attribute
# from the `snd.district` used for the plotted values.)
district_mean = snd.price.groupby(snd.district).mean()
snd_price = list(zip(district_mean.index, district_mean.values))
attr, value = Map.cast(snd_price)
min_ = district_mean.min()
max_ = district_mean.max()
# NOTE(review): `map` shadows the builtin map(); the name is kept here because
# code outside this snippet may still refer to it.
map = Map('北京各區房價', width=1200, height=600)
map.add('', attr, value, maptype='北京', is_visualmap=True,
        visual_range=[min_, max_],
        visual_text_color='#000', is_label_show=True)
map.render()
堆疊柱形圖
def stack2dim(raw, i, j, rotation=0, location='upper left'):
    """Draw a normalised stacked bar chart of a binary variable by group.

    One bar is drawn per level of ``i``. Each bar's width is that level's
    share of all rows, and its two stacked segments are the within-level
    proportions of the two categories of ``j``.

    Args:
        raw: pandas DataFrame containing both columns.
        i: name of the grouping (x-axis) categorical column, e.g. "school".
        j: name of the target categorical column; it must have exactly two
            categories.
        rotation: rotation angle of the x tick labels. The default keeps them
            horizontal; use e.g. ``rotation=40`` when labels are long.
        location: legend location passed to ``plt.legend``. Change it from the
            default 'upper left' if the legend is hidden by the bars.

    Raises:
        ValueError: if the cross-tabulation of ``i`` and ``j`` does not have
            exactly two columns (non-binary target or no data).
    """
    counts = pd.crosstab(raw[i], raw[j])
    if counts.shape[1] != 2:
        raise ValueError(
            'stack2dim needs a binary target: {!r} has {} categories'.format(
                j, counts.shape[1]))

    row_totals = counts.sum(axis=1)
    # Row-normalise so that every bar stacks up to 1.
    shares = counts.div(row_totals, axis=0)

    # Bar widths are each group's share of all rows. Converting to a plain
    # array keeps the arithmetic positional, independent of the index labels.
    widths = (row_totals / row_totals.sum()).to_numpy()
    # The first bar is centred at 0; each following centre is offset by half
    # of both neighbouring widths plus a fixed 0.05 gap.
    steps = widths[:-1] / 2 + widths[1:] / 2 + 0.05
    centers = np.concatenate(([0.0], np.cumsum(steps)))

    lower = shares.iloc[:, 0].to_numpy()
    upper = shares.iloc[:, 1].to_numpy()

    # Legend entries name the actual categories of ``j`` instead of a fixed
    # pair of labels, so the legend is correct for any binary target.
    legend_name = shares.columns.name
    plt.bar(centers, lower, width=widths,
            label='{}: {}'.format(legend_name, shares.columns[0]),
            color='#5F9EA0')
    plt.bar(centers, upper, bottom=lower, width=widths,
            label='{}: {}'.format(legend_name, shares.columns[1]),
            color='#8FBC8F')

    plt.title(j + ' vs ' + i)
    group_labels = ['{}: {}'.format(shares.index.name, name)
                    for name in shares.index]
    plt.xticks(centers, group_labels, rotation=rotation)
    plt.ylabel(j)
    plt.legend(shadow=True, loc=location)
    plt.show()
# Example: plot the two categories of `school` within each level of `subway`
# for the `snd` data frame.
stack2dim(raw=snd, i="subway", j="school")
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.