if __name__ == '__main__':
    # Load the Titanic training set from CSV.
    data = pd.read_csv("titanic_train.csv")

    # Keep only the columns needed for this example.
    cols = ["PassengerId", "Pclass", "Fare", "Survived", "Sex", "Age"]
    data = data[cols]

    # Missing values: drop every row that contains a NaN.
    # Per the original note, the remaining data has shape (714, 6).
    data = data.dropna(axis=0)

    # Aggregate: index = grouping column, values = column to compute on,
    # aggfunc = function to apply. The string "mean" is used instead of
    # np.mean because recent pandas releases emit a FutureWarning when a
    # NumPy callable is passed as aggfunc; the result is the same.
    res = data.pivot_table(index="Sex", values="Survived", aggfunc="mean")
    print(res)
    print(type(res))

# =====================================
# Output:
#         Survived
# Sex
# female  0.754789
# male    0.205298
# <class 'pandas.core.frame.DataFrame'>
得到 : dataframe
取一個值 : 女性倖存率
4 ,聚合,結果取值 : res.loc["female"][0]
目的 :
從結果中把一個具體的元素取出來
代碼 :
if __name__ == '__main__':
    # Load the Titanic training set from CSV.
    data = pd.read_csv("titanic_train.csv")

    # Keep only the columns needed for this example.
    cols = ["PassengerId", "Pclass", "Fare", "Survived", "Sex", "Age"]
    data = data[cols]

    # Missing values: drop every row that contains a NaN.
    # Per the original note, the remaining data has shape (714, 6).
    data = data.dropna(axis=0)

    # Aggregate: grouping column, value column, function. The string
    # "mean" avoids the FutureWarning pandas raises for np.mean.
    res = data.pivot_table(index="Sex", values="Survived", aggfunc="mean")
    print(res)
    print("=========================")

    # Pull one concrete element out of the aggregated frame.
    # res.loc["female"] is a Series labelled by column name, so the first
    # element is taken by position with .iloc[0]. This is equivalent to the
    # older res.loc["female"][0] spelling, whose positional fallback is
    # deprecated for Series with a non-integer index.
    res = res.loc["female"].iloc[0]
    print(res)
    print(type(res))

# ====================================
# Output:
#         Survived
# Sex
# female  0.754789
# male    0.205298
# =========================
# 0.7547892720306514
# <class 'numpy.float64'>
5 ,聚合,總數 :groupby
目的 : 每個等級的人數
精華代碼 :
# Essence of this section: number of rows in each Pclass group.
data.groupby("Pclass").size()
代碼 :
if __name__ == '__main__':
    # Load the Titanic training set from CSV.
    data = pd.read_csv("titanic_train.csv")

    # Keep only the columns needed for this example.
    cols = ["PassengerId", "Pclass", "Fare", "Survived", "Sex", "Age"]
    data = data[cols]

    # Missing values: drop every row that contains a NaN.
    # Per the original note, the remaining data has shape (714, 6).
    data = data.dropna(axis=0)

    # Aggregate: count the rows that fall into each passenger class.
    res = data.groupby(by="Pclass").size()
    print(res)

# =========================================================
# Output:
# Pclass
# 1    186
# 2    173
# 3    355
# dtype: int64
6 ,groupby 中的 size() 和 count()
意義不同 :
1 ,size : 一共幾行
2 ,count : 不算 NaN ,幾行
作用對象不同 :
1 ,size : 針對整體
2 ,count : 針對每一列
7 ,聚合,總和 :pivot_table
目的 : 每個等級,船票總額
精華代碼 :
# Essence of this section: total and average fare per passenger class.
# String aggregator names are used instead of np.sum / np.mean because
# recent pandas releases emit a FutureWarning for NumPy callables; the
# resulting column labels ("sum", "mean") are unchanged.
res = data.pivot_table(index="Pclass", values="Fare", aggfunc=["sum", "mean"])
代碼 :
if __name__ == '__main__':
    # Load the Titanic training set from CSV.
    data = pd.read_csv("titanic_train.csv")

    # Keep only the columns needed for this example.
    # No dropna() here: this snippet aggregates over every row it loaded.
    cols = ["PassengerId", "Pclass", "Fare", "Survived", "Sex", "Age"]
    data = data[cols]

    # Aggregate: total and average ticket fare for each passenger class.
    # String names replace np.sum / np.mean to avoid the pandas
    # FutureWarning for NumPy callables; the output is unchanged.
    res = data.pivot_table(index="Pclass", values="Fare", aggfunc=["sum", "mean"])
    print(res)

# ===========================================
# Output:
#                sum       mean
#               Fare       Fare
# Pclass
# 1       18177.4125  84.154687
# 2        3801.8417  20.662183
# 3        6714.6951  13.675550