seaborn —— 課後練手
%matplotlib inline
import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
練習1:鳶尾花花型尺寸分析
- 鳶尾花萼片(sepal)和花瓣(petal)的大小關係(散點圖)
- 不同種類(species)鳶尾花萼片和花瓣的分佈情況(箱圖或者提琴圖)
- 鳶尾花萼片和花瓣大小的聯合分佈情況(六角箱圖或者核密度估計)
# Load seaborn's bundled "iris" example dataset and preview its first rows.
data = sns.load_dataset(name="iris")
data.head(n=5)
|
sepal_length |
sepal_width |
petal_length |
petal_width |
species |
0 |
5.1 |
3.5 |
1.4 |
0.2 |
setosa |
1 |
4.9 |
3.0 |
1.4 |
0.2 |
setosa |
2 |
4.7 |
3.2 |
1.3 |
0.2 |
setosa |
3 |
4.6 |
3.1 |
1.5 |
0.2 |
setosa |
4 |
5.0 |
3.6 |
1.4 |
0.2 |
setosa |
# Derive a "size" feature for each flower part as length multiplied by width.
data['sepal_size'] = data['sepal_length'].mul(data['sepal_width'])
data['petal_size'] = data['petal_length'].mul(data['petal_width'])
萼片與花瓣
# Scatter plot with a fitted regression line: sepal size against petal size.
sns.lmplot(data=data, x='sepal_size', y='petal_size')
<seaborn.axisgrid.FacetGrid at 0x7fea04b37550>
不同種類 萼片與花瓣分佈
# Violin plots of sepal_size and petal_size (one row each), split by species.
# `height` replaces the former `size` keyword: seaborn renamed it in 0.9 and
# later removed the old name, so passing `size` fails on current releases.
g = sns.PairGrid(
    data,
    x_vars=["species"],
    y_vars=["sepal_size", "petal_size"],
    aspect=2,
    height=4,
)
# The trailing semicolon suppresses the notebook's echo of the returned grid.
g.map(sns.violinplot, palette="pastel");
萼片與花瓣大小聯合分佈
# Joint kernel-density estimate of sepal size against petal size.
# Uses the derived `sepal_size` / `petal_size` columns so the plot matches the
# exercise (joint distribution of sizes) and the other size-based plots in
# this section, rather than plotting the raw length columns.
sns.jointplot(data=data, x='sepal_size', y='petal_size', kind='kde')
/opt/ds/local/lib/python2.7/site-packages/numpy/ma/core.py:6385: MaskedArrayFutureWarning: In the future the default for ma.minimum.reduce will be axis=0, not the current None, to match np.minimum.reduce. Explicitly pass 0 or None to silence this warning.
return self.reduce(a)
/opt/ds/local/lib/python2.7/site-packages/numpy/ma/core.py:6385: MaskedArrayFutureWarning: In the future the default for ma.maximum.reduce will be axis=0, not the current None, to match np.maximum.reduce. Explicitly pass 0 or None to silence this warning.
return self.reduce(a)
<seaborn.axisgrid.JointGrid at 0x7fe9fc6fd250>
練習2:餐廳小費情況分析
- 小費和總消費之間的關係(散點圖+迴歸分析)
- 男性顧客和女性顧客,誰更慷慨(箱圖或者提琴圖)
- 抽菸與否是否會對小費金額產生影響(箱圖或者提琴圖)
- 工作日和週末,什麼時候顧客給的小費更慷慨(箱圖或者提琴圖)
- 午飯和晚飯,哪一頓顧客更願意給小費(箱圖或者提琴圖)
- 就餐人數是否會對慷慨度產生影響(箱圖或者提琴圖)
- 性別+抽菸的組合因素對慷慨度的影響(統計柱狀圖)
# Load seaborn's bundled "tips" example dataset and preview its first rows.
data = sns.load_dataset(name="tips")
data.head(n=5)
|
total_bill |
tip |
sex |
smoker |
day |
time |
size |
0 |
16.99 |
1.01 |
Female |
No |
Sun |
Dinner |
2 |
1 |
10.34 |
1.66 |
Male |
No |
Sun |
Dinner |
3 |
2 |
21.01 |
3.50 |
Male |
No |
Sun |
Dinner |
3 |
3 |
23.68 |
3.31 |
Male |
No |
Sun |
Dinner |
2 |
4 |
24.59 |
3.61 |
Female |
No |
Sun |
Dinner |
4 |
小費與總消費
# Scatter plot with a fitted regression line: tip against total bill.
sns.lmplot(data=data, x='total_bill', y='tip')
<seaborn.axisgrid.FacetGrid at 0x7fe9ff3afed0>
小費:男性vs女性
# Box plot of tip amounts grouped by the customer's sex.
sns.boxplot(data=data, x='sex', y='tip')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9ff1651d0>
小費:抽菸vs不抽菸
# Box plot of tip amounts grouped by smoker / non-smoker.
sns.boxplot(data=data, x='smoker', y='tip')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9fd4dac10>
小費:工作日vs週末
# Collect the distinct values of the `day` column and display them.
day = data.day.unique()
day
[Sun, Sat, Thur, Fri]
Categories (4, object): [Sun, Sat, Thur, Fri]
# Label Saturday/Sunday rows as 'weekend' and every other day as 'weekday'.
week_labels = ['weekend' if d in ['Sun', 'Sat'] else 'weekday' for d in data.day]
data_week = pd.DataFrame({'week': week_labels}, index=data.index)

# Attach the new column by aligning on the shared row index, then preview.
data_expand = data.join(data_week)
data_expand.head(n=5)
|
total_bill |
tip |
sex |
smoker |
day |
time |
size |
week |
0 |
16.99 |
1.01 |
Female |
No |
Sun |
Dinner |
2 |
weekend |
1 |
10.34 |
1.66 |
Male |
No |
Sun |
Dinner |
3 |
weekend |
2 |
21.01 |
3.50 |
Male |
No |
Sun |
Dinner |
3 |
weekend |
3 |
23.68 |
3.31 |
Male |
No |
Sun |
Dinner |
2 |
weekend |
4 |
24.59 |
3.61 |
Female |
No |
Sun |
Dinner |
4 |
weekend |
# Box plot of tip amounts for weekdays versus weekends.
sns.boxplot(data=data_expand, x='week', y='tip')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9ff533c10>
小費:午餐vs晚餐
# Violin plot of tip amounts grouped by meal time (the `time` column).
sns.violinplot(data=data, x='time', y='tip')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9ffcd35d0>
小費:就餐人數
# Violin plot of tip amounts grouped by party size (the `size` column).
sns.violinplot(data=data, x='size', y='tip')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9ffef5650>
小費:性別+抽菸
# Bar plot of tip by sex, with bars split by smoker status via `hue`.
sns.barplot(data=data, x='sex', y='tip', hue='smoker')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9ff4e3d10>
練習3:泰坦尼克號海難倖存狀況分析
- 不同艙位等級中倖存和遇難乘客的分佈(箱圖或者提琴圖)
- 倖存和遇難乘客的票價分佈(箱圖或者提琴圖)
- 倖存和遇難乘客的年齡分佈(箱圖或者提琴圖)
- 不同上船港口的乘客艙位等級分佈(箱圖或者提琴圖)
- 倖存和遇難乘客兄弟姐妹及配偶(sibsp)的數量分佈(箱圖或者提琴圖)
- 倖存和遇難乘客父母子女的數量分佈(箱圖或者提琴圖)
- 單獨乘船與否和倖存之間的關係(統計柱狀圖)
- 乘客年齡和船票價格之間的關係(線性迴歸模型)
- 乘客性別和艙位等級之間的關係(統計柱狀圖)
- 乘客年齡和艙位等級之間的關係(帶抖動的散點圖)
# Load seaborn's bundled "titanic" example dataset and preview its first rows.
data = sns.load_dataset(name="titanic")
data.head(n=5)
|
survived |
pclass |
sex |
age |
sibsp |
parch |
fare |
embarked |
class |
who |
adult_male |
deck |
embark_town |
alive |
alone |
0 |
0 |
3 |
male |
22.0 |
1 |
0 |
7.2500 |
S |
Third |
man |
True |
NaN |
Southampton |
no |
False |
1 |
1 |
1 |
female |
38.0 |
1 |
0 |
71.2833 |
C |
First |
woman |
False |
C |
Cherbourg |
yes |
False |
2 |
1 |
3 |
female |
26.0 |
0 |
0 |
7.9250 |
S |
Third |
woman |
False |
NaN |
Southampton |
yes |
True |
3 |
1 |
1 |
female |
35.0 |
1 |
0 |
53.1000 |
S |
First |
woman |
False |
C |
Southampton |
yes |
False |
4 |
0 |
3 |
male |
35.0 |
0 |
0 |
8.0500 |
S |
Third |
man |
True |
NaN |
Southampton |
no |
True |
倖存or遇難:不同艙位影響?
# Violin plot of the `survived` values for each passenger class.
sns.violinplot(data=data, x='class', y='survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9fcdcea50>
倖存or遇難:票價分佈?
# Violin plot of ticket fare, grouped by the `alive` column.
sns.violinplot(data=data, x='alive', y='fare')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9fcd213d0>
倖存or遇難:年齡分佈?
# Violin plot of passenger age, grouped by the `alive` column.
sns.violinplot(data=data, x='alive', y='age')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9fcbd83d0>
不同上船港口的艙位等級分佈
# Violin plot of the numeric passenger class for each embarkation town.
sns.violinplot(data=data, x='embark_town', y='pclass')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9fcaa7690>
倖存or遇難:兄弟姐妹及配偶(sibsp)數量分佈?
# Violin plot of the `sibsp` count, grouped by the `alive` column.
sns.violinplot(data=data, x='alive', y='sibsp')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9fc9f41d0>
倖存or遇難:父母子女數量分佈?
# Violin plot of the `parch` count, grouped by the `alive` column.
sns.violinplot(data=data, x='alive', y='parch')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9fc944d10>
倖存or遇難:是否單獨乘船?
# Bar plot of `survived` for passengers travelling alone versus not alone.
sns.barplot(data=data, x='alone', y='survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9fc632ad0>
年齡與票價的關係
# Scatter plot with a fitted regression line: fare against passenger age.
sns.lmplot(data=data, x='age', y='fare')
<seaborn.axisgrid.FacetGrid at 0x7fe9fc5ba110>
性別與艙位等級
# Bar plot of the numeric passenger class for each sex.
sns.barplot(data=data, x='sex', y='pclass')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe9fc508c10>
乘客年齡與艙位等級的關係
# Age against passenger class; x_jitter spreads the points horizontally so
# observations sharing the same class value do not overlap.
sns.lmplot(data=data, x='pclass', y='age', x_jitter=0.2)
<seaborn.axisgrid.FacetGrid at 0x7fe9fc499150>