直方圖
首先需要區分清楚概念:直方圖和條形圖。
- 條形圖:條形圖用長條形表示每一個類別,長條形的長度表示類別的頻數,寬度只用來表示類別,本身是固定的,沒有數值意義。
- 直方圖:直方圖是一種統計報告圖,形式上也是一個個的長條形,但是直方圖用長條形的面積表示頻數,所以長條形的高度表示 $\frac{\text{頻數}}{\text{組距}}$,寬度表示組距,其長度和寬度均有意義。當寬度相同時,一般就用長條形長度表示頻數。
直方圖一般用來描述等距數據,條形圖一般用來描述名稱(類別)數據或順序數據。直觀上,直方圖各個長條形是銜接在一起的,表示數據間的數學關係;條形圖各長條形之間留有空隙,用來區分不同的類別。
單分佈
# Histogram of a single distribution: 1000 draws from a normal
# distribution with mean 0 and standard deviation 20.
data = np.random.normal(loc=0, scale=20, size=1000)

# Bin edges every 5 units, starting at -100 (np.arange excludes the stop value).
bins = np.arange(start=-100, stop=100, step=5)

plt.hist(data, bins=bins, edgecolor="black")

# Leave a margin of 5 units on each side of the observed data range.
x_low = min(data) - 5
x_high = max(data) + 5
plt.xlim([x_low, x_high])
plt.title("直方圖")
多分佈
import random
# Two Gaussian samples of 500 points each, drawn with the stdlib generator.
data1 = [random.gauss(15, 10) for _ in range(500)]
data2 = [random.gauss(5, 5) for _ in range(500)]

# Shared bin edges so both histograms are directly comparable.
bins = np.arange(start=-50, stop=50, step=2.5)

# Semi-transparent bars keep the overlapping region of the two classes visible.
for samples, class_name in ((data1, 'class 1'), (data2, 'class 2')):
    plt.hist(samples, bins=bins, label=class_name, alpha=0.3, edgecolor="black")

plt.legend(loc='best')
散點圖
# Base mean vector and covariance matrix of the first point cloud.
mu_vec1 = np.array([0, 0])
cov_mat1 = np.array([[2, 0], [0, 2]])

# Build the data: np.random.multivariate_normal draws 100 samples from a
# bivariate normal distribution. The second and third clouds shift every
# entry of the mean and of the covariance matrix by 0.2 and 0.4.
x1_samples = np.random.multivariate_normal(mu_vec1, cov_mat1, 100)
x2_samples = np.random.multivariate_normal(mu_vec1 + 0.2, cov_mat1 + 0.2, 100)
x3_samples = np.random.multivariate_normal(mu_vec1 + 0.4, cov_mat1 + 0.4, 100)

plt.figure(figsize=(8, 6))

# One scatter call per cloud: (samples, marker, colour, legend label).
scatter_specs = (
    (x1_samples, 'x', 'blue', 'x1'),
    (x2_samples, 'o', 'red', 'x2'),
    (x3_samples, '^', 'green', 'x3'),
)
for points, marker_style, point_color, legend_label in scatter_specs:
    plt.scatter(
        points[:, 0],
        points[:, 1],
        marker=marker_style,
        color=point_color,
        alpha=0.6,
        label=legend_label,
    )

plt.legend(loc='best')
plt.show()
餅圖
# Raw counts for the two groups and their shares of the total.
m = 51212.
f = 40742.
m_perc = m / (m + f)
f_perc = f / (m + f)

colors = ['navy', 'lightcoral']
labels = ["Male", "Female"]

plt.figure(figsize=(5, 5))

# explode sets the gap of each wedge from the centre; autopct prints the
# percentage inside each wedge.
wedges, texts, autotexts = plt.pie(
    [m_perc, f_perc],
    labels=labels,
    autopct='%1.1f%%',
    explode=[0, 0.05],
    colors=colors,
)

# Enlarge both the category labels and the percentage labels.
for label_text in texts + autotexts:
    label_text.set_fontsize(15)

# Draw the percentage labels in white.
for pct_text in autotexts:
    pct_text.set_color('white')
嵌套組合
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
# Data labels
def autolabel(rects, ax=None):
    """Write each bar's height as a text label above the bar.

    Args:
        rects: Iterable of bar patches (for example the container returned
            by ``Axes.bar``). Each item must provide ``get_height()``,
            ``get_x()`` and ``get_width()``.
        ax: Axes to draw the labels on. When omitted, every label is drawn
            on the axes its own bar belongs to (``rect.axes``), so the
            function does not rely on a module-level ``ax1`` being defined.
    """
    for rect in rects:
        target_ax = rect.axes if ax is None else ax
        height = rect.get_height()
        target_ax.text(
            rect.get_x() + rect.get_width() / 2.,  # horizontal centre of the bar
            1.02 * height,  # label sits at 102% of the bar height
            "{:,}".format(float(height)),  # thousands separator in the label
            ha='center', va='bottom', fontsize=18,
        )
# Build the data: values for the ten countries shown as bars ...
top10_arrivals_countries = [
    'CANADA', 'MEXICO', 'UNITED\nKINGDOM',
    'JAPAN', 'CHINA', 'GERMANY', 'SOUTH\nKOREA',
    'FRANCE', 'BRAZIL', 'AUSTRALIA',
]
top10_arrivals_values = [
    16.625687, 15.378026, 3.934508, 2.999718,
    2.618737, 1.769498, 1.628563, 1.419409,
    1.393710, 1.136974,
]
# ... and percentages per region shown in the inset pie chart.
arrivals_countries = [
    'WESTERN\nEUROPE', 'ASIA', 'SOUTH\nAMERICA',
    'OCEANIA', 'CARIBBEAN', 'MIDDLE\nEAST',
    'CENTRAL\nAMERICA', 'EASTERN\nEUROPE', 'AFRICA',
]
arrivals_percent = [36.9, 30.4, 13.8, 4.4, 4.0, 3.6, 2.9, 2.6, 1.5]

# Main plot: one bar per country, tick labels set to the country names.
fig, ax1 = plt.subplots(figsize=(20, 12))
bars = ax1.bar(range(10), top10_arrivals_values, color='blue')
plt.xticks(range(10), top10_arrivals_countries, fontsize=18)

# Inset axes nested inside the bar chart, holding the pie chart.
ax2 = inset_axes(ax1, width=6, height=6, loc=5)
explode = (0.08, 0.08, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05)
patches, texts, autotexts = ax2.pie(
    arrivals_percent,
    labels=arrivals_countries,
    autopct='%1.1f%%',
    explode=explode,
)

# Enlarge the region labels and the percentage labels of the pie.
for pie_text in texts + autotexts:
    pie_text.set_fontsize(16)

# Hide the frame around the bar chart.
for border in ax1.spines.values():
    border.set_visible(False)

# Annotate every bar with its value.
autolabel(bars)
3D
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()

# Set up the 3D axes. Creating them through the figure registers them with
# it; a bare Axes3D(fig) is no longer added to the figure automatically on
# current matplotlib releases, which leaves the plot empty.
ax = fig.add_subplot(111, projection='3d')

# Regular grid over [-4, 4) x [-4, 4) with a step of 0.25.
x = np.arange(-4, 4, 0.25)
y = np.arange(-4, 4, 0.25)
X, Y = np.meshgrid(x, y)

# Surface to plot: z = x^2 + y^2.
Z = (X**2 + Y**2)

ax.plot_surface(X, Y, Z, cmap='rainbow')

# Project the contour lines along the z axis onto the plane z = -2.
# The keyword is `zdir`; the original `zdim` is not a contour parameter.
ax.contour(X, Y, Z, zdir='z', offset=-2, cmap='rainbow')

# Optional: restrict the visible z-range.
# ax.set_zlim(-2, 2)
plt.show()