Pearson 相關係數可以幫助我們篩選特徵。
使用 PairGrid 可以依照需求,自訂網格中各個部分(上三角、對角線、下三角)要展示的圖形。
# Build the frame used for the pairs plot in one pass:
#   1. keep only the 'score', 'A', 'B' and 'C' columns of `features`;
#   2. turn positive and negative infinity into NaN;
#   3. rename the 'A'/'B'/'C' columns to lower case;
#   4. drop every row that still contains a NaN.
plot_data = (
    features[['score', 'A', 'B', 'C']]
    .replace({np.inf: np.nan, -np.inf: np.nan})
    .rename(columns={'A': 'a', 'B': 'b', 'C': 'c'})
    .dropna()
)
def corr_func(x, y, **kwargs):
    """Annotate the current axes with the Pearson correlation of ``x`` and ``y``.

    Args:
        x: First sequence of values passed to ``np.corrcoef``.
        y: Second sequence of values passed to ``np.corrcoef``.
        **kwargs: Accepted so the function can be invoked with extra keyword
            arguments (for example when used as a ``PairGrid`` mapping
            callback); they are not used.

    Returns:
        None. The only effect is the text drawn on the current axes.
    """
    # The off-diagonal entry of the 2x2 correlation matrix is corr(x, y).
    pearson_r = np.corrcoef(x, y)[0, 1]
    label = 'r = {:.2f}'.format(pearson_r)

    current_axes = plt.gca()
    # transAxes makes (.2, .8) a fraction of the axes box instead of data
    # units, so the label position does not depend on the data range.
    current_axes.annotate(
        label,
        xy=(.2, .8),
        xycoords=current_axes.transAxes,
        size=30,
    )
# Create the grid of pairwise axes over the columns of plot_data.
grid = sns.PairGrid(data=plot_data, height=4)

# Upper triangle: scatter plot of each pair of columns.
grid.map_upper(plt.scatter, alpha=0.6)

# Diagonal: histogram of each individual column.
grid.map_diag(plt.hist, edgecolor='black')

# Lower triangle: write the correlation coefficient, then draw a 2-D kernel
# density estimate on the same axes.
grid.map_lower(corr_func)
grid.map_lower(sns.kdeplot, cmap=plt.cm.Reds)

# Title for the whole figure. y is in figure coordinates, so 1.05 places the
# title slightly above the top edge, clear of the first row of axes.
plt.suptitle('Pairs Plot of Energy Data', fontsize=28, y=1.05)