import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
Using matplotlib backend: MacOSX
讀取數據集並觀察數據特點
字段意義和數據觀察結果
- longitude: 經度
- latitude: 緯度
- housing_median_age: 房齡中位數
- total_rooms: 房間總數
- total_bedrooms: 臥室總數
- population: 人口數
- households: 家庭戶數
- median_income: 收入中位數
- median_house_value: 房價中位數
- ocean_proximity: 與海洋的鄰近程度(類別型字段)
數據加載
# Load the housing dataset; the original data can be browsed at
# https://github.com/ageron/handson-ml/tree/master/datasets/housing
housing_df = pd.read_csv('https://query.data.world/s/yffqqcx3rsjlzspztxr6zt5iqd45kn')
# Preview the first 10 rows to inspect the data structure.
housing_df.head(10)
longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | ocean_proximity | |
---|---|---|---|---|---|---|---|---|---|---|
0 | -122.23 | 37.88 | 41.0 | 880.0 | 129.0 | 322.0 | 126.0 | 8.3252 | 452600.0 | NEAR BAY |
1 | -122.22 | 37.86 | 21.0 | 7099.0 | 1106.0 | 2401.0 | 1138.0 | 8.3014 | 358500.0 | NEAR BAY |
2 | -122.24 | 37.85 | 52.0 | 1467.0 | 190.0 | 496.0 | 177.0 | 7.2574 | 352100.0 | NEAR BAY |
3 | -122.25 | 37.85 | 52.0 | 1274.0 | 235.0 | 558.0 | 219.0 | 5.6431 | 341300.0 | NEAR BAY |
4 | -122.25 | 37.85 | 52.0 | 1627.0 | 280.0 | 565.0 | 259.0 | 3.8462 | 342200.0 | NEAR BAY |
5 | -122.25 | 37.85 | 52.0 | 919.0 | 213.0 | 413.0 | 193.0 | 4.0368 | 269700.0 | NEAR BAY |
6 | -122.25 | 37.84 | 52.0 | 2535.0 | 489.0 | 1094.0 | 514.0 | 3.6591 | 299200.0 | NEAR BAY |
7 | -122.25 | 37.84 | 52.0 | 3104.0 | 687.0 | 1157.0 | 647.0 | 3.1200 | 241400.0 | NEAR BAY |
8 | -122.26 | 37.84 | 42.0 | 2555.0 | 665.0 | 1206.0 | 595.0 | 2.0804 | 226700.0 | NEAR BAY |
9 | -122.25 | 37.84 | 52.0 | 3549.0 | 707.0 | 1551.0 | 714.0 | 3.6912 | 261100.0 | NEAR BAY |
數據的描述性統計
數據屬性
# Inspect the DataFrame summary.
# 1. Only total_bedrooms has missing values (20433 non-null out of 20640 rows);
#    every other column is complete.
# 2. Every column except ocean_proximity is float64; the distribution of the
#    ocean_proximity categories is examined separately.
housing_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
longitude 20640 non-null float64
latitude 20640 non-null float64
housing_median_age 20640 non-null float64
total_rooms 20640 non-null float64
total_bedrooms 20433 non-null float64
population 20640 non-null float64
households 20640 non-null float64
median_income 20640 non-null float64
median_house_value 20640 non-null float64
ocean_proximity 20640 non-null object
dtypes: float64(9), object(1)
memory usage: 1.6+ MB
# Count how many rows fall into each ocean_proximity category.
housing_df['ocean_proximity'].value_counts()
<1H OCEAN 9136
INLAND 6551
NEAR OCEAN 2658
NEAR BAY 2290
ISLAND 5
Name: ocean_proximity, dtype: int64
枚舉值釋義:
- <1H OCEAN: 距離海洋不到一小時路程
- INLAND: 內地,內陸
- NEAR OCEAN: 靠海
- NEAR BAY: 靠近海灣
- ISLAND: 島上
數據的描述性統計
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
housing_df.describe()
longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
---|---|---|---|---|---|---|---|---|---|
count | 20640.000000 | 20640.000000 | 20640.000000 | 20640.000000 | 20433.000000 | 20640.000000 | 20640.000000 | 20640.000000 | 20640.000000 |
mean | -119.569704 | 35.631861 | 28.639486 | 2635.763081 | 537.870553 | 1425.476744 | 499.539680 | 3.870671 | 206855.816909 |
std | 2.003532 | 2.135952 | 12.585558 | 2181.615252 | 421.385070 | 1132.462122 | 382.329753 | 1.899822 | 115395.615874 |
min | -124.350000 | 32.540000 | 1.000000 | 2.000000 | 1.000000 | 3.000000 | 1.000000 | 0.499900 | 14999.000000 |
25% | -121.800000 | 33.930000 | 18.000000 | 1447.750000 | 296.000000 | 787.000000 | 280.000000 | 2.563400 | 119600.000000 |
50% | -118.490000 | 34.260000 | 29.000000 | 2127.000000 | 435.000000 | 1166.000000 | 409.000000 | 3.534800 | 179700.000000 |
75% | -118.010000 | 37.710000 | 37.000000 | 3148.000000 | 647.000000 | 1725.000000 | 605.000000 | 4.743250 | 264725.000000 |
max | -114.310000 | 41.950000 | 52.000000 | 39320.000000 | 6445.000000 | 35682.000000 | 6082.000000 | 15.000100 | 500001.000000 |
數據分佈
# Histogram (50 bins) of every numeric attribute, drawn on one 20x15 figure.
housing_df.hist(bins=50, figsize=(20,15))
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x10d8efd68>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e45d198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e48e748>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x118efccf8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x118f392e8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x118f6a898>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x119283e48>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1192bf470>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1192bf4a8>]],
dtype=object)
地理數據可視化
參考:https://www.bigendiandata.com/2017-06-27-Mapping_in_Jupyter/
# Scatter plot of the district coordinates: marker size is population / 100,
# marker colour is median_house_value on the 'jet' colormap, and alpha=0.1
# makes each marker mostly transparent.
housing_df.plot(kind='scatter', x='longitude', y='latitude',
s=housing_df['population']/100, c='median_house_value', cmap=plt.get_cmap('jet'),
colorbar=True, alpha=0.1, figsize=(10,10))
<matplotlib.axes._subplots.AxesSubplot at 0x1199350b8>
https://public.tableau.com/views/1990_15748722575690/1990?:display_count=y&publish=yes&:origin=viz_share_link
尋找相關性
探索每個屬性與房價中位數(median_house_value)的相關性
使用相關性矩陣
由於數據集不大,這裏使用皮爾遜係數。可直接使用 DataFrame 自帶的 corr(method='pearson') 函數
直接使用現成字段
# Pearson correlation matrix of the numeric columns only. The text column
# ocean_proximity is excluded explicitly: recent pandas releases raise on
# non-numeric columns instead of silently dropping them.
corr = housing_df.select_dtypes(include='number').corr(method='pearson')
corr
longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
---|---|---|---|---|---|---|---|---|---|
longitude | 1.000000 | -0.924664 | -0.108197 | 0.044568 | 0.069608 | 0.099773 | 0.055310 | -0.015176 | -0.045967 |
latitude | -0.924664 | 1.000000 | 0.011173 | -0.036100 | -0.066983 | -0.108785 | -0.071035 | -0.079809 | -0.144160 |
housing_median_age | -0.108197 | 0.011173 | 1.000000 | -0.361262 | -0.320451 | -0.296244 | -0.302916 | -0.119034 | 0.105623 |
total_rooms | 0.044568 | -0.036100 | -0.361262 | 1.000000 | 0.930380 | 0.857126 | 0.918484 | 0.198050 | 0.134153 |
total_bedrooms | 0.069608 | -0.066983 | -0.320451 | 0.930380 | 1.000000 | 0.877747 | 0.979728 | -0.007723 | 0.049686 |
population | 0.099773 | -0.108785 | -0.296244 | 0.857126 | 0.877747 | 1.000000 | 0.907222 | 0.004834 | -0.024650 |
households | 0.055310 | -0.071035 | -0.302916 | 0.918484 | 0.979728 | 0.907222 | 1.000000 | 0.013033 | 0.065843 |
median_income | -0.015176 | -0.079809 | -0.119034 | 0.198050 | -0.007723 | 0.004834 | 0.013033 | 1.000000 | 0.688075 |
median_house_value | -0.045967 | -0.144160 | 0.105623 | 0.134153 | 0.049686 | -0.024650 | 0.065843 | 0.688075 | 1.000000 |
# Keep only the Pearson coefficients between median_house_value and the other
# attributes. Coefficients range from -1 to 1:
# 1. close to 1: strong positive correlation; 2. close to -1: strong negative
# correlation; 3. close to 0: no linear correlation.
corr['median_house_value'].sort_values(ascending=False)
median_house_value 1.000000
median_income 0.688075
total_rooms 0.134153
housing_median_age 0.105623
households 0.065843
total_bedrooms 0.049686
population -0.024650
longitude -0.045967
latitude -0.144160
Name: median_house_value, dtype: float64
組合字段
- population_per_household: 每個家庭的人口數
- rooms_per_household: 每個家庭的房間數
- bedrooms_per_room: 臥室的佔比
# Derive the three ratio features on a new DataFrame so housing_df itself is
# left untouched.
tmp_df = housing_df.assign(
    population_per_household=housing_df['population'] / housing_df['households'],
    rooms_per_household=housing_df['total_rooms'] / housing_df['households'],
    bedrooms_per_room=housing_df['total_bedrooms'] / housing_df['total_rooms'],
)
tmp_df.head(10)
longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | ocean_proximity | population_per_household | rooms_per_household | bedrooms_per_room | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -122.23 | 37.88 | 41.0 | 880.0 | 129.0 | 322.0 | 126.0 | 8.3252 | 452600.0 | NEAR BAY | 2.555556 | 6.984127 | 0.146591 |
1 | -122.22 | 37.86 | 21.0 | 7099.0 | 1106.0 | 2401.0 | 1138.0 | 8.3014 | 358500.0 | NEAR BAY | 2.109842 | 6.238137 | 0.155797 |
2 | -122.24 | 37.85 | 52.0 | 1467.0 | 190.0 | 496.0 | 177.0 | 7.2574 | 352100.0 | NEAR BAY | 2.802260 | 8.288136 | 0.129516 |
3 | -122.25 | 37.85 | 52.0 | 1274.0 | 235.0 | 558.0 | 219.0 | 5.6431 | 341300.0 | NEAR BAY | 2.547945 | 5.817352 | 0.184458 |
4 | -122.25 | 37.85 | 52.0 | 1627.0 | 280.0 | 565.0 | 259.0 | 3.8462 | 342200.0 | NEAR BAY | 2.181467 | 6.281853 | 0.172096 |
5 | -122.25 | 37.85 | 52.0 | 919.0 | 213.0 | 413.0 | 193.0 | 4.0368 | 269700.0 | NEAR BAY | 2.139896 | 4.761658 | 0.231774 |
6 | -122.25 | 37.84 | 52.0 | 2535.0 | 489.0 | 1094.0 | 514.0 | 3.6591 | 299200.0 | NEAR BAY | 2.128405 | 4.931907 | 0.192899 |
7 | -122.25 | 37.84 | 52.0 | 3104.0 | 687.0 | 1157.0 | 647.0 | 3.1200 | 241400.0 | NEAR BAY | 1.788253 | 4.797527 | 0.221327 |
8 | -122.26 | 37.84 | 42.0 | 2555.0 | 665.0 | 1206.0 | 595.0 | 2.0804 | 226700.0 | NEAR BAY | 2.026891 | 4.294118 | 0.260274 |
9 | -122.25 | 37.84 | 52.0 | 3549.0 | 707.0 | 1551.0 | 714.0 | 3.6912 | 261100.0 | NEAR BAY | 2.172269 | 4.970588 | 0.199211 |
# Correlate only the numeric columns (the text column ocean_proximity would
# make DataFrame.corr raise on recent pandas releases), then rank every
# attribute by its Pearson coefficient with median_house_value.
corr = tmp_df.select_dtypes(include='number').corr(method='pearson')
corr['median_house_value'].sort_values(ascending=False)
median_house_value 1.000000
median_income 0.688075
rooms_per_household 0.151948
total_rooms 0.134153
housing_median_age 0.105623
households 0.065843
total_bedrooms 0.049686
population_per_household -0.023737
population -0.024650
longitude -0.045967
latitude -0.144160
bedrooms_per_room -0.255880
Name: median_house_value, dtype: float64
可以看出 bedrooms_per_room 的相關係數爲負且絕對值最大(-0.255880),與房價呈負相關;population_per_household 的係數接近於 0,與房價幾乎沒有線性關係
數據清理與特徵工程
- total_bedrooms字段數據缺失(非空20433,總數爲20640),使用sklearn自帶的缺失值處理函數imputer
- ocean_proximity文本信息處理;
- 轉化數據流,定義任務步驟,自動順序執行;
缺失值處理
from sklearn.impute import SimpleImputer
# Work on a copy so housing_df itself keeps its missing values.
tmp_df = housing_df.copy()
# Fill the missing total_bedrooms entries with the column median.
imputer = SimpleImputer(strategy='median')
tmp_df['total_bedrooms'] = imputer.fit_transform(tmp_df[['total_bedrooms']])
# Confirm that total_bedrooms no longer has missing entries.
tmp_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
longitude 20640 non-null float64
latitude 20640 non-null float64
housing_median_age 20640 non-null float64
total_rooms 20640 non-null float64
total_bedrooms 20640 non-null float64
population 20640 non-null float64
households 20640 non-null float64
median_income 20640 non-null float64
median_house_value 20640 non-null float64
ocean_proximity 20640 non-null object
dtypes: float64(9), object(1)
memory usage: 1.6+ MB
處理文本信息
- 方法一:使用LabelEncoder將文本轉化爲整數,再使用OneHotEncoder將整數進行one-hot編碼
- 方法二:使用LabelBinarizer直接完成這兩步驟
from sklearn.preprocessing import LabelBinarizer
# LabelBinarizer turns each ocean_proximity category directly into a one-hot row.
encode = LabelBinarizer()
encode.fit_transform(housing_df['ocean_proximity'])
array([[0, 0, 0, 1, 0],
[0, 0, 0, 1, 0],
[0, 0, 0, 1, 0],
...,
[0, 1, 0, 0, 0],
[0, 1, 0, 0, 0],
[0, 1, 0, 0, 0]])
轉化流水線
定義數據轉化任務的步驟,讓任務流水化執行.
from sklearn.base import BaseEstimator, TransformerMixin
class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Pick a fixed set of DataFrame columns and pass them on as a NumPy array.

    scikit-learn works mainly with NumPy arrays, so the selected part of the
    DataFrame is converted to one. Custom transformers derive from
    ``BaseEstimator`` and ``TransformerMixin``.
    """

    def __init__(self, attribute_names):
        # Column labels that ``transform`` extracts.
        self.attribute_names = attribute_names

    def fit(self, x, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, x):
        """Return the selected columns of ``x`` as a NumPy array."""
        selected = x[self.attribute_names]
        return selected.values
def get_columns_index(df, columns):
    """Return the positional index of each requested column of ``df``.

    Args:
        df: DataFrame whose column order defines the positions.
        columns: Iterable of column labels to look up.

    Returns:
        A list of integer positions, in the same order as ``columns``.

    Raises:
        ValueError: If a requested label is not a column of ``df``.
    """
    # Materialise the labels once instead of once per requested column.
    labels = list(df.columns)
    return [labels.index(column) for column in columns]
def add_extra_features(x, rooms_ix, bedrooms_ix, population_ix, household_ix):
    """Append three ratio features as extra columns of ``x``.

    Args:
        x: 2-D array-like of numeric features, one row per sample.
        rooms_ix: Column position of the total room count.
        bedrooms_ix: Column position of the total bedroom count.
        population_ix: Column position of the population.
        household_ix: Column position of the household count.

    Returns:
        A NumPy array holding the columns of ``x`` followed by
        population-per-household, rooms-per-household and bedrooms-per-room,
        in that order.
    """
    # Accept any 2-D array-like (e.g. nested lists), not only ndarrays.
    x = np.asarray(x)
    households = x[:, household_ix]
    rooms = x[:, rooms_ix]
    population_per_household = x[:, population_ix] / households
    rooms_per_household = rooms / households
    bedrooms_per_room = x[:, bedrooms_ix] / rooms
    return np.c_[x, population_per_household, rooms_per_household, bedrooms_per_room]
# 轉化流水線
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
# Numeric pipeline
num_attribute_names = ['longitude', 'latitude', 'housing_median_age', 'total_rooms',
                       'total_bedrooms', 'population', 'households', 'median_income']
# The selector emits columns in num_attribute_names order, so the positions
# used by add_extra_features are looked up in that selection rather than in the
# full DataFrame (the two only agree while the numeric columns happen to come
# first and in the same order).
rooms_ix, bedrooms_ix, population_ix, household_ix = get_columns_index(
    housing_df[num_attribute_names],
    ['total_rooms', 'total_bedrooms', 'population', 'households'])
num_pipline = Pipeline([
    ('selector', DataFrameSelector(num_attribute_names)),
    ('imputer', SimpleImputer(strategy='median')),
    # validate=False is stated explicitly: the previous step already yields a
    # NumPy array, and relying on the changing default emits a FutureWarning.
    ('attribs_adder', FunctionTransformer(
        add_extra_features,
        validate=False,
        kw_args={'rooms_ix': rooms_ix, 'bedrooms_ix': bedrooms_ix,
                 'population_ix': population_ix, 'household_ix': household_ix})),
    ('std_scaler', StandardScaler()),
])
# Text (categorical) pipeline
# NOTE(review): newer scikit-learn releases rename `sparse` to `sparse_output`;
# confirm the installed version before upgrading.
text_pipline = Pipeline([
    ('selector', DataFrameSelector(['ocean_proximity'])),
    ('text_encoder', OneHotEncoder(sparse=False)),
])
# Combine both pipelines side by side: numeric features first, one-hot columns last.
union_pipplines = FeatureUnion(transformer_list=[
    ('num_pipline', num_pipline),
    ('text_pipline', text_pipline),
])
housing_prepares = union_pipplines.fit_transform(housing_df)
print('shape: ', housing_prepares.shape)
print('data head 5: \n', housing_prepares[0:5, :])
shape: (20640, 16)
data head 5:
[[-1.32783522 1.05254828 0.98214266 -0.8048191 -0.97247648 -0.9744286
-0.97703285 2.34476576 -0.04959654 0.62855945 -1.02998783 0.
0. 0. 1. 0. ]
[-1.32284391 1.04318455 -0.60701891 2.0458901 1.35714343 0.86143887
1.66996103 2.33223796 -0.09251223 0.32704136 -0.8888972 0.
0. 0. 1. 0. ]
[-1.33282653 1.03850269 1.85618152 -0.53574589 -0.82702426 -0.82077735
-0.84363692 1.7826994 -0.02584253 1.15562047 -1.29168566 0.
0. 0. 1. 0. ]
[-1.33781784 1.03850269 1.85618152 -0.62421459 -0.71972345 -0.76602806
-0.73378144 0.93296751 -0.0503293 0.15696608 -0.4496128 0.
0. 0. 1. 0. ]
[-1.33781784 1.03850269 1.85618152 -0.46240395 -0.61242263 -0.75984669
-0.62915718 -0.012881 -0.08561576 0.3447108 -0.63908657 0.
0. 0. 1. 0. ]]
/Users/cleland/.pyenv/versions/3.7.1/envs/base/lib/python3.7/site-packages/sklearn/preprocessing/_function_transformer.py:97: FutureWarning: The default validate=True will be replaced by validate=False in 0.22.
"validate=False in 0.22.", FutureWarning)
/Users/cleland/.pyenv/versions/3.7.1/envs/base/lib/python3.7/site-packages/sklearn/preprocessing/_function_transformer.py:97: FutureWarning: The default validate=True will be replaced by validate=False in 0.22.
"validate=False in 0.22.", FutureWarning)
模型訓練
median_house_value 25%~75%: 119600~264725 美元
訓練數據集和測試數據集劃分
from sklearn.model_selection import train_test_split
# Target vector: the median house value of every row.
housing_label = housing_df['median_house_value'].values
# Fix the seed so the split, and every metric computed from it, is reproducible.
# NOTE(review): housing_prepares was produced by fit_transform on the full
# dataset, so imputation/scaling statistics include the test rows — consider
# splitting before fitting the preprocessing pipeline.
x_train, x_test, y_train, y_test = train_test_split(
    housing_prepares, housing_label, random_state=42)
線性迴歸
測試數據集上的RMSE約爲67409美元(見下方輸出)。而實際房價大部分分佈在119600~264725 美元之間,預測偏差較大。
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
# An empty grid makes GridSearchCV run plain 5-fold cross-validation and then
# refit the model on the whole training set, so fitting the bare model
# beforehand would be wasted work.
estimator = GridSearchCV(LinearRegression(), param_grid={}, cv=5)
estimator.fit(x_train, y_train)
y_test_predict = estimator.predict(x_test)
# Compute the test-set MSE once and derive the RMSE from it.
mse = mean_squared_error(y_test, y_test_predict)
print('MSE: ', mse)
print('RMSE: ', np.sqrt(mse))
# Best parameter combination (empty here because the grid is empty)
print(u'最佳參數: \n', estimator.best_params_)
# Mean cross-validated score of the best candidate (R^2 by default for
# regressors), not an error in dollars
print(u'結果: \n', estimator.best_score_)
# Model refit with the best parameters
print(u'估計器: \n', estimator.best_estimator_)
# Full cross-validation results
print(u'交叉驗證結果: \n', estimator.cv_results_)
MSE: 4543924391.051387
RMSE: 67408.63736236913
最佳參數:
{}
結果:
0.6432229836429453
估計器:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
交叉驗證結果:
{'mean_fit_time': array([0.14581642]), 'std_fit_time': array([0.04799657]), 'mean_score_time': array([0.00797019]), 'std_score_time': array([0.00313379]), 'params': [{}], 'split0_test_score': array([0.65722112]), 'split1_test_score': array([0.64810884]), 'split2_test_score': array([0.65080866]), 'split3_test_score': array([0.62162591]), 'split4_test_score': array([0.63835039]), 'mean_test_score': array([0.64322298]), 'std_test_score': array([0.01238981]), 'rank_test_score': array([1], dtype=int32)}
決策樹
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
# An empty grid makes GridSearchCV run plain 5-fold cross-validation and then
# refit the tree on the whole training set, so fitting the bare model
# beforehand would be wasted work.
estimator = GridSearchCV(DecisionTreeRegressor(), param_grid={}, cv=5)
estimator.fit(x_train, y_train)
y_test_predict = estimator.predict(x_test)
# Compute the test-set MSE once and derive the RMSE from it.
mse = mean_squared_error(y_test, y_test_predict)
print('MSE: ', mse)
print('RMSE: ', np.sqrt(mse))
# Best parameter combination (empty here because the grid is empty)
print(u'最佳參數: \n', estimator.best_params_)
# Mean cross-validated score of the best candidate (R^2 by default for
# regressors), not an error in dollars
print(u'結果: \n', estimator.best_score_)
# Model refit with the best parameters
print(u'估計器: \n', estimator.best_estimator_)
# Full cross-validation results
print(u'交叉驗證結果: \n', estimator.cv_results_)
MSE: 4484788001.235852
RMSE: 66968.5597966378
最佳參數:
{}
結果:
0.6330416409343302
估計器:
DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
max_leaf_nodes=None, min_impurity_decrease=0.0,
min_impurity_split=None, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0,
presort=False, random_state=None, splitter='best')
交叉驗證結果:
{'mean_fit_time': array([0.36660266]), 'std_fit_time': array([0.02361012]), 'mean_score_time': array([0.00709462]), 'std_score_time': array([0.00589681]), 'params': [{}], 'split0_test_score': array([0.64208581]), 'split1_test_score': array([0.63046887]), 'split2_test_score': array([0.64717912]), 'split3_test_score': array([0.60435885]), 'split4_test_score': array([0.64111556]), 'mean_test_score': array([0.63304164]), 'std_test_score': array([0.01533738]), 'rank_test_score': array([1], dtype=int32)}
隨機森林
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
# An empty grid makes GridSearchCV run plain 5-fold cross-validation and then
# refit the forest on the whole training set, so fitting the bare model
# beforehand would be wasted work.
# n_estimators is left at the library default, which the FutureWarning reports
# as changing from 10 to 100 in scikit-learn 0.22.
estimator = GridSearchCV(RandomForestRegressor(), param_grid={}, cv=5)
estimator.fit(x_train, y_train)
y_test_predict = estimator.predict(x_test)
# Compute the test-set MSE once and derive the RMSE from it.
mse = mean_squared_error(y_test, y_test_predict)
print('MSE: ', mse)
print('RMSE: ', np.sqrt(mse))
# Best parameter combination (empty here because the grid is empty)
print(u'最佳參數: \n', estimator.best_params_)
# Mean cross-validated score of the best candidate (R^2 by default for
# regressors), not an error in dollars
print(u'結果: \n', estimator.best_score_)
# Model refit with the best parameters
print(u'估計器: \n', estimator.best_estimator_)
# Full cross-validation results
print(u'交叉驗證結果: \n', estimator.cv_results_)
/Users/cleland/.pyenv/versions/3.7.1/envs/base/lib/python3.7/site-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
"10 in version 0.20 to 100 in 0.22.", FutureWarning)
MSE: 2599366029.248785
RMSE: 50983.97816225
最佳參數:
{}
結果:
0.7895634451066635
估計器:
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
max_features='auto', max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=10,
n_jobs=None, oob_score=False, random_state=None,
verbose=0, warm_start=False)
交叉驗證結果:
{'mean_fit_time': array([2.7657424]), 'std_fit_time': array([0.1215011]), 'mean_score_time': array([0.02362027]), 'std_score_time': array([0.00405571]), 'params': [{}], 'split0_test_score': array([0.79759449]), 'split1_test_score': array([0.78053044]), 'split2_test_score': array([0.80471751]), 'split3_test_score': array([0.77450088]), 'split4_test_score': array([0.79047391]), 'mean_test_score': array([0.78956345]), 'std_test_score': array([0.01098588]), 'rank_test_score': array([1], dtype=int32)}
參考
- 機器學習實戰-基於Scikit-Learn和TensorFlow