from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor
import numpy as np
# Load the dataset
# Fetch the bundled Boston dataset and split it into the feature matrix X
# and the target vector y.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# pinned scikit-learn version still provides it.
boston = load_boston()
X = boston.data
y = boston.target

# Print both shapes, one per line, as a quick sanity check.
print(X.shape, y.shape, sep='\n')
# Define the cross-validation score
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV


def mse_cv(model=None, X_train=None, y_train=None):
    """Return the per-fold mean squared error of ``model`` under 10-fold CV.

    Args:
        model: Unfitted scikit-learn compatible regressor; it is cloned and
            refitted for every fold by ``cross_val_score``.
        X_train: Feature matrix.
        y_train: Target vector.

    Returns:
        Array with one positive MSE value per fold (10 entries).
    """
    # Pass the KFold splitter itself. Calling .get_n_splits() on it yields
    # the plain integer 10, which would silently drop shuffle=True and
    # random_state=42 and fall back to unshuffled folds.
    kf = KFold(n_splits=10, shuffle=True, random_state=42)
    # scikit-learn reports errors negated (greater is better); flip the sign
    # back so callers see ordinary, positive MSE values.
    neg_mse = cross_val_score(
        model,
        X_train,
        y_train,
        scoring='neg_mean_squared_error',
        cv=kf,
        n_jobs=-1,
    )
    return -neg_mse
# Define the model
# Baseline: a LightGBM regressor with default hyperparameters.
model = LGBMRegressor()

# Cross-validated error, printed as mean(std) across the folds.
score = mse_cv(model, X, y)
print(f'{score.mean():.4f}({score.std():.4f})')

# Fit on the full data set and print the in-sample mean squared error.
model.fit(X, y)
print(f'{mean_squared_error(model.predict(X), y):.4f}')
# Define the candidate parameter list
# Candidate hyperparameter values for LGBMRegressor.
# scikit-learn's search classes read each tuple as a list of discrete
# candidates (here: the two endpoints), not as a continuous interval.
# The keys are LightGBM parameter names: `min_child_samples` and
# `colsample_bytree` stand in for the random-forest style
# `min_samples_split` and `max_features`, which LightGBM does not recognise
# and would therefore ignore during the search.
param_dist = {
    'n_estimators': (10, 250),
    'min_child_samples': (2, 25),
    'colsample_bytree': (0.1, 0.999),
    'max_depth': (5, 15),
}
# 1. Hyperparameter tuning with grid search
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV

# Exhaustively evaluate every combination listed in param_dist, scoring each
# one by negated MSE under 5-fold cross-validation on all available cores.
grid_search = GridSearchCV(
    model,
    param_dist,
    cv=5,
    n_jobs=-1,
    scoring="neg_mean_squared_error",
)
grid_search.fit(X, y)

# Show the estimator configured with the best-scoring combination.
print(grid_search.best_estimator_)
# 2. Hyperparameter tuning with random search
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_uniform
from sklearn.model_selection import RandomizedSearchCV

# Sample from real distributions rather than two-element tuples: a tuple is
# treated as a discrete list of candidates, which leaves only 2**4 = 16
# combinations and makes n_iter=100 degenerate into a small grid search.
# Keys are LightGBM parameter names so that every dimension has an effect.
random_param_dist = {
    'n_estimators': sp_randint(10, 251),         # integers 10..250 (upper bound exclusive)
    'min_child_samples': sp_randint(2, 26),      # integers 2..25
    'colsample_bytree': sp_uniform(0.1, 0.899),  # floats in [0.1, 0.999] (loc, scale)
    'max_depth': sp_randint(5, 16),              # integers 5..15
}

# Draw 100 random configurations and score each by negated MSE under 5-fold
# cross-validation on all available cores.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=random_param_dist,
    n_iter=100,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    cv=5,
)
random_search.fit(X, y)

# Show the estimator configured with the best-scoring sample.
print(random_search.best_estimator_)
# 3. Hyperparameter tuning with Bayesian optimization
from bayes_opt import BayesianOptimization


def lgb_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Objective for Bayesian optimisation: mean 5-fold CV score of LightGBM.

    The optimiser proposes every value as a float, so the integer-valued
    hyperparameters are truncated with ``int`` before use.

    Args:
        n_estimators: Number of boosting rounds.
        min_samples_split: Minimum-samples constraint; forwarded to LightGBM
            as ``min_child_samples``, its closest equivalent.
        max_features: Fraction of features used per tree (capped at 0.999);
            forwarded to LightGBM as ``colsample_bytree``.
        max_depth: Maximum tree depth.

    Returns:
        Mean of the negated mean absolute error over the 5 folds (higher is
        better, so it can be maximised directly).
    """
    # LightGBM does not recognise the random-forest names `min_samples_split`
    # and `max_features`; passing them through unchanged would leave those
    # two search dimensions without any effect on the model.
    # NOTE(review): the grid and random searches score with
    # neg_mean_squared_error while this objective uses MAE — confirm intended.
    estimator = LGBMRegressor(
        n_estimators=int(n_estimators),
        min_child_samples=int(min_samples_split),
        colsample_bytree=min(max_features, 0.999),  # float
        max_depth=int(max_depth),
        random_state=2,
    )
    fold_scores = cross_val_score(
        estimator, X, y, scoring='neg_mean_absolute_error', cv=5
    )
    return fold_scores.mean()
# Optimise lgb_cv over the box below; each key must match one of lgb_cv's
# argument names, and each tuple is the (lower, upper) bound for it.
lgb_bo = BayesianOptimization(
    f=lgb_cv,
    pbounds=dict(
        n_estimators=(10, 250),
        min_samples_split=(2, 25),
        max_features=(0.1, 0.999),
        max_depth=(5, 15),
    ),
)

# 21 initial points followed by 90 optimisation iterations.
lgb_bo.maximize(n_iter=90, init_points=21)

# Report the parameter set that achieved the highest objective value.
print(lgb_bo.max['params'])