python xgboost 調參

原創

2019-09-07 14:41

def modelfit(model, X_train, X_test, y_train, y_test, useTrainCV, eval_metric='auc', cv_folds=4,
             early_stopping_rounds=20):
    """Optionally size ``n_estimators`` by cross-validation, fit ``model``, print a score report.

    Args:
        model: Estimator exposing ``get_xgb_params``, ``get_params``,
            ``set_params``, ``fit`` and ``predict_proba`` (presumably an
            ``xgboost.XGBClassifier`` -- confirm against callers).
        X_train: Training features.
        X_test: Hold-out features, used only for the printed report.
        y_train: Training labels.
        y_test: Hold-out labels, used only for the printed report.
        useTrainCV: When truthy, run ``xgb.cv`` first and overwrite the
            model's ``n_estimators`` with the number of rows in the CV result.
        eval_metric: Metric name forwarded to both ``xgb.cv`` and ``model.fit``.
        cv_folds: Number of folds handed to ``xgb.cv``.
        early_stopping_rounds: Forwarded to ``xgb.cv``; boosting stops when
            this many consecutive rounds bring no improvement.

    Returns:
        The same ``model`` object after fitting on the training data.
    """
    print('Model eval_metric is % s' % eval_metric)

    if useTrainCV:
        booster_params = model.get_xgb_params()
        # The model's current n_estimators acts as the upper bound on CV rounds.
        round_cap = model.get_params()['n_estimators']
        train_matrix = xgb.DMatrix(X_train, label=y_train)
        cv_history = xgb.cv(
            booster_params,
            train_matrix,
            num_boost_round=round_cap,
            nfold=cv_folds,
            metrics=eval_metric,
            early_stopping_rounds=early_stopping_rounds,
        )
        # One row per boosting round that was kept, so the row count is the
        # tree count to train with.
        best_rounds = cv_history.shape[0]
        print("Best Iteration: %d" % best_rounds)
        model.set_params(n_estimators=best_rounds)

    # Train on the full training split with the (possibly updated) tree count.
    # NOTE(review): newer XGBoost releases appear to have dropped the
    # ``eval_metric`` argument of ``fit`` -- confirm against the installed version.
    model.fit(X_train, y_train, eval_metric=eval_metric)

    # The report always uses a fixed 0.5 decision threshold.
    report = myscoring(model, X_train, y_train, X_test, y_test, threshold=0.5)
    print(report)
    return model

def myscoring(model, X_train, y_train, X_test, y_test, threshold=0.5):
    """Build a train-vs-test table of binary classification scores.

    Args:
        model: Fitted estimator whose ``predict_proba`` returns a 2-D array;
            column 1 is taken as the positive-class probability.
        X_train: Training features.
        y_train: Training labels.
        X_test: Hold-out features.
        y_test: Hold-out labels.
        threshold: Probabilities greater than or equal to this value are
            labelled 1, everything else 0.

    Returns:
        A DataFrame with columns ``'evaluating indicator'``, ``'Train'`` and
        ``'Test'`` and one row each for accuracy, ROC AUC, recall, F1 and
        precision.
    """
    print("threshold %s"%threshold)

    # Score both splits first: positive-class probability, then hard 0/1 labels.
    train_proba = model.predict_proba(X_train)[:, 1]
    train_labels = (train_proba >= threshold) * 1

    test_proba = model.predict_proba(X_test)[:, 1]
    test_labels = (test_proba >= threshold) * 1

    def _five_scores(y_true, proba, labels):
        # AUC is computed from raw probabilities; the other four use the
        # thresholded labels.
        return [
            metrics.accuracy_score(y_true, labels),
            metrics.roc_auc_score(y_true, proba),
            metrics.recall_score(y_true, labels),
            metrics.f1_score(y_true, labels),
            metrics.precision_score(y_true, labels),
        ]

    column_order = ['evaluating indicator', 'Train', 'Test']
    # The 'Precesion' spelling is kept as-is because it is part of the
    # returned data.
    indicator_names = ['Accuracy', 'AUC Score', 'Recall', 'F1-score', 'Precesion']

    report = pd.DataFrame(
        data={
            column_order[0]: indicator_names,
            column_order[1]: _five_scores(y_train, train_proba, train_labels),
            column_order[2]: _five_scores(y_test, test_proba, test_labels),
        }
    )

    # Select explicitly so the column order is fixed.
    return report[column_order]

def features_importance(xgb, features):
    """Rank features by the model's importance scores, dropping non-positive ones.

    Args:
        xgb: Fitted model exposing ``feature_importances_``. Despite its name
            this is the model object; inside this function the parameter
            shadows any module-level ``xgb`` alias.
        features: Iterable of feature names, paired positionally with
            ``xgb.feature_importances_``. Pairing stops at the shorter of
            the two.

    Returns:
        A DataFrame with columns ``"feature"`` and ``"importance"``, sorted by
        importance in descending order and restricted to rows whose
        importance is greater than 0. When there is nothing to rank, an empty
        DataFrame with the same two columns is returned.
    """
    rows = [
        {"feature": name, "importance": score}
        for name, score in zip(features, xgb.feature_importances_)
    ]
    # list.sort is stable, so features with equal importance keep input order.
    rows.sort(key=lambda row: row["importance"], reverse=True)

    # Naming the columns explicitly guarantees "importance" exists even when
    # ``rows`` is empty; without it the filter below raises KeyError.
    ranking = pd.DataFrame(rows, columns=["feature", "importance"])
    return ranking[ranking["importance"] > 0]

##給定基本參數，通過 modelfit 得到合適的 n_estimators 參數，
##網格調參或者貝葉斯調參
##訓練集測試集指標計算
##特徵重要性輸出
##特徵重要性繪圖

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

python xgboost 調參

torch learning 1

windows下配置pytorch環境的步驟。

Python數據可視化常用案例

可視化 --python 窗圖

python正則模塊re

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結