之前介紹了隨機森林、極端隨機森林以及深度森林的原理,本次介紹一下相關的代碼
本次實驗全部使用糖尿病數據集
數據導入
import pandas as pd

# Load the diabetes train/test splits. The files have no header row;
# column 8 holds the 0/1 label, columns 0-7 the eight features.
train = pd.read_csv("/Users/admin/Desktop/database/diabetes/diabetes_train.txt", header=None, index_col=False)
test = pd.read_csv("/Users/admin/Desktop/database/diabetes/diabetes_test.txt", header=None, index_col=False)

# Split each frame into a flat label vector and an (n, 8) feature matrix.
label = train[8].to_numpy()
data = train.drop(columns=8).to_numpy()
y_test = test[8].to_numpy()
X_test = test.drop(columns=8).to_numpy()
隨機森林
# Random forest: fit 1000 regression trees on the training split,
# predict continuous scores for the test split, then threshold the
# scores into 0/1 class labels before measuring accuracy.
rf = RandomForestRegressor(n_estimators=1000).fit(data, label)
predictions_rf = rf.predict(X_test)
pred_rf = judge(predictions_rf)  # map raw scores to {0, 1}
acc = accuracy_score(y_test, pred_rf)
print("Test Accuracy of rf = {:.2f} %".format(acc * 100))
Test Accuracy of rf = 80.97 %
極端隨機森林
# Extra-Trees: fit 1000 extremely randomized regression trees, predict
# continuous scores for the test split, then threshold them into 0/1
# class labels before measuring accuracy.
etr = ExtraTreesRegressor(n_estimators=1000).fit(data, label)
predictions_etr = etr.predict(X_test)
pred_etr = judge(predictions_etr)  # map raw scores to {0, 1}
acc = accuracy_score(y_test, pred_etr)
print("Test Accuracy of pred_etr = {:.2f} %".format(acc * 100))
Test Accuracy of pred_etr = 79.85 %
深度森林
from gcforest.gcforest import GCForest
def get_toy_config():
    """Build a minimal GCForest cascade configuration for binary classification.

    Each cascade level uses three base learners (a random forest, an
    extremely-randomized forest, and a logistic regression), grows up to
    100 levels, and stops early after 3 levels without an accuracy gain.

    Returns:
        dict: a ``{"cascade": {...}}`` configuration accepted by GCForest.
    """
    cascade = {
        "random_state": 0,  # 0 or 1
        "n_cascadeRFtree": 1000,
        # A "layer" here corresponds to a "level" in the gcForest paper.
        "max_layers": 100,
        # Stop growing once 3 consecutive levels show no accuracy gain.
        "early_stopping_rounds": 3,
        "n_classes": 2,  # number of target classes
        # Three base learners per cascade level.
        "estimators": [
            {"n_folds": 2, "type": "RandomForestClassifier",
             "n_estimators": 10, "max_depth": None, "n_jobs": -1},
            {"n_folds": 2, "type": "ExtraTreesClassifier",
             "n_estimators": 10, "max_depth": None, "n_jobs": -1},
            {"n_folds": 2, "type": "LogisticRegression"},
        ],
    }
    return {"cascade": cascade}
# Train a deep forest (gcForest) on the diabetes training split and
# evaluate it on the held-out test split.
config=get_toy_config()
gc = GCForest(config)
# X_train_enc is the output of the final cascade layer of each model:
# the per-class probabilities for the training samples.
X_train_enc = gc.fit_transform(data, label)
y_pred = gc.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Test Accuracy of GcForest = {:.2f} %".format(acc * 100))
Test Accuracy of GcForest = 80.22 %
MNIST
可以用深度森林跑一下深度學習的常用數據集MNIST
# Score a trained deep forest on an MNIST validation split.
# NOTE(review): x_valid / y_valid (and the MNIST-trained `gc`) are not
# defined in this excerpt — presumably set up earlier; confirm.
y_pred = gc.predict(x_valid)
acc = accuracy_score(y_valid, y_pred)
print("Test Accuracy of GcForest = {:.2f} %".format(acc * 100))
Test Accuracy of GcForest = 97.32 %