機器學習之分類算法應用篇

和上一篇聚類算法類似,本文主要分享一段封裝好的代碼,用於對比 sklearn 中常用分類器的分類結果,方便在初步比較模型時使用,程序運行結果如下圖所示。先給出代碼,後續再做詳細分析。

可執行Python3代碼:

# 分類問題可運行代碼
from collections import Counter
from sklearn.datasets import load_iris
from imblearn.combine import SMOTEENN
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import threading
import warnings
import numpy as np
import time
# Ignore every warning raised from here on (process-wide filter).
# NOTE(review): presumably done to keep the printed reports readable; it also
# hides deprecation and convergence warnings that can matter when debugging.
warnings.filterwarnings("ignore")

# Classifiers to compare, as (short label, unfitted estimator) pairs.
# Each estimator is constructed with its default hyper-parameters; the label
# is what gets printed during evaluation and used as the bar-chart tick text.
model_list = [
    (label, estimator_cls())
    for label, estimator_cls in (
        ("LR", LogisticRegression),
        ("DT", DecisionTreeClassifier),
        ("NB", GaussianNB),
        ("RF", RandomForestClassifier),
        ("GBDT", GradientBoostingClassifier),
    )
]

# Load the iris dataset as a feature matrix X and an integer label vector y.
iris = load_iris()
X, y = iris.data, iris.target

# Oversample minority classes with SMOTE so every class has equal support.
# `fit_resample` replaces the old `fit_sample`, which imbalanced-learn
# deprecated and later removed; calling the old name raises AttributeError
# on current releases.
# NOTE(review): iris is believed to be class-balanced already, in which case
# this step adds no samples and only marks where resampling belongs -- confirm
# before reusing with another dataset.
x_smo, y_smo = SMOTE(random_state=0).fit_resample(X, y)

# Evaluate every classifier inside the same preprocessing pipeline using
# 5-fold cross-validated predictions. For each model, record the elapsed
# wall-clock time in `time_` and the overall accuracy in `acc_`, in the same
# order as `model_list`.
time_, acc_ = [], []
for name, estimator in model_list:
    print("Now ", name, end=" ")
    # Min-max scale the features, reduce with PCA (n_components=0.99 asks PCA
    # to keep enough components to explain 99% of the variance), then classify.
    pipeline = Pipeline([('st', MinMaxScaler()),
                         ('pca', PCA(n_components=0.99)),
                         ('clf', estimator)
                         ])
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump if the system clock
    # is adjusted.
    started = time.perf_counter()
    y_pred = cross_val_predict(pipeline, x_smo, y_smo, cv=5)
    elapsed = time.perf_counter() - started
    report = classification_report(y_smo, y_pred)
    print("({:.2f} s)".format(elapsed))
    print(report)
    time_.append(elapsed)
    # Compute accuracy directly from the predictions. Picking a token out of
    # the printed report by position only works for one report layout and a
    # fixed number of single-token class labels, and the printed value is
    # rounded to two decimals.
    acc_.append(float(np.mean(np.asarray(y_pred) == np.asarray(y_smo))))

# Draw two side-by-side bar charts from the collected results:
# left = cross-validation time per classifier, right = accuracy per classifier.
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(18, 7)

# Both charts share the same tick labels: one per entry in model_list.
tick_labels = [name for name, _ in model_list]

time_positions = np.arange(len(time_))
ax1.bar(time_positions, time_)
ax1.set_title("Time costed for each classifier (s)")
ax1.set_xticks(time_positions)
ax1.set_xticklabels(tick_labels)
ax1.set_ylabel("Time(s)")

# Size the accuracy axis from acc_ itself rather than from time_, and drop
# the "(s)" suffix from the title: accuracy is a ratio, not a duration.
acc_positions = np.arange(len(acc_))
ax2.bar(acc_positions, acc_)
ax2.set_title("Accuracy for each classifier")
ax2.set_xticks(acc_positions)
ax2.set_xticklabels(tick_labels)
ax2.set_ylabel("Accuracy")

plt.suptitle("Results on different classifiers", fontweight="bold", fontsize=15)
plt.show()

(完)

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章