6.13 sklearn



機器學習相關內容(不是特別懂)

簡單來說,就是自己隨機生成一個數據集,然後用3種擬合算法擬合,然後評估這個模型擬合的效果。

代碼如下

from sklearn import datasets
from sklearn import model_selection
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

# Compare three classifiers (GaussianNB, SVC, RandomForestClassifier) on a
# synthetic binary-classification dataset using 10-fold cross-validation,
# and report each model's mean accuracy, F1 score and ROC-AUC over the folds.
X, Y = datasets.make_classification(n_samples=1000, n_features=10,
                                    n_informative=2, n_redundant=2,
                                    n_repeated=0, n_classes=2)

kf = model_selection.KFold(n_splits=10)
n_splits = kf.get_n_splits(X)

# One (name, factory) pair per model; the factory builds a fresh, untrained
# classifier for every fold so no state leaks between folds.
models = [
    ("GaussianNB", lambda: GaussianNB()),
    ("SVC", lambda: SVC(C=1.0, kernel='rbf')),  # C=1.0 (was the obscure 1e00)
    ("RandomForestClassifier", lambda: RandomForestClassifier(n_estimators=100)),
]

# Running [accuracy, f1, auc] sums per model, indexed in parallel with `models`.
# Replaces the nine separate acc_sum1/f1_sum1/... variables of the original.
metric_sums = [[0.0, 0.0, 0.0] for _ in models]

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
    for sums, (name, make_clf) in zip(metric_sums, models):
        clf = make_clf()
        clf.fit(X_train, y_train)
        pred = clf.predict(X_test)
        sums[0] += metrics.accuracy_score(y_test, pred)
        sums[1] += metrics.f1_score(y_test, pred)
        # NOTE: AUC is computed from hard 0/1 predictions (as in the original);
        # decision_function/predict_proba scores would give a finer-grained AUC.
        sums[2] += metrics.roc_auc_score(y_test, pred)

# Print the cross-validation averages.  Fixed: the original printed the stale
# fold counter and the label "... result:" for SVC/RandomForest instead of
# "... average result:" like the GaussianNB block.
for idx, ((name, _), (acc_sum, f1_sum, auc_sum)) in enumerate(zip(models, metric_sums)):
    if idx:
        print()  # blank separator line between models, matching the original
    print(name, "average result:")
    print(acc_sum / n_splits)
    print(f1_sum / n_splits)
    print(auc_sum / n_splits)

其中的一次結果如下:

GaussianNB average result:
0.9039999999999999
0.9041081812466965
0.9043535790181867


SVC average result:
0.9560000000000001
0.9562470859437677
0.955828927779609


RandomForestClassifier average result:
0.97
0.9703207038834705
0.9698537808461255


經過多次測試,通常情況下

準確度從小到大爲:

樸素貝葉斯<SVC<RandomForest


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章