初識機器學習 | 2.如何評價模型好壞

在這裏插入圖片描述
下例使用乳癌數據集訓練分類模型,並通過多個角度評估模型。

import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# 1) Load the breast-cancer dataset bundled with scikit-learn
cancer = load_breast_cancer()

# 2) Split into training and test sets (train_test_split defaults)
x_train, x_test, y_train, y_test = train_test_split(cancer.data, cancer.target)

# 3) Feature engineering: standardize the features
transformer = StandardScaler()
x_train = transformer.fit_transform(x_train)
# Reuse the statistics fitted on the training set. Re-fitting the scaler on
# the test set would scale train and test data differently and leak
# test-set information into the preprocessing step.
x_test = transformer.transform(x_test)

# 4) Logistic regression classifier
estimator = LogisticRegression()
estimator.fit(x_train, y_train)

# 5) Model evaluation
score = estimator.score(x_test, y_test)
print(u"準確率: \n", score)
y_predict = estimator.predict(x_test)
準確率: 
 0.986013986013986


/Users/cleland/.pyenv/versions/3.7.1/envs/base/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)

準確度

accuracy = 正確的數據量 / 樣本量

def score(y_predict, y_ture):
    """Return the accuracy: the fraction of predictions that match the labels.

    Args:
        y_predict: array of predicted labels, compared element-wise.
        y_ture: array of ground-truth labels; must expose a ``size``
            attribute (e.g. ``numpy.ndarray``). The parameter name is kept
            as originally spelled so keyword callers keep working.

    Returns:
        The number of matching positions divided by ``y_ture.size``.
    """
    # Compare against the argument itself; the previous body read the global
    # ``y_test`` and ignored the labels that were passed in.
    return sum(y_predict == y_ture) / y_ture.size

# Compute both accuracies first, then print the hand-written result next to
# the estimator's built-in score for comparison.
manual_accuracy = score(y_predict, y_test)
sklearn_accuracy = estimator.score(x_test, y_test)
print(u"準確率: ", manual_accuracy)
print(u"sklearn 準確率: ", sklearn_accuracy)
準確率:  0.986013986013986
sklearn 準確率:  0.986013986013986

混淆矩陣

  • 0 - Negative(陰性,消極)
  • 1 - Positive(陽性,積極)
| | 預測值_0 | 預測值_1 |
|---|---|---|
| 真實值_0 | TN | FP |
| 真實值_1 | FN | TP |
def TN(y_predict, y_true):
    """Count true negatives: samples predicted 0 whose true label is 0."""
    predicted_negative = y_predict == 0
    actually_negative = y_true == 0
    return sum(predicted_negative & actually_negative)

def FN(y_predict, y_true):
    """Count false negatives: samples predicted 0 whose true label is 1."""
    predicted_negative = y_predict == 0
    actually_positive = y_true == 1
    return sum(predicted_negative & actually_positive)

def FP(y_predict, y_true):
    """Count false positives: samples predicted 1 whose true label is 0."""
    predicted_positive = y_predict == 1
    actually_negative = y_true == 0
    return sum(predicted_positive & actually_negative)

def TP(y_predict, y_true):
    """Count true positives: samples predicted 1 whose true label is 1."""
    predicted_positive = y_predict == 1
    actually_positive = y_true == 1
    return sum(predicted_positive & actually_positive)

# Print each confusion-matrix cell computed by the hand-written counters.
for cell_label, cell_counter in (("TN: ", TN), ("FN: ", FN), ("FP: ", FP), ("TP: ", TP)):
    print(cell_label, cell_counter(y_predict, y_test))


# The same counts as reported by scikit-learn, for comparison.
from sklearn.metrics import confusion_matrix
print(u'sklearn 混淆矩陣: \n', confusion_matrix(y_test, y_predict))
TN:  51
FN:  1
FP:  1
TP:  90
sklearn 混淆矩陣: 
 [[51  1]
 [ 1 90]]

精確度

$$\text{precision} = \frac{TP}{TP + FP}$$

def precision(y_predict, y_true):
    """Return the precision TP / (TP + FP) for the positive class (label 1).

    Args:
        y_predict: array of predicted labels.
        y_true: array of ground-truth labels.

    Returns:
        The precision, or 0.0 when no sample was predicted positive
        (TP + FP == 0).
    """
    true_positives = TP(y_predict, y_true)
    predicted_positives = true_positives + FP(y_predict, y_true)
    # Check the denominator explicitly: with NumPy inputs the counts are
    # NumPy integers, whose 0 / 0 evaluates to nan with a RuntimeWarning
    # instead of raising ZeroDivisionError, so a try/except around the
    # division does not catch that case.
    if predicted_positives == 0:
        return 0.0
    return true_positives / predicted_positives

# Hand-written precision.
manual_precision = precision(y_predict, y_test)
print(u"精確率: \n", manual_precision)

# scikit-learn's precision on the same labels and predictions.
from sklearn.metrics import precision_score
sklearn_precision = precision_score(y_test, y_predict)
print(u'sklearn 精確度: \n', sklearn_precision)
精確率: 
 0.989010989010989
sklearn 精確度: 
 0.989010989010989

召回率

$$\text{recall} = \frac{TP}{TP + FN}$$

def recall(y_predict, y_true):
    """Return the recall TP / (TP + FN) for the positive class (label 1).

    Args:
        y_predict: array of predicted labels.
        y_true: array of ground-truth labels.

    Returns:
        The recall, or 0.0 when there are no actual positives
        (TP + FN == 0).
    """
    true_positives = TP(y_predict, y_true)
    actual_positives = true_positives + FN(y_predict, y_true)
    # Check the denominator explicitly: with NumPy inputs the counts are
    # NumPy integers, whose 0 / 0 evaluates to nan with a RuntimeWarning
    # instead of raising ZeroDivisionError, so a try/except around the
    # division does not catch that case.
    if actual_positives == 0:
        return 0.0
    return true_positives / actual_positives

# Hand-written recall.
manual_recall = recall(y_predict, y_test)
print(u"召回率: \n", manual_recall)

# scikit-learn's recall on the same labels and predictions.
from sklearn.metrics import recall_score
sklearn_recall = recall_score(y_test, y_predict)
print(u'sklearn 召回率: \n', sklearn_recall)
召回率: 
 0.989010989010989
sklearn 召回率: 
 0.989010989010989

F1 Score

$$F_1 = \frac{2 \cdot \text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}}$$

def f1(y_predict, y_true):
    """Return the F1 score: the harmonic mean of precision and recall.

    Args:
        y_predict: array of predicted labels.
        y_true: array of ground-truth labels.

    Returns:
        2 * precision * recall / (precision + recall), or 0.0 when
        precision + recall is zero.
    """
    # Local names deliberately differ from sklearn's precision_score and
    # recall_score, which this file imports at module level.
    precision_value = precision(y_predict, y_true)
    recall_value = recall(y_predict, y_true)
    denominator = precision_value + recall_value
    # Check for zero explicitly: NumPy floats yield nan (with a
    # RuntimeWarning) on 0 / 0 rather than raising ZeroDivisionError.
    if denominator == 0:
        return 0.0
    return (2 * precision_value * recall_value) / denominator

# Hand-written F1 score.
manual_f1 = f1(y_predict, y_test)
print(u"F1 Score: \n", manual_f1)

# scikit-learn's F1 score on the same labels and predictions.
from sklearn.metrics import f1_score
sklearn_f1 = f1_score(y_test, y_predict)
print(u'sklearn F1 Score: \n', sklearn_f1)

F1 Score: 
 0.989010989010989
sklearn F1 Score: 
 0.989010989010989

平衡精確率與召回率

# Decision-function output of the fitted estimator on the test features; the
# code further down thresholds these scores to trade precision against recall.
decision_scores = estimator.decision_function(x_test)

自己繪製精確率與召回率的關係

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# Candidate thresholds: from the smallest decision score up to (but not
# including) the largest, in increments of 0.1.
steps = np.arange(np.min(decision_scores), np.max(decision_scores), 0.1)
precisions, recalls = [], []
for step in steps:
    # Predict class 1 wherever the score reaches the current threshold.
    y_predict = (decision_scores >= step).astype('int')
    precisions.append(precision_score(y_test, y_predict))
    recalls.append(recall_score(y_test, y_predict))

# Precision and recall as functions of the threshold.
for curve_name, curve in (('precisions', precisions), ('recalls', recalls)):
    plt.plot(steps, curve, label=curve_name)
plt.legend(loc='lower left')
plt.show()

# Recall (y-axis) against precision (x-axis).
plt.plot(precisions, recalls)
plt.show()

sklearn

from sklearn.metrics import precision_recall_curve
# Let scikit-learn compute the precision/recall pairs and their thresholds.
precisions, recalls, steps = precision_recall_curve(y_test, decision_scores)

# Report the shape of each returned array.
for array_name, array in (('precisions', precisions), ('recalls', recalls), ('steps', steps)):
    print(array_name, array.shape)
precisions (93,)
recalls (93,)
steps (92,)
# precisions and recalls hold one more element than steps (93 vs 92 in the
# printed shapes), so the last entry is dropped to align them with the
# thresholds.
for curve_name, curve in (('precisions', precisions), ('recalls', recalls)):
    plt.plot(steps, curve[:-1], label=curve_name)
plt.legend(loc='lower left')
plt.show()

ROC

$$TPR = \frac{TP}{TP + FN}$$

$$FPR = \frac{FP}{TN + FP}$$

from sklearn.metrics import roc_curve

# roc_curve's three return values are unpacked as false-positive rates,
# true-positive rates and thresholds. Note that this rebinds ``steps``, which
# previously held the precision/recall thresholds.
fprs, tprs, steps = roc_curve(y_test, decision_scores)
# ROC curve: TPR (y-axis) against FPR (x-axis).
plt.plot(fprs, tprs)
plt.show()

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章