樸素貝葉斯進行新聞分類

樸素貝葉斯

# coding=utf8
from sklearn.naive_bayes import MultinomialNB
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report

def naive_bayes():
    """Classify 20-newsgroups posts with a multinomial naive Bayes model.

    Loads the dataset through ``fetch_20newsgroups()`` with its default
    arguments, splits it 75/25 with a fixed seed, builds TF-IDF features,
    trains ``MultinomialNB`` and prints the vocabulary, the predicted
    labels, the accuracy and a per-class classification report.

    :return: None; all results are written to standard output.
    """
    # Load the data set.
    data = fetch_20newsgroups()

    # Split into training and test portions (fixed seed for repeatability).
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.25, random_state=42
    )

    # Extract TF-IDF features. The vectorizer is fitted on the training
    # texts only and then reused, unchanged, to transform the test texts.
    tf = TfidfVectorizer()

    X_train = tf.fit_transform(X_train)
    X_test = tf.transform(X_test)

    # ``get_feature_names`` was removed from newer scikit-learn releases in
    # favour of ``get_feature_names_out``; prefer the new method and fall
    # back to the old one so the script runs on either side of the change.
    # ``tolist()`` keeps the printed form a plain list of strings.
    if hasattr(tf, "get_feature_names_out"):
        feature_names = tf.get_feature_names_out().tolist()
    else:
        feature_names = tf.get_feature_names()
    print(feature_names)

    # Train the naive Bayes classifier (alpha is the smoothing parameter).
    mlt = MultinomialNB(alpha=1.0)
    mlt.fit(X_train, y_train)

    # Predicted category for each test article.
    y_pred = mlt.predict(X_test)
    print("文章的預測類別:", y_pred)

    # Evaluate the model: overall accuracy, then the per-class report.
    # NOTE: the "recall:" label is kept as-is for output compatibility even
    # though the value printed is the complete classification report.
    print("預測準確率:", mlt.score(X_test, y_test))
    print("recall:", classification_report(y_test, y_pred, target_names=data.target_names))

# Run the classification demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    naive_bayes()

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章