朴素贝叶斯进行新闻分类

朴素贝叶斯

# coding=utf8
from sklearn.naive_bayes import MultinomialNB
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report

def naive_bayes():
    """
    Classify 20-newsgroups articles with a multinomial naive Bayes model.

    Loads the dataset via ``fetch_20newsgroups()`` (default arguments),
    holds out 25% of it for testing, converts the raw text to TF-IDF
    features, fits ``MultinomialNB`` and prints the vocabulary, the
    predicted labels, the accuracy and a per-class classification report.

    :return: None; all results are written to stdout.
    """
    # Load the dataset (downloaded and cached by scikit-learn on first use).
    data = fetch_20newsgroups()

    # Split into train / test portions; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.25, random_state=42
    )

    # Extract TF-IDF features. The vectorizer is fitted on the training
    # documents only and then reused, unchanged, to transform the test set.
    tf = TfidfVectorizer()

    X_train = tf.fit_transform(X_train)
    X_test = tf.transform(X_test)

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer get_feature_names_out() and fall back only on old releases.
    # .tolist() keeps the printed value a plain list of str, as before.
    if hasattr(tf, "get_feature_names_out"):
        feature_names = tf.get_feature_names_out().tolist()
    else:
        feature_names = tf.get_feature_names()
    print(feature_names)

    # Fit the naive Bayes classifier (alpha is the additive smoothing parameter).
    mlt = MultinomialNB(alpha=1.0)
    mlt.fit(X_train, y_train)

    # Predicted category for every test article.
    y_pred = mlt.predict(X_test)
    print("文章的预测类别:", y_pred)

    # Model evaluation: accuracy on the held-out set, then the full
    # classification report (the report is printed under the "recall:" label).
    print("预测准确率:", mlt.score(X_test, y_test))
    print("recall:", classification_report(y_test, y_pred, target_names=data.target_names))

# Run the classification demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    naive_bayes()

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章