# Naive Bayes (朴素贝叶斯)
# coding=utf8
from sklearn.naive_bayes import MultinomialNB
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
def naive_bayes():
    """Classify 20-newsgroups articles with a multinomial naive Bayes model.

    The function loads the dataset with ``fetch_20newsgroups()`` (default
    arguments), holds out 25% of it as a test set, converts the raw text to
    TF-IDF features, trains ``MultinomialNB`` and prints the vocabulary, the
    predicted labels, the accuracy and the per-class classification report.

    :return: None; all results are written to standard output.
    """
    # Load the data set (scikit-learn fetches and caches it on first use).
    data = fetch_20newsgroups()

    # Split into train/test parts; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.25, random_state=42
    )

    # Extract TF-IDF features. The vectorizer is fitted on the training
    # documents only and then re-used, unchanged, for the test documents.
    tf = TfidfVectorizer()
    X_train = tf.fit_transform(X_train)
    X_test = tf.transform(X_test)

    # ``get_feature_names`` was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer ``get_feature_names_out`` and fall back only when running
    # against an older release that does not provide it yet.
    feature_names_fn = getattr(tf, "get_feature_names_out", None)
    if feature_names_fn is None:
        feature_names_fn = tf.get_feature_names
    # ``list(...)`` keeps the printed form a plain list, as it was before.
    print(list(feature_names_fn()))

    # Train the naive Bayes classifier with smoothing parameter alpha=1.0.
    mlt = MultinomialNB(alpha=1.0)
    mlt.fit(X_train, y_train)

    # Predicted category for every test article.
    y_pred = mlt.predict(X_test)
    print("文章的预测类别:", y_pred)

    # Model evaluation: mean accuracy, then the per-class report.
    print("预测准确率:", mlt.score(X_test, y_test))
    print("recall:", classification_report(y_test, y_pred, target_names=data.target_names))
if __name__ == "__main__":
    # Run the demo only when this file is executed as a script, not when it
    # is imported as a module.
    naive_bayes()