# 樸素貝葉斯 (Naive Bayes)
# coding=utf8
from sklearn.naive_bayes import MultinomialNB
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
def naive_bayes():
    """Classify newsgroup articles with a multinomial naive Bayes model.

    Loads the data returned by ``fetch_20newsgroups()``, holds out 25% of it
    as a test split (fixed ``random_state`` for reproducibility), converts the
    raw text to TF-IDF features, fits ``MultinomialNB`` and prints the
    vocabulary, the predicted labels, the accuracy and the classification
    report.

    :return: None; every result is written to stdout.
    """
    # Load the dataset.
    data = fetch_20newsgroups()

    # Split into training and test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.25, random_state=42
    )

    # Feature extraction: fit the vocabulary/IDF weights on the training text
    # only, then reuse them for the test text so the columns line up.
    tf = TfidfVectorizer()
    X_train = tf.fit_transform(X_train)
    X_test = tf.transform(X_test)

    # ``get_feature_names`` was deprecated in scikit-learn 1.0 and removed in
    # 1.2. Prefer ``get_feature_names_out`` and fall back to the old method
    # only on releases that predate it. ``list(...)`` keeps the printed form
    # a plain list, as before.
    if hasattr(tf, "get_feature_names_out"):
        feature_names = list(tf.get_feature_names_out())
    else:
        feature_names = tf.get_feature_names()
    print(feature_names)

    # Train the naive Bayes classifier (alpha is the additive smoothing term).
    mlt = MultinomialNB(alpha=1.0)
    mlt.fit(X_train, y_train)

    # Predicted category for every test article.
    y_pred = mlt.predict(X_test)
    print("文章的預測類別:", y_pred)

    # Model evaluation: accuracy first, then the classification report.
    # NOTE(review): the report is labelled "recall:" but classification_report
    # output is not limited to recall; label kept unchanged for compatibility.
    print("預測準確率:", mlt.score(X_test, y_test))
    print("recall:", classification_report(y_test, y_pred, target_names=data.target_names))
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    naive_bayes()