sklearn多標籤分類算法練習

1.例1

import numpy as np
import pandas as pd
import scipy
from scipy.io import arff

# Load the multi-label Yeast dataset (MEKA ARFF distribution).
# Use the explicitly imported `arff` module rather than reaching through
# `scipy.io.arff`, which only resolved via the import's side effect.
# NOTE(review): path is machine-specific — point it at your local copy.
data, meta = arff.loadarff('D:/Programs/meka1.9.2/data/Yeast.arff')
df = pd.DataFrame(data)
columns1 = df.columns.tolist()
df.info()

# Split the DataFrame into label and feature matrices as numpy arrays.
# In this ARFF file the first 14 columns are the binary labels
# (cast to int); the remaining columns are the features.
df_y = df[columns1[:14]].astype(int).values
df_x = df[columns1[14:]].values


# Method 1: Binary Relevance — train one independent binary classifier
# per label (label correlations are ignored).
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(
    df_x, df_y, test_size=0.33, random_state=42)

# Binary relevance multi-label classifier with a Gaussian naive Bayes
# base classifier.
classifier = BinaryRelevance(GaussianNB())
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
# Subset accuracy: a sample counts as correct only if ALL of its labels
# are predicted correctly. Print it so the value is visible when this
# runs as a script (a bare expression would be silently discarded).
print(accuracy_score(y_test, predictions))


# Example 2: OneVsRest — for each label, the target class is treated as
# positive and all other classes as negative.
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

# One-vs-rest wrapper around an SVM; n_jobs=-1 fits the per-label
# estimators in parallel.
# clf1 = OneVsRestClassifier(SVC(kernel='linear'), n_jobs=-1)
clf1 = OneVsRestClassifier(SVC(kernel='poly'), n_jobs=-1)
# train
clf1.fit(X_train, y_train)
# (A stray line of pasted REPL repr output that re-instantiated the
# classifier was removed here — it served no purpose.)
# Predicted label matrix for the test set.
predict_class = clf1.predict(X_test)
# Mean accuracy of predictions against the true labels; printed so the
# value is visible when run as a script.
print(clf1.score(X_test, y_test))


# Example 3: Label Powerset — treat every distinct combination of the k
# labels (up to 2^k combinations) as one class of a single multi-class
# problem.
from skmultilearn.problem_transform import LabelPowerset
from sklearn.naive_bayes import GaussianNB

# Label Powerset multi-label classifier with a Gaussian naive Bayes
# base classifier.
classifier = LabelPowerset(GaussianNB())
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
# Print subset accuracy (bare expression would be discarded in a script).
print(accuracy_score(y_test, predictions))


# Example 4: algorithm adaptation — multi-label k-nearest neighbours
# (MLkNN), which extends kNN natively to multi-label targets.
from skmultilearn.adapt import MLkNN

# k = 20 neighbours considered per test sample.
classifier = MLkNN(k=20)
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
# Print subset accuracy (bare expression would be discarded in a script).
print(accuracy_score(y_test, predictions))


# Example 5: Classifier Chains — like binary relevance, but each
# classifier in the chain also receives the previous labels'
# predictions as extra features, so label correlations are exploited.
from skmultilearn.problem_transform import ClassifierChain
from sklearn.naive_bayes import GaussianNB

# Classifier-chain multi-label classifier with a Gaussian naive Bayes
# base classifier.
classifier = ClassifierChain(GaussianNB())
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
# Print subset accuracy (bare expression would be discarded in a script).
print(accuracy_score(y_test, predictions))

2.例2

from skmultilearn.dataset import load_dataset

# Load the 'emotions' benchmark dataset with its predefined
# train/test split.
X_train, y_train, feature_names, label_names = load_dataset('emotions', 'train')
X_test, y_test, _, _ = load_dataset('emotions', 'test')
# Print instead of leaving bare expressions (whose values are silently
# discarded when this runs as a script rather than in a REPL).
print(feature_names[:10])
print(label_names)

from skmultilearn.problem_transform import BinaryRelevance
from sklearn.svm import SVC

# require_dense=[False, True]: keep X in its sparse form but densify y
# before handing it to the base SVC.
clf = BinaryRelevance(
    classifier=SVC(),
    require_dense=[False, True]
)
clf.fit(X_train, y_train)
# Show the base classifier configuration.
print(clf.classifier)
prediction = clf.predict(X_test)

import sklearn.metrics as metrics
# Hamming loss: fraction of individual label assignments that are wrong.
print("x=", metrics.hamming_loss(y_test, prediction))
# Subset accuracy: all labels of a sample must match to count.
print("s=", metrics.accuracy_score(y_test, prediction))
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章